{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 29140, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.4317089910775565e-05, "grad_norm": 5.499968011688152, "learning_rate": 0.0, "loss": 0.77, "step": 1 }, { "epoch": 6.863417982155113e-05, "grad_norm": 4.220692247501644, "learning_rate": 1.142857142857143e-08, "loss": 0.6856, "step": 2 }, { "epoch": 0.0001029512697323267, "grad_norm": 5.4384471278782085, "learning_rate": 2.285714285714286e-08, "loss": 0.7387, "step": 3 }, { "epoch": 0.00013726835964310226, "grad_norm": 5.04697285918977, "learning_rate": 3.4285714285714286e-08, "loss": 0.7376, "step": 4 }, { "epoch": 0.00017158544955387783, "grad_norm": 5.26612251477461, "learning_rate": 4.571428571428572e-08, "loss": 0.7042, "step": 5 }, { "epoch": 0.0002059025394646534, "grad_norm": 5.132427311630669, "learning_rate": 5.714285714285715e-08, "loss": 0.6902, "step": 6 }, { "epoch": 0.00024021962937542896, "grad_norm": 5.046653930775454, "learning_rate": 6.857142857142857e-08, "loss": 0.7854, "step": 7 }, { "epoch": 0.0002745367192862045, "grad_norm": 5.9635278997403205, "learning_rate": 8e-08, "loss": 0.7321, "step": 8 }, { "epoch": 0.0003088538091969801, "grad_norm": 4.280000638570349, "learning_rate": 9.142857142857144e-08, "loss": 0.7686, "step": 9 }, { "epoch": 0.00034317089910775565, "grad_norm": 5.22096254069719, "learning_rate": 1.0285714285714286e-07, "loss": 0.729, "step": 10 }, { "epoch": 0.0003774879890185312, "grad_norm": 5.596918108135708, "learning_rate": 1.142857142857143e-07, "loss": 0.8217, "step": 11 }, { "epoch": 0.0004118050789293068, "grad_norm": 4.716194610449657, "learning_rate": 1.2571428571428572e-07, "loss": 0.7301, "step": 12 }, { "epoch": 0.00044612216884008235, "grad_norm": 7.479963622152302, "learning_rate": 1.3714285714285715e-07, "loss": 0.7422, "step": 13 }, { "epoch": 0.0004804392587508579, "grad_norm": 6.902284362290883, "learning_rate": 1.4857142857142857e-07, "loss": 0.759, "step": 14 }, { "epoch": 0.0005147563486616335, "grad_norm": 6.012620661925655, "learning_rate": 1.6e-07, "loss": 0.7222, "step": 15 }, { "epoch": 0.000549073438572409, "grad_norm": 5.3038908222621, "learning_rate": 1.7142857142857146e-07, "loss": 0.6616, "step": 16 }, { "epoch": 0.0005833905284831847, "grad_norm": 4.664447298360061, "learning_rate": 1.828571428571429e-07, "loss": 0.723, "step": 17 }, { "epoch": 0.0006177076183939602, "grad_norm": 4.539552467615656, "learning_rate": 1.942857142857143e-07, "loss": 0.762, "step": 18 }, { "epoch": 0.0006520247083047358, "grad_norm": 5.526387043062544, "learning_rate": 2.0571428571428572e-07, "loss": 0.7041, "step": 19 }, { "epoch": 0.0006863417982155113, "grad_norm": 5.141329389470742, "learning_rate": 2.1714285714285715e-07, "loss": 0.6988, "step": 20 }, { "epoch": 0.0007206588881262869, "grad_norm": 4.899594996815149, "learning_rate": 2.285714285714286e-07, "loss": 0.6801, "step": 21 }, { "epoch": 0.0007549759780370624, "grad_norm": 4.957792137991793, "learning_rate": 2.4000000000000003e-07, "loss": 0.6951, "step": 22 }, { "epoch": 0.000789293067947838, "grad_norm": 4.906450683934331, "learning_rate": 2.5142857142857143e-07, "loss": 0.7591, "step": 23 }, { "epoch": 0.0008236101578586136, "grad_norm": 4.917569250800057, "learning_rate": 2.628571428571429e-07, "loss": 0.6949, "step": 24 }, { "epoch": 0.0008579272477693892, "grad_norm": 5.0848250606712995, "learning_rate": 2.742857142857143e-07, "loss": 0.785, "step": 25 }, { "epoch": 0.0008922443376801647, "grad_norm": 4.219562350592378, "learning_rate": 2.8571428571428575e-07, "loss": 0.7035, "step": 26 }, { "epoch": 0.0009265614275909403, "grad_norm": 4.729662645797646, "learning_rate": 2.9714285714285715e-07, "loss": 0.7407, "step": 27 }, { "epoch": 0.0009608785175017158, "grad_norm": 4.082590922421693, "learning_rate": 3.085714285714286e-07, "loss": 0.698, "step": 28 }, { "epoch": 0.0009951956074124913, "grad_norm": 3.955687598020697, "learning_rate": 3.2e-07, "loss": 0.6857, "step": 29 }, { "epoch": 0.001029512697323267, "grad_norm": 3.600906629556292, "learning_rate": 3.314285714285714e-07, "loss": 0.7239, "step": 30 }, { "epoch": 0.0010638297872340426, "grad_norm": 3.4676149745233684, "learning_rate": 3.428571428571429e-07, "loss": 0.6939, "step": 31 }, { "epoch": 0.001098146877144818, "grad_norm": 3.4137231531984344, "learning_rate": 3.542857142857143e-07, "loss": 0.7121, "step": 32 }, { "epoch": 0.0011324639670555936, "grad_norm": 3.668241750433041, "learning_rate": 3.657142857142858e-07, "loss": 0.6358, "step": 33 }, { "epoch": 0.0011667810569663693, "grad_norm": 3.9448996307135236, "learning_rate": 3.771428571428572e-07, "loss": 0.6985, "step": 34 }, { "epoch": 0.0012010981468771448, "grad_norm": 3.6377972256797637, "learning_rate": 3.885714285714286e-07, "loss": 0.7963, "step": 35 }, { "epoch": 0.0012354152367879203, "grad_norm": 3.37533082112631, "learning_rate": 4.0000000000000003e-07, "loss": 0.6404, "step": 36 }, { "epoch": 0.0012697323266986959, "grad_norm": 3.341254578485645, "learning_rate": 4.1142857142857144e-07, "loss": 0.7823, "step": 37 }, { "epoch": 0.0013040494166094716, "grad_norm": 3.1638496781566734, "learning_rate": 4.228571428571429e-07, "loss": 0.7634, "step": 38 }, { "epoch": 0.001338366506520247, "grad_norm": 3.2597251256195605, "learning_rate": 4.342857142857143e-07, "loss": 0.7196, "step": 39 }, { "epoch": 0.0013726835964310226, "grad_norm": 3.0943035237681116, "learning_rate": 4.457142857142858e-07, "loss": 0.7405, "step": 40 }, { "epoch": 0.0014070006863417981, "grad_norm": 3.045413319646286, "learning_rate": 4.571428571428572e-07, "loss": 0.6645, "step": 41 }, { "epoch": 0.0014413177762525738, "grad_norm": 3.1538529394229737, "learning_rate": 4.6857142857142855e-07, "loss": 0.5806, "step": 42 }, { "epoch": 0.0014756348661633494, "grad_norm": 2.4193848717009616, "learning_rate": 4.800000000000001e-07, "loss": 0.6162, "step": 43 }, { "epoch": 0.0015099519560741249, "grad_norm": 2.9377760068835848, "learning_rate": 4.914285714285714e-07, "loss": 0.6248, "step": 44 }, { "epoch": 0.0015442690459849004, "grad_norm": 2.6661954761812514, "learning_rate": 5.028571428571429e-07, "loss": 0.6617, "step": 45 }, { "epoch": 0.001578586135895676, "grad_norm": 2.711365494132507, "learning_rate": 5.142857142857143e-07, "loss": 0.6387, "step": 46 }, { "epoch": 0.0016129032258064516, "grad_norm": 2.6868206147779885, "learning_rate": 5.257142857142858e-07, "loss": 0.7202, "step": 47 }, { "epoch": 0.0016472203157172271, "grad_norm": 2.4497174700930953, "learning_rate": 5.371428571428572e-07, "loss": 0.7004, "step": 48 }, { "epoch": 0.0016815374056280029, "grad_norm": 2.570590761072381, "learning_rate": 5.485714285714286e-07, "loss": 0.6621, "step": 49 }, { "epoch": 0.0017158544955387784, "grad_norm": 2.3446567935723, "learning_rate": 5.6e-07, "loss": 0.6647, "step": 50 }, { "epoch": 0.0017501715854495539, "grad_norm": 2.3032655160662787, "learning_rate": 5.714285714285715e-07, "loss": 0.7225, "step": 51 }, { "epoch": 0.0017844886753603294, "grad_norm": 2.351721638330525, "learning_rate": 5.82857142857143e-07, "loss": 0.6842, "step": 52 }, { "epoch": 0.0018188057652711051, "grad_norm": 2.3898322935732637, "learning_rate": 5.942857142857143e-07, "loss": 0.6955, "step": 53 }, { "epoch": 0.0018531228551818806, "grad_norm": 2.181818323185616, "learning_rate": 6.057142857142858e-07, "loss": 0.5928, "step": 54 }, { "epoch": 0.0018874399450926561, "grad_norm": 2.4171120333705796, "learning_rate": 6.171428571428572e-07, "loss": 0.6337, "step": 55 }, { "epoch": 0.0019217570350034316, "grad_norm": 2.00875777529754, "learning_rate": 6.285714285714287e-07, "loss": 0.652, "step": 56 }, { "epoch": 0.0019560741249142074, "grad_norm": 2.0467068804717314, "learning_rate": 6.4e-07, "loss": 0.6413, "step": 57 }, { "epoch": 0.0019903912148249827, "grad_norm": 2.01812133740771, "learning_rate": 6.514285714285715e-07, "loss": 0.6209, "step": 58 }, { "epoch": 0.0020247083047357584, "grad_norm": 1.939841532088563, "learning_rate": 6.628571428571428e-07, "loss": 0.6529, "step": 59 }, { "epoch": 0.002059025394646534, "grad_norm": 1.7590096471010765, "learning_rate": 6.742857142857144e-07, "loss": 0.649, "step": 60 }, { "epoch": 0.0020933424845573094, "grad_norm": 1.952192754381422, "learning_rate": 6.857142857142858e-07, "loss": 0.5434, "step": 61 }, { "epoch": 0.002127659574468085, "grad_norm": 1.7869121057441837, "learning_rate": 6.971428571428572e-07, "loss": 0.5883, "step": 62 }, { "epoch": 0.002161976664378861, "grad_norm": 1.7563699989387431, "learning_rate": 7.085714285714286e-07, "loss": 0.6225, "step": 63 }, { "epoch": 0.002196293754289636, "grad_norm": 1.9902576529637306, "learning_rate": 7.2e-07, "loss": 0.7038, "step": 64 }, { "epoch": 0.002230610844200412, "grad_norm": 1.9306433492561186, "learning_rate": 7.314285714285715e-07, "loss": 0.6107, "step": 65 }, { "epoch": 0.002264927934111187, "grad_norm": 1.8037419151971508, "learning_rate": 7.428571428571429e-07, "loss": 0.5624, "step": 66 }, { "epoch": 0.002299245024021963, "grad_norm": 1.7174803805837184, "learning_rate": 7.542857142857144e-07, "loss": 0.5833, "step": 67 }, { "epoch": 0.0023335621139327386, "grad_norm": 1.535889497172449, "learning_rate": 7.657142857142857e-07, "loss": 0.5982, "step": 68 }, { "epoch": 0.002367879203843514, "grad_norm": 1.8386755358266427, "learning_rate": 7.771428571428572e-07, "loss": 0.6188, "step": 69 }, { "epoch": 0.0024021962937542897, "grad_norm": 4.742192298174231, "learning_rate": 7.885714285714287e-07, "loss": 0.5682, "step": 70 }, { "epoch": 0.0024365133836650654, "grad_norm": 1.7690646516964355, "learning_rate": 8.000000000000001e-07, "loss": 0.5795, "step": 71 }, { "epoch": 0.0024708304735758407, "grad_norm": 1.7438511954499645, "learning_rate": 8.114285714285715e-07, "loss": 0.6084, "step": 72 }, { "epoch": 0.0025051475634866164, "grad_norm": 1.4997856048671157, "learning_rate": 8.228571428571429e-07, "loss": 0.5694, "step": 73 }, { "epoch": 0.0025394646533973917, "grad_norm": 1.6599129754969026, "learning_rate": 8.342857142857144e-07, "loss": 0.6078, "step": 74 }, { "epoch": 0.0025737817433081674, "grad_norm": 1.6093353888766044, "learning_rate": 8.457142857142858e-07, "loss": 0.5638, "step": 75 }, { "epoch": 0.002608098833218943, "grad_norm": 1.5515520553456779, "learning_rate": 8.571428571428572e-07, "loss": 0.6712, "step": 76 }, { "epoch": 0.0026424159231297185, "grad_norm": 1.4821905126838961, "learning_rate": 8.685714285714286e-07, "loss": 0.5961, "step": 77 }, { "epoch": 0.002676733013040494, "grad_norm": 1.6421318956983533, "learning_rate": 8.8e-07, "loss": 0.6381, "step": 78 }, { "epoch": 0.00271105010295127, "grad_norm": 1.2905187646500254, "learning_rate": 8.914285714285716e-07, "loss": 0.543, "step": 79 }, { "epoch": 0.002745367192862045, "grad_norm": 1.6270941463488264, "learning_rate": 9.02857142857143e-07, "loss": 0.5682, "step": 80 }, { "epoch": 0.002779684282772821, "grad_norm": 1.2881519870166998, "learning_rate": 9.142857142857144e-07, "loss": 0.5704, "step": 81 }, { "epoch": 0.0028140013726835962, "grad_norm": 1.5054540732025492, "learning_rate": 9.257142857142858e-07, "loss": 0.5839, "step": 82 }, { "epoch": 0.002848318462594372, "grad_norm": 1.8187690109998085, "learning_rate": 9.371428571428571e-07, "loss": 0.63, "step": 83 }, { "epoch": 0.0028826355525051477, "grad_norm": 1.5096159599199546, "learning_rate": 9.485714285714287e-07, "loss": 0.6125, "step": 84 }, { "epoch": 0.002916952642415923, "grad_norm": 1.4227220998343495, "learning_rate": 9.600000000000001e-07, "loss": 0.6049, "step": 85 }, { "epoch": 0.0029512697323266987, "grad_norm": 1.3383522964140968, "learning_rate": 9.714285714285715e-07, "loss": 0.5068, "step": 86 }, { "epoch": 0.0029855868222374744, "grad_norm": 1.57995000657501, "learning_rate": 9.828571428571428e-07, "loss": 0.6918, "step": 87 }, { "epoch": 0.0030199039121482497, "grad_norm": 1.265233602907717, "learning_rate": 9.942857142857144e-07, "loss": 0.4576, "step": 88 }, { "epoch": 0.0030542210020590255, "grad_norm": 1.1751021047785843, "learning_rate": 1.0057142857142857e-06, "loss": 0.5269, "step": 89 }, { "epoch": 0.0030885380919698007, "grad_norm": 1.539005225153064, "learning_rate": 1.0171428571428573e-06, "loss": 0.6054, "step": 90 }, { "epoch": 0.0031228551818805765, "grad_norm": 1.382607276301428, "learning_rate": 1.0285714285714286e-06, "loss": 0.6406, "step": 91 }, { "epoch": 0.003157172271791352, "grad_norm": 1.4875361757952366, "learning_rate": 1.04e-06, "loss": 0.5301, "step": 92 }, { "epoch": 0.0031914893617021275, "grad_norm": 1.1335854012149946, "learning_rate": 1.0514285714285716e-06, "loss": 0.5514, "step": 93 }, { "epoch": 0.0032258064516129032, "grad_norm": 1.4223481470467856, "learning_rate": 1.062857142857143e-06, "loss": 0.6326, "step": 94 }, { "epoch": 0.003260123541523679, "grad_norm": 1.2825356736512323, "learning_rate": 1.0742857142857145e-06, "loss": 0.5595, "step": 95 }, { "epoch": 0.0032944406314344542, "grad_norm": 1.3294058692346153, "learning_rate": 1.0857142857142858e-06, "loss": 0.523, "step": 96 }, { "epoch": 0.00332875772134523, "grad_norm": 1.4063641557878823, "learning_rate": 1.0971428571428572e-06, "loss": 0.5634, "step": 97 }, { "epoch": 0.0033630748112560057, "grad_norm": 1.3766255792106794, "learning_rate": 1.1085714285714287e-06, "loss": 0.513, "step": 98 }, { "epoch": 0.003397391901166781, "grad_norm": 1.2718930511268127, "learning_rate": 1.12e-06, "loss": 0.5743, "step": 99 }, { "epoch": 0.0034317089910775567, "grad_norm": 1.2380630658341438, "learning_rate": 1.1314285714285714e-06, "loss": 0.5943, "step": 100 }, { "epoch": 0.003466026080988332, "grad_norm": 1.3305926576616556, "learning_rate": 1.142857142857143e-06, "loss": 0.6269, "step": 101 }, { "epoch": 0.0035003431708991077, "grad_norm": 1.1741769702747447, "learning_rate": 1.1542857142857143e-06, "loss": 0.522, "step": 102 }, { "epoch": 0.0035346602608098835, "grad_norm": 1.2822206109677183, "learning_rate": 1.165714285714286e-06, "loss": 0.5097, "step": 103 }, { "epoch": 0.0035689773507206588, "grad_norm": 1.1716035377604175, "learning_rate": 1.1771428571428572e-06, "loss": 0.54, "step": 104 }, { "epoch": 0.0036032944406314345, "grad_norm": 1.2355359414780065, "learning_rate": 1.1885714285714286e-06, "loss": 0.5478, "step": 105 }, { "epoch": 0.0036376115305422102, "grad_norm": 1.1098800518225915, "learning_rate": 1.2000000000000002e-06, "loss": 0.5273, "step": 106 }, { "epoch": 0.0036719286204529855, "grad_norm": 1.31150553240449, "learning_rate": 1.2114285714285715e-06, "loss": 0.4985, "step": 107 }, { "epoch": 0.0037062457103637612, "grad_norm": 1.0212832539572372, "learning_rate": 1.222857142857143e-06, "loss": 0.4707, "step": 108 }, { "epoch": 0.0037405628002745365, "grad_norm": 1.2112038105298282, "learning_rate": 1.2342857142857144e-06, "loss": 0.627, "step": 109 }, { "epoch": 0.0037748798901853123, "grad_norm": 1.2248400048795172, "learning_rate": 1.2457142857142858e-06, "loss": 0.6118, "step": 110 }, { "epoch": 0.003809196980096088, "grad_norm": 1.2089578665044614, "learning_rate": 1.2571428571428573e-06, "loss": 0.5843, "step": 111 }, { "epoch": 0.0038435140700068633, "grad_norm": 1.094627425443528, "learning_rate": 1.2685714285714287e-06, "loss": 0.5682, "step": 112 }, { "epoch": 0.003877831159917639, "grad_norm": 1.1715430857461289, "learning_rate": 1.28e-06, "loss": 0.5347, "step": 113 }, { "epoch": 0.003912148249828415, "grad_norm": 1.2105358893169231, "learning_rate": 1.2914285714285716e-06, "loss": 0.5313, "step": 114 }, { "epoch": 0.0039464653397391905, "grad_norm": 1.247652540568031, "learning_rate": 1.302857142857143e-06, "loss": 0.563, "step": 115 }, { "epoch": 0.003980782429649965, "grad_norm": 1.2313555057949375, "learning_rate": 1.3142857142857143e-06, "loss": 0.5619, "step": 116 }, { "epoch": 0.004015099519560741, "grad_norm": 1.2756774364002577, "learning_rate": 1.3257142857142856e-06, "loss": 0.5925, "step": 117 }, { "epoch": 0.004049416609471517, "grad_norm": 1.2706935438916533, "learning_rate": 1.3371428571428572e-06, "loss": 0.5071, "step": 118 }, { "epoch": 0.0040837336993822925, "grad_norm": 1.1073927707746212, "learning_rate": 1.3485714285714288e-06, "loss": 0.5505, "step": 119 }, { "epoch": 0.004118050789293068, "grad_norm": 1.0927387801572457, "learning_rate": 1.3600000000000001e-06, "loss": 0.5759, "step": 120 }, { "epoch": 0.004152367879203843, "grad_norm": 1.19713136273704, "learning_rate": 1.3714285714285717e-06, "loss": 0.498, "step": 121 }, { "epoch": 0.004186684969114619, "grad_norm": 1.4023002318658226, "learning_rate": 1.382857142857143e-06, "loss": 0.5918, "step": 122 }, { "epoch": 0.0042210020590253946, "grad_norm": 1.1809435949128166, "learning_rate": 1.3942857142857144e-06, "loss": 0.6535, "step": 123 }, { "epoch": 0.00425531914893617, "grad_norm": 1.0776415025500383, "learning_rate": 1.4057142857142857e-06, "loss": 0.515, "step": 124 }, { "epoch": 0.004289636238846946, "grad_norm": 1.1082363149183656, "learning_rate": 1.4171428571428573e-06, "loss": 0.542, "step": 125 }, { "epoch": 0.004323953328757722, "grad_norm": 1.2223106418742, "learning_rate": 1.4285714285714286e-06, "loss": 0.5611, "step": 126 }, { "epoch": 0.004358270418668497, "grad_norm": 0.990098921452616, "learning_rate": 1.44e-06, "loss": 0.4904, "step": 127 }, { "epoch": 0.004392587508579272, "grad_norm": 1.2361788669009413, "learning_rate": 1.4514285714285713e-06, "loss": 0.5858, "step": 128 }, { "epoch": 0.004426904598490048, "grad_norm": 1.1707617656284877, "learning_rate": 1.462857142857143e-06, "loss": 0.5331, "step": 129 }, { "epoch": 0.004461221688400824, "grad_norm": 1.09049355091428, "learning_rate": 1.4742857142857144e-06, "loss": 0.5507, "step": 130 }, { "epoch": 0.0044955387783115995, "grad_norm": 1.2197435491721744, "learning_rate": 1.4857142857142858e-06, "loss": 0.5288, "step": 131 }, { "epoch": 0.004529855868222374, "grad_norm": 1.2457546369971364, "learning_rate": 1.4971428571428574e-06, "loss": 0.5549, "step": 132 }, { "epoch": 0.00456417295813315, "grad_norm": 1.038770019554098, "learning_rate": 1.5085714285714287e-06, "loss": 0.5635, "step": 133 }, { "epoch": 0.004598490048043926, "grad_norm": 1.2484486956517178, "learning_rate": 1.52e-06, "loss": 0.6103, "step": 134 }, { "epoch": 0.0046328071379547016, "grad_norm": 1.0214651123742255, "learning_rate": 1.5314285714285714e-06, "loss": 0.5165, "step": 135 }, { "epoch": 0.004667124227865477, "grad_norm": 1.0625858438499067, "learning_rate": 1.542857142857143e-06, "loss": 0.5218, "step": 136 }, { "epoch": 0.004701441317776252, "grad_norm": 1.0266583557798652, "learning_rate": 1.5542857142857143e-06, "loss": 0.5474, "step": 137 }, { "epoch": 0.004735758407687028, "grad_norm": 1.0310041247458563, "learning_rate": 1.5657142857142859e-06, "loss": 0.5467, "step": 138 }, { "epoch": 0.004770075497597804, "grad_norm": 1.057782002143496, "learning_rate": 1.5771428571428574e-06, "loss": 0.6234, "step": 139 }, { "epoch": 0.004804392587508579, "grad_norm": 1.0888909474741364, "learning_rate": 1.5885714285714288e-06, "loss": 0.4938, "step": 140 }, { "epoch": 0.004838709677419355, "grad_norm": 1.0242687432593278, "learning_rate": 1.6000000000000001e-06, "loss": 0.5483, "step": 141 }, { "epoch": 0.004873026767330131, "grad_norm": 1.0406728326120878, "learning_rate": 1.6114285714285715e-06, "loss": 0.5717, "step": 142 }, { "epoch": 0.004907343857240906, "grad_norm": 1.0891779244328543, "learning_rate": 1.622857142857143e-06, "loss": 0.5342, "step": 143 }, { "epoch": 0.004941660947151681, "grad_norm": 1.0925299197509002, "learning_rate": 1.6342857142857144e-06, "loss": 0.5431, "step": 144 }, { "epoch": 0.004975978037062457, "grad_norm": 1.1297977643861379, "learning_rate": 1.6457142857142857e-06, "loss": 0.5292, "step": 145 }, { "epoch": 0.005010295126973233, "grad_norm": 1.0115187808921573, "learning_rate": 1.657142857142857e-06, "loss": 0.5282, "step": 146 }, { "epoch": 0.0050446122168840086, "grad_norm": 1.1051914597261425, "learning_rate": 1.6685714285714289e-06, "loss": 0.5286, "step": 147 }, { "epoch": 0.005078929306794783, "grad_norm": 1.0693675119383674, "learning_rate": 1.6800000000000002e-06, "loss": 0.5168, "step": 148 }, { "epoch": 0.005113246396705559, "grad_norm": 1.140273399792065, "learning_rate": 1.6914285714285716e-06, "loss": 0.5481, "step": 149 }, { "epoch": 0.005147563486616335, "grad_norm": 1.1192661439203047, "learning_rate": 1.7028571428571431e-06, "loss": 0.4675, "step": 150 }, { "epoch": 0.005181880576527111, "grad_norm": 1.014856292880158, "learning_rate": 1.7142857142857145e-06, "loss": 0.5014, "step": 151 }, { "epoch": 0.005216197666437886, "grad_norm": 1.0009139879417772, "learning_rate": 1.7257142857142858e-06, "loss": 0.5156, "step": 152 }, { "epoch": 0.005250514756348662, "grad_norm": 1.1389184740346374, "learning_rate": 1.7371428571428572e-06, "loss": 0.5564, "step": 153 }, { "epoch": 0.005284831846259437, "grad_norm": 0.9955638369582381, "learning_rate": 1.7485714285714287e-06, "loss": 0.4834, "step": 154 }, { "epoch": 0.005319148936170213, "grad_norm": 1.2512301359748847, "learning_rate": 1.76e-06, "loss": 0.5494, "step": 155 }, { "epoch": 0.005353466026080988, "grad_norm": 1.0291946787721973, "learning_rate": 1.7714285714285714e-06, "loss": 0.5476, "step": 156 }, { "epoch": 0.005387783115991764, "grad_norm": 1.1473477593789603, "learning_rate": 1.7828571428571432e-06, "loss": 0.4778, "step": 157 }, { "epoch": 0.00542210020590254, "grad_norm": 0.9639301815436975, "learning_rate": 1.7942857142857146e-06, "loss": 0.4747, "step": 158 }, { "epoch": 0.005456417295813315, "grad_norm": 1.056995443832311, "learning_rate": 1.805714285714286e-06, "loss": 0.5216, "step": 159 }, { "epoch": 0.00549073438572409, "grad_norm": 1.1491392370026603, "learning_rate": 1.8171428571428573e-06, "loss": 0.4339, "step": 160 }, { "epoch": 0.005525051475634866, "grad_norm": 0.994077099192546, "learning_rate": 1.8285714285714288e-06, "loss": 0.4804, "step": 161 }, { "epoch": 0.005559368565545642, "grad_norm": 1.075648610547987, "learning_rate": 1.8400000000000002e-06, "loss": 0.5257, "step": 162 }, { "epoch": 0.005593685655456418, "grad_norm": 0.9811335840169721, "learning_rate": 1.8514285714285715e-06, "loss": 0.4817, "step": 163 }, { "epoch": 0.0056280027453671925, "grad_norm": 1.1098229676090854, "learning_rate": 1.8628571428571429e-06, "loss": 0.5068, "step": 164 }, { "epoch": 0.005662319835277968, "grad_norm": 1.0601178496048813, "learning_rate": 1.8742857142857142e-06, "loss": 0.5073, "step": 165 }, { "epoch": 0.005696636925188744, "grad_norm": 1.2076277680552936, "learning_rate": 1.885714285714286e-06, "loss": 0.5744, "step": 166 }, { "epoch": 0.00573095401509952, "grad_norm": 1.0879019383128208, "learning_rate": 1.8971428571428573e-06, "loss": 0.4321, "step": 167 }, { "epoch": 0.005765271105010295, "grad_norm": 1.0718245910268274, "learning_rate": 1.9085714285714287e-06, "loss": 0.5446, "step": 168 }, { "epoch": 0.005799588194921071, "grad_norm": 1.1169069178840672, "learning_rate": 1.9200000000000003e-06, "loss": 0.5549, "step": 169 }, { "epoch": 0.005833905284831846, "grad_norm": 1.0603223455286144, "learning_rate": 1.9314285714285714e-06, "loss": 0.5175, "step": 170 }, { "epoch": 0.005868222374742622, "grad_norm": 1.0283589812399647, "learning_rate": 1.942857142857143e-06, "loss": 0.4267, "step": 171 }, { "epoch": 0.005902539464653397, "grad_norm": 1.1016265740936764, "learning_rate": 1.9542857142857145e-06, "loss": 0.4629, "step": 172 }, { "epoch": 0.005936856554564173, "grad_norm": 1.213449780732671, "learning_rate": 1.9657142857142856e-06, "loss": 0.5031, "step": 173 }, { "epoch": 0.005971173644474949, "grad_norm": 1.0851092861162857, "learning_rate": 1.977142857142857e-06, "loss": 0.4758, "step": 174 }, { "epoch": 0.006005490734385724, "grad_norm": 1.2626167080838262, "learning_rate": 1.9885714285714288e-06, "loss": 0.499, "step": 175 }, { "epoch": 0.0060398078242964994, "grad_norm": 1.097137003205027, "learning_rate": 2.0000000000000003e-06, "loss": 0.5169, "step": 176 }, { "epoch": 0.006074124914207275, "grad_norm": 1.1700418201722897, "learning_rate": 2.0114285714285715e-06, "loss": 0.4842, "step": 177 }, { "epoch": 0.006108442004118051, "grad_norm": 0.9979207601380884, "learning_rate": 2.022857142857143e-06, "loss": 0.5194, "step": 178 }, { "epoch": 0.006142759094028827, "grad_norm": 1.0501442633302494, "learning_rate": 2.0342857142857146e-06, "loss": 0.5557, "step": 179 }, { "epoch": 0.0061770761839396015, "grad_norm": 1.2998523224586227, "learning_rate": 2.0457142857142857e-06, "loss": 0.4758, "step": 180 }, { "epoch": 0.006211393273850377, "grad_norm": 1.1097389685318504, "learning_rate": 2.0571428571428573e-06, "loss": 0.5024, "step": 181 }, { "epoch": 0.006245710363761153, "grad_norm": 1.0590323614084007, "learning_rate": 2.068571428571429e-06, "loss": 0.4838, "step": 182 }, { "epoch": 0.006280027453671929, "grad_norm": 1.0853674428242244, "learning_rate": 2.08e-06, "loss": 0.4609, "step": 183 }, { "epoch": 0.006314344543582704, "grad_norm": 0.9500681689502032, "learning_rate": 2.0914285714285716e-06, "loss": 0.4809, "step": 184 }, { "epoch": 0.00634866163349348, "grad_norm": 1.0307851462957691, "learning_rate": 2.102857142857143e-06, "loss": 0.5149, "step": 185 }, { "epoch": 0.006382978723404255, "grad_norm": 1.0730996274660451, "learning_rate": 2.1142857142857147e-06, "loss": 0.4718, "step": 186 }, { "epoch": 0.006417295813315031, "grad_norm": 1.0556304150044489, "learning_rate": 2.125714285714286e-06, "loss": 0.4991, "step": 187 }, { "epoch": 0.0064516129032258064, "grad_norm": 1.0106749783593512, "learning_rate": 2.1371428571428574e-06, "loss": 0.4657, "step": 188 }, { "epoch": 0.006485929993136582, "grad_norm": 1.0533878164613033, "learning_rate": 2.148571428571429e-06, "loss": 0.4574, "step": 189 }, { "epoch": 0.006520247083047358, "grad_norm": 1.067575180419852, "learning_rate": 2.16e-06, "loss": 0.5196, "step": 190 }, { "epoch": 0.006554564172958133, "grad_norm": 1.0394836635516709, "learning_rate": 2.1714285714285716e-06, "loss": 0.4955, "step": 191 }, { "epoch": 0.0065888812628689085, "grad_norm": 1.217412484404595, "learning_rate": 2.1828571428571428e-06, "loss": 0.4835, "step": 192 }, { "epoch": 0.006623198352779684, "grad_norm": 1.173476447756217, "learning_rate": 2.1942857142857143e-06, "loss": 0.5388, "step": 193 }, { "epoch": 0.00665751544269046, "grad_norm": 1.0473268230638002, "learning_rate": 2.205714285714286e-06, "loss": 0.4949, "step": 194 }, { "epoch": 0.006691832532601236, "grad_norm": 1.0708155804348463, "learning_rate": 2.2171428571428575e-06, "loss": 0.4979, "step": 195 }, { "epoch": 0.006726149622512011, "grad_norm": 1.0180761874935778, "learning_rate": 2.228571428571429e-06, "loss": 0.5217, "step": 196 }, { "epoch": 0.006760466712422786, "grad_norm": 1.29648925817539, "learning_rate": 2.24e-06, "loss": 0.4682, "step": 197 }, { "epoch": 0.006794783802333562, "grad_norm": 1.0099193986122361, "learning_rate": 2.2514285714285717e-06, "loss": 0.4792, "step": 198 }, { "epoch": 0.006829100892244338, "grad_norm": 1.1372985596737242, "learning_rate": 2.262857142857143e-06, "loss": 0.5281, "step": 199 }, { "epoch": 0.0068634179821551134, "grad_norm": 1.064425884593535, "learning_rate": 2.2742857142857144e-06, "loss": 0.4983, "step": 200 }, { "epoch": 0.006897735072065889, "grad_norm": 0.958819354360802, "learning_rate": 2.285714285714286e-06, "loss": 0.4512, "step": 201 }, { "epoch": 0.006932052161976664, "grad_norm": 1.1546391747687854, "learning_rate": 2.297142857142857e-06, "loss": 0.5091, "step": 202 }, { "epoch": 0.00696636925188744, "grad_norm": 1.1550367494255236, "learning_rate": 2.3085714285714287e-06, "loss": 0.513, "step": 203 }, { "epoch": 0.0070006863417982155, "grad_norm": 1.1683052196018886, "learning_rate": 2.3200000000000002e-06, "loss": 0.5191, "step": 204 }, { "epoch": 0.007035003431708991, "grad_norm": 1.1430638092564565, "learning_rate": 2.331428571428572e-06, "loss": 0.4651, "step": 205 }, { "epoch": 0.007069320521619767, "grad_norm": 1.2614332783166118, "learning_rate": 2.342857142857143e-06, "loss": 0.4797, "step": 206 }, { "epoch": 0.007103637611530542, "grad_norm": 0.9780695351199558, "learning_rate": 2.3542857142857145e-06, "loss": 0.4814, "step": 207 }, { "epoch": 0.0071379547014413175, "grad_norm": 1.164914065949844, "learning_rate": 2.365714285714286e-06, "loss": 0.5094, "step": 208 }, { "epoch": 0.007172271791352093, "grad_norm": 1.085954145145673, "learning_rate": 2.377142857142857e-06, "loss": 0.5015, "step": 209 }, { "epoch": 0.007206588881262869, "grad_norm": 1.1617948873475852, "learning_rate": 2.3885714285714288e-06, "loss": 0.4639, "step": 210 }, { "epoch": 0.007240905971173645, "grad_norm": 1.191729515255843, "learning_rate": 2.4000000000000003e-06, "loss": 0.4637, "step": 211 }, { "epoch": 0.0072752230610844204, "grad_norm": 1.0802566545916041, "learning_rate": 2.4114285714285715e-06, "loss": 0.5441, "step": 212 }, { "epoch": 0.007309540150995195, "grad_norm": 1.0645777260444993, "learning_rate": 2.422857142857143e-06, "loss": 0.4823, "step": 213 }, { "epoch": 0.007343857240905971, "grad_norm": 0.9941774901050563, "learning_rate": 2.4342857142857146e-06, "loss": 0.5164, "step": 214 }, { "epoch": 0.007378174330816747, "grad_norm": 1.1173961327902366, "learning_rate": 2.445714285714286e-06, "loss": 0.4886, "step": 215 }, { "epoch": 0.0074124914207275225, "grad_norm": 0.9531470157155164, "learning_rate": 2.4571428571428573e-06, "loss": 0.4899, "step": 216 }, { "epoch": 0.007446808510638298, "grad_norm": 1.0705715924991686, "learning_rate": 2.468571428571429e-06, "loss": 0.5592, "step": 217 }, { "epoch": 0.007481125600549073, "grad_norm": 1.0990843881036996, "learning_rate": 2.4800000000000004e-06, "loss": 0.4455, "step": 218 }, { "epoch": 0.007515442690459849, "grad_norm": 1.006132476601445, "learning_rate": 2.4914285714285715e-06, "loss": 0.5001, "step": 219 }, { "epoch": 0.0075497597803706245, "grad_norm": 0.9266282718065276, "learning_rate": 2.502857142857143e-06, "loss": 0.4628, "step": 220 }, { "epoch": 0.0075840768702814, "grad_norm": 1.0702545470105296, "learning_rate": 2.5142857142857147e-06, "loss": 0.441, "step": 221 }, { "epoch": 0.007618393960192176, "grad_norm": 1.2685083968822797, "learning_rate": 2.525714285714286e-06, "loss": 0.4815, "step": 222 }, { "epoch": 0.007652711050102951, "grad_norm": 0.9708018592120706, "learning_rate": 2.5371428571428574e-06, "loss": 0.4908, "step": 223 }, { "epoch": 0.007687028140013727, "grad_norm": 1.1232638336035432, "learning_rate": 2.5485714285714285e-06, "loss": 0.4667, "step": 224 }, { "epoch": 0.007721345229924502, "grad_norm": 1.2103741955215093, "learning_rate": 2.56e-06, "loss": 0.4801, "step": 225 }, { "epoch": 0.007755662319835278, "grad_norm": 1.0620031845557085, "learning_rate": 2.571428571428571e-06, "loss": 0.5542, "step": 226 }, { "epoch": 0.007789979409746054, "grad_norm": 1.1320907092708374, "learning_rate": 2.582857142857143e-06, "loss": 0.5128, "step": 227 }, { "epoch": 0.00782429649965683, "grad_norm": 1.005524220438851, "learning_rate": 2.5942857142857147e-06, "loss": 0.4821, "step": 228 }, { "epoch": 0.007858613589567605, "grad_norm": 0.9555642780184039, "learning_rate": 2.605714285714286e-06, "loss": 0.517, "step": 229 }, { "epoch": 0.007892930679478381, "grad_norm": 0.9530374117919782, "learning_rate": 2.6171428571428574e-06, "loss": 0.4107, "step": 230 }, { "epoch": 0.007927247769389157, "grad_norm": 0.9890006956361646, "learning_rate": 2.6285714285714286e-06, "loss": 0.4405, "step": 231 }, { "epoch": 0.00796156485929993, "grad_norm": 0.9843429867019199, "learning_rate": 2.64e-06, "loss": 0.4844, "step": 232 }, { "epoch": 0.007995881949210706, "grad_norm": 1.0440177595521225, "learning_rate": 2.6514285714285713e-06, "loss": 0.4549, "step": 233 }, { "epoch": 0.008030199039121482, "grad_norm": 1.2896754598507838, "learning_rate": 2.6628571428571433e-06, "loss": 0.4956, "step": 234 }, { "epoch": 0.008064516129032258, "grad_norm": 1.2211786591033071, "learning_rate": 2.6742857142857144e-06, "loss": 0.5606, "step": 235 }, { "epoch": 0.008098833218943034, "grad_norm": 1.0952039892705325, "learning_rate": 2.685714285714286e-06, "loss": 0.5045, "step": 236 }, { "epoch": 0.00813315030885381, "grad_norm": 1.1226241227627978, "learning_rate": 2.6971428571428575e-06, "loss": 0.4622, "step": 237 }, { "epoch": 0.008167467398764585, "grad_norm": 1.0376035701996844, "learning_rate": 2.7085714285714287e-06, "loss": 0.4344, "step": 238 }, { "epoch": 0.00820178448867536, "grad_norm": 1.1073034949196756, "learning_rate": 2.7200000000000002e-06, "loss": 0.5405, "step": 239 }, { "epoch": 0.008236101578586136, "grad_norm": 1.1008859199415255, "learning_rate": 2.7314285714285714e-06, "loss": 0.4828, "step": 240 }, { "epoch": 0.008270418668496912, "grad_norm": 0.9775443511627674, "learning_rate": 2.7428571428571433e-06, "loss": 0.4653, "step": 241 }, { "epoch": 0.008304735758407686, "grad_norm": 1.0043563207894282, "learning_rate": 2.7542857142857145e-06, "loss": 0.4764, "step": 242 }, { "epoch": 0.008339052848318462, "grad_norm": 1.1129280368882615, "learning_rate": 2.765714285714286e-06, "loss": 0.5186, "step": 243 }, { "epoch": 0.008373369938229238, "grad_norm": 1.032208767352571, "learning_rate": 2.777142857142857e-06, "loss": 0.4685, "step": 244 }, { "epoch": 0.008407687028140013, "grad_norm": 1.0843855302914618, "learning_rate": 2.7885714285714287e-06, "loss": 0.5046, "step": 245 }, { "epoch": 0.008442004118050789, "grad_norm": 1.0723710999283937, "learning_rate": 2.8000000000000003e-06, "loss": 0.5051, "step": 246 }, { "epoch": 0.008476321207961565, "grad_norm": 1.0355935978477793, "learning_rate": 2.8114285714285714e-06, "loss": 0.514, "step": 247 }, { "epoch": 0.00851063829787234, "grad_norm": 1.0457179262577527, "learning_rate": 2.8228571428571434e-06, "loss": 0.5119, "step": 248 }, { "epoch": 0.008544955387783116, "grad_norm": 0.9700625111592251, "learning_rate": 2.8342857142857146e-06, "loss": 0.503, "step": 249 }, { "epoch": 0.008579272477693892, "grad_norm": 0.9796692578934189, "learning_rate": 2.845714285714286e-06, "loss": 0.475, "step": 250 }, { "epoch": 0.008613589567604668, "grad_norm": 1.0118348595459115, "learning_rate": 2.8571428571428573e-06, "loss": 0.4918, "step": 251 }, { "epoch": 0.008647906657515443, "grad_norm": 0.934349988428768, "learning_rate": 2.868571428571429e-06, "loss": 0.4876, "step": 252 }, { "epoch": 0.008682223747426217, "grad_norm": 1.1007088020738727, "learning_rate": 2.88e-06, "loss": 0.5171, "step": 253 }, { "epoch": 0.008716540837336993, "grad_norm": 0.9931813939875397, "learning_rate": 2.8914285714285715e-06, "loss": 0.468, "step": 254 }, { "epoch": 0.008750857927247769, "grad_norm": 1.1116509119453695, "learning_rate": 2.9028571428571427e-06, "loss": 0.3822, "step": 255 }, { "epoch": 0.008785175017158545, "grad_norm": 1.166548186375568, "learning_rate": 2.9142857142857146e-06, "loss": 0.4256, "step": 256 }, { "epoch": 0.00881949210706932, "grad_norm": 1.0778811963347472, "learning_rate": 2.925714285714286e-06, "loss": 0.4535, "step": 257 }, { "epoch": 0.008853809196980096, "grad_norm": 0.9967888919541875, "learning_rate": 2.9371428571428573e-06, "loss": 0.4967, "step": 258 }, { "epoch": 0.008888126286890872, "grad_norm": 0.9881026908184362, "learning_rate": 2.948571428571429e-06, "loss": 0.4366, "step": 259 }, { "epoch": 0.008922443376801648, "grad_norm": 0.9425589275449163, "learning_rate": 2.96e-06, "loss": 0.4736, "step": 260 }, { "epoch": 0.008956760466712423, "grad_norm": 0.9562175423849187, "learning_rate": 2.9714285714285716e-06, "loss": 0.4382, "step": 261 }, { "epoch": 0.008991077556623199, "grad_norm": 1.0932372993819957, "learning_rate": 2.9828571428571427e-06, "loss": 0.49, "step": 262 }, { "epoch": 0.009025394646533975, "grad_norm": 1.0018014905390282, "learning_rate": 2.9942857142857147e-06, "loss": 0.4723, "step": 263 }, { "epoch": 0.009059711736444749, "grad_norm": 1.0179911800974701, "learning_rate": 3.005714285714286e-06, "loss": 0.463, "step": 264 }, { "epoch": 0.009094028826355524, "grad_norm": 1.044958171937767, "learning_rate": 3.0171428571428574e-06, "loss": 0.4392, "step": 265 }, { "epoch": 0.0091283459162663, "grad_norm": 1.0307345284579024, "learning_rate": 3.028571428571429e-06, "loss": 0.4371, "step": 266 }, { "epoch": 0.009162663006177076, "grad_norm": 1.1001772951126059, "learning_rate": 3.04e-06, "loss": 0.4489, "step": 267 }, { "epoch": 0.009196980096087852, "grad_norm": 1.054026131663069, "learning_rate": 3.0514285714285717e-06, "loss": 0.5037, "step": 268 }, { "epoch": 0.009231297185998627, "grad_norm": 1.1110068182520705, "learning_rate": 3.062857142857143e-06, "loss": 0.5509, "step": 269 }, { "epoch": 0.009265614275909403, "grad_norm": 1.2642227405212734, "learning_rate": 3.074285714285715e-06, "loss": 0.5024, "step": 270 }, { "epoch": 0.009299931365820179, "grad_norm": 1.07379358004195, "learning_rate": 3.085714285714286e-06, "loss": 0.4826, "step": 271 }, { "epoch": 0.009334248455730955, "grad_norm": 1.040894953469504, "learning_rate": 3.0971428571428575e-06, "loss": 0.4649, "step": 272 }, { "epoch": 0.00936856554564173, "grad_norm": 1.1008698956059928, "learning_rate": 3.1085714285714286e-06, "loss": 0.4588, "step": 273 }, { "epoch": 0.009402882635552504, "grad_norm": 1.0626559055293507, "learning_rate": 3.12e-06, "loss": 0.4314, "step": 274 }, { "epoch": 0.00943719972546328, "grad_norm": 1.089774808615115, "learning_rate": 3.1314285714285718e-06, "loss": 0.4089, "step": 275 }, { "epoch": 0.009471516815374056, "grad_norm": 0.9299797059515118, "learning_rate": 3.142857142857143e-06, "loss": 0.3915, "step": 276 }, { "epoch": 0.009505833905284831, "grad_norm": 1.162126085822206, "learning_rate": 3.154285714285715e-06, "loss": 0.5454, "step": 277 }, { "epoch": 0.009540150995195607, "grad_norm": 1.1244430107990067, "learning_rate": 3.165714285714286e-06, "loss": 0.4594, "step": 278 }, { "epoch": 0.009574468085106383, "grad_norm": 0.995916889242705, "learning_rate": 3.1771428571428576e-06, "loss": 0.5194, "step": 279 }, { "epoch": 0.009608785175017159, "grad_norm": 1.1307099329043462, "learning_rate": 3.1885714285714287e-06, "loss": 0.4889, "step": 280 }, { "epoch": 0.009643102264927934, "grad_norm": 1.0547956425308533, "learning_rate": 3.2000000000000003e-06, "loss": 0.4328, "step": 281 }, { "epoch": 0.00967741935483871, "grad_norm": 0.9821357399926197, "learning_rate": 3.2114285714285714e-06, "loss": 0.4805, "step": 282 }, { "epoch": 0.009711736444749486, "grad_norm": 1.0816649533826574, "learning_rate": 3.222857142857143e-06, "loss": 0.4624, "step": 283 }, { "epoch": 0.009746053534660262, "grad_norm": 0.8889526812640808, "learning_rate": 3.234285714285715e-06, "loss": 0.4504, "step": 284 }, { "epoch": 0.009780370624571036, "grad_norm": 1.0604283218798778, "learning_rate": 3.245714285714286e-06, "loss": 0.4698, "step": 285 }, { "epoch": 0.009814687714481811, "grad_norm": 0.9587823228147461, "learning_rate": 3.2571428571428577e-06, "loss": 0.4076, "step": 286 }, { "epoch": 0.009849004804392587, "grad_norm": 1.1061954185243859, "learning_rate": 3.268571428571429e-06, "loss": 0.6003, "step": 287 }, { "epoch": 0.009883321894303363, "grad_norm": 1.195835906219983, "learning_rate": 3.2800000000000004e-06, "loss": 0.4622, "step": 288 }, { "epoch": 0.009917638984214138, "grad_norm": 0.9392253857416102, "learning_rate": 3.2914285714285715e-06, "loss": 0.4864, "step": 289 }, { "epoch": 0.009951956074124914, "grad_norm": 0.9681668718490495, "learning_rate": 3.302857142857143e-06, "loss": 0.563, "step": 290 }, { "epoch": 0.00998627316403569, "grad_norm": 1.0080921231132722, "learning_rate": 3.314285714285714e-06, "loss": 0.4456, "step": 291 }, { "epoch": 0.010020590253946466, "grad_norm": 0.966199265021642, "learning_rate": 3.325714285714286e-06, "loss": 0.4466, "step": 292 }, { "epoch": 0.010054907343857241, "grad_norm": 1.0179160764047839, "learning_rate": 3.3371428571428577e-06, "loss": 0.4422, "step": 293 }, { "epoch": 0.010089224433768017, "grad_norm": 1.041872296633017, "learning_rate": 3.348571428571429e-06, "loss": 0.4677, "step": 294 }, { "epoch": 0.010123541523678793, "grad_norm": 0.972498094035038, "learning_rate": 3.3600000000000004e-06, "loss": 0.4468, "step": 295 }, { "epoch": 0.010157858613589567, "grad_norm": 1.0554803959706165, "learning_rate": 3.3714285714285716e-06, "loss": 0.5292, "step": 296 }, { "epoch": 0.010192175703500343, "grad_norm": 1.0509315863062032, "learning_rate": 3.382857142857143e-06, "loss": 0.4866, "step": 297 }, { "epoch": 0.010226492793411118, "grad_norm": 1.094109698582846, "learning_rate": 3.3942857142857143e-06, "loss": 0.4686, "step": 298 }, { "epoch": 0.010260809883321894, "grad_norm": 1.0191324097336216, "learning_rate": 3.4057142857142863e-06, "loss": 0.4334, "step": 299 }, { "epoch": 0.01029512697323267, "grad_norm": 1.1320803177719492, "learning_rate": 3.4171428571428574e-06, "loss": 0.4558, "step": 300 }, { "epoch": 0.010329444063143445, "grad_norm": 1.0840551401351068, "learning_rate": 3.428571428571429e-06, "loss": 0.4512, "step": 301 }, { "epoch": 0.010363761153054221, "grad_norm": 1.0008847328464818, "learning_rate": 3.44e-06, "loss": 0.4511, "step": 302 }, { "epoch": 0.010398078242964997, "grad_norm": 1.0477827731688254, "learning_rate": 3.4514285714285717e-06, "loss": 0.4638, "step": 303 }, { "epoch": 0.010432395332875773, "grad_norm": 1.047273511589782, "learning_rate": 3.4628571428571432e-06, "loss": 0.5001, "step": 304 }, { "epoch": 0.010466712422786548, "grad_norm": 1.0274395705720811, "learning_rate": 3.4742857142857144e-06, "loss": 0.5141, "step": 305 }, { "epoch": 0.010501029512697324, "grad_norm": 1.0434081167488207, "learning_rate": 3.4857142857142863e-06, "loss": 0.5032, "step": 306 }, { "epoch": 0.010535346602608098, "grad_norm": 0.9903141366349302, "learning_rate": 3.4971428571428575e-06, "loss": 0.4913, "step": 307 }, { "epoch": 0.010569663692518874, "grad_norm": 1.0736968146539343, "learning_rate": 3.508571428571429e-06, "loss": 0.4258, "step": 308 }, { "epoch": 0.01060398078242965, "grad_norm": 1.0406190443939123, "learning_rate": 3.52e-06, "loss": 0.4915, "step": 309 }, { "epoch": 0.010638297872340425, "grad_norm": 0.953779879441263, "learning_rate": 3.5314285714285717e-06, "loss": 0.4529, "step": 310 }, { "epoch": 0.010672614962251201, "grad_norm": 1.0630203444703947, "learning_rate": 3.542857142857143e-06, "loss": 0.4804, "step": 311 }, { "epoch": 0.010706932052161977, "grad_norm": 1.1300284683140736, "learning_rate": 3.5542857142857144e-06, "loss": 0.4777, "step": 312 }, { "epoch": 0.010741249142072752, "grad_norm": 0.991008993101478, "learning_rate": 3.5657142857142864e-06, "loss": 0.4649, "step": 313 }, { "epoch": 0.010775566231983528, "grad_norm": 1.1525743826429289, "learning_rate": 3.5771428571428576e-06, "loss": 0.4551, "step": 314 }, { "epoch": 0.010809883321894304, "grad_norm": 1.082333163380843, "learning_rate": 3.588571428571429e-06, "loss": 0.4451, "step": 315 }, { "epoch": 0.01084420041180508, "grad_norm": 1.0282794689682164, "learning_rate": 3.6000000000000003e-06, "loss": 0.4527, "step": 316 }, { "epoch": 0.010878517501715854, "grad_norm": 1.0560179780970775, "learning_rate": 3.611428571428572e-06, "loss": 0.4847, "step": 317 }, { "epoch": 0.01091283459162663, "grad_norm": 0.895651707607666, "learning_rate": 3.622857142857143e-06, "loss": 0.4388, "step": 318 }, { "epoch": 0.010947151681537405, "grad_norm": 0.9776003355408718, "learning_rate": 3.6342857142857145e-06, "loss": 0.479, "step": 319 }, { "epoch": 0.01098146877144818, "grad_norm": 0.9910660026305325, "learning_rate": 3.6457142857142857e-06, "loss": 0.5856, "step": 320 }, { "epoch": 0.011015785861358957, "grad_norm": 1.0664109737843384, "learning_rate": 3.6571428571428576e-06, "loss": 0.4945, "step": 321 }, { "epoch": 0.011050102951269732, "grad_norm": 0.9423982613842184, "learning_rate": 3.668571428571429e-06, "loss": 0.406, "step": 322 }, { "epoch": 0.011084420041180508, "grad_norm": 1.1101934017400612, "learning_rate": 3.6800000000000003e-06, "loss": 0.4898, "step": 323 }, { "epoch": 0.011118737131091284, "grad_norm": 1.045315174043726, "learning_rate": 3.691428571428572e-06, "loss": 0.5393, "step": 324 }, { "epoch": 0.01115305422100206, "grad_norm": 0.968701483330537, "learning_rate": 3.702857142857143e-06, "loss": 0.4897, "step": 325 }, { "epoch": 0.011187371310912835, "grad_norm": 0.9747829259358192, "learning_rate": 3.7142857142857146e-06, "loss": 0.4355, "step": 326 }, { "epoch": 0.011221688400823611, "grad_norm": 1.0051200601244898, "learning_rate": 3.7257142857142857e-06, "loss": 0.4455, "step": 327 }, { "epoch": 0.011256005490734385, "grad_norm": 1.3204292715118613, "learning_rate": 3.7371428571428577e-06, "loss": 0.4473, "step": 328 }, { "epoch": 0.01129032258064516, "grad_norm": 1.1562184707842227, "learning_rate": 3.7485714285714284e-06, "loss": 0.5289, "step": 329 }, { "epoch": 0.011324639670555936, "grad_norm": 1.013435804177912, "learning_rate": 3.7600000000000004e-06, "loss": 0.4775, "step": 330 }, { "epoch": 0.011358956760466712, "grad_norm": 1.0654448397611405, "learning_rate": 3.771428571428572e-06, "loss": 0.4254, "step": 331 }, { "epoch": 0.011393273850377488, "grad_norm": 1.1890815761140971, "learning_rate": 3.782857142857143e-06, "loss": 0.4635, "step": 332 }, { "epoch": 0.011427590940288264, "grad_norm": 1.0201595883584205, "learning_rate": 3.7942857142857147e-06, "loss": 0.4068, "step": 333 }, { "epoch": 0.01146190803019904, "grad_norm": 10.25290460076219, "learning_rate": 3.805714285714286e-06, "loss": 0.4401, "step": 334 }, { "epoch": 0.011496225120109815, "grad_norm": 0.9931154591659579, "learning_rate": 3.817142857142857e-06, "loss": 0.4822, "step": 335 }, { "epoch": 0.01153054221002059, "grad_norm": 1.1990303894467653, "learning_rate": 3.828571428571429e-06, "loss": 0.5023, "step": 336 }, { "epoch": 0.011564859299931366, "grad_norm": 1.1376445781171134, "learning_rate": 3.8400000000000005e-06, "loss": 0.5383, "step": 337 }, { "epoch": 0.011599176389842142, "grad_norm": 1.0633791408933875, "learning_rate": 3.851428571428571e-06, "loss": 0.5314, "step": 338 }, { "epoch": 0.011633493479752916, "grad_norm": 1.033720961138532, "learning_rate": 3.862857142857143e-06, "loss": 0.4874, "step": 339 }, { "epoch": 0.011667810569663692, "grad_norm": 1.0260504032783626, "learning_rate": 3.874285714285715e-06, "loss": 0.4149, "step": 340 }, { "epoch": 0.011702127659574468, "grad_norm": 0.9532417082287241, "learning_rate": 3.885714285714286e-06, "loss": 0.4918, "step": 341 }, { "epoch": 0.011736444749485243, "grad_norm": 1.113121213164326, "learning_rate": 3.8971428571428575e-06, "loss": 0.4889, "step": 342 }, { "epoch": 0.011770761839396019, "grad_norm": 1.0953591369802056, "learning_rate": 3.908571428571429e-06, "loss": 0.4416, "step": 343 }, { "epoch": 0.011805078929306795, "grad_norm": 1.0669706210903986, "learning_rate": 3.920000000000001e-06, "loss": 0.4529, "step": 344 }, { "epoch": 0.01183939601921757, "grad_norm": 1.0513411847141105, "learning_rate": 3.931428571428571e-06, "loss": 0.5001, "step": 345 }, { "epoch": 0.011873713109128346, "grad_norm": 1.0705033654827023, "learning_rate": 3.942857142857143e-06, "loss": 0.4389, "step": 346 }, { "epoch": 0.011908030199039122, "grad_norm": 1.0347549773768616, "learning_rate": 3.954285714285714e-06, "loss": 0.3631, "step": 347 }, { "epoch": 0.011942347288949898, "grad_norm": 0.9890340268694005, "learning_rate": 3.965714285714286e-06, "loss": 0.4327, "step": 348 }, { "epoch": 0.011976664378860673, "grad_norm": 1.0300489467435519, "learning_rate": 3.9771428571428575e-06, "loss": 0.4598, "step": 349 }, { "epoch": 0.012010981468771447, "grad_norm": 0.9463262110594297, "learning_rate": 3.988571428571429e-06, "loss": 0.4809, "step": 350 }, { "epoch": 0.012045298558682223, "grad_norm": 1.0692273528238099, "learning_rate": 4.000000000000001e-06, "loss": 0.4273, "step": 351 }, { "epoch": 0.012079615648592999, "grad_norm": 0.947225788571457, "learning_rate": 4.011428571428571e-06, "loss": 0.4724, "step": 352 }, { "epoch": 0.012113932738503775, "grad_norm": 1.0416126878527296, "learning_rate": 4.022857142857143e-06, "loss": 0.4599, "step": 353 }, { "epoch": 0.01214824982841455, "grad_norm": 1.2088816183226143, "learning_rate": 4.0342857142857145e-06, "loss": 0.4997, "step": 354 }, { "epoch": 0.012182566918325326, "grad_norm": 1.0230125099561842, "learning_rate": 4.045714285714286e-06, "loss": 0.4672, "step": 355 }, { "epoch": 0.012216884008236102, "grad_norm": 1.0116281593673153, "learning_rate": 4.057142857142858e-06, "loss": 0.418, "step": 356 }, { "epoch": 0.012251201098146878, "grad_norm": 1.0145869967937944, "learning_rate": 4.068571428571429e-06, "loss": 0.4649, "step": 357 }, { "epoch": 0.012285518188057653, "grad_norm": 1.0457370837234365, "learning_rate": 4.08e-06, "loss": 0.4033, "step": 358 }, { "epoch": 0.012319835277968429, "grad_norm": 1.0053234875135433, "learning_rate": 4.0914285714285715e-06, "loss": 0.4496, "step": 359 }, { "epoch": 0.012354152367879203, "grad_norm": 1.0191787382801316, "learning_rate": 4.102857142857143e-06, "loss": 0.4693, "step": 360 }, { "epoch": 0.012388469457789979, "grad_norm": 1.0433916623922928, "learning_rate": 4.114285714285715e-06, "loss": 0.4427, "step": 361 }, { "epoch": 0.012422786547700754, "grad_norm": 0.961130766544998, "learning_rate": 4.125714285714286e-06, "loss": 0.4234, "step": 362 }, { "epoch": 0.01245710363761153, "grad_norm": 0.9746459124047282, "learning_rate": 4.137142857142858e-06, "loss": 0.4338, "step": 363 }, { "epoch": 0.012491420727522306, "grad_norm": 1.0005050750349127, "learning_rate": 4.148571428571429e-06, "loss": 0.4071, "step": 364 }, { "epoch": 0.012525737817433082, "grad_norm": 1.0329375585405816, "learning_rate": 4.16e-06, "loss": 0.4375, "step": 365 }, { "epoch": 0.012560054907343857, "grad_norm": 0.9541981115732744, "learning_rate": 4.1714285714285715e-06, "loss": 0.5064, "step": 366 }, { "epoch": 0.012594371997254633, "grad_norm": 1.0712696472404364, "learning_rate": 4.182857142857143e-06, "loss": 0.4541, "step": 367 }, { "epoch": 0.012628689087165409, "grad_norm": 1.1052544189267506, "learning_rate": 4.194285714285715e-06, "loss": 0.5268, "step": 368 }, { "epoch": 0.012663006177076185, "grad_norm": 1.1302913579634983, "learning_rate": 4.205714285714286e-06, "loss": 0.4278, "step": 369 }, { "epoch": 0.01269732326698696, "grad_norm": 1.188149096959153, "learning_rate": 4.217142857142858e-06, "loss": 0.4295, "step": 370 }, { "epoch": 0.012731640356897734, "grad_norm": 1.068718894896239, "learning_rate": 4.228571428571429e-06, "loss": 0.4608, "step": 371 }, { "epoch": 0.01276595744680851, "grad_norm": 1.2192370490903184, "learning_rate": 4.24e-06, "loss": 0.4491, "step": 372 }, { "epoch": 0.012800274536719286, "grad_norm": 1.0449044556615772, "learning_rate": 4.251428571428572e-06, "loss": 0.4557, "step": 373 }, { "epoch": 0.012834591626630061, "grad_norm": 1.022360293907628, "learning_rate": 4.262857142857143e-06, "loss": 0.4599, "step": 374 }, { "epoch": 0.012868908716540837, "grad_norm": 1.1202530335281753, "learning_rate": 4.274285714285715e-06, "loss": 0.4434, "step": 375 }, { "epoch": 0.012903225806451613, "grad_norm": 1.3117712657800906, "learning_rate": 4.2857142857142855e-06, "loss": 0.4128, "step": 376 }, { "epoch": 0.012937542896362389, "grad_norm": 1.1272433763780803, "learning_rate": 4.297142857142858e-06, "loss": 0.4375, "step": 377 }, { "epoch": 0.012971859986273164, "grad_norm": 1.2180489824610397, "learning_rate": 4.3085714285714294e-06, "loss": 0.4394, "step": 378 }, { "epoch": 0.01300617707618394, "grad_norm": 1.031827968149523, "learning_rate": 4.32e-06, "loss": 0.4321, "step": 379 }, { "epoch": 0.013040494166094716, "grad_norm": 1.0222336578476061, "learning_rate": 4.331428571428572e-06, "loss": 0.3961, "step": 380 }, { "epoch": 0.013074811256005492, "grad_norm": 0.901950557188123, "learning_rate": 4.342857142857143e-06, "loss": 0.3961, "step": 381 }, { "epoch": 0.013109128345916266, "grad_norm": 1.0473525540426785, "learning_rate": 4.354285714285715e-06, "loss": 0.4439, "step": 382 }, { "epoch": 0.013143445435827041, "grad_norm": 0.9440867284240204, "learning_rate": 4.3657142857142855e-06, "loss": 0.4574, "step": 383 }, { "epoch": 0.013177762525737817, "grad_norm": 1.2518963713977476, "learning_rate": 4.377142857142858e-06, "loss": 0.4442, "step": 384 }, { "epoch": 0.013212079615648593, "grad_norm": 0.995001768768572, "learning_rate": 4.388571428571429e-06, "loss": 0.5015, "step": 385 }, { "epoch": 0.013246396705559368, "grad_norm": 1.0481433537653422, "learning_rate": 4.4e-06, "loss": 0.4391, "step": 386 }, { "epoch": 0.013280713795470144, "grad_norm": 1.0485117955606862, "learning_rate": 4.411428571428572e-06, "loss": 0.5058, "step": 387 }, { "epoch": 0.01331503088538092, "grad_norm": 1.1735077589839777, "learning_rate": 4.422857142857143e-06, "loss": 0.4094, "step": 388 }, { "epoch": 0.013349347975291696, "grad_norm": 1.1237708183240518, "learning_rate": 4.434285714285715e-06, "loss": 0.5044, "step": 389 }, { "epoch": 0.013383665065202471, "grad_norm": 1.0904133157248392, "learning_rate": 4.445714285714286e-06, "loss": 0.3964, "step": 390 }, { "epoch": 0.013417982155113247, "grad_norm": 1.1293449801923865, "learning_rate": 4.457142857142858e-06, "loss": 0.4451, "step": 391 }, { "epoch": 0.013452299245024023, "grad_norm": 1.086359629467889, "learning_rate": 4.468571428571429e-06, "loss": 0.4324, "step": 392 }, { "epoch": 0.013486616334934797, "grad_norm": 1.232903260597476, "learning_rate": 4.48e-06, "loss": 0.4179, "step": 393 }, { "epoch": 0.013520933424845573, "grad_norm": 1.0021085146955002, "learning_rate": 4.491428571428572e-06, "loss": 0.4448, "step": 394 }, { "epoch": 0.013555250514756348, "grad_norm": 1.0568739825769813, "learning_rate": 4.5028571428571434e-06, "loss": 0.4304, "step": 395 }, { "epoch": 0.013589567604667124, "grad_norm": 1.0322162520731444, "learning_rate": 4.514285714285714e-06, "loss": 0.503, "step": 396 }, { "epoch": 0.0136238846945779, "grad_norm": 1.2519918087938722, "learning_rate": 4.525714285714286e-06, "loss": 0.4766, "step": 397 }, { "epoch": 0.013658201784488675, "grad_norm": 1.0283182391795915, "learning_rate": 4.537142857142858e-06, "loss": 0.428, "step": 398 }, { "epoch": 0.013692518874399451, "grad_norm": 1.160678205408543, "learning_rate": 4.548571428571429e-06, "loss": 0.4811, "step": 399 }, { "epoch": 0.013726835964310227, "grad_norm": 1.0372915466242947, "learning_rate": 4.56e-06, "loss": 0.4628, "step": 400 }, { "epoch": 0.013761153054221003, "grad_norm": 1.009060575336637, "learning_rate": 4.571428571428572e-06, "loss": 0.4659, "step": 401 }, { "epoch": 0.013795470144131778, "grad_norm": 1.075119608593472, "learning_rate": 4.5828571428571435e-06, "loss": 0.4591, "step": 402 }, { "epoch": 0.013829787234042552, "grad_norm": 1.0906155694019142, "learning_rate": 4.594285714285714e-06, "loss": 0.4369, "step": 403 }, { "epoch": 0.013864104323953328, "grad_norm": 0.9576995207344825, "learning_rate": 4.605714285714286e-06, "loss": 0.4396, "step": 404 }, { "epoch": 0.013898421413864104, "grad_norm": 1.0292068867899413, "learning_rate": 4.617142857142857e-06, "loss": 0.4663, "step": 405 }, { "epoch": 0.01393273850377488, "grad_norm": 1.1007033715565058, "learning_rate": 4.628571428571429e-06, "loss": 0.4322, "step": 406 }, { "epoch": 0.013967055593685655, "grad_norm": 1.0961783565199443, "learning_rate": 4.6400000000000005e-06, "loss": 0.4687, "step": 407 }, { "epoch": 0.014001372683596431, "grad_norm": 0.9668790389695606, "learning_rate": 4.651428571428572e-06, "loss": 0.4252, "step": 408 }, { "epoch": 0.014035689773507207, "grad_norm": 1.0161306086340849, "learning_rate": 4.662857142857144e-06, "loss": 0.3867, "step": 409 }, { "epoch": 0.014070006863417982, "grad_norm": 1.0199690170766655, "learning_rate": 4.674285714285714e-06, "loss": 0.4342, "step": 410 }, { "epoch": 0.014104323953328758, "grad_norm": 1.0819209273183141, "learning_rate": 4.685714285714286e-06, "loss": 0.4286, "step": 411 }, { "epoch": 0.014138641043239534, "grad_norm": 1.0261695563778535, "learning_rate": 4.6971428571428574e-06, "loss": 0.4802, "step": 412 }, { "epoch": 0.01417295813315031, "grad_norm": 1.1367501375270475, "learning_rate": 4.708571428571429e-06, "loss": 0.4135, "step": 413 }, { "epoch": 0.014207275223061084, "grad_norm": 0.9848500600296701, "learning_rate": 4.7200000000000005e-06, "loss": 0.4715, "step": 414 }, { "epoch": 0.01424159231297186, "grad_norm": 1.1605189575291792, "learning_rate": 4.731428571428572e-06, "loss": 0.4603, "step": 415 }, { "epoch": 0.014275909402882635, "grad_norm": 1.1132914994401244, "learning_rate": 4.742857142857144e-06, "loss": 0.4968, "step": 416 }, { "epoch": 0.01431022649279341, "grad_norm": 1.16926302188581, "learning_rate": 4.754285714285714e-06, "loss": 0.4735, "step": 417 }, { "epoch": 0.014344543582704187, "grad_norm": 1.1077954709280657, "learning_rate": 4.765714285714286e-06, "loss": 0.445, "step": 418 }, { "epoch": 0.014378860672614962, "grad_norm": 1.0156087313671216, "learning_rate": 4.7771428571428575e-06, "loss": 0.4603, "step": 419 }, { "epoch": 0.014413177762525738, "grad_norm": 1.0525676295762163, "learning_rate": 4.788571428571429e-06, "loss": 0.4114, "step": 420 }, { "epoch": 0.014447494852436514, "grad_norm": 0.9782760385364228, "learning_rate": 4.800000000000001e-06, "loss": 0.4052, "step": 421 }, { "epoch": 0.01448181194234729, "grad_norm": 1.003656094026538, "learning_rate": 4.811428571428572e-06, "loss": 0.4127, "step": 422 }, { "epoch": 0.014516129032258065, "grad_norm": 1.0016605973452817, "learning_rate": 4.822857142857143e-06, "loss": 0.4923, "step": 423 }, { "epoch": 0.014550446122168841, "grad_norm": 1.0308184211310527, "learning_rate": 4.8342857142857145e-06, "loss": 0.4669, "step": 424 }, { "epoch": 0.014584763212079615, "grad_norm": 0.9065332467514127, "learning_rate": 4.845714285714286e-06, "loss": 0.424, "step": 425 }, { "epoch": 0.01461908030199039, "grad_norm": 1.112139076031101, "learning_rate": 4.857142857142858e-06, "loss": 0.3938, "step": 426 }, { "epoch": 0.014653397391901166, "grad_norm": 1.009627777032358, "learning_rate": 4.868571428571429e-06, "loss": 0.45, "step": 427 }, { "epoch": 0.014687714481811942, "grad_norm": 1.1364383335074073, "learning_rate": 4.880000000000001e-06, "loss": 0.4301, "step": 428 }, { "epoch": 0.014722031571722718, "grad_norm": 0.9247526483391572, "learning_rate": 4.891428571428572e-06, "loss": 0.4586, "step": 429 }, { "epoch": 0.014756348661633494, "grad_norm": 1.0044209566994657, "learning_rate": 4.902857142857143e-06, "loss": 0.4493, "step": 430 }, { "epoch": 0.01479066575154427, "grad_norm": 1.1323561811637488, "learning_rate": 4.9142857142857145e-06, "loss": 0.4366, "step": 431 }, { "epoch": 0.014824982841455045, "grad_norm": 1.0787987568216513, "learning_rate": 4.925714285714286e-06, "loss": 0.4718, "step": 432 }, { "epoch": 0.01485929993136582, "grad_norm": 1.0423773881112097, "learning_rate": 4.937142857142858e-06, "loss": 0.5014, "step": 433 }, { "epoch": 0.014893617021276596, "grad_norm": 1.0034539207464204, "learning_rate": 4.948571428571429e-06, "loss": 0.4518, "step": 434 }, { "epoch": 0.014927934111187372, "grad_norm": 1.0700420040125282, "learning_rate": 4.960000000000001e-06, "loss": 0.4572, "step": 435 }, { "epoch": 0.014962251201098146, "grad_norm": 1.253600183135946, "learning_rate": 4.971428571428572e-06, "loss": 0.4783, "step": 436 }, { "epoch": 0.014996568291008922, "grad_norm": 0.9523038545750465, "learning_rate": 4.982857142857143e-06, "loss": 0.4633, "step": 437 }, { "epoch": 0.015030885380919698, "grad_norm": 1.0101820967751332, "learning_rate": 4.994285714285715e-06, "loss": 0.463, "step": 438 }, { "epoch": 0.015065202470830473, "grad_norm": 1.0224333349395476, "learning_rate": 5.005714285714286e-06, "loss": 0.437, "step": 439 }, { "epoch": 0.015099519560741249, "grad_norm": 1.0415025445812909, "learning_rate": 5.017142857142857e-06, "loss": 0.4255, "step": 440 }, { "epoch": 0.015133836650652025, "grad_norm": 1.0550363295815222, "learning_rate": 5.028571428571429e-06, "loss": 0.3957, "step": 441 }, { "epoch": 0.0151681537405628, "grad_norm": 1.0719302593515994, "learning_rate": 5.04e-06, "loss": 0.4233, "step": 442 }, { "epoch": 0.015202470830473576, "grad_norm": 0.9846757789637185, "learning_rate": 5.051428571428572e-06, "loss": 0.4451, "step": 443 }, { "epoch": 0.015236787920384352, "grad_norm": 0.9664751654012032, "learning_rate": 5.062857142857144e-06, "loss": 0.4505, "step": 444 }, { "epoch": 0.015271105010295128, "grad_norm": 1.0592945761714236, "learning_rate": 5.074285714285715e-06, "loss": 0.4791, "step": 445 }, { "epoch": 0.015305422100205902, "grad_norm": 0.9923351024504454, "learning_rate": 5.085714285714286e-06, "loss": 0.4209, "step": 446 }, { "epoch": 0.015339739190116677, "grad_norm": 1.0385401906840528, "learning_rate": 5.097142857142857e-06, "loss": 0.4953, "step": 447 }, { "epoch": 0.015374056280027453, "grad_norm": 1.0185333772230019, "learning_rate": 5.108571428571429e-06, "loss": 0.4022, "step": 448 }, { "epoch": 0.015408373369938229, "grad_norm": 1.1204685199738367, "learning_rate": 5.12e-06, "loss": 0.4562, "step": 449 }, { "epoch": 0.015442690459849005, "grad_norm": 1.1301128733890373, "learning_rate": 5.131428571428572e-06, "loss": 0.4539, "step": 450 }, { "epoch": 0.01547700754975978, "grad_norm": 1.032885749441881, "learning_rate": 5.142857142857142e-06, "loss": 0.4341, "step": 451 }, { "epoch": 0.015511324639670556, "grad_norm": 1.090533513803887, "learning_rate": 5.154285714285715e-06, "loss": 0.5112, "step": 452 }, { "epoch": 0.015545641729581332, "grad_norm": 1.165981043757325, "learning_rate": 5.165714285714286e-06, "loss": 0.4716, "step": 453 }, { "epoch": 0.015579958819492108, "grad_norm": 1.0832541364567294, "learning_rate": 5.177142857142857e-06, "loss": 0.4471, "step": 454 }, { "epoch": 0.015614275909402883, "grad_norm": 1.0219548297370873, "learning_rate": 5.1885714285714295e-06, "loss": 0.3787, "step": 455 }, { "epoch": 0.01564859299931366, "grad_norm": 1.0808429740957728, "learning_rate": 5.2e-06, "loss": 0.4318, "step": 456 }, { "epoch": 0.015682910089224435, "grad_norm": 1.0616532161054708, "learning_rate": 5.211428571428572e-06, "loss": 0.4544, "step": 457 }, { "epoch": 0.01571722717913521, "grad_norm": 0.9910919287701385, "learning_rate": 5.2228571428571425e-06, "loss": 0.4531, "step": 458 }, { "epoch": 0.015751544269045986, "grad_norm": 1.0450350608826364, "learning_rate": 5.234285714285715e-06, "loss": 0.4059, "step": 459 }, { "epoch": 0.015785861358956762, "grad_norm": 1.0953589028568684, "learning_rate": 5.2457142857142864e-06, "loss": 0.422, "step": 460 }, { "epoch": 0.015820178448867538, "grad_norm": 1.083017215316762, "learning_rate": 5.257142857142857e-06, "loss": 0.4416, "step": 461 }, { "epoch": 0.015854495538778313, "grad_norm": 1.0922113685846722, "learning_rate": 5.268571428571429e-06, "loss": 0.449, "step": 462 }, { "epoch": 0.015888812628689086, "grad_norm": 0.9177251861201904, "learning_rate": 5.28e-06, "loss": 0.3989, "step": 463 }, { "epoch": 0.01592312971859986, "grad_norm": 0.9666106255001774, "learning_rate": 5.291428571428572e-06, "loss": 0.4106, "step": 464 }, { "epoch": 0.015957446808510637, "grad_norm": 0.9488713928369378, "learning_rate": 5.3028571428571425e-06, "loss": 0.4056, "step": 465 }, { "epoch": 0.015991763898421413, "grad_norm": 1.018746545464629, "learning_rate": 5.314285714285715e-06, "loss": 0.4448, "step": 466 }, { "epoch": 0.01602608098833219, "grad_norm": 1.0351219460895562, "learning_rate": 5.3257142857142865e-06, "loss": 0.4171, "step": 467 }, { "epoch": 0.016060398078242964, "grad_norm": 0.9658723396190945, "learning_rate": 5.337142857142857e-06, "loss": 0.4115, "step": 468 }, { "epoch": 0.01609471516815374, "grad_norm": 0.9636072870051583, "learning_rate": 5.348571428571429e-06, "loss": 0.4401, "step": 469 }, { "epoch": 0.016129032258064516, "grad_norm": 1.1300520580633777, "learning_rate": 5.36e-06, "loss": 0.4548, "step": 470 }, { "epoch": 0.01616334934797529, "grad_norm": 0.9401994658242147, "learning_rate": 5.371428571428572e-06, "loss": 0.4392, "step": 471 }, { "epoch": 0.016197666437886067, "grad_norm": 1.048795403656564, "learning_rate": 5.382857142857143e-06, "loss": 0.4286, "step": 472 }, { "epoch": 0.016231983527796843, "grad_norm": 1.0838900315674975, "learning_rate": 5.394285714285715e-06, "loss": 0.4027, "step": 473 }, { "epoch": 0.01626630061770762, "grad_norm": 1.046872478084112, "learning_rate": 5.405714285714287e-06, "loss": 0.512, "step": 474 }, { "epoch": 0.016300617707618394, "grad_norm": 0.964006999102513, "learning_rate": 5.417142857142857e-06, "loss": 0.4293, "step": 475 }, { "epoch": 0.01633493479752917, "grad_norm": 1.0647799136708636, "learning_rate": 5.428571428571429e-06, "loss": 0.4334, "step": 476 }, { "epoch": 0.016369251887439946, "grad_norm": 0.8546962297253475, "learning_rate": 5.4400000000000004e-06, "loss": 0.4202, "step": 477 }, { "epoch": 0.01640356897735072, "grad_norm": 1.0374843123829502, "learning_rate": 5.451428571428572e-06, "loss": 0.3862, "step": 478 }, { "epoch": 0.016437886067261497, "grad_norm": 0.9650041541964437, "learning_rate": 5.462857142857143e-06, "loss": 0.4047, "step": 479 }, { "epoch": 0.016472203157172273, "grad_norm": 0.9883663915013904, "learning_rate": 5.474285714285714e-06, "loss": 0.4828, "step": 480 }, { "epoch": 0.01650652024708305, "grad_norm": 1.0481744908013935, "learning_rate": 5.485714285714287e-06, "loss": 0.4189, "step": 481 }, { "epoch": 0.016540837336993824, "grad_norm": 0.9788659985391057, "learning_rate": 5.497142857142857e-06, "loss": 0.4561, "step": 482 }, { "epoch": 0.0165751544269046, "grad_norm": 0.9966301221146109, "learning_rate": 5.508571428571429e-06, "loss": 0.4174, "step": 483 }, { "epoch": 0.016609471516815372, "grad_norm": 1.030916314841653, "learning_rate": 5.5200000000000005e-06, "loss": 0.4709, "step": 484 }, { "epoch": 0.016643788606726148, "grad_norm": 1.1354571611531228, "learning_rate": 5.531428571428572e-06, "loss": 0.4752, "step": 485 }, { "epoch": 0.016678105696636924, "grad_norm": 0.9891865504072982, "learning_rate": 5.542857142857143e-06, "loss": 0.4702, "step": 486 }, { "epoch": 0.0167124227865477, "grad_norm": 1.088972363243323, "learning_rate": 5.554285714285714e-06, "loss": 0.4482, "step": 487 }, { "epoch": 0.016746739876458475, "grad_norm": 1.0040437640760813, "learning_rate": 5.565714285714287e-06, "loss": 0.4771, "step": 488 }, { "epoch": 0.01678105696636925, "grad_norm": 1.0606056032418365, "learning_rate": 5.5771428571428575e-06, "loss": 0.4317, "step": 489 }, { "epoch": 0.016815374056280027, "grad_norm": 1.0027570800807184, "learning_rate": 5.588571428571429e-06, "loss": 0.4043, "step": 490 }, { "epoch": 0.016849691146190802, "grad_norm": 0.9296306908096545, "learning_rate": 5.600000000000001e-06, "loss": 0.3998, "step": 491 }, { "epoch": 0.016884008236101578, "grad_norm": 0.9612413577411021, "learning_rate": 5.611428571428572e-06, "loss": 0.3933, "step": 492 }, { "epoch": 0.016918325326012354, "grad_norm": 0.9561474701111136, "learning_rate": 5.622857142857143e-06, "loss": 0.5083, "step": 493 }, { "epoch": 0.01695264241592313, "grad_norm": 0.9880086987877243, "learning_rate": 5.6342857142857144e-06, "loss": 0.4114, "step": 494 }, { "epoch": 0.016986959505833905, "grad_norm": 1.0500166721805448, "learning_rate": 5.645714285714287e-06, "loss": 0.4782, "step": 495 }, { "epoch": 0.01702127659574468, "grad_norm": 1.0182809190428448, "learning_rate": 5.6571428571428576e-06, "loss": 0.4571, "step": 496 }, { "epoch": 0.017055593685655457, "grad_norm": 1.0353392601373164, "learning_rate": 5.668571428571429e-06, "loss": 0.4034, "step": 497 }, { "epoch": 0.017089910775566233, "grad_norm": 1.055472864250468, "learning_rate": 5.68e-06, "loss": 0.481, "step": 498 }, { "epoch": 0.01712422786547701, "grad_norm": 1.0874305671376678, "learning_rate": 5.691428571428572e-06, "loss": 0.4501, "step": 499 }, { "epoch": 0.017158544955387784, "grad_norm": 1.1549141180198128, "learning_rate": 5.702857142857143e-06, "loss": 0.4766, "step": 500 }, { "epoch": 0.01719286204529856, "grad_norm": 1.1486813002783771, "learning_rate": 5.7142857142857145e-06, "loss": 0.4754, "step": 501 }, { "epoch": 0.017227179135209335, "grad_norm": 1.0129964059780774, "learning_rate": 5.725714285714287e-06, "loss": 0.4752, "step": 502 }, { "epoch": 0.01726149622512011, "grad_norm": 1.203328792808758, "learning_rate": 5.737142857142858e-06, "loss": 0.4355, "step": 503 }, { "epoch": 0.017295813315030887, "grad_norm": 1.12691462914881, "learning_rate": 5.748571428571429e-06, "loss": 0.4574, "step": 504 }, { "epoch": 0.017330130404941663, "grad_norm": 1.2401824942186213, "learning_rate": 5.76e-06, "loss": 0.3871, "step": 505 }, { "epoch": 0.017364447494852435, "grad_norm": 1.0083009971554373, "learning_rate": 5.771428571428572e-06, "loss": 0.3761, "step": 506 }, { "epoch": 0.01739876458476321, "grad_norm": 1.0639841748359689, "learning_rate": 5.782857142857143e-06, "loss": 0.4375, "step": 507 }, { "epoch": 0.017433081674673986, "grad_norm": 1.1244982680604962, "learning_rate": 5.794285714285715e-06, "loss": 0.408, "step": 508 }, { "epoch": 0.017467398764584762, "grad_norm": 1.101500718479179, "learning_rate": 5.805714285714285e-06, "loss": 0.4601, "step": 509 }, { "epoch": 0.017501715854495538, "grad_norm": 1.09603557859421, "learning_rate": 5.817142857142858e-06, "loss": 0.4169, "step": 510 }, { "epoch": 0.017536032944406314, "grad_norm": 0.9747158146820368, "learning_rate": 5.828571428571429e-06, "loss": 0.4526, "step": 511 }, { "epoch": 0.01757035003431709, "grad_norm": 1.0068282507087165, "learning_rate": 5.84e-06, "loss": 0.5098, "step": 512 }, { "epoch": 0.017604667124227865, "grad_norm": 1.0582926643836137, "learning_rate": 5.851428571428572e-06, "loss": 0.4126, "step": 513 }, { "epoch": 0.01763898421413864, "grad_norm": 1.0808301709220358, "learning_rate": 5.862857142857143e-06, "loss": 0.4331, "step": 514 }, { "epoch": 0.017673301304049416, "grad_norm": 1.1339555987642813, "learning_rate": 5.874285714285715e-06, "loss": 0.4581, "step": 515 }, { "epoch": 0.017707618393960192, "grad_norm": 1.0374981168629647, "learning_rate": 5.885714285714285e-06, "loss": 0.4501, "step": 516 }, { "epoch": 0.017741935483870968, "grad_norm": 0.9409900485775324, "learning_rate": 5.897142857142858e-06, "loss": 0.4776, "step": 517 }, { "epoch": 0.017776252573781744, "grad_norm": 0.9152941471316812, "learning_rate": 5.908571428571429e-06, "loss": 0.3877, "step": 518 }, { "epoch": 0.01781056966369252, "grad_norm": 1.0675197272467045, "learning_rate": 5.92e-06, "loss": 0.456, "step": 519 }, { "epoch": 0.017844886753603295, "grad_norm": 1.0231064549666677, "learning_rate": 5.9314285714285725e-06, "loss": 0.4304, "step": 520 }, { "epoch": 0.01787920384351407, "grad_norm": 1.0495102723526013, "learning_rate": 5.942857142857143e-06, "loss": 0.4438, "step": 521 }, { "epoch": 0.017913520933424847, "grad_norm": 0.9905290258709318, "learning_rate": 5.954285714285715e-06, "loss": 0.4362, "step": 522 }, { "epoch": 0.017947838023335622, "grad_norm": 1.2680823332192896, "learning_rate": 5.9657142857142855e-06, "loss": 0.4294, "step": 523 }, { "epoch": 0.017982155113246398, "grad_norm": 1.2071103618489618, "learning_rate": 5.977142857142858e-06, "loss": 0.5023, "step": 524 }, { "epoch": 0.018016472203157174, "grad_norm": 0.9968619599802343, "learning_rate": 5.9885714285714294e-06, "loss": 0.4595, "step": 525 }, { "epoch": 0.01805078929306795, "grad_norm": 1.081467954405906, "learning_rate": 6e-06, "loss": 0.4179, "step": 526 }, { "epoch": 0.018085106382978722, "grad_norm": 0.970785120949359, "learning_rate": 6.011428571428572e-06, "loss": 0.4253, "step": 527 }, { "epoch": 0.018119423472889497, "grad_norm": 0.9959620114428962, "learning_rate": 6.022857142857143e-06, "loss": 0.468, "step": 528 }, { "epoch": 0.018153740562800273, "grad_norm": 1.124468347012336, "learning_rate": 6.034285714285715e-06, "loss": 0.4318, "step": 529 }, { "epoch": 0.01818805765271105, "grad_norm": 0.9768011978666639, "learning_rate": 6.0457142857142855e-06, "loss": 0.4241, "step": 530 }, { "epoch": 0.018222374742621825, "grad_norm": 1.0049496729507044, "learning_rate": 6.057142857142858e-06, "loss": 0.4201, "step": 531 }, { "epoch": 0.0182566918325326, "grad_norm": 1.0464217835524867, "learning_rate": 6.0685714285714295e-06, "loss": 0.4643, "step": 532 }, { "epoch": 0.018291008922443376, "grad_norm": 1.0989579834302605, "learning_rate": 6.08e-06, "loss": 0.4233, "step": 533 }, { "epoch": 0.018325326012354152, "grad_norm": 1.0314908825876534, "learning_rate": 6.091428571428572e-06, "loss": 0.4314, "step": 534 }, { "epoch": 0.018359643102264928, "grad_norm": 1.0041894686336124, "learning_rate": 6.102857142857143e-06, "loss": 0.4271, "step": 535 }, { "epoch": 0.018393960192175703, "grad_norm": 0.9638085616918631, "learning_rate": 6.114285714285715e-06, "loss": 0.4878, "step": 536 }, { "epoch": 0.01842827728208648, "grad_norm": 1.0395343735163032, "learning_rate": 6.125714285714286e-06, "loss": 0.4369, "step": 537 }, { "epoch": 0.018462594371997255, "grad_norm": 0.9922709539767779, "learning_rate": 6.137142857142858e-06, "loss": 0.4306, "step": 538 }, { "epoch": 0.01849691146190803, "grad_norm": 1.0253602038975569, "learning_rate": 6.14857142857143e-06, "loss": 0.3897, "step": 539 }, { "epoch": 0.018531228551818806, "grad_norm": 1.0732982421563617, "learning_rate": 6.16e-06, "loss": 0.497, "step": 540 }, { "epoch": 0.018565545641729582, "grad_norm": 1.1779804445857676, "learning_rate": 6.171428571428572e-06, "loss": 0.4092, "step": 541 }, { "epoch": 0.018599862731640358, "grad_norm": 0.9720885383284665, "learning_rate": 6.1828571428571434e-06, "loss": 0.4118, "step": 542 }, { "epoch": 0.018634179821551133, "grad_norm": 0.9427894346093095, "learning_rate": 6.194285714285715e-06, "loss": 0.403, "step": 543 }, { "epoch": 0.01866849691146191, "grad_norm": 1.0639307603491268, "learning_rate": 6.205714285714286e-06, "loss": 0.4072, "step": 544 }, { "epoch": 0.018702814001372685, "grad_norm": 0.8371194651154403, "learning_rate": 6.217142857142857e-06, "loss": 0.3885, "step": 545 }, { "epoch": 0.01873713109128346, "grad_norm": 1.070203792044283, "learning_rate": 6.22857142857143e-06, "loss": 0.4564, "step": 546 }, { "epoch": 0.018771448181194236, "grad_norm": 0.9588827241669403, "learning_rate": 6.24e-06, "loss": 0.4231, "step": 547 }, { "epoch": 0.01880576527110501, "grad_norm": 1.1373172136825256, "learning_rate": 6.251428571428572e-06, "loss": 0.4082, "step": 548 }, { "epoch": 0.018840082361015784, "grad_norm": 1.0214123772373473, "learning_rate": 6.2628571428571435e-06, "loss": 0.4889, "step": 549 }, { "epoch": 0.01887439945092656, "grad_norm": 1.0155204490388163, "learning_rate": 6.274285714285715e-06, "loss": 0.4048, "step": 550 }, { "epoch": 0.018908716540837336, "grad_norm": 1.2128866062005654, "learning_rate": 6.285714285714286e-06, "loss": 0.4539, "step": 551 }, { "epoch": 0.01894303363074811, "grad_norm": 1.1291671769588065, "learning_rate": 6.297142857142857e-06, "loss": 0.4861, "step": 552 }, { "epoch": 0.018977350720658887, "grad_norm": 1.142279176841993, "learning_rate": 6.30857142857143e-06, "loss": 0.4477, "step": 553 }, { "epoch": 0.019011667810569663, "grad_norm": 1.0573419895019311, "learning_rate": 6.3200000000000005e-06, "loss": 0.47, "step": 554 }, { "epoch": 0.01904598490048044, "grad_norm": 0.9754773102341994, "learning_rate": 6.331428571428572e-06, "loss": 0.3857, "step": 555 }, { "epoch": 0.019080301990391214, "grad_norm": 1.0251479387302316, "learning_rate": 6.342857142857143e-06, "loss": 0.4283, "step": 556 }, { "epoch": 0.01911461908030199, "grad_norm": 1.0379834075053622, "learning_rate": 6.354285714285715e-06, "loss": 0.498, "step": 557 }, { "epoch": 0.019148936170212766, "grad_norm": 1.0218263574092294, "learning_rate": 6.365714285714286e-06, "loss": 0.3986, "step": 558 }, { "epoch": 0.01918325326012354, "grad_norm": 1.1050339825799704, "learning_rate": 6.3771428571428574e-06, "loss": 0.4606, "step": 559 }, { "epoch": 0.019217570350034317, "grad_norm": 0.989204935119856, "learning_rate": 6.38857142857143e-06, "loss": 0.4089, "step": 560 }, { "epoch": 0.019251887439945093, "grad_norm": 0.9627134407661115, "learning_rate": 6.4000000000000006e-06, "loss": 0.462, "step": 561 }, { "epoch": 0.01928620452985587, "grad_norm": 1.033675288591452, "learning_rate": 6.411428571428572e-06, "loss": 0.5179, "step": 562 }, { "epoch": 0.019320521619766644, "grad_norm": 1.0903943600702275, "learning_rate": 6.422857142857143e-06, "loss": 0.4497, "step": 563 }, { "epoch": 0.01935483870967742, "grad_norm": 1.0030559804402968, "learning_rate": 6.434285714285715e-06, "loss": 0.4725, "step": 564 }, { "epoch": 0.019389155799588196, "grad_norm": 0.968792778243471, "learning_rate": 6.445714285714286e-06, "loss": 0.4483, "step": 565 }, { "epoch": 0.01942347288949897, "grad_norm": 1.0836321037431331, "learning_rate": 6.4571428571428575e-06, "loss": 0.4502, "step": 566 }, { "epoch": 0.019457789979409747, "grad_norm": 1.0147641298821075, "learning_rate": 6.46857142857143e-06, "loss": 0.4948, "step": 567 }, { "epoch": 0.019492107069320523, "grad_norm": 0.9923068944734943, "learning_rate": 6.480000000000001e-06, "loss": 0.422, "step": 568 }, { "epoch": 0.0195264241592313, "grad_norm": 0.8942073787797337, "learning_rate": 6.491428571428572e-06, "loss": 0.3893, "step": 569 }, { "epoch": 0.01956074124914207, "grad_norm": 1.0378307150232169, "learning_rate": 6.502857142857143e-06, "loss": 0.3977, "step": 570 }, { "epoch": 0.019595058339052847, "grad_norm": 0.9816119640458011, "learning_rate": 6.514285714285715e-06, "loss": 0.415, "step": 571 }, { "epoch": 0.019629375428963623, "grad_norm": 0.9729977025056266, "learning_rate": 6.525714285714286e-06, "loss": 0.4299, "step": 572 }, { "epoch": 0.0196636925188744, "grad_norm": 1.0245867977024505, "learning_rate": 6.537142857142858e-06, "loss": 0.4591, "step": 573 }, { "epoch": 0.019698009608785174, "grad_norm": 0.9496908959278731, "learning_rate": 6.548571428571428e-06, "loss": 0.4006, "step": 574 }, { "epoch": 0.01973232669869595, "grad_norm": 0.9246620600435899, "learning_rate": 6.560000000000001e-06, "loss": 0.4549, "step": 575 }, { "epoch": 0.019766643788606725, "grad_norm": 1.1207818675240733, "learning_rate": 6.571428571428572e-06, "loss": 0.437, "step": 576 }, { "epoch": 0.0198009608785175, "grad_norm": 0.8569313848084118, "learning_rate": 6.582857142857143e-06, "loss": 0.4241, "step": 577 }, { "epoch": 0.019835277968428277, "grad_norm": 0.9607543956821845, "learning_rate": 6.594285714285715e-06, "loss": 0.4384, "step": 578 }, { "epoch": 0.019869595058339053, "grad_norm": 0.9860369736606276, "learning_rate": 6.605714285714286e-06, "loss": 0.3886, "step": 579 }, { "epoch": 0.01990391214824983, "grad_norm": 0.906261642261882, "learning_rate": 6.617142857142858e-06, "loss": 0.4123, "step": 580 }, { "epoch": 0.019938229238160604, "grad_norm": 1.2341411450099673, "learning_rate": 6.628571428571428e-06, "loss": 0.4317, "step": 581 }, { "epoch": 0.01997254632807138, "grad_norm": 0.9496995252645066, "learning_rate": 6.640000000000001e-06, "loss": 0.3931, "step": 582 }, { "epoch": 0.020006863417982156, "grad_norm": 1.0028468290520032, "learning_rate": 6.651428571428572e-06, "loss": 0.4751, "step": 583 }, { "epoch": 0.02004118050789293, "grad_norm": 0.9614131362055712, "learning_rate": 6.662857142857143e-06, "loss": 0.4248, "step": 584 }, { "epoch": 0.020075497597803707, "grad_norm": 0.9916014337614464, "learning_rate": 6.6742857142857155e-06, "loss": 0.4171, "step": 585 }, { "epoch": 0.020109814687714483, "grad_norm": 0.937690121422548, "learning_rate": 6.685714285714286e-06, "loss": 0.4214, "step": 586 }, { "epoch": 0.02014413177762526, "grad_norm": 1.0314804589114008, "learning_rate": 6.697142857142858e-06, "loss": 0.4637, "step": 587 }, { "epoch": 0.020178448867536034, "grad_norm": 1.0048561603393047, "learning_rate": 6.7085714285714285e-06, "loss": 0.4243, "step": 588 }, { "epoch": 0.02021276595744681, "grad_norm": 1.03485536091314, "learning_rate": 6.720000000000001e-06, "loss": 0.4244, "step": 589 }, { "epoch": 0.020247083047357586, "grad_norm": 0.9820200252936384, "learning_rate": 6.7314285714285724e-06, "loss": 0.4139, "step": 590 }, { "epoch": 0.020281400137268358, "grad_norm": 0.9845952036995803, "learning_rate": 6.742857142857143e-06, "loss": 0.4756, "step": 591 }, { "epoch": 0.020315717227179134, "grad_norm": 1.1396676556669698, "learning_rate": 6.754285714285715e-06, "loss": 0.4816, "step": 592 }, { "epoch": 0.02035003431708991, "grad_norm": 1.1151005410728612, "learning_rate": 6.765714285714286e-06, "loss": 0.4091, "step": 593 }, { "epoch": 0.020384351407000685, "grad_norm": 1.2075989516280734, "learning_rate": 6.777142857142858e-06, "loss": 0.449, "step": 594 }, { "epoch": 0.02041866849691146, "grad_norm": 1.0411102373148204, "learning_rate": 6.7885714285714286e-06, "loss": 0.4378, "step": 595 }, { "epoch": 0.020452985586822237, "grad_norm": 1.289694849024685, "learning_rate": 6.800000000000001e-06, "loss": 0.51, "step": 596 }, { "epoch": 0.020487302676733012, "grad_norm": 1.0849944851353144, "learning_rate": 6.8114285714285725e-06, "loss": 0.4619, "step": 597 }, { "epoch": 0.020521619766643788, "grad_norm": 1.1078553572296908, "learning_rate": 6.822857142857143e-06, "loss": 0.3784, "step": 598 }, { "epoch": 0.020555936856554564, "grad_norm": 0.9924450955688394, "learning_rate": 6.834285714285715e-06, "loss": 0.4386, "step": 599 }, { "epoch": 0.02059025394646534, "grad_norm": 0.8990307197002719, "learning_rate": 6.845714285714286e-06, "loss": 0.3745, "step": 600 }, { "epoch": 0.020624571036376115, "grad_norm": 1.0217041558973532, "learning_rate": 6.857142857142858e-06, "loss": 0.4885, "step": 601 }, { "epoch": 0.02065888812628689, "grad_norm": 0.8530860257701213, "learning_rate": 6.868571428571429e-06, "loss": 0.4296, "step": 602 }, { "epoch": 0.020693205216197667, "grad_norm": 1.110123463225771, "learning_rate": 6.88e-06, "loss": 0.4365, "step": 603 }, { "epoch": 0.020727522306108442, "grad_norm": 1.039708945553102, "learning_rate": 6.891428571428573e-06, "loss": 0.4646, "step": 604 }, { "epoch": 0.020761839396019218, "grad_norm": 1.0328951618773863, "learning_rate": 6.902857142857143e-06, "loss": 0.3907, "step": 605 }, { "epoch": 0.020796156485929994, "grad_norm": 1.0731562810936854, "learning_rate": 6.914285714285715e-06, "loss": 0.3845, "step": 606 }, { "epoch": 0.02083047357584077, "grad_norm": 0.9926481646280811, "learning_rate": 6.9257142857142864e-06, "loss": 0.3977, "step": 607 }, { "epoch": 0.020864790665751545, "grad_norm": 1.06392077390861, "learning_rate": 6.937142857142858e-06, "loss": 0.5192, "step": 608 }, { "epoch": 0.02089910775566232, "grad_norm": 0.9905372700875911, "learning_rate": 6.948571428571429e-06, "loss": 0.4066, "step": 609 }, { "epoch": 0.020933424845573097, "grad_norm": 1.0285812155382668, "learning_rate": 6.96e-06, "loss": 0.372, "step": 610 }, { "epoch": 0.020967741935483872, "grad_norm": 1.051562975626444, "learning_rate": 6.971428571428573e-06, "loss": 0.3999, "step": 611 }, { "epoch": 0.021002059025394648, "grad_norm": 0.9992488586884183, "learning_rate": 6.982857142857143e-06, "loss": 0.414, "step": 612 }, { "epoch": 0.02103637611530542, "grad_norm": 1.1723341371793992, "learning_rate": 6.994285714285715e-06, "loss": 0.4472, "step": 613 }, { "epoch": 0.021070693205216196, "grad_norm": 0.9414017334131695, "learning_rate": 7.0057142857142865e-06, "loss": 0.4264, "step": 614 }, { "epoch": 0.021105010295126972, "grad_norm": 1.0646417463924978, "learning_rate": 7.017142857142858e-06, "loss": 0.4349, "step": 615 }, { "epoch": 0.021139327385037748, "grad_norm": 0.9692449897298595, "learning_rate": 7.028571428571429e-06, "loss": 0.4603, "step": 616 }, { "epoch": 0.021173644474948523, "grad_norm": 0.9724283228255924, "learning_rate": 7.04e-06, "loss": 0.4239, "step": 617 }, { "epoch": 0.0212079615648593, "grad_norm": 1.1285987523434577, "learning_rate": 7.051428571428573e-06, "loss": 0.4493, "step": 618 }, { "epoch": 0.021242278654770075, "grad_norm": 0.9865652410675224, "learning_rate": 7.0628571428571435e-06, "loss": 0.4336, "step": 619 }, { "epoch": 0.02127659574468085, "grad_norm": 1.0145211307883197, "learning_rate": 7.074285714285715e-06, "loss": 0.4339, "step": 620 }, { "epoch": 0.021310912834591626, "grad_norm": 1.0204961659852259, "learning_rate": 7.085714285714286e-06, "loss": 0.5035, "step": 621 }, { "epoch": 0.021345229924502402, "grad_norm": 1.132436588867697, "learning_rate": 7.097142857142858e-06, "loss": 0.4442, "step": 622 }, { "epoch": 0.021379547014413178, "grad_norm": 1.0376681732258057, "learning_rate": 7.108571428571429e-06, "loss": 0.4781, "step": 623 }, { "epoch": 0.021413864104323953, "grad_norm": 1.0628973819230412, "learning_rate": 7.1200000000000004e-06, "loss": 0.4715, "step": 624 }, { "epoch": 0.02144818119423473, "grad_norm": 0.9782530322213412, "learning_rate": 7.131428571428573e-06, "loss": 0.39, "step": 625 }, { "epoch": 0.021482498284145505, "grad_norm": 0.983435584125832, "learning_rate": 7.1428571428571436e-06, "loss": 0.4698, "step": 626 }, { "epoch": 0.02151681537405628, "grad_norm": 1.3080222823131633, "learning_rate": 7.154285714285715e-06, "loss": 0.4018, "step": 627 }, { "epoch": 0.021551132463967056, "grad_norm": 0.9163153172200429, "learning_rate": 7.165714285714286e-06, "loss": 0.4331, "step": 628 }, { "epoch": 0.021585449553877832, "grad_norm": 0.9705834245891227, "learning_rate": 7.177142857142858e-06, "loss": 0.4038, "step": 629 }, { "epoch": 0.021619766643788608, "grad_norm": 0.9733228507451485, "learning_rate": 7.188571428571429e-06, "loss": 0.3423, "step": 630 }, { "epoch": 0.021654083733699384, "grad_norm": 1.0754579843432868, "learning_rate": 7.2000000000000005e-06, "loss": 0.4163, "step": 631 }, { "epoch": 0.02168840082361016, "grad_norm": 0.9340791080980282, "learning_rate": 7.211428571428573e-06, "loss": 0.4569, "step": 632 }, { "epoch": 0.021722717913520935, "grad_norm": 0.9856374485673564, "learning_rate": 7.222857142857144e-06, "loss": 0.4203, "step": 633 }, { "epoch": 0.021757035003431707, "grad_norm": 1.087907528579415, "learning_rate": 7.234285714285715e-06, "loss": 0.3904, "step": 634 }, { "epoch": 0.021791352093342483, "grad_norm": 1.0086666364962797, "learning_rate": 7.245714285714286e-06, "loss": 0.401, "step": 635 }, { "epoch": 0.02182566918325326, "grad_norm": 0.9860970196692267, "learning_rate": 7.257142857142858e-06, "loss": 0.4193, "step": 636 }, { "epoch": 0.021859986273164034, "grad_norm": 1.1741545211511102, "learning_rate": 7.268571428571429e-06, "loss": 0.4176, "step": 637 }, { "epoch": 0.02189430336307481, "grad_norm": 0.9376020815229807, "learning_rate": 7.280000000000001e-06, "loss": 0.3701, "step": 638 }, { "epoch": 0.021928620452985586, "grad_norm": 1.0859908744753053, "learning_rate": 7.291428571428571e-06, "loss": 0.4222, "step": 639 }, { "epoch": 0.02196293754289636, "grad_norm": 0.978606713425527, "learning_rate": 7.302857142857144e-06, "loss": 0.4275, "step": 640 }, { "epoch": 0.021997254632807137, "grad_norm": 1.0841212462269723, "learning_rate": 7.314285714285715e-06, "loss": 0.4419, "step": 641 }, { "epoch": 0.022031571722717913, "grad_norm": 1.0386058839522796, "learning_rate": 7.325714285714286e-06, "loss": 0.4425, "step": 642 }, { "epoch": 0.02206588881262869, "grad_norm": 0.9403806897801562, "learning_rate": 7.337142857142858e-06, "loss": 0.468, "step": 643 }, { "epoch": 0.022100205902539465, "grad_norm": 0.9776087461806158, "learning_rate": 7.348571428571429e-06, "loss": 0.4161, "step": 644 }, { "epoch": 0.02213452299245024, "grad_norm": 0.9125016303355894, "learning_rate": 7.360000000000001e-06, "loss": 0.4251, "step": 645 }, { "epoch": 0.022168840082361016, "grad_norm": 1.1718556341845339, "learning_rate": 7.371428571428571e-06, "loss": 0.4119, "step": 646 }, { "epoch": 0.02220315717227179, "grad_norm": 0.876822486334835, "learning_rate": 7.382857142857144e-06, "loss": 0.4217, "step": 647 }, { "epoch": 0.022237474262182567, "grad_norm": 1.0725051297267338, "learning_rate": 7.394285714285715e-06, "loss": 0.4083, "step": 648 }, { "epoch": 0.022271791352093343, "grad_norm": 1.0277033382874652, "learning_rate": 7.405714285714286e-06, "loss": 0.4372, "step": 649 }, { "epoch": 0.02230610844200412, "grad_norm": 1.1241969720827463, "learning_rate": 7.417142857142857e-06, "loss": 0.4704, "step": 650 }, { "epoch": 0.022340425531914895, "grad_norm": 0.8526982288988749, "learning_rate": 7.428571428571429e-06, "loss": 0.4465, "step": 651 }, { "epoch": 0.02237474262182567, "grad_norm": 0.8681355773069502, "learning_rate": 7.440000000000001e-06, "loss": 0.3935, "step": 652 }, { "epoch": 0.022409059711736446, "grad_norm": 1.0126339227411723, "learning_rate": 7.4514285714285715e-06, "loss": 0.423, "step": 653 }, { "epoch": 0.022443376801647222, "grad_norm": 0.9448105788223445, "learning_rate": 7.462857142857144e-06, "loss": 0.3872, "step": 654 }, { "epoch": 0.022477693891557998, "grad_norm": 1.036626173538064, "learning_rate": 7.4742857142857154e-06, "loss": 0.4611, "step": 655 }, { "epoch": 0.02251201098146877, "grad_norm": 1.052569303961661, "learning_rate": 7.485714285714286e-06, "loss": 0.3696, "step": 656 }, { "epoch": 0.022546328071379546, "grad_norm": 1.1022608209859417, "learning_rate": 7.497142857142857e-06, "loss": 0.4027, "step": 657 }, { "epoch": 0.02258064516129032, "grad_norm": 0.9598018178809097, "learning_rate": 7.508571428571429e-06, "loss": 0.437, "step": 658 }, { "epoch": 0.022614962251201097, "grad_norm": 0.9041302628055069, "learning_rate": 7.520000000000001e-06, "loss": 0.392, "step": 659 }, { "epoch": 0.022649279341111873, "grad_norm": 1.056981889644757, "learning_rate": 7.5314285714285716e-06, "loss": 0.4098, "step": 660 }, { "epoch": 0.02268359643102265, "grad_norm": 1.0632977676345747, "learning_rate": 7.542857142857144e-06, "loss": 0.3983, "step": 661 }, { "epoch": 0.022717913520933424, "grad_norm": 1.0071843437203405, "learning_rate": 7.5542857142857155e-06, "loss": 0.3914, "step": 662 }, { "epoch": 0.0227522306108442, "grad_norm": 1.0212424221137186, "learning_rate": 7.565714285714286e-06, "loss": 0.4836, "step": 663 }, { "epoch": 0.022786547700754976, "grad_norm": 0.9060207764043314, "learning_rate": 7.577142857142857e-06, "loss": 0.4187, "step": 664 }, { "epoch": 0.02282086479066575, "grad_norm": 1.064478209438958, "learning_rate": 7.588571428571429e-06, "loss": 0.3966, "step": 665 }, { "epoch": 0.022855181880576527, "grad_norm": 1.1934133363208497, "learning_rate": 7.600000000000001e-06, "loss": 0.4203, "step": 666 }, { "epoch": 0.022889498970487303, "grad_norm": 1.0431274362887728, "learning_rate": 7.611428571428572e-06, "loss": 0.4403, "step": 667 }, { "epoch": 0.02292381606039808, "grad_norm": 0.9931885760827107, "learning_rate": 7.622857142857143e-06, "loss": 0.3828, "step": 668 }, { "epoch": 0.022958133150308854, "grad_norm": 1.0381900650263445, "learning_rate": 7.634285714285715e-06, "loss": 0.4209, "step": 669 }, { "epoch": 0.02299245024021963, "grad_norm": 1.0298037874135588, "learning_rate": 7.645714285714286e-06, "loss": 0.4154, "step": 670 }, { "epoch": 0.023026767330130406, "grad_norm": 0.8929631682468003, "learning_rate": 7.657142857142858e-06, "loss": 0.393, "step": 671 }, { "epoch": 0.02306108442004118, "grad_norm": 1.183827571484568, "learning_rate": 7.66857142857143e-06, "loss": 0.4226, "step": 672 }, { "epoch": 0.023095401509951957, "grad_norm": 1.0063018593096023, "learning_rate": 7.680000000000001e-06, "loss": 0.3847, "step": 673 }, { "epoch": 0.023129718599862733, "grad_norm": 1.031369290793901, "learning_rate": 7.691428571428573e-06, "loss": 0.4714, "step": 674 }, { "epoch": 0.02316403568977351, "grad_norm": 0.9866102683899439, "learning_rate": 7.702857142857142e-06, "loss": 0.4063, "step": 675 }, { "epoch": 0.023198352779684284, "grad_norm": 0.9783786610694147, "learning_rate": 7.714285714285716e-06, "loss": 0.4025, "step": 676 }, { "epoch": 0.023232669869595057, "grad_norm": 0.962350077306143, "learning_rate": 7.725714285714286e-06, "loss": 0.4171, "step": 677 }, { "epoch": 0.023266986959505832, "grad_norm": 0.9534599300507764, "learning_rate": 7.737142857142857e-06, "loss": 0.386, "step": 678 }, { "epoch": 0.023301304049416608, "grad_norm": 1.0529944986361381, "learning_rate": 7.74857142857143e-06, "loss": 0.3866, "step": 679 }, { "epoch": 0.023335621139327384, "grad_norm": 0.9662197131283649, "learning_rate": 7.76e-06, "loss": 0.4409, "step": 680 }, { "epoch": 0.02336993822923816, "grad_norm": 0.9546523293541527, "learning_rate": 7.771428571428572e-06, "loss": 0.4693, "step": 681 }, { "epoch": 0.023404255319148935, "grad_norm": 1.0971994051825356, "learning_rate": 7.782857142857143e-06, "loss": 0.4303, "step": 682 }, { "epoch": 0.02343857240905971, "grad_norm": 0.9035699980322983, "learning_rate": 7.794285714285715e-06, "loss": 0.3844, "step": 683 }, { "epoch": 0.023472889498970487, "grad_norm": 0.9537746176630402, "learning_rate": 7.805714285714286e-06, "loss": 0.3867, "step": 684 }, { "epoch": 0.023507206588881262, "grad_norm": 1.0033030513992884, "learning_rate": 7.817142857142858e-06, "loss": 0.3777, "step": 685 }, { "epoch": 0.023541523678792038, "grad_norm": 0.943640327256563, "learning_rate": 7.828571428571428e-06, "loss": 0.4283, "step": 686 }, { "epoch": 0.023575840768702814, "grad_norm": 1.0885324782531638, "learning_rate": 7.840000000000001e-06, "loss": 0.4494, "step": 687 }, { "epoch": 0.02361015785861359, "grad_norm": 0.9569348937293835, "learning_rate": 7.851428571428573e-06, "loss": 0.3435, "step": 688 }, { "epoch": 0.023644474948524365, "grad_norm": 0.9515690721978425, "learning_rate": 7.862857142857143e-06, "loss": 0.4866, "step": 689 }, { "epoch": 0.02367879203843514, "grad_norm": 0.9119588259957516, "learning_rate": 7.874285714285716e-06, "loss": 0.3895, "step": 690 }, { "epoch": 0.023713109128345917, "grad_norm": 1.0327315847111362, "learning_rate": 7.885714285714286e-06, "loss": 0.4048, "step": 691 }, { "epoch": 0.023747426218256693, "grad_norm": 0.972319494105272, "learning_rate": 7.897142857142857e-06, "loss": 0.459, "step": 692 }, { "epoch": 0.023781743308167468, "grad_norm": 0.9630569069107677, "learning_rate": 7.908571428571429e-06, "loss": 0.3444, "step": 693 }, { "epoch": 0.023816060398078244, "grad_norm": 1.0891006702524693, "learning_rate": 7.92e-06, "loss": 0.4408, "step": 694 }, { "epoch": 0.02385037748798902, "grad_norm": 1.00996172341121, "learning_rate": 7.931428571428572e-06, "loss": 0.4623, "step": 695 }, { "epoch": 0.023884694577899795, "grad_norm": 0.9616374545401519, "learning_rate": 7.942857142857144e-06, "loss": 0.4157, "step": 696 }, { "epoch": 0.02391901166781057, "grad_norm": 1.0865602982868368, "learning_rate": 7.954285714285715e-06, "loss": 0.4198, "step": 697 }, { "epoch": 0.023953328757721347, "grad_norm": 1.118088520661832, "learning_rate": 7.965714285714287e-06, "loss": 0.4162, "step": 698 }, { "epoch": 0.02398764584763212, "grad_norm": 1.0092251827054937, "learning_rate": 7.977142857142858e-06, "loss": 0.411, "step": 699 }, { "epoch": 0.024021962937542895, "grad_norm": 0.9178980405977047, "learning_rate": 7.988571428571428e-06, "loss": 0.3825, "step": 700 }, { "epoch": 0.02405628002745367, "grad_norm": 0.991330992289708, "learning_rate": 8.000000000000001e-06, "loss": 0.45, "step": 701 }, { "epoch": 0.024090597117364446, "grad_norm": 0.8798648814835068, "learning_rate": 8.011428571428573e-06, "loss": 0.3678, "step": 702 }, { "epoch": 0.024124914207275222, "grad_norm": 1.016493087131559, "learning_rate": 8.022857142857143e-06, "loss": 0.4683, "step": 703 }, { "epoch": 0.024159231297185998, "grad_norm": 1.0027237644862868, "learning_rate": 8.034285714285714e-06, "loss": 0.4377, "step": 704 }, { "epoch": 0.024193548387096774, "grad_norm": 0.991584760394713, "learning_rate": 8.045714285714286e-06, "loss": 0.4183, "step": 705 }, { "epoch": 0.02422786547700755, "grad_norm": 1.0169870581133231, "learning_rate": 8.057142857142857e-06, "loss": 0.4531, "step": 706 }, { "epoch": 0.024262182566918325, "grad_norm": 0.9802732942295803, "learning_rate": 8.068571428571429e-06, "loss": 0.4296, "step": 707 }, { "epoch": 0.0242964996568291, "grad_norm": 0.9831287328662405, "learning_rate": 8.08e-06, "loss": 0.4197, "step": 708 }, { "epoch": 0.024330816746739876, "grad_norm": 0.9657587170357843, "learning_rate": 8.091428571428572e-06, "loss": 0.4635, "step": 709 }, { "epoch": 0.024365133836650652, "grad_norm": 1.0764158385291935, "learning_rate": 8.102857142857144e-06, "loss": 0.3978, "step": 710 }, { "epoch": 0.024399450926561428, "grad_norm": 1.052085110009054, "learning_rate": 8.114285714285715e-06, "loss": 0.3912, "step": 711 }, { "epoch": 0.024433768016472204, "grad_norm": 1.0252805531193887, "learning_rate": 8.125714285714287e-06, "loss": 0.4054, "step": 712 }, { "epoch": 0.02446808510638298, "grad_norm": 1.0250427887237126, "learning_rate": 8.137142857142858e-06, "loss": 0.4882, "step": 713 }, { "epoch": 0.024502402196293755, "grad_norm": 0.9915827342276244, "learning_rate": 8.148571428571428e-06, "loss": 0.4217, "step": 714 }, { "epoch": 0.02453671928620453, "grad_norm": 0.971485128340721, "learning_rate": 8.16e-06, "loss": 0.3494, "step": 715 }, { "epoch": 0.024571036376115307, "grad_norm": 1.0485093750500867, "learning_rate": 8.171428571428573e-06, "loss": 0.3991, "step": 716 }, { "epoch": 0.024605353466026082, "grad_norm": 0.9688653048581157, "learning_rate": 8.182857142857143e-06, "loss": 0.4224, "step": 717 }, { "epoch": 0.024639670555936858, "grad_norm": 0.9669840762775338, "learning_rate": 8.194285714285714e-06, "loss": 0.5105, "step": 718 }, { "epoch": 0.024673987645847634, "grad_norm": 1.0142243894845324, "learning_rate": 8.205714285714286e-06, "loss": 0.4559, "step": 719 }, { "epoch": 0.024708304735758406, "grad_norm": 1.010427315124933, "learning_rate": 8.217142857142858e-06, "loss": 0.4511, "step": 720 }, { "epoch": 0.02474262182566918, "grad_norm": 1.1288999155636574, "learning_rate": 8.22857142857143e-06, "loss": 0.4866, "step": 721 }, { "epoch": 0.024776938915579957, "grad_norm": 1.069571665334018, "learning_rate": 8.24e-06, "loss": 0.3254, "step": 722 }, { "epoch": 0.024811256005490733, "grad_norm": 0.9098665573018446, "learning_rate": 8.251428571428572e-06, "loss": 0.4161, "step": 723 }, { "epoch": 0.02484557309540151, "grad_norm": 1.0602212721133906, "learning_rate": 8.262857142857144e-06, "loss": 0.4158, "step": 724 }, { "epoch": 0.024879890185312285, "grad_norm": 0.9760667968166923, "learning_rate": 8.274285714285715e-06, "loss": 0.3939, "step": 725 }, { "epoch": 0.02491420727522306, "grad_norm": 0.90324316106548, "learning_rate": 8.285714285714287e-06, "loss": 0.4308, "step": 726 }, { "epoch": 0.024948524365133836, "grad_norm": 1.0524034642966413, "learning_rate": 8.297142857142859e-06, "loss": 0.4528, "step": 727 }, { "epoch": 0.024982841455044612, "grad_norm": 1.0385324314834359, "learning_rate": 8.308571428571428e-06, "loss": 0.4549, "step": 728 }, { "epoch": 0.025017158544955388, "grad_norm": 0.974209196967774, "learning_rate": 8.32e-06, "loss": 0.4453, "step": 729 }, { "epoch": 0.025051475634866163, "grad_norm": 1.1084174756084417, "learning_rate": 8.331428571428573e-06, "loss": 0.4067, "step": 730 }, { "epoch": 0.02508579272477694, "grad_norm": 1.0524427420224645, "learning_rate": 8.342857142857143e-06, "loss": 0.433, "step": 731 }, { "epoch": 0.025120109814687715, "grad_norm": 1.069571392191593, "learning_rate": 8.354285714285715e-06, "loss": 0.4336, "step": 732 }, { "epoch": 0.02515442690459849, "grad_norm": 0.9510320793204335, "learning_rate": 8.365714285714286e-06, "loss": 0.412, "step": 733 }, { "epoch": 0.025188743994509266, "grad_norm": 1.0305560992715446, "learning_rate": 8.377142857142858e-06, "loss": 0.4122, "step": 734 }, { "epoch": 0.025223061084420042, "grad_norm": 1.0156325639401202, "learning_rate": 8.38857142857143e-06, "loss": 0.453, "step": 735 }, { "epoch": 0.025257378174330818, "grad_norm": 1.0860849682747429, "learning_rate": 8.400000000000001e-06, "loss": 0.4498, "step": 736 }, { "epoch": 0.025291695264241593, "grad_norm": 0.9190495895141079, "learning_rate": 8.411428571428572e-06, "loss": 0.364, "step": 737 }, { "epoch": 0.02532601235415237, "grad_norm": 1.0392999257678266, "learning_rate": 8.422857142857144e-06, "loss": 0.4364, "step": 738 }, { "epoch": 0.025360329444063145, "grad_norm": 1.081132736808912, "learning_rate": 8.434285714285716e-06, "loss": 0.5177, "step": 739 }, { "epoch": 0.02539464653397392, "grad_norm": 0.9364684229449985, "learning_rate": 8.445714285714285e-06, "loss": 0.4447, "step": 740 }, { "epoch": 0.025428963623884696, "grad_norm": 0.9452385844375374, "learning_rate": 8.457142857142859e-06, "loss": 0.423, "step": 741 }, { "epoch": 0.02546328071379547, "grad_norm": 1.0434611685699997, "learning_rate": 8.468571428571429e-06, "loss": 0.4901, "step": 742 }, { "epoch": 0.025497597803706244, "grad_norm": 0.937922471600692, "learning_rate": 8.48e-06, "loss": 0.3822, "step": 743 }, { "epoch": 0.02553191489361702, "grad_norm": 1.1173102372659058, "learning_rate": 8.491428571428572e-06, "loss": 0.4487, "step": 744 }, { "epoch": 0.025566231983527796, "grad_norm": 0.9049296166662164, "learning_rate": 8.502857142857143e-06, "loss": 0.4427, "step": 745 }, { "epoch": 0.02560054907343857, "grad_norm": 1.1830598014415408, "learning_rate": 8.514285714285715e-06, "loss": 0.4053, "step": 746 }, { "epoch": 0.025634866163349347, "grad_norm": 0.9241373160599425, "learning_rate": 8.525714285714286e-06, "loss": 0.4053, "step": 747 }, { "epoch": 0.025669183253260123, "grad_norm": 0.9523813378926388, "learning_rate": 8.537142857142858e-06, "loss": 0.3688, "step": 748 }, { "epoch": 0.0257035003431709, "grad_norm": 1.0236381117610116, "learning_rate": 8.54857142857143e-06, "loss": 0.4605, "step": 749 }, { "epoch": 0.025737817433081674, "grad_norm": 0.9182486385189614, "learning_rate": 8.560000000000001e-06, "loss": 0.4364, "step": 750 }, { "epoch": 0.02577213452299245, "grad_norm": 1.032534375489128, "learning_rate": 8.571428571428571e-06, "loss": 0.3842, "step": 751 }, { "epoch": 0.025806451612903226, "grad_norm": 1.1552612823016633, "learning_rate": 8.582857142857144e-06, "loss": 0.3972, "step": 752 }, { "epoch": 0.025840768702814, "grad_norm": 0.9871686772833618, "learning_rate": 8.594285714285716e-06, "loss": 0.3874, "step": 753 }, { "epoch": 0.025875085792724777, "grad_norm": 1.0546841998585041, "learning_rate": 8.605714285714286e-06, "loss": 0.3865, "step": 754 }, { "epoch": 0.025909402882635553, "grad_norm": 1.0712555594914313, "learning_rate": 8.617142857142859e-06, "loss": 0.3941, "step": 755 }, { "epoch": 0.02594371997254633, "grad_norm": 1.0162717391902705, "learning_rate": 8.628571428571429e-06, "loss": 0.4567, "step": 756 }, { "epoch": 0.025978037062457104, "grad_norm": 0.9696007001508711, "learning_rate": 8.64e-06, "loss": 0.3839, "step": 757 }, { "epoch": 0.02601235415236788, "grad_norm": 0.9788867202044023, "learning_rate": 8.651428571428572e-06, "loss": 0.4202, "step": 758 }, { "epoch": 0.026046671242278656, "grad_norm": 1.0804343472370639, "learning_rate": 8.662857142857143e-06, "loss": 0.4703, "step": 759 }, { "epoch": 0.02608098833218943, "grad_norm": 1.1387411843746782, "learning_rate": 8.674285714285715e-06, "loss": 0.5048, "step": 760 }, { "epoch": 0.026115305422100207, "grad_norm": 0.9344663866699453, "learning_rate": 8.685714285714287e-06, "loss": 0.3976, "step": 761 }, { "epoch": 0.026149622512010983, "grad_norm": 0.8956539910778866, "learning_rate": 8.697142857142858e-06, "loss": 0.3884, "step": 762 }, { "epoch": 0.026183939601921755, "grad_norm": 0.9118691278016006, "learning_rate": 8.70857142857143e-06, "loss": 0.4297, "step": 763 }, { "epoch": 0.02621825669183253, "grad_norm": 1.1679539117125588, "learning_rate": 8.720000000000001e-06, "loss": 0.4442, "step": 764 }, { "epoch": 0.026252573781743307, "grad_norm": 1.117001490542191, "learning_rate": 8.731428571428571e-06, "loss": 0.4749, "step": 765 }, { "epoch": 0.026286890871654083, "grad_norm": 0.9980925992940124, "learning_rate": 8.742857142857144e-06, "loss": 0.4157, "step": 766 }, { "epoch": 0.026321207961564858, "grad_norm": 1.029860761355821, "learning_rate": 8.754285714285716e-06, "loss": 0.4286, "step": 767 }, { "epoch": 0.026355525051475634, "grad_norm": 0.9987143998195335, "learning_rate": 8.765714285714286e-06, "loss": 0.4066, "step": 768 }, { "epoch": 0.02638984214138641, "grad_norm": 0.920497071950643, "learning_rate": 8.777142857142857e-06, "loss": 0.4093, "step": 769 }, { "epoch": 0.026424159231297185, "grad_norm": 0.917454683130532, "learning_rate": 8.788571428571429e-06, "loss": 0.3846, "step": 770 }, { "epoch": 0.02645847632120796, "grad_norm": 0.9734076089796075, "learning_rate": 8.8e-06, "loss": 0.3839, "step": 771 }, { "epoch": 0.026492793411118737, "grad_norm": 1.1218595363037658, "learning_rate": 8.811428571428572e-06, "loss": 0.4227, "step": 772 }, { "epoch": 0.026527110501029513, "grad_norm": 1.0043699595976983, "learning_rate": 8.822857142857144e-06, "loss": 0.3783, "step": 773 }, { "epoch": 0.02656142759094029, "grad_norm": 1.199368963308293, "learning_rate": 8.834285714285715e-06, "loss": 0.4608, "step": 774 }, { "epoch": 0.026595744680851064, "grad_norm": 1.0143595847012135, "learning_rate": 8.845714285714287e-06, "loss": 0.4955, "step": 775 }, { "epoch": 0.02663006177076184, "grad_norm": 0.9591916231697963, "learning_rate": 8.857142857142858e-06, "loss": 0.4375, "step": 776 }, { "epoch": 0.026664378860672616, "grad_norm": 1.0011709996887752, "learning_rate": 8.86857142857143e-06, "loss": 0.4691, "step": 777 }, { "epoch": 0.02669869595058339, "grad_norm": 1.003557510393122, "learning_rate": 8.880000000000001e-06, "loss": 0.3667, "step": 778 }, { "epoch": 0.026733013040494167, "grad_norm": 0.8791881450488238, "learning_rate": 8.891428571428571e-06, "loss": 0.3808, "step": 779 }, { "epoch": 0.026767330130404943, "grad_norm": 1.1126349200860406, "learning_rate": 8.902857142857143e-06, "loss": 0.4248, "step": 780 }, { "epoch": 0.02680164722031572, "grad_norm": 0.946204204004292, "learning_rate": 8.914285714285716e-06, "loss": 0.4413, "step": 781 }, { "epoch": 0.026835964310226494, "grad_norm": 0.9832307678830725, "learning_rate": 8.925714285714286e-06, "loss": 0.4013, "step": 782 }, { "epoch": 0.02687028140013727, "grad_norm": 1.1204453052160899, "learning_rate": 8.937142857142857e-06, "loss": 0.4059, "step": 783 }, { "epoch": 0.026904598490048046, "grad_norm": 0.9913821815739984, "learning_rate": 8.948571428571429e-06, "loss": 0.4164, "step": 784 }, { "epoch": 0.026938915579958818, "grad_norm": 0.9449906559323775, "learning_rate": 8.96e-06, "loss": 0.4079, "step": 785 }, { "epoch": 0.026973232669869594, "grad_norm": 0.8704419117950223, "learning_rate": 8.971428571428572e-06, "loss": 0.4009, "step": 786 }, { "epoch": 0.02700754975978037, "grad_norm": 1.0486783066607832, "learning_rate": 8.982857142857144e-06, "loss": 0.4212, "step": 787 }, { "epoch": 0.027041866849691145, "grad_norm": 0.9643094763259715, "learning_rate": 8.994285714285715e-06, "loss": 0.4671, "step": 788 }, { "epoch": 0.02707618393960192, "grad_norm": 0.9299824154425878, "learning_rate": 9.005714285714287e-06, "loss": 0.4163, "step": 789 }, { "epoch": 0.027110501029512696, "grad_norm": 0.9106129258487952, "learning_rate": 9.017142857142858e-06, "loss": 0.3883, "step": 790 }, { "epoch": 0.027144818119423472, "grad_norm": 1.0982831531721775, "learning_rate": 9.028571428571428e-06, "loss": 0.3913, "step": 791 }, { "epoch": 0.027179135209334248, "grad_norm": 0.9729495849098162, "learning_rate": 9.040000000000002e-06, "loss": 0.4423, "step": 792 }, { "epoch": 0.027213452299245024, "grad_norm": 0.9517319237427312, "learning_rate": 9.051428571428571e-06, "loss": 0.38, "step": 793 }, { "epoch": 0.0272477693891558, "grad_norm": 0.8489003255982026, "learning_rate": 9.062857142857143e-06, "loss": 0.4498, "step": 794 }, { "epoch": 0.027282086479066575, "grad_norm": 0.9242302609907969, "learning_rate": 9.074285714285716e-06, "loss": 0.3946, "step": 795 }, { "epoch": 0.02731640356897735, "grad_norm": 0.9045871004867987, "learning_rate": 9.085714285714286e-06, "loss": 0.4091, "step": 796 }, { "epoch": 0.027350720658888127, "grad_norm": 0.9320900296601925, "learning_rate": 9.097142857142858e-06, "loss": 0.4095, "step": 797 }, { "epoch": 0.027385037748798902, "grad_norm": 0.9729973917848117, "learning_rate": 9.10857142857143e-06, "loss": 0.3625, "step": 798 }, { "epoch": 0.027419354838709678, "grad_norm": 1.0074334818753854, "learning_rate": 9.12e-06, "loss": 0.4026, "step": 799 }, { "epoch": 0.027453671928620454, "grad_norm": 0.8827837469195451, "learning_rate": 9.131428571428572e-06, "loss": 0.4242, "step": 800 }, { "epoch": 0.02748798901853123, "grad_norm": 1.0083450900181041, "learning_rate": 9.142857142857144e-06, "loss": 0.3808, "step": 801 }, { "epoch": 0.027522306108442005, "grad_norm": 1.074471123687467, "learning_rate": 9.154285714285715e-06, "loss": 0.4192, "step": 802 }, { "epoch": 0.02755662319835278, "grad_norm": 0.9086795466297471, "learning_rate": 9.165714285714287e-06, "loss": 0.4195, "step": 803 }, { "epoch": 0.027590940288263557, "grad_norm": 1.0087233268436515, "learning_rate": 9.177142857142859e-06, "loss": 0.434, "step": 804 }, { "epoch": 0.027625257378174332, "grad_norm": 0.9497502928000198, "learning_rate": 9.188571428571428e-06, "loss": 0.4234, "step": 805 }, { "epoch": 0.027659574468085105, "grad_norm": 1.0132239229317686, "learning_rate": 9.200000000000002e-06, "loss": 0.4155, "step": 806 }, { "epoch": 0.02769389155799588, "grad_norm": 0.9581465193812766, "learning_rate": 9.211428571428572e-06, "loss": 0.4474, "step": 807 }, { "epoch": 0.027728208647906656, "grad_norm": 0.9880803467128394, "learning_rate": 9.222857142857143e-06, "loss": 0.3869, "step": 808 }, { "epoch": 0.027762525737817432, "grad_norm": 0.9409671005021525, "learning_rate": 9.234285714285715e-06, "loss": 0.4252, "step": 809 }, { "epoch": 0.027796842827728208, "grad_norm": 1.0094209143520714, "learning_rate": 9.245714285714286e-06, "loss": 0.4082, "step": 810 }, { "epoch": 0.027831159917638983, "grad_norm": 0.9104526908337696, "learning_rate": 9.257142857142858e-06, "loss": 0.4286, "step": 811 }, { "epoch": 0.02786547700754976, "grad_norm": 0.9927137763731955, "learning_rate": 9.26857142857143e-06, "loss": 0.362, "step": 812 }, { "epoch": 0.027899794097460535, "grad_norm": 0.9163512920588022, "learning_rate": 9.280000000000001e-06, "loss": 0.3356, "step": 813 }, { "epoch": 0.02793411118737131, "grad_norm": 0.9343393199064279, "learning_rate": 9.291428571428572e-06, "loss": 0.4429, "step": 814 }, { "epoch": 0.027968428277282086, "grad_norm": 1.0453436303010712, "learning_rate": 9.302857142857144e-06, "loss": 0.4072, "step": 815 }, { "epoch": 0.028002745367192862, "grad_norm": 0.9476946470739972, "learning_rate": 9.314285714285714e-06, "loss": 0.4087, "step": 816 }, { "epoch": 0.028037062457103638, "grad_norm": 1.0320320036831212, "learning_rate": 9.325714285714287e-06, "loss": 0.4214, "step": 817 }, { "epoch": 0.028071379547014413, "grad_norm": 1.0642292804879083, "learning_rate": 9.337142857142859e-06, "loss": 0.4371, "step": 818 }, { "epoch": 0.02810569663692519, "grad_norm": 1.0493530921541723, "learning_rate": 9.348571428571429e-06, "loss": 0.4334, "step": 819 }, { "epoch": 0.028140013726835965, "grad_norm": 0.9400361609774804, "learning_rate": 9.360000000000002e-06, "loss": 0.4735, "step": 820 }, { "epoch": 0.02817433081674674, "grad_norm": 0.9880677628607899, "learning_rate": 9.371428571428572e-06, "loss": 0.4272, "step": 821 }, { "epoch": 0.028208647906657516, "grad_norm": 0.9832872182006372, "learning_rate": 9.382857142857143e-06, "loss": 0.437, "step": 822 }, { "epoch": 0.028242964996568292, "grad_norm": 1.0430258748783834, "learning_rate": 9.394285714285715e-06, "loss": 0.3803, "step": 823 }, { "epoch": 0.028277282086479068, "grad_norm": 1.0570017155983695, "learning_rate": 9.405714285714286e-06, "loss": 0.4376, "step": 824 }, { "epoch": 0.028311599176389843, "grad_norm": 0.9170070564402628, "learning_rate": 9.417142857142858e-06, "loss": 0.423, "step": 825 }, { "epoch": 0.02834591626630062, "grad_norm": 0.9864947684423276, "learning_rate": 9.42857142857143e-06, "loss": 0.4289, "step": 826 }, { "epoch": 0.028380233356211395, "grad_norm": 0.9648813170600694, "learning_rate": 9.440000000000001e-06, "loss": 0.3766, "step": 827 }, { "epoch": 0.028414550446122167, "grad_norm": 0.9066351188379116, "learning_rate": 9.451428571428573e-06, "loss": 0.4107, "step": 828 }, { "epoch": 0.028448867536032943, "grad_norm": 0.9043434394520634, "learning_rate": 9.462857142857144e-06, "loss": 0.4016, "step": 829 }, { "epoch": 0.02848318462594372, "grad_norm": 1.0890847811805293, "learning_rate": 9.474285714285714e-06, "loss": 0.4165, "step": 830 }, { "epoch": 0.028517501715854494, "grad_norm": 0.9352298059946109, "learning_rate": 9.485714285714287e-06, "loss": 0.4023, "step": 831 }, { "epoch": 0.02855181880576527, "grad_norm": 0.9287421482695551, "learning_rate": 9.497142857142859e-06, "loss": 0.4625, "step": 832 }, { "epoch": 0.028586135895676046, "grad_norm": 1.0915739942866463, "learning_rate": 9.508571428571429e-06, "loss": 0.4131, "step": 833 }, { "epoch": 0.02862045298558682, "grad_norm": 0.948333746669833, "learning_rate": 9.52e-06, "loss": 0.3606, "step": 834 }, { "epoch": 0.028654770075497597, "grad_norm": 1.0315314415803662, "learning_rate": 9.531428571428572e-06, "loss": 0.3675, "step": 835 }, { "epoch": 0.028689087165408373, "grad_norm": 0.9373624093372921, "learning_rate": 9.542857142857143e-06, "loss": 0.4082, "step": 836 }, { "epoch": 0.02872340425531915, "grad_norm": 0.9188877965932588, "learning_rate": 9.554285714285715e-06, "loss": 0.3819, "step": 837 }, { "epoch": 0.028757721345229924, "grad_norm": 1.0081771081483248, "learning_rate": 9.565714285714287e-06, "loss": 0.4023, "step": 838 }, { "epoch": 0.0287920384351407, "grad_norm": 1.003069919465592, "learning_rate": 9.577142857142858e-06, "loss": 0.3953, "step": 839 }, { "epoch": 0.028826355525051476, "grad_norm": 1.0210966083807322, "learning_rate": 9.58857142857143e-06, "loss": 0.4353, "step": 840 }, { "epoch": 0.02886067261496225, "grad_norm": 1.0394112916220126, "learning_rate": 9.600000000000001e-06, "loss": 0.4056, "step": 841 }, { "epoch": 0.028894989704873027, "grad_norm": 0.9587047174544578, "learning_rate": 9.611428571428573e-06, "loss": 0.3434, "step": 842 }, { "epoch": 0.028929306794783803, "grad_norm": 0.9643909414898025, "learning_rate": 9.622857142857144e-06, "loss": 0.4501, "step": 843 }, { "epoch": 0.02896362388469458, "grad_norm": 0.9419417702475621, "learning_rate": 9.634285714285714e-06, "loss": 0.4499, "step": 844 }, { "epoch": 0.028997940974605355, "grad_norm": 1.0515308855096375, "learning_rate": 9.645714285714286e-06, "loss": 0.3966, "step": 845 }, { "epoch": 0.02903225806451613, "grad_norm": 0.9602121239807039, "learning_rate": 9.657142857142859e-06, "loss": 0.442, "step": 846 }, { "epoch": 0.029066575154426906, "grad_norm": 0.9862854658262364, "learning_rate": 9.668571428571429e-06, "loss": 0.3962, "step": 847 }, { "epoch": 0.029100892244337682, "grad_norm": 0.9218224061742685, "learning_rate": 9.68e-06, "loss": 0.4344, "step": 848 }, { "epoch": 0.029135209334248454, "grad_norm": 0.8147544076250856, "learning_rate": 9.691428571428572e-06, "loss": 0.4206, "step": 849 }, { "epoch": 0.02916952642415923, "grad_norm": 1.0744017176434988, "learning_rate": 9.702857142857144e-06, "loss": 0.4594, "step": 850 }, { "epoch": 0.029203843514070005, "grad_norm": 0.9746204660723113, "learning_rate": 9.714285714285715e-06, "loss": 0.437, "step": 851 }, { "epoch": 0.02923816060398078, "grad_norm": 1.0135543856567608, "learning_rate": 9.725714285714287e-06, "loss": 0.4114, "step": 852 }, { "epoch": 0.029272477693891557, "grad_norm": 0.95667983732166, "learning_rate": 9.737142857142858e-06, "loss": 0.4355, "step": 853 }, { "epoch": 0.029306794783802333, "grad_norm": 0.9908856272454568, "learning_rate": 9.74857142857143e-06, "loss": 0.3667, "step": 854 }, { "epoch": 0.02934111187371311, "grad_norm": 0.9288310658045095, "learning_rate": 9.760000000000001e-06, "loss": 0.3939, "step": 855 }, { "epoch": 0.029375428963623884, "grad_norm": 0.9839359081111028, "learning_rate": 9.771428571428571e-06, "loss": 0.4421, "step": 856 }, { "epoch": 0.02940974605353466, "grad_norm": 0.9406142761376032, "learning_rate": 9.782857142857145e-06, "loss": 0.4286, "step": 857 }, { "epoch": 0.029444063143445436, "grad_norm": 1.0522849987870653, "learning_rate": 9.794285714285714e-06, "loss": 0.5225, "step": 858 }, { "epoch": 0.02947838023335621, "grad_norm": 0.9121704446067439, "learning_rate": 9.805714285714286e-06, "loss": 0.4073, "step": 859 }, { "epoch": 0.029512697323266987, "grad_norm": 0.9375664960310249, "learning_rate": 9.81714285714286e-06, "loss": 0.4306, "step": 860 }, { "epoch": 0.029547014413177763, "grad_norm": 1.0211197915434609, "learning_rate": 9.828571428571429e-06, "loss": 0.358, "step": 861 }, { "epoch": 0.02958133150308854, "grad_norm": 0.966461848503985, "learning_rate": 9.84e-06, "loss": 0.3747, "step": 862 }, { "epoch": 0.029615648592999314, "grad_norm": 0.952283361461335, "learning_rate": 9.851428571428572e-06, "loss": 0.3759, "step": 863 }, { "epoch": 0.02964996568291009, "grad_norm": 0.979164368654833, "learning_rate": 9.862857142857144e-06, "loss": 0.4222, "step": 864 }, { "epoch": 0.029684282772820866, "grad_norm": 0.9772649843833031, "learning_rate": 9.874285714285715e-06, "loss": 0.393, "step": 865 }, { "epoch": 0.02971859986273164, "grad_norm": 0.8393108035498088, "learning_rate": 9.885714285714287e-06, "loss": 0.4022, "step": 866 }, { "epoch": 0.029752916952642417, "grad_norm": 0.9812675208102174, "learning_rate": 9.897142857142858e-06, "loss": 0.3495, "step": 867 }, { "epoch": 0.029787234042553193, "grad_norm": 0.9499111059377583, "learning_rate": 9.90857142857143e-06, "loss": 0.4124, "step": 868 }, { "epoch": 0.02982155113246397, "grad_norm": 0.9841048533692925, "learning_rate": 9.920000000000002e-06, "loss": 0.3957, "step": 869 }, { "epoch": 0.029855868222374744, "grad_norm": 0.9167119367157858, "learning_rate": 9.931428571428571e-06, "loss": 0.4255, "step": 870 }, { "epoch": 0.029890185312285517, "grad_norm": 0.9068826455979354, "learning_rate": 9.942857142857145e-06, "loss": 0.3776, "step": 871 }, { "epoch": 0.029924502402196292, "grad_norm": 0.9586416549071625, "learning_rate": 9.954285714285715e-06, "loss": 0.3774, "step": 872 }, { "epoch": 0.029958819492107068, "grad_norm": 1.0579986746728418, "learning_rate": 9.965714285714286e-06, "loss": 0.4513, "step": 873 }, { "epoch": 0.029993136582017844, "grad_norm": 0.9216243002569924, "learning_rate": 9.977142857142858e-06, "loss": 0.4361, "step": 874 }, { "epoch": 0.03002745367192862, "grad_norm": 0.9599724500296243, "learning_rate": 9.98857142857143e-06, "loss": 0.3529, "step": 875 }, { "epoch": 0.030061770761839395, "grad_norm": 1.0281617461629486, "learning_rate": 1e-05, "loss": 0.4619, "step": 876 }, { "epoch": 0.03009608785175017, "grad_norm": 0.9259634615904633, "learning_rate": 9.999999969115414e-06, "loss": 0.4386, "step": 877 }, { "epoch": 0.030130404941660947, "grad_norm": 0.8282836468043968, "learning_rate": 9.99999987646166e-06, "loss": 0.3947, "step": 878 }, { "epoch": 0.030164722031571722, "grad_norm": 1.0736033620288166, "learning_rate": 9.999999722038736e-06, "loss": 0.4584, "step": 879 }, { "epoch": 0.030199039121482498, "grad_norm": 0.9748651055984962, "learning_rate": 9.999999505846644e-06, "loss": 0.4482, "step": 880 }, { "epoch": 0.030233356211393274, "grad_norm": 0.9564391433470503, "learning_rate": 9.999999227885388e-06, "loss": 0.4068, "step": 881 }, { "epoch": 0.03026767330130405, "grad_norm": 1.0466327038650958, "learning_rate": 9.99999888815497e-06, "loss": 0.4211, "step": 882 }, { "epoch": 0.030301990391214825, "grad_norm": 1.0259782426880721, "learning_rate": 9.999998486655397e-06, "loss": 0.4911, "step": 883 }, { "epoch": 0.0303363074811256, "grad_norm": 0.8575885333037483, "learning_rate": 9.999998023386671e-06, "loss": 0.4096, "step": 884 }, { "epoch": 0.030370624571036377, "grad_norm": 1.0001149055673957, "learning_rate": 9.9999974983488e-06, "loss": 0.4425, "step": 885 }, { "epoch": 0.030404941660947152, "grad_norm": 0.9984536462080184, "learning_rate": 9.999996911541787e-06, "loss": 0.4105, "step": 886 }, { "epoch": 0.030439258750857928, "grad_norm": 0.9706054190223926, "learning_rate": 9.999996262965644e-06, "loss": 0.3988, "step": 887 }, { "epoch": 0.030473575840768704, "grad_norm": 0.8621388174178957, "learning_rate": 9.999995552620374e-06, "loss": 0.3302, "step": 888 }, { "epoch": 0.03050789293067948, "grad_norm": 0.9733250912043294, "learning_rate": 9.999994780505991e-06, "loss": 0.4246, "step": 889 }, { "epoch": 0.030542210020590255, "grad_norm": 0.9413375538673745, "learning_rate": 9.9999939466225e-06, "loss": 0.4012, "step": 890 }, { "epoch": 0.03057652711050103, "grad_norm": 1.040964779796807, "learning_rate": 9.999993050969915e-06, "loss": 0.4324, "step": 891 }, { "epoch": 0.030610844200411803, "grad_norm": 0.9820611031455958, "learning_rate": 9.999992093548244e-06, "loss": 0.4341, "step": 892 }, { "epoch": 0.03064516129032258, "grad_norm": 0.9036639768327178, "learning_rate": 9.9999910743575e-06, "loss": 0.3858, "step": 893 }, { "epoch": 0.030679478380233355, "grad_norm": 0.9828937029040055, "learning_rate": 9.999989993397697e-06, "loss": 0.4268, "step": 894 }, { "epoch": 0.03071379547014413, "grad_norm": 1.0733240966676563, "learning_rate": 9.999988850668846e-06, "loss": 0.4267, "step": 895 }, { "epoch": 0.030748112560054906, "grad_norm": 0.9557271848096078, "learning_rate": 9.999987646170963e-06, "loss": 0.4015, "step": 896 }, { "epoch": 0.030782429649965682, "grad_norm": 0.9948254098364888, "learning_rate": 9.999986379904061e-06, "loss": 0.4757, "step": 897 }, { "epoch": 0.030816746739876458, "grad_norm": 1.034031512050819, "learning_rate": 9.999985051868157e-06, "loss": 0.4527, "step": 898 }, { "epoch": 0.030851063829787233, "grad_norm": 1.0293738398762209, "learning_rate": 9.999983662063267e-06, "loss": 0.4302, "step": 899 }, { "epoch": 0.03088538091969801, "grad_norm": 0.9452734942521407, "learning_rate": 9.999982210489408e-06, "loss": 0.4023, "step": 900 }, { "epoch": 0.030919698009608785, "grad_norm": 1.1228317813002582, "learning_rate": 9.999980697146599e-06, "loss": 0.477, "step": 901 }, { "epoch": 0.03095401509951956, "grad_norm": 1.0474501947904462, "learning_rate": 9.999979122034857e-06, "loss": 0.4204, "step": 902 }, { "epoch": 0.030988332189430336, "grad_norm": 0.9233546495523076, "learning_rate": 9.999977485154204e-06, "loss": 0.3972, "step": 903 }, { "epoch": 0.031022649279341112, "grad_norm": 0.9577088576548196, "learning_rate": 9.999975786504656e-06, "loss": 0.4287, "step": 904 }, { "epoch": 0.031056966369251888, "grad_norm": 0.9318934213943082, "learning_rate": 9.99997402608624e-06, "loss": 0.3635, "step": 905 }, { "epoch": 0.031091283459162664, "grad_norm": 0.8464455814157498, "learning_rate": 9.999972203898971e-06, "loss": 0.4003, "step": 906 }, { "epoch": 0.03112560054907344, "grad_norm": 1.177990495611826, "learning_rate": 9.999970319942878e-06, "loss": 0.4498, "step": 907 }, { "epoch": 0.031159917638984215, "grad_norm": 1.0666943933990858, "learning_rate": 9.999968374217979e-06, "loss": 0.4359, "step": 908 }, { "epoch": 0.03119423472889499, "grad_norm": 0.9474966235842195, "learning_rate": 9.9999663667243e-06, "loss": 0.4327, "step": 909 }, { "epoch": 0.031228551818805766, "grad_norm": 0.9900316481881953, "learning_rate": 9.999964297461866e-06, "loss": 0.3659, "step": 910 }, { "epoch": 0.03126286890871654, "grad_norm": 1.0211417391604398, "learning_rate": 9.999962166430704e-06, "loss": 0.4247, "step": 911 }, { "epoch": 0.03129718599862732, "grad_norm": 0.9155995906587023, "learning_rate": 9.999959973630837e-06, "loss": 0.4141, "step": 912 }, { "epoch": 0.03133150308853809, "grad_norm": 0.9404376396926385, "learning_rate": 9.999957719062295e-06, "loss": 0.3529, "step": 913 }, { "epoch": 0.03136582017844887, "grad_norm": 0.9670699178788782, "learning_rate": 9.999955402725104e-06, "loss": 0.4487, "step": 914 }, { "epoch": 0.03140013726835964, "grad_norm": 0.8908405452159203, "learning_rate": 9.999953024619293e-06, "loss": 0.4061, "step": 915 }, { "epoch": 0.03143445435827042, "grad_norm": 0.9679644864231871, "learning_rate": 9.999950584744893e-06, "loss": 0.4364, "step": 916 }, { "epoch": 0.03146877144818119, "grad_norm": 0.9743636755046412, "learning_rate": 9.999948083101932e-06, "loss": 0.4221, "step": 917 }, { "epoch": 0.03150308853809197, "grad_norm": 1.0081005534665426, "learning_rate": 9.999945519690443e-06, "loss": 0.4061, "step": 918 }, { "epoch": 0.031537405628002745, "grad_norm": 1.000775276788878, "learning_rate": 9.999942894510456e-06, "loss": 0.4344, "step": 919 }, { "epoch": 0.031571722717913524, "grad_norm": 0.9976651152111968, "learning_rate": 9.999940207562003e-06, "loss": 0.4031, "step": 920 }, { "epoch": 0.031606039807824296, "grad_norm": 1.005628327030142, "learning_rate": 9.999937458845119e-06, "loss": 0.4401, "step": 921 }, { "epoch": 0.031640356897735075, "grad_norm": 1.0068001437245366, "learning_rate": 9.999934648359837e-06, "loss": 0.4545, "step": 922 }, { "epoch": 0.03167467398764585, "grad_norm": 1.107842567095394, "learning_rate": 9.999931776106191e-06, "loss": 0.427, "step": 923 }, { "epoch": 0.03170899107755663, "grad_norm": 0.9054416705830978, "learning_rate": 9.999928842084219e-06, "loss": 0.4475, "step": 924 }, { "epoch": 0.0317433081674674, "grad_norm": 0.9728268804584564, "learning_rate": 9.999925846293954e-06, "loss": 0.4478, "step": 925 }, { "epoch": 0.03177762525737817, "grad_norm": 0.9929440747314742, "learning_rate": 9.999922788735434e-06, "loss": 0.4899, "step": 926 }, { "epoch": 0.03181194234728895, "grad_norm": 0.954667401176189, "learning_rate": 9.9999196694087e-06, "loss": 0.41, "step": 927 }, { "epoch": 0.03184625943719972, "grad_norm": 0.9099689224706428, "learning_rate": 9.999916488313785e-06, "loss": 0.3763, "step": 928 }, { "epoch": 0.0318805765271105, "grad_norm": 0.9430486192716787, "learning_rate": 9.999913245450732e-06, "loss": 0.4078, "step": 929 }, { "epoch": 0.031914893617021274, "grad_norm": 0.9080479427719037, "learning_rate": 9.99990994081958e-06, "loss": 0.4103, "step": 930 }, { "epoch": 0.03194921070693205, "grad_norm": 0.9675071730445736, "learning_rate": 9.99990657442037e-06, "loss": 0.4768, "step": 931 }, { "epoch": 0.031983527796842826, "grad_norm": 0.9778081796787084, "learning_rate": 9.999903146253144e-06, "loss": 0.4319, "step": 932 }, { "epoch": 0.032017844886753605, "grad_norm": 1.024633512667766, "learning_rate": 9.999899656317944e-06, "loss": 0.4334, "step": 933 }, { "epoch": 0.03205216197666438, "grad_norm": 1.0137069788464956, "learning_rate": 9.999896104614813e-06, "loss": 0.4503, "step": 934 }, { "epoch": 0.032086479066575156, "grad_norm": 1.1395886018855914, "learning_rate": 9.999892491143795e-06, "loss": 0.423, "step": 935 }, { "epoch": 0.03212079615648593, "grad_norm": 0.9832056671387422, "learning_rate": 9.999888815904935e-06, "loss": 0.4225, "step": 936 }, { "epoch": 0.03215511324639671, "grad_norm": 1.0533932985993193, "learning_rate": 9.999885078898278e-06, "loss": 0.4121, "step": 937 }, { "epoch": 0.03218943033630748, "grad_norm": 0.966160565923273, "learning_rate": 9.999881280123869e-06, "loss": 0.3847, "step": 938 }, { "epoch": 0.03222374742621826, "grad_norm": 0.8347755587936785, "learning_rate": 9.999877419581756e-06, "loss": 0.362, "step": 939 }, { "epoch": 0.03225806451612903, "grad_norm": 1.0130330987225082, "learning_rate": 9.999873497271986e-06, "loss": 0.3939, "step": 940 }, { "epoch": 0.03229238160603981, "grad_norm": 0.90303703948737, "learning_rate": 9.99986951319461e-06, "loss": 0.3926, "step": 941 }, { "epoch": 0.03232669869595058, "grad_norm": 1.134838104679573, "learning_rate": 9.999865467349676e-06, "loss": 0.4273, "step": 942 }, { "epoch": 0.03236101578586136, "grad_norm": 0.9365153075428199, "learning_rate": 9.999861359737233e-06, "loss": 0.4141, "step": 943 }, { "epoch": 0.032395332875772134, "grad_norm": 0.9339507020407388, "learning_rate": 9.999857190357332e-06, "loss": 0.4489, "step": 944 }, { "epoch": 0.032429649965682913, "grad_norm": 0.9577281509058081, "learning_rate": 9.999852959210024e-06, "loss": 0.3971, "step": 945 }, { "epoch": 0.032463967055593686, "grad_norm": 1.1087905882530888, "learning_rate": 9.999848666295363e-06, "loss": 0.435, "step": 946 }, { "epoch": 0.03249828414550446, "grad_norm": 0.9916302767295122, "learning_rate": 9.9998443116134e-06, "loss": 0.4044, "step": 947 }, { "epoch": 0.03253260123541524, "grad_norm": 0.9843252155373721, "learning_rate": 9.999839895164189e-06, "loss": 0.4768, "step": 948 }, { "epoch": 0.03256691832532601, "grad_norm": 0.9441697712386097, "learning_rate": 9.999835416947787e-06, "loss": 0.4089, "step": 949 }, { "epoch": 0.03260123541523679, "grad_norm": 1.0111220250713504, "learning_rate": 9.999830876964246e-06, "loss": 0.4749, "step": 950 }, { "epoch": 0.03263555250514756, "grad_norm": 1.0041976527603051, "learning_rate": 9.999826275213625e-06, "loss": 0.4119, "step": 951 }, { "epoch": 0.03266986959505834, "grad_norm": 0.9904224326650636, "learning_rate": 9.99982161169598e-06, "loss": 0.3726, "step": 952 }, { "epoch": 0.03270418668496911, "grad_norm": 1.0162280812950344, "learning_rate": 9.999816886411367e-06, "loss": 0.4649, "step": 953 }, { "epoch": 0.03273850377487989, "grad_norm": 0.9495342437030087, "learning_rate": 9.999812099359846e-06, "loss": 0.4621, "step": 954 }, { "epoch": 0.032772820864790664, "grad_norm": 0.9601926662835517, "learning_rate": 9.999807250541476e-06, "loss": 0.4342, "step": 955 }, { "epoch": 0.03280713795470144, "grad_norm": 0.9540568250543212, "learning_rate": 9.999802339956317e-06, "loss": 0.4265, "step": 956 }, { "epoch": 0.032841455044612215, "grad_norm": 0.961675831689777, "learning_rate": 9.999797367604429e-06, "loss": 0.4659, "step": 957 }, { "epoch": 0.032875772134522994, "grad_norm": 0.8969941942386587, "learning_rate": 9.999792333485874e-06, "loss": 0.409, "step": 958 }, { "epoch": 0.03291008922443377, "grad_norm": 1.0030055773648714, "learning_rate": 9.999787237600714e-06, "loss": 0.4366, "step": 959 }, { "epoch": 0.032944406314344546, "grad_norm": 1.0866890238919014, "learning_rate": 9.99978207994901e-06, "loss": 0.4727, "step": 960 }, { "epoch": 0.03297872340425532, "grad_norm": 0.8847319042618422, "learning_rate": 9.99977686053083e-06, "loss": 0.3969, "step": 961 }, { "epoch": 0.0330130404941661, "grad_norm": 1.076743517181622, "learning_rate": 9.999771579346236e-06, "loss": 0.3856, "step": 962 }, { "epoch": 0.03304735758407687, "grad_norm": 0.8951490798310531, "learning_rate": 9.999766236395292e-06, "loss": 0.3411, "step": 963 }, { "epoch": 0.03308167467398765, "grad_norm": 0.9216143626388688, "learning_rate": 9.999760831678065e-06, "loss": 0.454, "step": 964 }, { "epoch": 0.03311599176389842, "grad_norm": 0.8384204679794967, "learning_rate": 9.999755365194624e-06, "loss": 0.4513, "step": 965 }, { "epoch": 0.0331503088538092, "grad_norm": 0.9014465300132707, "learning_rate": 9.999749836945031e-06, "loss": 0.4038, "step": 966 }, { "epoch": 0.03318462594371997, "grad_norm": 0.9710909100134572, "learning_rate": 9.999744246929361e-06, "loss": 0.4412, "step": 967 }, { "epoch": 0.033218943033630745, "grad_norm": 0.9329099153820343, "learning_rate": 9.999738595147679e-06, "loss": 0.3938, "step": 968 }, { "epoch": 0.033253260123541524, "grad_norm": 1.0269159362298304, "learning_rate": 9.999732881600058e-06, "loss": 0.4142, "step": 969 }, { "epoch": 0.033287577213452296, "grad_norm": 0.9561510219773626, "learning_rate": 9.999727106286562e-06, "loss": 0.4289, "step": 970 }, { "epoch": 0.033321894303363075, "grad_norm": 0.9477030058218273, "learning_rate": 9.99972126920727e-06, "loss": 0.4483, "step": 971 }, { "epoch": 0.03335621139327385, "grad_norm": 0.9504079111381349, "learning_rate": 9.99971537036225e-06, "loss": 0.4919, "step": 972 }, { "epoch": 0.03339052848318463, "grad_norm": 1.0539658699365477, "learning_rate": 9.999709409751576e-06, "loss": 0.4393, "step": 973 }, { "epoch": 0.0334248455730954, "grad_norm": 1.001505073743713, "learning_rate": 9.999703387375321e-06, "loss": 0.4266, "step": 974 }, { "epoch": 0.03345916266300618, "grad_norm": 1.0172242409071783, "learning_rate": 9.99969730323356e-06, "loss": 0.4099, "step": 975 }, { "epoch": 0.03349347975291695, "grad_norm": 1.2028688819294957, "learning_rate": 9.999691157326367e-06, "loss": 0.5023, "step": 976 }, { "epoch": 0.03352779684282773, "grad_norm": 0.918188946038447, "learning_rate": 9.99968494965382e-06, "loss": 0.3812, "step": 977 }, { "epoch": 0.0335621139327385, "grad_norm": 1.09621155409713, "learning_rate": 9.999678680215993e-06, "loss": 0.4644, "step": 978 }, { "epoch": 0.03359643102264928, "grad_norm": 1.0028276879749622, "learning_rate": 9.999672349012967e-06, "loss": 0.4052, "step": 979 }, { "epoch": 0.033630748112560054, "grad_norm": 0.9786512893007951, "learning_rate": 9.999665956044816e-06, "loss": 0.4751, "step": 980 }, { "epoch": 0.03366506520247083, "grad_norm": 0.9405106644949832, "learning_rate": 9.999659501311623e-06, "loss": 0.3787, "step": 981 }, { "epoch": 0.033699382292381605, "grad_norm": 1.023386305615513, "learning_rate": 9.999652984813465e-06, "loss": 0.4077, "step": 982 }, { "epoch": 0.033733699382292384, "grad_norm": 0.9406953606553669, "learning_rate": 9.999646406550423e-06, "loss": 0.4217, "step": 983 }, { "epoch": 0.033768016472203156, "grad_norm": 0.9106557012031932, "learning_rate": 9.999639766522579e-06, "loss": 0.4272, "step": 984 }, { "epoch": 0.033802333562113936, "grad_norm": 0.9923724238144812, "learning_rate": 9.999633064730016e-06, "loss": 0.4748, "step": 985 }, { "epoch": 0.03383665065202471, "grad_norm": 0.96992376651296, "learning_rate": 9.999626301172813e-06, "loss": 0.3937, "step": 986 }, { "epoch": 0.03387096774193549, "grad_norm": 0.8599090778417239, "learning_rate": 9.999619475851058e-06, "loss": 0.3945, "step": 987 }, { "epoch": 0.03390528483184626, "grad_norm": 1.010924112331213, "learning_rate": 9.999612588764834e-06, "loss": 0.4295, "step": 988 }, { "epoch": 0.03393960192175703, "grad_norm": 0.9534615909970277, "learning_rate": 9.999605639914222e-06, "loss": 0.4531, "step": 989 }, { "epoch": 0.03397391901166781, "grad_norm": 0.9206482071835843, "learning_rate": 9.999598629299316e-06, "loss": 0.4155, "step": 990 }, { "epoch": 0.03400823610157858, "grad_norm": 0.8676539752360872, "learning_rate": 9.999591556920194e-06, "loss": 0.4171, "step": 991 }, { "epoch": 0.03404255319148936, "grad_norm": 0.8811226863623598, "learning_rate": 9.999584422776949e-06, "loss": 0.3581, "step": 992 }, { "epoch": 0.034076870281400135, "grad_norm": 1.096483483506919, "learning_rate": 9.999577226869667e-06, "loss": 0.4445, "step": 993 }, { "epoch": 0.034111187371310914, "grad_norm": 0.9937256360590166, "learning_rate": 9.999569969198438e-06, "loss": 0.4312, "step": 994 }, { "epoch": 0.034145504461221686, "grad_norm": 0.9660920956496266, "learning_rate": 9.99956264976335e-06, "loss": 0.4317, "step": 995 }, { "epoch": 0.034179821551132465, "grad_norm": 0.8457738238945489, "learning_rate": 9.999555268564497e-06, "loss": 0.3919, "step": 996 }, { "epoch": 0.03421413864104324, "grad_norm": 0.816526629535674, "learning_rate": 9.999547825601963e-06, "loss": 0.3931, "step": 997 }, { "epoch": 0.03424845573095402, "grad_norm": 0.824148608961745, "learning_rate": 9.999540320875848e-06, "loss": 0.4218, "step": 998 }, { "epoch": 0.03428277282086479, "grad_norm": 0.9044695346556066, "learning_rate": 9.999532754386239e-06, "loss": 0.4012, "step": 999 }, { "epoch": 0.03431708991077557, "grad_norm": 0.8505478296208654, "learning_rate": 9.999525126133234e-06, "loss": 0.3543, "step": 1000 }, { "epoch": 0.03435140700068634, "grad_norm": 1.17309356957566, "learning_rate": 9.999517436116923e-06, "loss": 0.401, "step": 1001 }, { "epoch": 0.03438572409059712, "grad_norm": 1.0286774528119509, "learning_rate": 9.999509684337404e-06, "loss": 0.4457, "step": 1002 }, { "epoch": 0.03442004118050789, "grad_norm": 0.9145754587250627, "learning_rate": 9.999501870794772e-06, "loss": 0.3655, "step": 1003 }, { "epoch": 0.03445435827041867, "grad_norm": 1.0969689643179161, "learning_rate": 9.999493995489123e-06, "loss": 0.5358, "step": 1004 }, { "epoch": 0.03448867536032944, "grad_norm": 0.9519816802335295, "learning_rate": 9.999486058420553e-06, "loss": 0.3809, "step": 1005 }, { "epoch": 0.03452299245024022, "grad_norm": 0.9683483297576355, "learning_rate": 9.999478059589162e-06, "loss": 0.4168, "step": 1006 }, { "epoch": 0.034557309540150995, "grad_norm": 1.0534833062611935, "learning_rate": 9.99946999899505e-06, "loss": 0.4899, "step": 1007 }, { "epoch": 0.034591626630061774, "grad_norm": 1.0596833455068597, "learning_rate": 9.999461876638314e-06, "loss": 0.4146, "step": 1008 }, { "epoch": 0.034625943719972546, "grad_norm": 0.9272308730931305, "learning_rate": 9.999453692519057e-06, "loss": 0.3876, "step": 1009 }, { "epoch": 0.034660260809883325, "grad_norm": 0.988309014846172, "learning_rate": 9.999445446637376e-06, "loss": 0.3871, "step": 1010 }, { "epoch": 0.0346945778997941, "grad_norm": 0.8923285392567302, "learning_rate": 9.999437138993376e-06, "loss": 0.3484, "step": 1011 }, { "epoch": 0.03472889498970487, "grad_norm": 0.8538256079000122, "learning_rate": 9.999428769587159e-06, "loss": 0.3861, "step": 1012 }, { "epoch": 0.03476321207961565, "grad_norm": 0.9669514280764253, "learning_rate": 9.999420338418828e-06, "loss": 0.417, "step": 1013 }, { "epoch": 0.03479752916952642, "grad_norm": 0.9608158690907882, "learning_rate": 9.999411845488489e-06, "loss": 0.4678, "step": 1014 }, { "epoch": 0.0348318462594372, "grad_norm": 1.3161787723801923, "learning_rate": 9.999403290796244e-06, "loss": 0.3857, "step": 1015 }, { "epoch": 0.03486616334934797, "grad_norm": 0.9122290020429557, "learning_rate": 9.999394674342199e-06, "loss": 0.3969, "step": 1016 }, { "epoch": 0.03490048043925875, "grad_norm": 0.9930448450721114, "learning_rate": 9.999385996126465e-06, "loss": 0.4054, "step": 1017 }, { "epoch": 0.034934797529169524, "grad_norm": 0.9764263968694915, "learning_rate": 9.999377256149143e-06, "loss": 0.4421, "step": 1018 }, { "epoch": 0.0349691146190803, "grad_norm": 0.9594599659981147, "learning_rate": 9.999368454410346e-06, "loss": 0.47, "step": 1019 }, { "epoch": 0.035003431708991076, "grad_norm": 0.9845681497646563, "learning_rate": 9.999359590910178e-06, "loss": 0.4532, "step": 1020 }, { "epoch": 0.035037748798901855, "grad_norm": 0.9894137380592815, "learning_rate": 9.999350665648752e-06, "loss": 0.3838, "step": 1021 }, { "epoch": 0.03507206588881263, "grad_norm": 1.0667302164333459, "learning_rate": 9.999341678626177e-06, "loss": 0.4444, "step": 1022 }, { "epoch": 0.035106382978723406, "grad_norm": 0.8415607328426545, "learning_rate": 9.999332629842565e-06, "loss": 0.389, "step": 1023 }, { "epoch": 0.03514070006863418, "grad_norm": 0.9385533809164973, "learning_rate": 9.999323519298025e-06, "loss": 0.3657, "step": 1024 }, { "epoch": 0.03517501715854496, "grad_norm": 0.9394050989014119, "learning_rate": 9.999314346992672e-06, "loss": 0.4423, "step": 1025 }, { "epoch": 0.03520933424845573, "grad_norm": 0.9479776689383179, "learning_rate": 9.99930511292662e-06, "loss": 0.4168, "step": 1026 }, { "epoch": 0.03524365133836651, "grad_norm": 0.9646377963641269, "learning_rate": 9.999295817099983e-06, "loss": 0.3906, "step": 1027 }, { "epoch": 0.03527796842827728, "grad_norm": 1.069852254198916, "learning_rate": 9.999286459512873e-06, "loss": 0.3981, "step": 1028 }, { "epoch": 0.03531228551818806, "grad_norm": 0.9004200606145661, "learning_rate": 9.999277040165408e-06, "loss": 0.3652, "step": 1029 }, { "epoch": 0.03534660260809883, "grad_norm": 0.9279647459647242, "learning_rate": 9.999267559057702e-06, "loss": 0.3584, "step": 1030 }, { "epoch": 0.03538091969800961, "grad_norm": 1.085109976930754, "learning_rate": 9.999258016189877e-06, "loss": 0.4397, "step": 1031 }, { "epoch": 0.035415236787920384, "grad_norm": 0.9422641201257904, "learning_rate": 9.999248411562046e-06, "loss": 0.4312, "step": 1032 }, { "epoch": 0.03544955387783116, "grad_norm": 0.9883640519092199, "learning_rate": 9.99923874517433e-06, "loss": 0.4009, "step": 1033 }, { "epoch": 0.035483870967741936, "grad_norm": 1.1735180037119701, "learning_rate": 9.999229017026846e-06, "loss": 0.5038, "step": 1034 }, { "epoch": 0.03551818805765271, "grad_norm": 1.018292242684677, "learning_rate": 9.999219227119719e-06, "loss": 0.378, "step": 1035 }, { "epoch": 0.03555250514756349, "grad_norm": 0.8812175393180651, "learning_rate": 9.999209375453065e-06, "loss": 0.4044, "step": 1036 }, { "epoch": 0.03558682223747426, "grad_norm": 1.0351630304165564, "learning_rate": 9.999199462027008e-06, "loss": 0.3997, "step": 1037 }, { "epoch": 0.03562113932738504, "grad_norm": 1.078645103103066, "learning_rate": 9.999189486841669e-06, "loss": 0.4167, "step": 1038 }, { "epoch": 0.03565545641729581, "grad_norm": 0.9748578547364664, "learning_rate": 9.999179449897176e-06, "loss": 0.4392, "step": 1039 }, { "epoch": 0.03568977350720659, "grad_norm": 0.8875454096613955, "learning_rate": 9.999169351193646e-06, "loss": 0.3826, "step": 1040 }, { "epoch": 0.03572409059711736, "grad_norm": 1.030516237566605, "learning_rate": 9.999159190731208e-06, "loss": 0.3346, "step": 1041 }, { "epoch": 0.03575840768702814, "grad_norm": 0.9550864494354881, "learning_rate": 9.999148968509988e-06, "loss": 0.3724, "step": 1042 }, { "epoch": 0.035792724776938914, "grad_norm": 0.9464338586551784, "learning_rate": 9.99913868453011e-06, "loss": 0.3566, "step": 1043 }, { "epoch": 0.03582704186684969, "grad_norm": 0.978323162987987, "learning_rate": 9.999128338791702e-06, "loss": 0.4164, "step": 1044 }, { "epoch": 0.035861358956760465, "grad_norm": 0.9689696178128799, "learning_rate": 9.99911793129489e-06, "loss": 0.4443, "step": 1045 }, { "epoch": 0.035895676046671245, "grad_norm": 0.9542509053509266, "learning_rate": 9.999107462039806e-06, "loss": 0.4321, "step": 1046 }, { "epoch": 0.03592999313658202, "grad_norm": 1.0180647343515987, "learning_rate": 9.999096931026579e-06, "loss": 0.4282, "step": 1047 }, { "epoch": 0.035964310226492796, "grad_norm": 0.9850786156363935, "learning_rate": 9.999086338255335e-06, "loss": 0.3711, "step": 1048 }, { "epoch": 0.03599862731640357, "grad_norm": 1.0227893130971244, "learning_rate": 9.999075683726208e-06, "loss": 0.448, "step": 1049 }, { "epoch": 0.03603294440631435, "grad_norm": 0.898867435082176, "learning_rate": 9.999064967439332e-06, "loss": 0.4251, "step": 1050 }, { "epoch": 0.03606726149622512, "grad_norm": 0.828170742481426, "learning_rate": 9.999054189394835e-06, "loss": 0.3221, "step": 1051 }, { "epoch": 0.0361015785861359, "grad_norm": 0.992134168957827, "learning_rate": 9.999043349592852e-06, "loss": 0.4289, "step": 1052 }, { "epoch": 0.03613589567604667, "grad_norm": 0.969272942530777, "learning_rate": 9.999032448033517e-06, "loss": 0.4017, "step": 1053 }, { "epoch": 0.036170212765957444, "grad_norm": 0.9872192625367459, "learning_rate": 9.999021484716965e-06, "loss": 0.4417, "step": 1054 }, { "epoch": 0.03620452985586822, "grad_norm": 1.0800197213745946, "learning_rate": 9.99901045964333e-06, "loss": 0.3922, "step": 1055 }, { "epoch": 0.036238846945778995, "grad_norm": 0.8449223584660792, "learning_rate": 9.998999372812747e-06, "loss": 0.3974, "step": 1056 }, { "epoch": 0.036273164035689774, "grad_norm": 1.038752238345532, "learning_rate": 9.998988224225358e-06, "loss": 0.4045, "step": 1057 }, { "epoch": 0.036307481125600546, "grad_norm": 0.937112204975349, "learning_rate": 9.998977013881297e-06, "loss": 0.4105, "step": 1058 }, { "epoch": 0.036341798215511326, "grad_norm": 0.9251710698388065, "learning_rate": 9.998965741780702e-06, "loss": 0.4541, "step": 1059 }, { "epoch": 0.0363761153054221, "grad_norm": 0.9451716346347014, "learning_rate": 9.998954407923716e-06, "loss": 0.4466, "step": 1060 }, { "epoch": 0.03641043239533288, "grad_norm": 1.0125317660585278, "learning_rate": 9.998943012310473e-06, "loss": 0.3687, "step": 1061 }, { "epoch": 0.03644474948524365, "grad_norm": 0.9633111694028167, "learning_rate": 9.99893155494112e-06, "loss": 0.4357, "step": 1062 }, { "epoch": 0.03647906657515443, "grad_norm": 0.9720709533783581, "learning_rate": 9.998920035815795e-06, "loss": 0.4368, "step": 1063 }, { "epoch": 0.0365133836650652, "grad_norm": 0.9584084065342019, "learning_rate": 9.998908454934642e-06, "loss": 0.4108, "step": 1064 }, { "epoch": 0.03654770075497598, "grad_norm": 0.9150997033978457, "learning_rate": 9.998896812297802e-06, "loss": 0.3633, "step": 1065 }, { "epoch": 0.03658201784488675, "grad_norm": 0.8730411414363433, "learning_rate": 9.99888510790542e-06, "loss": 0.3488, "step": 1066 }, { "epoch": 0.03661633493479753, "grad_norm": 0.9207867100941767, "learning_rate": 9.998873341757641e-06, "loss": 0.3631, "step": 1067 }, { "epoch": 0.036650652024708304, "grad_norm": 0.9948890625596843, "learning_rate": 9.99886151385461e-06, "loss": 0.4351, "step": 1068 }, { "epoch": 0.03668496911461908, "grad_norm": 0.925017832721837, "learning_rate": 9.998849624196474e-06, "loss": 0.4102, "step": 1069 }, { "epoch": 0.036719286204529855, "grad_norm": 0.9380704100150788, "learning_rate": 9.998837672783377e-06, "loss": 0.3753, "step": 1070 }, { "epoch": 0.036753603294440634, "grad_norm": 0.9032462113394795, "learning_rate": 9.99882565961547e-06, "loss": 0.436, "step": 1071 }, { "epoch": 0.03678792038435141, "grad_norm": 0.931071959537652, "learning_rate": 9.9988135846929e-06, "loss": 0.3659, "step": 1072 }, { "epoch": 0.036822237474262186, "grad_norm": 1.0707951861391871, "learning_rate": 9.998801448015815e-06, "loss": 0.4546, "step": 1073 }, { "epoch": 0.03685655456417296, "grad_norm": 0.9156617630423187, "learning_rate": 9.998789249584368e-06, "loss": 0.3328, "step": 1074 }, { "epoch": 0.03689087165408373, "grad_norm": 0.9347820753412573, "learning_rate": 9.998776989398707e-06, "loss": 0.3794, "step": 1075 }, { "epoch": 0.03692518874399451, "grad_norm": 0.8301155298347329, "learning_rate": 9.998764667458984e-06, "loss": 0.4002, "step": 1076 }, { "epoch": 0.03695950583390528, "grad_norm": 1.0254809187041913, "learning_rate": 9.998752283765352e-06, "loss": 0.4046, "step": 1077 }, { "epoch": 0.03699382292381606, "grad_norm": 1.1379076912290913, "learning_rate": 9.998739838317964e-06, "loss": 0.4412, "step": 1078 }, { "epoch": 0.03702814001372683, "grad_norm": 0.9462268551295099, "learning_rate": 9.998727331116973e-06, "loss": 0.4187, "step": 1079 }, { "epoch": 0.03706245710363761, "grad_norm": 1.1305136065732029, "learning_rate": 9.998714762162533e-06, "loss": 0.3983, "step": 1080 }, { "epoch": 0.037096774193548385, "grad_norm": 0.9391541410530378, "learning_rate": 9.9987021314548e-06, "loss": 0.3953, "step": 1081 }, { "epoch": 0.037131091283459164, "grad_norm": 1.009335590827098, "learning_rate": 9.99868943899393e-06, "loss": 0.4767, "step": 1082 }, { "epoch": 0.037165408373369936, "grad_norm": 1.1136527520283006, "learning_rate": 9.99867668478008e-06, "loss": 0.382, "step": 1083 }, { "epoch": 0.037199725463280715, "grad_norm": 0.9255397331620298, "learning_rate": 9.99866386881341e-06, "loss": 0.4606, "step": 1084 }, { "epoch": 0.03723404255319149, "grad_norm": 0.9155576352450328, "learning_rate": 9.998650991094073e-06, "loss": 0.3957, "step": 1085 }, { "epoch": 0.03726835964310227, "grad_norm": 0.9223543958702418, "learning_rate": 9.998638051622232e-06, "loss": 0.4096, "step": 1086 }, { "epoch": 0.03730267673301304, "grad_norm": 0.9844543687436116, "learning_rate": 9.998625050398045e-06, "loss": 0.4492, "step": 1087 }, { "epoch": 0.03733699382292382, "grad_norm": 0.8816732873463635, "learning_rate": 9.998611987421673e-06, "loss": 0.457, "step": 1088 }, { "epoch": 0.03737131091283459, "grad_norm": 0.8551618233504091, "learning_rate": 9.998598862693278e-06, "loss": 0.3666, "step": 1089 }, { "epoch": 0.03740562800274537, "grad_norm": 0.8846177599718753, "learning_rate": 9.998585676213022e-06, "loss": 0.3879, "step": 1090 }, { "epoch": 0.03743994509265614, "grad_norm": 0.871167159567324, "learning_rate": 9.998572427981069e-06, "loss": 0.4149, "step": 1091 }, { "epoch": 0.03747426218256692, "grad_norm": 0.9210957947121968, "learning_rate": 9.998559117997579e-06, "loss": 0.4887, "step": 1092 }, { "epoch": 0.03750857927247769, "grad_norm": 0.9505976119317832, "learning_rate": 9.99854574626272e-06, "loss": 0.4384, "step": 1093 }, { "epoch": 0.03754289636238847, "grad_norm": 0.9525582835354542, "learning_rate": 9.998532312776657e-06, "loss": 0.3663, "step": 1094 }, { "epoch": 0.037577213452299245, "grad_norm": 1.0668426083231513, "learning_rate": 9.998518817539553e-06, "loss": 0.4386, "step": 1095 }, { "epoch": 0.03761153054221002, "grad_norm": 0.8567839370545286, "learning_rate": 9.99850526055158e-06, "loss": 0.3956, "step": 1096 }, { "epoch": 0.037645847632120796, "grad_norm": 0.9317299016690476, "learning_rate": 9.9984916418129e-06, "loss": 0.4367, "step": 1097 }, { "epoch": 0.03768016472203157, "grad_norm": 0.8398790163536003, "learning_rate": 9.998477961323682e-06, "loss": 0.4117, "step": 1098 }, { "epoch": 0.03771448181194235, "grad_norm": 1.0111533306675924, "learning_rate": 9.998464219084098e-06, "loss": 0.3759, "step": 1099 }, { "epoch": 0.03774879890185312, "grad_norm": 0.8534970266501337, "learning_rate": 9.998450415094316e-06, "loss": 0.4406, "step": 1100 }, { "epoch": 0.0377831159917639, "grad_norm": 1.0130723398457153, "learning_rate": 9.998436549354508e-06, "loss": 0.4122, "step": 1101 }, { "epoch": 0.03781743308167467, "grad_norm": 0.9792793345631742, "learning_rate": 9.998422621864843e-06, "loss": 0.3986, "step": 1102 }, { "epoch": 0.03785175017158545, "grad_norm": 0.9622952626977453, "learning_rate": 9.998408632625494e-06, "loss": 0.4413, "step": 1103 }, { "epoch": 0.03788606726149622, "grad_norm": 0.8758226569634903, "learning_rate": 9.998394581636635e-06, "loss": 0.3754, "step": 1104 }, { "epoch": 0.037920384351407, "grad_norm": 0.8832817320258707, "learning_rate": 9.998380468898437e-06, "loss": 0.3592, "step": 1105 }, { "epoch": 0.037954701441317774, "grad_norm": 0.9950019278035631, "learning_rate": 9.998366294411076e-06, "loss": 0.4446, "step": 1106 }, { "epoch": 0.037989018531228554, "grad_norm": 0.9183734810043163, "learning_rate": 9.998352058174727e-06, "loss": 0.3947, "step": 1107 }, { "epoch": 0.038023335621139326, "grad_norm": 0.9014998951397308, "learning_rate": 9.998337760189566e-06, "loss": 0.3674, "step": 1108 }, { "epoch": 0.038057652711050105, "grad_norm": 0.9134884754817653, "learning_rate": 9.99832340045577e-06, "loss": 0.4124, "step": 1109 }, { "epoch": 0.03809196980096088, "grad_norm": 0.8926087144421134, "learning_rate": 9.998308978973515e-06, "loss": 0.3533, "step": 1110 }, { "epoch": 0.038126286890871657, "grad_norm": 0.9363594481704602, "learning_rate": 9.998294495742982e-06, "loss": 0.4568, "step": 1111 }, { "epoch": 0.03816060398078243, "grad_norm": 1.0329695975718456, "learning_rate": 9.998279950764346e-06, "loss": 0.4522, "step": 1112 }, { "epoch": 0.03819492107069321, "grad_norm": 0.9645652746028592, "learning_rate": 9.998265344037788e-06, "loss": 0.376, "step": 1113 }, { "epoch": 0.03822923816060398, "grad_norm": 0.943017213822318, "learning_rate": 9.99825067556349e-06, "loss": 0.3708, "step": 1114 }, { "epoch": 0.03826355525051476, "grad_norm": 1.0918949698655747, "learning_rate": 9.998235945341633e-06, "loss": 0.4509, "step": 1115 }, { "epoch": 0.03829787234042553, "grad_norm": 0.8910059546971077, "learning_rate": 9.998221153372399e-06, "loss": 0.3914, "step": 1116 }, { "epoch": 0.03833218943033631, "grad_norm": 0.839630769481541, "learning_rate": 9.998206299655969e-06, "loss": 0.3753, "step": 1117 }, { "epoch": 0.03836650652024708, "grad_norm": 1.4061152513147535, "learning_rate": 9.998191384192526e-06, "loss": 0.3967, "step": 1118 }, { "epoch": 0.038400823610157855, "grad_norm": 0.8300726131102464, "learning_rate": 9.998176406982258e-06, "loss": 0.3607, "step": 1119 }, { "epoch": 0.038435140700068635, "grad_norm": 0.9796145577415595, "learning_rate": 9.998161368025346e-06, "loss": 0.4124, "step": 1120 }, { "epoch": 0.03846945778997941, "grad_norm": 0.8679970296417784, "learning_rate": 9.99814626732198e-06, "loss": 0.4443, "step": 1121 }, { "epoch": 0.038503774879890186, "grad_norm": 0.8976704647723087, "learning_rate": 9.998131104872342e-06, "loss": 0.3661, "step": 1122 }, { "epoch": 0.03853809196980096, "grad_norm": 1.7297215830863948, "learning_rate": 9.998115880676623e-06, "loss": 0.4653, "step": 1123 }, { "epoch": 0.03857240905971174, "grad_norm": 0.9042172270670661, "learning_rate": 9.998100594735007e-06, "loss": 0.4018, "step": 1124 }, { "epoch": 0.03860672614962251, "grad_norm": 1.1704199193285751, "learning_rate": 9.998085247047687e-06, "loss": 0.4146, "step": 1125 }, { "epoch": 0.03864104323953329, "grad_norm": 0.8138565447077316, "learning_rate": 9.99806983761485e-06, "loss": 0.3875, "step": 1126 }, { "epoch": 0.03867536032944406, "grad_norm": 1.0331062116981404, "learning_rate": 9.99805436643669e-06, "loss": 0.4413, "step": 1127 }, { "epoch": 0.03870967741935484, "grad_norm": 0.9322520179529697, "learning_rate": 9.998038833513392e-06, "loss": 0.3741, "step": 1128 }, { "epoch": 0.03874399450926561, "grad_norm": 0.9831115409831805, "learning_rate": 9.998023238845154e-06, "loss": 0.4246, "step": 1129 }, { "epoch": 0.03877831159917639, "grad_norm": 1.0669022125177547, "learning_rate": 9.998007582432166e-06, "loss": 0.3934, "step": 1130 }, { "epoch": 0.038812628689087164, "grad_norm": 0.9497302381737505, "learning_rate": 9.99799186427462e-06, "loss": 0.4029, "step": 1131 }, { "epoch": 0.03884694577899794, "grad_norm": 0.8984116175051114, "learning_rate": 9.997976084372712e-06, "loss": 0.3602, "step": 1132 }, { "epoch": 0.038881262868908716, "grad_norm": 0.854285223590178, "learning_rate": 9.997960242726636e-06, "loss": 0.4209, "step": 1133 }, { "epoch": 0.038915579958819495, "grad_norm": 1.1223621348441035, "learning_rate": 9.99794433933659e-06, "loss": 0.3782, "step": 1134 }, { "epoch": 0.03894989704873027, "grad_norm": 1.0127884102663511, "learning_rate": 9.997928374202767e-06, "loss": 0.3892, "step": 1135 }, { "epoch": 0.038984214138641046, "grad_norm": 0.8587932595395541, "learning_rate": 9.997912347325366e-06, "loss": 0.3304, "step": 1136 }, { "epoch": 0.03901853122855182, "grad_norm": 1.0038375172123688, "learning_rate": 9.997896258704588e-06, "loss": 0.4182, "step": 1137 }, { "epoch": 0.0390528483184626, "grad_norm": 0.9458211879185806, "learning_rate": 9.997880108340625e-06, "loss": 0.4304, "step": 1138 }, { "epoch": 0.03908716540837337, "grad_norm": 0.9607712326648762, "learning_rate": 9.997863896233681e-06, "loss": 0.4274, "step": 1139 }, { "epoch": 0.03912148249828414, "grad_norm": 0.9932632295034204, "learning_rate": 9.997847622383956e-06, "loss": 0.3998, "step": 1140 }, { "epoch": 0.03915579958819492, "grad_norm": 0.9993805138990981, "learning_rate": 9.997831286791652e-06, "loss": 0.3728, "step": 1141 }, { "epoch": 0.039190116678105694, "grad_norm": 0.8728498120701643, "learning_rate": 9.997814889456968e-06, "loss": 0.4179, "step": 1142 }, { "epoch": 0.03922443376801647, "grad_norm": 0.917288578873304, "learning_rate": 9.997798430380108e-06, "loss": 0.3666, "step": 1143 }, { "epoch": 0.039258750857927245, "grad_norm": 0.9009513458161974, "learning_rate": 9.997781909561274e-06, "loss": 0.4003, "step": 1144 }, { "epoch": 0.039293067947838024, "grad_norm": 0.9211304547051834, "learning_rate": 9.997765327000673e-06, "loss": 0.3784, "step": 1145 }, { "epoch": 0.0393273850377488, "grad_norm": 0.7989316658295631, "learning_rate": 9.997748682698507e-06, "loss": 0.3513, "step": 1146 }, { "epoch": 0.039361702127659576, "grad_norm": 0.8926576021743013, "learning_rate": 9.997731976654984e-06, "loss": 0.3868, "step": 1147 }, { "epoch": 0.03939601921757035, "grad_norm": 1.0844788329066035, "learning_rate": 9.997715208870309e-06, "loss": 0.4347, "step": 1148 }, { "epoch": 0.03943033630748113, "grad_norm": 1.0268482605856393, "learning_rate": 9.997698379344688e-06, "loss": 0.4407, "step": 1149 }, { "epoch": 0.0394646533973919, "grad_norm": 0.971997523337362, "learning_rate": 9.997681488078332e-06, "loss": 0.3873, "step": 1150 }, { "epoch": 0.03949897048730268, "grad_norm": 0.8760717728510415, "learning_rate": 9.997664535071447e-06, "loss": 0.3757, "step": 1151 }, { "epoch": 0.03953328757721345, "grad_norm": 0.8626014181790039, "learning_rate": 9.997647520324244e-06, "loss": 0.3718, "step": 1152 }, { "epoch": 0.03956760466712423, "grad_norm": 0.93873581186672, "learning_rate": 9.997630443836934e-06, "loss": 0.4676, "step": 1153 }, { "epoch": 0.039601921757035, "grad_norm": 1.0331840773976877, "learning_rate": 9.997613305609724e-06, "loss": 0.4286, "step": 1154 }, { "epoch": 0.03963623884694578, "grad_norm": 1.05945842757202, "learning_rate": 9.997596105642828e-06, "loss": 0.392, "step": 1155 }, { "epoch": 0.039670555936856554, "grad_norm": 0.9652046449135773, "learning_rate": 9.997578843936461e-06, "loss": 0.3897, "step": 1156 }, { "epoch": 0.03970487302676733, "grad_norm": 0.935849079790742, "learning_rate": 9.997561520490834e-06, "loss": 0.4205, "step": 1157 }, { "epoch": 0.039739190116678105, "grad_norm": 0.9394350188873013, "learning_rate": 9.997544135306158e-06, "loss": 0.4077, "step": 1158 }, { "epoch": 0.039773507206588884, "grad_norm": 0.9709618348446584, "learning_rate": 9.997526688382653e-06, "loss": 0.4244, "step": 1159 }, { "epoch": 0.03980782429649966, "grad_norm": 0.9107250048527733, "learning_rate": 9.997509179720532e-06, "loss": 0.3953, "step": 1160 }, { "epoch": 0.03984214138641043, "grad_norm": 0.9691582167084244, "learning_rate": 9.99749160932001e-06, "loss": 0.4671, "step": 1161 }, { "epoch": 0.03987645847632121, "grad_norm": 0.9254044121100228, "learning_rate": 9.99747397718131e-06, "loss": 0.3961, "step": 1162 }, { "epoch": 0.03991077556623198, "grad_norm": 0.838832048048842, "learning_rate": 9.997456283304642e-06, "loss": 0.3842, "step": 1163 }, { "epoch": 0.03994509265614276, "grad_norm": 0.9559583041453481, "learning_rate": 9.99743852769023e-06, "loss": 0.4318, "step": 1164 }, { "epoch": 0.03997940974605353, "grad_norm": 0.8396434817880501, "learning_rate": 9.997420710338287e-06, "loss": 0.4272, "step": 1165 }, { "epoch": 0.04001372683596431, "grad_norm": 0.8851329945362179, "learning_rate": 9.997402831249043e-06, "loss": 0.325, "step": 1166 }, { "epoch": 0.04004804392587508, "grad_norm": 0.92892664559864, "learning_rate": 9.997384890422711e-06, "loss": 0.3635, "step": 1167 }, { "epoch": 0.04008236101578586, "grad_norm": 0.9580711852314985, "learning_rate": 9.997366887859516e-06, "loss": 0.3857, "step": 1168 }, { "epoch": 0.040116678105696635, "grad_norm": 0.8785751084198682, "learning_rate": 9.997348823559678e-06, "loss": 0.375, "step": 1169 }, { "epoch": 0.040150995195607414, "grad_norm": 0.9727777464732149, "learning_rate": 9.997330697523421e-06, "loss": 0.4072, "step": 1170 }, { "epoch": 0.040185312285518186, "grad_norm": 0.9760304661243572, "learning_rate": 9.997312509750971e-06, "loss": 0.4482, "step": 1171 }, { "epoch": 0.040219629375428965, "grad_norm": 0.9778980170215434, "learning_rate": 9.997294260242552e-06, "loss": 0.4299, "step": 1172 }, { "epoch": 0.04025394646533974, "grad_norm": 0.8271899444209894, "learning_rate": 9.997275948998389e-06, "loss": 0.3965, "step": 1173 }, { "epoch": 0.04028826355525052, "grad_norm": 0.9118846161049208, "learning_rate": 9.997257576018707e-06, "loss": 0.3668, "step": 1174 }, { "epoch": 0.04032258064516129, "grad_norm": 0.9206994409299484, "learning_rate": 9.997239141303733e-06, "loss": 0.3861, "step": 1175 }, { "epoch": 0.04035689773507207, "grad_norm": 0.9241185705401377, "learning_rate": 9.997220644853695e-06, "loss": 0.447, "step": 1176 }, { "epoch": 0.04039121482498284, "grad_norm": 0.9299829910211229, "learning_rate": 9.997202086668825e-06, "loss": 0.433, "step": 1177 }, { "epoch": 0.04042553191489362, "grad_norm": 0.959473309482754, "learning_rate": 9.997183466749347e-06, "loss": 0.4123, "step": 1178 }, { "epoch": 0.04045984900480439, "grad_norm": 0.9408690000788221, "learning_rate": 9.997164785095494e-06, "loss": 0.4022, "step": 1179 }, { "epoch": 0.04049416609471517, "grad_norm": 0.9716954513242618, "learning_rate": 9.997146041707498e-06, "loss": 0.4424, "step": 1180 }, { "epoch": 0.040528483184625944, "grad_norm": 0.8849097303070835, "learning_rate": 9.997127236585586e-06, "loss": 0.3373, "step": 1181 }, { "epoch": 0.040562800274536716, "grad_norm": 0.9362273695715582, "learning_rate": 9.997108369729995e-06, "loss": 0.482, "step": 1182 }, { "epoch": 0.040597117364447495, "grad_norm": 0.9345121039255253, "learning_rate": 9.997089441140955e-06, "loss": 0.3998, "step": 1183 }, { "epoch": 0.04063143445435827, "grad_norm": 1.0385140356824838, "learning_rate": 9.997070450818703e-06, "loss": 0.4579, "step": 1184 }, { "epoch": 0.040665751544269046, "grad_norm": 0.9890555719920495, "learning_rate": 9.99705139876347e-06, "loss": 0.3756, "step": 1185 }, { "epoch": 0.04070006863417982, "grad_norm": 0.9366302156386966, "learning_rate": 9.997032284975492e-06, "loss": 0.4251, "step": 1186 }, { "epoch": 0.0407343857240906, "grad_norm": 1.017677809365473, "learning_rate": 9.997013109455007e-06, "loss": 0.4693, "step": 1187 }, { "epoch": 0.04076870281400137, "grad_norm": 0.9980367879064818, "learning_rate": 9.996993872202252e-06, "loss": 0.4372, "step": 1188 }, { "epoch": 0.04080301990391215, "grad_norm": 0.8829067952624124, "learning_rate": 9.996974573217461e-06, "loss": 0.4107, "step": 1189 }, { "epoch": 0.04083733699382292, "grad_norm": 0.9347492649865355, "learning_rate": 9.996955212500878e-06, "loss": 0.3809, "step": 1190 }, { "epoch": 0.0408716540837337, "grad_norm": 0.9376469171449823, "learning_rate": 9.996935790052738e-06, "loss": 0.3619, "step": 1191 }, { "epoch": 0.04090597117364447, "grad_norm": 0.9605330554520866, "learning_rate": 9.996916305873282e-06, "loss": 0.3667, "step": 1192 }, { "epoch": 0.04094028826355525, "grad_norm": 0.87689738718658, "learning_rate": 9.996896759962751e-06, "loss": 0.427, "step": 1193 }, { "epoch": 0.040974605353466025, "grad_norm": 0.9533969507725483, "learning_rate": 9.996877152321387e-06, "loss": 0.4528, "step": 1194 }, { "epoch": 0.041008922443376804, "grad_norm": 0.9757370071475087, "learning_rate": 9.99685748294943e-06, "loss": 0.44, "step": 1195 }, { "epoch": 0.041043239533287576, "grad_norm": 1.0253973699228025, "learning_rate": 9.996837751847125e-06, "loss": 0.4662, "step": 1196 }, { "epoch": 0.041077556623198355, "grad_norm": 0.8825487552976979, "learning_rate": 9.996817959014718e-06, "loss": 0.3887, "step": 1197 }, { "epoch": 0.04111187371310913, "grad_norm": 1.0083003349965123, "learning_rate": 9.996798104452447e-06, "loss": 0.3999, "step": 1198 }, { "epoch": 0.04114619080301991, "grad_norm": 0.9603287118530481, "learning_rate": 9.996778188160563e-06, "loss": 0.3997, "step": 1199 }, { "epoch": 0.04118050789293068, "grad_norm": 0.899842897089264, "learning_rate": 9.99675821013931e-06, "loss": 0.4324, "step": 1200 }, { "epoch": 0.04121482498284146, "grad_norm": 0.9842458401661589, "learning_rate": 9.996738170388935e-06, "loss": 0.4162, "step": 1201 }, { "epoch": 0.04124914207275223, "grad_norm": 0.9145267456081122, "learning_rate": 9.996718068909686e-06, "loss": 0.4122, "step": 1202 }, { "epoch": 0.04128345916266301, "grad_norm": 0.9134352091793657, "learning_rate": 9.99669790570181e-06, "loss": 0.4316, "step": 1203 }, { "epoch": 0.04131777625257378, "grad_norm": 0.9633368566289239, "learning_rate": 9.99667768076556e-06, "loss": 0.457, "step": 1204 }, { "epoch": 0.041352093342484554, "grad_norm": 0.962773364549401, "learning_rate": 9.996657394101179e-06, "loss": 0.4006, "step": 1205 }, { "epoch": 0.04138641043239533, "grad_norm": 0.9478676457440051, "learning_rate": 9.996637045708921e-06, "loss": 0.4389, "step": 1206 }, { "epoch": 0.041420727522306106, "grad_norm": 0.9867217241465044, "learning_rate": 9.99661663558904e-06, "loss": 0.4236, "step": 1207 }, { "epoch": 0.041455044612216885, "grad_norm": 0.962294872249673, "learning_rate": 9.996596163741785e-06, "loss": 0.3714, "step": 1208 }, { "epoch": 0.04148936170212766, "grad_norm": 0.8917026560275784, "learning_rate": 9.996575630167412e-06, "loss": 0.4079, "step": 1209 }, { "epoch": 0.041523678792038436, "grad_norm": 0.9841413405720134, "learning_rate": 9.996555034866169e-06, "loss": 0.3663, "step": 1210 }, { "epoch": 0.04155799588194921, "grad_norm": 0.9313644226731244, "learning_rate": 9.996534377838316e-06, "loss": 0.3501, "step": 1211 }, { "epoch": 0.04159231297185999, "grad_norm": 0.9318042379437508, "learning_rate": 9.996513659084106e-06, "loss": 0.3854, "step": 1212 }, { "epoch": 0.04162663006177076, "grad_norm": 0.9222839620718888, "learning_rate": 9.996492878603794e-06, "loss": 0.4509, "step": 1213 }, { "epoch": 0.04166094715168154, "grad_norm": 0.9198833428061796, "learning_rate": 9.99647203639764e-06, "loss": 0.4223, "step": 1214 }, { "epoch": 0.04169526424159231, "grad_norm": 0.897476659118433, "learning_rate": 9.996451132465898e-06, "loss": 0.365, "step": 1215 }, { "epoch": 0.04172958133150309, "grad_norm": 0.8418033212665362, "learning_rate": 9.996430166808828e-06, "loss": 0.346, "step": 1216 }, { "epoch": 0.04176389842141386, "grad_norm": 0.8906449497189286, "learning_rate": 9.996409139426688e-06, "loss": 0.429, "step": 1217 }, { "epoch": 0.04179821551132464, "grad_norm": 0.8644186467095487, "learning_rate": 9.996388050319739e-06, "loss": 0.3772, "step": 1218 }, { "epoch": 0.041832532601235414, "grad_norm": 0.8982261194854245, "learning_rate": 9.99636689948824e-06, "loss": 0.3619, "step": 1219 }, { "epoch": 0.041866849691146193, "grad_norm": 0.8748926645689891, "learning_rate": 9.996345686932454e-06, "loss": 0.4163, "step": 1220 }, { "epoch": 0.041901166781056966, "grad_norm": 0.8746681888268437, "learning_rate": 9.996324412652641e-06, "loss": 0.3978, "step": 1221 }, { "epoch": 0.041935483870967745, "grad_norm": 1.0800029698824967, "learning_rate": 9.996303076649066e-06, "loss": 0.4415, "step": 1222 }, { "epoch": 0.04196980096087852, "grad_norm": 1.0230531961694929, "learning_rate": 9.996281678921992e-06, "loss": 0.3877, "step": 1223 }, { "epoch": 0.042004118050789296, "grad_norm": 0.8342230139095763, "learning_rate": 9.996260219471683e-06, "loss": 0.389, "step": 1224 }, { "epoch": 0.04203843514070007, "grad_norm": 0.9413645811904794, "learning_rate": 9.996238698298403e-06, "loss": 0.3609, "step": 1225 }, { "epoch": 0.04207275223061084, "grad_norm": 0.969431357943391, "learning_rate": 9.996217115402419e-06, "loss": 0.4314, "step": 1226 }, { "epoch": 0.04210706932052162, "grad_norm": 0.938180234507195, "learning_rate": 9.996195470783998e-06, "loss": 0.3874, "step": 1227 }, { "epoch": 0.04214138641043239, "grad_norm": 0.8699800868554175, "learning_rate": 9.996173764443407e-06, "loss": 0.3992, "step": 1228 }, { "epoch": 0.04217570350034317, "grad_norm": 0.9179138251071365, "learning_rate": 9.996151996380914e-06, "loss": 0.3993, "step": 1229 }, { "epoch": 0.042210020590253944, "grad_norm": 0.8548804950832976, "learning_rate": 9.996130166596787e-06, "loss": 0.409, "step": 1230 }, { "epoch": 0.04224433768016472, "grad_norm": 0.8896810794081459, "learning_rate": 9.996108275091298e-06, "loss": 0.4433, "step": 1231 }, { "epoch": 0.042278654770075495, "grad_norm": 0.8758296584465418, "learning_rate": 9.996086321864718e-06, "loss": 0.4502, "step": 1232 }, { "epoch": 0.042312971859986274, "grad_norm": 0.8164689253050504, "learning_rate": 9.996064306917312e-06, "loss": 0.3839, "step": 1233 }, { "epoch": 0.04234728894989705, "grad_norm": 0.8928612628184962, "learning_rate": 9.99604223024936e-06, "loss": 0.3726, "step": 1234 }, { "epoch": 0.042381606039807826, "grad_norm": 0.9716044251050169, "learning_rate": 9.99602009186113e-06, "loss": 0.4156, "step": 1235 }, { "epoch": 0.0424159231297186, "grad_norm": 1.069996653340283, "learning_rate": 9.995997891752896e-06, "loss": 0.4024, "step": 1236 }, { "epoch": 0.04245024021962938, "grad_norm": 0.9091884111337029, "learning_rate": 9.995975629924932e-06, "loss": 0.3481, "step": 1237 }, { "epoch": 0.04248455730954015, "grad_norm": 1.1733904270121005, "learning_rate": 9.995953306377514e-06, "loss": 0.4029, "step": 1238 }, { "epoch": 0.04251887439945093, "grad_norm": 0.9437428880760794, "learning_rate": 9.99593092111092e-06, "loss": 0.3933, "step": 1239 }, { "epoch": 0.0425531914893617, "grad_norm": 0.8789397988011731, "learning_rate": 9.995908474125424e-06, "loss": 0.3461, "step": 1240 }, { "epoch": 0.04258750857927248, "grad_norm": 0.9048484563488989, "learning_rate": 9.995885965421302e-06, "loss": 0.4063, "step": 1241 }, { "epoch": 0.04262182566918325, "grad_norm": 0.8849166916459675, "learning_rate": 9.995863394998833e-06, "loss": 0.426, "step": 1242 }, { "epoch": 0.04265614275909403, "grad_norm": 0.960792518380771, "learning_rate": 9.995840762858297e-06, "loss": 0.3773, "step": 1243 }, { "epoch": 0.042690459849004804, "grad_norm": 1.0043249036055726, "learning_rate": 9.995818068999972e-06, "loss": 0.3853, "step": 1244 }, { "epoch": 0.04272477693891558, "grad_norm": 0.876858190423364, "learning_rate": 9.995795313424142e-06, "loss": 0.3495, "step": 1245 }, { "epoch": 0.042759094028826355, "grad_norm": 0.9170435164808087, "learning_rate": 9.995772496131084e-06, "loss": 0.4184, "step": 1246 }, { "epoch": 0.04279341111873713, "grad_norm": 0.8935841671402587, "learning_rate": 9.99574961712108e-06, "loss": 0.4229, "step": 1247 }, { "epoch": 0.04282772820864791, "grad_norm": 0.9106707942705817, "learning_rate": 9.995726676394415e-06, "loss": 0.3872, "step": 1248 }, { "epoch": 0.04286204529855868, "grad_norm": 0.8408581038793475, "learning_rate": 9.995703673951371e-06, "loss": 0.3916, "step": 1249 }, { "epoch": 0.04289636238846946, "grad_norm": 0.8253012718851755, "learning_rate": 9.995680609792233e-06, "loss": 0.3838, "step": 1250 }, { "epoch": 0.04293067947838023, "grad_norm": 0.9436742283530739, "learning_rate": 9.995657483917286e-06, "loss": 0.3367, "step": 1251 }, { "epoch": 0.04296499656829101, "grad_norm": 0.9576040182711791, "learning_rate": 9.995634296326815e-06, "loss": 0.4037, "step": 1252 }, { "epoch": 0.04299931365820178, "grad_norm": 0.9905465282258346, "learning_rate": 9.995611047021106e-06, "loss": 0.4353, "step": 1253 }, { "epoch": 0.04303363074811256, "grad_norm": 0.8611861550486676, "learning_rate": 9.995587736000447e-06, "loss": 0.3446, "step": 1254 }, { "epoch": 0.043067947838023334, "grad_norm": 0.9151204499365572, "learning_rate": 9.995564363265126e-06, "loss": 0.3749, "step": 1255 }, { "epoch": 0.04310226492793411, "grad_norm": 0.886834575259231, "learning_rate": 9.99554092881543e-06, "loss": 0.4324, "step": 1256 }, { "epoch": 0.043136582017844885, "grad_norm": 0.9133053366433709, "learning_rate": 9.995517432651652e-06, "loss": 0.3972, "step": 1257 }, { "epoch": 0.043170899107755664, "grad_norm": 0.846753602952413, "learning_rate": 9.99549387477408e-06, "loss": 0.3898, "step": 1258 }, { "epoch": 0.043205216197666436, "grad_norm": 0.9472882760663707, "learning_rate": 9.995470255183004e-06, "loss": 0.4163, "step": 1259 }, { "epoch": 0.043239533287577216, "grad_norm": 0.8069662215061715, "learning_rate": 9.99544657387872e-06, "loss": 0.3597, "step": 1260 }, { "epoch": 0.04327385037748799, "grad_norm": 0.8482044793461909, "learning_rate": 9.995422830861517e-06, "loss": 0.3391, "step": 1261 }, { "epoch": 0.04330816746739877, "grad_norm": 0.9023002538742471, "learning_rate": 9.995399026131686e-06, "loss": 0.3696, "step": 1262 }, { "epoch": 0.04334248455730954, "grad_norm": 0.9402034482822492, "learning_rate": 9.995375159689527e-06, "loss": 0.3986, "step": 1263 }, { "epoch": 0.04337680164722032, "grad_norm": 0.8921698010079432, "learning_rate": 9.995351231535331e-06, "loss": 0.3653, "step": 1264 }, { "epoch": 0.04341111873713109, "grad_norm": 0.9524535123964226, "learning_rate": 9.995327241669394e-06, "loss": 0.3616, "step": 1265 }, { "epoch": 0.04344543582704187, "grad_norm": 0.923928508668001, "learning_rate": 9.995303190092014e-06, "loss": 0.3937, "step": 1266 }, { "epoch": 0.04347975291695264, "grad_norm": 0.8796253445575885, "learning_rate": 9.995279076803486e-06, "loss": 0.3965, "step": 1267 }, { "epoch": 0.043514070006863415, "grad_norm": 1.0054260735391838, "learning_rate": 9.99525490180411e-06, "loss": 0.4161, "step": 1268 }, { "epoch": 0.043548387096774194, "grad_norm": 0.9998917630680187, "learning_rate": 9.995230665094183e-06, "loss": 0.4211, "step": 1269 }, { "epoch": 0.043582704186684966, "grad_norm": 0.8681020618692671, "learning_rate": 9.995206366674005e-06, "loss": 0.3527, "step": 1270 }, { "epoch": 0.043617021276595745, "grad_norm": 1.0132860304334232, "learning_rate": 9.995182006543876e-06, "loss": 0.407, "step": 1271 }, { "epoch": 0.04365133836650652, "grad_norm": 0.8858000687356298, "learning_rate": 9.995157584704099e-06, "loss": 0.3835, "step": 1272 }, { "epoch": 0.0436856554564173, "grad_norm": 0.8880331263947672, "learning_rate": 9.99513310115497e-06, "loss": 0.3726, "step": 1273 }, { "epoch": 0.04371997254632807, "grad_norm": 1.00350312473001, "learning_rate": 9.995108555896799e-06, "loss": 0.3585, "step": 1274 }, { "epoch": 0.04375428963623885, "grad_norm": 0.8775536027154245, "learning_rate": 9.995083948929885e-06, "loss": 0.3787, "step": 1275 }, { "epoch": 0.04378860672614962, "grad_norm": 0.796639037936063, "learning_rate": 9.995059280254532e-06, "loss": 0.3287, "step": 1276 }, { "epoch": 0.0438229238160604, "grad_norm": 0.987414282110769, "learning_rate": 9.995034549871043e-06, "loss": 0.4226, "step": 1277 }, { "epoch": 0.04385724090597117, "grad_norm": 0.9776117274105873, "learning_rate": 9.995009757779728e-06, "loss": 0.5069, "step": 1278 }, { "epoch": 0.04389155799588195, "grad_norm": 0.9085165449217497, "learning_rate": 9.994984903980891e-06, "loss": 0.3331, "step": 1279 }, { "epoch": 0.04392587508579272, "grad_norm": 0.996891844163413, "learning_rate": 9.994959988474838e-06, "loss": 0.4462, "step": 1280 }, { "epoch": 0.0439601921757035, "grad_norm": 0.8352180700479204, "learning_rate": 9.99493501126188e-06, "loss": 0.4469, "step": 1281 }, { "epoch": 0.043994509265614275, "grad_norm": 0.8380291887874816, "learning_rate": 9.994909972342322e-06, "loss": 0.4101, "step": 1282 }, { "epoch": 0.044028826355525054, "grad_norm": 0.9020550250721231, "learning_rate": 9.994884871716474e-06, "loss": 0.4189, "step": 1283 }, { "epoch": 0.044063143445435826, "grad_norm": 0.8654402260815645, "learning_rate": 9.994859709384648e-06, "loss": 0.3815, "step": 1284 }, { "epoch": 0.044097460535346605, "grad_norm": 0.9257583394360233, "learning_rate": 9.994834485347154e-06, "loss": 0.4422, "step": 1285 }, { "epoch": 0.04413177762525738, "grad_norm": 0.9375249717989331, "learning_rate": 9.994809199604303e-06, "loss": 0.4037, "step": 1286 }, { "epoch": 0.04416609471516816, "grad_norm": 0.8870256371356968, "learning_rate": 9.994783852156407e-06, "loss": 0.4477, "step": 1287 }, { "epoch": 0.04420041180507893, "grad_norm": 0.8904443099469556, "learning_rate": 9.994758443003781e-06, "loss": 0.4108, "step": 1288 }, { "epoch": 0.04423472889498971, "grad_norm": 0.9510903986147675, "learning_rate": 9.994732972146737e-06, "loss": 0.4015, "step": 1289 }, { "epoch": 0.04426904598490048, "grad_norm": 0.9990284192861704, "learning_rate": 9.99470743958559e-06, "loss": 0.4612, "step": 1290 }, { "epoch": 0.04430336307481125, "grad_norm": 0.8257153065548685, "learning_rate": 9.994681845320657e-06, "loss": 0.4136, "step": 1291 }, { "epoch": 0.04433768016472203, "grad_norm": 0.8987822961892868, "learning_rate": 9.994656189352253e-06, "loss": 0.3239, "step": 1292 }, { "epoch": 0.044371997254632804, "grad_norm": 0.9525849991715694, "learning_rate": 9.994630471680694e-06, "loss": 0.4216, "step": 1293 }, { "epoch": 0.04440631434454358, "grad_norm": 0.9403750432806306, "learning_rate": 9.9946046923063e-06, "loss": 0.3855, "step": 1294 }, { "epoch": 0.044440631434454356, "grad_norm": 0.9237235332927416, "learning_rate": 9.994578851229386e-06, "loss": 0.4336, "step": 1295 }, { "epoch": 0.044474948524365135, "grad_norm": 0.9074226414893336, "learning_rate": 9.994552948450275e-06, "loss": 0.3603, "step": 1296 }, { "epoch": 0.04450926561427591, "grad_norm": 1.0110716417662835, "learning_rate": 9.994526983969286e-06, "loss": 0.4407, "step": 1297 }, { "epoch": 0.044543582704186686, "grad_norm": 1.0818930657882193, "learning_rate": 9.994500957786738e-06, "loss": 0.3885, "step": 1298 }, { "epoch": 0.04457789979409746, "grad_norm": 0.8133203097688456, "learning_rate": 9.994474869902953e-06, "loss": 0.349, "step": 1299 }, { "epoch": 0.04461221688400824, "grad_norm": 1.0943160143917294, "learning_rate": 9.994448720318257e-06, "loss": 0.4281, "step": 1300 }, { "epoch": 0.04464653397391901, "grad_norm": 0.9277069423311175, "learning_rate": 9.994422509032968e-06, "loss": 0.3752, "step": 1301 }, { "epoch": 0.04468085106382979, "grad_norm": 0.8216837916392883, "learning_rate": 9.994396236047411e-06, "loss": 0.3289, "step": 1302 }, { "epoch": 0.04471516815374056, "grad_norm": 0.966720387410084, "learning_rate": 9.994369901361912e-06, "loss": 0.4051, "step": 1303 }, { "epoch": 0.04474948524365134, "grad_norm": 0.9823833271197528, "learning_rate": 9.994343504976795e-06, "loss": 0.4016, "step": 1304 }, { "epoch": 0.04478380233356211, "grad_norm": 1.0144502801880488, "learning_rate": 9.99431704689239e-06, "loss": 0.4065, "step": 1305 }, { "epoch": 0.04481811942347289, "grad_norm": 0.8736296641017941, "learning_rate": 9.99429052710902e-06, "loss": 0.4748, "step": 1306 }, { "epoch": 0.044852436513383664, "grad_norm": 0.8935723506143606, "learning_rate": 9.99426394562701e-06, "loss": 0.366, "step": 1307 }, { "epoch": 0.044886753603294444, "grad_norm": 0.8007992927494346, "learning_rate": 9.994237302446693e-06, "loss": 0.3973, "step": 1308 }, { "epoch": 0.044921070693205216, "grad_norm": 0.878275084401331, "learning_rate": 9.994210597568397e-06, "loss": 0.3609, "step": 1309 }, { "epoch": 0.044955387783115995, "grad_norm": 0.8122929502387282, "learning_rate": 9.994183830992452e-06, "loss": 0.3973, "step": 1310 }, { "epoch": 0.04498970487302677, "grad_norm": 0.9841348088060172, "learning_rate": 9.99415700271919e-06, "loss": 0.374, "step": 1311 }, { "epoch": 0.04502402196293754, "grad_norm": 0.9378128693361512, "learning_rate": 9.994130112748939e-06, "loss": 0.3816, "step": 1312 }, { "epoch": 0.04505833905284832, "grad_norm": 0.9419605305343183, "learning_rate": 9.994103161082034e-06, "loss": 0.3937, "step": 1313 }, { "epoch": 0.04509265614275909, "grad_norm": 0.9255206394021634, "learning_rate": 9.994076147718805e-06, "loss": 0.3713, "step": 1314 }, { "epoch": 0.04512697323266987, "grad_norm": 0.8917965608476736, "learning_rate": 9.994049072659591e-06, "loss": 0.354, "step": 1315 }, { "epoch": 0.04516129032258064, "grad_norm": 1.0453443881191855, "learning_rate": 9.99402193590472e-06, "loss": 0.3699, "step": 1316 }, { "epoch": 0.04519560741249142, "grad_norm": 0.9502962454469687, "learning_rate": 9.993994737454533e-06, "loss": 0.4359, "step": 1317 }, { "epoch": 0.045229924502402194, "grad_norm": 0.911195490635074, "learning_rate": 9.99396747730936e-06, "loss": 0.372, "step": 1318 }, { "epoch": 0.04526424159231297, "grad_norm": 0.8980425636667395, "learning_rate": 9.993940155469545e-06, "loss": 0.3568, "step": 1319 }, { "epoch": 0.045298558682223745, "grad_norm": 1.061639879095648, "learning_rate": 9.993912771935422e-06, "loss": 0.3802, "step": 1320 }, { "epoch": 0.045332875772134525, "grad_norm": 0.913682613300919, "learning_rate": 9.993885326707326e-06, "loss": 0.3811, "step": 1321 }, { "epoch": 0.0453671928620453, "grad_norm": 0.9042370541772754, "learning_rate": 9.9938578197856e-06, "loss": 0.3979, "step": 1322 }, { "epoch": 0.045401509951956076, "grad_norm": 1.018003491924934, "learning_rate": 9.993830251170584e-06, "loss": 0.4273, "step": 1323 }, { "epoch": 0.04543582704186685, "grad_norm": 0.9252013197268649, "learning_rate": 9.993802620862616e-06, "loss": 0.4226, "step": 1324 }, { "epoch": 0.04547014413177763, "grad_norm": 0.9519349599616841, "learning_rate": 9.99377492886204e-06, "loss": 0.3819, "step": 1325 }, { "epoch": 0.0455044612216884, "grad_norm": 0.8178611992992176, "learning_rate": 9.993747175169198e-06, "loss": 0.3258, "step": 1326 }, { "epoch": 0.04553877831159918, "grad_norm": 0.789080930603835, "learning_rate": 9.993719359784428e-06, "loss": 0.3394, "step": 1327 }, { "epoch": 0.04557309540150995, "grad_norm": 0.9166777386961192, "learning_rate": 9.99369148270808e-06, "loss": 0.3817, "step": 1328 }, { "epoch": 0.04560741249142073, "grad_norm": 0.918159769955094, "learning_rate": 9.993663543940497e-06, "loss": 0.4116, "step": 1329 }, { "epoch": 0.0456417295813315, "grad_norm": 0.995423233927047, "learning_rate": 9.99363554348202e-06, "loss": 0.3848, "step": 1330 }, { "epoch": 0.04567604667124228, "grad_norm": 0.8481881200845599, "learning_rate": 9.993607481333e-06, "loss": 0.362, "step": 1331 }, { "epoch": 0.045710363761153054, "grad_norm": 1.0024010735975837, "learning_rate": 9.993579357493782e-06, "loss": 0.4225, "step": 1332 }, { "epoch": 0.045744680851063826, "grad_norm": 0.8976505161026879, "learning_rate": 9.993551171964712e-06, "loss": 0.4023, "step": 1333 }, { "epoch": 0.045778997940974606, "grad_norm": 0.9098184890366682, "learning_rate": 9.993522924746138e-06, "loss": 0.4006, "step": 1334 }, { "epoch": 0.04581331503088538, "grad_norm": 0.8185554805419906, "learning_rate": 9.993494615838413e-06, "loss": 0.3697, "step": 1335 }, { "epoch": 0.04584763212079616, "grad_norm": 0.865415512333141, "learning_rate": 9.99346624524188e-06, "loss": 0.4417, "step": 1336 }, { "epoch": 0.04588194921070693, "grad_norm": 0.9104830515940454, "learning_rate": 9.993437812956896e-06, "loss": 0.353, "step": 1337 }, { "epoch": 0.04591626630061771, "grad_norm": 0.7634266831270446, "learning_rate": 9.99340931898381e-06, "loss": 0.3265, "step": 1338 }, { "epoch": 0.04595058339052848, "grad_norm": 0.8579153083545858, "learning_rate": 9.993380763322973e-06, "loss": 0.424, "step": 1339 }, { "epoch": 0.04598490048043926, "grad_norm": 0.8647691530717504, "learning_rate": 9.993352145974738e-06, "loss": 0.3716, "step": 1340 }, { "epoch": 0.04601921757035003, "grad_norm": 0.9444663422951776, "learning_rate": 9.99332346693946e-06, "loss": 0.5102, "step": 1341 }, { "epoch": 0.04605353466026081, "grad_norm": 0.8625744090604602, "learning_rate": 9.99329472621749e-06, "loss": 0.4067, "step": 1342 }, { "epoch": 0.046087851750171584, "grad_norm": 0.8590509247276042, "learning_rate": 9.993265923809187e-06, "loss": 0.4286, "step": 1343 }, { "epoch": 0.04612216884008236, "grad_norm": 1.0356792462050262, "learning_rate": 9.993237059714904e-06, "loss": 0.3674, "step": 1344 }, { "epoch": 0.046156485929993135, "grad_norm": 0.9507287898101737, "learning_rate": 9.993208133934999e-06, "loss": 0.3629, "step": 1345 }, { "epoch": 0.046190803019903914, "grad_norm": 1.017957806003986, "learning_rate": 9.99317914646983e-06, "loss": 0.4189, "step": 1346 }, { "epoch": 0.04622512010981469, "grad_norm": 0.9928202173441943, "learning_rate": 9.993150097319753e-06, "loss": 0.433, "step": 1347 }, { "epoch": 0.046259437199725466, "grad_norm": 0.9581893491409904, "learning_rate": 9.99312098648513e-06, "loss": 0.4356, "step": 1348 }, { "epoch": 0.04629375428963624, "grad_norm": 0.8614458886158857, "learning_rate": 9.993091813966315e-06, "loss": 0.3561, "step": 1349 }, { "epoch": 0.04632807137954702, "grad_norm": 0.8376593049365346, "learning_rate": 9.993062579763675e-06, "loss": 0.3851, "step": 1350 }, { "epoch": 0.04636238846945779, "grad_norm": 0.9029452197538826, "learning_rate": 9.993033283877567e-06, "loss": 0.3765, "step": 1351 }, { "epoch": 0.04639670555936857, "grad_norm": 0.872734665833352, "learning_rate": 9.993003926308355e-06, "loss": 0.4025, "step": 1352 }, { "epoch": 0.04643102264927934, "grad_norm": 0.9605975524276658, "learning_rate": 9.9929745070564e-06, "loss": 0.4151, "step": 1353 }, { "epoch": 0.04646533973919011, "grad_norm": 0.844493417377377, "learning_rate": 9.992945026122066e-06, "loss": 0.3974, "step": 1354 }, { "epoch": 0.04649965682910089, "grad_norm": 0.9269179367301175, "learning_rate": 9.992915483505718e-06, "loss": 0.3841, "step": 1355 }, { "epoch": 0.046533973919011665, "grad_norm": 0.983625089284188, "learning_rate": 9.99288587920772e-06, "loss": 0.4229, "step": 1356 }, { "epoch": 0.046568291008922444, "grad_norm": 0.9070070811366956, "learning_rate": 9.992856213228439e-06, "loss": 0.3659, "step": 1357 }, { "epoch": 0.046602608098833216, "grad_norm": 0.9062323566345121, "learning_rate": 9.992826485568239e-06, "loss": 0.3965, "step": 1358 }, { "epoch": 0.046636925188743995, "grad_norm": 0.9572881076361414, "learning_rate": 9.99279669622749e-06, "loss": 0.3543, "step": 1359 }, { "epoch": 0.04667124227865477, "grad_norm": 0.8614028825114433, "learning_rate": 9.992766845206558e-06, "loss": 0.3597, "step": 1360 }, { "epoch": 0.04670555936856555, "grad_norm": 0.9739166520199689, "learning_rate": 9.992736932505814e-06, "loss": 0.3918, "step": 1361 }, { "epoch": 0.04673987645847632, "grad_norm": 0.9986933544440196, "learning_rate": 9.992706958125626e-06, "loss": 0.4293, "step": 1362 }, { "epoch": 0.0467741935483871, "grad_norm": 0.9516372297666208, "learning_rate": 9.992676922066362e-06, "loss": 0.3939, "step": 1363 }, { "epoch": 0.04680851063829787, "grad_norm": 0.9236900294659164, "learning_rate": 9.992646824328398e-06, "loss": 0.3683, "step": 1364 }, { "epoch": 0.04684282772820865, "grad_norm": 0.9092978392123944, "learning_rate": 9.992616664912102e-06, "loss": 0.3725, "step": 1365 }, { "epoch": 0.04687714481811942, "grad_norm": 0.8195373413247141, "learning_rate": 9.99258644381785e-06, "loss": 0.3584, "step": 1366 }, { "epoch": 0.0469114619080302, "grad_norm": 0.8103472421063204, "learning_rate": 9.992556161046011e-06, "loss": 0.4063, "step": 1367 }, { "epoch": 0.04694577899794097, "grad_norm": 0.967993316032357, "learning_rate": 9.992525816596963e-06, "loss": 0.373, "step": 1368 }, { "epoch": 0.04698009608785175, "grad_norm": 0.9622189564449384, "learning_rate": 9.992495410471078e-06, "loss": 0.4299, "step": 1369 }, { "epoch": 0.047014413177762525, "grad_norm": 0.8899132244752426, "learning_rate": 9.992464942668733e-06, "loss": 0.4108, "step": 1370 }, { "epoch": 0.047048730267673304, "grad_norm": 0.9348691950477745, "learning_rate": 9.992434413190306e-06, "loss": 0.3589, "step": 1371 }, { "epoch": 0.047083047357584076, "grad_norm": 0.9585686491266876, "learning_rate": 9.992403822036169e-06, "loss": 0.3796, "step": 1372 }, { "epoch": 0.047117364447494856, "grad_norm": 0.9368979074710625, "learning_rate": 9.992373169206705e-06, "loss": 0.3823, "step": 1373 }, { "epoch": 0.04715168153740563, "grad_norm": 0.9313612875187411, "learning_rate": 9.992342454702293e-06, "loss": 0.4017, "step": 1374 }, { "epoch": 0.04718599862731641, "grad_norm": 0.8709715201731089, "learning_rate": 9.992311678523309e-06, "loss": 0.3552, "step": 1375 }, { "epoch": 0.04722031571722718, "grad_norm": 0.883060214587582, "learning_rate": 9.992280840670135e-06, "loss": 0.3869, "step": 1376 }, { "epoch": 0.04725463280713795, "grad_norm": 0.9410705183526904, "learning_rate": 9.99224994114315e-06, "loss": 0.4058, "step": 1377 }, { "epoch": 0.04728894989704873, "grad_norm": 0.9100497741105498, "learning_rate": 9.99221897994274e-06, "loss": 0.402, "step": 1378 }, { "epoch": 0.0473232669869595, "grad_norm": 0.8402995699439625, "learning_rate": 9.992187957069283e-06, "loss": 0.4003, "step": 1379 }, { "epoch": 0.04735758407687028, "grad_norm": 0.852730544715945, "learning_rate": 9.992156872523166e-06, "loss": 0.3539, "step": 1380 }, { "epoch": 0.047391901166781054, "grad_norm": 0.9638057343649272, "learning_rate": 9.99212572630477e-06, "loss": 0.3501, "step": 1381 }, { "epoch": 0.047426218256691834, "grad_norm": 0.9674489341164154, "learning_rate": 9.99209451841448e-06, "loss": 0.4343, "step": 1382 }, { "epoch": 0.047460535346602606, "grad_norm": 0.9242736309687433, "learning_rate": 9.992063248852684e-06, "loss": 0.441, "step": 1383 }, { "epoch": 0.047494852436513385, "grad_norm": 0.9682796640848359, "learning_rate": 9.992031917619764e-06, "loss": 0.3887, "step": 1384 }, { "epoch": 0.04752916952642416, "grad_norm": 0.8425290993720765, "learning_rate": 9.992000524716113e-06, "loss": 0.3608, "step": 1385 }, { "epoch": 0.047563486616334937, "grad_norm": 0.9157998199626967, "learning_rate": 9.991969070142113e-06, "loss": 0.3831, "step": 1386 }, { "epoch": 0.04759780370624571, "grad_norm": 1.0367991267497232, "learning_rate": 9.991937553898157e-06, "loss": 0.395, "step": 1387 }, { "epoch": 0.04763212079615649, "grad_norm": 0.8411266734598011, "learning_rate": 9.991905975984632e-06, "loss": 0.4278, "step": 1388 }, { "epoch": 0.04766643788606726, "grad_norm": 0.8791774786822515, "learning_rate": 9.99187433640193e-06, "loss": 0.3805, "step": 1389 }, { "epoch": 0.04770075497597804, "grad_norm": 0.8224250102828397, "learning_rate": 9.99184263515044e-06, "loss": 0.3701, "step": 1390 }, { "epoch": 0.04773507206588881, "grad_norm": 0.9628628504916494, "learning_rate": 9.991810872230552e-06, "loss": 0.3805, "step": 1391 }, { "epoch": 0.04776938915579959, "grad_norm": 0.8480929016381347, "learning_rate": 9.991779047642663e-06, "loss": 0.3531, "step": 1392 }, { "epoch": 0.04780370624571036, "grad_norm": 0.8790890644297987, "learning_rate": 9.991747161387163e-06, "loss": 0.3346, "step": 1393 }, { "epoch": 0.04783802333562114, "grad_norm": 0.8947480164548853, "learning_rate": 9.991715213464446e-06, "loss": 0.4004, "step": 1394 }, { "epoch": 0.047872340425531915, "grad_norm": 0.944049791317657, "learning_rate": 9.991683203874906e-06, "loss": 0.3547, "step": 1395 }, { "epoch": 0.047906657515442694, "grad_norm": 0.9247840586068403, "learning_rate": 9.991651132618943e-06, "loss": 0.3547, "step": 1396 }, { "epoch": 0.047940974605353466, "grad_norm": 0.957843174049681, "learning_rate": 9.991618999696948e-06, "loss": 0.3849, "step": 1397 }, { "epoch": 0.04797529169526424, "grad_norm": 0.9840539779548063, "learning_rate": 9.99158680510932e-06, "loss": 0.4104, "step": 1398 }, { "epoch": 0.04800960878517502, "grad_norm": 0.8875351914623104, "learning_rate": 9.991554548856457e-06, "loss": 0.4094, "step": 1399 }, { "epoch": 0.04804392587508579, "grad_norm": 0.9232310905892376, "learning_rate": 9.991522230938757e-06, "loss": 0.4471, "step": 1400 }, { "epoch": 0.04807824296499657, "grad_norm": 0.8619249113258812, "learning_rate": 9.99148985135662e-06, "loss": 0.38, "step": 1401 }, { "epoch": 0.04811256005490734, "grad_norm": 0.825234946849919, "learning_rate": 9.991457410110446e-06, "loss": 0.4248, "step": 1402 }, { "epoch": 0.04814687714481812, "grad_norm": 0.9497101615417617, "learning_rate": 9.991424907200633e-06, "loss": 0.3971, "step": 1403 }, { "epoch": 0.04818119423472889, "grad_norm": 0.8870435139834625, "learning_rate": 9.991392342627585e-06, "loss": 0.4738, "step": 1404 }, { "epoch": 0.04821551132463967, "grad_norm": 0.9040891444038834, "learning_rate": 9.991359716391705e-06, "loss": 0.4423, "step": 1405 }, { "epoch": 0.048249828414550444, "grad_norm": 0.9225210507719869, "learning_rate": 9.991327028493395e-06, "loss": 0.3991, "step": 1406 }, { "epoch": 0.04828414550446122, "grad_norm": 0.940312102662908, "learning_rate": 9.99129427893306e-06, "loss": 0.3901, "step": 1407 }, { "epoch": 0.048318462594371996, "grad_norm": 0.8508503265088401, "learning_rate": 9.991261467711102e-06, "loss": 0.3894, "step": 1408 }, { "epoch": 0.048352779684282775, "grad_norm": 0.922536677319517, "learning_rate": 9.991228594827927e-06, "loss": 0.415, "step": 1409 }, { "epoch": 0.04838709677419355, "grad_norm": 0.9688010101997923, "learning_rate": 9.991195660283942e-06, "loss": 0.4394, "step": 1410 }, { "epoch": 0.048421413864104326, "grad_norm": 0.9677533382093926, "learning_rate": 9.991162664079556e-06, "loss": 0.4274, "step": 1411 }, { "epoch": 0.0484557309540151, "grad_norm": 0.9266128403562846, "learning_rate": 9.991129606215173e-06, "loss": 0.4374, "step": 1412 }, { "epoch": 0.04849004804392588, "grad_norm": 0.9034753023786415, "learning_rate": 9.991096486691204e-06, "loss": 0.3953, "step": 1413 }, { "epoch": 0.04852436513383665, "grad_norm": 0.8593854413170562, "learning_rate": 9.991063305508056e-06, "loss": 0.3588, "step": 1414 }, { "epoch": 0.04855868222374743, "grad_norm": 0.868333821176706, "learning_rate": 9.991030062666139e-06, "loss": 0.3777, "step": 1415 }, { "epoch": 0.0485929993136582, "grad_norm": 0.8087042504598646, "learning_rate": 9.990996758165864e-06, "loss": 0.345, "step": 1416 }, { "epoch": 0.04862731640356898, "grad_norm": 0.9078619274128058, "learning_rate": 9.990963392007646e-06, "loss": 0.4046, "step": 1417 }, { "epoch": 0.04866163349347975, "grad_norm": 0.9107678221465391, "learning_rate": 9.99092996419189e-06, "loss": 0.4598, "step": 1418 }, { "epoch": 0.048695950583390525, "grad_norm": 0.8748011945181594, "learning_rate": 9.990896474719018e-06, "loss": 0.3614, "step": 1419 }, { "epoch": 0.048730267673301304, "grad_norm": 0.8658021357324361, "learning_rate": 9.990862923589436e-06, "loss": 0.4338, "step": 1420 }, { "epoch": 0.04876458476321208, "grad_norm": 0.963713185624495, "learning_rate": 9.990829310803563e-06, "loss": 0.3955, "step": 1421 }, { "epoch": 0.048798901853122856, "grad_norm": 0.9775832092546194, "learning_rate": 9.99079563636181e-06, "loss": 0.3811, "step": 1422 }, { "epoch": 0.04883321894303363, "grad_norm": 0.9317919211375868, "learning_rate": 9.990761900264597e-06, "loss": 0.3936, "step": 1423 }, { "epoch": 0.04886753603294441, "grad_norm": 1.1805826668262231, "learning_rate": 9.99072810251234e-06, "loss": 0.4163, "step": 1424 }, { "epoch": 0.04890185312285518, "grad_norm": 0.8473856774434579, "learning_rate": 9.990694243105455e-06, "loss": 0.3736, "step": 1425 }, { "epoch": 0.04893617021276596, "grad_norm": 0.8745310554358655, "learning_rate": 9.990660322044362e-06, "loss": 0.4356, "step": 1426 }, { "epoch": 0.04897048730267673, "grad_norm": 1.0955381252992786, "learning_rate": 9.99062633932948e-06, "loss": 0.4111, "step": 1427 }, { "epoch": 0.04900480439258751, "grad_norm": 0.908016298295898, "learning_rate": 9.990592294961225e-06, "loss": 0.3807, "step": 1428 }, { "epoch": 0.04903912148249828, "grad_norm": 0.9312863456880209, "learning_rate": 9.990558188940024e-06, "loss": 0.4338, "step": 1429 }, { "epoch": 0.04907343857240906, "grad_norm": 0.8616974490710233, "learning_rate": 9.990524021266293e-06, "loss": 0.4004, "step": 1430 }, { "epoch": 0.049107755662319834, "grad_norm": 1.0872493509604337, "learning_rate": 9.990489791940456e-06, "loss": 0.4293, "step": 1431 }, { "epoch": 0.04914207275223061, "grad_norm": 0.9491898431606747, "learning_rate": 9.990455500962938e-06, "loss": 0.3733, "step": 1432 }, { "epoch": 0.049176389842141385, "grad_norm": 1.1107658857605083, "learning_rate": 9.990421148334159e-06, "loss": 0.4682, "step": 1433 }, { "epoch": 0.049210706932052165, "grad_norm": 0.9526340426101776, "learning_rate": 9.990386734054544e-06, "loss": 0.3631, "step": 1434 }, { "epoch": 0.04924502402196294, "grad_norm": 0.8873251867426173, "learning_rate": 9.990352258124521e-06, "loss": 0.3765, "step": 1435 }, { "epoch": 0.049279341111873716, "grad_norm": 0.9173025584602741, "learning_rate": 9.990317720544514e-06, "loss": 0.41, "step": 1436 }, { "epoch": 0.04931365820178449, "grad_norm": 0.8178026805880542, "learning_rate": 9.99028312131495e-06, "loss": 0.3874, "step": 1437 }, { "epoch": 0.04934797529169527, "grad_norm": 0.9672603445896462, "learning_rate": 9.990248460436256e-06, "loss": 0.4162, "step": 1438 }, { "epoch": 0.04938229238160604, "grad_norm": 0.8998361852199526, "learning_rate": 9.990213737908858e-06, "loss": 0.4059, "step": 1439 }, { "epoch": 0.04941660947151681, "grad_norm": 0.8776533070976092, "learning_rate": 9.990178953733189e-06, "loss": 0.4164, "step": 1440 }, { "epoch": 0.04945092656142759, "grad_norm": 0.9203169112872865, "learning_rate": 9.990144107909676e-06, "loss": 0.4097, "step": 1441 }, { "epoch": 0.04948524365133836, "grad_norm": 0.8798847408963083, "learning_rate": 9.990109200438754e-06, "loss": 0.3912, "step": 1442 }, { "epoch": 0.04951956074124914, "grad_norm": 0.8809480826601724, "learning_rate": 9.990074231320847e-06, "loss": 0.4022, "step": 1443 }, { "epoch": 0.049553877831159915, "grad_norm": 0.8859968533965348, "learning_rate": 9.990039200556391e-06, "loss": 0.3455, "step": 1444 }, { "epoch": 0.049588194921070694, "grad_norm": 0.9348943025693408, "learning_rate": 9.990004108145821e-06, "loss": 0.3537, "step": 1445 }, { "epoch": 0.049622512010981466, "grad_norm": 0.8387160898820805, "learning_rate": 9.989968954089567e-06, "loss": 0.3452, "step": 1446 }, { "epoch": 0.049656829100892245, "grad_norm": 1.065417895551449, "learning_rate": 9.989933738388062e-06, "loss": 0.4291, "step": 1447 }, { "epoch": 0.04969114619080302, "grad_norm": 0.8432961226864891, "learning_rate": 9.989898461041744e-06, "loss": 0.3595, "step": 1448 }, { "epoch": 0.0497254632807138, "grad_norm": 0.9841545777148154, "learning_rate": 9.98986312205105e-06, "loss": 0.3836, "step": 1449 }, { "epoch": 0.04975978037062457, "grad_norm": 0.8200703272135375, "learning_rate": 9.989827721416414e-06, "loss": 0.3973, "step": 1450 }, { "epoch": 0.04979409746053535, "grad_norm": 0.8563130595828423, "learning_rate": 9.989792259138275e-06, "loss": 0.3522, "step": 1451 }, { "epoch": 0.04982841455044612, "grad_norm": 0.9753241180214114, "learning_rate": 9.989756735217068e-06, "loss": 0.4242, "step": 1452 }, { "epoch": 0.0498627316403569, "grad_norm": 1.0088084831203064, "learning_rate": 9.989721149653237e-06, "loss": 0.3872, "step": 1453 }, { "epoch": 0.04989704873026767, "grad_norm": 0.9111379744262358, "learning_rate": 9.989685502447216e-06, "loss": 0.3683, "step": 1454 }, { "epoch": 0.04993136582017845, "grad_norm": 0.8971339037029044, "learning_rate": 9.98964979359945e-06, "loss": 0.4024, "step": 1455 }, { "epoch": 0.049965682910089224, "grad_norm": 0.957949396399507, "learning_rate": 9.989614023110377e-06, "loss": 0.4503, "step": 1456 }, { "epoch": 0.05, "grad_norm": 0.9574454403572399, "learning_rate": 9.98957819098044e-06, "loss": 0.4186, "step": 1457 }, { "epoch": 0.050034317089910775, "grad_norm": 0.8989601159599647, "learning_rate": 9.989542297210081e-06, "loss": 0.4065, "step": 1458 }, { "epoch": 0.050068634179821554, "grad_norm": 0.8519413069478047, "learning_rate": 9.989506341799745e-06, "loss": 0.375, "step": 1459 }, { "epoch": 0.050102951269732326, "grad_norm": 0.899560037403041, "learning_rate": 9.989470324749875e-06, "loss": 0.3692, "step": 1460 }, { "epoch": 0.050137268359643106, "grad_norm": 0.8128861776820002, "learning_rate": 9.989434246060919e-06, "loss": 0.3575, "step": 1461 }, { "epoch": 0.05017158544955388, "grad_norm": 0.9251586064709127, "learning_rate": 9.989398105733316e-06, "loss": 0.4299, "step": 1462 }, { "epoch": 0.05020590253946465, "grad_norm": 0.840958146482039, "learning_rate": 9.989361903767519e-06, "loss": 0.4073, "step": 1463 }, { "epoch": 0.05024021962937543, "grad_norm": 0.9054427506618153, "learning_rate": 9.989325640163972e-06, "loss": 0.4114, "step": 1464 }, { "epoch": 0.0502745367192862, "grad_norm": 0.9446090511419084, "learning_rate": 9.989289314923122e-06, "loss": 0.3919, "step": 1465 }, { "epoch": 0.05030885380919698, "grad_norm": 0.9037391083849978, "learning_rate": 9.989252928045423e-06, "loss": 0.3828, "step": 1466 }, { "epoch": 0.05034317089910775, "grad_norm": 1.0287633302161423, "learning_rate": 9.989216479531318e-06, "loss": 0.4234, "step": 1467 }, { "epoch": 0.05037748798901853, "grad_norm": 0.8922965160576768, "learning_rate": 9.989179969381261e-06, "loss": 0.3982, "step": 1468 }, { "epoch": 0.050411805078929305, "grad_norm": 0.95044473246953, "learning_rate": 9.989143397595703e-06, "loss": 0.3776, "step": 1469 }, { "epoch": 0.050446122168840084, "grad_norm": 0.8359929430321646, "learning_rate": 9.989106764175095e-06, "loss": 0.3796, "step": 1470 }, { "epoch": 0.050480439258750856, "grad_norm": 0.9851619356664256, "learning_rate": 9.98907006911989e-06, "loss": 0.4088, "step": 1471 }, { "epoch": 0.050514756348661635, "grad_norm": 0.9800766835340873, "learning_rate": 9.989033312430541e-06, "loss": 0.4552, "step": 1472 }, { "epoch": 0.05054907343857241, "grad_norm": 0.9225207383294418, "learning_rate": 9.988996494107501e-06, "loss": 0.3734, "step": 1473 }, { "epoch": 0.05058339052848319, "grad_norm": 0.9589090158578085, "learning_rate": 9.988959614151227e-06, "loss": 0.3479, "step": 1474 }, { "epoch": 0.05061770761839396, "grad_norm": 1.1251806181088126, "learning_rate": 9.988922672562172e-06, "loss": 0.3666, "step": 1475 }, { "epoch": 0.05065202470830474, "grad_norm": 0.8526774549546423, "learning_rate": 9.988885669340794e-06, "loss": 0.4469, "step": 1476 }, { "epoch": 0.05068634179821551, "grad_norm": 1.035664002871205, "learning_rate": 9.988848604487551e-06, "loss": 0.4141, "step": 1477 }, { "epoch": 0.05072065888812629, "grad_norm": 1.1074573433582848, "learning_rate": 9.9888114780029e-06, "loss": 0.363, "step": 1478 }, { "epoch": 0.05075497597803706, "grad_norm": 0.8965453785786895, "learning_rate": 9.9887742898873e-06, "loss": 0.3881, "step": 1479 }, { "epoch": 0.05078929306794784, "grad_norm": 1.0171282544796094, "learning_rate": 9.988737040141209e-06, "loss": 0.4262, "step": 1480 }, { "epoch": 0.05082361015785861, "grad_norm": 0.9413619709611699, "learning_rate": 9.988699728765088e-06, "loss": 0.3819, "step": 1481 }, { "epoch": 0.05085792724776939, "grad_norm": 1.0096047239873538, "learning_rate": 9.988662355759399e-06, "loss": 0.3771, "step": 1482 }, { "epoch": 0.050892244337680165, "grad_norm": 0.9478562658529666, "learning_rate": 9.988624921124602e-06, "loss": 0.4468, "step": 1483 }, { "epoch": 0.05092656142759094, "grad_norm": 0.9695905051231453, "learning_rate": 9.988587424861161e-06, "loss": 0.3632, "step": 1484 }, { "epoch": 0.050960878517501716, "grad_norm": 0.9350415124508412, "learning_rate": 9.98854986696954e-06, "loss": 0.3483, "step": 1485 }, { "epoch": 0.05099519560741249, "grad_norm": 0.9117167991029513, "learning_rate": 9.988512247450197e-06, "loss": 0.402, "step": 1486 }, { "epoch": 0.05102951269732327, "grad_norm": 1.039052812935526, "learning_rate": 9.988474566303606e-06, "loss": 0.4509, "step": 1487 }, { "epoch": 0.05106382978723404, "grad_norm": 0.9183398211441123, "learning_rate": 9.988436823530225e-06, "loss": 0.3837, "step": 1488 }, { "epoch": 0.05109814687714482, "grad_norm": 0.844887586336918, "learning_rate": 9.988399019130523e-06, "loss": 0.3777, "step": 1489 }, { "epoch": 0.05113246396705559, "grad_norm": 0.9727022287371829, "learning_rate": 9.988361153104969e-06, "loss": 0.3929, "step": 1490 }, { "epoch": 0.05116678105696637, "grad_norm": 1.030911616652489, "learning_rate": 9.988323225454026e-06, "loss": 0.4039, "step": 1491 }, { "epoch": 0.05120109814687714, "grad_norm": 0.9186201722950016, "learning_rate": 9.988285236178167e-06, "loss": 0.41, "step": 1492 }, { "epoch": 0.05123541523678792, "grad_norm": 0.9384508826539909, "learning_rate": 9.98824718527786e-06, "loss": 0.4225, "step": 1493 }, { "epoch": 0.051269732326698694, "grad_norm": 0.859906171540128, "learning_rate": 9.988209072753574e-06, "loss": 0.3863, "step": 1494 }, { "epoch": 0.051304049416609473, "grad_norm": 0.8590850811390797, "learning_rate": 9.98817089860578e-06, "loss": 0.3879, "step": 1495 }, { "epoch": 0.051338366506520246, "grad_norm": 0.9849853520110111, "learning_rate": 9.988132662834951e-06, "loss": 0.4105, "step": 1496 }, { "epoch": 0.051372683596431025, "grad_norm": 0.9870527692571277, "learning_rate": 9.988094365441559e-06, "loss": 0.4147, "step": 1497 }, { "epoch": 0.0514070006863418, "grad_norm": 0.9618943429080067, "learning_rate": 9.988056006426076e-06, "loss": 0.5022, "step": 1498 }, { "epoch": 0.051441317776252576, "grad_norm": 1.002609619503651, "learning_rate": 9.988017585788979e-06, "loss": 0.4098, "step": 1499 }, { "epoch": 0.05147563486616335, "grad_norm": 0.9885730780561173, "learning_rate": 9.987979103530736e-06, "loss": 0.4028, "step": 1500 }, { "epoch": 0.05150995195607413, "grad_norm": 0.8632588667494423, "learning_rate": 9.98794055965183e-06, "loss": 0.4358, "step": 1501 }, { "epoch": 0.0515442690459849, "grad_norm": 0.9348741880653083, "learning_rate": 9.987901954152733e-06, "loss": 0.3539, "step": 1502 }, { "epoch": 0.05157858613589568, "grad_norm": 0.8926054104305216, "learning_rate": 9.987863287033921e-06, "loss": 0.356, "step": 1503 }, { "epoch": 0.05161290322580645, "grad_norm": 1.0257119643958341, "learning_rate": 9.987824558295874e-06, "loss": 0.3639, "step": 1504 }, { "epoch": 0.051647220315717224, "grad_norm": 0.9357293529294259, "learning_rate": 9.98778576793907e-06, "loss": 0.3944, "step": 1505 }, { "epoch": 0.051681537405628, "grad_norm": 0.9279377485886386, "learning_rate": 9.987746915963989e-06, "loss": 0.4098, "step": 1506 }, { "epoch": 0.051715854495538775, "grad_norm": 1.0203435724246575, "learning_rate": 9.987708002371109e-06, "loss": 0.3993, "step": 1507 }, { "epoch": 0.051750171585449554, "grad_norm": 0.959662780750312, "learning_rate": 9.98766902716091e-06, "loss": 0.4099, "step": 1508 }, { "epoch": 0.05178448867536033, "grad_norm": 0.9036391505996408, "learning_rate": 9.98762999033388e-06, "loss": 0.4061, "step": 1509 }, { "epoch": 0.051818805765271106, "grad_norm": 0.9865172637379743, "learning_rate": 9.987590891890492e-06, "loss": 0.3834, "step": 1510 }, { "epoch": 0.05185312285518188, "grad_norm": 0.9468650927722274, "learning_rate": 9.987551731831235e-06, "loss": 0.4233, "step": 1511 }, { "epoch": 0.05188743994509266, "grad_norm": 1.0226621330540822, "learning_rate": 9.987512510156592e-06, "loss": 0.4171, "step": 1512 }, { "epoch": 0.05192175703500343, "grad_norm": 1.008355513341404, "learning_rate": 9.987473226867046e-06, "loss": 0.3835, "step": 1513 }, { "epoch": 0.05195607412491421, "grad_norm": 0.9282089333588088, "learning_rate": 9.987433881963084e-06, "loss": 0.4488, "step": 1514 }, { "epoch": 0.05199039121482498, "grad_norm": 0.9020651208991098, "learning_rate": 9.987394475445191e-06, "loss": 0.3229, "step": 1515 }, { "epoch": 0.05202470830473576, "grad_norm": 1.0002591559988092, "learning_rate": 9.987355007313854e-06, "loss": 0.4156, "step": 1516 }, { "epoch": 0.05205902539464653, "grad_norm": 0.92639840605952, "learning_rate": 9.987315477569559e-06, "loss": 0.4782, "step": 1517 }, { "epoch": 0.05209334248455731, "grad_norm": 0.9281611405826606, "learning_rate": 9.987275886212797e-06, "loss": 0.3817, "step": 1518 }, { "epoch": 0.052127659574468084, "grad_norm": 0.9819205780192377, "learning_rate": 9.987236233244058e-06, "loss": 0.4841, "step": 1519 }, { "epoch": 0.05216197666437886, "grad_norm": 0.8902830114670611, "learning_rate": 9.987196518663826e-06, "loss": 0.4319, "step": 1520 }, { "epoch": 0.052196293754289635, "grad_norm": 0.8850456617133253, "learning_rate": 9.9871567424726e-06, "loss": 0.3944, "step": 1521 }, { "epoch": 0.052230610844200415, "grad_norm": 0.9945998226159531, "learning_rate": 9.987116904670864e-06, "loss": 0.3997, "step": 1522 }, { "epoch": 0.05226492793411119, "grad_norm": 1.0334204242580947, "learning_rate": 9.987077005259112e-06, "loss": 0.3906, "step": 1523 }, { "epoch": 0.052299245024021966, "grad_norm": 0.9993967013520486, "learning_rate": 9.98703704423784e-06, "loss": 0.429, "step": 1524 }, { "epoch": 0.05233356211393274, "grad_norm": 0.9823001103945724, "learning_rate": 9.986997021607539e-06, "loss": 0.4561, "step": 1525 }, { "epoch": 0.05236787920384351, "grad_norm": 0.9753379043153347, "learning_rate": 9.986956937368704e-06, "loss": 0.3908, "step": 1526 }, { "epoch": 0.05240219629375429, "grad_norm": 0.8374986305109375, "learning_rate": 9.98691679152183e-06, "loss": 0.3737, "step": 1527 }, { "epoch": 0.05243651338366506, "grad_norm": 0.9340229457236762, "learning_rate": 9.986876584067413e-06, "loss": 0.4053, "step": 1528 }, { "epoch": 0.05247083047357584, "grad_norm": 1.0630192310218192, "learning_rate": 9.986836315005952e-06, "loss": 0.4558, "step": 1529 }, { "epoch": 0.052505147563486614, "grad_norm": 0.8641554540737523, "learning_rate": 9.98679598433794e-06, "loss": 0.3642, "step": 1530 }, { "epoch": 0.05253946465339739, "grad_norm": 0.8908730566221825, "learning_rate": 9.98675559206388e-06, "loss": 0.4672, "step": 1531 }, { "epoch": 0.052573781743308165, "grad_norm": 0.9660588564342211, "learning_rate": 9.986715138184267e-06, "loss": 0.4089, "step": 1532 }, { "epoch": 0.052608098833218944, "grad_norm": 0.980222304628557, "learning_rate": 9.986674622699603e-06, "loss": 0.4182, "step": 1533 }, { "epoch": 0.052642415923129716, "grad_norm": 0.8929235936048739, "learning_rate": 9.986634045610388e-06, "loss": 0.3768, "step": 1534 }, { "epoch": 0.052676733013040496, "grad_norm": 1.02954118911814, "learning_rate": 9.986593406917123e-06, "loss": 0.4445, "step": 1535 }, { "epoch": 0.05271105010295127, "grad_norm": 0.9538489098590661, "learning_rate": 9.98655270662031e-06, "loss": 0.4125, "step": 1536 }, { "epoch": 0.05274536719286205, "grad_norm": 1.022892653020069, "learning_rate": 9.986511944720452e-06, "loss": 0.4269, "step": 1537 }, { "epoch": 0.05277968428277282, "grad_norm": 0.8837410501115681, "learning_rate": 9.986471121218054e-06, "loss": 0.3658, "step": 1538 }, { "epoch": 0.0528140013726836, "grad_norm": 0.909907419756363, "learning_rate": 9.986430236113616e-06, "loss": 0.3468, "step": 1539 }, { "epoch": 0.05284831846259437, "grad_norm": 1.0034349582917252, "learning_rate": 9.986389289407648e-06, "loss": 0.3869, "step": 1540 }, { "epoch": 0.05288263555250515, "grad_norm": 0.87770737378822, "learning_rate": 9.986348281100654e-06, "loss": 0.348, "step": 1541 }, { "epoch": 0.05291695264241592, "grad_norm": 1.045049445090399, "learning_rate": 9.986307211193141e-06, "loss": 0.4157, "step": 1542 }, { "epoch": 0.0529512697323267, "grad_norm": 0.9470811667594132, "learning_rate": 9.986266079685614e-06, "loss": 0.3839, "step": 1543 }, { "epoch": 0.052985586822237474, "grad_norm": 1.0050443537843523, "learning_rate": 9.986224886578585e-06, "loss": 0.3736, "step": 1544 }, { "epoch": 0.05301990391214825, "grad_norm": 1.0568924476951862, "learning_rate": 9.98618363187256e-06, "loss": 0.3933, "step": 1545 }, { "epoch": 0.053054221002059025, "grad_norm": 1.0195453072927427, "learning_rate": 9.98614231556805e-06, "loss": 0.4508, "step": 1546 }, { "epoch": 0.053088538091969804, "grad_norm": 1.015654970169575, "learning_rate": 9.986100937665563e-06, "loss": 0.4111, "step": 1547 }, { "epoch": 0.05312285518188058, "grad_norm": 0.9185174998635485, "learning_rate": 9.986059498165615e-06, "loss": 0.42, "step": 1548 }, { "epoch": 0.05315717227179135, "grad_norm": 0.8778622012281763, "learning_rate": 9.986017997068714e-06, "loss": 0.3797, "step": 1549 }, { "epoch": 0.05319148936170213, "grad_norm": 0.8718606005252711, "learning_rate": 9.985976434375373e-06, "loss": 0.3583, "step": 1550 }, { "epoch": 0.0532258064516129, "grad_norm": 0.9469129219738088, "learning_rate": 9.985934810086107e-06, "loss": 0.4325, "step": 1551 }, { "epoch": 0.05326012354152368, "grad_norm": 0.8906782778431865, "learning_rate": 9.985893124201428e-06, "loss": 0.3688, "step": 1552 }, { "epoch": 0.05329444063143445, "grad_norm": 0.9212744820320968, "learning_rate": 9.985851376721855e-06, "loss": 0.4118, "step": 1553 }, { "epoch": 0.05332875772134523, "grad_norm": 0.8893830132129127, "learning_rate": 9.9858095676479e-06, "loss": 0.4612, "step": 1554 }, { "epoch": 0.053363074811256, "grad_norm": 0.9260723615267925, "learning_rate": 9.98576769698008e-06, "loss": 0.4249, "step": 1555 }, { "epoch": 0.05339739190116678, "grad_norm": 0.9989563150850966, "learning_rate": 9.985725764718914e-06, "loss": 0.4184, "step": 1556 }, { "epoch": 0.053431708991077555, "grad_norm": 0.901838678938783, "learning_rate": 9.985683770864917e-06, "loss": 0.3418, "step": 1557 }, { "epoch": 0.053466026080988334, "grad_norm": 0.9566366500007986, "learning_rate": 9.985641715418612e-06, "loss": 0.3752, "step": 1558 }, { "epoch": 0.053500343170899106, "grad_norm": 0.8332680196471611, "learning_rate": 9.985599598380515e-06, "loss": 0.3444, "step": 1559 }, { "epoch": 0.053534660260809885, "grad_norm": 0.8854692988487175, "learning_rate": 9.985557419751148e-06, "loss": 0.3868, "step": 1560 }, { "epoch": 0.05356897735072066, "grad_norm": 0.9179884107623022, "learning_rate": 9.985515179531033e-06, "loss": 0.3807, "step": 1561 }, { "epoch": 0.05360329444063144, "grad_norm": 0.8343459428584296, "learning_rate": 9.98547287772069e-06, "loss": 0.3483, "step": 1562 }, { "epoch": 0.05363761153054221, "grad_norm": 0.8489172115101682, "learning_rate": 9.98543051432064e-06, "loss": 0.3916, "step": 1563 }, { "epoch": 0.05367192862045299, "grad_norm": 0.8578335319664773, "learning_rate": 9.98538808933141e-06, "loss": 0.399, "step": 1564 }, { "epoch": 0.05370624571036376, "grad_norm": 0.9229760980179228, "learning_rate": 9.985345602753523e-06, "loss": 0.4045, "step": 1565 }, { "epoch": 0.05374056280027454, "grad_norm": 0.9343243358748151, "learning_rate": 9.985303054587503e-06, "loss": 0.434, "step": 1566 }, { "epoch": 0.05377487989018531, "grad_norm": 0.9260315656514785, "learning_rate": 9.985260444833875e-06, "loss": 0.3889, "step": 1567 }, { "epoch": 0.05380919698009609, "grad_norm": 0.902697223518026, "learning_rate": 9.985217773493168e-06, "loss": 0.395, "step": 1568 }, { "epoch": 0.05384351407000686, "grad_norm": 0.937411756538958, "learning_rate": 9.985175040565906e-06, "loss": 0.39, "step": 1569 }, { "epoch": 0.053877831159917636, "grad_norm": 0.8877903733399978, "learning_rate": 9.98513224605262e-06, "loss": 0.4215, "step": 1570 }, { "epoch": 0.053912148249828415, "grad_norm": 1.0475876287564165, "learning_rate": 9.985089389953837e-06, "loss": 0.4356, "step": 1571 }, { "epoch": 0.05394646533973919, "grad_norm": 0.9103690728766332, "learning_rate": 9.985046472270085e-06, "loss": 0.3968, "step": 1572 }, { "epoch": 0.053980782429649966, "grad_norm": 0.9538511169750927, "learning_rate": 9.985003493001898e-06, "loss": 0.3657, "step": 1573 }, { "epoch": 0.05401509951956074, "grad_norm": 0.9047353011418088, "learning_rate": 9.984960452149803e-06, "loss": 0.3883, "step": 1574 }, { "epoch": 0.05404941660947152, "grad_norm": 0.9469961318993763, "learning_rate": 9.984917349714335e-06, "loss": 0.4129, "step": 1575 }, { "epoch": 0.05408373369938229, "grad_norm": 1.0232904034236283, "learning_rate": 9.984874185696025e-06, "loss": 0.4355, "step": 1576 }, { "epoch": 0.05411805078929307, "grad_norm": 0.9073739409158385, "learning_rate": 9.984830960095407e-06, "loss": 0.4015, "step": 1577 }, { "epoch": 0.05415236787920384, "grad_norm": 0.8153527741633778, "learning_rate": 9.984787672913012e-06, "loss": 0.3752, "step": 1578 }, { "epoch": 0.05418668496911462, "grad_norm": 0.9024352736275276, "learning_rate": 9.984744324149378e-06, "loss": 0.3879, "step": 1579 }, { "epoch": 0.05422100205902539, "grad_norm": 0.8902552305155801, "learning_rate": 9.984700913805038e-06, "loss": 0.3712, "step": 1580 }, { "epoch": 0.05425531914893617, "grad_norm": 0.9329721637172635, "learning_rate": 9.984657441880533e-06, "loss": 0.3982, "step": 1581 }, { "epoch": 0.054289636238846944, "grad_norm": 1.242689058365321, "learning_rate": 9.984613908376394e-06, "loss": 0.3954, "step": 1582 }, { "epoch": 0.054323953328757724, "grad_norm": 0.8981711072176011, "learning_rate": 9.984570313293163e-06, "loss": 0.4302, "step": 1583 }, { "epoch": 0.054358270418668496, "grad_norm": 0.9508626333113691, "learning_rate": 9.984526656631378e-06, "loss": 0.3983, "step": 1584 }, { "epoch": 0.054392587508579275, "grad_norm": 0.8662803974970479, "learning_rate": 9.984482938391576e-06, "loss": 0.3756, "step": 1585 }, { "epoch": 0.05442690459849005, "grad_norm": 0.8562367438063275, "learning_rate": 9.984439158574299e-06, "loss": 0.3463, "step": 1586 }, { "epoch": 0.05446122168840083, "grad_norm": 0.8867577837085483, "learning_rate": 9.984395317180088e-06, "loss": 0.4242, "step": 1587 }, { "epoch": 0.0544955387783116, "grad_norm": 0.9236430332898099, "learning_rate": 9.984351414209484e-06, "loss": 0.4347, "step": 1588 }, { "epoch": 0.05452985586822238, "grad_norm": 1.0060912931481767, "learning_rate": 9.984307449663028e-06, "loss": 0.4173, "step": 1589 }, { "epoch": 0.05456417295813315, "grad_norm": 0.9753897294856473, "learning_rate": 9.984263423541265e-06, "loss": 0.4246, "step": 1590 }, { "epoch": 0.05459849004804392, "grad_norm": 0.8718430406801522, "learning_rate": 9.98421933584474e-06, "loss": 0.3888, "step": 1591 }, { "epoch": 0.0546328071379547, "grad_norm": 0.9401179973707751, "learning_rate": 9.984175186573997e-06, "loss": 0.4069, "step": 1592 }, { "epoch": 0.054667124227865474, "grad_norm": 0.7865895335664878, "learning_rate": 9.984130975729578e-06, "loss": 0.3887, "step": 1593 }, { "epoch": 0.05470144131777625, "grad_norm": 0.8134944210666338, "learning_rate": 9.984086703312032e-06, "loss": 0.3491, "step": 1594 }, { "epoch": 0.054735758407687025, "grad_norm": 0.9445581377946629, "learning_rate": 9.984042369321908e-06, "loss": 0.4282, "step": 1595 }, { "epoch": 0.054770075497597805, "grad_norm": 1.0479271798505037, "learning_rate": 9.983997973759749e-06, "loss": 0.3949, "step": 1596 }, { "epoch": 0.05480439258750858, "grad_norm": 0.9232533285789112, "learning_rate": 9.983953516626106e-06, "loss": 0.3913, "step": 1597 }, { "epoch": 0.054838709677419356, "grad_norm": 1.0421563972658687, "learning_rate": 9.98390899792153e-06, "loss": 0.4517, "step": 1598 }, { "epoch": 0.05487302676733013, "grad_norm": 1.0155038726253234, "learning_rate": 9.983864417646567e-06, "loss": 0.3444, "step": 1599 }, { "epoch": 0.05490734385724091, "grad_norm": 0.912690364176926, "learning_rate": 9.98381977580177e-06, "loss": 0.4418, "step": 1600 }, { "epoch": 0.05494166094715168, "grad_norm": 0.8755328092948635, "learning_rate": 9.98377507238769e-06, "loss": 0.4577, "step": 1601 }, { "epoch": 0.05497597803706246, "grad_norm": 0.9325951496892272, "learning_rate": 9.983730307404881e-06, "loss": 0.3825, "step": 1602 }, { "epoch": 0.05501029512697323, "grad_norm": 0.9761268607373378, "learning_rate": 9.983685480853894e-06, "loss": 0.4779, "step": 1603 }, { "epoch": 0.05504461221688401, "grad_norm": 0.8768534191541116, "learning_rate": 9.983640592735282e-06, "loss": 0.3585, "step": 1604 }, { "epoch": 0.05507892930679478, "grad_norm": 0.9980695140010507, "learning_rate": 9.983595643049602e-06, "loss": 0.4168, "step": 1605 }, { "epoch": 0.05511324639670556, "grad_norm": 0.9118484650521302, "learning_rate": 9.983550631797407e-06, "loss": 0.3656, "step": 1606 }, { "epoch": 0.055147563486616334, "grad_norm": 0.8168929352183816, "learning_rate": 9.983505558979257e-06, "loss": 0.3723, "step": 1607 }, { "epoch": 0.05518188057652711, "grad_norm": 0.8132290830333336, "learning_rate": 9.983460424595703e-06, "loss": 0.3393, "step": 1608 }, { "epoch": 0.055216197666437886, "grad_norm": 0.7874287711213904, "learning_rate": 9.983415228647306e-06, "loss": 0.3509, "step": 1609 }, { "epoch": 0.055250514756348665, "grad_norm": 0.9327799083688271, "learning_rate": 9.983369971134626e-06, "loss": 0.4263, "step": 1610 }, { "epoch": 0.05528483184625944, "grad_norm": 0.8312153255599188, "learning_rate": 9.983324652058218e-06, "loss": 0.3981, "step": 1611 }, { "epoch": 0.05531914893617021, "grad_norm": 0.9327177268159285, "learning_rate": 9.983279271418644e-06, "loss": 0.4, "step": 1612 }, { "epoch": 0.05535346602608099, "grad_norm": 0.9001289085668653, "learning_rate": 9.983233829216466e-06, "loss": 0.3426, "step": 1613 }, { "epoch": 0.05538778311599176, "grad_norm": 0.8587711234689902, "learning_rate": 9.983188325452244e-06, "loss": 0.3936, "step": 1614 }, { "epoch": 0.05542210020590254, "grad_norm": 0.9313659094034138, "learning_rate": 9.983142760126538e-06, "loss": 0.3977, "step": 1615 }, { "epoch": 0.05545641729581331, "grad_norm": 0.8733349517071122, "learning_rate": 9.983097133239913e-06, "loss": 0.345, "step": 1616 }, { "epoch": 0.05549073438572409, "grad_norm": 0.9215153846673391, "learning_rate": 9.983051444792935e-06, "loss": 0.3788, "step": 1617 }, { "epoch": 0.055525051475634864, "grad_norm": 0.8930465401522235, "learning_rate": 9.983005694786166e-06, "loss": 0.3869, "step": 1618 }, { "epoch": 0.05555936856554564, "grad_norm": 0.8632284912088445, "learning_rate": 9.982959883220169e-06, "loss": 0.3893, "step": 1619 }, { "epoch": 0.055593685655456415, "grad_norm": 0.9417760230992946, "learning_rate": 9.982914010095514e-06, "loss": 0.3726, "step": 1620 }, { "epoch": 0.055628002745367194, "grad_norm": 0.9816249189738917, "learning_rate": 9.982868075412764e-06, "loss": 0.4124, "step": 1621 }, { "epoch": 0.05566231983527797, "grad_norm": 0.8205472053707811, "learning_rate": 9.98282207917249e-06, "loss": 0.3729, "step": 1622 }, { "epoch": 0.055696636925188746, "grad_norm": 0.9195117609054322, "learning_rate": 9.982776021375259e-06, "loss": 0.4259, "step": 1623 }, { "epoch": 0.05573095401509952, "grad_norm": 0.9413741148428405, "learning_rate": 9.98272990202164e-06, "loss": 0.4041, "step": 1624 }, { "epoch": 0.0557652711050103, "grad_norm": 0.8910381050252907, "learning_rate": 9.982683721112201e-06, "loss": 0.3893, "step": 1625 }, { "epoch": 0.05579958819492107, "grad_norm": 0.8208521783482754, "learning_rate": 9.982637478647513e-06, "loss": 0.3987, "step": 1626 }, { "epoch": 0.05583390528483185, "grad_norm": 0.9855833481240575, "learning_rate": 9.98259117462815e-06, "loss": 0.4152, "step": 1627 }, { "epoch": 0.05586822237474262, "grad_norm": 0.8199487596094588, "learning_rate": 9.98254480905468e-06, "loss": 0.3782, "step": 1628 }, { "epoch": 0.0559025394646534, "grad_norm": 0.8549263890305617, "learning_rate": 9.98249838192768e-06, "loss": 0.3928, "step": 1629 }, { "epoch": 0.05593685655456417, "grad_norm": 0.8790114103028127, "learning_rate": 9.982451893247721e-06, "loss": 0.4104, "step": 1630 }, { "epoch": 0.05597117364447495, "grad_norm": 0.8640860610724959, "learning_rate": 9.982405343015377e-06, "loss": 0.3743, "step": 1631 }, { "epoch": 0.056005490734385724, "grad_norm": 0.965897099524108, "learning_rate": 9.982358731231225e-06, "loss": 0.3789, "step": 1632 }, { "epoch": 0.0560398078242965, "grad_norm": 1.082493112701991, "learning_rate": 9.982312057895839e-06, "loss": 0.3939, "step": 1633 }, { "epoch": 0.056074124914207275, "grad_norm": 0.8781223607276012, "learning_rate": 9.982265323009797e-06, "loss": 0.4013, "step": 1634 }, { "epoch": 0.05610844200411805, "grad_norm": 0.9050522204888946, "learning_rate": 9.982218526573676e-06, "loss": 0.3818, "step": 1635 }, { "epoch": 0.05614275909402883, "grad_norm": 0.9684608403097945, "learning_rate": 9.982171668588054e-06, "loss": 0.3913, "step": 1636 }, { "epoch": 0.0561770761839396, "grad_norm": 1.0441932478353506, "learning_rate": 9.982124749053508e-06, "loss": 0.3926, "step": 1637 }, { "epoch": 0.05621139327385038, "grad_norm": 0.8705987231983617, "learning_rate": 9.98207776797062e-06, "loss": 0.3895, "step": 1638 }, { "epoch": 0.05624571036376115, "grad_norm": 0.9593807141163201, "learning_rate": 9.98203072533997e-06, "loss": 0.4312, "step": 1639 }, { "epoch": 0.05628002745367193, "grad_norm": 0.9613844802887577, "learning_rate": 9.981983621162139e-06, "loss": 0.3798, "step": 1640 }, { "epoch": 0.0563143445435827, "grad_norm": 0.9673668925711528, "learning_rate": 9.981936455437708e-06, "loss": 0.3541, "step": 1641 }, { "epoch": 0.05634866163349348, "grad_norm": 0.846563676644521, "learning_rate": 9.981889228167262e-06, "loss": 0.3843, "step": 1642 }, { "epoch": 0.05638297872340425, "grad_norm": 0.8424852111453375, "learning_rate": 9.981841939351382e-06, "loss": 0.3513, "step": 1643 }, { "epoch": 0.05641729581331503, "grad_norm": 1.0222982744518427, "learning_rate": 9.981794588990653e-06, "loss": 0.4394, "step": 1644 }, { "epoch": 0.056451612903225805, "grad_norm": 0.8541467438251471, "learning_rate": 9.98174717708566e-06, "loss": 0.4094, "step": 1645 }, { "epoch": 0.056485929993136584, "grad_norm": 0.8907654834547813, "learning_rate": 9.981699703636988e-06, "loss": 0.367, "step": 1646 }, { "epoch": 0.056520247083047356, "grad_norm": 1.1193010859963615, "learning_rate": 9.981652168645225e-06, "loss": 0.4042, "step": 1647 }, { "epoch": 0.056554564172958136, "grad_norm": 1.0024080849190498, "learning_rate": 9.981604572110959e-06, "loss": 0.4024, "step": 1648 }, { "epoch": 0.05658888126286891, "grad_norm": 0.8781904472427698, "learning_rate": 9.981556914034775e-06, "loss": 0.3965, "step": 1649 }, { "epoch": 0.05662319835277969, "grad_norm": 0.9748276564237055, "learning_rate": 9.981509194417264e-06, "loss": 0.3478, "step": 1650 }, { "epoch": 0.05665751544269046, "grad_norm": 0.9452420017612008, "learning_rate": 9.981461413259014e-06, "loss": 0.4305, "step": 1651 }, { "epoch": 0.05669183253260124, "grad_norm": 0.853796185007251, "learning_rate": 9.981413570560617e-06, "loss": 0.3921, "step": 1652 }, { "epoch": 0.05672614962251201, "grad_norm": 0.8745434411609104, "learning_rate": 9.981365666322662e-06, "loss": 0.3518, "step": 1653 }, { "epoch": 0.05676046671242279, "grad_norm": 0.9922375414031969, "learning_rate": 9.981317700545742e-06, "loss": 0.4037, "step": 1654 }, { "epoch": 0.05679478380233356, "grad_norm": 0.8477735041798443, "learning_rate": 9.981269673230449e-06, "loss": 0.3772, "step": 1655 }, { "epoch": 0.056829100892244334, "grad_norm": 0.8170527406184992, "learning_rate": 9.98122158437738e-06, "loss": 0.3387, "step": 1656 }, { "epoch": 0.056863417982155114, "grad_norm": 0.8186759786314541, "learning_rate": 9.981173433987122e-06, "loss": 0.384, "step": 1657 }, { "epoch": 0.056897735072065886, "grad_norm": 0.9370228568052534, "learning_rate": 9.981125222060275e-06, "loss": 0.3487, "step": 1658 }, { "epoch": 0.056932052161976665, "grad_norm": 0.8906612213897197, "learning_rate": 9.981076948597432e-06, "loss": 0.3657, "step": 1659 }, { "epoch": 0.05696636925188744, "grad_norm": 0.8431174695592812, "learning_rate": 9.981028613599193e-06, "loss": 0.4115, "step": 1660 }, { "epoch": 0.057000686341798217, "grad_norm": 0.8985450257410104, "learning_rate": 9.980980217066152e-06, "loss": 0.4134, "step": 1661 }, { "epoch": 0.05703500343170899, "grad_norm": 0.8913819215464539, "learning_rate": 9.980931758998907e-06, "loss": 0.4943, "step": 1662 }, { "epoch": 0.05706932052161977, "grad_norm": 0.8720208163887697, "learning_rate": 9.980883239398058e-06, "loss": 0.413, "step": 1663 }, { "epoch": 0.05710363761153054, "grad_norm": 0.8727859854694314, "learning_rate": 9.980834658264205e-06, "loss": 0.362, "step": 1664 }, { "epoch": 0.05713795470144132, "grad_norm": 0.8506082923533476, "learning_rate": 9.980786015597945e-06, "loss": 0.3665, "step": 1665 }, { "epoch": 0.05717227179135209, "grad_norm": 0.8462126244923047, "learning_rate": 9.980737311399881e-06, "loss": 0.3483, "step": 1666 }, { "epoch": 0.05720658888126287, "grad_norm": 0.8903599831867463, "learning_rate": 9.980688545670615e-06, "loss": 0.3932, "step": 1667 }, { "epoch": 0.05724090597117364, "grad_norm": 0.913471838348564, "learning_rate": 9.98063971841075e-06, "loss": 0.3627, "step": 1668 }, { "epoch": 0.05727522306108442, "grad_norm": 0.9533756078312233, "learning_rate": 9.980590829620886e-06, "loss": 0.3964, "step": 1669 }, { "epoch": 0.057309540150995195, "grad_norm": 0.9256779677045888, "learning_rate": 9.980541879301629e-06, "loss": 0.464, "step": 1670 }, { "epoch": 0.057343857240905974, "grad_norm": 0.9836523884484613, "learning_rate": 9.980492867453585e-06, "loss": 0.3769, "step": 1671 }, { "epoch": 0.057378174330816746, "grad_norm": 0.859462863269452, "learning_rate": 9.980443794077358e-06, "loss": 0.4431, "step": 1672 }, { "epoch": 0.057412491420727525, "grad_norm": 0.970249181774676, "learning_rate": 9.980394659173556e-06, "loss": 0.4117, "step": 1673 }, { "epoch": 0.0574468085106383, "grad_norm": 0.9322336244500217, "learning_rate": 9.980345462742783e-06, "loss": 0.405, "step": 1674 }, { "epoch": 0.05748112560054908, "grad_norm": 0.8304053744073092, "learning_rate": 9.980296204785647e-06, "loss": 0.3412, "step": 1675 }, { "epoch": 0.05751544269045985, "grad_norm": 0.8572792016640942, "learning_rate": 9.98024688530276e-06, "loss": 0.3848, "step": 1676 }, { "epoch": 0.05754975978037062, "grad_norm": 0.8882676410143079, "learning_rate": 9.98019750429473e-06, "loss": 0.4113, "step": 1677 }, { "epoch": 0.0575840768702814, "grad_norm": 0.92247240433865, "learning_rate": 9.980148061762162e-06, "loss": 0.3815, "step": 1678 }, { "epoch": 0.05761839396019217, "grad_norm": 0.8220358039861589, "learning_rate": 9.980098557705675e-06, "loss": 0.3939, "step": 1679 }, { "epoch": 0.05765271105010295, "grad_norm": 0.827831926133414, "learning_rate": 9.980048992125875e-06, "loss": 0.3655, "step": 1680 }, { "epoch": 0.057687028140013724, "grad_norm": 0.8613971412034268, "learning_rate": 9.979999365023377e-06, "loss": 0.4356, "step": 1681 }, { "epoch": 0.0577213452299245, "grad_norm": 0.8058798920798226, "learning_rate": 9.979949676398792e-06, "loss": 0.3546, "step": 1682 }, { "epoch": 0.057755662319835276, "grad_norm": 0.9656232704536295, "learning_rate": 9.979899926252733e-06, "loss": 0.3767, "step": 1683 }, { "epoch": 0.057789979409746055, "grad_norm": 0.9169285420861162, "learning_rate": 9.97985011458582e-06, "loss": 0.4404, "step": 1684 }, { "epoch": 0.05782429649965683, "grad_norm": 0.8386828509233026, "learning_rate": 9.979800241398663e-06, "loss": 0.3797, "step": 1685 }, { "epoch": 0.057858613589567606, "grad_norm": 0.858540957780553, "learning_rate": 9.97975030669188e-06, "loss": 0.386, "step": 1686 }, { "epoch": 0.05789293067947838, "grad_norm": 0.9886983813616154, "learning_rate": 9.979700310466087e-06, "loss": 0.4357, "step": 1687 }, { "epoch": 0.05792724776938916, "grad_norm": 0.8916794139438206, "learning_rate": 9.979650252721904e-06, "loss": 0.3487, "step": 1688 }, { "epoch": 0.05796156485929993, "grad_norm": 0.9278955805763281, "learning_rate": 9.979600133459948e-06, "loss": 0.4248, "step": 1689 }, { "epoch": 0.05799588194921071, "grad_norm": 0.9030705629622418, "learning_rate": 9.979549952680836e-06, "loss": 0.3866, "step": 1690 }, { "epoch": 0.05803019903912148, "grad_norm": 0.8961260694685549, "learning_rate": 9.979499710385192e-06, "loss": 0.3809, "step": 1691 }, { "epoch": 0.05806451612903226, "grad_norm": 0.9166373164746215, "learning_rate": 9.979449406573633e-06, "loss": 0.375, "step": 1692 }, { "epoch": 0.05809883321894303, "grad_norm": 0.8534494453507867, "learning_rate": 9.979399041246783e-06, "loss": 0.3396, "step": 1693 }, { "epoch": 0.05813315030885381, "grad_norm": 0.8982284130527395, "learning_rate": 9.979348614405263e-06, "loss": 0.3891, "step": 1694 }, { "epoch": 0.058167467398764584, "grad_norm": 0.974871816033078, "learning_rate": 9.979298126049696e-06, "loss": 0.4148, "step": 1695 }, { "epoch": 0.058201784488675364, "grad_norm": 0.9672473084704549, "learning_rate": 9.979247576180706e-06, "loss": 0.3603, "step": 1696 }, { "epoch": 0.058236101578586136, "grad_norm": 0.8807674521963013, "learning_rate": 9.979196964798918e-06, "loss": 0.3613, "step": 1697 }, { "epoch": 0.05827041866849691, "grad_norm": 0.9352605848073314, "learning_rate": 9.979146291904955e-06, "loss": 0.4341, "step": 1698 }, { "epoch": 0.05830473575840769, "grad_norm": 0.8937117035491897, "learning_rate": 9.979095557499446e-06, "loss": 0.3441, "step": 1699 }, { "epoch": 0.05833905284831846, "grad_norm": 0.8554490828796043, "learning_rate": 9.979044761583016e-06, "loss": 0.3659, "step": 1700 }, { "epoch": 0.05837336993822924, "grad_norm": 0.9285134968171538, "learning_rate": 9.978993904156295e-06, "loss": 0.3862, "step": 1701 }, { "epoch": 0.05840768702814001, "grad_norm": 0.9419642307477631, "learning_rate": 9.978942985219907e-06, "loss": 0.438, "step": 1702 }, { "epoch": 0.05844200411805079, "grad_norm": 0.889097688442848, "learning_rate": 9.978892004774483e-06, "loss": 0.336, "step": 1703 }, { "epoch": 0.05847632120796156, "grad_norm": 0.8465355279010772, "learning_rate": 9.978840962820655e-06, "loss": 0.3612, "step": 1704 }, { "epoch": 0.05851063829787234, "grad_norm": 0.9672848374035192, "learning_rate": 9.97878985935905e-06, "loss": 0.4068, "step": 1705 }, { "epoch": 0.058544955387783114, "grad_norm": 1.0003775222206222, "learning_rate": 9.978738694390302e-06, "loss": 0.3959, "step": 1706 }, { "epoch": 0.05857927247769389, "grad_norm": 0.9614913860115303, "learning_rate": 9.978687467915043e-06, "loss": 0.3725, "step": 1707 }, { "epoch": 0.058613589567604665, "grad_norm": 0.8809146541983791, "learning_rate": 9.978636179933903e-06, "loss": 0.4372, "step": 1708 }, { "epoch": 0.058647906657515445, "grad_norm": 0.8685891277507131, "learning_rate": 9.978584830447518e-06, "loss": 0.3876, "step": 1709 }, { "epoch": 0.05868222374742622, "grad_norm": 0.9223940011966325, "learning_rate": 9.978533419456522e-06, "loss": 0.3276, "step": 1710 }, { "epoch": 0.058716540837336996, "grad_norm": 1.059136312218711, "learning_rate": 9.978481946961548e-06, "loss": 0.3982, "step": 1711 }, { "epoch": 0.05875085792724777, "grad_norm": 0.9442992348850897, "learning_rate": 9.978430412963239e-06, "loss": 0.418, "step": 1712 }, { "epoch": 0.05878517501715855, "grad_norm": 0.9722164924811942, "learning_rate": 9.978378817462221e-06, "loss": 0.3398, "step": 1713 }, { "epoch": 0.05881949210706932, "grad_norm": 0.9618918528611493, "learning_rate": 9.978327160459141e-06, "loss": 0.3829, "step": 1714 }, { "epoch": 0.0588538091969801, "grad_norm": 0.9262463476645618, "learning_rate": 9.978275441954632e-06, "loss": 0.3807, "step": 1715 }, { "epoch": 0.05888812628689087, "grad_norm": 0.8682090994265003, "learning_rate": 9.978223661949333e-06, "loss": 0.3561, "step": 1716 }, { "epoch": 0.05892244337680165, "grad_norm": 0.9037179531672123, "learning_rate": 9.978171820443885e-06, "loss": 0.3928, "step": 1717 }, { "epoch": 0.05895676046671242, "grad_norm": 0.8527490202770619, "learning_rate": 9.97811991743893e-06, "loss": 0.3851, "step": 1718 }, { "epoch": 0.0589910775566232, "grad_norm": 0.869960142393549, "learning_rate": 9.978067952935105e-06, "loss": 0.4036, "step": 1719 }, { "epoch": 0.059025394646533974, "grad_norm": 1.1285493072181876, "learning_rate": 9.978015926933053e-06, "loss": 0.3694, "step": 1720 }, { "epoch": 0.059059711736444746, "grad_norm": 0.8334028743238204, "learning_rate": 9.97796383943342e-06, "loss": 0.3872, "step": 1721 }, { "epoch": 0.059094028826355526, "grad_norm": 0.8614332639869025, "learning_rate": 9.977911690436848e-06, "loss": 0.4037, "step": 1722 }, { "epoch": 0.0591283459162663, "grad_norm": 0.7990427497327, "learning_rate": 9.977859479943982e-06, "loss": 0.3793, "step": 1723 }, { "epoch": 0.05916266300617708, "grad_norm": 0.8689672845407431, "learning_rate": 9.977807207955462e-06, "loss": 0.3751, "step": 1724 }, { "epoch": 0.05919698009608785, "grad_norm": 0.912777655183617, "learning_rate": 9.977754874471941e-06, "loss": 0.4077, "step": 1725 }, { "epoch": 0.05923129718599863, "grad_norm": 0.9826029192695662, "learning_rate": 9.97770247949406e-06, "loss": 0.4167, "step": 1726 }, { "epoch": 0.0592656142759094, "grad_norm": 0.8525240272530378, "learning_rate": 9.97765002302247e-06, "loss": 0.3277, "step": 1727 }, { "epoch": 0.05929993136582018, "grad_norm": 0.8367894887798529, "learning_rate": 9.977597505057816e-06, "loss": 0.409, "step": 1728 }, { "epoch": 0.05933424845573095, "grad_norm": 0.8851858552311469, "learning_rate": 9.977544925600748e-06, "loss": 0.4408, "step": 1729 }, { "epoch": 0.05936856554564173, "grad_norm": 0.860119738814464, "learning_rate": 9.977492284651916e-06, "loss": 0.3757, "step": 1730 }, { "epoch": 0.059402882635552504, "grad_norm": 1.0958926231276194, "learning_rate": 9.97743958221197e-06, "loss": 0.4107, "step": 1731 }, { "epoch": 0.05943719972546328, "grad_norm": 0.8644978358596882, "learning_rate": 9.977386818281561e-06, "loss": 0.3697, "step": 1732 }, { "epoch": 0.059471516815374055, "grad_norm": 0.9361167919534926, "learning_rate": 9.977333992861342e-06, "loss": 0.4094, "step": 1733 }, { "epoch": 0.059505833905284834, "grad_norm": 1.0177542387303538, "learning_rate": 9.977281105951963e-06, "loss": 0.4007, "step": 1734 }, { "epoch": 0.059540150995195606, "grad_norm": 0.9014028354687431, "learning_rate": 9.97722815755408e-06, "loss": 0.3553, "step": 1735 }, { "epoch": 0.059574468085106386, "grad_norm": 0.8895682010847491, "learning_rate": 9.977175147668346e-06, "loss": 0.3493, "step": 1736 }, { "epoch": 0.05960878517501716, "grad_norm": 0.8416651402704289, "learning_rate": 9.977122076295415e-06, "loss": 0.3544, "step": 1737 }, { "epoch": 0.05964310226492794, "grad_norm": 0.8247791225461875, "learning_rate": 9.977068943435944e-06, "loss": 0.341, "step": 1738 }, { "epoch": 0.05967741935483871, "grad_norm": 0.9004343146070962, "learning_rate": 9.977015749090589e-06, "loss": 0.3408, "step": 1739 }, { "epoch": 0.05971173644474949, "grad_norm": 0.9243419047570907, "learning_rate": 9.976962493260007e-06, "loss": 0.3956, "step": 1740 }, { "epoch": 0.05974605353466026, "grad_norm": 0.8889719099463661, "learning_rate": 9.976909175944856e-06, "loss": 0.3884, "step": 1741 }, { "epoch": 0.05978037062457103, "grad_norm": 0.9513127181142379, "learning_rate": 9.976855797145793e-06, "loss": 0.4331, "step": 1742 }, { "epoch": 0.05981468771448181, "grad_norm": 0.9225794732213223, "learning_rate": 9.976802356863483e-06, "loss": 0.4676, "step": 1743 }, { "epoch": 0.059849004804392585, "grad_norm": 1.0118843467304075, "learning_rate": 9.976748855098578e-06, "loss": 0.3655, "step": 1744 }, { "epoch": 0.059883321894303364, "grad_norm": 0.8406226354356019, "learning_rate": 9.976695291851746e-06, "loss": 0.3484, "step": 1745 }, { "epoch": 0.059917638984214136, "grad_norm": 1.0901529453970193, "learning_rate": 9.976641667123645e-06, "loss": 0.3786, "step": 1746 }, { "epoch": 0.059951956074124915, "grad_norm": 0.8690072307418457, "learning_rate": 9.976587980914937e-06, "loss": 0.3316, "step": 1747 }, { "epoch": 0.05998627316403569, "grad_norm": 0.9111185962365086, "learning_rate": 9.97653423322629e-06, "loss": 0.3742, "step": 1748 }, { "epoch": 0.06002059025394647, "grad_norm": 0.8775710236072355, "learning_rate": 9.976480424058362e-06, "loss": 0.3535, "step": 1749 }, { "epoch": 0.06005490734385724, "grad_norm": 0.8505440272962523, "learning_rate": 9.97642655341182e-06, "loss": 0.425, "step": 1750 }, { "epoch": 0.06008922443376802, "grad_norm": 0.9971580044523222, "learning_rate": 9.976372621287332e-06, "loss": 0.3581, "step": 1751 }, { "epoch": 0.06012354152367879, "grad_norm": 1.0005735697394567, "learning_rate": 9.97631862768556e-06, "loss": 0.4181, "step": 1752 }, { "epoch": 0.06015785861358957, "grad_norm": 0.9158047004384732, "learning_rate": 9.976264572607176e-06, "loss": 0.3867, "step": 1753 }, { "epoch": 0.06019217570350034, "grad_norm": 0.8810658982158647, "learning_rate": 9.976210456052842e-06, "loss": 0.4014, "step": 1754 }, { "epoch": 0.06022649279341112, "grad_norm": 1.014533703456878, "learning_rate": 9.97615627802323e-06, "loss": 0.3631, "step": 1755 }, { "epoch": 0.06026080988332189, "grad_norm": 0.9669221076496232, "learning_rate": 9.97610203851901e-06, "loss": 0.3817, "step": 1756 }, { "epoch": 0.06029512697323267, "grad_norm": 0.8203009449347716, "learning_rate": 9.97604773754085e-06, "loss": 0.3709, "step": 1757 }, { "epoch": 0.060329444063143445, "grad_norm": 0.9992077370852004, "learning_rate": 9.975993375089423e-06, "loss": 0.3875, "step": 1758 }, { "epoch": 0.060363761153054224, "grad_norm": 0.8422672293436305, "learning_rate": 9.975938951165398e-06, "loss": 0.4234, "step": 1759 }, { "epoch": 0.060398078242964996, "grad_norm": 0.8244268241532051, "learning_rate": 9.97588446576945e-06, "loss": 0.3943, "step": 1760 }, { "epoch": 0.060432395332875775, "grad_norm": 0.905148220824675, "learning_rate": 9.975829918902249e-06, "loss": 0.4333, "step": 1761 }, { "epoch": 0.06046671242278655, "grad_norm": 0.8731756811583289, "learning_rate": 9.975775310564471e-06, "loss": 0.3654, "step": 1762 }, { "epoch": 0.06050102951269732, "grad_norm": 0.8962342186606871, "learning_rate": 9.975720640756792e-06, "loss": 0.4014, "step": 1763 }, { "epoch": 0.0605353466026081, "grad_norm": 0.8489135372726656, "learning_rate": 9.975665909479883e-06, "loss": 0.374, "step": 1764 }, { "epoch": 0.06056966369251887, "grad_norm": 0.8671244524989375, "learning_rate": 9.975611116734425e-06, "loss": 0.3439, "step": 1765 }, { "epoch": 0.06060398078242965, "grad_norm": 0.8572289993102177, "learning_rate": 9.975556262521092e-06, "loss": 0.3853, "step": 1766 }, { "epoch": 0.06063829787234042, "grad_norm": 0.8778193226214163, "learning_rate": 9.975501346840562e-06, "loss": 0.3944, "step": 1767 }, { "epoch": 0.0606726149622512, "grad_norm": 1.173280038538995, "learning_rate": 9.975446369693514e-06, "loss": 0.4017, "step": 1768 }, { "epoch": 0.060706932052161974, "grad_norm": 0.9180022723797269, "learning_rate": 9.975391331080627e-06, "loss": 0.3291, "step": 1769 }, { "epoch": 0.060741249142072753, "grad_norm": 0.8049693673595022, "learning_rate": 9.975336231002579e-06, "loss": 0.3895, "step": 1770 }, { "epoch": 0.060775566231983526, "grad_norm": 0.8203409622838945, "learning_rate": 9.975281069460055e-06, "loss": 0.3319, "step": 1771 }, { "epoch": 0.060809883321894305, "grad_norm": 0.9552450204494112, "learning_rate": 9.975225846453732e-06, "loss": 0.3344, "step": 1772 }, { "epoch": 0.06084420041180508, "grad_norm": 0.8896656611856215, "learning_rate": 9.975170561984296e-06, "loss": 0.3054, "step": 1773 }, { "epoch": 0.060878517501715856, "grad_norm": 0.9674023244127445, "learning_rate": 9.975115216052426e-06, "loss": 0.4095, "step": 1774 }, { "epoch": 0.06091283459162663, "grad_norm": 0.9192882832726038, "learning_rate": 9.97505980865881e-06, "loss": 0.3636, "step": 1775 }, { "epoch": 0.06094715168153741, "grad_norm": 0.8804176954407072, "learning_rate": 9.97500433980413e-06, "loss": 0.381, "step": 1776 }, { "epoch": 0.06098146877144818, "grad_norm": 0.9080786059335323, "learning_rate": 9.97494880948907e-06, "loss": 0.3471, "step": 1777 }, { "epoch": 0.06101578586135896, "grad_norm": 0.9522943572827387, "learning_rate": 9.974893217714321e-06, "loss": 0.3867, "step": 1778 }, { "epoch": 0.06105010295126973, "grad_norm": 0.8099504653099088, "learning_rate": 9.974837564480563e-06, "loss": 0.4156, "step": 1779 }, { "epoch": 0.06108442004118051, "grad_norm": 0.8596492103931288, "learning_rate": 9.974781849788489e-06, "loss": 0.4072, "step": 1780 }, { "epoch": 0.06111873713109128, "grad_norm": 0.7369268114164045, "learning_rate": 9.974726073638785e-06, "loss": 0.3321, "step": 1781 }, { "epoch": 0.06115305422100206, "grad_norm": 0.9493808068745213, "learning_rate": 9.97467023603214e-06, "loss": 0.3635, "step": 1782 }, { "epoch": 0.061187371310912834, "grad_norm": 0.9204327522939192, "learning_rate": 9.974614336969245e-06, "loss": 0.4127, "step": 1783 }, { "epoch": 0.06122168840082361, "grad_norm": 1.0914501167260842, "learning_rate": 9.97455837645079e-06, "loss": 0.4087, "step": 1784 }, { "epoch": 0.061256005490734386, "grad_norm": 0.8792794078742647, "learning_rate": 9.974502354477464e-06, "loss": 0.3981, "step": 1785 }, { "epoch": 0.06129032258064516, "grad_norm": 0.9218955131332325, "learning_rate": 9.974446271049962e-06, "loss": 0.3777, "step": 1786 }, { "epoch": 0.06132463967055594, "grad_norm": 0.8305566869144229, "learning_rate": 9.974390126168976e-06, "loss": 0.3601, "step": 1787 }, { "epoch": 0.06135895676046671, "grad_norm": 0.8038224035038931, "learning_rate": 9.9743339198352e-06, "loss": 0.3887, "step": 1788 }, { "epoch": 0.06139327385037749, "grad_norm": 0.9564238218453929, "learning_rate": 9.974277652049328e-06, "loss": 0.3952, "step": 1789 }, { "epoch": 0.06142759094028826, "grad_norm": 0.8420100730013137, "learning_rate": 9.974221322812055e-06, "loss": 0.4096, "step": 1790 }, { "epoch": 0.06146190803019904, "grad_norm": 0.7866530460901561, "learning_rate": 9.974164932124076e-06, "loss": 0.3721, "step": 1791 }, { "epoch": 0.06149622512010981, "grad_norm": 0.8947089888471534, "learning_rate": 9.97410847998609e-06, "loss": 0.4059, "step": 1792 }, { "epoch": 0.06153054221002059, "grad_norm": 0.8491825777179972, "learning_rate": 9.974051966398793e-06, "loss": 0.3242, "step": 1793 }, { "epoch": 0.061564859299931364, "grad_norm": 0.9700373628089398, "learning_rate": 9.97399539136288e-06, "loss": 0.4322, "step": 1794 }, { "epoch": 0.06159917638984214, "grad_norm": 0.8340300004253798, "learning_rate": 9.973938754879056e-06, "loss": 0.3805, "step": 1795 }, { "epoch": 0.061633493479752915, "grad_norm": 0.7570914047112199, "learning_rate": 9.973882056948018e-06, "loss": 0.361, "step": 1796 }, { "epoch": 0.061667810569663695, "grad_norm": 1.021029393527674, "learning_rate": 9.973825297570465e-06, "loss": 0.3959, "step": 1797 }, { "epoch": 0.06170212765957447, "grad_norm": 1.81100478877708, "learning_rate": 9.9737684767471e-06, "loss": 0.3775, "step": 1798 }, { "epoch": 0.061736444749485246, "grad_norm": 0.9174575849152392, "learning_rate": 9.973711594478623e-06, "loss": 0.3711, "step": 1799 }, { "epoch": 0.06177076183939602, "grad_norm": 0.964604429693805, "learning_rate": 9.97365465076574e-06, "loss": 0.3757, "step": 1800 }, { "epoch": 0.0618050789293068, "grad_norm": 0.7850829208819722, "learning_rate": 9.973597645609152e-06, "loss": 0.3568, "step": 1801 }, { "epoch": 0.06183939601921757, "grad_norm": 0.9244018812124976, "learning_rate": 9.973540579009562e-06, "loss": 0.4005, "step": 1802 }, { "epoch": 0.06187371310912835, "grad_norm": 0.927966429763715, "learning_rate": 9.973483450967679e-06, "loss": 0.3667, "step": 1803 }, { "epoch": 0.06190803019903912, "grad_norm": 0.9358699294141454, "learning_rate": 9.973426261484205e-06, "loss": 0.4245, "step": 1804 }, { "epoch": 0.0619423472889499, "grad_norm": 0.9327808831022233, "learning_rate": 9.973369010559848e-06, "loss": 0.365, "step": 1805 }, { "epoch": 0.06197666437886067, "grad_norm": 0.9738228693812516, "learning_rate": 9.973311698195316e-06, "loss": 0.3907, "step": 1806 }, { "epoch": 0.062010981468771445, "grad_norm": 0.9166357740174196, "learning_rate": 9.973254324391315e-06, "loss": 0.363, "step": 1807 }, { "epoch": 0.062045298558682224, "grad_norm": 0.8744833019733441, "learning_rate": 9.973196889148557e-06, "loss": 0.3686, "step": 1808 }, { "epoch": 0.062079615648592996, "grad_norm": 0.8928000163340091, "learning_rate": 9.973139392467749e-06, "loss": 0.3714, "step": 1809 }, { "epoch": 0.062113932738503776, "grad_norm": 0.920777804532007, "learning_rate": 9.973081834349602e-06, "loss": 0.4083, "step": 1810 }, { "epoch": 0.06214824982841455, "grad_norm": 0.8753217079899052, "learning_rate": 9.973024214794826e-06, "loss": 0.4329, "step": 1811 }, { "epoch": 0.06218256691832533, "grad_norm": 0.9363391267377387, "learning_rate": 9.972966533804135e-06, "loss": 0.3713, "step": 1812 }, { "epoch": 0.0622168840082361, "grad_norm": 0.753310876798817, "learning_rate": 9.97290879137824e-06, "loss": 0.3426, "step": 1813 }, { "epoch": 0.06225120109814688, "grad_norm": 0.8460119883156906, "learning_rate": 9.972850987517855e-06, "loss": 0.4315, "step": 1814 }, { "epoch": 0.06228551818805765, "grad_norm": 0.8385233785436439, "learning_rate": 9.972793122223691e-06, "loss": 0.343, "step": 1815 }, { "epoch": 0.06231983527796843, "grad_norm": 1.110471525027551, "learning_rate": 9.97273519549647e-06, "loss": 0.3756, "step": 1816 }, { "epoch": 0.0623541523678792, "grad_norm": 0.8657580522080897, "learning_rate": 9.9726772073369e-06, "loss": 0.3325, "step": 1817 }, { "epoch": 0.06238846945778998, "grad_norm": 0.958603621461233, "learning_rate": 9.972619157745704e-06, "loss": 0.43, "step": 1818 }, { "epoch": 0.062422786547700754, "grad_norm": 0.8597212435294979, "learning_rate": 9.972561046723593e-06, "loss": 0.3815, "step": 1819 }, { "epoch": 0.06245710363761153, "grad_norm": 0.8675153751950228, "learning_rate": 9.972502874271288e-06, "loss": 0.3724, "step": 1820 }, { "epoch": 0.062491420727522305, "grad_norm": 0.8996908505591756, "learning_rate": 9.972444640389509e-06, "loss": 0.3357, "step": 1821 }, { "epoch": 0.06252573781743308, "grad_norm": 0.8435822331620202, "learning_rate": 9.972386345078973e-06, "loss": 0.3322, "step": 1822 }, { "epoch": 0.06256005490734386, "grad_norm": 0.7863329861842907, "learning_rate": 9.9723279883404e-06, "loss": 0.3583, "step": 1823 }, { "epoch": 0.06259437199725464, "grad_norm": 0.8498404011312082, "learning_rate": 9.972269570174514e-06, "loss": 0.3518, "step": 1824 }, { "epoch": 0.06262868908716542, "grad_norm": 0.8431614458623145, "learning_rate": 9.972211090582031e-06, "loss": 0.3951, "step": 1825 }, { "epoch": 0.06266300617707618, "grad_norm": 0.8893350180234653, "learning_rate": 9.97215254956368e-06, "loss": 0.3732, "step": 1826 }, { "epoch": 0.06269732326698696, "grad_norm": 0.8729892987746837, "learning_rate": 9.972093947120181e-06, "loss": 0.3963, "step": 1827 }, { "epoch": 0.06273164035689774, "grad_norm": 1.0395168930428211, "learning_rate": 9.972035283252257e-06, "loss": 0.3895, "step": 1828 }, { "epoch": 0.0627659574468085, "grad_norm": 0.861163366363201, "learning_rate": 9.971976557960634e-06, "loss": 0.328, "step": 1829 }, { "epoch": 0.06280027453671928, "grad_norm": 1.2320193630706104, "learning_rate": 9.971917771246038e-06, "loss": 0.3667, "step": 1830 }, { "epoch": 0.06283459162663006, "grad_norm": 0.9535642001266421, "learning_rate": 9.971858923109195e-06, "loss": 0.4059, "step": 1831 }, { "epoch": 0.06286890871654084, "grad_norm": 0.9948076932570926, "learning_rate": 9.97180001355083e-06, "loss": 0.427, "step": 1832 }, { "epoch": 0.06290322580645161, "grad_norm": 0.9448445215718726, "learning_rate": 9.971741042571675e-06, "loss": 0.3888, "step": 1833 }, { "epoch": 0.06293754289636239, "grad_norm": 0.867212996021649, "learning_rate": 9.971682010172453e-06, "loss": 0.3445, "step": 1834 }, { "epoch": 0.06297185998627317, "grad_norm": 0.8877745842762227, "learning_rate": 9.971622916353898e-06, "loss": 0.4017, "step": 1835 }, { "epoch": 0.06300617707618394, "grad_norm": 0.8257422618190023, "learning_rate": 9.971563761116739e-06, "loss": 0.33, "step": 1836 }, { "epoch": 0.06304049416609471, "grad_norm": 0.8707742537558111, "learning_rate": 9.971504544461703e-06, "loss": 0.3839, "step": 1837 }, { "epoch": 0.06307481125600549, "grad_norm": 0.9321696545435787, "learning_rate": 9.971445266389526e-06, "loss": 0.4165, "step": 1838 }, { "epoch": 0.06310912834591627, "grad_norm": 0.869413148684216, "learning_rate": 9.97138592690094e-06, "loss": 0.3567, "step": 1839 }, { "epoch": 0.06314344543582705, "grad_norm": 0.9540610944494953, "learning_rate": 9.971326525996676e-06, "loss": 0.4293, "step": 1840 }, { "epoch": 0.06317776252573781, "grad_norm": 0.995337591929167, "learning_rate": 9.97126706367747e-06, "loss": 0.4083, "step": 1841 }, { "epoch": 0.06321207961564859, "grad_norm": 0.8702975190305902, "learning_rate": 9.971207539944054e-06, "loss": 0.4506, "step": 1842 }, { "epoch": 0.06324639670555937, "grad_norm": 0.837555706609314, "learning_rate": 9.971147954797165e-06, "loss": 0.3774, "step": 1843 }, { "epoch": 0.06328071379547015, "grad_norm": 0.8299402325956424, "learning_rate": 9.971088308237538e-06, "loss": 0.3414, "step": 1844 }, { "epoch": 0.06331503088538092, "grad_norm": 0.9427328216886579, "learning_rate": 9.971028600265912e-06, "loss": 0.3667, "step": 1845 }, { "epoch": 0.0633493479752917, "grad_norm": 1.100309536664033, "learning_rate": 9.970968830883023e-06, "loss": 0.4278, "step": 1846 }, { "epoch": 0.06338366506520247, "grad_norm": 1.23146117265889, "learning_rate": 9.970909000089609e-06, "loss": 0.3683, "step": 1847 }, { "epoch": 0.06341798215511325, "grad_norm": 0.8160736889017414, "learning_rate": 9.97084910788641e-06, "loss": 0.3644, "step": 1848 }, { "epoch": 0.06345229924502402, "grad_norm": 0.8500522069327864, "learning_rate": 9.970789154274167e-06, "loss": 0.3929, "step": 1849 }, { "epoch": 0.0634866163349348, "grad_norm": 0.8873613775649475, "learning_rate": 9.97072913925362e-06, "loss": 0.3718, "step": 1850 }, { "epoch": 0.06352093342484558, "grad_norm": 0.7399870984744545, "learning_rate": 9.970669062825507e-06, "loss": 0.3428, "step": 1851 }, { "epoch": 0.06355525051475634, "grad_norm": 0.8895202454163644, "learning_rate": 9.970608924990574e-06, "loss": 0.4363, "step": 1852 }, { "epoch": 0.06358956760466712, "grad_norm": 0.8826703716177575, "learning_rate": 9.970548725749562e-06, "loss": 0.3585, "step": 1853 }, { "epoch": 0.0636238846945779, "grad_norm": 0.7776198436711991, "learning_rate": 9.970488465103217e-06, "loss": 0.3837, "step": 1854 }, { "epoch": 0.06365820178448868, "grad_norm": 0.8874786969935645, "learning_rate": 9.970428143052283e-06, "loss": 0.4012, "step": 1855 }, { "epoch": 0.06369251887439945, "grad_norm": 0.8608276513217779, "learning_rate": 9.970367759597502e-06, "loss": 0.3784, "step": 1856 }, { "epoch": 0.06372683596431022, "grad_norm": 0.7906054295866585, "learning_rate": 9.970307314739623e-06, "loss": 0.3498, "step": 1857 }, { "epoch": 0.063761153054221, "grad_norm": 0.8427624458915635, "learning_rate": 9.970246808479393e-06, "loss": 0.4302, "step": 1858 }, { "epoch": 0.06379547014413178, "grad_norm": 0.9509920768818392, "learning_rate": 9.970186240817557e-06, "loss": 0.4126, "step": 1859 }, { "epoch": 0.06382978723404255, "grad_norm": 0.8288239277342476, "learning_rate": 9.970125611754865e-06, "loss": 0.3248, "step": 1860 }, { "epoch": 0.06386410432395333, "grad_norm": 0.9508186849773871, "learning_rate": 9.970064921292066e-06, "loss": 0.4533, "step": 1861 }, { "epoch": 0.0638984214138641, "grad_norm": 0.9461317912146828, "learning_rate": 9.97000416942991e-06, "loss": 0.4206, "step": 1862 }, { "epoch": 0.06393273850377489, "grad_norm": 0.9150633455286585, "learning_rate": 9.969943356169145e-06, "loss": 0.4268, "step": 1863 }, { "epoch": 0.06396705559368565, "grad_norm": 0.7688503025420758, "learning_rate": 9.969882481510527e-06, "loss": 0.3473, "step": 1864 }, { "epoch": 0.06400137268359643, "grad_norm": 0.8709162820751255, "learning_rate": 9.969821545454803e-06, "loss": 0.3576, "step": 1865 }, { "epoch": 0.06403568977350721, "grad_norm": 0.8889928426024859, "learning_rate": 9.969760548002729e-06, "loss": 0.4099, "step": 1866 }, { "epoch": 0.06407000686341799, "grad_norm": 0.830800974085556, "learning_rate": 9.969699489155057e-06, "loss": 0.3543, "step": 1867 }, { "epoch": 0.06410432395332875, "grad_norm": 0.9669572777224461, "learning_rate": 9.969638368912541e-06, "loss": 0.4413, "step": 1868 }, { "epoch": 0.06413864104323953, "grad_norm": 0.988603592755886, "learning_rate": 9.969577187275941e-06, "loss": 0.3911, "step": 1869 }, { "epoch": 0.06417295813315031, "grad_norm": 0.8570400542771219, "learning_rate": 9.969515944246005e-06, "loss": 0.3917, "step": 1870 }, { "epoch": 0.06420727522306108, "grad_norm": 0.8752491515261196, "learning_rate": 9.969454639823495e-06, "loss": 0.3914, "step": 1871 }, { "epoch": 0.06424159231297186, "grad_norm": 1.054047721634254, "learning_rate": 9.969393274009165e-06, "loss": 0.3862, "step": 1872 }, { "epoch": 0.06427590940288264, "grad_norm": 0.8652675292128096, "learning_rate": 9.969331846803777e-06, "loss": 0.3487, "step": 1873 }, { "epoch": 0.06431022649279342, "grad_norm": 0.8612320557182251, "learning_rate": 9.969270358208088e-06, "loss": 0.3465, "step": 1874 }, { "epoch": 0.06434454358270418, "grad_norm": 0.8982798048543345, "learning_rate": 9.969208808222854e-06, "loss": 0.3795, "step": 1875 }, { "epoch": 0.06437886067261496, "grad_norm": 0.8950950879441308, "learning_rate": 9.969147196848842e-06, "loss": 0.4008, "step": 1876 }, { "epoch": 0.06441317776252574, "grad_norm": 0.9646329448330824, "learning_rate": 9.969085524086808e-06, "loss": 0.3985, "step": 1877 }, { "epoch": 0.06444749485243652, "grad_norm": 0.9709742403226935, "learning_rate": 9.969023789937518e-06, "loss": 0.3888, "step": 1878 }, { "epoch": 0.06448181194234728, "grad_norm": 0.8790454293709712, "learning_rate": 9.96896199440173e-06, "loss": 0.4011, "step": 1879 }, { "epoch": 0.06451612903225806, "grad_norm": 0.987124997765559, "learning_rate": 9.96890013748021e-06, "loss": 0.3371, "step": 1880 }, { "epoch": 0.06455044612216884, "grad_norm": 1.0584015120014225, "learning_rate": 9.968838219173723e-06, "loss": 0.4173, "step": 1881 }, { "epoch": 0.06458476321207962, "grad_norm": 0.8005794214121754, "learning_rate": 9.968776239483034e-06, "loss": 0.3781, "step": 1882 }, { "epoch": 0.06461908030199039, "grad_norm": 0.955423612921288, "learning_rate": 9.968714198408906e-06, "loss": 0.3991, "step": 1883 }, { "epoch": 0.06465339739190117, "grad_norm": 0.9205180103012095, "learning_rate": 9.968652095952108e-06, "loss": 0.4408, "step": 1884 }, { "epoch": 0.06468771448181194, "grad_norm": 1.0010872374070552, "learning_rate": 9.968589932113403e-06, "loss": 0.3866, "step": 1885 }, { "epoch": 0.06472203157172272, "grad_norm": 1.0911400151443587, "learning_rate": 9.968527706893565e-06, "loss": 0.4252, "step": 1886 }, { "epoch": 0.06475634866163349, "grad_norm": 0.8863672720651934, "learning_rate": 9.968465420293361e-06, "loss": 0.3743, "step": 1887 }, { "epoch": 0.06479066575154427, "grad_norm": 0.9354364858120522, "learning_rate": 9.968403072313557e-06, "loss": 0.3787, "step": 1888 }, { "epoch": 0.06482498284145505, "grad_norm": 0.9257785781386005, "learning_rate": 9.968340662954927e-06, "loss": 0.3806, "step": 1889 }, { "epoch": 0.06485929993136583, "grad_norm": 0.8118532793920252, "learning_rate": 9.96827819221824e-06, "loss": 0.3659, "step": 1890 }, { "epoch": 0.06489361702127659, "grad_norm": 0.8672846997975763, "learning_rate": 9.968215660104268e-06, "loss": 0.3902, "step": 1891 }, { "epoch": 0.06492793411118737, "grad_norm": 0.9542902487529378, "learning_rate": 9.968153066613785e-06, "loss": 0.4691, "step": 1892 }, { "epoch": 0.06496225120109815, "grad_norm": 0.8752712846630143, "learning_rate": 9.968090411747562e-06, "loss": 0.3417, "step": 1893 }, { "epoch": 0.06499656829100892, "grad_norm": 0.8649269943117428, "learning_rate": 9.968027695506375e-06, "loss": 0.3904, "step": 1894 }, { "epoch": 0.0650308853809197, "grad_norm": 0.8995413447189355, "learning_rate": 9.967964917890997e-06, "loss": 0.4016, "step": 1895 }, { "epoch": 0.06506520247083047, "grad_norm": 0.8700653307721204, "learning_rate": 9.967902078902207e-06, "loss": 0.3529, "step": 1896 }, { "epoch": 0.06509951956074125, "grad_norm": 0.832349127918489, "learning_rate": 9.967839178540777e-06, "loss": 0.3579, "step": 1897 }, { "epoch": 0.06513383665065202, "grad_norm": 0.9007691795460655, "learning_rate": 9.967776216807486e-06, "loss": 0.3228, "step": 1898 }, { "epoch": 0.0651681537405628, "grad_norm": 0.8266172936025783, "learning_rate": 9.967713193703113e-06, "loss": 0.3966, "step": 1899 }, { "epoch": 0.06520247083047358, "grad_norm": 0.8804615200826315, "learning_rate": 9.967650109228433e-06, "loss": 0.3915, "step": 1900 }, { "epoch": 0.06523678792038436, "grad_norm": 0.8698687434277244, "learning_rate": 9.967586963384228e-06, "loss": 0.4133, "step": 1901 }, { "epoch": 0.06527110501029512, "grad_norm": 0.8903512043305077, "learning_rate": 9.96752375617128e-06, "loss": 0.3765, "step": 1902 }, { "epoch": 0.0653054221002059, "grad_norm": 0.9357394491958808, "learning_rate": 9.967460487590365e-06, "loss": 0.4233, "step": 1903 }, { "epoch": 0.06533973919011668, "grad_norm": 0.9561539815422981, "learning_rate": 9.96739715764227e-06, "loss": 0.4184, "step": 1904 }, { "epoch": 0.06537405628002746, "grad_norm": 0.7723116182834762, "learning_rate": 9.96733376632777e-06, "loss": 0.3848, "step": 1905 }, { "epoch": 0.06540837336993822, "grad_norm": 0.8930357358289556, "learning_rate": 9.967270313647657e-06, "loss": 0.4283, "step": 1906 }, { "epoch": 0.065442690459849, "grad_norm": 0.8211420582254856, "learning_rate": 9.967206799602709e-06, "loss": 0.3317, "step": 1907 }, { "epoch": 0.06547700754975978, "grad_norm": 0.8305479862617692, "learning_rate": 9.967143224193711e-06, "loss": 0.3712, "step": 1908 }, { "epoch": 0.06551132463967056, "grad_norm": 0.8773512977433967, "learning_rate": 9.967079587421451e-06, "loss": 0.3709, "step": 1909 }, { "epoch": 0.06554564172958133, "grad_norm": 0.8577358281006161, "learning_rate": 9.967015889286714e-06, "loss": 0.3865, "step": 1910 }, { "epoch": 0.0655799588194921, "grad_norm": 0.9227911254740055, "learning_rate": 9.966952129790286e-06, "loss": 0.3585, "step": 1911 }, { "epoch": 0.06561427590940289, "grad_norm": 0.8969503334380494, "learning_rate": 9.966888308932955e-06, "loss": 0.3559, "step": 1912 }, { "epoch": 0.06564859299931365, "grad_norm": 0.7615652486980816, "learning_rate": 9.966824426715512e-06, "loss": 0.3493, "step": 1913 }, { "epoch": 0.06568291008922443, "grad_norm": 0.8939073775477783, "learning_rate": 9.96676048313874e-06, "loss": 0.4479, "step": 1914 }, { "epoch": 0.06571722717913521, "grad_norm": 0.982089802681921, "learning_rate": 9.966696478203436e-06, "loss": 0.3804, "step": 1915 }, { "epoch": 0.06575154426904599, "grad_norm": 0.8028327204327478, "learning_rate": 9.966632411910387e-06, "loss": 0.3551, "step": 1916 }, { "epoch": 0.06578586135895675, "grad_norm": 0.9016417019727045, "learning_rate": 9.966568284260386e-06, "loss": 0.344, "step": 1917 }, { "epoch": 0.06582017844886753, "grad_norm": 0.8957171723793993, "learning_rate": 9.966504095254221e-06, "loss": 0.3653, "step": 1918 }, { "epoch": 0.06585449553877831, "grad_norm": 0.9355572799677769, "learning_rate": 9.96643984489269e-06, "loss": 0.3523, "step": 1919 }, { "epoch": 0.06588881262868909, "grad_norm": 0.8285925685126367, "learning_rate": 9.966375533176586e-06, "loss": 0.3742, "step": 1920 }, { "epoch": 0.06592312971859986, "grad_norm": 0.9174895908550025, "learning_rate": 9.966311160106701e-06, "loss": 0.3671, "step": 1921 }, { "epoch": 0.06595744680851064, "grad_norm": 0.8861560555016573, "learning_rate": 9.966246725683833e-06, "loss": 0.3513, "step": 1922 }, { "epoch": 0.06599176389842142, "grad_norm": 0.898595977114211, "learning_rate": 9.966182229908778e-06, "loss": 0.3626, "step": 1923 }, { "epoch": 0.0660260809883322, "grad_norm": 1.0150299902015774, "learning_rate": 9.96611767278233e-06, "loss": 0.4117, "step": 1924 }, { "epoch": 0.06606039807824296, "grad_norm": 0.884266839206987, "learning_rate": 9.966053054305287e-06, "loss": 0.3926, "step": 1925 }, { "epoch": 0.06609471516815374, "grad_norm": 0.8903038607354014, "learning_rate": 9.96598837447845e-06, "loss": 0.4023, "step": 1926 }, { "epoch": 0.06612903225806452, "grad_norm": 0.989462235502608, "learning_rate": 9.965923633302616e-06, "loss": 0.3556, "step": 1927 }, { "epoch": 0.0661633493479753, "grad_norm": 0.8523806418453136, "learning_rate": 9.965858830778586e-06, "loss": 0.4113, "step": 1928 }, { "epoch": 0.06619766643788606, "grad_norm": 0.8108574380853326, "learning_rate": 9.965793966907159e-06, "loss": 0.3604, "step": 1929 }, { "epoch": 0.06623198352779684, "grad_norm": 0.8778595244155721, "learning_rate": 9.965729041689137e-06, "loss": 0.3609, "step": 1930 }, { "epoch": 0.06626630061770762, "grad_norm": 0.8769144824969747, "learning_rate": 9.965664055125321e-06, "loss": 0.3977, "step": 1931 }, { "epoch": 0.0663006177076184, "grad_norm": 0.9286432448224929, "learning_rate": 9.965599007216516e-06, "loss": 0.3554, "step": 1932 }, { "epoch": 0.06633493479752917, "grad_norm": 0.8744392340813578, "learning_rate": 9.965533897963526e-06, "loss": 0.4476, "step": 1933 }, { "epoch": 0.06636925188743995, "grad_norm": 0.9205673206049801, "learning_rate": 9.965468727367152e-06, "loss": 0.4369, "step": 1934 }, { "epoch": 0.06640356897735072, "grad_norm": 0.8352190570015963, "learning_rate": 9.965403495428201e-06, "loss": 0.3823, "step": 1935 }, { "epoch": 0.06643788606726149, "grad_norm": 0.9031353106378689, "learning_rate": 9.96533820214748e-06, "loss": 0.3532, "step": 1936 }, { "epoch": 0.06647220315717227, "grad_norm": 1.015433042987657, "learning_rate": 9.965272847525794e-06, "loss": 0.3238, "step": 1937 }, { "epoch": 0.06650652024708305, "grad_norm": 1.0548734748013628, "learning_rate": 9.965207431563952e-06, "loss": 0.4078, "step": 1938 }, { "epoch": 0.06654083733699383, "grad_norm": 0.803646987077044, "learning_rate": 9.96514195426276e-06, "loss": 0.3347, "step": 1939 }, { "epoch": 0.06657515442690459, "grad_norm": 0.9584045653467567, "learning_rate": 9.965076415623028e-06, "loss": 0.3988, "step": 1940 }, { "epoch": 0.06660947151681537, "grad_norm": 0.946163160815483, "learning_rate": 9.965010815645566e-06, "loss": 0.4603, "step": 1941 }, { "epoch": 0.06664378860672615, "grad_norm": 0.9316018731066135, "learning_rate": 9.964945154331183e-06, "loss": 0.3767, "step": 1942 }, { "epoch": 0.06667810569663693, "grad_norm": 0.820935616897959, "learning_rate": 9.964879431680691e-06, "loss": 0.4093, "step": 1943 }, { "epoch": 0.0667124227865477, "grad_norm": 0.7931700178160518, "learning_rate": 9.964813647694904e-06, "loss": 0.4339, "step": 1944 }, { "epoch": 0.06674673987645847, "grad_norm": 0.9768564725223094, "learning_rate": 9.964747802374633e-06, "loss": 0.3561, "step": 1945 }, { "epoch": 0.06678105696636925, "grad_norm": 0.9520270214184211, "learning_rate": 9.964681895720688e-06, "loss": 0.4128, "step": 1946 }, { "epoch": 0.06681537405628003, "grad_norm": 0.8452283963002911, "learning_rate": 9.96461592773389e-06, "loss": 0.3509, "step": 1947 }, { "epoch": 0.0668496911461908, "grad_norm": 0.8458054865422346, "learning_rate": 9.964549898415048e-06, "loss": 0.3669, "step": 1948 }, { "epoch": 0.06688400823610158, "grad_norm": 0.8961622188267119, "learning_rate": 9.964483807764982e-06, "loss": 0.3358, "step": 1949 }, { "epoch": 0.06691832532601236, "grad_norm": 0.850716984916015, "learning_rate": 9.964417655784506e-06, "loss": 0.4478, "step": 1950 }, { "epoch": 0.06695264241592314, "grad_norm": 0.8746331457448427, "learning_rate": 9.964351442474436e-06, "loss": 0.3818, "step": 1951 }, { "epoch": 0.0669869595058339, "grad_norm": 0.8681429484372597, "learning_rate": 9.964285167835595e-06, "loss": 0.4214, "step": 1952 }, { "epoch": 0.06702127659574468, "grad_norm": 1.004901594798075, "learning_rate": 9.964218831868795e-06, "loss": 0.3917, "step": 1953 }, { "epoch": 0.06705559368565546, "grad_norm": 0.9665425366153252, "learning_rate": 9.96415243457486e-06, "loss": 0.4097, "step": 1954 }, { "epoch": 0.06708991077556624, "grad_norm": 0.8591787040313722, "learning_rate": 9.964085975954612e-06, "loss": 0.3862, "step": 1955 }, { "epoch": 0.067124227865477, "grad_norm": 0.8796989613680967, "learning_rate": 9.964019456008866e-06, "loss": 0.3287, "step": 1956 }, { "epoch": 0.06715854495538778, "grad_norm": 0.9675255348913758, "learning_rate": 9.96395287473845e-06, "loss": 0.4299, "step": 1957 }, { "epoch": 0.06719286204529856, "grad_norm": 0.8671993507759835, "learning_rate": 9.963886232144182e-06, "loss": 0.4591, "step": 1958 }, { "epoch": 0.06722717913520933, "grad_norm": 0.8253621495828383, "learning_rate": 9.963819528226886e-06, "loss": 0.4012, "step": 1959 }, { "epoch": 0.06726149622512011, "grad_norm": 0.9022613793207046, "learning_rate": 9.963752762987389e-06, "loss": 0.3419, "step": 1960 }, { "epoch": 0.06729581331503089, "grad_norm": 0.8687773681082458, "learning_rate": 9.963685936426512e-06, "loss": 0.3774, "step": 1961 }, { "epoch": 0.06733013040494167, "grad_norm": 0.9169007237888943, "learning_rate": 9.963619048545084e-06, "loss": 0.3663, "step": 1962 }, { "epoch": 0.06736444749485243, "grad_norm": 0.9214214479304697, "learning_rate": 9.96355209934393e-06, "loss": 0.3952, "step": 1963 }, { "epoch": 0.06739876458476321, "grad_norm": 1.0199485045764063, "learning_rate": 9.963485088823874e-06, "loss": 0.4228, "step": 1964 }, { "epoch": 0.06743308167467399, "grad_norm": 0.8908435062325525, "learning_rate": 9.96341801698575e-06, "loss": 0.3791, "step": 1965 }, { "epoch": 0.06746739876458477, "grad_norm": 0.8579396499380721, "learning_rate": 9.963350883830381e-06, "loss": 0.3704, "step": 1966 }, { "epoch": 0.06750171585449553, "grad_norm": 0.8896317939182345, "learning_rate": 9.963283689358598e-06, "loss": 0.3348, "step": 1967 }, { "epoch": 0.06753603294440631, "grad_norm": 0.8943732913985941, "learning_rate": 9.963216433571233e-06, "loss": 0.4016, "step": 1968 }, { "epoch": 0.06757035003431709, "grad_norm": 0.8546152840096684, "learning_rate": 9.963149116469116e-06, "loss": 0.3937, "step": 1969 }, { "epoch": 0.06760466712422787, "grad_norm": 0.8628905468179638, "learning_rate": 9.963081738053076e-06, "loss": 0.4231, "step": 1970 }, { "epoch": 0.06763898421413864, "grad_norm": 0.8579672331721842, "learning_rate": 9.96301429832395e-06, "loss": 0.372, "step": 1971 }, { "epoch": 0.06767330130404942, "grad_norm": 0.7912483428439062, "learning_rate": 9.962946797282567e-06, "loss": 0.3633, "step": 1972 }, { "epoch": 0.0677076183939602, "grad_norm": 0.8246281027268744, "learning_rate": 9.962879234929762e-06, "loss": 0.3616, "step": 1973 }, { "epoch": 0.06774193548387097, "grad_norm": 0.85432741158401, "learning_rate": 9.962811611266372e-06, "loss": 0.4145, "step": 1974 }, { "epoch": 0.06777625257378174, "grad_norm": 0.935255846839545, "learning_rate": 9.962743926293228e-06, "loss": 0.4211, "step": 1975 }, { "epoch": 0.06781056966369252, "grad_norm": 0.9154436281706939, "learning_rate": 9.962676180011172e-06, "loss": 0.3656, "step": 1976 }, { "epoch": 0.0678448867536033, "grad_norm": 0.8873895048067554, "learning_rate": 9.962608372421036e-06, "loss": 0.3592, "step": 1977 }, { "epoch": 0.06787920384351406, "grad_norm": 1.1033152047990418, "learning_rate": 9.96254050352366e-06, "loss": 0.3594, "step": 1978 }, { "epoch": 0.06791352093342484, "grad_norm": 1.0548360131683232, "learning_rate": 9.962472573319881e-06, "loss": 0.3802, "step": 1979 }, { "epoch": 0.06794783802333562, "grad_norm": 0.8960252346786582, "learning_rate": 9.96240458181054e-06, "loss": 0.4233, "step": 1980 }, { "epoch": 0.0679821551132464, "grad_norm": 1.0800335604104652, "learning_rate": 9.962336528996477e-06, "loss": 0.3368, "step": 1981 }, { "epoch": 0.06801647220315717, "grad_norm": 0.9374537577721884, "learning_rate": 9.962268414878528e-06, "loss": 0.3708, "step": 1982 }, { "epoch": 0.06805078929306795, "grad_norm": 0.8462861679768287, "learning_rate": 9.962200239457542e-06, "loss": 0.3243, "step": 1983 }, { "epoch": 0.06808510638297872, "grad_norm": 0.8958020641655156, "learning_rate": 9.962132002734354e-06, "loss": 0.3841, "step": 1984 }, { "epoch": 0.0681194234728895, "grad_norm": 0.9281015217472198, "learning_rate": 9.962063704709814e-06, "loss": 0.4023, "step": 1985 }, { "epoch": 0.06815374056280027, "grad_norm": 0.8261754679518217, "learning_rate": 9.96199534538476e-06, "loss": 0.3153, "step": 1986 }, { "epoch": 0.06818805765271105, "grad_norm": 0.9269477457402215, "learning_rate": 9.96192692476004e-06, "loss": 0.3463, "step": 1987 }, { "epoch": 0.06822237474262183, "grad_norm": 0.8582742763994358, "learning_rate": 9.961858442836496e-06, "loss": 0.3447, "step": 1988 }, { "epoch": 0.0682566918325326, "grad_norm": 0.9313720433602573, "learning_rate": 9.961789899614977e-06, "loss": 0.3881, "step": 1989 }, { "epoch": 0.06829100892244337, "grad_norm": 0.8978253126891301, "learning_rate": 9.961721295096329e-06, "loss": 0.3993, "step": 1990 }, { "epoch": 0.06832532601235415, "grad_norm": 0.8660412407362126, "learning_rate": 9.961652629281397e-06, "loss": 0.417, "step": 1991 }, { "epoch": 0.06835964310226493, "grad_norm": 0.9510402738198325, "learning_rate": 9.961583902171034e-06, "loss": 0.4224, "step": 1992 }, { "epoch": 0.06839396019217571, "grad_norm": 0.8292709624002563, "learning_rate": 9.961515113766087e-06, "loss": 0.3629, "step": 1993 }, { "epoch": 0.06842827728208647, "grad_norm": 0.9458460550949012, "learning_rate": 9.961446264067402e-06, "loss": 0.387, "step": 1994 }, { "epoch": 0.06846259437199725, "grad_norm": 1.0149751213607277, "learning_rate": 9.961377353075836e-06, "loss": 0.3502, "step": 1995 }, { "epoch": 0.06849691146190803, "grad_norm": 0.966626425649442, "learning_rate": 9.961308380792236e-06, "loss": 0.3671, "step": 1996 }, { "epoch": 0.06853122855181881, "grad_norm": 0.9022954593894956, "learning_rate": 9.961239347217456e-06, "loss": 0.3964, "step": 1997 }, { "epoch": 0.06856554564172958, "grad_norm": 0.9772820902191548, "learning_rate": 9.961170252352348e-06, "loss": 0.4072, "step": 1998 }, { "epoch": 0.06859986273164036, "grad_norm": 0.8445022406814586, "learning_rate": 9.961101096197765e-06, "loss": 0.3997, "step": 1999 }, { "epoch": 0.06863417982155114, "grad_norm": 0.779508093540095, "learning_rate": 9.961031878754561e-06, "loss": 0.3411, "step": 2000 }, { "epoch": 0.0686684969114619, "grad_norm": 0.8661987723664537, "learning_rate": 9.960962600023594e-06, "loss": 0.3778, "step": 2001 }, { "epoch": 0.06870281400137268, "grad_norm": 0.882683646775174, "learning_rate": 9.960893260005717e-06, "loss": 0.3968, "step": 2002 }, { "epoch": 0.06873713109128346, "grad_norm": 0.7845014364938567, "learning_rate": 9.96082385870179e-06, "loss": 0.4119, "step": 2003 }, { "epoch": 0.06877144818119424, "grad_norm": 0.8676449847525787, "learning_rate": 9.960754396112664e-06, "loss": 0.3895, "step": 2004 }, { "epoch": 0.068805765271105, "grad_norm": 0.8594836819107036, "learning_rate": 9.960684872239203e-06, "loss": 0.416, "step": 2005 }, { "epoch": 0.06884008236101578, "grad_norm": 0.87289314853635, "learning_rate": 9.960615287082265e-06, "loss": 0.4141, "step": 2006 }, { "epoch": 0.06887439945092656, "grad_norm": 0.858110311218216, "learning_rate": 9.960545640642708e-06, "loss": 0.3618, "step": 2007 }, { "epoch": 0.06890871654083734, "grad_norm": 0.9001377804178516, "learning_rate": 9.960475932921392e-06, "loss": 0.3611, "step": 2008 }, { "epoch": 0.06894303363074811, "grad_norm": 0.9200941009312551, "learning_rate": 9.960406163919181e-06, "loss": 0.3432, "step": 2009 }, { "epoch": 0.06897735072065889, "grad_norm": 0.9090560047081558, "learning_rate": 9.960336333636934e-06, "loss": 0.3478, "step": 2010 }, { "epoch": 0.06901166781056967, "grad_norm": 0.9536816711450001, "learning_rate": 9.960266442075515e-06, "loss": 0.417, "step": 2011 }, { "epoch": 0.06904598490048044, "grad_norm": 0.9006795664995012, "learning_rate": 9.960196489235787e-06, "loss": 0.4231, "step": 2012 }, { "epoch": 0.06908030199039121, "grad_norm": 0.8123134141707856, "learning_rate": 9.960126475118614e-06, "loss": 0.3423, "step": 2013 }, { "epoch": 0.06911461908030199, "grad_norm": 0.9251485694941499, "learning_rate": 9.960056399724862e-06, "loss": 0.3433, "step": 2014 }, { "epoch": 0.06914893617021277, "grad_norm": 0.8307899831997202, "learning_rate": 9.959986263055397e-06, "loss": 0.3257, "step": 2015 }, { "epoch": 0.06918325326012355, "grad_norm": 0.811625940682821, "learning_rate": 9.959916065111084e-06, "loss": 0.3423, "step": 2016 }, { "epoch": 0.06921757035003431, "grad_norm": 0.8090026882002483, "learning_rate": 9.95984580589279e-06, "loss": 0.3676, "step": 2017 }, { "epoch": 0.06925188743994509, "grad_norm": 0.8976727048120196, "learning_rate": 9.959775485401382e-06, "loss": 0.373, "step": 2018 }, { "epoch": 0.06928620452985587, "grad_norm": 0.8654088907526922, "learning_rate": 9.959705103637733e-06, "loss": 0.3387, "step": 2019 }, { "epoch": 0.06932052161976665, "grad_norm": 0.8816953307762097, "learning_rate": 9.95963466060271e-06, "loss": 0.3956, "step": 2020 }, { "epoch": 0.06935483870967742, "grad_norm": 0.8960775056939286, "learning_rate": 9.959564156297181e-06, "loss": 0.383, "step": 2021 }, { "epoch": 0.0693891557995882, "grad_norm": 0.8446316544102555, "learning_rate": 9.959493590722022e-06, "loss": 0.3995, "step": 2022 }, { "epoch": 0.06942347288949897, "grad_norm": 0.8178845030809042, "learning_rate": 9.9594229638781e-06, "loss": 0.3745, "step": 2023 }, { "epoch": 0.06945778997940974, "grad_norm": 0.8292834583942762, "learning_rate": 9.95935227576629e-06, "loss": 0.3927, "step": 2024 }, { "epoch": 0.06949210706932052, "grad_norm": 0.850670570344331, "learning_rate": 9.959281526387464e-06, "loss": 0.3992, "step": 2025 }, { "epoch": 0.0695264241592313, "grad_norm": 0.8287705228374546, "learning_rate": 9.959210715742498e-06, "loss": 0.3646, "step": 2026 }, { "epoch": 0.06956074124914208, "grad_norm": 0.8009688846547756, "learning_rate": 9.959139843832265e-06, "loss": 0.3729, "step": 2027 }, { "epoch": 0.06959505833905284, "grad_norm": 0.9154395217254321, "learning_rate": 9.959068910657639e-06, "loss": 0.3949, "step": 2028 }, { "epoch": 0.06962937542896362, "grad_norm": 0.8395210217828267, "learning_rate": 9.9589979162195e-06, "loss": 0.3767, "step": 2029 }, { "epoch": 0.0696636925188744, "grad_norm": 0.8135006018684011, "learning_rate": 9.958926860518725e-06, "loss": 0.3945, "step": 2030 }, { "epoch": 0.06969800960878518, "grad_norm": 0.9043558308403633, "learning_rate": 9.958855743556188e-06, "loss": 0.4211, "step": 2031 }, { "epoch": 0.06973232669869595, "grad_norm": 0.7519297158590781, "learning_rate": 9.95878456533277e-06, "loss": 0.3242, "step": 2032 }, { "epoch": 0.06976664378860672, "grad_norm": 0.7586248270330572, "learning_rate": 9.95871332584935e-06, "loss": 0.3125, "step": 2033 }, { "epoch": 0.0698009608785175, "grad_norm": 0.8961453356776141, "learning_rate": 9.958642025106808e-06, "loss": 0.3307, "step": 2034 }, { "epoch": 0.06983527796842828, "grad_norm": 0.8801499084689303, "learning_rate": 9.958570663106027e-06, "loss": 0.3403, "step": 2035 }, { "epoch": 0.06986959505833905, "grad_norm": 0.8260676116747284, "learning_rate": 9.958499239847884e-06, "loss": 0.3163, "step": 2036 }, { "epoch": 0.06990391214824983, "grad_norm": 0.7677870370383018, "learning_rate": 9.958427755333265e-06, "loss": 0.3852, "step": 2037 }, { "epoch": 0.0699382292381606, "grad_norm": 0.835819315168192, "learning_rate": 9.958356209563052e-06, "loss": 0.3606, "step": 2038 }, { "epoch": 0.06997254632807139, "grad_norm": 0.8498218360251169, "learning_rate": 9.95828460253813e-06, "loss": 0.3709, "step": 2039 }, { "epoch": 0.07000686341798215, "grad_norm": 0.908391508421659, "learning_rate": 9.958212934259382e-06, "loss": 0.3782, "step": 2040 }, { "epoch": 0.07004118050789293, "grad_norm": 0.7599504361740973, "learning_rate": 9.958141204727691e-06, "loss": 0.3752, "step": 2041 }, { "epoch": 0.07007549759780371, "grad_norm": 0.853985059316109, "learning_rate": 9.95806941394395e-06, "loss": 0.3978, "step": 2042 }, { "epoch": 0.07010981468771448, "grad_norm": 0.7810541164319069, "learning_rate": 9.95799756190904e-06, "loss": 0.389, "step": 2043 }, { "epoch": 0.07014413177762525, "grad_norm": 0.8526264566692864, "learning_rate": 9.957925648623851e-06, "loss": 0.3884, "step": 2044 }, { "epoch": 0.07017844886753603, "grad_norm": 1.1654901072018975, "learning_rate": 9.95785367408927e-06, "loss": 0.3812, "step": 2045 }, { "epoch": 0.07021276595744681, "grad_norm": 0.8947309422016488, "learning_rate": 9.95778163830619e-06, "loss": 0.3762, "step": 2046 }, { "epoch": 0.07024708304735758, "grad_norm": 0.8134223696547637, "learning_rate": 9.957709541275496e-06, "loss": 0.4046, "step": 2047 }, { "epoch": 0.07028140013726836, "grad_norm": 0.8391894206346973, "learning_rate": 9.95763738299808e-06, "loss": 0.3691, "step": 2048 }, { "epoch": 0.07031571722717914, "grad_norm": 0.8222548006549774, "learning_rate": 9.957565163474837e-06, "loss": 0.3796, "step": 2049 }, { "epoch": 0.07035003431708992, "grad_norm": 1.3342822010503372, "learning_rate": 9.957492882706654e-06, "loss": 0.366, "step": 2050 }, { "epoch": 0.07038435140700068, "grad_norm": 0.8697141139063475, "learning_rate": 9.957420540694427e-06, "loss": 0.3924, "step": 2051 }, { "epoch": 0.07041866849691146, "grad_norm": 0.8565443470294216, "learning_rate": 9.957348137439048e-06, "loss": 0.3832, "step": 2052 }, { "epoch": 0.07045298558682224, "grad_norm": 0.9095348415403269, "learning_rate": 9.957275672941414e-06, "loss": 0.3837, "step": 2053 }, { "epoch": 0.07048730267673302, "grad_norm": 1.0153074250316882, "learning_rate": 9.957203147202417e-06, "loss": 0.3825, "step": 2054 }, { "epoch": 0.07052161976664378, "grad_norm": 0.7892626431426237, "learning_rate": 9.957130560222957e-06, "loss": 0.408, "step": 2055 }, { "epoch": 0.07055593685655456, "grad_norm": 0.8375218964598067, "learning_rate": 9.957057912003927e-06, "loss": 0.3296, "step": 2056 }, { "epoch": 0.07059025394646534, "grad_norm": 0.9227749766827916, "learning_rate": 9.956985202546225e-06, "loss": 0.3724, "step": 2057 }, { "epoch": 0.07062457103637612, "grad_norm": 0.8981150191694296, "learning_rate": 9.956912431850752e-06, "loss": 0.4108, "step": 2058 }, { "epoch": 0.07065888812628689, "grad_norm": 0.8328913432172066, "learning_rate": 9.956839599918403e-06, "loss": 0.3427, "step": 2059 }, { "epoch": 0.07069320521619767, "grad_norm": 0.9381222624222993, "learning_rate": 9.956766706750081e-06, "loss": 0.4224, "step": 2060 }, { "epoch": 0.07072752230610845, "grad_norm": 0.9674897161143299, "learning_rate": 9.956693752346685e-06, "loss": 0.4673, "step": 2061 }, { "epoch": 0.07076183939601922, "grad_norm": 0.7899000374819195, "learning_rate": 9.956620736709117e-06, "loss": 0.3516, "step": 2062 }, { "epoch": 0.07079615648592999, "grad_norm": 0.8428998978536696, "learning_rate": 9.956547659838279e-06, "loss": 0.3571, "step": 2063 }, { "epoch": 0.07083047357584077, "grad_norm": 0.81752712792056, "learning_rate": 9.956474521735073e-06, "loss": 0.3794, "step": 2064 }, { "epoch": 0.07086479066575155, "grad_norm": 0.8960614881955967, "learning_rate": 9.956401322400404e-06, "loss": 0.4147, "step": 2065 }, { "epoch": 0.07089910775566231, "grad_norm": 0.8573354440819304, "learning_rate": 9.956328061835174e-06, "loss": 0.3837, "step": 2066 }, { "epoch": 0.07093342484557309, "grad_norm": 0.7484167111649961, "learning_rate": 9.95625474004029e-06, "loss": 0.3236, "step": 2067 }, { "epoch": 0.07096774193548387, "grad_norm": 0.8145347102726834, "learning_rate": 9.956181357016657e-06, "loss": 0.3618, "step": 2068 }, { "epoch": 0.07100205902539465, "grad_norm": 0.9086477715122872, "learning_rate": 9.956107912765181e-06, "loss": 0.3787, "step": 2069 }, { "epoch": 0.07103637611530542, "grad_norm": 0.9154729220878561, "learning_rate": 9.95603440728677e-06, "loss": 0.3871, "step": 2070 }, { "epoch": 0.0710706932052162, "grad_norm": 0.937675710049938, "learning_rate": 9.955960840582332e-06, "loss": 0.3748, "step": 2071 }, { "epoch": 0.07110501029512697, "grad_norm": 0.8340856027445843, "learning_rate": 9.955887212652777e-06, "loss": 0.3896, "step": 2072 }, { "epoch": 0.07113932738503775, "grad_norm": 0.8244975471650298, "learning_rate": 9.955813523499012e-06, "loss": 0.4281, "step": 2073 }, { "epoch": 0.07117364447494852, "grad_norm": 0.8613860813834655, "learning_rate": 9.95573977312195e-06, "loss": 0.3763, "step": 2074 }, { "epoch": 0.0712079615648593, "grad_norm": 0.897598801335022, "learning_rate": 9.9556659615225e-06, "loss": 0.4377, "step": 2075 }, { "epoch": 0.07124227865477008, "grad_norm": 0.9380904544249845, "learning_rate": 9.955592088701576e-06, "loss": 0.3752, "step": 2076 }, { "epoch": 0.07127659574468086, "grad_norm": 0.8982812475549132, "learning_rate": 9.955518154660089e-06, "loss": 0.4263, "step": 2077 }, { "epoch": 0.07131091283459162, "grad_norm": 0.9164485492548899, "learning_rate": 9.955444159398953e-06, "loss": 0.3603, "step": 2078 }, { "epoch": 0.0713452299245024, "grad_norm": 0.8775726270429404, "learning_rate": 9.955370102919081e-06, "loss": 0.3387, "step": 2079 }, { "epoch": 0.07137954701441318, "grad_norm": 0.957885246097915, "learning_rate": 9.95529598522139e-06, "loss": 0.3861, "step": 2080 }, { "epoch": 0.07141386410432396, "grad_norm": 0.9176887547671068, "learning_rate": 9.955221806306793e-06, "loss": 0.3448, "step": 2081 }, { "epoch": 0.07144818119423473, "grad_norm": 0.8758726147719303, "learning_rate": 9.955147566176208e-06, "loss": 0.3976, "step": 2082 }, { "epoch": 0.0714824982841455, "grad_norm": 0.885977671590128, "learning_rate": 9.955073264830553e-06, "loss": 0.3901, "step": 2083 }, { "epoch": 0.07151681537405628, "grad_norm": 0.8281746960443974, "learning_rate": 9.954998902270746e-06, "loss": 0.3899, "step": 2084 }, { "epoch": 0.07155113246396705, "grad_norm": 0.9333464918515474, "learning_rate": 9.954924478497703e-06, "loss": 0.408, "step": 2085 }, { "epoch": 0.07158544955387783, "grad_norm": 0.8967108591505876, "learning_rate": 9.954849993512343e-06, "loss": 0.365, "step": 2086 }, { "epoch": 0.07161976664378861, "grad_norm": 0.9171884521971656, "learning_rate": 9.95477544731559e-06, "loss": 0.3489, "step": 2087 }, { "epoch": 0.07165408373369939, "grad_norm": 0.8925024489621652, "learning_rate": 9.954700839908365e-06, "loss": 0.3399, "step": 2088 }, { "epoch": 0.07168840082361015, "grad_norm": 0.9995310606183825, "learning_rate": 9.954626171291584e-06, "loss": 0.3653, "step": 2089 }, { "epoch": 0.07172271791352093, "grad_norm": 0.8795671461814475, "learning_rate": 9.954551441466176e-06, "loss": 0.3724, "step": 2090 }, { "epoch": 0.07175703500343171, "grad_norm": 1.039298989619279, "learning_rate": 9.95447665043306e-06, "loss": 0.3555, "step": 2091 }, { "epoch": 0.07179135209334249, "grad_norm": 0.8807375624134399, "learning_rate": 9.954401798193162e-06, "loss": 0.4029, "step": 2092 }, { "epoch": 0.07182566918325325, "grad_norm": 0.8860401716430644, "learning_rate": 9.954326884747404e-06, "loss": 0.4228, "step": 2093 }, { "epoch": 0.07185998627316403, "grad_norm": 1.0090369144090128, "learning_rate": 9.954251910096717e-06, "loss": 0.3526, "step": 2094 }, { "epoch": 0.07189430336307481, "grad_norm": 0.9116918226328155, "learning_rate": 9.954176874242022e-06, "loss": 0.3818, "step": 2095 }, { "epoch": 0.07192862045298559, "grad_norm": 0.8308987359705549, "learning_rate": 9.954101777184247e-06, "loss": 0.3753, "step": 2096 }, { "epoch": 0.07196293754289636, "grad_norm": 0.9136611317019321, "learning_rate": 9.954026618924321e-06, "loss": 0.4079, "step": 2097 }, { "epoch": 0.07199725463280714, "grad_norm": 0.8387623651208836, "learning_rate": 9.953951399463173e-06, "loss": 0.326, "step": 2098 }, { "epoch": 0.07203157172271792, "grad_norm": 0.815429391452162, "learning_rate": 9.953876118801732e-06, "loss": 0.3585, "step": 2099 }, { "epoch": 0.0720658888126287, "grad_norm": 0.9979486501442348, "learning_rate": 9.953800776940925e-06, "loss": 0.3844, "step": 2100 }, { "epoch": 0.07210020590253946, "grad_norm": 0.8033893514379692, "learning_rate": 9.953725373881684e-06, "loss": 0.4505, "step": 2101 }, { "epoch": 0.07213452299245024, "grad_norm": 0.7855979945142031, "learning_rate": 9.953649909624943e-06, "loss": 0.3472, "step": 2102 }, { "epoch": 0.07216884008236102, "grad_norm": 0.8733097467281675, "learning_rate": 9.953574384171634e-06, "loss": 0.4881, "step": 2103 }, { "epoch": 0.0722031571722718, "grad_norm": 0.9355317853565224, "learning_rate": 9.953498797522687e-06, "loss": 0.3988, "step": 2104 }, { "epoch": 0.07223747426218256, "grad_norm": 0.8340262599349, "learning_rate": 9.953423149679038e-06, "loss": 0.3643, "step": 2105 }, { "epoch": 0.07227179135209334, "grad_norm": 0.9433548054640576, "learning_rate": 9.95334744064162e-06, "loss": 0.3644, "step": 2106 }, { "epoch": 0.07230610844200412, "grad_norm": 0.8233997555100636, "learning_rate": 9.953271670411373e-06, "loss": 0.4073, "step": 2107 }, { "epoch": 0.07234042553191489, "grad_norm": 0.8631444460428428, "learning_rate": 9.953195838989227e-06, "loss": 0.3952, "step": 2108 }, { "epoch": 0.07237474262182567, "grad_norm": 0.8441269246677267, "learning_rate": 9.95311994637612e-06, "loss": 0.3414, "step": 2109 }, { "epoch": 0.07240905971173645, "grad_norm": 0.8711953826829465, "learning_rate": 9.953043992572993e-06, "loss": 0.3163, "step": 2110 }, { "epoch": 0.07244337680164722, "grad_norm": 0.9676672892404465, "learning_rate": 9.952967977580781e-06, "loss": 0.4263, "step": 2111 }, { "epoch": 0.07247769389155799, "grad_norm": 0.8522528987796428, "learning_rate": 9.952891901400426e-06, "loss": 0.3569, "step": 2112 }, { "epoch": 0.07251201098146877, "grad_norm": 0.9418889488112824, "learning_rate": 9.952815764032863e-06, "loss": 0.3577, "step": 2113 }, { "epoch": 0.07254632807137955, "grad_norm": 0.7893261425862063, "learning_rate": 9.952739565479038e-06, "loss": 0.3546, "step": 2114 }, { "epoch": 0.07258064516129033, "grad_norm": 0.9375055521735378, "learning_rate": 9.952663305739889e-06, "loss": 0.3919, "step": 2115 }, { "epoch": 0.07261496225120109, "grad_norm": 0.9053972872709779, "learning_rate": 9.95258698481636e-06, "loss": 0.3515, "step": 2116 }, { "epoch": 0.07264927934111187, "grad_norm": 0.9836719385664965, "learning_rate": 9.952510602709393e-06, "loss": 0.3997, "step": 2117 }, { "epoch": 0.07268359643102265, "grad_norm": 0.8909436760903281, "learning_rate": 9.95243415941993e-06, "loss": 0.4231, "step": 2118 }, { "epoch": 0.07271791352093343, "grad_norm": 0.8897135246384006, "learning_rate": 9.952357654948918e-06, "loss": 0.4157, "step": 2119 }, { "epoch": 0.0727522306108442, "grad_norm": 0.8617666332973646, "learning_rate": 9.952281089297303e-06, "loss": 0.3962, "step": 2120 }, { "epoch": 0.07278654770075497, "grad_norm": 0.9166466779186937, "learning_rate": 9.952204462466027e-06, "loss": 0.3485, "step": 2121 }, { "epoch": 0.07282086479066575, "grad_norm": 0.898762004501189, "learning_rate": 9.952127774456037e-06, "loss": 0.368, "step": 2122 }, { "epoch": 0.07285518188057653, "grad_norm": 0.8012355410898981, "learning_rate": 9.952051025268284e-06, "loss": 0.4062, "step": 2123 }, { "epoch": 0.0728894989704873, "grad_norm": 0.8116172614610192, "learning_rate": 9.951974214903713e-06, "loss": 0.3365, "step": 2124 }, { "epoch": 0.07292381606039808, "grad_norm": 0.9123785987664075, "learning_rate": 9.951897343363274e-06, "loss": 0.3686, "step": 2125 }, { "epoch": 0.07295813315030886, "grad_norm": 0.8682447265399389, "learning_rate": 9.951820410647919e-06, "loss": 0.391, "step": 2126 }, { "epoch": 0.07299245024021964, "grad_norm": 0.9024434020536634, "learning_rate": 9.951743416758595e-06, "loss": 0.3562, "step": 2127 }, { "epoch": 0.0730267673301304, "grad_norm": 0.8672536345400446, "learning_rate": 9.951666361696252e-06, "loss": 0.3319, "step": 2128 }, { "epoch": 0.07306108442004118, "grad_norm": 1.015615088411416, "learning_rate": 9.951589245461847e-06, "loss": 0.3942, "step": 2129 }, { "epoch": 0.07309540150995196, "grad_norm": 1.0110942474258826, "learning_rate": 9.951512068056328e-06, "loss": 0.3747, "step": 2130 }, { "epoch": 0.07312971859986273, "grad_norm": 0.8754740460198309, "learning_rate": 9.951434829480652e-06, "loss": 0.3659, "step": 2131 }, { "epoch": 0.0731640356897735, "grad_norm": 0.851874772115127, "learning_rate": 9.95135752973577e-06, "loss": 0.3673, "step": 2132 }, { "epoch": 0.07319835277968428, "grad_norm": 0.7895442962013663, "learning_rate": 9.95128016882264e-06, "loss": 0.3728, "step": 2133 }, { "epoch": 0.07323266986959506, "grad_norm": 0.8859773576767604, "learning_rate": 9.951202746742214e-06, "loss": 0.4192, "step": 2134 }, { "epoch": 0.07326698695950583, "grad_norm": 0.8398554647765224, "learning_rate": 9.951125263495452e-06, "loss": 0.3459, "step": 2135 }, { "epoch": 0.07330130404941661, "grad_norm": 1.0081146474963587, "learning_rate": 9.951047719083308e-06, "loss": 0.3465, "step": 2136 }, { "epoch": 0.07333562113932739, "grad_norm": 0.8381062108996651, "learning_rate": 9.950970113506745e-06, "loss": 0.4049, "step": 2137 }, { "epoch": 0.07336993822923817, "grad_norm": 0.8733522386125693, "learning_rate": 9.950892446766716e-06, "loss": 0.3618, "step": 2138 }, { "epoch": 0.07340425531914893, "grad_norm": 0.9743862526377763, "learning_rate": 9.950814718864185e-06, "loss": 0.429, "step": 2139 }, { "epoch": 0.07343857240905971, "grad_norm": 0.8721442549342519, "learning_rate": 9.950736929800109e-06, "loss": 0.3528, "step": 2140 }, { "epoch": 0.07347288949897049, "grad_norm": 0.9286775469304744, "learning_rate": 9.95065907957545e-06, "loss": 0.3662, "step": 2141 }, { "epoch": 0.07350720658888127, "grad_norm": 0.8623288118432432, "learning_rate": 9.95058116819117e-06, "loss": 0.3943, "step": 2142 }, { "epoch": 0.07354152367879203, "grad_norm": 0.9058811865389704, "learning_rate": 9.950503195648231e-06, "loss": 0.4368, "step": 2143 }, { "epoch": 0.07357584076870281, "grad_norm": 0.8225269161754079, "learning_rate": 9.9504251619476e-06, "loss": 0.3364, "step": 2144 }, { "epoch": 0.07361015785861359, "grad_norm": 0.8769288241288775, "learning_rate": 9.950347067090233e-06, "loss": 0.4113, "step": 2145 }, { "epoch": 0.07364447494852437, "grad_norm": 0.9250663882288942, "learning_rate": 9.950268911077103e-06, "loss": 0.386, "step": 2146 }, { "epoch": 0.07367879203843514, "grad_norm": 0.9092310981454587, "learning_rate": 9.950190693909171e-06, "loss": 0.4392, "step": 2147 }, { "epoch": 0.07371310912834592, "grad_norm": 0.8626815319013605, "learning_rate": 9.950112415587404e-06, "loss": 0.3506, "step": 2148 }, { "epoch": 0.0737474262182567, "grad_norm": 0.9117200424143217, "learning_rate": 9.950034076112769e-06, "loss": 0.3909, "step": 2149 }, { "epoch": 0.07378174330816746, "grad_norm": 0.9747458294990433, "learning_rate": 9.949955675486236e-06, "loss": 0.4225, "step": 2150 }, { "epoch": 0.07381606039807824, "grad_norm": 0.8529498903450441, "learning_rate": 9.949877213708769e-06, "loss": 0.3464, "step": 2151 }, { "epoch": 0.07385037748798902, "grad_norm": 0.9924962919466561, "learning_rate": 9.949798690781342e-06, "loss": 0.369, "step": 2152 }, { "epoch": 0.0738846945778998, "grad_norm": 0.8369875420665659, "learning_rate": 9.949720106704923e-06, "loss": 0.316, "step": 2153 }, { "epoch": 0.07391901166781056, "grad_norm": 0.9108567297968426, "learning_rate": 9.949641461480483e-06, "loss": 0.4034, "step": 2154 }, { "epoch": 0.07395332875772134, "grad_norm": 0.9895086785243168, "learning_rate": 9.949562755108993e-06, "loss": 0.3816, "step": 2155 }, { "epoch": 0.07398764584763212, "grad_norm": 0.8203983071625964, "learning_rate": 9.949483987591424e-06, "loss": 0.3695, "step": 2156 }, { "epoch": 0.0740219629375429, "grad_norm": 0.7930981042280373, "learning_rate": 9.949405158928753e-06, "loss": 0.3697, "step": 2157 }, { "epoch": 0.07405628002745367, "grad_norm": 0.8636338146541969, "learning_rate": 9.949326269121953e-06, "loss": 0.4154, "step": 2158 }, { "epoch": 0.07409059711736445, "grad_norm": 0.881234324425295, "learning_rate": 9.949247318171995e-06, "loss": 0.3696, "step": 2159 }, { "epoch": 0.07412491420727522, "grad_norm": 1.28653982692999, "learning_rate": 9.949168306079857e-06, "loss": 0.4339, "step": 2160 }, { "epoch": 0.074159231297186, "grad_norm": 0.8644748439151143, "learning_rate": 9.949089232846515e-06, "loss": 0.4217, "step": 2161 }, { "epoch": 0.07419354838709677, "grad_norm": 0.8799476946788802, "learning_rate": 9.949010098472947e-06, "loss": 0.3556, "step": 2162 }, { "epoch": 0.07422786547700755, "grad_norm": 1.0147728510333704, "learning_rate": 9.948930902960126e-06, "loss": 0.4043, "step": 2163 }, { "epoch": 0.07426218256691833, "grad_norm": 0.9588049082500952, "learning_rate": 9.948851646309037e-06, "loss": 0.3994, "step": 2164 }, { "epoch": 0.07429649965682911, "grad_norm": 0.9355504990997097, "learning_rate": 9.948772328520653e-06, "loss": 0.3988, "step": 2165 }, { "epoch": 0.07433081674673987, "grad_norm": 0.8992682914630142, "learning_rate": 9.948692949595957e-06, "loss": 0.393, "step": 2166 }, { "epoch": 0.07436513383665065, "grad_norm": 0.9601374755201711, "learning_rate": 9.948613509535931e-06, "loss": 0.3557, "step": 2167 }, { "epoch": 0.07439945092656143, "grad_norm": 0.8586235566013679, "learning_rate": 9.948534008341554e-06, "loss": 0.337, "step": 2168 }, { "epoch": 0.07443376801647221, "grad_norm": 0.7390251068127757, "learning_rate": 9.948454446013808e-06, "loss": 0.3402, "step": 2169 }, { "epoch": 0.07446808510638298, "grad_norm": 0.9391912948638785, "learning_rate": 9.948374822553676e-06, "loss": 0.3377, "step": 2170 }, { "epoch": 0.07450240219629375, "grad_norm": 0.8656692576249375, "learning_rate": 9.948295137962144e-06, "loss": 0.4144, "step": 2171 }, { "epoch": 0.07453671928620453, "grad_norm": 0.8263328304730271, "learning_rate": 9.948215392240195e-06, "loss": 0.3979, "step": 2172 }, { "epoch": 0.0745710363761153, "grad_norm": 0.8578656181223332, "learning_rate": 9.948135585388811e-06, "loss": 0.3704, "step": 2173 }, { "epoch": 0.07460535346602608, "grad_norm": 0.910575875810489, "learning_rate": 9.948055717408984e-06, "loss": 0.3656, "step": 2174 }, { "epoch": 0.07463967055593686, "grad_norm": 0.9075001285018834, "learning_rate": 9.947975788301695e-06, "loss": 0.4283, "step": 2175 }, { "epoch": 0.07467398764584764, "grad_norm": 0.9081756804996216, "learning_rate": 9.947895798067937e-06, "loss": 0.3877, "step": 2176 }, { "epoch": 0.0747083047357584, "grad_norm": 0.9477338480129415, "learning_rate": 9.947815746708692e-06, "loss": 0.4155, "step": 2177 }, { "epoch": 0.07474262182566918, "grad_norm": 0.9201329337678243, "learning_rate": 9.947735634224952e-06, "loss": 0.342, "step": 2178 }, { "epoch": 0.07477693891557996, "grad_norm": 0.8790163578208345, "learning_rate": 9.94765546061771e-06, "loss": 0.3547, "step": 2179 }, { "epoch": 0.07481125600549074, "grad_norm": 0.8794598568647082, "learning_rate": 9.94757522588795e-06, "loss": 0.4349, "step": 2180 }, { "epoch": 0.0748455730954015, "grad_norm": 0.8605419610220232, "learning_rate": 9.947494930036668e-06, "loss": 0.3793, "step": 2181 }, { "epoch": 0.07487989018531228, "grad_norm": 0.9659508370431853, "learning_rate": 9.947414573064856e-06, "loss": 0.4017, "step": 2182 }, { "epoch": 0.07491420727522306, "grad_norm": 0.8348218677995188, "learning_rate": 9.947334154973502e-06, "loss": 0.3515, "step": 2183 }, { "epoch": 0.07494852436513384, "grad_norm": 0.9171765320684203, "learning_rate": 9.947253675763602e-06, "loss": 0.4193, "step": 2184 }, { "epoch": 0.07498284145504461, "grad_norm": 0.830461034220748, "learning_rate": 9.947173135436153e-06, "loss": 0.3654, "step": 2185 }, { "epoch": 0.07501715854495539, "grad_norm": 0.979154668942358, "learning_rate": 9.947092533992148e-06, "loss": 0.4514, "step": 2186 }, { "epoch": 0.07505147563486617, "grad_norm": 0.8393436584493642, "learning_rate": 9.947011871432582e-06, "loss": 0.3907, "step": 2187 }, { "epoch": 0.07508579272477695, "grad_norm": 0.8298585041669526, "learning_rate": 9.946931147758451e-06, "loss": 0.4012, "step": 2188 }, { "epoch": 0.07512010981468771, "grad_norm": 0.8183036344427921, "learning_rate": 9.946850362970753e-06, "loss": 0.3828, "step": 2189 }, { "epoch": 0.07515442690459849, "grad_norm": 0.874013078128044, "learning_rate": 9.946769517070486e-06, "loss": 0.3739, "step": 2190 }, { "epoch": 0.07518874399450927, "grad_norm": 0.8663227568734834, "learning_rate": 9.946688610058653e-06, "loss": 0.3788, "step": 2191 }, { "epoch": 0.07522306108442003, "grad_norm": 1.3892158341847902, "learning_rate": 9.946607641936245e-06, "loss": 0.3347, "step": 2192 }, { "epoch": 0.07525737817433081, "grad_norm": 0.8447024079235127, "learning_rate": 9.946526612704269e-06, "loss": 0.3552, "step": 2193 }, { "epoch": 0.07529169526424159, "grad_norm": 1.0004350824755697, "learning_rate": 9.946445522363723e-06, "loss": 0.3672, "step": 2194 }, { "epoch": 0.07532601235415237, "grad_norm": 0.9002210559653813, "learning_rate": 9.946364370915609e-06, "loss": 0.3743, "step": 2195 }, { "epoch": 0.07536032944406314, "grad_norm": 0.8505471094178619, "learning_rate": 9.946283158360931e-06, "loss": 0.3827, "step": 2196 }, { "epoch": 0.07539464653397392, "grad_norm": 0.8346927037793735, "learning_rate": 9.946201884700691e-06, "loss": 0.354, "step": 2197 }, { "epoch": 0.0754289636238847, "grad_norm": 0.8895111368373789, "learning_rate": 9.946120549935893e-06, "loss": 0.3639, "step": 2198 }, { "epoch": 0.07546328071379547, "grad_norm": 0.8776375232677738, "learning_rate": 9.946039154067544e-06, "loss": 0.3825, "step": 2199 }, { "epoch": 0.07549759780370624, "grad_norm": 0.992842753340463, "learning_rate": 9.945957697096647e-06, "loss": 0.3761, "step": 2200 }, { "epoch": 0.07553191489361702, "grad_norm": 0.7943395006353855, "learning_rate": 9.945876179024208e-06, "loss": 0.329, "step": 2201 }, { "epoch": 0.0755662319835278, "grad_norm": 0.9379991230812941, "learning_rate": 9.945794599851234e-06, "loss": 0.386, "step": 2202 }, { "epoch": 0.07560054907343858, "grad_norm": 0.930426195975187, "learning_rate": 9.945712959578737e-06, "loss": 0.3838, "step": 2203 }, { "epoch": 0.07563486616334934, "grad_norm": 0.8084445242231206, "learning_rate": 9.945631258207721e-06, "loss": 0.3593, "step": 2204 }, { "epoch": 0.07566918325326012, "grad_norm": 1.2166467358731976, "learning_rate": 9.945549495739198e-06, "loss": 0.3644, "step": 2205 }, { "epoch": 0.0757035003431709, "grad_norm": 1.0074283200810157, "learning_rate": 9.945467672174176e-06, "loss": 0.3549, "step": 2206 }, { "epoch": 0.07573781743308168, "grad_norm": 0.9533391190941193, "learning_rate": 9.945385787513667e-06, "loss": 0.3832, "step": 2207 }, { "epoch": 0.07577213452299245, "grad_norm": 0.8653417366073418, "learning_rate": 9.945303841758682e-06, "loss": 0.3914, "step": 2208 }, { "epoch": 0.07580645161290323, "grad_norm": 1.0219859369164286, "learning_rate": 9.945221834910234e-06, "loss": 0.3752, "step": 2209 }, { "epoch": 0.075840768702814, "grad_norm": 0.830253095192054, "learning_rate": 9.945139766969336e-06, "loss": 0.4265, "step": 2210 }, { "epoch": 0.07587508579272478, "grad_norm": 0.9556438107088852, "learning_rate": 9.945057637937e-06, "loss": 0.4676, "step": 2211 }, { "epoch": 0.07590940288263555, "grad_norm": 0.849561071855715, "learning_rate": 9.944975447814244e-06, "loss": 0.3568, "step": 2212 }, { "epoch": 0.07594371997254633, "grad_norm": 0.8179661405818279, "learning_rate": 9.944893196602081e-06, "loss": 0.3983, "step": 2213 }, { "epoch": 0.07597803706245711, "grad_norm": 1.2289362761062943, "learning_rate": 9.944810884301529e-06, "loss": 0.4137, "step": 2214 }, { "epoch": 0.07601235415236787, "grad_norm": 0.8227026717524618, "learning_rate": 9.944728510913602e-06, "loss": 0.3653, "step": 2215 }, { "epoch": 0.07604667124227865, "grad_norm": 0.8424902587670879, "learning_rate": 9.94464607643932e-06, "loss": 0.3606, "step": 2216 }, { "epoch": 0.07608098833218943, "grad_norm": 0.9619716813420807, "learning_rate": 9.9445635808797e-06, "loss": 0.3457, "step": 2217 }, { "epoch": 0.07611530542210021, "grad_norm": 0.8866083606554735, "learning_rate": 9.944481024235763e-06, "loss": 0.3496, "step": 2218 }, { "epoch": 0.07614962251201098, "grad_norm": 1.2068949623619156, "learning_rate": 9.944398406508527e-06, "loss": 0.406, "step": 2219 }, { "epoch": 0.07618393960192175, "grad_norm": 0.9496037730990882, "learning_rate": 9.944315727699012e-06, "loss": 0.4015, "step": 2220 }, { "epoch": 0.07621825669183253, "grad_norm": 0.9397787499014884, "learning_rate": 9.944232987808242e-06, "loss": 0.3882, "step": 2221 }, { "epoch": 0.07625257378174331, "grad_norm": 0.9240249491749417, "learning_rate": 9.944150186837239e-06, "loss": 0.3818, "step": 2222 }, { "epoch": 0.07628689087165408, "grad_norm": 0.815237624481589, "learning_rate": 9.944067324787023e-06, "loss": 0.3306, "step": 2223 }, { "epoch": 0.07632120796156486, "grad_norm": 0.9719302965982355, "learning_rate": 9.94398440165862e-06, "loss": 0.4315, "step": 2224 }, { "epoch": 0.07635552505147564, "grad_norm": 0.9092066536078676, "learning_rate": 9.943901417453054e-06, "loss": 0.3286, "step": 2225 }, { "epoch": 0.07638984214138642, "grad_norm": 0.8554864655230335, "learning_rate": 9.94381837217135e-06, "loss": 0.3973, "step": 2226 }, { "epoch": 0.07642415923129718, "grad_norm": 0.972203143934328, "learning_rate": 9.943735265814533e-06, "loss": 0.3946, "step": 2227 }, { "epoch": 0.07645847632120796, "grad_norm": 1.0544603352843358, "learning_rate": 9.943652098383632e-06, "loss": 0.3584, "step": 2228 }, { "epoch": 0.07649279341111874, "grad_norm": 0.8429062995448031, "learning_rate": 9.943568869879672e-06, "loss": 0.4339, "step": 2229 }, { "epoch": 0.07652711050102952, "grad_norm": 1.0180020797323932, "learning_rate": 9.943485580303683e-06, "loss": 0.3782, "step": 2230 }, { "epoch": 0.07656142759094028, "grad_norm": 0.7553599998002279, "learning_rate": 9.943402229656691e-06, "loss": 0.3152, "step": 2231 }, { "epoch": 0.07659574468085106, "grad_norm": 0.8568835600889597, "learning_rate": 9.943318817939732e-06, "loss": 0.3537, "step": 2232 }, { "epoch": 0.07663006177076184, "grad_norm": 1.0210942550953526, "learning_rate": 9.94323534515383e-06, "loss": 0.3692, "step": 2233 }, { "epoch": 0.07666437886067262, "grad_norm": 0.9802467976836163, "learning_rate": 9.943151811300018e-06, "loss": 0.365, "step": 2234 }, { "epoch": 0.07669869595058339, "grad_norm": 0.9632836057607845, "learning_rate": 9.943068216379327e-06, "loss": 0.3823, "step": 2235 }, { "epoch": 0.07673301304049417, "grad_norm": 0.8391127395179523, "learning_rate": 9.942984560392794e-06, "loss": 0.3686, "step": 2236 }, { "epoch": 0.07676733013040495, "grad_norm": 1.0586230180555214, "learning_rate": 9.94290084334145e-06, "loss": 0.3976, "step": 2237 }, { "epoch": 0.07680164722031571, "grad_norm": 0.9272000688422987, "learning_rate": 9.942817065226328e-06, "loss": 0.3779, "step": 2238 }, { "epoch": 0.07683596431022649, "grad_norm": 0.8007115176030597, "learning_rate": 9.942733226048464e-06, "loss": 0.4151, "step": 2239 }, { "epoch": 0.07687028140013727, "grad_norm": 0.9248607628794329, "learning_rate": 9.942649325808895e-06, "loss": 0.3808, "step": 2240 }, { "epoch": 0.07690459849004805, "grad_norm": 0.8761151562674564, "learning_rate": 9.942565364508655e-06, "loss": 0.375, "step": 2241 }, { "epoch": 0.07693891557995881, "grad_norm": 0.9238938942934969, "learning_rate": 9.942481342148781e-06, "loss": 0.4163, "step": 2242 }, { "epoch": 0.07697323266986959, "grad_norm": 0.8444223268311205, "learning_rate": 9.942397258730313e-06, "loss": 0.369, "step": 2243 }, { "epoch": 0.07700754975978037, "grad_norm": 0.8259720952956634, "learning_rate": 9.942313114254289e-06, "loss": 0.3948, "step": 2244 }, { "epoch": 0.07704186684969115, "grad_norm": 0.7810393362304581, "learning_rate": 9.94222890872175e-06, "loss": 0.3588, "step": 2245 }, { "epoch": 0.07707618393960192, "grad_norm": 0.9082866864642885, "learning_rate": 9.942144642133736e-06, "loss": 0.4362, "step": 2246 }, { "epoch": 0.0771105010295127, "grad_norm": 0.8864745285108252, "learning_rate": 9.942060314491285e-06, "loss": 0.3816, "step": 2247 }, { "epoch": 0.07714481811942348, "grad_norm": 0.9023202638966356, "learning_rate": 9.94197592579544e-06, "loss": 0.433, "step": 2248 }, { "epoch": 0.07717913520933425, "grad_norm": 0.798501214515473, "learning_rate": 9.941891476047245e-06, "loss": 0.3205, "step": 2249 }, { "epoch": 0.07721345229924502, "grad_norm": 0.8400230059736847, "learning_rate": 9.941806965247742e-06, "loss": 0.4073, "step": 2250 }, { "epoch": 0.0772477693891558, "grad_norm": 0.8447549974202624, "learning_rate": 9.941722393397976e-06, "loss": 0.354, "step": 2251 }, { "epoch": 0.07728208647906658, "grad_norm": 0.8239431844610896, "learning_rate": 9.941637760498992e-06, "loss": 0.3313, "step": 2252 }, { "epoch": 0.07731640356897736, "grad_norm": 0.9018764540411723, "learning_rate": 9.941553066551834e-06, "loss": 0.4069, "step": 2253 }, { "epoch": 0.07735072065888812, "grad_norm": 0.877011839303019, "learning_rate": 9.941468311557551e-06, "loss": 0.3759, "step": 2254 }, { "epoch": 0.0773850377487989, "grad_norm": 0.9284497047871609, "learning_rate": 9.941383495517186e-06, "loss": 0.3624, "step": 2255 }, { "epoch": 0.07741935483870968, "grad_norm": 0.7967744390159145, "learning_rate": 9.94129861843179e-06, "loss": 0.3706, "step": 2256 }, { "epoch": 0.07745367192862045, "grad_norm": 0.869488466324574, "learning_rate": 9.94121368030241e-06, "loss": 0.3443, "step": 2257 }, { "epoch": 0.07748798901853123, "grad_norm": 0.9229885421245441, "learning_rate": 9.941128681130096e-06, "loss": 0.4093, "step": 2258 }, { "epoch": 0.077522306108442, "grad_norm": 0.877227652065541, "learning_rate": 9.941043620915899e-06, "loss": 0.3504, "step": 2259 }, { "epoch": 0.07755662319835278, "grad_norm": 0.9234957670798777, "learning_rate": 9.940958499660868e-06, "loss": 0.3888, "step": 2260 }, { "epoch": 0.07759094028826355, "grad_norm": 0.7885267061439454, "learning_rate": 9.940873317366055e-06, "loss": 0.314, "step": 2261 }, { "epoch": 0.07762525737817433, "grad_norm": 0.9068858175155696, "learning_rate": 9.940788074032513e-06, "loss": 0.3985, "step": 2262 }, { "epoch": 0.07765957446808511, "grad_norm": 0.8374900479605195, "learning_rate": 9.940702769661295e-06, "loss": 0.3925, "step": 2263 }, { "epoch": 0.07769389155799589, "grad_norm": 0.9458191310071389, "learning_rate": 9.940617404253454e-06, "loss": 0.3563, "step": 2264 }, { "epoch": 0.07772820864790665, "grad_norm": 0.7466548826199222, "learning_rate": 9.940531977810046e-06, "loss": 0.3295, "step": 2265 }, { "epoch": 0.07776252573781743, "grad_norm": 0.9000727453063232, "learning_rate": 9.940446490332125e-06, "loss": 0.397, "step": 2266 }, { "epoch": 0.07779684282772821, "grad_norm": 0.9350466952188573, "learning_rate": 9.940360941820746e-06, "loss": 0.3674, "step": 2267 }, { "epoch": 0.07783115991763899, "grad_norm": 1.0693280063625012, "learning_rate": 9.94027533227697e-06, "loss": 0.4331, "step": 2268 }, { "epoch": 0.07786547700754975, "grad_norm": 0.9439454124319121, "learning_rate": 9.94018966170185e-06, "loss": 0.4556, "step": 2269 }, { "epoch": 0.07789979409746053, "grad_norm": 0.9198508664776434, "learning_rate": 9.940103930096447e-06, "loss": 0.4023, "step": 2270 }, { "epoch": 0.07793411118737131, "grad_norm": 0.8260257215021903, "learning_rate": 9.940018137461822e-06, "loss": 0.3205, "step": 2271 }, { "epoch": 0.07796842827728209, "grad_norm": 0.9646457311901144, "learning_rate": 9.93993228379903e-06, "loss": 0.3936, "step": 2272 }, { "epoch": 0.07800274536719286, "grad_norm": 0.8537255941705258, "learning_rate": 9.939846369109133e-06, "loss": 0.3473, "step": 2273 }, { "epoch": 0.07803706245710364, "grad_norm": 0.9809528570072977, "learning_rate": 9.939760393393197e-06, "loss": 0.3775, "step": 2274 }, { "epoch": 0.07807137954701442, "grad_norm": 0.9144721343184008, "learning_rate": 9.939674356652279e-06, "loss": 0.3533, "step": 2275 }, { "epoch": 0.0781056966369252, "grad_norm": 0.8658344469925152, "learning_rate": 9.93958825888744e-06, "loss": 0.3703, "step": 2276 }, { "epoch": 0.07814001372683596, "grad_norm": 0.8063947027292171, "learning_rate": 9.939502100099752e-06, "loss": 0.3465, "step": 2277 }, { "epoch": 0.07817433081674674, "grad_norm": 0.8325322463640524, "learning_rate": 9.93941588029027e-06, "loss": 0.416, "step": 2278 }, { "epoch": 0.07820864790665752, "grad_norm": 0.7947519314080486, "learning_rate": 9.939329599460068e-06, "loss": 0.3938, "step": 2279 }, { "epoch": 0.07824296499656828, "grad_norm": 0.8731975470127159, "learning_rate": 9.939243257610204e-06, "loss": 0.3549, "step": 2280 }, { "epoch": 0.07827728208647906, "grad_norm": 0.8013633481311908, "learning_rate": 9.939156854741751e-06, "loss": 0.3653, "step": 2281 }, { "epoch": 0.07831159917638984, "grad_norm": 0.7889854164136382, "learning_rate": 9.939070390855772e-06, "loss": 0.3447, "step": 2282 }, { "epoch": 0.07834591626630062, "grad_norm": 0.9015105223809992, "learning_rate": 9.938983865953337e-06, "loss": 0.4348, "step": 2283 }, { "epoch": 0.07838023335621139, "grad_norm": 0.9435693688031233, "learning_rate": 9.938897280035514e-06, "loss": 0.3734, "step": 2284 }, { "epoch": 0.07841455044612217, "grad_norm": 0.8750366943002206, "learning_rate": 9.938810633103375e-06, "loss": 0.3968, "step": 2285 }, { "epoch": 0.07844886753603295, "grad_norm": 0.8381491667203681, "learning_rate": 9.938723925157987e-06, "loss": 0.3922, "step": 2286 }, { "epoch": 0.07848318462594372, "grad_norm": 0.9345675709187823, "learning_rate": 9.938637156200424e-06, "loss": 0.4138, "step": 2287 }, { "epoch": 0.07851750171585449, "grad_norm": 1.0054048314760518, "learning_rate": 9.938550326231755e-06, "loss": 0.393, "step": 2288 }, { "epoch": 0.07855181880576527, "grad_norm": 0.9105583243564734, "learning_rate": 9.938463435253056e-06, "loss": 0.4181, "step": 2289 }, { "epoch": 0.07858613589567605, "grad_norm": 0.8498391943741427, "learning_rate": 9.938376483265399e-06, "loss": 0.3942, "step": 2290 }, { "epoch": 0.07862045298558683, "grad_norm": 0.881230675503703, "learning_rate": 9.938289470269857e-06, "loss": 0.3889, "step": 2291 }, { "epoch": 0.0786547700754976, "grad_norm": 0.7763567845759954, "learning_rate": 9.938202396267506e-06, "loss": 0.3628, "step": 2292 }, { "epoch": 0.07868908716540837, "grad_norm": 0.8517705427380012, "learning_rate": 9.938115261259425e-06, "loss": 0.369, "step": 2293 }, { "epoch": 0.07872340425531915, "grad_norm": 0.7937998297643443, "learning_rate": 9.938028065246684e-06, "loss": 0.3514, "step": 2294 }, { "epoch": 0.07875772134522993, "grad_norm": 0.8671737329077278, "learning_rate": 9.937940808230364e-06, "loss": 0.4404, "step": 2295 }, { "epoch": 0.0787920384351407, "grad_norm": 0.9190403166271617, "learning_rate": 9.937853490211543e-06, "loss": 0.3816, "step": 2296 }, { "epoch": 0.07882635552505148, "grad_norm": 0.9249169556816994, "learning_rate": 9.9377661111913e-06, "loss": 0.408, "step": 2297 }, { "epoch": 0.07886067261496225, "grad_norm": 0.9579565703131698, "learning_rate": 9.937678671170711e-06, "loss": 0.3719, "step": 2298 }, { "epoch": 0.07889498970487303, "grad_norm": 0.9167712887335757, "learning_rate": 9.93759117015086e-06, "loss": 0.4036, "step": 2299 }, { "epoch": 0.0789293067947838, "grad_norm": 0.9209678545700773, "learning_rate": 9.937503608132828e-06, "loss": 0.3746, "step": 2300 }, { "epoch": 0.07896362388469458, "grad_norm": 1.0055267548941502, "learning_rate": 9.937415985117695e-06, "loss": 0.4636, "step": 2301 }, { "epoch": 0.07899794097460536, "grad_norm": 0.8013042157649152, "learning_rate": 9.937328301106544e-06, "loss": 0.4105, "step": 2302 }, { "epoch": 0.07903225806451612, "grad_norm": 0.861514161111024, "learning_rate": 9.937240556100457e-06, "loss": 0.4696, "step": 2303 }, { "epoch": 0.0790665751544269, "grad_norm": 0.8419371892398808, "learning_rate": 9.93715275010052e-06, "loss": 0.3832, "step": 2304 }, { "epoch": 0.07910089224433768, "grad_norm": 0.7829248502053258, "learning_rate": 9.937064883107816e-06, "loss": 0.345, "step": 2305 }, { "epoch": 0.07913520933424846, "grad_norm": 0.9078181606285046, "learning_rate": 9.936976955123432e-06, "loss": 0.3595, "step": 2306 }, { "epoch": 0.07916952642415923, "grad_norm": 0.862128811900413, "learning_rate": 9.936888966148455e-06, "loss": 0.3607, "step": 2307 }, { "epoch": 0.07920384351407, "grad_norm": 0.910226566242721, "learning_rate": 9.936800916183968e-06, "loss": 0.391, "step": 2308 }, { "epoch": 0.07923816060398078, "grad_norm": 0.8725322810947627, "learning_rate": 9.936712805231065e-06, "loss": 0.372, "step": 2309 }, { "epoch": 0.07927247769389156, "grad_norm": 0.8449357769388605, "learning_rate": 9.936624633290829e-06, "loss": 0.3747, "step": 2310 }, { "epoch": 0.07930679478380233, "grad_norm": 0.8938855584546513, "learning_rate": 9.93653640036435e-06, "loss": 0.4236, "step": 2311 }, { "epoch": 0.07934111187371311, "grad_norm": 0.9947628252444024, "learning_rate": 9.93644810645272e-06, "loss": 0.3638, "step": 2312 }, { "epoch": 0.07937542896362389, "grad_norm": 0.8216023053392191, "learning_rate": 9.93635975155703e-06, "loss": 0.3968, "step": 2313 }, { "epoch": 0.07940974605353467, "grad_norm": 0.8948843750500882, "learning_rate": 9.93627133567837e-06, "loss": 0.3975, "step": 2314 }, { "epoch": 0.07944406314344543, "grad_norm": 0.8264989085071923, "learning_rate": 9.936182858817832e-06, "loss": 0.4263, "step": 2315 }, { "epoch": 0.07947838023335621, "grad_norm": 0.9113810992207189, "learning_rate": 9.936094320976511e-06, "loss": 0.4164, "step": 2316 }, { "epoch": 0.07951269732326699, "grad_norm": 0.8371639039074055, "learning_rate": 9.9360057221555e-06, "loss": 0.3645, "step": 2317 }, { "epoch": 0.07954701441317777, "grad_norm": 0.8626786389902945, "learning_rate": 9.935917062355891e-06, "loss": 0.3525, "step": 2318 }, { "epoch": 0.07958133150308853, "grad_norm": 0.8150921543248354, "learning_rate": 9.935828341578784e-06, "loss": 0.3411, "step": 2319 }, { "epoch": 0.07961564859299931, "grad_norm": 0.8449495746326727, "learning_rate": 9.93573955982527e-06, "loss": 0.3575, "step": 2320 }, { "epoch": 0.07964996568291009, "grad_norm": 0.8770459315439613, "learning_rate": 9.93565071709645e-06, "loss": 0.4076, "step": 2321 }, { "epoch": 0.07968428277282086, "grad_norm": 0.9212903483387448, "learning_rate": 9.93556181339342e-06, "loss": 0.3615, "step": 2322 }, { "epoch": 0.07971859986273164, "grad_norm": 0.8701588436916174, "learning_rate": 9.935472848717276e-06, "loss": 0.3361, "step": 2323 }, { "epoch": 0.07975291695264242, "grad_norm": 0.912572273432178, "learning_rate": 9.93538382306912e-06, "loss": 0.3972, "step": 2324 }, { "epoch": 0.0797872340425532, "grad_norm": 0.9133372344348004, "learning_rate": 9.935294736450051e-06, "loss": 0.3998, "step": 2325 }, { "epoch": 0.07982155113246396, "grad_norm": 0.7872193160335382, "learning_rate": 9.93520558886117e-06, "loss": 0.3524, "step": 2326 }, { "epoch": 0.07985586822237474, "grad_norm": 0.9220509638537605, "learning_rate": 9.935116380303578e-06, "loss": 0.3762, "step": 2327 }, { "epoch": 0.07989018531228552, "grad_norm": 0.856485296154473, "learning_rate": 9.935027110778377e-06, "loss": 0.3761, "step": 2328 }, { "epoch": 0.0799245024021963, "grad_norm": 0.8154422773404953, "learning_rate": 9.934937780286668e-06, "loss": 0.3424, "step": 2329 }, { "epoch": 0.07995881949210706, "grad_norm": 0.8813121029775522, "learning_rate": 9.93484838882956e-06, "loss": 0.3486, "step": 2330 }, { "epoch": 0.07999313658201784, "grad_norm": 0.8345004740117113, "learning_rate": 9.93475893640815e-06, "loss": 0.3916, "step": 2331 }, { "epoch": 0.08002745367192862, "grad_norm": 0.8711946614501069, "learning_rate": 9.934669423023548e-06, "loss": 0.4599, "step": 2332 }, { "epoch": 0.0800617707618394, "grad_norm": 0.9168937574743187, "learning_rate": 9.93457984867686e-06, "loss": 0.3642, "step": 2333 }, { "epoch": 0.08009608785175017, "grad_norm": 0.8345219252184396, "learning_rate": 9.934490213369187e-06, "loss": 0.3952, "step": 2334 }, { "epoch": 0.08013040494166095, "grad_norm": 0.8674199492513132, "learning_rate": 9.934400517101645e-06, "loss": 0.369, "step": 2335 }, { "epoch": 0.08016472203157173, "grad_norm": 0.7836780450988251, "learning_rate": 9.934310759875337e-06, "loss": 0.348, "step": 2336 }, { "epoch": 0.0801990391214825, "grad_norm": 0.8853251205198489, "learning_rate": 9.93422094169137e-06, "loss": 0.3834, "step": 2337 }, { "epoch": 0.08023335621139327, "grad_norm": 0.8113195741372842, "learning_rate": 9.934131062550857e-06, "loss": 0.3672, "step": 2338 }, { "epoch": 0.08026767330130405, "grad_norm": 0.8758117906542479, "learning_rate": 9.934041122454907e-06, "loss": 0.3979, "step": 2339 }, { "epoch": 0.08030199039121483, "grad_norm": 0.7045087841452412, "learning_rate": 9.933951121404632e-06, "loss": 0.3205, "step": 2340 }, { "epoch": 0.08033630748112561, "grad_norm": 0.9610186664957295, "learning_rate": 9.933861059401145e-06, "loss": 0.3797, "step": 2341 }, { "epoch": 0.08037062457103637, "grad_norm": 0.7365509749674543, "learning_rate": 9.933770936445553e-06, "loss": 0.3321, "step": 2342 }, { "epoch": 0.08040494166094715, "grad_norm": 0.8525684811961162, "learning_rate": 9.933680752538976e-06, "loss": 0.4612, "step": 2343 }, { "epoch": 0.08043925875085793, "grad_norm": 0.8106546692931588, "learning_rate": 9.933590507682524e-06, "loss": 0.3896, "step": 2344 }, { "epoch": 0.0804735758407687, "grad_norm": 0.8620002274964368, "learning_rate": 9.933500201877315e-06, "loss": 0.3796, "step": 2345 }, { "epoch": 0.08050789293067948, "grad_norm": 0.9452508139351112, "learning_rate": 9.933409835124462e-06, "loss": 0.3518, "step": 2346 }, { "epoch": 0.08054221002059025, "grad_norm": 0.903976086607312, "learning_rate": 9.933319407425084e-06, "loss": 0.3682, "step": 2347 }, { "epoch": 0.08057652711050103, "grad_norm": 0.8918255081165807, "learning_rate": 9.933228918780294e-06, "loss": 0.4058, "step": 2348 }, { "epoch": 0.0806108442004118, "grad_norm": 0.8374666324788042, "learning_rate": 9.933138369191213e-06, "loss": 0.3695, "step": 2349 }, { "epoch": 0.08064516129032258, "grad_norm": 0.8407743281816283, "learning_rate": 9.933047758658959e-06, "loss": 0.3594, "step": 2350 }, { "epoch": 0.08067947838023336, "grad_norm": 0.9013131355808726, "learning_rate": 9.932957087184651e-06, "loss": 0.4516, "step": 2351 }, { "epoch": 0.08071379547014414, "grad_norm": 0.9344266353858955, "learning_rate": 9.93286635476941e-06, "loss": 0.3608, "step": 2352 }, { "epoch": 0.0807481125600549, "grad_norm": 0.9613293122821888, "learning_rate": 9.932775561414356e-06, "loss": 0.4153, "step": 2353 }, { "epoch": 0.08078242964996568, "grad_norm": 0.8423158021322664, "learning_rate": 9.932684707120613e-06, "loss": 0.3826, "step": 2354 }, { "epoch": 0.08081674673987646, "grad_norm": 1.1833378362511942, "learning_rate": 9.932593791889298e-06, "loss": 0.3963, "step": 2355 }, { "epoch": 0.08085106382978724, "grad_norm": 1.2339776886300977, "learning_rate": 9.932502815721538e-06, "loss": 0.3429, "step": 2356 }, { "epoch": 0.080885380919698, "grad_norm": 0.8963925786039498, "learning_rate": 9.932411778618459e-06, "loss": 0.3498, "step": 2357 }, { "epoch": 0.08091969800960878, "grad_norm": 0.8146925255264924, "learning_rate": 9.932320680581183e-06, "loss": 0.3581, "step": 2358 }, { "epoch": 0.08095401509951956, "grad_norm": 0.8782254579876025, "learning_rate": 9.932229521610833e-06, "loss": 0.3702, "step": 2359 }, { "epoch": 0.08098833218943034, "grad_norm": 0.8543966241512518, "learning_rate": 9.932138301708538e-06, "loss": 0.3716, "step": 2360 }, { "epoch": 0.08102264927934111, "grad_norm": 0.9130152970150869, "learning_rate": 9.932047020875426e-06, "loss": 0.3927, "step": 2361 }, { "epoch": 0.08105696636925189, "grad_norm": 0.9081414876686749, "learning_rate": 9.931955679112624e-06, "loss": 0.3862, "step": 2362 }, { "epoch": 0.08109128345916267, "grad_norm": 0.9752873571000918, "learning_rate": 9.931864276421258e-06, "loss": 0.3575, "step": 2363 }, { "epoch": 0.08112560054907343, "grad_norm": 0.9492825909177173, "learning_rate": 9.931772812802457e-06, "loss": 0.3568, "step": 2364 }, { "epoch": 0.08115991763898421, "grad_norm": 0.8872933380832805, "learning_rate": 9.931681288257357e-06, "loss": 0.3778, "step": 2365 }, { "epoch": 0.08119423472889499, "grad_norm": 0.9545465701987376, "learning_rate": 9.931589702787082e-06, "loss": 0.3534, "step": 2366 }, { "epoch": 0.08122855181880577, "grad_norm": 0.8669537746297782, "learning_rate": 9.931498056392766e-06, "loss": 0.3748, "step": 2367 }, { "epoch": 0.08126286890871653, "grad_norm": 0.8464279712207134, "learning_rate": 9.931406349075543e-06, "loss": 0.3978, "step": 2368 }, { "epoch": 0.08129718599862731, "grad_norm": 0.8550516117035774, "learning_rate": 9.931314580836542e-06, "loss": 0.329, "step": 2369 }, { "epoch": 0.08133150308853809, "grad_norm": 0.8608645578509956, "learning_rate": 9.9312227516769e-06, "loss": 0.3836, "step": 2370 }, { "epoch": 0.08136582017844887, "grad_norm": 0.8479015844754415, "learning_rate": 9.93113086159775e-06, "loss": 0.3526, "step": 2371 }, { "epoch": 0.08140013726835964, "grad_norm": 0.8193101044874288, "learning_rate": 9.931038910600226e-06, "loss": 0.396, "step": 2372 }, { "epoch": 0.08143445435827042, "grad_norm": 0.896055717563442, "learning_rate": 9.930946898685465e-06, "loss": 0.431, "step": 2373 }, { "epoch": 0.0814687714481812, "grad_norm": 0.8251209487670098, "learning_rate": 9.930854825854605e-06, "loss": 0.3486, "step": 2374 }, { "epoch": 0.08150308853809198, "grad_norm": 0.810461274322894, "learning_rate": 9.930762692108782e-06, "loss": 0.4162, "step": 2375 }, { "epoch": 0.08153740562800274, "grad_norm": 0.8061391691602661, "learning_rate": 9.930670497449135e-06, "loss": 0.3393, "step": 2376 }, { "epoch": 0.08157172271791352, "grad_norm": 0.876186877522002, "learning_rate": 9.930578241876803e-06, "loss": 0.3567, "step": 2377 }, { "epoch": 0.0816060398078243, "grad_norm": 0.9094136704255812, "learning_rate": 9.930485925392926e-06, "loss": 0.3888, "step": 2378 }, { "epoch": 0.08164035689773508, "grad_norm": 0.8872637539122209, "learning_rate": 9.930393547998643e-06, "loss": 0.4039, "step": 2379 }, { "epoch": 0.08167467398764584, "grad_norm": 0.891171094315155, "learning_rate": 9.930301109695095e-06, "loss": 0.3921, "step": 2380 }, { "epoch": 0.08170899107755662, "grad_norm": 0.8913366778041409, "learning_rate": 9.930208610483425e-06, "loss": 0.4006, "step": 2381 }, { "epoch": 0.0817433081674674, "grad_norm": 0.8611152498794543, "learning_rate": 9.930116050364778e-06, "loss": 0.3616, "step": 2382 }, { "epoch": 0.08177762525737818, "grad_norm": 0.8345850329280261, "learning_rate": 9.930023429340294e-06, "loss": 0.3371, "step": 2383 }, { "epoch": 0.08181194234728895, "grad_norm": 0.8379549562619475, "learning_rate": 9.929930747411117e-06, "loss": 0.3597, "step": 2384 }, { "epoch": 0.08184625943719973, "grad_norm": 0.8625353194041455, "learning_rate": 9.929838004578394e-06, "loss": 0.3596, "step": 2385 }, { "epoch": 0.0818805765271105, "grad_norm": 1.7132719064883704, "learning_rate": 9.929745200843271e-06, "loss": 0.354, "step": 2386 }, { "epoch": 0.08191489361702127, "grad_norm": 0.8427478625397464, "learning_rate": 9.929652336206894e-06, "loss": 0.4018, "step": 2387 }, { "epoch": 0.08194921070693205, "grad_norm": 0.873829667389934, "learning_rate": 9.92955941067041e-06, "loss": 0.3879, "step": 2388 }, { "epoch": 0.08198352779684283, "grad_norm": 0.8485784611033294, "learning_rate": 9.929466424234963e-06, "loss": 0.328, "step": 2389 }, { "epoch": 0.08201784488675361, "grad_norm": 0.783815895407097, "learning_rate": 9.929373376901709e-06, "loss": 0.3666, "step": 2390 }, { "epoch": 0.08205216197666437, "grad_norm": 0.9077360320728501, "learning_rate": 9.929280268671792e-06, "loss": 0.3666, "step": 2391 }, { "epoch": 0.08208647906657515, "grad_norm": 0.8707458627626425, "learning_rate": 9.929187099546367e-06, "loss": 0.3315, "step": 2392 }, { "epoch": 0.08212079615648593, "grad_norm": 0.82370033607672, "learning_rate": 9.92909386952658e-06, "loss": 0.3685, "step": 2393 }, { "epoch": 0.08215511324639671, "grad_norm": 0.8418778215789328, "learning_rate": 9.929000578613586e-06, "loss": 0.3488, "step": 2394 }, { "epoch": 0.08218943033630748, "grad_norm": 0.8921975330917126, "learning_rate": 9.928907226808537e-06, "loss": 0.401, "step": 2395 }, { "epoch": 0.08222374742621825, "grad_norm": 0.8576982977703411, "learning_rate": 9.928813814112583e-06, "loss": 0.3648, "step": 2396 }, { "epoch": 0.08225806451612903, "grad_norm": 0.9464544493490538, "learning_rate": 9.928720340526884e-06, "loss": 0.3967, "step": 2397 }, { "epoch": 0.08229238160603981, "grad_norm": 0.8372187101717082, "learning_rate": 9.92862680605259e-06, "loss": 0.3185, "step": 2398 }, { "epoch": 0.08232669869595058, "grad_norm": 0.9224533881587462, "learning_rate": 9.928533210690859e-06, "loss": 0.3844, "step": 2399 }, { "epoch": 0.08236101578586136, "grad_norm": 0.9476169041842992, "learning_rate": 9.928439554442846e-06, "loss": 0.3665, "step": 2400 }, { "epoch": 0.08239533287577214, "grad_norm": 0.9195219578761059, "learning_rate": 9.928345837309708e-06, "loss": 0.3385, "step": 2401 }, { "epoch": 0.08242964996568292, "grad_norm": 0.8954119258829696, "learning_rate": 9.928252059292603e-06, "loss": 0.3927, "step": 2402 }, { "epoch": 0.08246396705559368, "grad_norm": 0.7593813643432518, "learning_rate": 9.928158220392688e-06, "loss": 0.374, "step": 2403 }, { "epoch": 0.08249828414550446, "grad_norm": 0.7915292244645482, "learning_rate": 9.928064320611126e-06, "loss": 0.3856, "step": 2404 }, { "epoch": 0.08253260123541524, "grad_norm": 0.8634865526033129, "learning_rate": 9.927970359949074e-06, "loss": 0.4139, "step": 2405 }, { "epoch": 0.08256691832532602, "grad_norm": 0.9221345639396124, "learning_rate": 9.927876338407694e-06, "loss": 0.4096, "step": 2406 }, { "epoch": 0.08260123541523678, "grad_norm": 0.8531863867530761, "learning_rate": 9.927782255988148e-06, "loss": 0.3869, "step": 2407 }, { "epoch": 0.08263555250514756, "grad_norm": 0.9274520482809487, "learning_rate": 9.927688112691596e-06, "loss": 0.4296, "step": 2408 }, { "epoch": 0.08266986959505834, "grad_norm": 0.9682999894808159, "learning_rate": 9.927593908519203e-06, "loss": 0.3423, "step": 2409 }, { "epoch": 0.08270418668496911, "grad_norm": 1.0110156001866673, "learning_rate": 9.927499643472133e-06, "loss": 0.3416, "step": 2410 }, { "epoch": 0.08273850377487989, "grad_norm": 0.8953414198909283, "learning_rate": 9.927405317551549e-06, "loss": 0.3529, "step": 2411 }, { "epoch": 0.08277282086479067, "grad_norm": 0.8903199470418762, "learning_rate": 9.927310930758617e-06, "loss": 0.3547, "step": 2412 }, { "epoch": 0.08280713795470145, "grad_norm": 0.8282612558778918, "learning_rate": 9.927216483094504e-06, "loss": 0.3674, "step": 2413 }, { "epoch": 0.08284145504461221, "grad_norm": 0.8842495146031858, "learning_rate": 9.927121974560374e-06, "loss": 0.4182, "step": 2414 }, { "epoch": 0.08287577213452299, "grad_norm": 0.8965293277714135, "learning_rate": 9.9270274051574e-06, "loss": 0.4246, "step": 2415 }, { "epoch": 0.08291008922443377, "grad_norm": 0.8339886777014376, "learning_rate": 9.926932774886744e-06, "loss": 0.3764, "step": 2416 }, { "epoch": 0.08294440631434455, "grad_norm": 0.8793442211213867, "learning_rate": 9.926838083749576e-06, "loss": 0.4269, "step": 2417 }, { "epoch": 0.08297872340425531, "grad_norm": 0.8706328445434756, "learning_rate": 9.926743331747071e-06, "loss": 0.4149, "step": 2418 }, { "epoch": 0.0830130404941661, "grad_norm": 0.9012983343336646, "learning_rate": 9.926648518880395e-06, "loss": 0.3971, "step": 2419 }, { "epoch": 0.08304735758407687, "grad_norm": 0.8744034296741835, "learning_rate": 9.926553645150722e-06, "loss": 0.3602, "step": 2420 }, { "epoch": 0.08308167467398765, "grad_norm": 0.7869859141150978, "learning_rate": 9.926458710559221e-06, "loss": 0.3624, "step": 2421 }, { "epoch": 0.08311599176389842, "grad_norm": 0.8496570536531765, "learning_rate": 9.926363715107065e-06, "loss": 0.3411, "step": 2422 }, { "epoch": 0.0831503088538092, "grad_norm": 0.8994629354942776, "learning_rate": 9.926268658795432e-06, "loss": 0.3689, "step": 2423 }, { "epoch": 0.08318462594371998, "grad_norm": 0.8170207001644675, "learning_rate": 9.926173541625492e-06, "loss": 0.3987, "step": 2424 }, { "epoch": 0.08321894303363075, "grad_norm": 0.7453672332739549, "learning_rate": 9.92607836359842e-06, "loss": 0.33, "step": 2425 }, { "epoch": 0.08325326012354152, "grad_norm": 0.8717094960179528, "learning_rate": 9.925983124715394e-06, "loss": 0.4534, "step": 2426 }, { "epoch": 0.0832875772134523, "grad_norm": 0.8285759510354089, "learning_rate": 9.925887824977589e-06, "loss": 0.3995, "step": 2427 }, { "epoch": 0.08332189430336308, "grad_norm": 0.9010164206781746, "learning_rate": 9.925792464386183e-06, "loss": 0.3368, "step": 2428 }, { "epoch": 0.08335621139327384, "grad_norm": 1.45120184357687, "learning_rate": 9.925697042942355e-06, "loss": 0.4188, "step": 2429 }, { "epoch": 0.08339052848318462, "grad_norm": 0.9628282158064346, "learning_rate": 9.925601560647282e-06, "loss": 0.3826, "step": 2430 }, { "epoch": 0.0834248455730954, "grad_norm": 0.872566447759965, "learning_rate": 9.925506017502145e-06, "loss": 0.3802, "step": 2431 }, { "epoch": 0.08345916266300618, "grad_norm": 0.9227179300994797, "learning_rate": 9.925410413508124e-06, "loss": 0.4112, "step": 2432 }, { "epoch": 0.08349347975291695, "grad_norm": 0.884190197561078, "learning_rate": 9.925314748666399e-06, "loss": 0.3471, "step": 2433 }, { "epoch": 0.08352779684282773, "grad_norm": 0.8811217320255895, "learning_rate": 9.925219022978154e-06, "loss": 0.3606, "step": 2434 }, { "epoch": 0.0835621139327385, "grad_norm": 1.0658978783745474, "learning_rate": 9.925123236444569e-06, "loss": 0.4148, "step": 2435 }, { "epoch": 0.08359643102264928, "grad_norm": 0.8249303193401366, "learning_rate": 9.925027389066828e-06, "loss": 0.3544, "step": 2436 }, { "epoch": 0.08363074811256005, "grad_norm": 0.8222390592662029, "learning_rate": 9.924931480846117e-06, "loss": 0.3842, "step": 2437 }, { "epoch": 0.08366506520247083, "grad_norm": 0.8720742268339479, "learning_rate": 9.924835511783618e-06, "loss": 0.4213, "step": 2438 }, { "epoch": 0.08369938229238161, "grad_norm": 0.8588848983739445, "learning_rate": 9.924739481880518e-06, "loss": 0.4212, "step": 2439 }, { "epoch": 0.08373369938229239, "grad_norm": 0.8479080061677046, "learning_rate": 9.924643391138005e-06, "loss": 0.3992, "step": 2440 }, { "epoch": 0.08376801647220315, "grad_norm": 0.993973330276192, "learning_rate": 9.924547239557263e-06, "loss": 0.3581, "step": 2441 }, { "epoch": 0.08380233356211393, "grad_norm": 0.8968112494188826, "learning_rate": 9.924451027139483e-06, "loss": 0.3444, "step": 2442 }, { "epoch": 0.08383665065202471, "grad_norm": 0.8858826339714934, "learning_rate": 9.92435475388585e-06, "loss": 0.3493, "step": 2443 }, { "epoch": 0.08387096774193549, "grad_norm": 0.9978914696503121, "learning_rate": 9.924258419797556e-06, "loss": 0.3724, "step": 2444 }, { "epoch": 0.08390528483184626, "grad_norm": 0.819198500259465, "learning_rate": 9.924162024875792e-06, "loss": 0.3155, "step": 2445 }, { "epoch": 0.08393960192175703, "grad_norm": 0.8340419965791998, "learning_rate": 9.924065569121745e-06, "loss": 0.3516, "step": 2446 }, { "epoch": 0.08397391901166781, "grad_norm": 0.8349403855280066, "learning_rate": 9.92396905253661e-06, "loss": 0.3083, "step": 2447 }, { "epoch": 0.08400823610157859, "grad_norm": 0.8981467556155217, "learning_rate": 9.923872475121578e-06, "loss": 0.3939, "step": 2448 }, { "epoch": 0.08404255319148936, "grad_norm": 0.9639581367208658, "learning_rate": 9.923775836877842e-06, "loss": 0.3979, "step": 2449 }, { "epoch": 0.08407687028140014, "grad_norm": 0.8371760935691872, "learning_rate": 9.923679137806596e-06, "loss": 0.3563, "step": 2450 }, { "epoch": 0.08411118737131092, "grad_norm": 0.87513396473075, "learning_rate": 9.923582377909035e-06, "loss": 0.3591, "step": 2451 }, { "epoch": 0.08414550446122168, "grad_norm": 0.8272837458894541, "learning_rate": 9.923485557186355e-06, "loss": 0.3926, "step": 2452 }, { "epoch": 0.08417982155113246, "grad_norm": 0.7861249681828347, "learning_rate": 9.92338867563975e-06, "loss": 0.4026, "step": 2453 }, { "epoch": 0.08421413864104324, "grad_norm": 0.8413471498827266, "learning_rate": 9.923291733270418e-06, "loss": 0.3806, "step": 2454 }, { "epoch": 0.08424845573095402, "grad_norm": 0.8149328461307798, "learning_rate": 9.923194730079554e-06, "loss": 0.3944, "step": 2455 }, { "epoch": 0.08428277282086478, "grad_norm": 1.0015536317684561, "learning_rate": 9.923097666068363e-06, "loss": 0.3614, "step": 2456 }, { "epoch": 0.08431708991077556, "grad_norm": 0.8823572304984164, "learning_rate": 9.923000541238039e-06, "loss": 0.3847, "step": 2457 }, { "epoch": 0.08435140700068634, "grad_norm": 0.8111953820735306, "learning_rate": 9.922903355589782e-06, "loss": 0.3524, "step": 2458 }, { "epoch": 0.08438572409059712, "grad_norm": 0.98124852421129, "learning_rate": 9.922806109124793e-06, "loss": 0.3934, "step": 2459 }, { "epoch": 0.08442004118050789, "grad_norm": 0.8834638815235785, "learning_rate": 9.922708801844277e-06, "loss": 0.3878, "step": 2460 }, { "epoch": 0.08445435827041867, "grad_norm": 0.9745965676670062, "learning_rate": 9.922611433749432e-06, "loss": 0.4007, "step": 2461 }, { "epoch": 0.08448867536032945, "grad_norm": 0.8319368960819582, "learning_rate": 9.922514004841461e-06, "loss": 0.357, "step": 2462 }, { "epoch": 0.08452299245024023, "grad_norm": 0.8614500755527466, "learning_rate": 9.92241651512157e-06, "loss": 0.3574, "step": 2463 }, { "epoch": 0.08455730954015099, "grad_norm": 0.8170527821062686, "learning_rate": 9.92231896459096e-06, "loss": 0.3253, "step": 2464 }, { "epoch": 0.08459162663006177, "grad_norm": 0.8432161626459269, "learning_rate": 9.922221353250839e-06, "loss": 0.4274, "step": 2465 }, { "epoch": 0.08462594371997255, "grad_norm": 0.8635613357692609, "learning_rate": 9.92212368110241e-06, "loss": 0.3632, "step": 2466 }, { "epoch": 0.08466026080988333, "grad_norm": 0.8676142441213189, "learning_rate": 9.922025948146885e-06, "loss": 0.428, "step": 2467 }, { "epoch": 0.0846945778997941, "grad_norm": 0.8485798434499818, "learning_rate": 9.921928154385465e-06, "loss": 0.3395, "step": 2468 }, { "epoch": 0.08472889498970487, "grad_norm": 0.8283604865854768, "learning_rate": 9.921830299819364e-06, "loss": 0.3474, "step": 2469 }, { "epoch": 0.08476321207961565, "grad_norm": 0.9474664416616048, "learning_rate": 9.921732384449785e-06, "loss": 0.426, "step": 2470 }, { "epoch": 0.08479752916952643, "grad_norm": 1.1473454577498197, "learning_rate": 9.921634408277942e-06, "loss": 0.3788, "step": 2471 }, { "epoch": 0.0848318462594372, "grad_norm": 0.9861333130316822, "learning_rate": 9.921536371305044e-06, "loss": 0.406, "step": 2472 }, { "epoch": 0.08486616334934798, "grad_norm": 0.9016420520553077, "learning_rate": 9.921438273532302e-06, "loss": 0.3778, "step": 2473 }, { "epoch": 0.08490048043925875, "grad_norm": 0.8461829462035723, "learning_rate": 9.921340114960928e-06, "loss": 0.3528, "step": 2474 }, { "epoch": 0.08493479752916952, "grad_norm": 0.9014306507003178, "learning_rate": 9.921241895592134e-06, "loss": 0.3481, "step": 2475 }, { "epoch": 0.0849691146190803, "grad_norm": 0.823400253591975, "learning_rate": 9.921143615427136e-06, "loss": 0.364, "step": 2476 }, { "epoch": 0.08500343170899108, "grad_norm": 0.8893035238974699, "learning_rate": 9.921045274467144e-06, "loss": 0.4058, "step": 2477 }, { "epoch": 0.08503774879890186, "grad_norm": 0.8314459191901622, "learning_rate": 9.920946872713374e-06, "loss": 0.3746, "step": 2478 }, { "epoch": 0.08507206588881262, "grad_norm": 0.9066371277047741, "learning_rate": 9.920848410167046e-06, "loss": 0.3855, "step": 2479 }, { "epoch": 0.0851063829787234, "grad_norm": 0.8605135414544288, "learning_rate": 9.920749886829371e-06, "loss": 0.3647, "step": 2480 }, { "epoch": 0.08514070006863418, "grad_norm": 1.0634066112085414, "learning_rate": 9.920651302701568e-06, "loss": 0.3834, "step": 2481 }, { "epoch": 0.08517501715854496, "grad_norm": 0.8533251384652767, "learning_rate": 9.920552657784857e-06, "loss": 0.3691, "step": 2482 }, { "epoch": 0.08520933424845573, "grad_norm": 0.8045522851587744, "learning_rate": 9.920453952080453e-06, "loss": 0.3437, "step": 2483 }, { "epoch": 0.0852436513383665, "grad_norm": 1.0323298144737676, "learning_rate": 9.920355185589577e-06, "loss": 0.4644, "step": 2484 }, { "epoch": 0.08527796842827728, "grad_norm": 0.9358928345781939, "learning_rate": 9.92025635831345e-06, "loss": 0.3738, "step": 2485 }, { "epoch": 0.08531228551818806, "grad_norm": 0.9144740229073531, "learning_rate": 9.920157470253291e-06, "loss": 0.3702, "step": 2486 }, { "epoch": 0.08534660260809883, "grad_norm": 0.9176948183398671, "learning_rate": 9.920058521410325e-06, "loss": 0.4498, "step": 2487 }, { "epoch": 0.08538091969800961, "grad_norm": 0.843041202136998, "learning_rate": 9.919959511785771e-06, "loss": 0.3635, "step": 2488 }, { "epoch": 0.08541523678792039, "grad_norm": 0.9417086108719566, "learning_rate": 9.919860441380853e-06, "loss": 0.3902, "step": 2489 }, { "epoch": 0.08544955387783117, "grad_norm": 0.6923020164286913, "learning_rate": 9.919761310196794e-06, "loss": 0.3041, "step": 2490 }, { "epoch": 0.08548387096774193, "grad_norm": 0.8755819051951024, "learning_rate": 9.919662118234822e-06, "loss": 0.3871, "step": 2491 }, { "epoch": 0.08551818805765271, "grad_norm": 0.8255069910051159, "learning_rate": 9.91956286549616e-06, "loss": 0.3627, "step": 2492 }, { "epoch": 0.08555250514756349, "grad_norm": 0.864608718076526, "learning_rate": 9.919463551982033e-06, "loss": 0.3559, "step": 2493 }, { "epoch": 0.08558682223747426, "grad_norm": 0.9411860309740808, "learning_rate": 9.919364177693672e-06, "loss": 0.3955, "step": 2494 }, { "epoch": 0.08562113932738503, "grad_norm": 0.8705303647906968, "learning_rate": 9.9192647426323e-06, "loss": 0.3886, "step": 2495 }, { "epoch": 0.08565545641729581, "grad_norm": 0.9855807508818536, "learning_rate": 9.919165246799147e-06, "loss": 0.3852, "step": 2496 }, { "epoch": 0.08568977350720659, "grad_norm": 0.883686089298468, "learning_rate": 9.919065690195445e-06, "loss": 0.3952, "step": 2497 }, { "epoch": 0.08572409059711736, "grad_norm": 0.8360276098857078, "learning_rate": 9.918966072822417e-06, "loss": 0.346, "step": 2498 }, { "epoch": 0.08575840768702814, "grad_norm": 0.8778995011982066, "learning_rate": 9.918866394681303e-06, "loss": 0.3468, "step": 2499 }, { "epoch": 0.08579272477693892, "grad_norm": 0.8637219537137311, "learning_rate": 9.918766655773327e-06, "loss": 0.3663, "step": 2500 }, { "epoch": 0.0858270418668497, "grad_norm": 0.8297826478636404, "learning_rate": 9.918666856099724e-06, "loss": 0.3658, "step": 2501 }, { "epoch": 0.08586135895676046, "grad_norm": 0.7889040240178083, "learning_rate": 9.918566995661728e-06, "loss": 0.3806, "step": 2502 }, { "epoch": 0.08589567604667124, "grad_norm": 0.8519714900570102, "learning_rate": 9.918467074460569e-06, "loss": 0.3864, "step": 2503 }, { "epoch": 0.08592999313658202, "grad_norm": 0.7979168053689747, "learning_rate": 9.918367092497485e-06, "loss": 0.3357, "step": 2504 }, { "epoch": 0.0859643102264928, "grad_norm": 0.8844725961684861, "learning_rate": 9.91826704977371e-06, "loss": 0.3082, "step": 2505 }, { "epoch": 0.08599862731640356, "grad_norm": 0.9212647164538318, "learning_rate": 9.91816694629048e-06, "loss": 0.377, "step": 2506 }, { "epoch": 0.08603294440631434, "grad_norm": 0.8876630308269731, "learning_rate": 9.91806678204903e-06, "loss": 0.3346, "step": 2507 }, { "epoch": 0.08606726149622512, "grad_norm": 0.8546912460987768, "learning_rate": 9.9179665570506e-06, "loss": 0.3627, "step": 2508 }, { "epoch": 0.0861015785861359, "grad_norm": 0.9145884765040256, "learning_rate": 9.917866271296426e-06, "loss": 0.3549, "step": 2509 }, { "epoch": 0.08613589567604667, "grad_norm": 0.8557109505937306, "learning_rate": 9.917765924787749e-06, "loss": 0.3734, "step": 2510 }, { "epoch": 0.08617021276595745, "grad_norm": 0.8344475048798053, "learning_rate": 9.917665517525807e-06, "loss": 0.4112, "step": 2511 }, { "epoch": 0.08620452985586823, "grad_norm": 0.8799910340619186, "learning_rate": 9.917565049511841e-06, "loss": 0.3465, "step": 2512 }, { "epoch": 0.086238846945779, "grad_norm": 0.9363702498264262, "learning_rate": 9.917464520747092e-06, "loss": 0.374, "step": 2513 }, { "epoch": 0.08627316403568977, "grad_norm": 0.8470432764032957, "learning_rate": 9.917363931232803e-06, "loss": 0.3846, "step": 2514 }, { "epoch": 0.08630748112560055, "grad_norm": 0.8389748699379567, "learning_rate": 9.917263280970213e-06, "loss": 0.3284, "step": 2515 }, { "epoch": 0.08634179821551133, "grad_norm": 0.8424732822840084, "learning_rate": 9.91716256996057e-06, "loss": 0.3671, "step": 2516 }, { "epoch": 0.0863761153054221, "grad_norm": 0.8332217421973885, "learning_rate": 9.917061798205118e-06, "loss": 0.3179, "step": 2517 }, { "epoch": 0.08641043239533287, "grad_norm": 1.0767604116367109, "learning_rate": 9.916960965705098e-06, "loss": 0.3935, "step": 2518 }, { "epoch": 0.08644474948524365, "grad_norm": 0.8407538936164428, "learning_rate": 9.916860072461758e-06, "loss": 0.372, "step": 2519 }, { "epoch": 0.08647906657515443, "grad_norm": 0.8909941136243221, "learning_rate": 9.916759118476344e-06, "loss": 0.4149, "step": 2520 }, { "epoch": 0.0865133836650652, "grad_norm": 0.8730577474487456, "learning_rate": 9.916658103750106e-06, "loss": 0.4372, "step": 2521 }, { "epoch": 0.08654770075497598, "grad_norm": 1.015795603692107, "learning_rate": 9.916557028284287e-06, "loss": 0.331, "step": 2522 }, { "epoch": 0.08658201784488676, "grad_norm": 0.8504161320402872, "learning_rate": 9.916455892080139e-06, "loss": 0.3844, "step": 2523 }, { "epoch": 0.08661633493479753, "grad_norm": 0.8759165326874941, "learning_rate": 9.91635469513891e-06, "loss": 0.4215, "step": 2524 }, { "epoch": 0.0866506520247083, "grad_norm": 0.7936864487925535, "learning_rate": 9.916253437461851e-06, "loss": 0.3512, "step": 2525 }, { "epoch": 0.08668496911461908, "grad_norm": 0.8506549993534492, "learning_rate": 9.916152119050213e-06, "loss": 0.3858, "step": 2526 }, { "epoch": 0.08671928620452986, "grad_norm": 0.7447448164710901, "learning_rate": 9.916050739905246e-06, "loss": 0.293, "step": 2527 }, { "epoch": 0.08675360329444064, "grad_norm": 0.8115436430080761, "learning_rate": 9.915949300028204e-06, "loss": 0.4177, "step": 2528 }, { "epoch": 0.0867879203843514, "grad_norm": 0.8489478503660237, "learning_rate": 9.91584779942034e-06, "loss": 0.3758, "step": 2529 }, { "epoch": 0.08682223747426218, "grad_norm": 0.9542511601837451, "learning_rate": 9.915746238082908e-06, "loss": 0.3849, "step": 2530 }, { "epoch": 0.08685655456417296, "grad_norm": 0.8059692287104011, "learning_rate": 9.915644616017163e-06, "loss": 0.3691, "step": 2531 }, { "epoch": 0.08689087165408374, "grad_norm": 0.8462542551764214, "learning_rate": 9.915542933224359e-06, "loss": 0.3773, "step": 2532 }, { "epoch": 0.0869251887439945, "grad_norm": 0.8849636859197944, "learning_rate": 9.915441189705753e-06, "loss": 0.4166, "step": 2533 }, { "epoch": 0.08695950583390528, "grad_norm": 0.9331254611582918, "learning_rate": 9.915339385462602e-06, "loss": 0.413, "step": 2534 }, { "epoch": 0.08699382292381606, "grad_norm": 0.8838206273173601, "learning_rate": 9.915237520496163e-06, "loss": 0.3493, "step": 2535 }, { "epoch": 0.08702814001372683, "grad_norm": 0.8907581739078693, "learning_rate": 9.915135594807696e-06, "loss": 0.3604, "step": 2536 }, { "epoch": 0.08706245710363761, "grad_norm": 0.8627352424059573, "learning_rate": 9.91503360839846e-06, "loss": 0.3468, "step": 2537 }, { "epoch": 0.08709677419354839, "grad_norm": 0.872390173153754, "learning_rate": 9.914931561269713e-06, "loss": 0.355, "step": 2538 }, { "epoch": 0.08713109128345917, "grad_norm": 0.8742642323967676, "learning_rate": 9.914829453422718e-06, "loss": 0.3477, "step": 2539 }, { "epoch": 0.08716540837336993, "grad_norm": 0.8205134947320241, "learning_rate": 9.914727284858734e-06, "loss": 0.3741, "step": 2540 }, { "epoch": 0.08719972546328071, "grad_norm": 0.8726609849749153, "learning_rate": 9.914625055579024e-06, "loss": 0.3345, "step": 2541 }, { "epoch": 0.08723404255319149, "grad_norm": 0.8006973988933338, "learning_rate": 9.914522765584853e-06, "loss": 0.3234, "step": 2542 }, { "epoch": 0.08726835964310227, "grad_norm": 0.8141422689800988, "learning_rate": 9.914420414877482e-06, "loss": 0.364, "step": 2543 }, { "epoch": 0.08730267673301303, "grad_norm": 0.851828330586195, "learning_rate": 9.914318003458177e-06, "loss": 0.3939, "step": 2544 }, { "epoch": 0.08733699382292381, "grad_norm": 0.8950808530831365, "learning_rate": 9.914215531328201e-06, "loss": 0.433, "step": 2545 }, { "epoch": 0.0873713109128346, "grad_norm": 0.8451954462197216, "learning_rate": 9.914112998488823e-06, "loss": 0.3858, "step": 2546 }, { "epoch": 0.08740562800274537, "grad_norm": 0.924689010197254, "learning_rate": 9.914010404941308e-06, "loss": 0.3734, "step": 2547 }, { "epoch": 0.08743994509265614, "grad_norm": 0.828899414311597, "learning_rate": 9.913907750686923e-06, "loss": 0.3513, "step": 2548 }, { "epoch": 0.08747426218256692, "grad_norm": 0.8089029686888152, "learning_rate": 9.913805035726939e-06, "loss": 0.3745, "step": 2549 }, { "epoch": 0.0875085792724777, "grad_norm": 0.8614033782414092, "learning_rate": 9.913702260062618e-06, "loss": 0.3482, "step": 2550 }, { "epoch": 0.08754289636238848, "grad_norm": 0.8569187922620773, "learning_rate": 9.913599423695238e-06, "loss": 0.4196, "step": 2551 }, { "epoch": 0.08757721345229924, "grad_norm": 0.9599205106753235, "learning_rate": 9.913496526626065e-06, "loss": 0.4086, "step": 2552 }, { "epoch": 0.08761153054221002, "grad_norm": 0.9351831189571462, "learning_rate": 9.913393568856372e-06, "loss": 0.4038, "step": 2553 }, { "epoch": 0.0876458476321208, "grad_norm": 0.8632014194172218, "learning_rate": 9.913290550387428e-06, "loss": 0.3193, "step": 2554 }, { "epoch": 0.08768016472203158, "grad_norm": 0.8632027648478783, "learning_rate": 9.913187471220508e-06, "loss": 0.3769, "step": 2555 }, { "epoch": 0.08771448181194234, "grad_norm": 0.9429622690432738, "learning_rate": 9.913084331356886e-06, "loss": 0.3927, "step": 2556 }, { "epoch": 0.08774879890185312, "grad_norm": 0.9529062856867098, "learning_rate": 9.912981130797836e-06, "loss": 0.3793, "step": 2557 }, { "epoch": 0.0877831159917639, "grad_norm": 1.0590944636679949, "learning_rate": 9.91287786954463e-06, "loss": 0.4018, "step": 2558 }, { "epoch": 0.08781743308167467, "grad_norm": 0.9624296475305535, "learning_rate": 9.912774547598547e-06, "loss": 0.3553, "step": 2559 }, { "epoch": 0.08785175017158545, "grad_norm": 0.8398597189432524, "learning_rate": 9.912671164960862e-06, "loss": 0.3861, "step": 2560 }, { "epoch": 0.08788606726149623, "grad_norm": 0.8908332778140763, "learning_rate": 9.912567721632854e-06, "loss": 0.4369, "step": 2561 }, { "epoch": 0.087920384351407, "grad_norm": 0.877917281991138, "learning_rate": 9.912464217615797e-06, "loss": 0.342, "step": 2562 }, { "epoch": 0.08795470144131777, "grad_norm": 0.8027829023688695, "learning_rate": 9.912360652910972e-06, "loss": 0.3843, "step": 2563 }, { "epoch": 0.08798901853122855, "grad_norm": 0.8349275957046153, "learning_rate": 9.912257027519659e-06, "loss": 0.4048, "step": 2564 }, { "epoch": 0.08802333562113933, "grad_norm": 0.8653207693811705, "learning_rate": 9.912153341443138e-06, "loss": 0.3168, "step": 2565 }, { "epoch": 0.08805765271105011, "grad_norm": 0.9049870934649233, "learning_rate": 9.91204959468269e-06, "loss": 0.3777, "step": 2566 }, { "epoch": 0.08809196980096087, "grad_norm": 0.8929967967171314, "learning_rate": 9.911945787239594e-06, "loss": 0.3832, "step": 2567 }, { "epoch": 0.08812628689087165, "grad_norm": 0.8734644510899631, "learning_rate": 9.911841919115135e-06, "loss": 0.3937, "step": 2568 }, { "epoch": 0.08816060398078243, "grad_norm": 0.843797462451251, "learning_rate": 9.911737990310596e-06, "loss": 0.4029, "step": 2569 }, { "epoch": 0.08819492107069321, "grad_norm": 0.8013353590836788, "learning_rate": 9.911634000827263e-06, "loss": 0.32, "step": 2570 }, { "epoch": 0.08822923816060398, "grad_norm": 0.9232260705943511, "learning_rate": 9.911529950666416e-06, "loss": 0.3907, "step": 2571 }, { "epoch": 0.08826355525051476, "grad_norm": 0.7772659374400311, "learning_rate": 9.911425839829343e-06, "loss": 0.3996, "step": 2572 }, { "epoch": 0.08829787234042553, "grad_norm": 0.8059478162758557, "learning_rate": 9.91132166831733e-06, "loss": 0.3684, "step": 2573 }, { "epoch": 0.08833218943033631, "grad_norm": 0.8203126235045234, "learning_rate": 9.911217436131663e-06, "loss": 0.3394, "step": 2574 }, { "epoch": 0.08836650652024708, "grad_norm": 0.7566056360897875, "learning_rate": 9.911113143273634e-06, "loss": 0.3099, "step": 2575 }, { "epoch": 0.08840082361015786, "grad_norm": 0.9218743144429522, "learning_rate": 9.911008789744525e-06, "loss": 0.3882, "step": 2576 }, { "epoch": 0.08843514070006864, "grad_norm": 0.7598808949267828, "learning_rate": 9.910904375545628e-06, "loss": 0.4052, "step": 2577 }, { "epoch": 0.08846945778997942, "grad_norm": 0.8641015049563251, "learning_rate": 9.910799900678235e-06, "loss": 0.3676, "step": 2578 }, { "epoch": 0.08850377487989018, "grad_norm": 0.9014328657418106, "learning_rate": 9.910695365143633e-06, "loss": 0.347, "step": 2579 }, { "epoch": 0.08853809196980096, "grad_norm": 0.8316285639925022, "learning_rate": 9.910590768943114e-06, "loss": 0.376, "step": 2580 }, { "epoch": 0.08857240905971174, "grad_norm": 0.8260674905113876, "learning_rate": 9.910486112077973e-06, "loss": 0.3681, "step": 2581 }, { "epoch": 0.0886067261496225, "grad_norm": 0.9559891649042203, "learning_rate": 9.910381394549499e-06, "loss": 0.3439, "step": 2582 }, { "epoch": 0.08864104323953328, "grad_norm": 0.899010848772272, "learning_rate": 9.910276616358991e-06, "loss": 0.4391, "step": 2583 }, { "epoch": 0.08867536032944406, "grad_norm": 0.8439712483387362, "learning_rate": 9.910171777507737e-06, "loss": 0.3704, "step": 2584 }, { "epoch": 0.08870967741935484, "grad_norm": 0.9244738898030975, "learning_rate": 9.910066877997038e-06, "loss": 0.403, "step": 2585 }, { "epoch": 0.08874399450926561, "grad_norm": 0.769285481930372, "learning_rate": 9.909961917828186e-06, "loss": 0.3186, "step": 2586 }, { "epoch": 0.08877831159917639, "grad_norm": 1.0408378134651297, "learning_rate": 9.90985689700248e-06, "loss": 0.3759, "step": 2587 }, { "epoch": 0.08881262868908717, "grad_norm": 0.8121112591732703, "learning_rate": 9.909751815521216e-06, "loss": 0.337, "step": 2588 }, { "epoch": 0.08884694577899795, "grad_norm": 0.9422282104226497, "learning_rate": 9.909646673385692e-06, "loss": 0.4115, "step": 2589 }, { "epoch": 0.08888126286890871, "grad_norm": 0.9196701429190222, "learning_rate": 9.909541470597209e-06, "loss": 0.3909, "step": 2590 }, { "epoch": 0.08891557995881949, "grad_norm": 0.8929441231703755, "learning_rate": 9.909436207157063e-06, "loss": 0.3904, "step": 2591 }, { "epoch": 0.08894989704873027, "grad_norm": 0.8747952953601389, "learning_rate": 9.909330883066556e-06, "loss": 0.4422, "step": 2592 }, { "epoch": 0.08898421413864105, "grad_norm": 0.9295679097985181, "learning_rate": 9.909225498326993e-06, "loss": 0.383, "step": 2593 }, { "epoch": 0.08901853122855181, "grad_norm": 0.8362203797767725, "learning_rate": 9.909120052939669e-06, "loss": 0.3866, "step": 2594 }, { "epoch": 0.0890528483184626, "grad_norm": 0.8425834403886084, "learning_rate": 9.909014546905894e-06, "loss": 0.385, "step": 2595 }, { "epoch": 0.08908716540837337, "grad_norm": 1.2677967723777441, "learning_rate": 9.908908980226963e-06, "loss": 0.3958, "step": 2596 }, { "epoch": 0.08912148249828415, "grad_norm": 0.8774297198273919, "learning_rate": 9.908803352904188e-06, "loss": 0.3367, "step": 2597 }, { "epoch": 0.08915579958819492, "grad_norm": 0.8233208233404244, "learning_rate": 9.908697664938869e-06, "loss": 0.3775, "step": 2598 }, { "epoch": 0.0891901166781057, "grad_norm": 0.7843561371598, "learning_rate": 9.908591916332315e-06, "loss": 0.3432, "step": 2599 }, { "epoch": 0.08922443376801648, "grad_norm": 0.7852102427071943, "learning_rate": 9.908486107085829e-06, "loss": 0.3667, "step": 2600 }, { "epoch": 0.08925875085792724, "grad_norm": 0.9715550424941911, "learning_rate": 9.908380237200721e-06, "loss": 0.3737, "step": 2601 }, { "epoch": 0.08929306794783802, "grad_norm": 0.8040199276300045, "learning_rate": 9.908274306678297e-06, "loss": 0.3516, "step": 2602 }, { "epoch": 0.0893273850377488, "grad_norm": 0.7791569487606664, "learning_rate": 9.908168315519868e-06, "loss": 0.3517, "step": 2603 }, { "epoch": 0.08936170212765958, "grad_norm": 1.0938400687486092, "learning_rate": 9.90806226372674e-06, "loss": 0.3707, "step": 2604 }, { "epoch": 0.08939601921757034, "grad_norm": 0.9148820186990398, "learning_rate": 9.907956151300227e-06, "loss": 0.3944, "step": 2605 }, { "epoch": 0.08943033630748112, "grad_norm": 0.8571741344664207, "learning_rate": 9.907849978241637e-06, "loss": 0.3359, "step": 2606 }, { "epoch": 0.0894646533973919, "grad_norm": 0.828639571978262, "learning_rate": 9.907743744552284e-06, "loss": 0.3148, "step": 2607 }, { "epoch": 0.08949897048730268, "grad_norm": 1.0394898531391918, "learning_rate": 9.907637450233476e-06, "loss": 0.3777, "step": 2608 }, { "epoch": 0.08953328757721345, "grad_norm": 0.8798287491183256, "learning_rate": 9.907531095286529e-06, "loss": 0.4078, "step": 2609 }, { "epoch": 0.08956760466712423, "grad_norm": 0.8786403630874596, "learning_rate": 9.907424679712758e-06, "loss": 0.3932, "step": 2610 }, { "epoch": 0.089601921757035, "grad_norm": 0.9567237371321602, "learning_rate": 9.907318203513477e-06, "loss": 0.3577, "step": 2611 }, { "epoch": 0.08963623884694578, "grad_norm": 0.8015265155759254, "learning_rate": 9.907211666690001e-06, "loss": 0.3386, "step": 2612 }, { "epoch": 0.08967055593685655, "grad_norm": 0.8370872791140679, "learning_rate": 9.907105069243645e-06, "loss": 0.3802, "step": 2613 }, { "epoch": 0.08970487302676733, "grad_norm": 0.8396223573687605, "learning_rate": 9.906998411175728e-06, "loss": 0.3556, "step": 2614 }, { "epoch": 0.08973919011667811, "grad_norm": 0.949830594487073, "learning_rate": 9.906891692487566e-06, "loss": 0.378, "step": 2615 }, { "epoch": 0.08977350720658889, "grad_norm": 0.7893438136889459, "learning_rate": 9.90678491318048e-06, "loss": 0.3766, "step": 2616 }, { "epoch": 0.08980782429649965, "grad_norm": 0.8387774831176559, "learning_rate": 9.906678073255786e-06, "loss": 0.3908, "step": 2617 }, { "epoch": 0.08984214138641043, "grad_norm": 0.905876705751349, "learning_rate": 9.906571172714804e-06, "loss": 0.3807, "step": 2618 }, { "epoch": 0.08987645847632121, "grad_norm": 0.9207121915833111, "learning_rate": 9.906464211558856e-06, "loss": 0.4196, "step": 2619 }, { "epoch": 0.08991077556623199, "grad_norm": 0.9295422838751115, "learning_rate": 9.906357189789265e-06, "loss": 0.4458, "step": 2620 }, { "epoch": 0.08994509265614276, "grad_norm": 0.9304937279089236, "learning_rate": 9.906250107407347e-06, "loss": 0.3748, "step": 2621 }, { "epoch": 0.08997940974605353, "grad_norm": 0.8219371511323065, "learning_rate": 9.906142964414434e-06, "loss": 0.3631, "step": 2622 }, { "epoch": 0.09001372683596431, "grad_norm": 0.8960163859652633, "learning_rate": 9.906035760811841e-06, "loss": 0.3863, "step": 2623 }, { "epoch": 0.09004804392587508, "grad_norm": 0.7641350591266384, "learning_rate": 9.905928496600897e-06, "loss": 0.3337, "step": 2624 }, { "epoch": 0.09008236101578586, "grad_norm": 0.9021843319392795, "learning_rate": 9.905821171782929e-06, "loss": 0.338, "step": 2625 }, { "epoch": 0.09011667810569664, "grad_norm": 0.7675441848286718, "learning_rate": 9.905713786359255e-06, "loss": 0.4047, "step": 2626 }, { "epoch": 0.09015099519560742, "grad_norm": 0.85638100540276, "learning_rate": 9.90560634033121e-06, "loss": 0.3579, "step": 2627 }, { "epoch": 0.09018531228551818, "grad_norm": 0.7720257779265943, "learning_rate": 9.905498833700118e-06, "loss": 0.3426, "step": 2628 }, { "epoch": 0.09021962937542896, "grad_norm": 0.8873177776334705, "learning_rate": 9.905391266467306e-06, "loss": 0.3347, "step": 2629 }, { "epoch": 0.09025394646533974, "grad_norm": 0.9340692159507287, "learning_rate": 9.905283638634104e-06, "loss": 0.3946, "step": 2630 }, { "epoch": 0.09028826355525052, "grad_norm": 0.8262875860191283, "learning_rate": 9.905175950201841e-06, "loss": 0.3931, "step": 2631 }, { "epoch": 0.09032258064516129, "grad_norm": 1.000775969742467, "learning_rate": 9.90506820117185e-06, "loss": 0.4335, "step": 2632 }, { "epoch": 0.09035689773507206, "grad_norm": 0.8925406110238849, "learning_rate": 9.904960391545459e-06, "loss": 0.3813, "step": 2633 }, { "epoch": 0.09039121482498284, "grad_norm": 0.9846135570003826, "learning_rate": 9.904852521324003e-06, "loss": 0.391, "step": 2634 }, { "epoch": 0.09042553191489362, "grad_norm": 0.8445879439687517, "learning_rate": 9.90474459050881e-06, "loss": 0.3579, "step": 2635 }, { "epoch": 0.09045984900480439, "grad_norm": 0.851616477595903, "learning_rate": 9.904636599101217e-06, "loss": 0.3713, "step": 2636 }, { "epoch": 0.09049416609471517, "grad_norm": 0.9134814592007027, "learning_rate": 9.904528547102556e-06, "loss": 0.4131, "step": 2637 }, { "epoch": 0.09052848318462595, "grad_norm": 0.935298116565896, "learning_rate": 9.904420434514164e-06, "loss": 0.3683, "step": 2638 }, { "epoch": 0.09056280027453673, "grad_norm": 0.8099853964168904, "learning_rate": 9.904312261337376e-06, "loss": 0.3284, "step": 2639 }, { "epoch": 0.09059711736444749, "grad_norm": 0.7184090661459948, "learning_rate": 9.904204027573528e-06, "loss": 0.2973, "step": 2640 }, { "epoch": 0.09063143445435827, "grad_norm": 0.8177505818307111, "learning_rate": 9.904095733223956e-06, "loss": 0.348, "step": 2641 }, { "epoch": 0.09066575154426905, "grad_norm": 0.9110193339557767, "learning_rate": 9.903987378289998e-06, "loss": 0.3448, "step": 2642 }, { "epoch": 0.09070006863417983, "grad_norm": 0.854956047286799, "learning_rate": 9.903878962772995e-06, "loss": 0.4046, "step": 2643 }, { "epoch": 0.0907343857240906, "grad_norm": 0.7470915696458098, "learning_rate": 9.903770486674284e-06, "loss": 0.3243, "step": 2644 }, { "epoch": 0.09076870281400137, "grad_norm": 0.841860871204433, "learning_rate": 9.903661949995207e-06, "loss": 0.3873, "step": 2645 }, { "epoch": 0.09080301990391215, "grad_norm": 0.7312684488863073, "learning_rate": 9.903553352737104e-06, "loss": 0.3407, "step": 2646 }, { "epoch": 0.09083733699382292, "grad_norm": 0.9045193314703711, "learning_rate": 9.903444694901315e-06, "loss": 0.3724, "step": 2647 }, { "epoch": 0.0908716540837337, "grad_norm": 0.8880514649749812, "learning_rate": 9.903335976489183e-06, "loss": 0.3513, "step": 2648 }, { "epoch": 0.09090597117364448, "grad_norm": 0.7991772226931614, "learning_rate": 9.903227197502051e-06, "loss": 0.3743, "step": 2649 }, { "epoch": 0.09094028826355526, "grad_norm": 0.8261934642083321, "learning_rate": 9.903118357941266e-06, "loss": 0.4456, "step": 2650 }, { "epoch": 0.09097460535346602, "grad_norm": 0.8210897648725346, "learning_rate": 9.90300945780817e-06, "loss": 0.3378, "step": 2651 }, { "epoch": 0.0910089224433768, "grad_norm": 0.8595239852350113, "learning_rate": 9.902900497104109e-06, "loss": 0.3662, "step": 2652 }, { "epoch": 0.09104323953328758, "grad_norm": 0.8415896986207174, "learning_rate": 9.902791475830426e-06, "loss": 0.3736, "step": 2653 }, { "epoch": 0.09107755662319836, "grad_norm": 0.8447181876925958, "learning_rate": 9.902682393988471e-06, "loss": 0.3968, "step": 2654 }, { "epoch": 0.09111187371310912, "grad_norm": 0.847291271999308, "learning_rate": 9.902573251579594e-06, "loss": 0.4319, "step": 2655 }, { "epoch": 0.0911461908030199, "grad_norm": 0.8900997816785214, "learning_rate": 9.902464048605138e-06, "loss": 0.3925, "step": 2656 }, { "epoch": 0.09118050789293068, "grad_norm": 0.7394433974495679, "learning_rate": 9.902354785066454e-06, "loss": 0.2811, "step": 2657 }, { "epoch": 0.09121482498284146, "grad_norm": 0.7804777431515774, "learning_rate": 9.902245460964892e-06, "loss": 0.3528, "step": 2658 }, { "epoch": 0.09124914207275223, "grad_norm": 0.9590724689017847, "learning_rate": 9.902136076301805e-06, "loss": 0.3551, "step": 2659 }, { "epoch": 0.091283459162663, "grad_norm": 0.7631104795893033, "learning_rate": 9.90202663107854e-06, "loss": 0.4436, "step": 2660 }, { "epoch": 0.09131777625257378, "grad_norm": 0.8283753490004251, "learning_rate": 9.901917125296452e-06, "loss": 0.4174, "step": 2661 }, { "epoch": 0.09135209334248456, "grad_norm": 0.81547410808606, "learning_rate": 9.901807558956893e-06, "loss": 0.3775, "step": 2662 }, { "epoch": 0.09138641043239533, "grad_norm": 0.8868334237514649, "learning_rate": 9.901697932061217e-06, "loss": 0.3785, "step": 2663 }, { "epoch": 0.09142072752230611, "grad_norm": 0.812885448501182, "learning_rate": 9.901588244610776e-06, "loss": 0.4008, "step": 2664 }, { "epoch": 0.09145504461221689, "grad_norm": 0.8037061770700561, "learning_rate": 9.90147849660693e-06, "loss": 0.3982, "step": 2665 }, { "epoch": 0.09148936170212765, "grad_norm": 0.7654860260603868, "learning_rate": 9.90136868805103e-06, "loss": 0.3584, "step": 2666 }, { "epoch": 0.09152367879203843, "grad_norm": 0.9036719287819398, "learning_rate": 9.901258818944433e-06, "loss": 0.3815, "step": 2667 }, { "epoch": 0.09155799588194921, "grad_norm": 0.8910775087355483, "learning_rate": 9.9011488892885e-06, "loss": 0.374, "step": 2668 }, { "epoch": 0.09159231297185999, "grad_norm": 0.8410814036109414, "learning_rate": 9.901038899084585e-06, "loss": 0.337, "step": 2669 }, { "epoch": 0.09162663006177076, "grad_norm": 0.9341818598964029, "learning_rate": 9.90092884833405e-06, "loss": 0.3635, "step": 2670 }, { "epoch": 0.09166094715168153, "grad_norm": 0.9930622930260318, "learning_rate": 9.900818737038251e-06, "loss": 0.3775, "step": 2671 }, { "epoch": 0.09169526424159231, "grad_norm": 0.8095208383883523, "learning_rate": 9.900708565198552e-06, "loss": 0.3171, "step": 2672 }, { "epoch": 0.0917295813315031, "grad_norm": 0.8010349542252908, "learning_rate": 9.900598332816312e-06, "loss": 0.3356, "step": 2673 }, { "epoch": 0.09176389842141386, "grad_norm": 0.7501137873587865, "learning_rate": 9.900488039892892e-06, "loss": 0.2968, "step": 2674 }, { "epoch": 0.09179821551132464, "grad_norm": 1.2180101989624232, "learning_rate": 9.900377686429656e-06, "loss": 0.3973, "step": 2675 }, { "epoch": 0.09183253260123542, "grad_norm": 0.8585013719508764, "learning_rate": 9.900267272427968e-06, "loss": 0.3905, "step": 2676 }, { "epoch": 0.0918668496911462, "grad_norm": 0.8394865284264473, "learning_rate": 9.900156797889192e-06, "loss": 0.3847, "step": 2677 }, { "epoch": 0.09190116678105696, "grad_norm": 0.7717190716634504, "learning_rate": 9.900046262814689e-06, "loss": 0.3082, "step": 2678 }, { "epoch": 0.09193548387096774, "grad_norm": 0.835961968362429, "learning_rate": 9.899935667205829e-06, "loss": 0.3254, "step": 2679 }, { "epoch": 0.09196980096087852, "grad_norm": 0.864775014514809, "learning_rate": 9.899825011063976e-06, "loss": 0.3644, "step": 2680 }, { "epoch": 0.0920041180507893, "grad_norm": 0.8732484774618181, "learning_rate": 9.899714294390498e-06, "loss": 0.3931, "step": 2681 }, { "epoch": 0.09203843514070006, "grad_norm": 0.8311117181699542, "learning_rate": 9.899603517186761e-06, "loss": 0.3743, "step": 2682 }, { "epoch": 0.09207275223061084, "grad_norm": 0.8136754593338154, "learning_rate": 9.899492679454137e-06, "loss": 0.3476, "step": 2683 }, { "epoch": 0.09210706932052162, "grad_norm": 0.8844928227377996, "learning_rate": 9.899381781193993e-06, "loss": 0.3792, "step": 2684 }, { "epoch": 0.0921413864104324, "grad_norm": 0.8365464167986699, "learning_rate": 9.899270822407699e-06, "loss": 0.4462, "step": 2685 }, { "epoch": 0.09217570350034317, "grad_norm": 0.8480338322010014, "learning_rate": 9.899159803096624e-06, "loss": 0.3338, "step": 2686 }, { "epoch": 0.09221002059025395, "grad_norm": 0.7512263081003525, "learning_rate": 9.899048723262144e-06, "loss": 0.3299, "step": 2687 }, { "epoch": 0.09224433768016473, "grad_norm": 0.8994034062204361, "learning_rate": 9.898937582905629e-06, "loss": 0.448, "step": 2688 }, { "epoch": 0.09227865477007549, "grad_norm": 0.8465293913279555, "learning_rate": 9.89882638202845e-06, "loss": 0.3575, "step": 2689 }, { "epoch": 0.09231297185998627, "grad_norm": 0.8157678499994027, "learning_rate": 9.898715120631983e-06, "loss": 0.3608, "step": 2690 }, { "epoch": 0.09234728894989705, "grad_norm": 0.897381867449327, "learning_rate": 9.898603798717602e-06, "loss": 0.323, "step": 2691 }, { "epoch": 0.09238160603980783, "grad_norm": 0.9016121417279745, "learning_rate": 9.898492416286683e-06, "loss": 0.4124, "step": 2692 }, { "epoch": 0.0924159231297186, "grad_norm": 0.7962044642379498, "learning_rate": 9.8983809733406e-06, "loss": 0.36, "step": 2693 }, { "epoch": 0.09245024021962937, "grad_norm": 0.8757632799128444, "learning_rate": 9.898269469880733e-06, "loss": 0.378, "step": 2694 }, { "epoch": 0.09248455730954015, "grad_norm": 0.9114311899326614, "learning_rate": 9.898157905908456e-06, "loss": 0.4552, "step": 2695 }, { "epoch": 0.09251887439945093, "grad_norm": 0.8840153890794201, "learning_rate": 9.898046281425148e-06, "loss": 0.3761, "step": 2696 }, { "epoch": 0.0925531914893617, "grad_norm": 0.8144398599924665, "learning_rate": 9.89793459643219e-06, "loss": 0.3838, "step": 2697 }, { "epoch": 0.09258750857927248, "grad_norm": 0.8571705733777177, "learning_rate": 9.897822850930961e-06, "loss": 0.3687, "step": 2698 }, { "epoch": 0.09262182566918326, "grad_norm": 0.8381663557342602, "learning_rate": 9.89771104492284e-06, "loss": 0.3688, "step": 2699 }, { "epoch": 0.09265614275909403, "grad_norm": 0.852828046455236, "learning_rate": 9.89759917840921e-06, "loss": 0.3637, "step": 2700 }, { "epoch": 0.0926904598490048, "grad_norm": 0.9193570613888421, "learning_rate": 9.897487251391451e-06, "loss": 0.41, "step": 2701 }, { "epoch": 0.09272477693891558, "grad_norm": 0.8601326142381949, "learning_rate": 9.897375263870947e-06, "loss": 0.3722, "step": 2702 }, { "epoch": 0.09275909402882636, "grad_norm": 0.8465691552946754, "learning_rate": 9.897263215849084e-06, "loss": 0.3966, "step": 2703 }, { "epoch": 0.09279341111873714, "grad_norm": 0.8315345607863638, "learning_rate": 9.897151107327242e-06, "loss": 0.3827, "step": 2704 }, { "epoch": 0.0928277282086479, "grad_norm": 0.8816071277258754, "learning_rate": 9.897038938306806e-06, "loss": 0.3904, "step": 2705 }, { "epoch": 0.09286204529855868, "grad_norm": 0.8455876670365345, "learning_rate": 9.896926708789165e-06, "loss": 0.3359, "step": 2706 }, { "epoch": 0.09289636238846946, "grad_norm": 0.7873023988441726, "learning_rate": 9.896814418775703e-06, "loss": 0.3466, "step": 2707 }, { "epoch": 0.09293067947838023, "grad_norm": 0.8293672059208017, "learning_rate": 9.896702068267808e-06, "loss": 0.3803, "step": 2708 }, { "epoch": 0.092964996568291, "grad_norm": 0.8543572343495843, "learning_rate": 9.896589657266868e-06, "loss": 0.3706, "step": 2709 }, { "epoch": 0.09299931365820178, "grad_norm": 1.006483282612912, "learning_rate": 9.896477185774273e-06, "loss": 0.408, "step": 2710 }, { "epoch": 0.09303363074811256, "grad_norm": 0.8361628255597192, "learning_rate": 9.89636465379141e-06, "loss": 0.309, "step": 2711 }, { "epoch": 0.09306794783802333, "grad_norm": 0.7608765708093078, "learning_rate": 9.89625206131967e-06, "loss": 0.341, "step": 2712 }, { "epoch": 0.09310226492793411, "grad_norm": 0.8839440040792864, "learning_rate": 9.896139408360445e-06, "loss": 0.4331, "step": 2713 }, { "epoch": 0.09313658201784489, "grad_norm": 0.8780576236622781, "learning_rate": 9.896026694915124e-06, "loss": 0.3769, "step": 2714 }, { "epoch": 0.09317089910775567, "grad_norm": 0.8824640566505149, "learning_rate": 9.895913920985105e-06, "loss": 0.3412, "step": 2715 }, { "epoch": 0.09320521619766643, "grad_norm": 0.8874737869464411, "learning_rate": 9.895801086571775e-06, "loss": 0.3337, "step": 2716 }, { "epoch": 0.09323953328757721, "grad_norm": 0.8974907269327448, "learning_rate": 9.895688191676532e-06, "loss": 0.3421, "step": 2717 }, { "epoch": 0.09327385037748799, "grad_norm": 0.8607901470504523, "learning_rate": 9.895575236300768e-06, "loss": 0.4085, "step": 2718 }, { "epoch": 0.09330816746739877, "grad_norm": 0.9053122163269336, "learning_rate": 9.895462220445879e-06, "loss": 0.3804, "step": 2719 }, { "epoch": 0.09334248455730954, "grad_norm": 0.8396118909523185, "learning_rate": 9.895349144113263e-06, "loss": 0.3594, "step": 2720 }, { "epoch": 0.09337680164722031, "grad_norm": 0.8641250617663074, "learning_rate": 9.895236007304316e-06, "loss": 0.3828, "step": 2721 }, { "epoch": 0.0934111187371311, "grad_norm": 0.9793986403980364, "learning_rate": 9.895122810020435e-06, "loss": 0.3405, "step": 2722 }, { "epoch": 0.09344543582704187, "grad_norm": 0.7591513747571591, "learning_rate": 9.895009552263019e-06, "loss": 0.3858, "step": 2723 }, { "epoch": 0.09347975291695264, "grad_norm": 0.9101468079398524, "learning_rate": 9.894896234033467e-06, "loss": 0.3765, "step": 2724 }, { "epoch": 0.09351407000686342, "grad_norm": 0.8401712264395551, "learning_rate": 9.894782855333179e-06, "loss": 0.351, "step": 2725 }, { "epoch": 0.0935483870967742, "grad_norm": 0.9584398062046995, "learning_rate": 9.894669416163554e-06, "loss": 0.3689, "step": 2726 }, { "epoch": 0.09358270418668498, "grad_norm": 0.8153632100243808, "learning_rate": 9.894555916525997e-06, "loss": 0.3586, "step": 2727 }, { "epoch": 0.09361702127659574, "grad_norm": 0.926216695009488, "learning_rate": 9.894442356421908e-06, "loss": 0.4292, "step": 2728 }, { "epoch": 0.09365133836650652, "grad_norm": 0.7847032426491187, "learning_rate": 9.894328735852688e-06, "loss": 0.3892, "step": 2729 }, { "epoch": 0.0936856554564173, "grad_norm": 0.7786776496837422, "learning_rate": 9.894215054819744e-06, "loss": 0.3978, "step": 2730 }, { "epoch": 0.09371997254632806, "grad_norm": 0.8501226361971653, "learning_rate": 9.89410131332448e-06, "loss": 0.3644, "step": 2731 }, { "epoch": 0.09375428963623884, "grad_norm": 0.7868825808651401, "learning_rate": 9.8939875113683e-06, "loss": 0.3387, "step": 2732 }, { "epoch": 0.09378860672614962, "grad_norm": 0.8626212339156909, "learning_rate": 9.89387364895261e-06, "loss": 0.3323, "step": 2733 }, { "epoch": 0.0938229238160604, "grad_norm": 0.8718915710205785, "learning_rate": 9.893759726078815e-06, "loss": 0.4148, "step": 2734 }, { "epoch": 0.09385724090597117, "grad_norm": 0.8920976733324959, "learning_rate": 9.893645742748325e-06, "loss": 0.3823, "step": 2735 }, { "epoch": 0.09389155799588195, "grad_norm": 0.8542454281870395, "learning_rate": 9.893531698962548e-06, "loss": 0.3594, "step": 2736 }, { "epoch": 0.09392587508579273, "grad_norm": 0.8807697216874513, "learning_rate": 9.893417594722894e-06, "loss": 0.4577, "step": 2737 }, { "epoch": 0.0939601921757035, "grad_norm": 0.8056523313073509, "learning_rate": 9.893303430030769e-06, "loss": 0.3518, "step": 2738 }, { "epoch": 0.09399450926561427, "grad_norm": 0.9133416347646673, "learning_rate": 9.893189204887584e-06, "loss": 0.3744, "step": 2739 }, { "epoch": 0.09402882635552505, "grad_norm": 0.9971183232428137, "learning_rate": 9.893074919294752e-06, "loss": 0.3843, "step": 2740 }, { "epoch": 0.09406314344543583, "grad_norm": 0.8643525103271978, "learning_rate": 9.892960573253685e-06, "loss": 0.3715, "step": 2741 }, { "epoch": 0.09409746053534661, "grad_norm": 0.8735327710535621, "learning_rate": 9.892846166765795e-06, "loss": 0.3439, "step": 2742 }, { "epoch": 0.09413177762525737, "grad_norm": 0.8444282690014393, "learning_rate": 9.892731699832495e-06, "loss": 0.4487, "step": 2743 }, { "epoch": 0.09416609471516815, "grad_norm": 0.8925197270060041, "learning_rate": 9.892617172455197e-06, "loss": 0.4365, "step": 2744 }, { "epoch": 0.09420041180507893, "grad_norm": 0.8218144742458613, "learning_rate": 9.89250258463532e-06, "loss": 0.3839, "step": 2745 }, { "epoch": 0.09423472889498971, "grad_norm": 0.8019715157124556, "learning_rate": 9.892387936374277e-06, "loss": 0.3425, "step": 2746 }, { "epoch": 0.09426904598490048, "grad_norm": 0.8619224260230735, "learning_rate": 9.892273227673486e-06, "loss": 0.3821, "step": 2747 }, { "epoch": 0.09430336307481126, "grad_norm": 0.7263021156163881, "learning_rate": 9.892158458534362e-06, "loss": 0.3365, "step": 2748 }, { "epoch": 0.09433768016472203, "grad_norm": 0.7403105703024324, "learning_rate": 9.892043628958324e-06, "loss": 0.3299, "step": 2749 }, { "epoch": 0.09437199725463281, "grad_norm": 0.7885668950371147, "learning_rate": 9.89192873894679e-06, "loss": 0.3825, "step": 2750 }, { "epoch": 0.09440631434454358, "grad_norm": 0.8524709159086289, "learning_rate": 9.891813788501181e-06, "loss": 0.4168, "step": 2751 }, { "epoch": 0.09444063143445436, "grad_norm": 0.8615500650240736, "learning_rate": 9.891698777622915e-06, "loss": 0.3853, "step": 2752 }, { "epoch": 0.09447494852436514, "grad_norm": 0.9333207071397672, "learning_rate": 9.891583706313416e-06, "loss": 0.4328, "step": 2753 }, { "epoch": 0.0945092656142759, "grad_norm": 0.8563439528370768, "learning_rate": 9.891468574574101e-06, "loss": 0.4028, "step": 2754 }, { "epoch": 0.09454358270418668, "grad_norm": 0.8238760250202585, "learning_rate": 9.891353382406395e-06, "loss": 0.3742, "step": 2755 }, { "epoch": 0.09457789979409746, "grad_norm": 0.8065689722893481, "learning_rate": 9.891238129811723e-06, "loss": 0.3925, "step": 2756 }, { "epoch": 0.09461221688400824, "grad_norm": 0.7951385556787754, "learning_rate": 9.891122816791504e-06, "loss": 0.3183, "step": 2757 }, { "epoch": 0.094646533973919, "grad_norm": 0.9740734099924213, "learning_rate": 9.891007443347165e-06, "loss": 0.4307, "step": 2758 }, { "epoch": 0.09468085106382979, "grad_norm": 0.8915732936944466, "learning_rate": 9.890892009480134e-06, "loss": 0.3667, "step": 2759 }, { "epoch": 0.09471516815374056, "grad_norm": 0.8529343847667407, "learning_rate": 9.890776515191832e-06, "loss": 0.3226, "step": 2760 }, { "epoch": 0.09474948524365134, "grad_norm": 0.8833421011890261, "learning_rate": 9.890660960483689e-06, "loss": 0.5058, "step": 2761 }, { "epoch": 0.09478380233356211, "grad_norm": 0.7991158041225765, "learning_rate": 9.890545345357131e-06, "loss": 0.3665, "step": 2762 }, { "epoch": 0.09481811942347289, "grad_norm": 0.8442796843799579, "learning_rate": 9.890429669813589e-06, "loss": 0.3549, "step": 2763 }, { "epoch": 0.09485243651338367, "grad_norm": 0.8039502781565084, "learning_rate": 9.890313933854488e-06, "loss": 0.3815, "step": 2764 }, { "epoch": 0.09488675360329445, "grad_norm": 0.8967048823840912, "learning_rate": 9.89019813748126e-06, "loss": 0.3978, "step": 2765 }, { "epoch": 0.09492107069320521, "grad_norm": 0.8728778332492563, "learning_rate": 9.890082280695337e-06, "loss": 0.345, "step": 2766 }, { "epoch": 0.09495538778311599, "grad_norm": 0.8240771072270681, "learning_rate": 9.889966363498147e-06, "loss": 0.3401, "step": 2767 }, { "epoch": 0.09498970487302677, "grad_norm": 0.7939722935151308, "learning_rate": 9.889850385891126e-06, "loss": 0.3963, "step": 2768 }, { "epoch": 0.09502402196293755, "grad_norm": 0.8511249615920382, "learning_rate": 9.889734347875702e-06, "loss": 0.3202, "step": 2769 }, { "epoch": 0.09505833905284831, "grad_norm": 0.9488783960279956, "learning_rate": 9.889618249453312e-06, "loss": 0.3255, "step": 2770 }, { "epoch": 0.0950926561427591, "grad_norm": 0.8371702662720555, "learning_rate": 9.88950209062539e-06, "loss": 0.3666, "step": 2771 }, { "epoch": 0.09512697323266987, "grad_norm": 3.432927740260554, "learning_rate": 9.88938587139337e-06, "loss": 0.3204, "step": 2772 }, { "epoch": 0.09516129032258064, "grad_norm": 0.8961812456885718, "learning_rate": 9.889269591758688e-06, "loss": 0.3817, "step": 2773 }, { "epoch": 0.09519560741249142, "grad_norm": 0.8955796439254436, "learning_rate": 9.88915325172278e-06, "loss": 0.3433, "step": 2774 }, { "epoch": 0.0952299245024022, "grad_norm": 0.9470689094773632, "learning_rate": 9.889036851287084e-06, "loss": 0.367, "step": 2775 }, { "epoch": 0.09526424159231298, "grad_norm": 0.8580100254733648, "learning_rate": 9.888920390453038e-06, "loss": 0.3468, "step": 2776 }, { "epoch": 0.09529855868222374, "grad_norm": 0.7869199491703349, "learning_rate": 9.88880386922208e-06, "loss": 0.402, "step": 2777 }, { "epoch": 0.09533287577213452, "grad_norm": 0.8567400374760717, "learning_rate": 9.888687287595651e-06, "loss": 0.391, "step": 2778 }, { "epoch": 0.0953671928620453, "grad_norm": 0.8564960327254276, "learning_rate": 9.888570645575189e-06, "loss": 0.3548, "step": 2779 }, { "epoch": 0.09540150995195608, "grad_norm": 0.8826397535553091, "learning_rate": 9.888453943162135e-06, "loss": 0.358, "step": 2780 }, { "epoch": 0.09543582704186684, "grad_norm": 0.8656207693847487, "learning_rate": 9.888337180357935e-06, "loss": 0.3485, "step": 2781 }, { "epoch": 0.09547014413177762, "grad_norm": 0.9272485225799488, "learning_rate": 9.888220357164027e-06, "loss": 0.417, "step": 2782 }, { "epoch": 0.0955044612216884, "grad_norm": 0.9321260657559214, "learning_rate": 9.888103473581856e-06, "loss": 0.4308, "step": 2783 }, { "epoch": 0.09553877831159918, "grad_norm": 0.8204664857938766, "learning_rate": 9.887986529612865e-06, "loss": 0.4188, "step": 2784 }, { "epoch": 0.09557309540150995, "grad_norm": 0.9241658665026019, "learning_rate": 9.8878695252585e-06, "loss": 0.3974, "step": 2785 }, { "epoch": 0.09560741249142073, "grad_norm": 0.7963384837954584, "learning_rate": 9.887752460520205e-06, "loss": 0.3037, "step": 2786 }, { "epoch": 0.0956417295813315, "grad_norm": 0.9757491289457706, "learning_rate": 9.887635335399425e-06, "loss": 0.3766, "step": 2787 }, { "epoch": 0.09567604667124228, "grad_norm": 0.8256766320747817, "learning_rate": 9.887518149897612e-06, "loss": 0.364, "step": 2788 }, { "epoch": 0.09571036376115305, "grad_norm": 0.8197543161944223, "learning_rate": 9.88740090401621e-06, "loss": 0.3456, "step": 2789 }, { "epoch": 0.09574468085106383, "grad_norm": 0.9093823506730404, "learning_rate": 9.887283597756668e-06, "loss": 0.3773, "step": 2790 }, { "epoch": 0.09577899794097461, "grad_norm": 0.876949079676251, "learning_rate": 9.887166231120434e-06, "loss": 0.3865, "step": 2791 }, { "epoch": 0.09581331503088539, "grad_norm": 0.8314782052119509, "learning_rate": 9.88704880410896e-06, "loss": 0.3502, "step": 2792 }, { "epoch": 0.09584763212079615, "grad_norm": 0.9043669189240294, "learning_rate": 9.886931316723696e-06, "loss": 0.3343, "step": 2793 }, { "epoch": 0.09588194921070693, "grad_norm": 0.8340734994242943, "learning_rate": 9.886813768966092e-06, "loss": 0.4304, "step": 2794 }, { "epoch": 0.09591626630061771, "grad_norm": 0.8608550527436541, "learning_rate": 9.886696160837601e-06, "loss": 0.4285, "step": 2795 }, { "epoch": 0.09595058339052848, "grad_norm": 0.8790132510467025, "learning_rate": 9.886578492339679e-06, "loss": 0.4187, "step": 2796 }, { "epoch": 0.09598490048043926, "grad_norm": 0.9299207247925316, "learning_rate": 9.886460763473775e-06, "loss": 0.314, "step": 2797 }, { "epoch": 0.09601921757035004, "grad_norm": 0.8238593417820651, "learning_rate": 9.886342974241345e-06, "loss": 0.3686, "step": 2798 }, { "epoch": 0.09605353466026081, "grad_norm": 0.913761353615544, "learning_rate": 9.886225124643845e-06, "loss": 0.3208, "step": 2799 }, { "epoch": 0.09608785175017158, "grad_norm": 1.0914343862403586, "learning_rate": 9.88610721468273e-06, "loss": 0.3937, "step": 2800 }, { "epoch": 0.09612216884008236, "grad_norm": 0.792958763041346, "learning_rate": 9.885989244359457e-06, "loss": 0.3325, "step": 2801 }, { "epoch": 0.09615648592999314, "grad_norm": 0.816117089016893, "learning_rate": 9.885871213675484e-06, "loss": 0.3744, "step": 2802 }, { "epoch": 0.09619080301990392, "grad_norm": 0.8849205681606838, "learning_rate": 9.885753122632269e-06, "loss": 0.3506, "step": 2803 }, { "epoch": 0.09622512010981468, "grad_norm": 0.7829310656301283, "learning_rate": 9.885634971231271e-06, "loss": 0.3326, "step": 2804 }, { "epoch": 0.09625943719972546, "grad_norm": 0.7433047433124501, "learning_rate": 9.885516759473948e-06, "loss": 0.3269, "step": 2805 }, { "epoch": 0.09629375428963624, "grad_norm": 0.7912760792715486, "learning_rate": 9.88539848736176e-06, "loss": 0.3769, "step": 2806 }, { "epoch": 0.09632807137954702, "grad_norm": 0.8137542202215688, "learning_rate": 9.88528015489617e-06, "loss": 0.4133, "step": 2807 }, { "epoch": 0.09636238846945779, "grad_norm": 0.809703726934102, "learning_rate": 9.885161762078641e-06, "loss": 0.3514, "step": 2808 }, { "epoch": 0.09639670555936856, "grad_norm": 0.8539047952219263, "learning_rate": 9.885043308910633e-06, "loss": 0.3258, "step": 2809 }, { "epoch": 0.09643102264927934, "grad_norm": 0.8527755017097571, "learning_rate": 9.88492479539361e-06, "loss": 0.4101, "step": 2810 }, { "epoch": 0.09646533973919012, "grad_norm": 0.9477680367285388, "learning_rate": 9.884806221529037e-06, "loss": 0.3771, "step": 2811 }, { "epoch": 0.09649965682910089, "grad_norm": 0.8600430514225146, "learning_rate": 9.884687587318378e-06, "loss": 0.3365, "step": 2812 }, { "epoch": 0.09653397391901167, "grad_norm": 0.8815377770700299, "learning_rate": 9.884568892763098e-06, "loss": 0.3921, "step": 2813 }, { "epoch": 0.09656829100892245, "grad_norm": 0.861724595526162, "learning_rate": 9.884450137864666e-06, "loss": 0.3659, "step": 2814 }, { "epoch": 0.09660260809883323, "grad_norm": 0.8404925233097114, "learning_rate": 9.884331322624547e-06, "loss": 0.3543, "step": 2815 }, { "epoch": 0.09663692518874399, "grad_norm": 0.8996442919889135, "learning_rate": 9.884212447044208e-06, "loss": 0.3446, "step": 2816 }, { "epoch": 0.09667124227865477, "grad_norm": 0.8733178399610985, "learning_rate": 9.884093511125118e-06, "loss": 0.3599, "step": 2817 }, { "epoch": 0.09670555936856555, "grad_norm": 0.9105150758868502, "learning_rate": 9.88397451486875e-06, "loss": 0.3832, "step": 2818 }, { "epoch": 0.09673987645847631, "grad_norm": 0.7669143076118112, "learning_rate": 9.883855458276568e-06, "loss": 0.364, "step": 2819 }, { "epoch": 0.0967741935483871, "grad_norm": 0.9135643605167131, "learning_rate": 9.883736341350048e-06, "loss": 0.3711, "step": 2820 }, { "epoch": 0.09680851063829787, "grad_norm": 0.8806801059687427, "learning_rate": 9.883617164090658e-06, "loss": 0.3592, "step": 2821 }, { "epoch": 0.09684282772820865, "grad_norm": 0.8971752965414436, "learning_rate": 9.883497926499872e-06, "loss": 0.3548, "step": 2822 }, { "epoch": 0.09687714481811942, "grad_norm": 0.8481623456042576, "learning_rate": 9.883378628579161e-06, "loss": 0.3742, "step": 2823 }, { "epoch": 0.0969114619080302, "grad_norm": 0.972405553168225, "learning_rate": 9.883259270330003e-06, "loss": 0.4519, "step": 2824 }, { "epoch": 0.09694577899794098, "grad_norm": 0.8578998912691331, "learning_rate": 9.883139851753869e-06, "loss": 0.3289, "step": 2825 }, { "epoch": 0.09698009608785176, "grad_norm": 0.8546108641149964, "learning_rate": 9.883020372852236e-06, "loss": 0.397, "step": 2826 }, { "epoch": 0.09701441317776252, "grad_norm": 0.8346083655609736, "learning_rate": 9.882900833626579e-06, "loss": 0.3406, "step": 2827 }, { "epoch": 0.0970487302676733, "grad_norm": 0.7891616583311923, "learning_rate": 9.882781234078376e-06, "loss": 0.3359, "step": 2828 }, { "epoch": 0.09708304735758408, "grad_norm": 0.8968767265900921, "learning_rate": 9.882661574209103e-06, "loss": 0.378, "step": 2829 }, { "epoch": 0.09711736444749486, "grad_norm": 0.8149996266131859, "learning_rate": 9.882541854020238e-06, "loss": 0.3767, "step": 2830 }, { "epoch": 0.09715168153740562, "grad_norm": 0.8526117131801225, "learning_rate": 9.88242207351326e-06, "loss": 0.3528, "step": 2831 }, { "epoch": 0.0971859986273164, "grad_norm": 0.8665501508205624, "learning_rate": 9.882302232689652e-06, "loss": 0.3726, "step": 2832 }, { "epoch": 0.09722031571722718, "grad_norm": 0.8555395603796114, "learning_rate": 9.882182331550892e-06, "loss": 0.3766, "step": 2833 }, { "epoch": 0.09725463280713796, "grad_norm": 0.738937060551665, "learning_rate": 9.88206237009846e-06, "loss": 0.3227, "step": 2834 }, { "epoch": 0.09728894989704873, "grad_norm": 0.9213213626351596, "learning_rate": 9.881942348333839e-06, "loss": 0.37, "step": 2835 }, { "epoch": 0.0973232669869595, "grad_norm": 1.0804172394974498, "learning_rate": 9.881822266258515e-06, "loss": 0.3102, "step": 2836 }, { "epoch": 0.09735758407687028, "grad_norm": 1.0125465694966587, "learning_rate": 9.881702123873967e-06, "loss": 0.4171, "step": 2837 }, { "epoch": 0.09739190116678105, "grad_norm": 0.8790612290879644, "learning_rate": 9.88158192118168e-06, "loss": 0.3847, "step": 2838 }, { "epoch": 0.09742621825669183, "grad_norm": 0.8826692367469475, "learning_rate": 9.881461658183138e-06, "loss": 0.358, "step": 2839 }, { "epoch": 0.09746053534660261, "grad_norm": 0.8346311053742984, "learning_rate": 9.881341334879831e-06, "loss": 0.3462, "step": 2840 }, { "epoch": 0.09749485243651339, "grad_norm": 1.0207346131548667, "learning_rate": 9.881220951273242e-06, "loss": 0.3207, "step": 2841 }, { "epoch": 0.09752916952642415, "grad_norm": 0.8335831471751305, "learning_rate": 9.88110050736486e-06, "loss": 0.317, "step": 2842 }, { "epoch": 0.09756348661633493, "grad_norm": 0.8148324461045042, "learning_rate": 9.880980003156171e-06, "loss": 0.4063, "step": 2843 }, { "epoch": 0.09759780370624571, "grad_norm": 0.8152717851509451, "learning_rate": 9.880859438648666e-06, "loss": 0.3553, "step": 2844 }, { "epoch": 0.09763212079615649, "grad_norm": 0.8314375056370598, "learning_rate": 9.880738813843834e-06, "loss": 0.3853, "step": 2845 }, { "epoch": 0.09766643788606726, "grad_norm": 0.8481539559237081, "learning_rate": 9.880618128743161e-06, "loss": 0.3856, "step": 2846 }, { "epoch": 0.09770075497597804, "grad_norm": 0.9447084592802567, "learning_rate": 9.880497383348145e-06, "loss": 0.3902, "step": 2847 }, { "epoch": 0.09773507206588881, "grad_norm": 0.8081395684360454, "learning_rate": 9.880376577660272e-06, "loss": 0.319, "step": 2848 }, { "epoch": 0.0977693891557996, "grad_norm": 0.7866166710065124, "learning_rate": 9.880255711681036e-06, "loss": 0.3683, "step": 2849 }, { "epoch": 0.09780370624571036, "grad_norm": 0.761747198637629, "learning_rate": 9.88013478541193e-06, "loss": 0.3468, "step": 2850 }, { "epoch": 0.09783802333562114, "grad_norm": 0.8605552631054779, "learning_rate": 9.88001379885445e-06, "loss": 0.3752, "step": 2851 }, { "epoch": 0.09787234042553192, "grad_norm": 0.8743068950536805, "learning_rate": 9.879892752010087e-06, "loss": 0.3544, "step": 2852 }, { "epoch": 0.0979066575154427, "grad_norm": 0.9296376387295996, "learning_rate": 9.87977164488034e-06, "loss": 0.3844, "step": 2853 }, { "epoch": 0.09794097460535346, "grad_norm": 0.9309289654632442, "learning_rate": 9.879650477466704e-06, "loss": 0.3298, "step": 2854 }, { "epoch": 0.09797529169526424, "grad_norm": 0.8515415897624865, "learning_rate": 9.879529249770673e-06, "loss": 0.3547, "step": 2855 }, { "epoch": 0.09800960878517502, "grad_norm": 0.8032337181862459, "learning_rate": 9.87940796179375e-06, "loss": 0.3395, "step": 2856 }, { "epoch": 0.0980439258750858, "grad_norm": 0.8738247870060674, "learning_rate": 9.87928661353743e-06, "loss": 0.3358, "step": 2857 }, { "epoch": 0.09807824296499656, "grad_norm": 0.9944406272365468, "learning_rate": 9.879165205003211e-06, "loss": 0.3614, "step": 2858 }, { "epoch": 0.09811256005490734, "grad_norm": 0.8959284377316815, "learning_rate": 9.879043736192597e-06, "loss": 0.4422, "step": 2859 }, { "epoch": 0.09814687714481812, "grad_norm": 0.7921205357939445, "learning_rate": 9.878922207107084e-06, "loss": 0.3434, "step": 2860 }, { "epoch": 0.09818119423472889, "grad_norm": 0.860809572659995, "learning_rate": 9.878800617748175e-06, "loss": 0.3945, "step": 2861 }, { "epoch": 0.09821551132463967, "grad_norm": 0.9195632547425324, "learning_rate": 9.878678968117375e-06, "loss": 0.3865, "step": 2862 }, { "epoch": 0.09824982841455045, "grad_norm": 0.7875981144326313, "learning_rate": 9.878557258216181e-06, "loss": 0.397, "step": 2863 }, { "epoch": 0.09828414550446123, "grad_norm": 1.0812033063506188, "learning_rate": 9.878435488046102e-06, "loss": 0.4033, "step": 2864 }, { "epoch": 0.09831846259437199, "grad_norm": 0.7942812034538164, "learning_rate": 9.87831365760864e-06, "loss": 0.4328, "step": 2865 }, { "epoch": 0.09835277968428277, "grad_norm": 0.807086803276257, "learning_rate": 9.878191766905301e-06, "loss": 0.338, "step": 2866 }, { "epoch": 0.09838709677419355, "grad_norm": 0.8253094127580689, "learning_rate": 9.878069815937588e-06, "loss": 0.3902, "step": 2867 }, { "epoch": 0.09842141386410433, "grad_norm": 0.9055653288987783, "learning_rate": 9.877947804707012e-06, "loss": 0.3924, "step": 2868 }, { "epoch": 0.0984557309540151, "grad_norm": 0.8410428783553174, "learning_rate": 9.877825733215076e-06, "loss": 0.3746, "step": 2869 }, { "epoch": 0.09849004804392587, "grad_norm": 0.7941908148151807, "learning_rate": 9.877703601463292e-06, "loss": 0.3801, "step": 2870 }, { "epoch": 0.09852436513383665, "grad_norm": 0.7836030648319493, "learning_rate": 9.877581409453165e-06, "loss": 0.3436, "step": 2871 }, { "epoch": 0.09855868222374743, "grad_norm": 0.8303982807543931, "learning_rate": 9.877459157186209e-06, "loss": 0.3947, "step": 2872 }, { "epoch": 0.0985929993136582, "grad_norm": 0.7906280502094545, "learning_rate": 9.87733684466393e-06, "loss": 0.3677, "step": 2873 }, { "epoch": 0.09862731640356898, "grad_norm": 0.9105458308949906, "learning_rate": 9.877214471887841e-06, "loss": 0.3602, "step": 2874 }, { "epoch": 0.09866163349347976, "grad_norm": 0.9154002631835592, "learning_rate": 9.877092038859453e-06, "loss": 0.3746, "step": 2875 }, { "epoch": 0.09869595058339053, "grad_norm": 0.8435781416560743, "learning_rate": 9.87696954558028e-06, "loss": 0.3641, "step": 2876 }, { "epoch": 0.0987302676733013, "grad_norm": 0.8111709864331068, "learning_rate": 9.876846992051831e-06, "loss": 0.3251, "step": 2877 }, { "epoch": 0.09876458476321208, "grad_norm": 0.9225717962828771, "learning_rate": 9.876724378275628e-06, "loss": 0.3848, "step": 2878 }, { "epoch": 0.09879890185312286, "grad_norm": 0.8997789483884896, "learning_rate": 9.876601704253176e-06, "loss": 0.389, "step": 2879 }, { "epoch": 0.09883321894303362, "grad_norm": 0.9290464233439161, "learning_rate": 9.876478969986001e-06, "loss": 0.3989, "step": 2880 }, { "epoch": 0.0988675360329444, "grad_norm": 1.003741677918514, "learning_rate": 9.87635617547561e-06, "loss": 0.3766, "step": 2881 }, { "epoch": 0.09890185312285518, "grad_norm": 0.8958472617060601, "learning_rate": 9.876233320723526e-06, "loss": 0.3273, "step": 2882 }, { "epoch": 0.09893617021276596, "grad_norm": 0.8204322819740095, "learning_rate": 9.876110405731263e-06, "loss": 0.42, "step": 2883 }, { "epoch": 0.09897048730267673, "grad_norm": 0.9560959907896567, "learning_rate": 9.87598743050034e-06, "loss": 0.3848, "step": 2884 }, { "epoch": 0.0990048043925875, "grad_norm": 0.7941937052180739, "learning_rate": 9.875864395032278e-06, "loss": 0.3567, "step": 2885 }, { "epoch": 0.09903912148249829, "grad_norm": 0.8146615863641377, "learning_rate": 9.875741299328597e-06, "loss": 0.3326, "step": 2886 }, { "epoch": 0.09907343857240906, "grad_norm": 1.0590262353522049, "learning_rate": 9.875618143390816e-06, "loss": 0.4058, "step": 2887 }, { "epoch": 0.09910775566231983, "grad_norm": 0.8119550956498769, "learning_rate": 9.875494927220458e-06, "loss": 0.3488, "step": 2888 }, { "epoch": 0.09914207275223061, "grad_norm": 0.821013684254081, "learning_rate": 9.875371650819043e-06, "loss": 0.3718, "step": 2889 }, { "epoch": 0.09917638984214139, "grad_norm": 0.8804281181753508, "learning_rate": 9.875248314188098e-06, "loss": 0.4028, "step": 2890 }, { "epoch": 0.09921070693205217, "grad_norm": 0.9131448543461024, "learning_rate": 9.875124917329142e-06, "loss": 0.3522, "step": 2891 }, { "epoch": 0.09924502402196293, "grad_norm": 1.0225937324934855, "learning_rate": 9.8750014602437e-06, "loss": 0.3426, "step": 2892 }, { "epoch": 0.09927934111187371, "grad_norm": 0.8750898671664015, "learning_rate": 9.874877942933302e-06, "loss": 0.4084, "step": 2893 }, { "epoch": 0.09931365820178449, "grad_norm": 0.8946570384753637, "learning_rate": 9.874754365399468e-06, "loss": 0.4137, "step": 2894 }, { "epoch": 0.09934797529169527, "grad_norm": 0.8519442603420078, "learning_rate": 9.874630727643729e-06, "loss": 0.4082, "step": 2895 }, { "epoch": 0.09938229238160604, "grad_norm": 0.9377715660365419, "learning_rate": 9.874507029667611e-06, "loss": 0.4161, "step": 2896 }, { "epoch": 0.09941660947151681, "grad_norm": 0.9093245115696305, "learning_rate": 9.87438327147264e-06, "loss": 0.3829, "step": 2897 }, { "epoch": 0.0994509265614276, "grad_norm": 0.843691802607205, "learning_rate": 9.874259453060347e-06, "loss": 0.3522, "step": 2898 }, { "epoch": 0.09948524365133837, "grad_norm": 0.8427506251033304, "learning_rate": 9.874135574432263e-06, "loss": 0.348, "step": 2899 }, { "epoch": 0.09951956074124914, "grad_norm": 1.4295379861773938, "learning_rate": 9.874011635589916e-06, "loss": 0.3692, "step": 2900 }, { "epoch": 0.09955387783115992, "grad_norm": 0.7889309138684252, "learning_rate": 9.873887636534836e-06, "loss": 0.347, "step": 2901 }, { "epoch": 0.0995881949210707, "grad_norm": 0.8529355450572867, "learning_rate": 9.873763577268558e-06, "loss": 0.3833, "step": 2902 }, { "epoch": 0.09962251201098146, "grad_norm": 0.8587270756658357, "learning_rate": 9.873639457792612e-06, "loss": 0.3875, "step": 2903 }, { "epoch": 0.09965682910089224, "grad_norm": 0.8310004410867441, "learning_rate": 9.873515278108534e-06, "loss": 0.3498, "step": 2904 }, { "epoch": 0.09969114619080302, "grad_norm": 1.0774723218522848, "learning_rate": 9.873391038217857e-06, "loss": 0.3219, "step": 2905 }, { "epoch": 0.0997254632807138, "grad_norm": 0.8103237791588404, "learning_rate": 9.873266738122114e-06, "loss": 0.3528, "step": 2906 }, { "epoch": 0.09975978037062457, "grad_norm": 0.8149490885664521, "learning_rate": 9.873142377822844e-06, "loss": 0.3764, "step": 2907 }, { "epoch": 0.09979409746053534, "grad_norm": 0.8215811584851357, "learning_rate": 9.87301795732158e-06, "loss": 0.3772, "step": 2908 }, { "epoch": 0.09982841455044612, "grad_norm": 0.913105454410742, "learning_rate": 9.87289347661986e-06, "loss": 0.4251, "step": 2909 }, { "epoch": 0.0998627316403569, "grad_norm": 0.8233232517816671, "learning_rate": 9.872768935719224e-06, "loss": 0.3864, "step": 2910 }, { "epoch": 0.09989704873026767, "grad_norm": 0.946310496104765, "learning_rate": 9.872644334621209e-06, "loss": 0.3458, "step": 2911 }, { "epoch": 0.09993136582017845, "grad_norm": 0.8792093269165999, "learning_rate": 9.872519673327353e-06, "loss": 0.3911, "step": 2912 }, { "epoch": 0.09996568291008923, "grad_norm": 0.9836139044631973, "learning_rate": 9.872394951839198e-06, "loss": 0.3415, "step": 2913 }, { "epoch": 0.1, "grad_norm": 0.8689782157992402, "learning_rate": 9.872270170158284e-06, "loss": 0.373, "step": 2914 }, { "epoch": 0.10003431708991077, "grad_norm": 1.0065875042311676, "learning_rate": 9.872145328286151e-06, "loss": 0.3487, "step": 2915 }, { "epoch": 0.10006863417982155, "grad_norm": 0.8199420558443006, "learning_rate": 9.872020426224344e-06, "loss": 0.3952, "step": 2916 }, { "epoch": 0.10010295126973233, "grad_norm": 0.7834671093455768, "learning_rate": 9.871895463974405e-06, "loss": 0.3151, "step": 2917 }, { "epoch": 0.10013726835964311, "grad_norm": 0.9023407522126186, "learning_rate": 9.871770441537878e-06, "loss": 0.342, "step": 2918 }, { "epoch": 0.10017158544955387, "grad_norm": 0.8338184335392342, "learning_rate": 9.871645358916307e-06, "loss": 0.372, "step": 2919 }, { "epoch": 0.10020590253946465, "grad_norm": 0.9532739873592277, "learning_rate": 9.871520216111237e-06, "loss": 0.3632, "step": 2920 }, { "epoch": 0.10024021962937543, "grad_norm": 1.0729286129992361, "learning_rate": 9.871395013124214e-06, "loss": 0.3424, "step": 2921 }, { "epoch": 0.10027453671928621, "grad_norm": 0.8718788583044722, "learning_rate": 9.871269749956786e-06, "loss": 0.3498, "step": 2922 }, { "epoch": 0.10030885380919698, "grad_norm": 0.7522141238596458, "learning_rate": 9.8711444266105e-06, "loss": 0.4011, "step": 2923 }, { "epoch": 0.10034317089910776, "grad_norm": 0.8003846130403514, "learning_rate": 9.871019043086902e-06, "loss": 0.3322, "step": 2924 }, { "epoch": 0.10037748798901854, "grad_norm": 0.7804344149833286, "learning_rate": 9.870893599387544e-06, "loss": 0.435, "step": 2925 }, { "epoch": 0.1004118050789293, "grad_norm": 0.9029794800503814, "learning_rate": 9.870768095513974e-06, "loss": 0.4259, "step": 2926 }, { "epoch": 0.10044612216884008, "grad_norm": 0.7764133266958569, "learning_rate": 9.870642531467745e-06, "loss": 0.3234, "step": 2927 }, { "epoch": 0.10048043925875086, "grad_norm": 0.8637484817107015, "learning_rate": 9.870516907250402e-06, "loss": 0.3844, "step": 2928 }, { "epoch": 0.10051475634866164, "grad_norm": 0.9032217349424319, "learning_rate": 9.870391222863504e-06, "loss": 0.3687, "step": 2929 }, { "epoch": 0.1005490734385724, "grad_norm": 0.8334053172438373, "learning_rate": 9.870265478308601e-06, "loss": 0.3338, "step": 2930 }, { "epoch": 0.10058339052848318, "grad_norm": 0.9069767221963376, "learning_rate": 9.870139673587245e-06, "loss": 0.3706, "step": 2931 }, { "epoch": 0.10061770761839396, "grad_norm": 0.9162417982130271, "learning_rate": 9.870013808700992e-06, "loss": 0.4472, "step": 2932 }, { "epoch": 0.10065202470830474, "grad_norm": 0.8488895514298374, "learning_rate": 9.869887883651397e-06, "loss": 0.3532, "step": 2933 }, { "epoch": 0.1006863417982155, "grad_norm": 0.8092354066304698, "learning_rate": 9.869761898440014e-06, "loss": 0.3244, "step": 2934 }, { "epoch": 0.10072065888812629, "grad_norm": 0.8725074354608426, "learning_rate": 9.8696358530684e-06, "loss": 0.3617, "step": 2935 }, { "epoch": 0.10075497597803706, "grad_norm": 0.8601305557322121, "learning_rate": 9.869509747538113e-06, "loss": 0.3649, "step": 2936 }, { "epoch": 0.10078929306794784, "grad_norm": 0.8902750926251357, "learning_rate": 9.869383581850709e-06, "loss": 0.4186, "step": 2937 }, { "epoch": 0.10082361015785861, "grad_norm": 0.8199996237364159, "learning_rate": 9.869257356007749e-06, "loss": 0.4116, "step": 2938 }, { "epoch": 0.10085792724776939, "grad_norm": 0.8071674683346289, "learning_rate": 9.869131070010792e-06, "loss": 0.343, "step": 2939 }, { "epoch": 0.10089224433768017, "grad_norm": 0.8118084566106948, "learning_rate": 9.869004723861398e-06, "loss": 0.367, "step": 2940 }, { "epoch": 0.10092656142759095, "grad_norm": 0.7731245229851549, "learning_rate": 9.868878317561125e-06, "loss": 0.3699, "step": 2941 }, { "epoch": 0.10096087851750171, "grad_norm": 0.8645715615816506, "learning_rate": 9.868751851111538e-06, "loss": 0.3645, "step": 2942 }, { "epoch": 0.10099519560741249, "grad_norm": 1.0173696319959613, "learning_rate": 9.8686253245142e-06, "loss": 0.3694, "step": 2943 }, { "epoch": 0.10102951269732327, "grad_norm": 1.2228677832628485, "learning_rate": 9.86849873777067e-06, "loss": 0.3699, "step": 2944 }, { "epoch": 0.10106382978723404, "grad_norm": 0.7626070753529868, "learning_rate": 9.868372090882516e-06, "loss": 0.3308, "step": 2945 }, { "epoch": 0.10109814687714481, "grad_norm": 0.8018876919780801, "learning_rate": 9.8682453838513e-06, "loss": 0.3716, "step": 2946 }, { "epoch": 0.1011324639670556, "grad_norm": 0.9446956912580392, "learning_rate": 9.868118616678589e-06, "loss": 0.3894, "step": 2947 }, { "epoch": 0.10116678105696637, "grad_norm": 0.8528205978389306, "learning_rate": 9.867991789365946e-06, "loss": 0.3488, "step": 2948 }, { "epoch": 0.10120109814687714, "grad_norm": 0.8727176495858333, "learning_rate": 9.86786490191494e-06, "loss": 0.3643, "step": 2949 }, { "epoch": 0.10123541523678792, "grad_norm": 0.8549601616419187, "learning_rate": 9.867737954327141e-06, "loss": 0.383, "step": 2950 }, { "epoch": 0.1012697323266987, "grad_norm": 0.810424277389586, "learning_rate": 9.867610946604114e-06, "loss": 0.3625, "step": 2951 }, { "epoch": 0.10130404941660948, "grad_norm": 0.8670322930667416, "learning_rate": 9.867483878747428e-06, "loss": 0.3754, "step": 2952 }, { "epoch": 0.10133836650652024, "grad_norm": 0.7975813740847404, "learning_rate": 9.867356750758655e-06, "loss": 0.367, "step": 2953 }, { "epoch": 0.10137268359643102, "grad_norm": 0.8354060159537395, "learning_rate": 9.867229562639362e-06, "loss": 0.356, "step": 2954 }, { "epoch": 0.1014070006863418, "grad_norm": 0.9436930343282935, "learning_rate": 9.867102314391123e-06, "loss": 0.3715, "step": 2955 }, { "epoch": 0.10144131777625258, "grad_norm": 0.890776993999414, "learning_rate": 9.866975006015512e-06, "loss": 0.3438, "step": 2956 }, { "epoch": 0.10147563486616334, "grad_norm": 0.8124433068106912, "learning_rate": 9.866847637514095e-06, "loss": 0.434, "step": 2957 }, { "epoch": 0.10150995195607412, "grad_norm": 0.8517128629570687, "learning_rate": 9.866720208888452e-06, "loss": 0.3307, "step": 2958 }, { "epoch": 0.1015442690459849, "grad_norm": 0.8862988255032215, "learning_rate": 9.866592720140154e-06, "loss": 0.4151, "step": 2959 }, { "epoch": 0.10157858613589568, "grad_norm": 0.7905810061102476, "learning_rate": 9.866465171270779e-06, "loss": 0.3572, "step": 2960 }, { "epoch": 0.10161290322580645, "grad_norm": 0.8523674471071827, "learning_rate": 9.866337562281897e-06, "loss": 0.3768, "step": 2961 }, { "epoch": 0.10164722031571723, "grad_norm": 0.8712192830946796, "learning_rate": 9.866209893175091e-06, "loss": 0.3951, "step": 2962 }, { "epoch": 0.101681537405628, "grad_norm": 0.8613903894178688, "learning_rate": 9.866082163951934e-06, "loss": 0.3682, "step": 2963 }, { "epoch": 0.10171585449553878, "grad_norm": 0.8177440767604572, "learning_rate": 9.865954374614007e-06, "loss": 0.3403, "step": 2964 }, { "epoch": 0.10175017158544955, "grad_norm": 0.8936513906133113, "learning_rate": 9.865826525162884e-06, "loss": 0.4031, "step": 2965 }, { "epoch": 0.10178448867536033, "grad_norm": 0.9054524103212381, "learning_rate": 9.865698615600148e-06, "loss": 0.334, "step": 2966 }, { "epoch": 0.10181880576527111, "grad_norm": 0.9416039402928066, "learning_rate": 9.865570645927378e-06, "loss": 0.389, "step": 2967 }, { "epoch": 0.10185312285518187, "grad_norm": 0.8102875030783934, "learning_rate": 9.865442616146157e-06, "loss": 0.3543, "step": 2968 }, { "epoch": 0.10188743994509265, "grad_norm": 0.8444093784186112, "learning_rate": 9.865314526258065e-06, "loss": 0.3865, "step": 2969 }, { "epoch": 0.10192175703500343, "grad_norm": 0.8756999520388478, "learning_rate": 9.865186376264683e-06, "loss": 0.3968, "step": 2970 }, { "epoch": 0.10195607412491421, "grad_norm": 0.7735849541389249, "learning_rate": 9.865058166167596e-06, "loss": 0.3724, "step": 2971 }, { "epoch": 0.10199039121482498, "grad_norm": 0.8606122696501924, "learning_rate": 9.864929895968389e-06, "loss": 0.3818, "step": 2972 }, { "epoch": 0.10202470830473576, "grad_norm": 0.7789126511074389, "learning_rate": 9.864801565668645e-06, "loss": 0.3489, "step": 2973 }, { "epoch": 0.10205902539464654, "grad_norm": 0.8444526393542466, "learning_rate": 9.864673175269948e-06, "loss": 0.3701, "step": 2974 }, { "epoch": 0.10209334248455731, "grad_norm": 0.8602611082485861, "learning_rate": 9.864544724773886e-06, "loss": 0.3463, "step": 2975 }, { "epoch": 0.10212765957446808, "grad_norm": 0.8587162487286619, "learning_rate": 9.864416214182044e-06, "loss": 0.3414, "step": 2976 }, { "epoch": 0.10216197666437886, "grad_norm": 0.7907204264340281, "learning_rate": 9.864287643496013e-06, "loss": 0.3542, "step": 2977 }, { "epoch": 0.10219629375428964, "grad_norm": 0.81344097839421, "learning_rate": 9.864159012717379e-06, "loss": 0.3381, "step": 2978 }, { "epoch": 0.10223061084420042, "grad_norm": 0.8086161126341562, "learning_rate": 9.86403032184773e-06, "loss": 0.349, "step": 2979 }, { "epoch": 0.10226492793411118, "grad_norm": 0.825121926351156, "learning_rate": 9.86390157088866e-06, "loss": 0.3644, "step": 2980 }, { "epoch": 0.10229924502402196, "grad_norm": 0.9088563547524948, "learning_rate": 9.863772759841756e-06, "loss": 0.3397, "step": 2981 }, { "epoch": 0.10233356211393274, "grad_norm": 0.8062123867095046, "learning_rate": 9.863643888708609e-06, "loss": 0.3712, "step": 2982 }, { "epoch": 0.10236787920384352, "grad_norm": 0.9302252635898831, "learning_rate": 9.863514957490814e-06, "loss": 0.3625, "step": 2983 }, { "epoch": 0.10240219629375429, "grad_norm": 0.9722773274008913, "learning_rate": 9.86338596618996e-06, "loss": 0.4583, "step": 2984 }, { "epoch": 0.10243651338366506, "grad_norm": 0.9207090679940818, "learning_rate": 9.863256914807644e-06, "loss": 0.361, "step": 2985 }, { "epoch": 0.10247083047357584, "grad_norm": 0.8424212351014259, "learning_rate": 9.863127803345457e-06, "loss": 0.374, "step": 2986 }, { "epoch": 0.10250514756348662, "grad_norm": 0.7372335187130303, "learning_rate": 9.862998631804996e-06, "loss": 0.3188, "step": 2987 }, { "epoch": 0.10253946465339739, "grad_norm": 0.660206377916632, "learning_rate": 9.862869400187858e-06, "loss": 0.3182, "step": 2988 }, { "epoch": 0.10257378174330817, "grad_norm": 0.9363346062770908, "learning_rate": 9.86274010849564e-06, "loss": 0.3664, "step": 2989 }, { "epoch": 0.10260809883321895, "grad_norm": 0.7667725120264955, "learning_rate": 9.862610756729933e-06, "loss": 0.2929, "step": 2990 }, { "epoch": 0.10264241592312971, "grad_norm": 0.8400916029400813, "learning_rate": 9.862481344892342e-06, "loss": 0.4108, "step": 2991 }, { "epoch": 0.10267673301304049, "grad_norm": 0.7936990946842369, "learning_rate": 9.862351872984463e-06, "loss": 0.3473, "step": 2992 }, { "epoch": 0.10271105010295127, "grad_norm": 0.8683040416044504, "learning_rate": 9.862222341007894e-06, "loss": 0.3937, "step": 2993 }, { "epoch": 0.10274536719286205, "grad_norm": 0.9149810818598119, "learning_rate": 9.862092748964238e-06, "loss": 0.3861, "step": 2994 }, { "epoch": 0.10277968428277282, "grad_norm": 0.8682151640499354, "learning_rate": 9.861963096855096e-06, "loss": 0.3569, "step": 2995 }, { "epoch": 0.1028140013726836, "grad_norm": 0.8696170461046826, "learning_rate": 9.861833384682068e-06, "loss": 0.3718, "step": 2996 }, { "epoch": 0.10284831846259437, "grad_norm": 0.8286778917374927, "learning_rate": 9.861703612446753e-06, "loss": 0.3625, "step": 2997 }, { "epoch": 0.10288263555250515, "grad_norm": 0.9128047480406991, "learning_rate": 9.861573780150762e-06, "loss": 0.3274, "step": 2998 }, { "epoch": 0.10291695264241592, "grad_norm": 0.8545741058717449, "learning_rate": 9.861443887795694e-06, "loss": 0.3533, "step": 2999 }, { "epoch": 0.1029512697323267, "grad_norm": 0.8078358462472407, "learning_rate": 9.861313935383155e-06, "loss": 0.3709, "step": 3000 }, { "epoch": 0.10298558682223748, "grad_norm": 0.8456360130784996, "learning_rate": 9.86118392291475e-06, "loss": 0.3975, "step": 3001 }, { "epoch": 0.10301990391214826, "grad_norm": 0.8099827389838186, "learning_rate": 9.861053850392084e-06, "loss": 0.3688, "step": 3002 }, { "epoch": 0.10305422100205902, "grad_norm": 0.9392703240532052, "learning_rate": 9.860923717816765e-06, "loss": 0.3884, "step": 3003 }, { "epoch": 0.1030885380919698, "grad_norm": 0.9410651699600349, "learning_rate": 9.860793525190403e-06, "loss": 0.3738, "step": 3004 }, { "epoch": 0.10312285518188058, "grad_norm": 0.8350301793808181, "learning_rate": 9.860663272514603e-06, "loss": 0.3417, "step": 3005 }, { "epoch": 0.10315717227179136, "grad_norm": 0.7947778263326322, "learning_rate": 9.860532959790977e-06, "loss": 0.3472, "step": 3006 }, { "epoch": 0.10319148936170212, "grad_norm": 0.7752634774988231, "learning_rate": 9.860402587021129e-06, "loss": 0.3236, "step": 3007 }, { "epoch": 0.1032258064516129, "grad_norm": 0.8417449854402435, "learning_rate": 9.860272154206678e-06, "loss": 0.3701, "step": 3008 }, { "epoch": 0.10326012354152368, "grad_norm": 0.8024835250706932, "learning_rate": 9.860141661349227e-06, "loss": 0.3559, "step": 3009 }, { "epoch": 0.10329444063143445, "grad_norm": 0.6780928146750231, "learning_rate": 9.860011108450396e-06, "loss": 0.3169, "step": 3010 }, { "epoch": 0.10332875772134523, "grad_norm": 0.8705913554709158, "learning_rate": 9.859880495511791e-06, "loss": 0.3809, "step": 3011 }, { "epoch": 0.103363074811256, "grad_norm": 0.8828393986317041, "learning_rate": 9.85974982253503e-06, "loss": 0.4119, "step": 3012 }, { "epoch": 0.10339739190116679, "grad_norm": 0.858558985314777, "learning_rate": 9.859619089521726e-06, "loss": 0.3532, "step": 3013 }, { "epoch": 0.10343170899107755, "grad_norm": 0.8848908835759802, "learning_rate": 9.859488296473493e-06, "loss": 0.3415, "step": 3014 }, { "epoch": 0.10346602608098833, "grad_norm": 0.9006485224363635, "learning_rate": 9.859357443391947e-06, "loss": 0.355, "step": 3015 }, { "epoch": 0.10350034317089911, "grad_norm": 0.8070527618305093, "learning_rate": 9.859226530278708e-06, "loss": 0.334, "step": 3016 }, { "epoch": 0.10353466026080989, "grad_norm": 0.8362625922606874, "learning_rate": 9.859095557135389e-06, "loss": 0.4028, "step": 3017 }, { "epoch": 0.10356897735072065, "grad_norm": 0.8598192447844426, "learning_rate": 9.85896452396361e-06, "loss": 0.3905, "step": 3018 }, { "epoch": 0.10360329444063143, "grad_norm": 0.842175186850451, "learning_rate": 9.858833430764988e-06, "loss": 0.3897, "step": 3019 }, { "epoch": 0.10363761153054221, "grad_norm": 0.8712958970660815, "learning_rate": 9.858702277541146e-06, "loss": 0.3673, "step": 3020 }, { "epoch": 0.10367192862045299, "grad_norm": 0.8561233217898143, "learning_rate": 9.8585710642937e-06, "loss": 0.4115, "step": 3021 }, { "epoch": 0.10370624571036376, "grad_norm": 0.8710633610989693, "learning_rate": 9.858439791024275e-06, "loss": 0.4341, "step": 3022 }, { "epoch": 0.10374056280027454, "grad_norm": 0.8384724887019385, "learning_rate": 9.85830845773449e-06, "loss": 0.3977, "step": 3023 }, { "epoch": 0.10377487989018531, "grad_norm": 0.8613558332656617, "learning_rate": 9.858177064425967e-06, "loss": 0.3732, "step": 3024 }, { "epoch": 0.1038091969800961, "grad_norm": 0.7530420914938192, "learning_rate": 9.85804561110033e-06, "loss": 0.3397, "step": 3025 }, { "epoch": 0.10384351407000686, "grad_norm": 0.7648436950566105, "learning_rate": 9.857914097759204e-06, "loss": 0.3808, "step": 3026 }, { "epoch": 0.10387783115991764, "grad_norm": 0.8566009094377162, "learning_rate": 9.857782524404216e-06, "loss": 0.3984, "step": 3027 }, { "epoch": 0.10391214824982842, "grad_norm": 0.7405449513819244, "learning_rate": 9.857650891036985e-06, "loss": 0.3089, "step": 3028 }, { "epoch": 0.1039464653397392, "grad_norm": 0.9299799333715146, "learning_rate": 9.857519197659142e-06, "loss": 0.3769, "step": 3029 }, { "epoch": 0.10398078242964996, "grad_norm": 0.811656553848229, "learning_rate": 9.857387444272312e-06, "loss": 0.322, "step": 3030 }, { "epoch": 0.10401509951956074, "grad_norm": 0.8225866991763965, "learning_rate": 9.857255630878125e-06, "loss": 0.3698, "step": 3031 }, { "epoch": 0.10404941660947152, "grad_norm": 0.878055192069008, "learning_rate": 9.857123757478206e-06, "loss": 0.3666, "step": 3032 }, { "epoch": 0.10408373369938229, "grad_norm": 0.8737466321964391, "learning_rate": 9.856991824074186e-06, "loss": 0.3926, "step": 3033 }, { "epoch": 0.10411805078929307, "grad_norm": 0.9295606569643219, "learning_rate": 9.856859830667696e-06, "loss": 0.4051, "step": 3034 }, { "epoch": 0.10415236787920384, "grad_norm": 0.8107688744419022, "learning_rate": 9.856727777260364e-06, "loss": 0.3024, "step": 3035 }, { "epoch": 0.10418668496911462, "grad_norm": 0.8200198356396936, "learning_rate": 9.856595663853823e-06, "loss": 0.39, "step": 3036 }, { "epoch": 0.10422100205902539, "grad_norm": 0.9022542895267867, "learning_rate": 9.856463490449707e-06, "loss": 0.3713, "step": 3037 }, { "epoch": 0.10425531914893617, "grad_norm": 0.8704497718174372, "learning_rate": 9.856331257049643e-06, "loss": 0.3849, "step": 3038 }, { "epoch": 0.10428963623884695, "grad_norm": 0.8858701096877876, "learning_rate": 9.856198963655271e-06, "loss": 0.3628, "step": 3039 }, { "epoch": 0.10432395332875773, "grad_norm": 0.8660551303749399, "learning_rate": 9.856066610268222e-06, "loss": 0.392, "step": 3040 }, { "epoch": 0.10435827041866849, "grad_norm": 0.8766393335443949, "learning_rate": 9.855934196890132e-06, "loss": 0.3766, "step": 3041 }, { "epoch": 0.10439258750857927, "grad_norm": 0.7821619269049094, "learning_rate": 9.855801723522634e-06, "loss": 0.2895, "step": 3042 }, { "epoch": 0.10442690459849005, "grad_norm": 0.9442132493568278, "learning_rate": 9.85566919016737e-06, "loss": 0.3462, "step": 3043 }, { "epoch": 0.10446122168840083, "grad_norm": 0.851645933851089, "learning_rate": 9.855536596825972e-06, "loss": 0.3497, "step": 3044 }, { "epoch": 0.1044955387783116, "grad_norm": 0.8138700989443518, "learning_rate": 9.855403943500082e-06, "loss": 0.3768, "step": 3045 }, { "epoch": 0.10452985586822237, "grad_norm": 0.8579511266789224, "learning_rate": 9.855271230191337e-06, "loss": 0.4143, "step": 3046 }, { "epoch": 0.10456417295813315, "grad_norm": 0.798358393489065, "learning_rate": 9.855138456901375e-06, "loss": 0.3454, "step": 3047 }, { "epoch": 0.10459849004804393, "grad_norm": 0.8130060142004267, "learning_rate": 9.85500562363184e-06, "loss": 0.2847, "step": 3048 }, { "epoch": 0.1046328071379547, "grad_norm": 0.8310879010890103, "learning_rate": 9.85487273038437e-06, "loss": 0.419, "step": 3049 }, { "epoch": 0.10466712422786548, "grad_norm": 0.8644313786392752, "learning_rate": 9.854739777160607e-06, "loss": 0.3296, "step": 3050 }, { "epoch": 0.10470144131777626, "grad_norm": 0.8179121930314397, "learning_rate": 9.854606763962195e-06, "loss": 0.372, "step": 3051 }, { "epoch": 0.10473575840768702, "grad_norm": 1.3748488911753083, "learning_rate": 9.854473690790776e-06, "loss": 0.3691, "step": 3052 }, { "epoch": 0.1047700754975978, "grad_norm": 0.8513990077134144, "learning_rate": 9.854340557647994e-06, "loss": 0.3865, "step": 3053 }, { "epoch": 0.10480439258750858, "grad_norm": 0.8350091546847057, "learning_rate": 9.854207364535495e-06, "loss": 0.3292, "step": 3054 }, { "epoch": 0.10483870967741936, "grad_norm": 0.8606082373839066, "learning_rate": 9.854074111454923e-06, "loss": 0.3341, "step": 3055 }, { "epoch": 0.10487302676733012, "grad_norm": 0.7893090132994366, "learning_rate": 9.853940798407923e-06, "loss": 0.3564, "step": 3056 }, { "epoch": 0.1049073438572409, "grad_norm": 0.7978382549597173, "learning_rate": 9.853807425396146e-06, "loss": 0.3521, "step": 3057 }, { "epoch": 0.10494166094715168, "grad_norm": 0.8093412586699855, "learning_rate": 9.853673992421236e-06, "loss": 0.3501, "step": 3058 }, { "epoch": 0.10497597803706246, "grad_norm": 0.97364260644155, "learning_rate": 9.853540499484844e-06, "loss": 0.3587, "step": 3059 }, { "epoch": 0.10501029512697323, "grad_norm": 0.7945270112700079, "learning_rate": 9.853406946588617e-06, "loss": 0.3628, "step": 3060 }, { "epoch": 0.105044612216884, "grad_norm": 0.7545022876779938, "learning_rate": 9.853273333734206e-06, "loss": 0.3458, "step": 3061 }, { "epoch": 0.10507892930679479, "grad_norm": 0.8468055927126233, "learning_rate": 9.85313966092326e-06, "loss": 0.3863, "step": 3062 }, { "epoch": 0.10511324639670556, "grad_norm": 0.8666282220626178, "learning_rate": 9.853005928157434e-06, "loss": 0.3885, "step": 3063 }, { "epoch": 0.10514756348661633, "grad_norm": 0.960976200642727, "learning_rate": 9.852872135438375e-06, "loss": 0.3421, "step": 3064 }, { "epoch": 0.10518188057652711, "grad_norm": 0.8575457532735163, "learning_rate": 9.85273828276774e-06, "loss": 0.4158, "step": 3065 }, { "epoch": 0.10521619766643789, "grad_norm": 0.8055811485813626, "learning_rate": 9.852604370147181e-06, "loss": 0.3536, "step": 3066 }, { "epoch": 0.10525051475634867, "grad_norm": 0.7943691881795102, "learning_rate": 9.852470397578353e-06, "loss": 0.3567, "step": 3067 }, { "epoch": 0.10528483184625943, "grad_norm": 0.8985649896358848, "learning_rate": 9.852336365062912e-06, "loss": 0.361, "step": 3068 }, { "epoch": 0.10531914893617021, "grad_norm": 0.9222863128436573, "learning_rate": 9.85220227260251e-06, "loss": 0.3592, "step": 3069 }, { "epoch": 0.10535346602608099, "grad_norm": 0.9080513769733659, "learning_rate": 9.852068120198806e-06, "loss": 0.3268, "step": 3070 }, { "epoch": 0.10538778311599177, "grad_norm": 0.889938965225396, "learning_rate": 9.851933907853458e-06, "loss": 0.344, "step": 3071 }, { "epoch": 0.10542210020590254, "grad_norm": 0.8497022673178297, "learning_rate": 9.851799635568124e-06, "loss": 0.3568, "step": 3072 }, { "epoch": 0.10545641729581332, "grad_norm": 0.7799327995518923, "learning_rate": 9.851665303344464e-06, "loss": 0.3407, "step": 3073 }, { "epoch": 0.1054907343857241, "grad_norm": 0.8873328472285504, "learning_rate": 9.851530911184134e-06, "loss": 0.3364, "step": 3074 }, { "epoch": 0.10552505147563486, "grad_norm": 0.8747031931262927, "learning_rate": 9.851396459088794e-06, "loss": 0.3994, "step": 3075 }, { "epoch": 0.10555936856554564, "grad_norm": 0.8342566708439844, "learning_rate": 9.851261947060108e-06, "loss": 0.3404, "step": 3076 }, { "epoch": 0.10559368565545642, "grad_norm": 0.860377963695574, "learning_rate": 9.851127375099738e-06, "loss": 0.338, "step": 3077 }, { "epoch": 0.1056280027453672, "grad_norm": 0.8738786130137317, "learning_rate": 9.850992743209345e-06, "loss": 0.3809, "step": 3078 }, { "epoch": 0.10566231983527796, "grad_norm": 0.7623955051076534, "learning_rate": 9.850858051390591e-06, "loss": 0.3668, "step": 3079 }, { "epoch": 0.10569663692518874, "grad_norm": 0.8516444101056876, "learning_rate": 9.850723299645142e-06, "loss": 0.3703, "step": 3080 }, { "epoch": 0.10573095401509952, "grad_norm": 0.8334118975306251, "learning_rate": 9.850588487974662e-06, "loss": 0.3853, "step": 3081 }, { "epoch": 0.1057652711050103, "grad_norm": 0.8396788951915493, "learning_rate": 9.850453616380817e-06, "loss": 0.3279, "step": 3082 }, { "epoch": 0.10579958819492107, "grad_norm": 0.8482707521303604, "learning_rate": 9.850318684865273e-06, "loss": 0.39, "step": 3083 }, { "epoch": 0.10583390528483184, "grad_norm": 0.8362453044028811, "learning_rate": 9.850183693429693e-06, "loss": 0.3206, "step": 3084 }, { "epoch": 0.10586822237474262, "grad_norm": 0.8322225469179741, "learning_rate": 9.850048642075752e-06, "loss": 0.3673, "step": 3085 }, { "epoch": 0.1059025394646534, "grad_norm": 0.8687571846197014, "learning_rate": 9.849913530805113e-06, "loss": 0.3672, "step": 3086 }, { "epoch": 0.10593685655456417, "grad_norm": 0.9584022212394884, "learning_rate": 9.849778359619449e-06, "loss": 0.3624, "step": 3087 }, { "epoch": 0.10597117364447495, "grad_norm": 0.9130955797946186, "learning_rate": 9.849643128520425e-06, "loss": 0.3391, "step": 3088 }, { "epoch": 0.10600549073438573, "grad_norm": 0.8628706005570751, "learning_rate": 9.849507837509715e-06, "loss": 0.3514, "step": 3089 }, { "epoch": 0.1060398078242965, "grad_norm": 0.7840533419131998, "learning_rate": 9.84937248658899e-06, "loss": 0.352, "step": 3090 }, { "epoch": 0.10607412491420727, "grad_norm": 0.8107385153771935, "learning_rate": 9.849237075759923e-06, "loss": 0.3695, "step": 3091 }, { "epoch": 0.10610844200411805, "grad_norm": 0.9618253226349421, "learning_rate": 9.849101605024182e-06, "loss": 0.3998, "step": 3092 }, { "epoch": 0.10614275909402883, "grad_norm": 0.7866316772979359, "learning_rate": 9.848966074383449e-06, "loss": 0.3616, "step": 3093 }, { "epoch": 0.10617707618393961, "grad_norm": 0.8990793198700768, "learning_rate": 9.84883048383939e-06, "loss": 0.377, "step": 3094 }, { "epoch": 0.10621139327385037, "grad_norm": 0.7916160232854066, "learning_rate": 9.848694833393684e-06, "loss": 0.3512, "step": 3095 }, { "epoch": 0.10624571036376115, "grad_norm": 0.8380447301252253, "learning_rate": 9.848559123048007e-06, "loss": 0.3561, "step": 3096 }, { "epoch": 0.10628002745367193, "grad_norm": 0.8185210337759722, "learning_rate": 9.848423352804035e-06, "loss": 0.3252, "step": 3097 }, { "epoch": 0.1063143445435827, "grad_norm": 0.8096684437833157, "learning_rate": 9.848287522663445e-06, "loss": 0.3705, "step": 3098 }, { "epoch": 0.10634866163349348, "grad_norm": 0.8409578597165619, "learning_rate": 9.848151632627916e-06, "loss": 0.4168, "step": 3099 }, { "epoch": 0.10638297872340426, "grad_norm": 0.8930391024154394, "learning_rate": 9.848015682699125e-06, "loss": 0.3344, "step": 3100 }, { "epoch": 0.10641729581331504, "grad_norm": 0.8206493824656259, "learning_rate": 9.847879672878754e-06, "loss": 0.3137, "step": 3101 }, { "epoch": 0.1064516129032258, "grad_norm": 0.7627349133484526, "learning_rate": 9.847743603168479e-06, "loss": 0.3138, "step": 3102 }, { "epoch": 0.10648592999313658, "grad_norm": 0.9538190590506117, "learning_rate": 9.847607473569984e-06, "loss": 0.3156, "step": 3103 }, { "epoch": 0.10652024708304736, "grad_norm": 0.8551660871007145, "learning_rate": 9.847471284084953e-06, "loss": 0.4072, "step": 3104 }, { "epoch": 0.10655456417295814, "grad_norm": 0.8748874727773372, "learning_rate": 9.847335034715065e-06, "loss": 0.3483, "step": 3105 }, { "epoch": 0.1065888812628689, "grad_norm": 0.863086916051977, "learning_rate": 9.847198725462002e-06, "loss": 0.39, "step": 3106 }, { "epoch": 0.10662319835277968, "grad_norm": 0.8466131995172542, "learning_rate": 9.84706235632745e-06, "loss": 0.4287, "step": 3107 }, { "epoch": 0.10665751544269046, "grad_norm": 0.846665775374712, "learning_rate": 9.846925927313098e-06, "loss": 0.3529, "step": 3108 }, { "epoch": 0.10669183253260124, "grad_norm": 0.8061513532901586, "learning_rate": 9.846789438420623e-06, "loss": 0.3784, "step": 3109 }, { "epoch": 0.106726149622512, "grad_norm": 0.9888204450326454, "learning_rate": 9.846652889651717e-06, "loss": 0.3454, "step": 3110 }, { "epoch": 0.10676046671242279, "grad_norm": 0.7506091314466498, "learning_rate": 9.846516281008064e-06, "loss": 0.3509, "step": 3111 }, { "epoch": 0.10679478380233356, "grad_norm": 0.8567741884327057, "learning_rate": 9.846379612491354e-06, "loss": 0.4102, "step": 3112 }, { "epoch": 0.10682910089224434, "grad_norm": 0.7880364324984408, "learning_rate": 9.846242884103274e-06, "loss": 0.3265, "step": 3113 }, { "epoch": 0.10686341798215511, "grad_norm": 0.9040787766291396, "learning_rate": 9.846106095845514e-06, "loss": 0.3706, "step": 3114 }, { "epoch": 0.10689773507206589, "grad_norm": 0.8498033362928246, "learning_rate": 9.845969247719761e-06, "loss": 0.447, "step": 3115 }, { "epoch": 0.10693205216197667, "grad_norm": 0.7794185859534867, "learning_rate": 9.84583233972771e-06, "loss": 0.3542, "step": 3116 }, { "epoch": 0.10696636925188743, "grad_norm": 0.8757166234116035, "learning_rate": 9.84569537187105e-06, "loss": 0.4175, "step": 3117 }, { "epoch": 0.10700068634179821, "grad_norm": 0.8465536520939753, "learning_rate": 9.845558344151473e-06, "loss": 0.3525, "step": 3118 }, { "epoch": 0.10703500343170899, "grad_norm": 0.8640226290236539, "learning_rate": 9.84542125657067e-06, "loss": 0.3653, "step": 3119 }, { "epoch": 0.10706932052161977, "grad_norm": 0.8173829895173401, "learning_rate": 9.84528410913034e-06, "loss": 0.3625, "step": 3120 }, { "epoch": 0.10710363761153054, "grad_norm": 0.8239297415147654, "learning_rate": 9.845146901832172e-06, "loss": 0.3617, "step": 3121 }, { "epoch": 0.10713795470144132, "grad_norm": 0.8819751560648756, "learning_rate": 9.845009634677862e-06, "loss": 0.3676, "step": 3122 }, { "epoch": 0.1071722717913521, "grad_norm": 0.9016661838730129, "learning_rate": 9.844872307669107e-06, "loss": 0.444, "step": 3123 }, { "epoch": 0.10720658888126287, "grad_norm": 0.8539364133757541, "learning_rate": 9.844734920807605e-06, "loss": 0.3332, "step": 3124 }, { "epoch": 0.10724090597117364, "grad_norm": 0.8506532282698986, "learning_rate": 9.84459747409505e-06, "loss": 0.3331, "step": 3125 }, { "epoch": 0.10727522306108442, "grad_norm": 0.8337803091314117, "learning_rate": 9.844459967533142e-06, "loss": 0.313, "step": 3126 }, { "epoch": 0.1073095401509952, "grad_norm": 0.7502694392006777, "learning_rate": 9.844322401123579e-06, "loss": 0.3395, "step": 3127 }, { "epoch": 0.10734385724090598, "grad_norm": 0.9327746514848954, "learning_rate": 9.84418477486806e-06, "loss": 0.3454, "step": 3128 }, { "epoch": 0.10737817433081674, "grad_norm": 0.8426668449916132, "learning_rate": 9.844047088768287e-06, "loss": 0.3838, "step": 3129 }, { "epoch": 0.10741249142072752, "grad_norm": 0.9033073888015414, "learning_rate": 9.843909342825959e-06, "loss": 0.4075, "step": 3130 }, { "epoch": 0.1074468085106383, "grad_norm": 0.8066546479844963, "learning_rate": 9.84377153704278e-06, "loss": 0.35, "step": 3131 }, { "epoch": 0.10748112560054908, "grad_norm": 0.8303866997532171, "learning_rate": 9.84363367142045e-06, "loss": 0.3908, "step": 3132 }, { "epoch": 0.10751544269045984, "grad_norm": 0.846192816964086, "learning_rate": 9.843495745960673e-06, "loss": 0.4019, "step": 3133 }, { "epoch": 0.10754975978037062, "grad_norm": 0.8106913743581903, "learning_rate": 9.843357760665154e-06, "loss": 0.4045, "step": 3134 }, { "epoch": 0.1075840768702814, "grad_norm": 0.8250469878349157, "learning_rate": 9.843219715535595e-06, "loss": 0.3183, "step": 3135 }, { "epoch": 0.10761839396019218, "grad_norm": 0.8397443214176133, "learning_rate": 9.843081610573704e-06, "loss": 0.3611, "step": 3136 }, { "epoch": 0.10765271105010295, "grad_norm": 1.028401584535172, "learning_rate": 9.842943445781186e-06, "loss": 0.3552, "step": 3137 }, { "epoch": 0.10768702814001373, "grad_norm": 0.7855551233741441, "learning_rate": 9.842805221159747e-06, "loss": 0.3674, "step": 3138 }, { "epoch": 0.1077213452299245, "grad_norm": 0.8182524117745961, "learning_rate": 9.842666936711095e-06, "loss": 0.3556, "step": 3139 }, { "epoch": 0.10775566231983527, "grad_norm": 0.7983829030585571, "learning_rate": 9.842528592436942e-06, "loss": 0.3697, "step": 3140 }, { "epoch": 0.10778997940974605, "grad_norm": 0.882434869816229, "learning_rate": 9.842390188338992e-06, "loss": 0.3611, "step": 3141 }, { "epoch": 0.10782429649965683, "grad_norm": 0.8207861239907397, "learning_rate": 9.842251724418958e-06, "loss": 0.3443, "step": 3142 }, { "epoch": 0.10785861358956761, "grad_norm": 0.8440162525736215, "learning_rate": 9.842113200678549e-06, "loss": 0.3375, "step": 3143 }, { "epoch": 0.10789293067947837, "grad_norm": 0.9222249396668741, "learning_rate": 9.841974617119478e-06, "loss": 0.3393, "step": 3144 }, { "epoch": 0.10792724776938915, "grad_norm": 0.8512066577163819, "learning_rate": 9.841835973743454e-06, "loss": 0.3422, "step": 3145 }, { "epoch": 0.10796156485929993, "grad_norm": 0.7927965827218864, "learning_rate": 9.841697270552192e-06, "loss": 0.3646, "step": 3146 }, { "epoch": 0.10799588194921071, "grad_norm": 0.8639217359082604, "learning_rate": 9.841558507547406e-06, "loss": 0.4373, "step": 3147 }, { "epoch": 0.10803019903912148, "grad_norm": 0.827313739766438, "learning_rate": 9.841419684730808e-06, "loss": 0.353, "step": 3148 }, { "epoch": 0.10806451612903226, "grad_norm": 0.9284431049032609, "learning_rate": 9.841280802104116e-06, "loss": 0.4321, "step": 3149 }, { "epoch": 0.10809883321894304, "grad_norm": 0.9049630706890811, "learning_rate": 9.841141859669043e-06, "loss": 0.4029, "step": 3150 }, { "epoch": 0.10813315030885381, "grad_norm": 0.8848508346292406, "learning_rate": 9.841002857427307e-06, "loss": 0.3133, "step": 3151 }, { "epoch": 0.10816746739876458, "grad_norm": 0.8452535260528021, "learning_rate": 9.840863795380624e-06, "loss": 0.3798, "step": 3152 }, { "epoch": 0.10820178448867536, "grad_norm": 0.859268170375599, "learning_rate": 9.840724673530712e-06, "loss": 0.3823, "step": 3153 }, { "epoch": 0.10823610157858614, "grad_norm": 0.8131555911591922, "learning_rate": 9.840585491879292e-06, "loss": 0.3417, "step": 3154 }, { "epoch": 0.10827041866849692, "grad_norm": 0.9940925955098538, "learning_rate": 9.840446250428082e-06, "loss": 0.3872, "step": 3155 }, { "epoch": 0.10830473575840768, "grad_norm": 0.7774722049373632, "learning_rate": 9.840306949178801e-06, "loss": 0.3664, "step": 3156 }, { "epoch": 0.10833905284831846, "grad_norm": 0.7970990894697403, "learning_rate": 9.840167588133172e-06, "loss": 0.3514, "step": 3157 }, { "epoch": 0.10837336993822924, "grad_norm": 0.9303464778668893, "learning_rate": 9.840028167292914e-06, "loss": 0.3895, "step": 3158 }, { "epoch": 0.10840768702814, "grad_norm": 0.8915326736698239, "learning_rate": 9.839888686659752e-06, "loss": 0.3489, "step": 3159 }, { "epoch": 0.10844200411805079, "grad_norm": 0.7793099727580808, "learning_rate": 9.839749146235408e-06, "loss": 0.3219, "step": 3160 }, { "epoch": 0.10847632120796157, "grad_norm": 0.845375551057043, "learning_rate": 9.839609546021605e-06, "loss": 0.3553, "step": 3161 }, { "epoch": 0.10851063829787234, "grad_norm": 0.8346949503888457, "learning_rate": 9.83946988602007e-06, "loss": 0.317, "step": 3162 }, { "epoch": 0.10854495538778311, "grad_norm": 0.8517594670222565, "learning_rate": 9.839330166232526e-06, "loss": 0.3286, "step": 3163 }, { "epoch": 0.10857927247769389, "grad_norm": 0.8496044270640478, "learning_rate": 9.8391903866607e-06, "loss": 0.3067, "step": 3164 }, { "epoch": 0.10861358956760467, "grad_norm": 0.9163249906026586, "learning_rate": 9.839050547306317e-06, "loss": 0.3638, "step": 3165 }, { "epoch": 0.10864790665751545, "grad_norm": 0.8338206232367731, "learning_rate": 9.838910648171108e-06, "loss": 0.3835, "step": 3166 }, { "epoch": 0.10868222374742621, "grad_norm": 1.018132914515668, "learning_rate": 9.8387706892568e-06, "loss": 0.323, "step": 3167 }, { "epoch": 0.10871654083733699, "grad_norm": 0.8585487348336798, "learning_rate": 9.838630670565122e-06, "loss": 0.39, "step": 3168 }, { "epoch": 0.10875085792724777, "grad_norm": 0.7949107817665703, "learning_rate": 9.8384905920978e-06, "loss": 0.3743, "step": 3169 }, { "epoch": 0.10878517501715855, "grad_norm": 0.8992986881229694, "learning_rate": 9.83835045385657e-06, "loss": 0.3862, "step": 3170 }, { "epoch": 0.10881949210706932, "grad_norm": 0.8244767653402731, "learning_rate": 9.838210255843163e-06, "loss": 0.3149, "step": 3171 }, { "epoch": 0.1088538091969801, "grad_norm": 0.8118433760369914, "learning_rate": 9.838069998059308e-06, "loss": 0.3514, "step": 3172 }, { "epoch": 0.10888812628689087, "grad_norm": 0.8956021134513374, "learning_rate": 9.837929680506738e-06, "loss": 0.4053, "step": 3173 }, { "epoch": 0.10892244337680165, "grad_norm": 0.9601304824064908, "learning_rate": 9.837789303187186e-06, "loss": 0.4018, "step": 3174 }, { "epoch": 0.10895676046671242, "grad_norm": 0.7722742272879206, "learning_rate": 9.83764886610239e-06, "loss": 0.3079, "step": 3175 }, { "epoch": 0.1089910775566232, "grad_norm": 0.7728896593784041, "learning_rate": 9.837508369254082e-06, "loss": 0.3602, "step": 3176 }, { "epoch": 0.10902539464653398, "grad_norm": 0.7991180414075584, "learning_rate": 9.837367812643997e-06, "loss": 0.2941, "step": 3177 }, { "epoch": 0.10905971173644476, "grad_norm": 0.8730738213343482, "learning_rate": 9.837227196273873e-06, "loss": 0.4122, "step": 3178 }, { "epoch": 0.10909402882635552, "grad_norm": 0.8320877794815728, "learning_rate": 9.837086520145446e-06, "loss": 0.3773, "step": 3179 }, { "epoch": 0.1091283459162663, "grad_norm": 0.7646221784652161, "learning_rate": 9.836945784260456e-06, "loss": 0.3562, "step": 3180 }, { "epoch": 0.10916266300617708, "grad_norm": 0.8304857767002717, "learning_rate": 9.836804988620638e-06, "loss": 0.3559, "step": 3181 }, { "epoch": 0.10919698009608785, "grad_norm": 0.9544454373110625, "learning_rate": 9.836664133227735e-06, "loss": 0.3321, "step": 3182 }, { "epoch": 0.10923129718599862, "grad_norm": 0.8023484242989966, "learning_rate": 9.836523218083487e-06, "loss": 0.4125, "step": 3183 }, { "epoch": 0.1092656142759094, "grad_norm": 0.7802267248661383, "learning_rate": 9.83638224318963e-06, "loss": 0.3041, "step": 3184 }, { "epoch": 0.10929993136582018, "grad_norm": 0.8949163609482162, "learning_rate": 9.836241208547912e-06, "loss": 0.3563, "step": 3185 }, { "epoch": 0.10933424845573095, "grad_norm": 0.8577673246532036, "learning_rate": 9.836100114160072e-06, "loss": 0.3983, "step": 3186 }, { "epoch": 0.10936856554564173, "grad_norm": 0.8296881405054004, "learning_rate": 9.835958960027854e-06, "loss": 0.3711, "step": 3187 }, { "epoch": 0.1094028826355525, "grad_norm": 0.7735806467672144, "learning_rate": 9.835817746152998e-06, "loss": 0.3458, "step": 3188 }, { "epoch": 0.10943719972546329, "grad_norm": 0.8621378630563961, "learning_rate": 9.835676472537254e-06, "loss": 0.4, "step": 3189 }, { "epoch": 0.10947151681537405, "grad_norm": 0.8197704565155121, "learning_rate": 9.835535139182367e-06, "loss": 0.3835, "step": 3190 }, { "epoch": 0.10950583390528483, "grad_norm": 1.2966994040850115, "learning_rate": 9.835393746090078e-06, "loss": 0.3476, "step": 3191 }, { "epoch": 0.10954015099519561, "grad_norm": 0.8500770140857917, "learning_rate": 9.835252293262138e-06, "loss": 0.3906, "step": 3192 }, { "epoch": 0.10957446808510639, "grad_norm": 0.7184949822156548, "learning_rate": 9.835110780700292e-06, "loss": 0.332, "step": 3193 }, { "epoch": 0.10960878517501715, "grad_norm": 0.9228866438155077, "learning_rate": 9.83496920840629e-06, "loss": 0.3958, "step": 3194 }, { "epoch": 0.10964310226492793, "grad_norm": 0.8198996081980164, "learning_rate": 9.834827576381882e-06, "loss": 0.307, "step": 3195 }, { "epoch": 0.10967741935483871, "grad_norm": 0.8125266864307543, "learning_rate": 9.834685884628814e-06, "loss": 0.4076, "step": 3196 }, { "epoch": 0.10971173644474949, "grad_norm": 0.8889775849940598, "learning_rate": 9.83454413314884e-06, "loss": 0.3408, "step": 3197 }, { "epoch": 0.10974605353466026, "grad_norm": 0.875883599370745, "learning_rate": 9.83440232194371e-06, "loss": 0.4176, "step": 3198 }, { "epoch": 0.10978037062457104, "grad_norm": 0.8661917308579372, "learning_rate": 9.834260451015177e-06, "loss": 0.4175, "step": 3199 }, { "epoch": 0.10981468771448182, "grad_norm": 0.9204486238381352, "learning_rate": 9.83411852036499e-06, "loss": 0.3274, "step": 3200 }, { "epoch": 0.1098490048043926, "grad_norm": 0.8663663634789075, "learning_rate": 9.833976529994904e-06, "loss": 0.3564, "step": 3201 }, { "epoch": 0.10988332189430336, "grad_norm": 0.8766073814695919, "learning_rate": 9.833834479906676e-06, "loss": 0.3596, "step": 3202 }, { "epoch": 0.10991763898421414, "grad_norm": 0.8479878947774454, "learning_rate": 9.833692370102058e-06, "loss": 0.3941, "step": 3203 }, { "epoch": 0.10995195607412492, "grad_norm": 0.7681455331124257, "learning_rate": 9.833550200582804e-06, "loss": 0.3934, "step": 3204 }, { "epoch": 0.10998627316403568, "grad_norm": 0.823066639647452, "learning_rate": 9.833407971350676e-06, "loss": 0.3095, "step": 3205 }, { "epoch": 0.11002059025394646, "grad_norm": 0.8498786832585672, "learning_rate": 9.833265682407427e-06, "loss": 0.3055, "step": 3206 }, { "epoch": 0.11005490734385724, "grad_norm": 1.0567011599851919, "learning_rate": 9.833123333754815e-06, "loss": 0.3361, "step": 3207 }, { "epoch": 0.11008922443376802, "grad_norm": 0.7815910517436794, "learning_rate": 9.832980925394597e-06, "loss": 0.3715, "step": 3208 }, { "epoch": 0.11012354152367879, "grad_norm": 0.8785327565150313, "learning_rate": 9.832838457328537e-06, "loss": 0.3487, "step": 3209 }, { "epoch": 0.11015785861358957, "grad_norm": 0.9051310291055505, "learning_rate": 9.832695929558393e-06, "loss": 0.3659, "step": 3210 }, { "epoch": 0.11019217570350034, "grad_norm": 0.9403521368415109, "learning_rate": 9.832553342085924e-06, "loss": 0.3947, "step": 3211 }, { "epoch": 0.11022649279341112, "grad_norm": 0.7607473198880762, "learning_rate": 9.832410694912892e-06, "loss": 0.33, "step": 3212 }, { "epoch": 0.11026080988332189, "grad_norm": 0.7551908884639968, "learning_rate": 9.83226798804106e-06, "loss": 0.3001, "step": 3213 }, { "epoch": 0.11029512697323267, "grad_norm": 0.901551496890212, "learning_rate": 9.832125221472192e-06, "loss": 0.3666, "step": 3214 }, { "epoch": 0.11032944406314345, "grad_norm": 0.9664179755093218, "learning_rate": 9.83198239520805e-06, "loss": 0.4141, "step": 3215 }, { "epoch": 0.11036376115305423, "grad_norm": 0.8886926265285471, "learning_rate": 9.831839509250399e-06, "loss": 0.3576, "step": 3216 }, { "epoch": 0.11039807824296499, "grad_norm": 0.8463854874271949, "learning_rate": 9.831696563601005e-06, "loss": 0.3759, "step": 3217 }, { "epoch": 0.11043239533287577, "grad_norm": 0.8487660645283377, "learning_rate": 9.831553558261634e-06, "loss": 0.4021, "step": 3218 }, { "epoch": 0.11046671242278655, "grad_norm": 0.9046223446388556, "learning_rate": 9.831410493234047e-06, "loss": 0.3866, "step": 3219 }, { "epoch": 0.11050102951269733, "grad_norm": 0.9176756851635303, "learning_rate": 9.831267368520022e-06, "loss": 0.3507, "step": 3220 }, { "epoch": 0.1105353466026081, "grad_norm": 0.8576644578080286, "learning_rate": 9.831124184121317e-06, "loss": 0.3677, "step": 3221 }, { "epoch": 0.11056966369251887, "grad_norm": 0.8185063991729695, "learning_rate": 9.830980940039708e-06, "loss": 0.3538, "step": 3222 }, { "epoch": 0.11060398078242965, "grad_norm": 0.8436388826341245, "learning_rate": 9.83083763627696e-06, "loss": 0.3211, "step": 3223 }, { "epoch": 0.11063829787234042, "grad_norm": 0.8079034952948527, "learning_rate": 9.830694272834848e-06, "loss": 0.3141, "step": 3224 }, { "epoch": 0.1106726149622512, "grad_norm": 0.8496824882745918, "learning_rate": 9.830550849715138e-06, "loss": 0.3733, "step": 3225 }, { "epoch": 0.11070693205216198, "grad_norm": 0.8229436401782374, "learning_rate": 9.830407366919605e-06, "loss": 0.2901, "step": 3226 }, { "epoch": 0.11074124914207276, "grad_norm": 0.8274451511392135, "learning_rate": 9.83026382445002e-06, "loss": 0.4034, "step": 3227 }, { "epoch": 0.11077556623198352, "grad_norm": 0.9637397118822315, "learning_rate": 9.830120222308157e-06, "loss": 0.3254, "step": 3228 }, { "epoch": 0.1108098833218943, "grad_norm": 0.8997928149721566, "learning_rate": 9.82997656049579e-06, "loss": 0.4899, "step": 3229 }, { "epoch": 0.11084420041180508, "grad_norm": 0.7393155699854794, "learning_rate": 9.829832839014694e-06, "loss": 0.3484, "step": 3230 }, { "epoch": 0.11087851750171586, "grad_norm": 0.8330418783082193, "learning_rate": 9.829689057866645e-06, "loss": 0.3988, "step": 3231 }, { "epoch": 0.11091283459162662, "grad_norm": 0.8452009694602735, "learning_rate": 9.829545217053419e-06, "loss": 0.4143, "step": 3232 }, { "epoch": 0.1109471516815374, "grad_norm": 0.8082040790390829, "learning_rate": 9.829401316576793e-06, "loss": 0.406, "step": 3233 }, { "epoch": 0.11098146877144818, "grad_norm": 0.7723297475919075, "learning_rate": 9.829257356438542e-06, "loss": 0.3486, "step": 3234 }, { "epoch": 0.11101578586135896, "grad_norm": 0.8197261287048735, "learning_rate": 9.829113336640447e-06, "loss": 0.3298, "step": 3235 }, { "epoch": 0.11105010295126973, "grad_norm": 0.8848973230798786, "learning_rate": 9.828969257184287e-06, "loss": 0.3581, "step": 3236 }, { "epoch": 0.1110844200411805, "grad_norm": 0.8157666867628892, "learning_rate": 9.828825118071842e-06, "loss": 0.3658, "step": 3237 }, { "epoch": 0.11111873713109129, "grad_norm": 0.945019569844319, "learning_rate": 9.828680919304893e-06, "loss": 0.3879, "step": 3238 }, { "epoch": 0.11115305422100207, "grad_norm": 0.7639381283591963, "learning_rate": 9.828536660885222e-06, "loss": 0.3495, "step": 3239 }, { "epoch": 0.11118737131091283, "grad_norm": 0.8118464743859095, "learning_rate": 9.828392342814609e-06, "loss": 0.3561, "step": 3240 }, { "epoch": 0.11122168840082361, "grad_norm": 0.960868726373882, "learning_rate": 9.828247965094838e-06, "loss": 0.3411, "step": 3241 }, { "epoch": 0.11125600549073439, "grad_norm": 0.7803175808256573, "learning_rate": 9.828103527727693e-06, "loss": 0.2857, "step": 3242 }, { "epoch": 0.11129032258064517, "grad_norm": 0.9325609265807864, "learning_rate": 9.827959030714957e-06, "loss": 0.4175, "step": 3243 }, { "epoch": 0.11132463967055593, "grad_norm": 0.9166248817436596, "learning_rate": 9.827814474058416e-06, "loss": 0.3733, "step": 3244 }, { "epoch": 0.11135895676046671, "grad_norm": 0.9045273897444355, "learning_rate": 9.827669857759857e-06, "loss": 0.355, "step": 3245 }, { "epoch": 0.11139327385037749, "grad_norm": 0.8363230531971086, "learning_rate": 9.827525181821066e-06, "loss": 0.3638, "step": 3246 }, { "epoch": 0.11142759094028826, "grad_norm": 0.7346452159081971, "learning_rate": 9.827380446243827e-06, "loss": 0.3249, "step": 3247 }, { "epoch": 0.11146190803019904, "grad_norm": 0.9084328239780343, "learning_rate": 9.827235651029932e-06, "loss": 0.3767, "step": 3248 }, { "epoch": 0.11149622512010982, "grad_norm": 0.809447916186476, "learning_rate": 9.827090796181169e-06, "loss": 0.3835, "step": 3249 }, { "epoch": 0.1115305422100206, "grad_norm": 0.724048343103065, "learning_rate": 9.826945881699326e-06, "loss": 0.3287, "step": 3250 }, { "epoch": 0.11156485929993136, "grad_norm": 0.8443483428255274, "learning_rate": 9.826800907586196e-06, "loss": 0.3976, "step": 3251 }, { "epoch": 0.11159917638984214, "grad_norm": 0.7895512586092446, "learning_rate": 9.826655873843567e-06, "loss": 0.398, "step": 3252 }, { "epoch": 0.11163349347975292, "grad_norm": 0.8781115620117496, "learning_rate": 9.826510780473233e-06, "loss": 0.4286, "step": 3253 }, { "epoch": 0.1116678105696637, "grad_norm": 0.9124374993787476, "learning_rate": 9.826365627476985e-06, "loss": 0.3549, "step": 3254 }, { "epoch": 0.11170212765957446, "grad_norm": 0.799198796864396, "learning_rate": 9.826220414856616e-06, "loss": 0.3386, "step": 3255 }, { "epoch": 0.11173644474948524, "grad_norm": 0.9152535853035302, "learning_rate": 9.826075142613922e-06, "loss": 0.3845, "step": 3256 }, { "epoch": 0.11177076183939602, "grad_norm": 0.8898028336095081, "learning_rate": 9.825929810750695e-06, "loss": 0.3856, "step": 3257 }, { "epoch": 0.1118050789293068, "grad_norm": 0.9284202425080348, "learning_rate": 9.825784419268732e-06, "loss": 0.4172, "step": 3258 }, { "epoch": 0.11183939601921757, "grad_norm": 0.7820058433946185, "learning_rate": 9.825638968169828e-06, "loss": 0.3389, "step": 3259 }, { "epoch": 0.11187371310912834, "grad_norm": 0.9113833705880052, "learning_rate": 9.825493457455783e-06, "loss": 0.4265, "step": 3260 }, { "epoch": 0.11190803019903912, "grad_norm": 0.9121702930242267, "learning_rate": 9.825347887128391e-06, "loss": 0.4005, "step": 3261 }, { "epoch": 0.1119423472889499, "grad_norm": 0.9070868491521799, "learning_rate": 9.825202257189451e-06, "loss": 0.3968, "step": 3262 }, { "epoch": 0.11197666437886067, "grad_norm": 0.9239880893005664, "learning_rate": 9.825056567640764e-06, "loss": 0.3658, "step": 3263 }, { "epoch": 0.11201098146877145, "grad_norm": 0.7572459085768087, "learning_rate": 9.824910818484127e-06, "loss": 0.3422, "step": 3264 }, { "epoch": 0.11204529855868223, "grad_norm": 0.8127061905793209, "learning_rate": 9.824765009721345e-06, "loss": 0.3597, "step": 3265 }, { "epoch": 0.112079615648593, "grad_norm": 0.9106364384333776, "learning_rate": 9.824619141354213e-06, "loss": 0.3551, "step": 3266 }, { "epoch": 0.11211393273850377, "grad_norm": 0.811626799864334, "learning_rate": 9.824473213384538e-06, "loss": 0.3553, "step": 3267 }, { "epoch": 0.11214824982841455, "grad_norm": 0.8239099428253472, "learning_rate": 9.82432722581412e-06, "loss": 0.347, "step": 3268 }, { "epoch": 0.11218256691832533, "grad_norm": 0.8486653888294798, "learning_rate": 9.824181178644767e-06, "loss": 0.4031, "step": 3269 }, { "epoch": 0.1122168840082361, "grad_norm": 0.8768230458806162, "learning_rate": 9.824035071878278e-06, "loss": 0.3154, "step": 3270 }, { "epoch": 0.11225120109814687, "grad_norm": 0.8410043963942204, "learning_rate": 9.82388890551646e-06, "loss": 0.3711, "step": 3271 }, { "epoch": 0.11228551818805765, "grad_norm": 0.8593448911531246, "learning_rate": 9.823742679561119e-06, "loss": 0.3725, "step": 3272 }, { "epoch": 0.11231983527796843, "grad_norm": 0.9396508362851289, "learning_rate": 9.82359639401406e-06, "loss": 0.3725, "step": 3273 }, { "epoch": 0.1123541523678792, "grad_norm": 0.8061811876321361, "learning_rate": 9.823450048877093e-06, "loss": 0.3301, "step": 3274 }, { "epoch": 0.11238846945778998, "grad_norm": 0.8876506200810048, "learning_rate": 9.823303644152022e-06, "loss": 0.3885, "step": 3275 }, { "epoch": 0.11242278654770076, "grad_norm": 0.8359811231054582, "learning_rate": 9.82315717984066e-06, "loss": 0.3567, "step": 3276 }, { "epoch": 0.11245710363761154, "grad_norm": 0.7946245757361555, "learning_rate": 9.823010655944813e-06, "loss": 0.3453, "step": 3277 }, { "epoch": 0.1124914207275223, "grad_norm": 0.9827181566578224, "learning_rate": 9.822864072466293e-06, "loss": 0.4484, "step": 3278 }, { "epoch": 0.11252573781743308, "grad_norm": 0.84712455457476, "learning_rate": 9.82271742940691e-06, "loss": 0.3418, "step": 3279 }, { "epoch": 0.11256005490734386, "grad_norm": 0.9085845232484665, "learning_rate": 9.822570726768476e-06, "loss": 0.3578, "step": 3280 }, { "epoch": 0.11259437199725464, "grad_norm": 0.7995974782884908, "learning_rate": 9.822423964552804e-06, "loss": 0.3399, "step": 3281 }, { "epoch": 0.1126286890871654, "grad_norm": 0.919533499761625, "learning_rate": 9.822277142761706e-06, "loss": 0.34, "step": 3282 }, { "epoch": 0.11266300617707618, "grad_norm": 0.8429779191347073, "learning_rate": 9.822130261396996e-06, "loss": 0.4136, "step": 3283 }, { "epoch": 0.11269732326698696, "grad_norm": 0.9122497137627564, "learning_rate": 9.82198332046049e-06, "loss": 0.3579, "step": 3284 }, { "epoch": 0.11273164035689774, "grad_norm": 0.8818286633196356, "learning_rate": 9.821836319953999e-06, "loss": 0.3503, "step": 3285 }, { "epoch": 0.1127659574468085, "grad_norm": 0.8081410816757331, "learning_rate": 9.821689259879343e-06, "loss": 0.3646, "step": 3286 }, { "epoch": 0.11280027453671929, "grad_norm": 0.8663374697102735, "learning_rate": 9.821542140238338e-06, "loss": 0.4332, "step": 3287 }, { "epoch": 0.11283459162663007, "grad_norm": 0.9764499359850255, "learning_rate": 9.821394961032802e-06, "loss": 0.3637, "step": 3288 }, { "epoch": 0.11286890871654083, "grad_norm": 0.9122125901656387, "learning_rate": 9.821247722264552e-06, "loss": 0.3344, "step": 3289 }, { "epoch": 0.11290322580645161, "grad_norm": 0.8937896931861005, "learning_rate": 9.821100423935408e-06, "loss": 0.432, "step": 3290 }, { "epoch": 0.11293754289636239, "grad_norm": 0.9014637536632589, "learning_rate": 9.820953066047187e-06, "loss": 0.3127, "step": 3291 }, { "epoch": 0.11297185998627317, "grad_norm": 0.8707013406843914, "learning_rate": 9.820805648601713e-06, "loss": 0.3858, "step": 3292 }, { "epoch": 0.11300617707618393, "grad_norm": 0.8077922777834703, "learning_rate": 9.820658171600807e-06, "loss": 0.3705, "step": 3293 }, { "epoch": 0.11304049416609471, "grad_norm": 0.7792228442480691, "learning_rate": 9.820510635046287e-06, "loss": 0.294, "step": 3294 }, { "epoch": 0.11307481125600549, "grad_norm": 0.8132908679724535, "learning_rate": 9.820363038939979e-06, "loss": 0.3535, "step": 3295 }, { "epoch": 0.11310912834591627, "grad_norm": 0.8288074988929097, "learning_rate": 9.820215383283707e-06, "loss": 0.3429, "step": 3296 }, { "epoch": 0.11314344543582704, "grad_norm": 0.8930108177769132, "learning_rate": 9.820067668079294e-06, "loss": 0.3296, "step": 3297 }, { "epoch": 0.11317776252573782, "grad_norm": 0.8746793861803793, "learning_rate": 9.819919893328564e-06, "loss": 0.3428, "step": 3298 }, { "epoch": 0.1132120796156486, "grad_norm": 0.8363569873013431, "learning_rate": 9.819772059033342e-06, "loss": 0.3584, "step": 3299 }, { "epoch": 0.11324639670555937, "grad_norm": 0.9283491169923725, "learning_rate": 9.819624165195456e-06, "loss": 0.3934, "step": 3300 }, { "epoch": 0.11328071379547014, "grad_norm": 0.8504890010726729, "learning_rate": 9.819476211816734e-06, "loss": 0.3652, "step": 3301 }, { "epoch": 0.11331503088538092, "grad_norm": 0.8658408656334756, "learning_rate": 9.819328198899e-06, "loss": 0.392, "step": 3302 }, { "epoch": 0.1133493479752917, "grad_norm": 0.8268875904355587, "learning_rate": 9.819180126444088e-06, "loss": 0.3507, "step": 3303 }, { "epoch": 0.11338366506520248, "grad_norm": 0.8986361408717024, "learning_rate": 9.819031994453821e-06, "loss": 0.4064, "step": 3304 }, { "epoch": 0.11341798215511324, "grad_norm": 0.8146072997653493, "learning_rate": 9.818883802930034e-06, "loss": 0.3161, "step": 3305 }, { "epoch": 0.11345229924502402, "grad_norm": 0.9689341294923195, "learning_rate": 9.818735551874556e-06, "loss": 0.371, "step": 3306 }, { "epoch": 0.1134866163349348, "grad_norm": 0.9754202838625722, "learning_rate": 9.818587241289216e-06, "loss": 0.3543, "step": 3307 }, { "epoch": 0.11352093342484558, "grad_norm": 0.8182188495749493, "learning_rate": 9.81843887117585e-06, "loss": 0.3616, "step": 3308 }, { "epoch": 0.11355525051475635, "grad_norm": 0.8368186654851817, "learning_rate": 9.818290441536291e-06, "loss": 0.377, "step": 3309 }, { "epoch": 0.11358956760466712, "grad_norm": 0.9932906112019612, "learning_rate": 9.81814195237237e-06, "loss": 0.3355, "step": 3310 }, { "epoch": 0.1136238846945779, "grad_norm": 0.7351105060788333, "learning_rate": 9.817993403685924e-06, "loss": 0.33, "step": 3311 }, { "epoch": 0.11365820178448867, "grad_norm": 0.8666195422670502, "learning_rate": 9.817844795478783e-06, "loss": 0.2977, "step": 3312 }, { "epoch": 0.11369251887439945, "grad_norm": 0.9108477455635606, "learning_rate": 9.81769612775279e-06, "loss": 0.3763, "step": 3313 }, { "epoch": 0.11372683596431023, "grad_norm": 0.7673391373237632, "learning_rate": 9.817547400509777e-06, "loss": 0.3563, "step": 3314 }, { "epoch": 0.113761153054221, "grad_norm": 0.8506741549743262, "learning_rate": 9.817398613751583e-06, "loss": 0.3509, "step": 3315 }, { "epoch": 0.11379547014413177, "grad_norm": 0.7831694659100975, "learning_rate": 9.817249767480045e-06, "loss": 0.3989, "step": 3316 }, { "epoch": 0.11382978723404255, "grad_norm": 0.8792804926305796, "learning_rate": 9.817100861697005e-06, "loss": 0.4067, "step": 3317 }, { "epoch": 0.11386410432395333, "grad_norm": 0.844467753803815, "learning_rate": 9.816951896404296e-06, "loss": 0.3647, "step": 3318 }, { "epoch": 0.11389842141386411, "grad_norm": 0.7129247056370369, "learning_rate": 9.816802871603765e-06, "loss": 0.3295, "step": 3319 }, { "epoch": 0.11393273850377487, "grad_norm": 0.8690141566965038, "learning_rate": 9.81665378729725e-06, "loss": 0.3347, "step": 3320 }, { "epoch": 0.11396705559368565, "grad_norm": 0.8883197971296768, "learning_rate": 9.816504643486592e-06, "loss": 0.3962, "step": 3321 }, { "epoch": 0.11400137268359643, "grad_norm": 0.9078249961070074, "learning_rate": 9.816355440173636e-06, "loss": 0.3235, "step": 3322 }, { "epoch": 0.11403568977350721, "grad_norm": 0.6839530054903146, "learning_rate": 9.816206177360223e-06, "loss": 0.304, "step": 3323 }, { "epoch": 0.11407000686341798, "grad_norm": 1.0431404026855782, "learning_rate": 9.816056855048199e-06, "loss": 0.3741, "step": 3324 }, { "epoch": 0.11410432395332876, "grad_norm": 0.7554537989350636, "learning_rate": 9.815907473239406e-06, "loss": 0.3308, "step": 3325 }, { "epoch": 0.11413864104323954, "grad_norm": 0.8654679904243269, "learning_rate": 9.815758031935692e-06, "loss": 0.3655, "step": 3326 }, { "epoch": 0.11417295813315032, "grad_norm": 0.8082718097660411, "learning_rate": 9.815608531138901e-06, "loss": 0.3345, "step": 3327 }, { "epoch": 0.11420727522306108, "grad_norm": 0.8291785449622703, "learning_rate": 9.815458970850881e-06, "loss": 0.3209, "step": 3328 }, { "epoch": 0.11424159231297186, "grad_norm": 0.8433628847428353, "learning_rate": 9.815309351073481e-06, "loss": 0.3879, "step": 3329 }, { "epoch": 0.11427590940288264, "grad_norm": 0.8796749188589652, "learning_rate": 9.81515967180855e-06, "loss": 0.4689, "step": 3330 }, { "epoch": 0.1143102264927934, "grad_norm": 0.9778803684695443, "learning_rate": 9.815009933057932e-06, "loss": 0.3453, "step": 3331 }, { "epoch": 0.11434454358270418, "grad_norm": 0.8952792156952304, "learning_rate": 9.814860134823481e-06, "loss": 0.3653, "step": 3332 }, { "epoch": 0.11437886067261496, "grad_norm": 0.8651971206171813, "learning_rate": 9.814710277107044e-06, "loss": 0.3737, "step": 3333 }, { "epoch": 0.11441317776252574, "grad_norm": 0.8369972701741676, "learning_rate": 9.814560359910479e-06, "loss": 0.3437, "step": 3334 }, { "epoch": 0.11444749485243651, "grad_norm": 0.8119281314775901, "learning_rate": 9.814410383235632e-06, "loss": 0.37, "step": 3335 }, { "epoch": 0.11448181194234729, "grad_norm": 0.9241855674969985, "learning_rate": 9.814260347084358e-06, "loss": 0.3645, "step": 3336 }, { "epoch": 0.11451612903225807, "grad_norm": 0.952608194637413, "learning_rate": 9.81411025145851e-06, "loss": 0.4037, "step": 3337 }, { "epoch": 0.11455044612216884, "grad_norm": 0.9233634850081172, "learning_rate": 9.813960096359944e-06, "loss": 0.334, "step": 3338 }, { "epoch": 0.11458476321207961, "grad_norm": 0.8107060452543434, "learning_rate": 9.813809881790513e-06, "loss": 0.3121, "step": 3339 }, { "epoch": 0.11461908030199039, "grad_norm": 0.8139869974126841, "learning_rate": 9.813659607752072e-06, "loss": 0.3278, "step": 3340 }, { "epoch": 0.11465339739190117, "grad_norm": 0.853176849041194, "learning_rate": 9.81350927424648e-06, "loss": 0.3904, "step": 3341 }, { "epoch": 0.11468771448181195, "grad_norm": 0.7728050478832508, "learning_rate": 9.813358881275592e-06, "loss": 0.307, "step": 3342 }, { "epoch": 0.11472203157172271, "grad_norm": 0.8007851335434737, "learning_rate": 9.813208428841268e-06, "loss": 0.3615, "step": 3343 }, { "epoch": 0.11475634866163349, "grad_norm": 0.9040300217382363, "learning_rate": 9.813057916945363e-06, "loss": 0.366, "step": 3344 }, { "epoch": 0.11479066575154427, "grad_norm": 0.8555166150109841, "learning_rate": 9.812907345589742e-06, "loss": 0.3707, "step": 3345 }, { "epoch": 0.11482498284145505, "grad_norm": 0.8432287204284233, "learning_rate": 9.81275671477626e-06, "loss": 0.358, "step": 3346 }, { "epoch": 0.11485929993136582, "grad_norm": 0.8289240764884636, "learning_rate": 9.81260602450678e-06, "loss": 0.3646, "step": 3347 }, { "epoch": 0.1148936170212766, "grad_norm": 0.8556993328994305, "learning_rate": 9.812455274783166e-06, "loss": 0.3493, "step": 3348 }, { "epoch": 0.11492793411118737, "grad_norm": 0.8033256188877408, "learning_rate": 9.812304465607276e-06, "loss": 0.3414, "step": 3349 }, { "epoch": 0.11496225120109815, "grad_norm": 0.8123388471089921, "learning_rate": 9.812153596980975e-06, "loss": 0.3216, "step": 3350 }, { "epoch": 0.11499656829100892, "grad_norm": 0.812381191256054, "learning_rate": 9.812002668906127e-06, "loss": 0.3245, "step": 3351 }, { "epoch": 0.1150308853809197, "grad_norm": 0.8076442882533256, "learning_rate": 9.811851681384595e-06, "loss": 0.3699, "step": 3352 }, { "epoch": 0.11506520247083048, "grad_norm": 0.7847018250639193, "learning_rate": 9.811700634418246e-06, "loss": 0.3596, "step": 3353 }, { "epoch": 0.11509951956074124, "grad_norm": 0.8491697633601929, "learning_rate": 9.811549528008948e-06, "loss": 0.3641, "step": 3354 }, { "epoch": 0.11513383665065202, "grad_norm": 0.7641100736278287, "learning_rate": 9.811398362158562e-06, "loss": 0.3707, "step": 3355 }, { "epoch": 0.1151681537405628, "grad_norm": 0.870649325198182, "learning_rate": 9.811247136868961e-06, "loss": 0.3179, "step": 3356 }, { "epoch": 0.11520247083047358, "grad_norm": 0.8160943256647388, "learning_rate": 9.811095852142009e-06, "loss": 0.3273, "step": 3357 }, { "epoch": 0.11523678792038435, "grad_norm": 0.7881171068322652, "learning_rate": 9.810944507979579e-06, "loss": 0.3623, "step": 3358 }, { "epoch": 0.11527110501029512, "grad_norm": 0.9227605188393279, "learning_rate": 9.810793104383538e-06, "loss": 0.3569, "step": 3359 }, { "epoch": 0.1153054221002059, "grad_norm": 0.9358751514816263, "learning_rate": 9.810641641355758e-06, "loss": 0.355, "step": 3360 }, { "epoch": 0.11533973919011668, "grad_norm": 0.7950583188562488, "learning_rate": 9.810490118898106e-06, "loss": 0.3323, "step": 3361 }, { "epoch": 0.11537405628002745, "grad_norm": 0.776015105056292, "learning_rate": 9.81033853701246e-06, "loss": 0.3818, "step": 3362 }, { "epoch": 0.11540837336993823, "grad_norm": 0.8408290504368052, "learning_rate": 9.810186895700689e-06, "loss": 0.4046, "step": 3363 }, { "epoch": 0.115442690459849, "grad_norm": 0.825037294506854, "learning_rate": 9.810035194964667e-06, "loss": 0.3569, "step": 3364 }, { "epoch": 0.11547700754975979, "grad_norm": 0.7929739147405145, "learning_rate": 9.809883434806267e-06, "loss": 0.3906, "step": 3365 }, { "epoch": 0.11551132463967055, "grad_norm": 0.8755317166888035, "learning_rate": 9.809731615227367e-06, "loss": 0.4129, "step": 3366 }, { "epoch": 0.11554564172958133, "grad_norm": 0.7851325793364263, "learning_rate": 9.809579736229838e-06, "loss": 0.3501, "step": 3367 }, { "epoch": 0.11557995881949211, "grad_norm": 0.7050458121606659, "learning_rate": 9.809427797815561e-06, "loss": 0.353, "step": 3368 }, { "epoch": 0.11561427590940289, "grad_norm": 0.8736015954718142, "learning_rate": 9.809275799986409e-06, "loss": 0.3807, "step": 3369 }, { "epoch": 0.11564859299931365, "grad_norm": 0.9200626646444355, "learning_rate": 9.809123742744264e-06, "loss": 0.3756, "step": 3370 }, { "epoch": 0.11568291008922443, "grad_norm": 1.0264422685523904, "learning_rate": 9.808971626090999e-06, "loss": 0.4042, "step": 3371 }, { "epoch": 0.11571722717913521, "grad_norm": 0.779355063983906, "learning_rate": 9.808819450028498e-06, "loss": 0.3469, "step": 3372 }, { "epoch": 0.11575154426904599, "grad_norm": 0.6647828040210073, "learning_rate": 9.80866721455864e-06, "loss": 0.3425, "step": 3373 }, { "epoch": 0.11578586135895676, "grad_norm": 0.7943548593799631, "learning_rate": 9.808514919683303e-06, "loss": 0.3444, "step": 3374 }, { "epoch": 0.11582017844886754, "grad_norm": 0.8996847718962739, "learning_rate": 9.808362565404372e-06, "loss": 0.3797, "step": 3375 }, { "epoch": 0.11585449553877832, "grad_norm": 0.7895288889889981, "learning_rate": 9.808210151723727e-06, "loss": 0.3778, "step": 3376 }, { "epoch": 0.11588881262868908, "grad_norm": 0.7648111593504388, "learning_rate": 9.80805767864325e-06, "loss": 0.3717, "step": 3377 }, { "epoch": 0.11592312971859986, "grad_norm": 0.9174667885505756, "learning_rate": 9.807905146164829e-06, "loss": 0.4006, "step": 3378 }, { "epoch": 0.11595744680851064, "grad_norm": 0.792161415588377, "learning_rate": 9.807752554290343e-06, "loss": 0.3714, "step": 3379 }, { "epoch": 0.11599176389842142, "grad_norm": 0.7380787660417524, "learning_rate": 9.80759990302168e-06, "loss": 0.3303, "step": 3380 }, { "epoch": 0.11602608098833218, "grad_norm": 0.8048322697635637, "learning_rate": 9.807447192360725e-06, "loss": 0.3571, "step": 3381 }, { "epoch": 0.11606039807824296, "grad_norm": 0.9577745051425938, "learning_rate": 9.807294422309366e-06, "loss": 0.37, "step": 3382 }, { "epoch": 0.11609471516815374, "grad_norm": 0.9207999646822899, "learning_rate": 9.807141592869488e-06, "loss": 0.4264, "step": 3383 }, { "epoch": 0.11612903225806452, "grad_norm": 0.8225195387379108, "learning_rate": 9.806988704042981e-06, "loss": 0.3439, "step": 3384 }, { "epoch": 0.11616334934797529, "grad_norm": 0.7905256476950827, "learning_rate": 9.806835755831732e-06, "loss": 0.3612, "step": 3385 }, { "epoch": 0.11619766643788607, "grad_norm": 0.8106927098766434, "learning_rate": 9.806682748237633e-06, "loss": 0.3491, "step": 3386 }, { "epoch": 0.11623198352779684, "grad_norm": 0.8745050723089709, "learning_rate": 9.806529681262571e-06, "loss": 0.3736, "step": 3387 }, { "epoch": 0.11626630061770762, "grad_norm": 0.84087557483318, "learning_rate": 9.80637655490844e-06, "loss": 0.3393, "step": 3388 }, { "epoch": 0.11630061770761839, "grad_norm": 0.8154621582405571, "learning_rate": 9.80622336917713e-06, "loss": 0.376, "step": 3389 }, { "epoch": 0.11633493479752917, "grad_norm": 0.7351042715247964, "learning_rate": 9.806070124070535e-06, "loss": 0.3248, "step": 3390 }, { "epoch": 0.11636925188743995, "grad_norm": 0.8588132348250993, "learning_rate": 9.805916819590544e-06, "loss": 0.3734, "step": 3391 }, { "epoch": 0.11640356897735073, "grad_norm": 0.8269828568193369, "learning_rate": 9.805763455739056e-06, "loss": 0.3527, "step": 3392 }, { "epoch": 0.11643788606726149, "grad_norm": 0.8631209273384122, "learning_rate": 9.805610032517963e-06, "loss": 0.3373, "step": 3393 }, { "epoch": 0.11647220315717227, "grad_norm": 0.922014565680785, "learning_rate": 9.80545654992916e-06, "loss": 0.3933, "step": 3394 }, { "epoch": 0.11650652024708305, "grad_norm": 0.7885156879533212, "learning_rate": 9.805303007974547e-06, "loss": 0.3146, "step": 3395 }, { "epoch": 0.11654083733699382, "grad_norm": 0.7822813482440099, "learning_rate": 9.805149406656015e-06, "loss": 0.3464, "step": 3396 }, { "epoch": 0.1165751544269046, "grad_norm": 0.8583719741535423, "learning_rate": 9.804995745975464e-06, "loss": 0.3298, "step": 3397 }, { "epoch": 0.11660947151681537, "grad_norm": 0.8819046571480865, "learning_rate": 9.804842025934793e-06, "loss": 0.3363, "step": 3398 }, { "epoch": 0.11664378860672615, "grad_norm": 0.8563659950700536, "learning_rate": 9.804688246535902e-06, "loss": 0.3095, "step": 3399 }, { "epoch": 0.11667810569663692, "grad_norm": 0.8329921221800441, "learning_rate": 9.804534407780687e-06, "loss": 0.3674, "step": 3400 }, { "epoch": 0.1167124227865477, "grad_norm": 0.8817743005040141, "learning_rate": 9.804380509671055e-06, "loss": 0.3661, "step": 3401 }, { "epoch": 0.11674673987645848, "grad_norm": 0.9131815317298206, "learning_rate": 9.8042265522089e-06, "loss": 0.3318, "step": 3402 }, { "epoch": 0.11678105696636926, "grad_norm": 0.7440921672216438, "learning_rate": 9.804072535396128e-06, "loss": 0.3167, "step": 3403 }, { "epoch": 0.11681537405628002, "grad_norm": 0.7889269944686349, "learning_rate": 9.80391845923464e-06, "loss": 0.3703, "step": 3404 }, { "epoch": 0.1168496911461908, "grad_norm": 0.8739716042784764, "learning_rate": 9.803764323726342e-06, "loss": 0.3432, "step": 3405 }, { "epoch": 0.11688400823610158, "grad_norm": 0.7756730467018511, "learning_rate": 9.803610128873135e-06, "loss": 0.3364, "step": 3406 }, { "epoch": 0.11691832532601236, "grad_norm": 0.887113082638555, "learning_rate": 9.803455874676927e-06, "loss": 0.3744, "step": 3407 }, { "epoch": 0.11695264241592312, "grad_norm": 0.8009522629846317, "learning_rate": 9.803301561139618e-06, "loss": 0.3187, "step": 3408 }, { "epoch": 0.1169869595058339, "grad_norm": 0.7683739639562628, "learning_rate": 9.803147188263123e-06, "loss": 0.3659, "step": 3409 }, { "epoch": 0.11702127659574468, "grad_norm": 0.8801469650773434, "learning_rate": 9.802992756049341e-06, "loss": 0.4189, "step": 3410 }, { "epoch": 0.11705559368565546, "grad_norm": 0.833474614808327, "learning_rate": 9.802838264500186e-06, "loss": 0.4228, "step": 3411 }, { "epoch": 0.11708991077556623, "grad_norm": 0.8111650190357087, "learning_rate": 9.80268371361756e-06, "loss": 0.3025, "step": 3412 }, { "epoch": 0.117124227865477, "grad_norm": 1.3098051674120865, "learning_rate": 9.802529103403379e-06, "loss": 0.3287, "step": 3413 }, { "epoch": 0.11715854495538779, "grad_norm": 0.9346726088517017, "learning_rate": 9.80237443385955e-06, "loss": 0.3704, "step": 3414 }, { "epoch": 0.11719286204529857, "grad_norm": 0.8107598935406738, "learning_rate": 9.802219704987985e-06, "loss": 0.4016, "step": 3415 }, { "epoch": 0.11722717913520933, "grad_norm": 0.8633702421382647, "learning_rate": 9.802064916790592e-06, "loss": 0.3674, "step": 3416 }, { "epoch": 0.11726149622512011, "grad_norm": 0.8405583398897827, "learning_rate": 9.801910069269286e-06, "loss": 0.3201, "step": 3417 }, { "epoch": 0.11729581331503089, "grad_norm": 0.8942664471528378, "learning_rate": 9.801755162425981e-06, "loss": 0.3527, "step": 3418 }, { "epoch": 0.11733013040494165, "grad_norm": 0.8937223705542444, "learning_rate": 9.801600196262588e-06, "loss": 0.3319, "step": 3419 }, { "epoch": 0.11736444749485243, "grad_norm": 0.8503941638183395, "learning_rate": 9.801445170781023e-06, "loss": 0.3724, "step": 3420 }, { "epoch": 0.11739876458476321, "grad_norm": 0.973450923594454, "learning_rate": 9.801290085983202e-06, "loss": 0.4323, "step": 3421 }, { "epoch": 0.11743308167467399, "grad_norm": 0.813334010331565, "learning_rate": 9.801134941871038e-06, "loss": 0.3495, "step": 3422 }, { "epoch": 0.11746739876458476, "grad_norm": 0.8027576550968788, "learning_rate": 9.80097973844645e-06, "loss": 0.328, "step": 3423 }, { "epoch": 0.11750171585449554, "grad_norm": 0.8621027624630335, "learning_rate": 9.800824475711355e-06, "loss": 0.3554, "step": 3424 }, { "epoch": 0.11753603294440632, "grad_norm": 0.8395921627000563, "learning_rate": 9.80066915366767e-06, "loss": 0.3307, "step": 3425 }, { "epoch": 0.1175703500343171, "grad_norm": 0.8069135047257322, "learning_rate": 9.800513772317315e-06, "loss": 0.3704, "step": 3426 }, { "epoch": 0.11760466712422786, "grad_norm": 0.7893383418876551, "learning_rate": 9.80035833166221e-06, "loss": 0.3251, "step": 3427 }, { "epoch": 0.11763898421413864, "grad_norm": 0.84774729471915, "learning_rate": 9.800202831704274e-06, "loss": 0.2894, "step": 3428 }, { "epoch": 0.11767330130404942, "grad_norm": 0.8404150630067865, "learning_rate": 9.800047272445428e-06, "loss": 0.3383, "step": 3429 }, { "epoch": 0.1177076183939602, "grad_norm": 0.9210055564613867, "learning_rate": 9.799891653887595e-06, "loss": 0.4014, "step": 3430 }, { "epoch": 0.11774193548387096, "grad_norm": 0.8280445175931884, "learning_rate": 9.799735976032697e-06, "loss": 0.3148, "step": 3431 }, { "epoch": 0.11777625257378174, "grad_norm": 0.7818479904786064, "learning_rate": 9.799580238882656e-06, "loss": 0.2832, "step": 3432 }, { "epoch": 0.11781056966369252, "grad_norm": 0.7718561571930687, "learning_rate": 9.799424442439398e-06, "loss": 0.3931, "step": 3433 }, { "epoch": 0.1178448867536033, "grad_norm": 0.8191751609702792, "learning_rate": 9.799268586704847e-06, "loss": 0.4427, "step": 3434 }, { "epoch": 0.11787920384351407, "grad_norm": 0.8729432960592729, "learning_rate": 9.799112671680925e-06, "loss": 0.3554, "step": 3435 }, { "epoch": 0.11791352093342485, "grad_norm": 0.9150600115800713, "learning_rate": 9.798956697369564e-06, "loss": 0.3747, "step": 3436 }, { "epoch": 0.11794783802333562, "grad_norm": 0.8515049604987672, "learning_rate": 9.798800663772687e-06, "loss": 0.3306, "step": 3437 }, { "epoch": 0.1179821551132464, "grad_norm": 0.9953783539623939, "learning_rate": 9.798644570892222e-06, "loss": 0.374, "step": 3438 }, { "epoch": 0.11801647220315717, "grad_norm": 0.6892309880908066, "learning_rate": 9.7984884187301e-06, "loss": 0.3018, "step": 3439 }, { "epoch": 0.11805078929306795, "grad_norm": 0.8426099282509795, "learning_rate": 9.798332207288247e-06, "loss": 0.3123, "step": 3440 }, { "epoch": 0.11808510638297873, "grad_norm": 0.7917315915302691, "learning_rate": 9.798175936568594e-06, "loss": 0.3906, "step": 3441 }, { "epoch": 0.11811942347288949, "grad_norm": 0.8462344554089859, "learning_rate": 9.79801960657307e-06, "loss": 0.3581, "step": 3442 }, { "epoch": 0.11815374056280027, "grad_norm": 0.7565002248663224, "learning_rate": 9.79786321730361e-06, "loss": 0.3488, "step": 3443 }, { "epoch": 0.11818805765271105, "grad_norm": 0.8591237264485954, "learning_rate": 9.797706768762144e-06, "loss": 0.3937, "step": 3444 }, { "epoch": 0.11822237474262183, "grad_norm": 0.8409862053219564, "learning_rate": 9.797550260950601e-06, "loss": 0.3518, "step": 3445 }, { "epoch": 0.1182566918325326, "grad_norm": 0.9329124972144294, "learning_rate": 9.79739369387092e-06, "loss": 0.3827, "step": 3446 }, { "epoch": 0.11829100892244337, "grad_norm": 0.7746654629806985, "learning_rate": 9.797237067525034e-06, "loss": 0.3651, "step": 3447 }, { "epoch": 0.11832532601235415, "grad_norm": 0.7555546014767998, "learning_rate": 9.797080381914876e-06, "loss": 0.3275, "step": 3448 }, { "epoch": 0.11835964310226493, "grad_norm": 0.8338652772147915, "learning_rate": 9.796923637042384e-06, "loss": 0.3153, "step": 3449 }, { "epoch": 0.1183939601921757, "grad_norm": 0.8486837407771153, "learning_rate": 9.79676683290949e-06, "loss": 0.3389, "step": 3450 }, { "epoch": 0.11842827728208648, "grad_norm": 0.849036364126479, "learning_rate": 9.796609969518138e-06, "loss": 0.4272, "step": 3451 }, { "epoch": 0.11846259437199726, "grad_norm": 0.9135979931692098, "learning_rate": 9.79645304687026e-06, "loss": 0.3681, "step": 3452 }, { "epoch": 0.11849691146190804, "grad_norm": 0.8191490212883145, "learning_rate": 9.796296064967797e-06, "loss": 0.3611, "step": 3453 }, { "epoch": 0.1185312285518188, "grad_norm": 0.8041401586431692, "learning_rate": 9.796139023812689e-06, "loss": 0.3245, "step": 3454 }, { "epoch": 0.11856554564172958, "grad_norm": 0.8800032724549973, "learning_rate": 9.795981923406874e-06, "loss": 0.3502, "step": 3455 }, { "epoch": 0.11859986273164036, "grad_norm": 0.7997449875163205, "learning_rate": 9.795824763752294e-06, "loss": 0.3174, "step": 3456 }, { "epoch": 0.11863417982155114, "grad_norm": 0.8289473789575231, "learning_rate": 9.79566754485089e-06, "loss": 0.3489, "step": 3457 }, { "epoch": 0.1186684969114619, "grad_norm": 0.8064944034141854, "learning_rate": 9.795510266704607e-06, "loss": 0.2985, "step": 3458 }, { "epoch": 0.11870281400137268, "grad_norm": 0.8125849296811108, "learning_rate": 9.795352929315384e-06, "loss": 0.3165, "step": 3459 }, { "epoch": 0.11873713109128346, "grad_norm": 0.7957152598676185, "learning_rate": 9.795195532685167e-06, "loss": 0.328, "step": 3460 }, { "epoch": 0.11877144818119423, "grad_norm": 0.8399797117832202, "learning_rate": 9.7950380768159e-06, "loss": 0.3174, "step": 3461 }, { "epoch": 0.11880576527110501, "grad_norm": 0.8712174726402371, "learning_rate": 9.794880561709527e-06, "loss": 0.4079, "step": 3462 }, { "epoch": 0.11884008236101579, "grad_norm": 0.8104863856565261, "learning_rate": 9.794722987367995e-06, "loss": 0.3364, "step": 3463 }, { "epoch": 0.11887439945092657, "grad_norm": 0.8697595830534773, "learning_rate": 9.79456535379325e-06, "loss": 0.3813, "step": 3464 }, { "epoch": 0.11890871654083733, "grad_norm": 0.9139189722173975, "learning_rate": 9.794407660987242e-06, "loss": 0.3855, "step": 3465 }, { "epoch": 0.11894303363074811, "grad_norm": 0.7978705346457892, "learning_rate": 9.794249908951915e-06, "loss": 0.3554, "step": 3466 }, { "epoch": 0.11897735072065889, "grad_norm": 0.7411918697295345, "learning_rate": 9.794092097689221e-06, "loss": 0.3307, "step": 3467 }, { "epoch": 0.11901166781056967, "grad_norm": 0.8784280760214632, "learning_rate": 9.79393422720111e-06, "loss": 0.3685, "step": 3468 }, { "epoch": 0.11904598490048043, "grad_norm": 0.8141618495207084, "learning_rate": 9.793776297489528e-06, "loss": 0.3911, "step": 3469 }, { "epoch": 0.11908030199039121, "grad_norm": 0.7727928821134745, "learning_rate": 9.79361830855643e-06, "loss": 0.3516, "step": 3470 }, { "epoch": 0.11911461908030199, "grad_norm": 0.7750666463558035, "learning_rate": 9.793460260403767e-06, "loss": 0.3195, "step": 3471 }, { "epoch": 0.11914893617021277, "grad_norm": 0.8968194562441725, "learning_rate": 9.79330215303349e-06, "loss": 0.3489, "step": 3472 }, { "epoch": 0.11918325326012354, "grad_norm": 0.7187355059869247, "learning_rate": 9.793143986447556e-06, "loss": 0.3316, "step": 3473 }, { "epoch": 0.11921757035003432, "grad_norm": 0.8410705690326824, "learning_rate": 9.792985760647914e-06, "loss": 0.3302, "step": 3474 }, { "epoch": 0.1192518874399451, "grad_norm": 0.8615025612006677, "learning_rate": 9.792827475636521e-06, "loss": 0.3622, "step": 3475 }, { "epoch": 0.11928620452985587, "grad_norm": 0.7466431683493838, "learning_rate": 9.792669131415333e-06, "loss": 0.2983, "step": 3476 }, { "epoch": 0.11932052161976664, "grad_norm": 0.8221990946838983, "learning_rate": 9.792510727986306e-06, "loss": 0.3112, "step": 3477 }, { "epoch": 0.11935483870967742, "grad_norm": 0.8113493116839247, "learning_rate": 9.792352265351395e-06, "loss": 0.3522, "step": 3478 }, { "epoch": 0.1193891557995882, "grad_norm": 0.8356381602645498, "learning_rate": 9.792193743512562e-06, "loss": 0.3574, "step": 3479 }, { "epoch": 0.11942347288949898, "grad_norm": 1.0545850036623565, "learning_rate": 9.79203516247176e-06, "loss": 0.3894, "step": 3480 }, { "epoch": 0.11945778997940974, "grad_norm": 0.9315486241303444, "learning_rate": 9.791876522230952e-06, "loss": 0.3908, "step": 3481 }, { "epoch": 0.11949210706932052, "grad_norm": 0.8609226296943031, "learning_rate": 9.791717822792094e-06, "loss": 0.4424, "step": 3482 }, { "epoch": 0.1195264241592313, "grad_norm": 0.8100743818039263, "learning_rate": 9.79155906415715e-06, "loss": 0.3396, "step": 3483 }, { "epoch": 0.11956074124914207, "grad_norm": 0.8156915227776486, "learning_rate": 9.791400246328081e-06, "loss": 0.333, "step": 3484 }, { "epoch": 0.11959505833905285, "grad_norm": 0.8278363284661125, "learning_rate": 9.791241369306847e-06, "loss": 0.4353, "step": 3485 }, { "epoch": 0.11962937542896362, "grad_norm": 0.7661608533224366, "learning_rate": 9.791082433095414e-06, "loss": 0.356, "step": 3486 }, { "epoch": 0.1196636925188744, "grad_norm": 0.7524708675079623, "learning_rate": 9.79092343769574e-06, "loss": 0.3635, "step": 3487 }, { "epoch": 0.11969800960878517, "grad_norm": 0.7864881474062817, "learning_rate": 9.790764383109796e-06, "loss": 0.3047, "step": 3488 }, { "epoch": 0.11973232669869595, "grad_norm": 1.2882886802748403, "learning_rate": 9.790605269339541e-06, "loss": 0.3824, "step": 3489 }, { "epoch": 0.11976664378860673, "grad_norm": 0.9487715288776346, "learning_rate": 9.790446096386945e-06, "loss": 0.4151, "step": 3490 }, { "epoch": 0.1198009608785175, "grad_norm": 0.8846419024605701, "learning_rate": 9.790286864253973e-06, "loss": 0.3536, "step": 3491 }, { "epoch": 0.11983527796842827, "grad_norm": 0.8742976449658342, "learning_rate": 9.790127572942591e-06, "loss": 0.3924, "step": 3492 }, { "epoch": 0.11986959505833905, "grad_norm": 0.9104702528579433, "learning_rate": 9.789968222454767e-06, "loss": 0.3889, "step": 3493 }, { "epoch": 0.11990391214824983, "grad_norm": 0.8968551553676806, "learning_rate": 9.78980881279247e-06, "loss": 0.3471, "step": 3494 }, { "epoch": 0.11993822923816061, "grad_norm": 0.8490392084732088, "learning_rate": 9.78964934395767e-06, "loss": 0.349, "step": 3495 }, { "epoch": 0.11997254632807137, "grad_norm": 0.9195592981581796, "learning_rate": 9.789489815952337e-06, "loss": 0.39, "step": 3496 }, { "epoch": 0.12000686341798215, "grad_norm": 0.8255694923282246, "learning_rate": 9.789330228778439e-06, "loss": 0.389, "step": 3497 }, { "epoch": 0.12004118050789293, "grad_norm": 0.7959343455461776, "learning_rate": 9.789170582437953e-06, "loss": 0.3673, "step": 3498 }, { "epoch": 0.12007549759780371, "grad_norm": 0.7181959241549799, "learning_rate": 9.789010876932847e-06, "loss": 0.3428, "step": 3499 }, { "epoch": 0.12010981468771448, "grad_norm": 0.8023686524886693, "learning_rate": 9.788851112265094e-06, "loss": 0.3927, "step": 3500 }, { "epoch": 0.12014413177762526, "grad_norm": 0.792359819137804, "learning_rate": 9.78869128843667e-06, "loss": 0.3984, "step": 3501 }, { "epoch": 0.12017844886753604, "grad_norm": 0.8361390039606362, "learning_rate": 9.788531405449548e-06, "loss": 0.3878, "step": 3502 }, { "epoch": 0.1202127659574468, "grad_norm": 0.8680579193369793, "learning_rate": 9.788371463305704e-06, "loss": 0.4205, "step": 3503 }, { "epoch": 0.12024708304735758, "grad_norm": 0.8074118860156951, "learning_rate": 9.788211462007112e-06, "loss": 0.3881, "step": 3504 }, { "epoch": 0.12028140013726836, "grad_norm": 0.8859280958001698, "learning_rate": 9.78805140155575e-06, "loss": 0.3555, "step": 3505 }, { "epoch": 0.12031571722717914, "grad_norm": 0.7104006521060037, "learning_rate": 9.787891281953596e-06, "loss": 0.3457, "step": 3506 }, { "epoch": 0.1203500343170899, "grad_norm": 0.8213864336829538, "learning_rate": 9.787731103202627e-06, "loss": 0.3367, "step": 3507 }, { "epoch": 0.12038435140700068, "grad_norm": 0.7892755669690775, "learning_rate": 9.787570865304822e-06, "loss": 0.344, "step": 3508 }, { "epoch": 0.12041866849691146, "grad_norm": 0.875032644572703, "learning_rate": 9.78741056826216e-06, "loss": 0.365, "step": 3509 }, { "epoch": 0.12045298558682224, "grad_norm": 0.7629725617442719, "learning_rate": 9.787250212076623e-06, "loss": 0.3776, "step": 3510 }, { "epoch": 0.12048730267673301, "grad_norm": 0.8340232127097853, "learning_rate": 9.787089796750191e-06, "loss": 0.3318, "step": 3511 }, { "epoch": 0.12052161976664379, "grad_norm": 0.8337653086607425, "learning_rate": 9.786929322284846e-06, "loss": 0.3478, "step": 3512 }, { "epoch": 0.12055593685655457, "grad_norm": 0.8246084399712234, "learning_rate": 9.78676878868257e-06, "loss": 0.3271, "step": 3513 }, { "epoch": 0.12059025394646535, "grad_norm": 1.0407908426435089, "learning_rate": 9.786608195945347e-06, "loss": 0.4136, "step": 3514 }, { "epoch": 0.12062457103637611, "grad_norm": 0.7300656078619903, "learning_rate": 9.78644754407516e-06, "loss": 0.361, "step": 3515 }, { "epoch": 0.12065888812628689, "grad_norm": 0.7711836157293378, "learning_rate": 9.786286833073993e-06, "loss": 0.3493, "step": 3516 }, { "epoch": 0.12069320521619767, "grad_norm": 1.007765311151393, "learning_rate": 9.786126062943832e-06, "loss": 0.3394, "step": 3517 }, { "epoch": 0.12072752230610845, "grad_norm": 0.8140975345995495, "learning_rate": 9.785965233686665e-06, "loss": 0.3622, "step": 3518 }, { "epoch": 0.12076183939601921, "grad_norm": 0.8104874780152109, "learning_rate": 9.785804345304477e-06, "loss": 0.3369, "step": 3519 }, { "epoch": 0.12079615648592999, "grad_norm": 0.7820835272320121, "learning_rate": 9.785643397799257e-06, "loss": 0.3736, "step": 3520 }, { "epoch": 0.12083047357584077, "grad_norm": 0.7734524661511795, "learning_rate": 9.785482391172992e-06, "loss": 0.423, "step": 3521 }, { "epoch": 0.12086479066575155, "grad_norm": 0.8161059655788305, "learning_rate": 9.78532132542767e-06, "loss": 0.3546, "step": 3522 }, { "epoch": 0.12089910775566232, "grad_norm": 0.9033318940448798, "learning_rate": 9.785160200565284e-06, "loss": 0.3433, "step": 3523 }, { "epoch": 0.1209334248455731, "grad_norm": 0.8826619648206329, "learning_rate": 9.784999016587822e-06, "loss": 0.3277, "step": 3524 }, { "epoch": 0.12096774193548387, "grad_norm": 0.8561160480129082, "learning_rate": 9.784837773497275e-06, "loss": 0.3435, "step": 3525 }, { "epoch": 0.12100205902539464, "grad_norm": 0.8207791047469398, "learning_rate": 9.784676471295636e-06, "loss": 0.3331, "step": 3526 }, { "epoch": 0.12103637611530542, "grad_norm": 0.8136863526760668, "learning_rate": 9.784515109984899e-06, "loss": 0.3349, "step": 3527 }, { "epoch": 0.1210706932052162, "grad_norm": 0.8774523284856044, "learning_rate": 9.784353689567055e-06, "loss": 0.3433, "step": 3528 }, { "epoch": 0.12110501029512698, "grad_norm": 0.8294361626432787, "learning_rate": 9.7841922100441e-06, "loss": 0.3905, "step": 3529 }, { "epoch": 0.12113932738503774, "grad_norm": 0.8323697596745055, "learning_rate": 9.784030671418028e-06, "loss": 0.3116, "step": 3530 }, { "epoch": 0.12117364447494852, "grad_norm": 0.88404405488196, "learning_rate": 9.783869073690833e-06, "loss": 0.3985, "step": 3531 }, { "epoch": 0.1212079615648593, "grad_norm": 0.79011603495035, "learning_rate": 9.783707416864514e-06, "loss": 0.3628, "step": 3532 }, { "epoch": 0.12124227865477008, "grad_norm": 0.8308276540143258, "learning_rate": 9.783545700941069e-06, "loss": 0.3359, "step": 3533 }, { "epoch": 0.12127659574468085, "grad_norm": 0.7936646329782926, "learning_rate": 9.783383925922491e-06, "loss": 0.3287, "step": 3534 }, { "epoch": 0.12131091283459162, "grad_norm": 0.8612214808829451, "learning_rate": 9.783222091810782e-06, "loss": 0.4115, "step": 3535 }, { "epoch": 0.1213452299245024, "grad_norm": 0.8442154078993332, "learning_rate": 9.783060198607942e-06, "loss": 0.4317, "step": 3536 }, { "epoch": 0.12137954701441318, "grad_norm": 0.8253317268577266, "learning_rate": 9.78289824631597e-06, "loss": 0.3399, "step": 3537 }, { "epoch": 0.12141386410432395, "grad_norm": 0.7239812634952332, "learning_rate": 9.782736234936865e-06, "loss": 0.3099, "step": 3538 }, { "epoch": 0.12144818119423473, "grad_norm": 0.8567984569590773, "learning_rate": 9.782574164472632e-06, "loss": 0.3218, "step": 3539 }, { "epoch": 0.12148249828414551, "grad_norm": 0.8723019664497593, "learning_rate": 9.782412034925268e-06, "loss": 0.3246, "step": 3540 }, { "epoch": 0.12151681537405629, "grad_norm": 0.8950524590717234, "learning_rate": 9.782249846296782e-06, "loss": 0.3301, "step": 3541 }, { "epoch": 0.12155113246396705, "grad_norm": 0.8523407341605695, "learning_rate": 9.782087598589173e-06, "loss": 0.3517, "step": 3542 }, { "epoch": 0.12158544955387783, "grad_norm": 0.9731941727476875, "learning_rate": 9.781925291804448e-06, "loss": 0.3124, "step": 3543 }, { "epoch": 0.12161976664378861, "grad_norm": 0.8177999706191627, "learning_rate": 9.781762925944611e-06, "loss": 0.3191, "step": 3544 }, { "epoch": 0.12165408373369939, "grad_norm": 0.9483686634718503, "learning_rate": 9.781600501011668e-06, "loss": 0.3537, "step": 3545 }, { "epoch": 0.12168840082361015, "grad_norm": 0.8666029598584067, "learning_rate": 9.781438017007625e-06, "loss": 0.3526, "step": 3546 }, { "epoch": 0.12172271791352093, "grad_norm": 0.9931162975383236, "learning_rate": 9.781275473934491e-06, "loss": 0.4266, "step": 3547 }, { "epoch": 0.12175703500343171, "grad_norm": 0.9312086232537429, "learning_rate": 9.781112871794271e-06, "loss": 0.3534, "step": 3548 }, { "epoch": 0.12179135209334248, "grad_norm": 0.8814574053632488, "learning_rate": 9.780950210588979e-06, "loss": 0.3538, "step": 3549 }, { "epoch": 0.12182566918325326, "grad_norm": 0.8709676112132411, "learning_rate": 9.780787490320618e-06, "loss": 0.3856, "step": 3550 }, { "epoch": 0.12185998627316404, "grad_norm": 0.8369986957274982, "learning_rate": 9.780624710991204e-06, "loss": 0.3487, "step": 3551 }, { "epoch": 0.12189430336307482, "grad_norm": 0.8878283610214247, "learning_rate": 9.780461872602743e-06, "loss": 0.4131, "step": 3552 }, { "epoch": 0.12192862045298558, "grad_norm": 0.8350102545097683, "learning_rate": 9.780298975157251e-06, "loss": 0.3617, "step": 3553 }, { "epoch": 0.12196293754289636, "grad_norm": 0.9850125138779704, "learning_rate": 9.780136018656737e-06, "loss": 0.3593, "step": 3554 }, { "epoch": 0.12199725463280714, "grad_norm": 0.7991148593238554, "learning_rate": 9.779973003103216e-06, "loss": 0.3474, "step": 3555 }, { "epoch": 0.12203157172271792, "grad_norm": 0.9479092509488494, "learning_rate": 9.779809928498703e-06, "loss": 0.348, "step": 3556 }, { "epoch": 0.12206588881262868, "grad_norm": 0.8913876112032659, "learning_rate": 9.779646794845209e-06, "loss": 0.3454, "step": 3557 }, { "epoch": 0.12210020590253946, "grad_norm": 0.8703421213326291, "learning_rate": 9.779483602144752e-06, "loss": 0.3291, "step": 3558 }, { "epoch": 0.12213452299245024, "grad_norm": 0.9504147305071746, "learning_rate": 9.779320350399347e-06, "loss": 0.4602, "step": 3559 }, { "epoch": 0.12216884008236102, "grad_norm": 1.0125607831680512, "learning_rate": 9.779157039611011e-06, "loss": 0.3677, "step": 3560 }, { "epoch": 0.12220315717227179, "grad_norm": 0.7810416400582335, "learning_rate": 9.778993669781764e-06, "loss": 0.3291, "step": 3561 }, { "epoch": 0.12223747426218257, "grad_norm": 0.8592821297932055, "learning_rate": 9.77883024091362e-06, "loss": 0.351, "step": 3562 }, { "epoch": 0.12227179135209335, "grad_norm": 0.8693090236330487, "learning_rate": 9.7786667530086e-06, "loss": 0.2945, "step": 3563 }, { "epoch": 0.12230610844200412, "grad_norm": 0.7745847891358717, "learning_rate": 9.778503206068723e-06, "loss": 0.3049, "step": 3564 }, { "epoch": 0.12234042553191489, "grad_norm": 0.8575452981997702, "learning_rate": 9.778339600096012e-06, "loss": 0.3357, "step": 3565 }, { "epoch": 0.12237474262182567, "grad_norm": 1.0148518313340984, "learning_rate": 9.778175935092485e-06, "loss": 0.3567, "step": 3566 }, { "epoch": 0.12240905971173645, "grad_norm": 0.910557995523266, "learning_rate": 9.778012211060166e-06, "loss": 0.3716, "step": 3567 }, { "epoch": 0.12244337680164721, "grad_norm": 0.9201053649902604, "learning_rate": 9.777848428001077e-06, "loss": 0.3209, "step": 3568 }, { "epoch": 0.12247769389155799, "grad_norm": 0.8475213982204842, "learning_rate": 9.777684585917239e-06, "loss": 0.3268, "step": 3569 }, { "epoch": 0.12251201098146877, "grad_norm": 0.908977191803382, "learning_rate": 9.77752068481068e-06, "loss": 0.4054, "step": 3570 }, { "epoch": 0.12254632807137955, "grad_norm": 0.7941324256901408, "learning_rate": 9.777356724683423e-06, "loss": 0.394, "step": 3571 }, { "epoch": 0.12258064516129032, "grad_norm": 0.7590908877547091, "learning_rate": 9.777192705537493e-06, "loss": 0.3289, "step": 3572 }, { "epoch": 0.1226149622512011, "grad_norm": 0.7971404324266765, "learning_rate": 9.777028627374917e-06, "loss": 0.3401, "step": 3573 }, { "epoch": 0.12264927934111187, "grad_norm": 0.8253608698194778, "learning_rate": 9.776864490197722e-06, "loss": 0.4235, "step": 3574 }, { "epoch": 0.12268359643102265, "grad_norm": 0.7544090941756361, "learning_rate": 9.776700294007935e-06, "loss": 0.3337, "step": 3575 }, { "epoch": 0.12271791352093342, "grad_norm": 0.8423227845310013, "learning_rate": 9.776536038807584e-06, "loss": 0.4093, "step": 3576 }, { "epoch": 0.1227522306108442, "grad_norm": 0.802361721508734, "learning_rate": 9.776371724598701e-06, "loss": 0.3356, "step": 3577 }, { "epoch": 0.12278654770075498, "grad_norm": 0.8643130113576944, "learning_rate": 9.776207351383313e-06, "loss": 0.3168, "step": 3578 }, { "epoch": 0.12282086479066576, "grad_norm": 0.7955733080243306, "learning_rate": 9.776042919163452e-06, "loss": 0.4017, "step": 3579 }, { "epoch": 0.12285518188057652, "grad_norm": 0.7904232072888998, "learning_rate": 9.775878427941149e-06, "loss": 0.3162, "step": 3580 }, { "epoch": 0.1228894989704873, "grad_norm": 0.7503642181447431, "learning_rate": 9.775713877718436e-06, "loss": 0.3602, "step": 3581 }, { "epoch": 0.12292381606039808, "grad_norm": 0.8647034844051038, "learning_rate": 9.775549268497346e-06, "loss": 0.336, "step": 3582 }, { "epoch": 0.12295813315030886, "grad_norm": 0.9096797116445696, "learning_rate": 9.775384600279913e-06, "loss": 0.3506, "step": 3583 }, { "epoch": 0.12299245024021963, "grad_norm": 0.852760861331044, "learning_rate": 9.77521987306817e-06, "loss": 0.3382, "step": 3584 }, { "epoch": 0.1230267673301304, "grad_norm": 0.8442406748818265, "learning_rate": 9.775055086864153e-06, "loss": 0.3581, "step": 3585 }, { "epoch": 0.12306108442004118, "grad_norm": 0.7800713877720646, "learning_rate": 9.774890241669899e-06, "loss": 0.3174, "step": 3586 }, { "epoch": 0.12309540150995196, "grad_norm": 0.8068836025469189, "learning_rate": 9.774725337487442e-06, "loss": 0.4046, "step": 3587 }, { "epoch": 0.12312971859986273, "grad_norm": 0.9083274083229701, "learning_rate": 9.77456037431882e-06, "loss": 0.4163, "step": 3588 }, { "epoch": 0.12316403568977351, "grad_norm": 0.8632877209384172, "learning_rate": 9.774395352166071e-06, "loss": 0.4268, "step": 3589 }, { "epoch": 0.12319835277968429, "grad_norm": 0.797167697668809, "learning_rate": 9.774230271031235e-06, "loss": 0.4013, "step": 3590 }, { "epoch": 0.12323266986959505, "grad_norm": 0.8917934015381896, "learning_rate": 9.774065130916348e-06, "loss": 0.3729, "step": 3591 }, { "epoch": 0.12326698695950583, "grad_norm": 0.8585932489358953, "learning_rate": 9.773899931823455e-06, "loss": 0.3665, "step": 3592 }, { "epoch": 0.12330130404941661, "grad_norm": 0.8203726833387457, "learning_rate": 9.773734673754593e-06, "loss": 0.3117, "step": 3593 }, { "epoch": 0.12333562113932739, "grad_norm": 0.7311617976991851, "learning_rate": 9.773569356711804e-06, "loss": 0.3477, "step": 3594 }, { "epoch": 0.12336993822923815, "grad_norm": 0.8804732722933633, "learning_rate": 9.773403980697132e-06, "loss": 0.4092, "step": 3595 }, { "epoch": 0.12340425531914893, "grad_norm": 0.8508335354525809, "learning_rate": 9.773238545712618e-06, "loss": 0.3756, "step": 3596 }, { "epoch": 0.12343857240905971, "grad_norm": 0.9899039123469383, "learning_rate": 9.773073051760309e-06, "loss": 0.3374, "step": 3597 }, { "epoch": 0.12347288949897049, "grad_norm": 0.8726927964583727, "learning_rate": 9.772907498842246e-06, "loss": 0.3681, "step": 3598 }, { "epoch": 0.12350720658888126, "grad_norm": 0.8558480752430314, "learning_rate": 9.772741886960475e-06, "loss": 0.3808, "step": 3599 }, { "epoch": 0.12354152367879204, "grad_norm": 0.8729692332932844, "learning_rate": 9.772576216117043e-06, "loss": 0.4056, "step": 3600 }, { "epoch": 0.12357584076870282, "grad_norm": 0.8392307496230195, "learning_rate": 9.772410486313996e-06, "loss": 0.3733, "step": 3601 }, { "epoch": 0.1236101578586136, "grad_norm": 0.8374349055477555, "learning_rate": 9.772244697553382e-06, "loss": 0.3599, "step": 3602 }, { "epoch": 0.12364447494852436, "grad_norm": 0.8572531091444937, "learning_rate": 9.772078849837247e-06, "loss": 0.3577, "step": 3603 }, { "epoch": 0.12367879203843514, "grad_norm": 0.8468064422301228, "learning_rate": 9.771912943167643e-06, "loss": 0.3783, "step": 3604 }, { "epoch": 0.12371310912834592, "grad_norm": 0.7382799940199504, "learning_rate": 9.771746977546617e-06, "loss": 0.3936, "step": 3605 }, { "epoch": 0.1237474262182567, "grad_norm": 0.8141660228550998, "learning_rate": 9.771580952976222e-06, "loss": 0.3862, "step": 3606 }, { "epoch": 0.12378174330816746, "grad_norm": 0.9326537799056659, "learning_rate": 9.771414869458506e-06, "loss": 0.3697, "step": 3607 }, { "epoch": 0.12381606039807824, "grad_norm": 0.8668304789122272, "learning_rate": 9.771248726995524e-06, "loss": 0.3416, "step": 3608 }, { "epoch": 0.12385037748798902, "grad_norm": 0.780069048000804, "learning_rate": 9.771082525589325e-06, "loss": 0.3401, "step": 3609 }, { "epoch": 0.1238846945778998, "grad_norm": 0.7671493665748174, "learning_rate": 9.770916265241966e-06, "loss": 0.3267, "step": 3610 }, { "epoch": 0.12391901166781057, "grad_norm": 0.7659777428680432, "learning_rate": 9.770749945955496e-06, "loss": 0.2967, "step": 3611 }, { "epoch": 0.12395332875772135, "grad_norm": 0.8586832349353505, "learning_rate": 9.770583567731976e-06, "loss": 0.4161, "step": 3612 }, { "epoch": 0.12398764584763212, "grad_norm": 0.9369993267797151, "learning_rate": 9.770417130573455e-06, "loss": 0.3779, "step": 3613 }, { "epoch": 0.12402196293754289, "grad_norm": 0.8320244632636825, "learning_rate": 9.770250634481993e-06, "loss": 0.4156, "step": 3614 }, { "epoch": 0.12405628002745367, "grad_norm": 0.7910758340589232, "learning_rate": 9.770084079459645e-06, "loss": 0.3183, "step": 3615 }, { "epoch": 0.12409059711736445, "grad_norm": 0.9764756814576913, "learning_rate": 9.769917465508472e-06, "loss": 0.361, "step": 3616 }, { "epoch": 0.12412491420727523, "grad_norm": 0.8474366448327862, "learning_rate": 9.769750792630527e-06, "loss": 0.3213, "step": 3617 }, { "epoch": 0.12415923129718599, "grad_norm": 0.8800448103824438, "learning_rate": 9.769584060827875e-06, "loss": 0.3545, "step": 3618 }, { "epoch": 0.12419354838709677, "grad_norm": 0.9155187269107828, "learning_rate": 9.76941727010257e-06, "loss": 0.3076, "step": 3619 }, { "epoch": 0.12422786547700755, "grad_norm": 0.7677444946046206, "learning_rate": 9.769250420456677e-06, "loss": 0.3571, "step": 3620 }, { "epoch": 0.12426218256691833, "grad_norm": 0.8951906396693511, "learning_rate": 9.769083511892254e-06, "loss": 0.3544, "step": 3621 }, { "epoch": 0.1242964996568291, "grad_norm": 0.9165180335610913, "learning_rate": 9.768916544411366e-06, "loss": 0.4013, "step": 3622 }, { "epoch": 0.12433081674673988, "grad_norm": 0.9492753942359234, "learning_rate": 9.768749518016075e-06, "loss": 0.3195, "step": 3623 }, { "epoch": 0.12436513383665065, "grad_norm": 0.9595597818884108, "learning_rate": 9.76858243270844e-06, "loss": 0.3495, "step": 3624 }, { "epoch": 0.12439945092656143, "grad_norm": 0.7665167756427316, "learning_rate": 9.768415288490532e-06, "loss": 0.3286, "step": 3625 }, { "epoch": 0.1244337680164722, "grad_norm": 0.9756266396061022, "learning_rate": 9.768248085364411e-06, "loss": 0.3421, "step": 3626 }, { "epoch": 0.12446808510638298, "grad_norm": 0.8994941292843611, "learning_rate": 9.768080823332145e-06, "loss": 0.4054, "step": 3627 }, { "epoch": 0.12450240219629376, "grad_norm": 0.8485654514249206, "learning_rate": 9.767913502395797e-06, "loss": 0.3935, "step": 3628 }, { "epoch": 0.12453671928620454, "grad_norm": 0.7765757776370543, "learning_rate": 9.76774612255744e-06, "loss": 0.374, "step": 3629 }, { "epoch": 0.1245710363761153, "grad_norm": 0.8204926569672623, "learning_rate": 9.767578683819137e-06, "loss": 0.341, "step": 3630 }, { "epoch": 0.12460535346602608, "grad_norm": 0.7708654121584694, "learning_rate": 9.767411186182955e-06, "loss": 0.3534, "step": 3631 }, { "epoch": 0.12463967055593686, "grad_norm": 0.8189714283691756, "learning_rate": 9.76724362965097e-06, "loss": 0.4017, "step": 3632 }, { "epoch": 0.12467398764584763, "grad_norm": 0.7429945336924539, "learning_rate": 9.767076014225247e-06, "loss": 0.4111, "step": 3633 }, { "epoch": 0.1247083047357584, "grad_norm": 0.852256247990586, "learning_rate": 9.766908339907857e-06, "loss": 0.3242, "step": 3634 }, { "epoch": 0.12474262182566918, "grad_norm": 2.0023727918906156, "learning_rate": 9.766740606700872e-06, "loss": 0.3653, "step": 3635 }, { "epoch": 0.12477693891557996, "grad_norm": 0.8873428644529557, "learning_rate": 9.766572814606365e-06, "loss": 0.3789, "step": 3636 }, { "epoch": 0.12481125600549073, "grad_norm": 0.8917734369812084, "learning_rate": 9.766404963626408e-06, "loss": 0.3589, "step": 3637 }, { "epoch": 0.12484557309540151, "grad_norm": 0.8534547088237179, "learning_rate": 9.766237053763074e-06, "loss": 0.3396, "step": 3638 }, { "epoch": 0.12487989018531229, "grad_norm": 0.8470867102829159, "learning_rate": 9.76606908501844e-06, "loss": 0.4194, "step": 3639 }, { "epoch": 0.12491420727522307, "grad_norm": 0.8153084036450907, "learning_rate": 9.765901057394577e-06, "loss": 0.3936, "step": 3640 }, { "epoch": 0.12494852436513383, "grad_norm": 0.8231520135987483, "learning_rate": 9.765732970893565e-06, "loss": 0.353, "step": 3641 }, { "epoch": 0.12498284145504461, "grad_norm": 0.8715042692809263, "learning_rate": 9.765564825517477e-06, "loss": 0.3734, "step": 3642 }, { "epoch": 0.12501715854495538, "grad_norm": 0.8218421808874873, "learning_rate": 9.765396621268391e-06, "loss": 0.3308, "step": 3643 }, { "epoch": 0.12505147563486615, "grad_norm": 0.7161811943872589, "learning_rate": 9.765228358148386e-06, "loss": 0.3283, "step": 3644 }, { "epoch": 0.12508579272477693, "grad_norm": 0.8360873603944217, "learning_rate": 9.765060036159541e-06, "loss": 0.3661, "step": 3645 }, { "epoch": 0.1251201098146877, "grad_norm": 0.8485894032279295, "learning_rate": 9.764891655303935e-06, "loss": 0.3439, "step": 3646 }, { "epoch": 0.1251544269045985, "grad_norm": 0.9854388843315148, "learning_rate": 9.764723215583649e-06, "loss": 0.343, "step": 3647 }, { "epoch": 0.12518874399450927, "grad_norm": 0.8411403124731311, "learning_rate": 9.764554717000761e-06, "loss": 0.3705, "step": 3648 }, { "epoch": 0.12522306108442005, "grad_norm": 1.0933049271312714, "learning_rate": 9.764386159557354e-06, "loss": 0.3767, "step": 3649 }, { "epoch": 0.12525737817433083, "grad_norm": 1.1585305225035936, "learning_rate": 9.764217543255511e-06, "loss": 0.3517, "step": 3650 }, { "epoch": 0.12529169526424158, "grad_norm": 0.8169842114501258, "learning_rate": 9.764048868097316e-06, "loss": 0.3449, "step": 3651 }, { "epoch": 0.12532601235415236, "grad_norm": 0.8225398729932161, "learning_rate": 9.763880134084851e-06, "loss": 0.3607, "step": 3652 }, { "epoch": 0.12536032944406314, "grad_norm": 0.781055728381333, "learning_rate": 9.7637113412202e-06, "loss": 0.4346, "step": 3653 }, { "epoch": 0.12539464653397392, "grad_norm": 0.779576987515312, "learning_rate": 9.763542489505452e-06, "loss": 0.3648, "step": 3654 }, { "epoch": 0.1254289636238847, "grad_norm": 0.7814040150933883, "learning_rate": 9.763373578942688e-06, "loss": 0.3314, "step": 3655 }, { "epoch": 0.12546328071379548, "grad_norm": 0.83927089794406, "learning_rate": 9.763204609533998e-06, "loss": 0.3915, "step": 3656 }, { "epoch": 0.12549759780370626, "grad_norm": 0.9897774780214648, "learning_rate": 9.763035581281468e-06, "loss": 0.3699, "step": 3657 }, { "epoch": 0.125531914893617, "grad_norm": 0.8075732757224826, "learning_rate": 9.762866494187187e-06, "loss": 0.3126, "step": 3658 }, { "epoch": 0.1255662319835278, "grad_norm": 0.9277258903500479, "learning_rate": 9.762697348253243e-06, "loss": 0.4132, "step": 3659 }, { "epoch": 0.12560054907343857, "grad_norm": 0.845827590664333, "learning_rate": 9.762528143481727e-06, "loss": 0.3428, "step": 3660 }, { "epoch": 0.12563486616334935, "grad_norm": 0.8135461059561584, "learning_rate": 9.762358879874725e-06, "loss": 0.3405, "step": 3661 }, { "epoch": 0.12566918325326012, "grad_norm": 0.8395229740209961, "learning_rate": 9.762189557434335e-06, "loss": 0.4093, "step": 3662 }, { "epoch": 0.1257035003431709, "grad_norm": 0.876933201727199, "learning_rate": 9.762020176162643e-06, "loss": 0.356, "step": 3663 }, { "epoch": 0.12573781743308168, "grad_norm": 0.8836916375787754, "learning_rate": 9.761850736061745e-06, "loss": 0.3915, "step": 3664 }, { "epoch": 0.12577213452299246, "grad_norm": 0.7406699423886569, "learning_rate": 9.761681237133733e-06, "loss": 0.3321, "step": 3665 }, { "epoch": 0.12580645161290321, "grad_norm": 0.828802771408623, "learning_rate": 9.7615116793807e-06, "loss": 0.3439, "step": 3666 }, { "epoch": 0.125840768702814, "grad_norm": 0.9130694950375672, "learning_rate": 9.761342062804744e-06, "loss": 0.3586, "step": 3667 }, { "epoch": 0.12587508579272477, "grad_norm": 0.8309258751623015, "learning_rate": 9.761172387407956e-06, "loss": 0.3516, "step": 3668 }, { "epoch": 0.12590940288263555, "grad_norm": 0.8511798075505141, "learning_rate": 9.761002653192433e-06, "loss": 0.3846, "step": 3669 }, { "epoch": 0.12594371997254633, "grad_norm": 0.8854847613090716, "learning_rate": 9.760832860160275e-06, "loss": 0.3627, "step": 3670 }, { "epoch": 0.1259780370624571, "grad_norm": 0.8297940382136716, "learning_rate": 9.760663008313576e-06, "loss": 0.3686, "step": 3671 }, { "epoch": 0.1260123541523679, "grad_norm": 0.8166895834495954, "learning_rate": 9.760493097654437e-06, "loss": 0.3926, "step": 3672 }, { "epoch": 0.12604667124227867, "grad_norm": 0.8074759966036993, "learning_rate": 9.760323128184957e-06, "loss": 0.314, "step": 3673 }, { "epoch": 0.12608098833218942, "grad_norm": 0.8042063921512125, "learning_rate": 9.760153099907233e-06, "loss": 0.3277, "step": 3674 }, { "epoch": 0.1261153054221002, "grad_norm": 0.8500360523116266, "learning_rate": 9.759983012823367e-06, "loss": 0.3438, "step": 3675 }, { "epoch": 0.12614962251201098, "grad_norm": 0.7438234975366406, "learning_rate": 9.759812866935462e-06, "loss": 0.3248, "step": 3676 }, { "epoch": 0.12618393960192176, "grad_norm": 0.8369965195734497, "learning_rate": 9.759642662245619e-06, "loss": 0.3935, "step": 3677 }, { "epoch": 0.12621825669183254, "grad_norm": 0.8290494302981019, "learning_rate": 9.759472398755937e-06, "loss": 0.4057, "step": 3678 }, { "epoch": 0.12625257378174332, "grad_norm": 0.7565862705429368, "learning_rate": 9.759302076468524e-06, "loss": 0.334, "step": 3679 }, { "epoch": 0.1262868908716541, "grad_norm": 0.846375785968924, "learning_rate": 9.759131695385484e-06, "loss": 0.3787, "step": 3680 }, { "epoch": 0.12632120796156485, "grad_norm": 0.8322265937630485, "learning_rate": 9.75896125550892e-06, "loss": 0.3346, "step": 3681 }, { "epoch": 0.12635552505147563, "grad_norm": 0.8545595029250355, "learning_rate": 9.758790756840936e-06, "loss": 0.3102, "step": 3682 }, { "epoch": 0.1263898421413864, "grad_norm": 0.9565081949071169, "learning_rate": 9.758620199383644e-06, "loss": 0.3754, "step": 3683 }, { "epoch": 0.12642415923129718, "grad_norm": 0.8112642697827867, "learning_rate": 9.758449583139145e-06, "loss": 0.3713, "step": 3684 }, { "epoch": 0.12645847632120796, "grad_norm": 0.7923671863170595, "learning_rate": 9.758278908109548e-06, "loss": 0.3374, "step": 3685 }, { "epoch": 0.12649279341111874, "grad_norm": 0.8440443545944509, "learning_rate": 9.758108174296966e-06, "loss": 0.3581, "step": 3686 }, { "epoch": 0.12652711050102952, "grad_norm": 0.9769646085501588, "learning_rate": 9.757937381703503e-06, "loss": 0.349, "step": 3687 }, { "epoch": 0.1265614275909403, "grad_norm": 0.8083373523138121, "learning_rate": 9.757766530331272e-06, "loss": 0.3627, "step": 3688 }, { "epoch": 0.12659574468085105, "grad_norm": 0.8306349358412176, "learning_rate": 9.75759562018238e-06, "loss": 0.3301, "step": 3689 }, { "epoch": 0.12663006177076183, "grad_norm": 0.8492823819300788, "learning_rate": 9.757424651258944e-06, "loss": 0.3512, "step": 3690 }, { "epoch": 0.1266643788606726, "grad_norm": 0.9038680308857184, "learning_rate": 9.757253623563073e-06, "loss": 0.3776, "step": 3691 }, { "epoch": 0.1266986959505834, "grad_norm": 0.8563671261698347, "learning_rate": 9.757082537096877e-06, "loss": 0.4179, "step": 3692 }, { "epoch": 0.12673301304049417, "grad_norm": 0.8032696634275284, "learning_rate": 9.756911391862474e-06, "loss": 0.3633, "step": 3693 }, { "epoch": 0.12676733013040495, "grad_norm": 0.7515324674559677, "learning_rate": 9.756740187861977e-06, "loss": 0.3352, "step": 3694 }, { "epoch": 0.12680164722031573, "grad_norm": 0.8734917008134291, "learning_rate": 9.7565689250975e-06, "loss": 0.3934, "step": 3695 }, { "epoch": 0.1268359643102265, "grad_norm": 0.8613817824740176, "learning_rate": 9.75639760357116e-06, "loss": 0.3266, "step": 3696 }, { "epoch": 0.12687028140013726, "grad_norm": 0.8222827498658959, "learning_rate": 9.756226223285074e-06, "loss": 0.415, "step": 3697 }, { "epoch": 0.12690459849004804, "grad_norm": 0.8910054239054584, "learning_rate": 9.756054784241359e-06, "loss": 0.3309, "step": 3698 }, { "epoch": 0.12693891557995882, "grad_norm": 1.0885846253542828, "learning_rate": 9.755883286442129e-06, "loss": 0.3965, "step": 3699 }, { "epoch": 0.1269732326698696, "grad_norm": 0.7782575715047183, "learning_rate": 9.755711729889508e-06, "loss": 0.4005, "step": 3700 }, { "epoch": 0.12700754975978037, "grad_norm": 0.8545231146984744, "learning_rate": 9.755540114585612e-06, "loss": 0.4368, "step": 3701 }, { "epoch": 0.12704186684969115, "grad_norm": 0.8183370469794224, "learning_rate": 9.755368440532563e-06, "loss": 0.3502, "step": 3702 }, { "epoch": 0.12707618393960193, "grad_norm": 0.8682541000028379, "learning_rate": 9.755196707732482e-06, "loss": 0.3304, "step": 3703 }, { "epoch": 0.12711050102951268, "grad_norm": 0.7422582829503968, "learning_rate": 9.755024916187488e-06, "loss": 0.3146, "step": 3704 }, { "epoch": 0.12714481811942346, "grad_norm": 0.7497774745783855, "learning_rate": 9.754853065899706e-06, "loss": 0.3335, "step": 3705 }, { "epoch": 0.12717913520933424, "grad_norm": 0.9403388175982145, "learning_rate": 9.754681156871257e-06, "loss": 0.3777, "step": 3706 }, { "epoch": 0.12721345229924502, "grad_norm": 0.8985960004616815, "learning_rate": 9.754509189104267e-06, "loss": 0.3212, "step": 3707 }, { "epoch": 0.1272477693891558, "grad_norm": 0.8387149389006591, "learning_rate": 9.754337162600858e-06, "loss": 0.3268, "step": 3708 }, { "epoch": 0.12728208647906658, "grad_norm": 0.8785195950913552, "learning_rate": 9.754165077363158e-06, "loss": 0.3976, "step": 3709 }, { "epoch": 0.12731640356897736, "grad_norm": 0.7679308460733657, "learning_rate": 9.75399293339329e-06, "loss": 0.3387, "step": 3710 }, { "epoch": 0.12735072065888814, "grad_norm": 0.8144802909293347, "learning_rate": 9.753820730693381e-06, "loss": 0.3549, "step": 3711 }, { "epoch": 0.1273850377487989, "grad_norm": 0.8566617797722895, "learning_rate": 9.753648469265562e-06, "loss": 0.323, "step": 3712 }, { "epoch": 0.12741935483870967, "grad_norm": 0.8401076004570169, "learning_rate": 9.753476149111956e-06, "loss": 0.3225, "step": 3713 }, { "epoch": 0.12745367192862045, "grad_norm": 0.8195769560157438, "learning_rate": 9.753303770234696e-06, "loss": 0.3119, "step": 3714 }, { "epoch": 0.12748798901853123, "grad_norm": 0.7422512072149514, "learning_rate": 9.75313133263591e-06, "loss": 0.3245, "step": 3715 }, { "epoch": 0.127522306108442, "grad_norm": 0.8873220742064153, "learning_rate": 9.752958836317724e-06, "loss": 0.3688, "step": 3716 }, { "epoch": 0.1275566231983528, "grad_norm": 0.8841012508805587, "learning_rate": 9.752786281282279e-06, "loss": 0.3676, "step": 3717 }, { "epoch": 0.12759094028826357, "grad_norm": 0.9289997462751279, "learning_rate": 9.752613667531696e-06, "loss": 0.3589, "step": 3718 }, { "epoch": 0.12762525737817432, "grad_norm": 0.8845431976896988, "learning_rate": 9.752440995068114e-06, "loss": 0.3701, "step": 3719 }, { "epoch": 0.1276595744680851, "grad_norm": 0.8651173442603208, "learning_rate": 9.752268263893664e-06, "loss": 0.3321, "step": 3720 }, { "epoch": 0.12769389155799588, "grad_norm": 0.8129626513409484, "learning_rate": 9.75209547401048e-06, "loss": 0.3507, "step": 3721 }, { "epoch": 0.12772820864790665, "grad_norm": 0.7253546771315019, "learning_rate": 9.751922625420698e-06, "loss": 0.3325, "step": 3722 }, { "epoch": 0.12776252573781743, "grad_norm": 0.8717265369645513, "learning_rate": 9.751749718126451e-06, "loss": 0.3865, "step": 3723 }, { "epoch": 0.1277968428277282, "grad_norm": 0.7381766495155383, "learning_rate": 9.751576752129876e-06, "loss": 0.3054, "step": 3724 }, { "epoch": 0.127831159917639, "grad_norm": 0.9370106256537544, "learning_rate": 9.75140372743311e-06, "loss": 0.3581, "step": 3725 }, { "epoch": 0.12786547700754977, "grad_norm": 0.7350918814924375, "learning_rate": 9.751230644038292e-06, "loss": 0.385, "step": 3726 }, { "epoch": 0.12789979409746052, "grad_norm": 0.802770984169667, "learning_rate": 9.751057501947557e-06, "loss": 0.3441, "step": 3727 }, { "epoch": 0.1279341111873713, "grad_norm": 0.7255000873464981, "learning_rate": 9.750884301163047e-06, "loss": 0.3422, "step": 3728 }, { "epoch": 0.12796842827728208, "grad_norm": 0.7685374490687039, "learning_rate": 9.7507110416869e-06, "loss": 0.2843, "step": 3729 }, { "epoch": 0.12800274536719286, "grad_norm": 0.8130484192926957, "learning_rate": 9.750537723521256e-06, "loss": 0.3504, "step": 3730 }, { "epoch": 0.12803706245710364, "grad_norm": 0.8022949928347266, "learning_rate": 9.750364346668258e-06, "loss": 0.3615, "step": 3731 }, { "epoch": 0.12807137954701442, "grad_norm": 0.794912387254042, "learning_rate": 9.750190911130048e-06, "loss": 0.3414, "step": 3732 }, { "epoch": 0.1281056966369252, "grad_norm": 0.805698050472879, "learning_rate": 9.750017416908766e-06, "loss": 0.3455, "step": 3733 }, { "epoch": 0.12814001372683598, "grad_norm": 0.8378740473192827, "learning_rate": 9.749843864006556e-06, "loss": 0.333, "step": 3734 }, { "epoch": 0.12817433081674673, "grad_norm": 0.8117891294299557, "learning_rate": 9.749670252425565e-06, "loss": 0.298, "step": 3735 }, { "epoch": 0.1282086479066575, "grad_norm": 0.8746074018661174, "learning_rate": 9.749496582167935e-06, "loss": 0.4244, "step": 3736 }, { "epoch": 0.1282429649965683, "grad_norm": 0.7968341327178108, "learning_rate": 9.74932285323581e-06, "loss": 0.3495, "step": 3737 }, { "epoch": 0.12827728208647907, "grad_norm": 0.8226089739123229, "learning_rate": 9.74914906563134e-06, "loss": 0.3763, "step": 3738 }, { "epoch": 0.12831159917638985, "grad_norm": 0.8203992844826996, "learning_rate": 9.74897521935667e-06, "loss": 0.3661, "step": 3739 }, { "epoch": 0.12834591626630062, "grad_norm": 0.9641908576451717, "learning_rate": 9.74880131441395e-06, "loss": 0.3795, "step": 3740 }, { "epoch": 0.1283802333562114, "grad_norm": 0.8127593651750007, "learning_rate": 9.748627350805325e-06, "loss": 0.3915, "step": 3741 }, { "epoch": 0.12841455044612216, "grad_norm": 0.8189490228607769, "learning_rate": 9.748453328532943e-06, "loss": 0.3877, "step": 3742 }, { "epoch": 0.12844886753603293, "grad_norm": 0.862933443018838, "learning_rate": 9.748279247598959e-06, "loss": 0.3863, "step": 3743 }, { "epoch": 0.1284831846259437, "grad_norm": 0.7645053237339449, "learning_rate": 9.74810510800552e-06, "loss": 0.3419, "step": 3744 }, { "epoch": 0.1285175017158545, "grad_norm": 0.8296552145503001, "learning_rate": 9.747930909754779e-06, "loss": 0.3249, "step": 3745 }, { "epoch": 0.12855181880576527, "grad_norm": 0.7887272099758043, "learning_rate": 9.747756652848885e-06, "loss": 0.3603, "step": 3746 }, { "epoch": 0.12858613589567605, "grad_norm": 0.8647472879056072, "learning_rate": 9.747582337289996e-06, "loss": 0.4291, "step": 3747 }, { "epoch": 0.12862045298558683, "grad_norm": 0.7873640366953645, "learning_rate": 9.74740796308026e-06, "loss": 0.3742, "step": 3748 }, { "epoch": 0.1286547700754976, "grad_norm": 0.8060998431895625, "learning_rate": 9.747233530221833e-06, "loss": 0.3894, "step": 3749 }, { "epoch": 0.12868908716540836, "grad_norm": 0.8627168840093998, "learning_rate": 9.747059038716873e-06, "loss": 0.373, "step": 3750 }, { "epoch": 0.12872340425531914, "grad_norm": 0.8511089901495538, "learning_rate": 9.746884488567531e-06, "loss": 0.3954, "step": 3751 }, { "epoch": 0.12875772134522992, "grad_norm": 0.8546825931374428, "learning_rate": 9.746709879775967e-06, "loss": 0.3961, "step": 3752 }, { "epoch": 0.1287920384351407, "grad_norm": 1.3328066459038919, "learning_rate": 9.746535212344334e-06, "loss": 0.3859, "step": 3753 }, { "epoch": 0.12882635552505148, "grad_norm": 0.9058659906792675, "learning_rate": 9.746360486274796e-06, "loss": 0.3354, "step": 3754 }, { "epoch": 0.12886067261496226, "grad_norm": 1.0004225343188822, "learning_rate": 9.746185701569505e-06, "loss": 0.4368, "step": 3755 }, { "epoch": 0.12889498970487304, "grad_norm": 0.8741747733868418, "learning_rate": 9.746010858230625e-06, "loss": 0.3844, "step": 3756 }, { "epoch": 0.12892930679478382, "grad_norm": 0.8781469408148672, "learning_rate": 9.745835956260312e-06, "loss": 0.3482, "step": 3757 }, { "epoch": 0.12896362388469457, "grad_norm": 0.7887937007460356, "learning_rate": 9.74566099566073e-06, "loss": 0.3586, "step": 3758 }, { "epoch": 0.12899794097460535, "grad_norm": 0.7546521428135815, "learning_rate": 9.745485976434041e-06, "loss": 0.3827, "step": 3759 }, { "epoch": 0.12903225806451613, "grad_norm": 0.8539031298610535, "learning_rate": 9.745310898582404e-06, "loss": 0.3721, "step": 3760 }, { "epoch": 0.1290665751544269, "grad_norm": 0.736205710290974, "learning_rate": 9.745135762107982e-06, "loss": 0.3672, "step": 3761 }, { "epoch": 0.12910089224433768, "grad_norm": 0.8330999528188857, "learning_rate": 9.744960567012941e-06, "loss": 0.348, "step": 3762 }, { "epoch": 0.12913520933424846, "grad_norm": 0.7433511437268036, "learning_rate": 9.744785313299446e-06, "loss": 0.311, "step": 3763 }, { "epoch": 0.12916952642415924, "grad_norm": 0.8254733717026739, "learning_rate": 9.744610000969658e-06, "loss": 0.3365, "step": 3764 }, { "epoch": 0.12920384351407, "grad_norm": 0.8757961487673961, "learning_rate": 9.744434630025748e-06, "loss": 0.3141, "step": 3765 }, { "epoch": 0.12923816060398077, "grad_norm": 0.8553931217281936, "learning_rate": 9.744259200469877e-06, "loss": 0.4232, "step": 3766 }, { "epoch": 0.12927247769389155, "grad_norm": 0.8756085184344561, "learning_rate": 9.744083712304216e-06, "loss": 0.3573, "step": 3767 }, { "epoch": 0.12930679478380233, "grad_norm": 0.8150635633051282, "learning_rate": 9.743908165530931e-06, "loss": 0.3562, "step": 3768 }, { "epoch": 0.1293411118737131, "grad_norm": 0.8494481010948495, "learning_rate": 9.743732560152192e-06, "loss": 0.3163, "step": 3769 }, { "epoch": 0.1293754289636239, "grad_norm": 0.8727973268811845, "learning_rate": 9.743556896170168e-06, "loss": 0.3785, "step": 3770 }, { "epoch": 0.12940974605353467, "grad_norm": 0.844181123907445, "learning_rate": 9.743381173587028e-06, "loss": 0.4207, "step": 3771 }, { "epoch": 0.12944406314344545, "grad_norm": 0.7684514519962077, "learning_rate": 9.743205392404944e-06, "loss": 0.2824, "step": 3772 }, { "epoch": 0.1294783802333562, "grad_norm": 0.7345621269136999, "learning_rate": 9.743029552626088e-06, "loss": 0.2924, "step": 3773 }, { "epoch": 0.12951269732326698, "grad_norm": 0.772815250512831, "learning_rate": 9.742853654252632e-06, "loss": 0.3335, "step": 3774 }, { "epoch": 0.12954701441317776, "grad_norm": 0.7891148761093366, "learning_rate": 9.74267769728675e-06, "loss": 0.3023, "step": 3775 }, { "epoch": 0.12958133150308854, "grad_norm": 0.8407256666188563, "learning_rate": 9.742501681730612e-06, "loss": 0.3387, "step": 3776 }, { "epoch": 0.12961564859299932, "grad_norm": 0.8434014791420411, "learning_rate": 9.742325607586396e-06, "loss": 0.349, "step": 3777 }, { "epoch": 0.1296499656829101, "grad_norm": 0.8058247268494972, "learning_rate": 9.742149474856276e-06, "loss": 0.3349, "step": 3778 }, { "epoch": 0.12968428277282087, "grad_norm": 0.850957489576283, "learning_rate": 9.741973283542428e-06, "loss": 0.3233, "step": 3779 }, { "epoch": 0.12971859986273165, "grad_norm": 0.756289839107643, "learning_rate": 9.741797033647027e-06, "loss": 0.3048, "step": 3780 }, { "epoch": 0.1297529169526424, "grad_norm": 0.8213612806806702, "learning_rate": 9.741620725172253e-06, "loss": 0.3445, "step": 3781 }, { "epoch": 0.12978723404255318, "grad_norm": 0.9100950879195283, "learning_rate": 9.741444358120285e-06, "loss": 0.3578, "step": 3782 }, { "epoch": 0.12982155113246396, "grad_norm": 0.8506608106790468, "learning_rate": 9.741267932493299e-06, "loss": 0.3527, "step": 3783 }, { "epoch": 0.12985586822237474, "grad_norm": 0.82328701416086, "learning_rate": 9.741091448293476e-06, "loss": 0.369, "step": 3784 }, { "epoch": 0.12989018531228552, "grad_norm": 0.7926045306520629, "learning_rate": 9.740914905522995e-06, "loss": 0.4338, "step": 3785 }, { "epoch": 0.1299245024021963, "grad_norm": 0.8466900657567408, "learning_rate": 9.740738304184038e-06, "loss": 0.3491, "step": 3786 }, { "epoch": 0.12995881949210708, "grad_norm": 0.8333023236498169, "learning_rate": 9.740561644278785e-06, "loss": 0.315, "step": 3787 }, { "epoch": 0.12999313658201783, "grad_norm": 0.9195840700772163, "learning_rate": 9.740384925809422e-06, "loss": 0.3305, "step": 3788 }, { "epoch": 0.1300274536719286, "grad_norm": 0.870061370233934, "learning_rate": 9.74020814877813e-06, "loss": 0.4004, "step": 3789 }, { "epoch": 0.1300617707618394, "grad_norm": 0.8144606252908788, "learning_rate": 9.740031313187091e-06, "loss": 0.3193, "step": 3790 }, { "epoch": 0.13009608785175017, "grad_norm": 0.8101872830749628, "learning_rate": 9.739854419038492e-06, "loss": 0.3566, "step": 3791 }, { "epoch": 0.13013040494166095, "grad_norm": 0.7293922938274965, "learning_rate": 9.739677466334519e-06, "loss": 0.2954, "step": 3792 }, { "epoch": 0.13016472203157173, "grad_norm": 0.8037064814878389, "learning_rate": 9.739500455077355e-06, "loss": 0.356, "step": 3793 }, { "epoch": 0.1301990391214825, "grad_norm": 0.9147602602707562, "learning_rate": 9.73932338526919e-06, "loss": 0.3865, "step": 3794 }, { "epoch": 0.1302333562113933, "grad_norm": 0.8425713523764407, "learning_rate": 9.739146256912208e-06, "loss": 0.3831, "step": 3795 }, { "epoch": 0.13026767330130404, "grad_norm": 0.7713677540648021, "learning_rate": 9.738969070008601e-06, "loss": 0.3337, "step": 3796 }, { "epoch": 0.13030199039121482, "grad_norm": 0.7617763235562401, "learning_rate": 9.738791824560558e-06, "loss": 0.3713, "step": 3797 }, { "epoch": 0.1303363074811256, "grad_norm": 0.947854920333551, "learning_rate": 9.738614520570264e-06, "loss": 0.365, "step": 3798 }, { "epoch": 0.13037062457103638, "grad_norm": 0.8294943559900073, "learning_rate": 9.738437158039913e-06, "loss": 0.388, "step": 3799 }, { "epoch": 0.13040494166094715, "grad_norm": 0.8131086619003051, "learning_rate": 9.738259736971696e-06, "loss": 0.3705, "step": 3800 }, { "epoch": 0.13043925875085793, "grad_norm": 0.8334035042242641, "learning_rate": 9.738082257367805e-06, "loss": 0.3561, "step": 3801 }, { "epoch": 0.1304735758407687, "grad_norm": 0.872327699816829, "learning_rate": 9.73790471923043e-06, "loss": 0.3698, "step": 3802 }, { "epoch": 0.1305078929306795, "grad_norm": 0.7862850771136763, "learning_rate": 9.737727122561767e-06, "loss": 0.3635, "step": 3803 }, { "epoch": 0.13054221002059024, "grad_norm": 0.8112737179967905, "learning_rate": 9.737549467364009e-06, "loss": 0.3223, "step": 3804 }, { "epoch": 0.13057652711050102, "grad_norm": 0.8273701696460704, "learning_rate": 9.73737175363935e-06, "loss": 0.4021, "step": 3805 }, { "epoch": 0.1306108442004118, "grad_norm": 0.9021249271892293, "learning_rate": 9.737193981389987e-06, "loss": 0.3502, "step": 3806 }, { "epoch": 0.13064516129032258, "grad_norm": 0.8297623858987314, "learning_rate": 9.737016150618115e-06, "loss": 0.3745, "step": 3807 }, { "epoch": 0.13067947838023336, "grad_norm": 0.8448324560653346, "learning_rate": 9.736838261325931e-06, "loss": 0.3831, "step": 3808 }, { "epoch": 0.13071379547014414, "grad_norm": 0.7976031011796403, "learning_rate": 9.736660313515635e-06, "loss": 0.3305, "step": 3809 }, { "epoch": 0.13074811256005492, "grad_norm": 0.8093755027404168, "learning_rate": 9.73648230718942e-06, "loss": 0.3011, "step": 3810 }, { "epoch": 0.13078242964996567, "grad_norm": 0.7839820133785169, "learning_rate": 9.736304242349492e-06, "loss": 0.4161, "step": 3811 }, { "epoch": 0.13081674673987645, "grad_norm": 0.8534163818493462, "learning_rate": 9.736126118998044e-06, "loss": 0.4388, "step": 3812 }, { "epoch": 0.13085106382978723, "grad_norm": 0.7884975866066187, "learning_rate": 9.735947937137281e-06, "loss": 0.3401, "step": 3813 }, { "epoch": 0.130885380919698, "grad_norm": 0.7422673965788783, "learning_rate": 9.735769696769403e-06, "loss": 0.3272, "step": 3814 }, { "epoch": 0.1309196980096088, "grad_norm": 0.7422179931611754, "learning_rate": 9.735591397896611e-06, "loss": 0.3234, "step": 3815 }, { "epoch": 0.13095401509951957, "grad_norm": 0.9558776078594132, "learning_rate": 9.735413040521108e-06, "loss": 0.3536, "step": 3816 }, { "epoch": 0.13098833218943035, "grad_norm": 0.8173346244963752, "learning_rate": 9.735234624645098e-06, "loss": 0.3291, "step": 3817 }, { "epoch": 0.13102264927934112, "grad_norm": 0.826333434318717, "learning_rate": 9.735056150270786e-06, "loss": 0.3524, "step": 3818 }, { "epoch": 0.13105696636925188, "grad_norm": 0.7239136491155919, "learning_rate": 9.734877617400375e-06, "loss": 0.2989, "step": 3819 }, { "epoch": 0.13109128345916266, "grad_norm": 0.9214392236960747, "learning_rate": 9.734699026036072e-06, "loss": 0.3085, "step": 3820 }, { "epoch": 0.13112560054907343, "grad_norm": 0.8980333315970483, "learning_rate": 9.734520376180083e-06, "loss": 0.358, "step": 3821 }, { "epoch": 0.1311599176389842, "grad_norm": 0.7036760990246448, "learning_rate": 9.734341667834614e-06, "loss": 0.3236, "step": 3822 }, { "epoch": 0.131194234728895, "grad_norm": 0.7809946358932639, "learning_rate": 9.734162901001874e-06, "loss": 0.3205, "step": 3823 }, { "epoch": 0.13122855181880577, "grad_norm": 0.7498897535945791, "learning_rate": 9.733984075684069e-06, "loss": 0.3255, "step": 3824 }, { "epoch": 0.13126286890871655, "grad_norm": 0.7725794781050151, "learning_rate": 9.733805191883412e-06, "loss": 0.3639, "step": 3825 }, { "epoch": 0.1312971859986273, "grad_norm": 0.8179232599120754, "learning_rate": 9.733626249602111e-06, "loss": 0.3959, "step": 3826 }, { "epoch": 0.13133150308853808, "grad_norm": 0.8553605162178912, "learning_rate": 9.733447248842375e-06, "loss": 0.3484, "step": 3827 }, { "epoch": 0.13136582017844886, "grad_norm": 0.8264586531448916, "learning_rate": 9.733268189606417e-06, "loss": 0.3697, "step": 3828 }, { "epoch": 0.13140013726835964, "grad_norm": 0.9986721189640958, "learning_rate": 9.73308907189645e-06, "loss": 0.3521, "step": 3829 }, { "epoch": 0.13143445435827042, "grad_norm": 0.882782483578054, "learning_rate": 9.732909895714684e-06, "loss": 0.4018, "step": 3830 }, { "epoch": 0.1314687714481812, "grad_norm": 0.7556617583736476, "learning_rate": 9.732730661063336e-06, "loss": 0.3223, "step": 3831 }, { "epoch": 0.13150308853809198, "grad_norm": 0.9334071412117005, "learning_rate": 9.732551367944619e-06, "loss": 0.3491, "step": 3832 }, { "epoch": 0.13153740562800276, "grad_norm": 0.9443950393579578, "learning_rate": 9.732372016360747e-06, "loss": 0.3921, "step": 3833 }, { "epoch": 0.1315717227179135, "grad_norm": 0.9803924589835303, "learning_rate": 9.732192606313935e-06, "loss": 0.3334, "step": 3834 }, { "epoch": 0.1316060398078243, "grad_norm": 0.764303911978109, "learning_rate": 9.732013137806402e-06, "loss": 0.3319, "step": 3835 }, { "epoch": 0.13164035689773507, "grad_norm": 0.7209409121351775, "learning_rate": 9.731833610840362e-06, "loss": 0.3243, "step": 3836 }, { "epoch": 0.13167467398764585, "grad_norm": 0.8290207122440965, "learning_rate": 9.731654025418035e-06, "loss": 0.4183, "step": 3837 }, { "epoch": 0.13170899107755663, "grad_norm": 0.8480367231104113, "learning_rate": 9.731474381541641e-06, "loss": 0.3102, "step": 3838 }, { "epoch": 0.1317433081674674, "grad_norm": 0.9945090781357959, "learning_rate": 9.731294679213397e-06, "loss": 0.3193, "step": 3839 }, { "epoch": 0.13177762525737818, "grad_norm": 0.7874609939802262, "learning_rate": 9.731114918435521e-06, "loss": 0.3436, "step": 3840 }, { "epoch": 0.13181194234728896, "grad_norm": 0.8199815595965844, "learning_rate": 9.730935099210238e-06, "loss": 0.3634, "step": 3841 }, { "epoch": 0.13184625943719971, "grad_norm": 0.9529325745032994, "learning_rate": 9.730755221539767e-06, "loss": 0.3686, "step": 3842 }, { "epoch": 0.1318805765271105, "grad_norm": 0.7153403330245223, "learning_rate": 9.730575285426332e-06, "loss": 0.3566, "step": 3843 }, { "epoch": 0.13191489361702127, "grad_norm": 0.8702248122834902, "learning_rate": 9.730395290872151e-06, "loss": 0.3613, "step": 3844 }, { "epoch": 0.13194921070693205, "grad_norm": 0.8412053159324294, "learning_rate": 9.730215237879455e-06, "loss": 0.3455, "step": 3845 }, { "epoch": 0.13198352779684283, "grad_norm": 0.76272882450883, "learning_rate": 9.730035126450463e-06, "loss": 0.349, "step": 3846 }, { "epoch": 0.1320178448867536, "grad_norm": 0.7278751386582897, "learning_rate": 9.729854956587403e-06, "loss": 0.3348, "step": 3847 }, { "epoch": 0.1320521619766644, "grad_norm": 1.1193902396297857, "learning_rate": 9.729674728292499e-06, "loss": 0.3702, "step": 3848 }, { "epoch": 0.13208647906657514, "grad_norm": 0.7552228634812618, "learning_rate": 9.729494441567976e-06, "loss": 0.3503, "step": 3849 }, { "epoch": 0.13212079615648592, "grad_norm": 0.8129226849115356, "learning_rate": 9.729314096416066e-06, "loss": 0.3502, "step": 3850 }, { "epoch": 0.1321551132463967, "grad_norm": 0.8840739268037381, "learning_rate": 9.729133692838993e-06, "loss": 0.407, "step": 3851 }, { "epoch": 0.13218943033630748, "grad_norm": 0.8496738212079712, "learning_rate": 9.728953230838986e-06, "loss": 0.3835, "step": 3852 }, { "epoch": 0.13222374742621826, "grad_norm": 0.9447922632673227, "learning_rate": 9.728772710418277e-06, "loss": 0.3935, "step": 3853 }, { "epoch": 0.13225806451612904, "grad_norm": 0.7995322811755836, "learning_rate": 9.728592131579094e-06, "loss": 0.3684, "step": 3854 }, { "epoch": 0.13229238160603982, "grad_norm": 0.885074903723624, "learning_rate": 9.728411494323668e-06, "loss": 0.4059, "step": 3855 }, { "epoch": 0.1323266986959506, "grad_norm": 2.8960441426305463, "learning_rate": 9.72823079865423e-06, "loss": 0.3591, "step": 3856 }, { "epoch": 0.13236101578586135, "grad_norm": 0.7847135314600334, "learning_rate": 9.728050044573014e-06, "loss": 0.3192, "step": 3857 }, { "epoch": 0.13239533287577213, "grad_norm": 0.7758584098376203, "learning_rate": 9.727869232082253e-06, "loss": 0.3498, "step": 3858 }, { "epoch": 0.1324296499656829, "grad_norm": 0.858959156132807, "learning_rate": 9.727688361184178e-06, "loss": 0.3752, "step": 3859 }, { "epoch": 0.13246396705559368, "grad_norm": 0.7401784942141674, "learning_rate": 9.727507431881024e-06, "loss": 0.3553, "step": 3860 }, { "epoch": 0.13249828414550446, "grad_norm": 0.7563660390426981, "learning_rate": 9.72732644417503e-06, "loss": 0.3822, "step": 3861 }, { "epoch": 0.13253260123541524, "grad_norm": 0.871456049095559, "learning_rate": 9.72714539806843e-06, "loss": 0.3955, "step": 3862 }, { "epoch": 0.13256691832532602, "grad_norm": 0.8759045684933475, "learning_rate": 9.726964293563458e-06, "loss": 0.3609, "step": 3863 }, { "epoch": 0.1326012354152368, "grad_norm": 0.764743855971966, "learning_rate": 9.726783130662352e-06, "loss": 0.3545, "step": 3864 }, { "epoch": 0.13263555250514755, "grad_norm": 0.86107104745494, "learning_rate": 9.726601909367354e-06, "loss": 0.352, "step": 3865 }, { "epoch": 0.13266986959505833, "grad_norm": 0.8414776778612681, "learning_rate": 9.7264206296807e-06, "loss": 0.3933, "step": 3866 }, { "epoch": 0.1327041866849691, "grad_norm": 0.7515617259386869, "learning_rate": 9.726239291604628e-06, "loss": 0.3335, "step": 3867 }, { "epoch": 0.1327385037748799, "grad_norm": 0.8916259458068987, "learning_rate": 9.72605789514138e-06, "loss": 0.3892, "step": 3868 }, { "epoch": 0.13277282086479067, "grad_norm": 1.0040891088149042, "learning_rate": 9.725876440293197e-06, "loss": 0.3885, "step": 3869 }, { "epoch": 0.13280713795470145, "grad_norm": 0.8370854287505777, "learning_rate": 9.725694927062322e-06, "loss": 0.3376, "step": 3870 }, { "epoch": 0.13284145504461223, "grad_norm": 0.8046497109840123, "learning_rate": 9.725513355450992e-06, "loss": 0.3275, "step": 3871 }, { "epoch": 0.13287577213452298, "grad_norm": 0.8265967639463778, "learning_rate": 9.725331725461457e-06, "loss": 0.3552, "step": 3872 }, { "epoch": 0.13291008922443376, "grad_norm": 0.8910321391223522, "learning_rate": 9.725150037095958e-06, "loss": 0.4129, "step": 3873 }, { "epoch": 0.13294440631434454, "grad_norm": 0.861455456660369, "learning_rate": 9.724968290356738e-06, "loss": 0.3486, "step": 3874 }, { "epoch": 0.13297872340425532, "grad_norm": 1.0109423821695733, "learning_rate": 9.724786485246043e-06, "loss": 0.3998, "step": 3875 }, { "epoch": 0.1330130404941661, "grad_norm": 0.9154227751354751, "learning_rate": 9.724604621766122e-06, "loss": 0.2991, "step": 3876 }, { "epoch": 0.13304735758407688, "grad_norm": 0.919767786985658, "learning_rate": 9.724422699919218e-06, "loss": 0.3353, "step": 3877 }, { "epoch": 0.13308167467398765, "grad_norm": 0.8391831772441941, "learning_rate": 9.724240719707578e-06, "loss": 0.3106, "step": 3878 }, { "epoch": 0.13311599176389843, "grad_norm": 0.8662312636309234, "learning_rate": 9.724058681133454e-06, "loss": 0.3499, "step": 3879 }, { "epoch": 0.13315030885380919, "grad_norm": 1.0690959079512456, "learning_rate": 9.723876584199093e-06, "loss": 0.3818, "step": 3880 }, { "epoch": 0.13318462594371996, "grad_norm": 0.9220009429727961, "learning_rate": 9.723694428906742e-06, "loss": 0.3863, "step": 3881 }, { "epoch": 0.13321894303363074, "grad_norm": 0.8560169787132468, "learning_rate": 9.723512215258655e-06, "loss": 0.3918, "step": 3882 }, { "epoch": 0.13325326012354152, "grad_norm": 0.7842146075350874, "learning_rate": 9.723329943257081e-06, "loss": 0.3392, "step": 3883 }, { "epoch": 0.1332875772134523, "grad_norm": 0.7876787464261106, "learning_rate": 9.723147612904274e-06, "loss": 0.407, "step": 3884 }, { "epoch": 0.13332189430336308, "grad_norm": 0.8107475632341826, "learning_rate": 9.722965224202483e-06, "loss": 0.3857, "step": 3885 }, { "epoch": 0.13335621139327386, "grad_norm": 0.83727518614098, "learning_rate": 9.722782777153964e-06, "loss": 0.3991, "step": 3886 }, { "epoch": 0.13339052848318464, "grad_norm": 0.8502752554146781, "learning_rate": 9.72260027176097e-06, "loss": 0.3445, "step": 3887 }, { "epoch": 0.1334248455730954, "grad_norm": 0.8100560840921252, "learning_rate": 9.722417708025755e-06, "loss": 0.361, "step": 3888 }, { "epoch": 0.13345916266300617, "grad_norm": 1.0031634122975266, "learning_rate": 9.722235085950577e-06, "loss": 0.3756, "step": 3889 }, { "epoch": 0.13349347975291695, "grad_norm": 0.763816041514078, "learning_rate": 9.722052405537688e-06, "loss": 0.3441, "step": 3890 }, { "epoch": 0.13352779684282773, "grad_norm": 0.8004992625647749, "learning_rate": 9.721869666789348e-06, "loss": 0.3837, "step": 3891 }, { "epoch": 0.1335621139327385, "grad_norm": 0.7983126693729733, "learning_rate": 9.721686869707813e-06, "loss": 0.3664, "step": 3892 }, { "epoch": 0.1335964310226493, "grad_norm": 0.8036349215453386, "learning_rate": 9.721504014295342e-06, "loss": 0.3719, "step": 3893 }, { "epoch": 0.13363074811256007, "grad_norm": 0.945880051157389, "learning_rate": 9.721321100554194e-06, "loss": 0.4203, "step": 3894 }, { "epoch": 0.13366506520247082, "grad_norm": 0.8320815952870617, "learning_rate": 9.721138128486627e-06, "loss": 0.3787, "step": 3895 }, { "epoch": 0.1336993822923816, "grad_norm": 0.7670125021703335, "learning_rate": 9.720955098094903e-06, "loss": 0.3599, "step": 3896 }, { "epoch": 0.13373369938229238, "grad_norm": 0.901622070386798, "learning_rate": 9.720772009381285e-06, "loss": 0.378, "step": 3897 }, { "epoch": 0.13376801647220316, "grad_norm": 0.7438371888328154, "learning_rate": 9.72058886234803e-06, "loss": 0.3538, "step": 3898 }, { "epoch": 0.13380233356211393, "grad_norm": 0.7829754085318599, "learning_rate": 9.720405656997405e-06, "loss": 0.3813, "step": 3899 }, { "epoch": 0.1338366506520247, "grad_norm": 0.907120889571451, "learning_rate": 9.720222393331671e-06, "loss": 0.3454, "step": 3900 }, { "epoch": 0.1338709677419355, "grad_norm": 0.8261652007235694, "learning_rate": 9.720039071353092e-06, "loss": 0.3769, "step": 3901 }, { "epoch": 0.13390528483184627, "grad_norm": 0.8816716035892734, "learning_rate": 9.719855691063933e-06, "loss": 0.3641, "step": 3902 }, { "epoch": 0.13393960192175702, "grad_norm": 0.793763627099288, "learning_rate": 9.719672252466462e-06, "loss": 0.3366, "step": 3903 }, { "epoch": 0.1339739190116678, "grad_norm": 0.8145652655366553, "learning_rate": 9.71948875556294e-06, "loss": 0.3207, "step": 3904 }, { "epoch": 0.13400823610157858, "grad_norm": 0.8060902129179826, "learning_rate": 9.719305200355639e-06, "loss": 0.3328, "step": 3905 }, { "epoch": 0.13404255319148936, "grad_norm": 0.7901355974546271, "learning_rate": 9.719121586846824e-06, "loss": 0.3496, "step": 3906 }, { "epoch": 0.13407687028140014, "grad_norm": 0.7488271452045159, "learning_rate": 9.718937915038763e-06, "loss": 0.3206, "step": 3907 }, { "epoch": 0.13411118737131092, "grad_norm": 0.9243197454514743, "learning_rate": 9.718754184933727e-06, "loss": 0.4403, "step": 3908 }, { "epoch": 0.1341455044612217, "grad_norm": 0.8230593952677938, "learning_rate": 9.718570396533983e-06, "loss": 0.343, "step": 3909 }, { "epoch": 0.13417982155113248, "grad_norm": 0.8530786676898139, "learning_rate": 9.718386549841804e-06, "loss": 0.417, "step": 3910 }, { "epoch": 0.13421413864104323, "grad_norm": 0.9094254922994576, "learning_rate": 9.718202644859461e-06, "loss": 0.3625, "step": 3911 }, { "epoch": 0.134248455730954, "grad_norm": 0.7791327851772241, "learning_rate": 9.718018681589224e-06, "loss": 0.3317, "step": 3912 }, { "epoch": 0.1342827728208648, "grad_norm": 0.79218981969967, "learning_rate": 9.717834660033367e-06, "loss": 0.3247, "step": 3913 }, { "epoch": 0.13431708991077557, "grad_norm": 0.7896130104405442, "learning_rate": 9.717650580194164e-06, "loss": 0.407, "step": 3914 }, { "epoch": 0.13435140700068635, "grad_norm": 0.9911106030977259, "learning_rate": 9.717466442073888e-06, "loss": 0.399, "step": 3915 }, { "epoch": 0.13438572409059713, "grad_norm": 0.8620966753675606, "learning_rate": 9.717282245674813e-06, "loss": 0.367, "step": 3916 }, { "epoch": 0.1344200411805079, "grad_norm": 0.7902149506896146, "learning_rate": 9.717097990999216e-06, "loss": 0.3443, "step": 3917 }, { "epoch": 0.13445435827041866, "grad_norm": 0.854105647429747, "learning_rate": 9.716913678049375e-06, "loss": 0.3373, "step": 3918 }, { "epoch": 0.13448867536032943, "grad_norm": 0.7858622507994245, "learning_rate": 9.716729306827564e-06, "loss": 0.335, "step": 3919 }, { "epoch": 0.13452299245024021, "grad_norm": 0.7618589684394441, "learning_rate": 9.71654487733606e-06, "loss": 0.3303, "step": 3920 }, { "epoch": 0.134557309540151, "grad_norm": 0.6892889107665385, "learning_rate": 9.716360389577144e-06, "loss": 0.3019, "step": 3921 }, { "epoch": 0.13459162663006177, "grad_norm": 0.8137929122790322, "learning_rate": 9.716175843553096e-06, "loss": 0.3144, "step": 3922 }, { "epoch": 0.13462594371997255, "grad_norm": 0.913225942479974, "learning_rate": 9.715991239266191e-06, "loss": 0.4022, "step": 3923 }, { "epoch": 0.13466026080988333, "grad_norm": 0.867877161917119, "learning_rate": 9.715806576718714e-06, "loss": 0.3356, "step": 3924 }, { "epoch": 0.1346945778997941, "grad_norm": 0.7681690541188645, "learning_rate": 9.715621855912944e-06, "loss": 0.3298, "step": 3925 }, { "epoch": 0.13472889498970486, "grad_norm": 0.7972900259914483, "learning_rate": 9.715437076851164e-06, "loss": 0.4411, "step": 3926 }, { "epoch": 0.13476321207961564, "grad_norm": 0.8791462247074748, "learning_rate": 9.715252239535658e-06, "loss": 0.3159, "step": 3927 }, { "epoch": 0.13479752916952642, "grad_norm": 0.9011337119852236, "learning_rate": 9.715067343968705e-06, "loss": 0.3276, "step": 3928 }, { "epoch": 0.1348318462594372, "grad_norm": 0.8427340735211615, "learning_rate": 9.714882390152596e-06, "loss": 0.3363, "step": 3929 }, { "epoch": 0.13486616334934798, "grad_norm": 0.7152512185966449, "learning_rate": 9.714697378089611e-06, "loss": 0.3519, "step": 3930 }, { "epoch": 0.13490048043925876, "grad_norm": 0.7473646586727819, "learning_rate": 9.714512307782035e-06, "loss": 0.3013, "step": 3931 }, { "epoch": 0.13493479752916954, "grad_norm": 0.8104924899082703, "learning_rate": 9.714327179232156e-06, "loss": 0.3661, "step": 3932 }, { "epoch": 0.1349691146190803, "grad_norm": 0.7237544483379489, "learning_rate": 9.714141992442263e-06, "loss": 0.3294, "step": 3933 }, { "epoch": 0.13500343170899107, "grad_norm": 0.8501158550113136, "learning_rate": 9.71395674741464e-06, "loss": 0.3393, "step": 3934 }, { "epoch": 0.13503774879890185, "grad_norm": 0.7633695168649891, "learning_rate": 9.713771444151578e-06, "loss": 0.3416, "step": 3935 }, { "epoch": 0.13507206588881263, "grad_norm": 0.900456259757287, "learning_rate": 9.713586082655367e-06, "loss": 0.472, "step": 3936 }, { "epoch": 0.1351063829787234, "grad_norm": 0.8055215048439022, "learning_rate": 9.713400662928294e-06, "loss": 0.4127, "step": 3937 }, { "epoch": 0.13514070006863418, "grad_norm": 0.7967923480999656, "learning_rate": 9.713215184972651e-06, "loss": 0.3734, "step": 3938 }, { "epoch": 0.13517501715854496, "grad_norm": 0.7407533793667258, "learning_rate": 9.71302964879073e-06, "loss": 0.3125, "step": 3939 }, { "epoch": 0.13520933424845574, "grad_norm": 0.7292649858501673, "learning_rate": 9.712844054384823e-06, "loss": 0.3208, "step": 3940 }, { "epoch": 0.1352436513383665, "grad_norm": 0.844805211866971, "learning_rate": 9.712658401757222e-06, "loss": 0.3418, "step": 3941 }, { "epoch": 0.13527796842827727, "grad_norm": 0.8168602106267151, "learning_rate": 9.71247269091022e-06, "loss": 0.3434, "step": 3942 }, { "epoch": 0.13531228551818805, "grad_norm": 0.870180763102146, "learning_rate": 9.712286921846113e-06, "loss": 0.373, "step": 3943 }, { "epoch": 0.13534660260809883, "grad_norm": 0.8031974468743853, "learning_rate": 9.712101094567194e-06, "loss": 0.3669, "step": 3944 }, { "epoch": 0.1353809196980096, "grad_norm": 0.8350405315565487, "learning_rate": 9.711915209075762e-06, "loss": 0.405, "step": 3945 }, { "epoch": 0.1354152367879204, "grad_norm": 0.7860638069694652, "learning_rate": 9.71172926537411e-06, "loss": 0.3406, "step": 3946 }, { "epoch": 0.13544955387783117, "grad_norm": 0.8798310568183813, "learning_rate": 9.711543263464536e-06, "loss": 0.3344, "step": 3947 }, { "epoch": 0.13548387096774195, "grad_norm": 0.7956841015351614, "learning_rate": 9.711357203349336e-06, "loss": 0.3494, "step": 3948 }, { "epoch": 0.1355181880576527, "grad_norm": 0.8814958534057048, "learning_rate": 9.711171085030814e-06, "loss": 0.4342, "step": 3949 }, { "epoch": 0.13555250514756348, "grad_norm": 0.7358009519781442, "learning_rate": 9.710984908511265e-06, "loss": 0.3222, "step": 3950 }, { "epoch": 0.13558682223747426, "grad_norm": 0.8866850870887141, "learning_rate": 9.71079867379299e-06, "loss": 0.3005, "step": 3951 }, { "epoch": 0.13562113932738504, "grad_norm": 1.100762287362415, "learning_rate": 9.710612380878288e-06, "loss": 0.396, "step": 3952 }, { "epoch": 0.13565545641729582, "grad_norm": 0.8627440143024937, "learning_rate": 9.710426029769465e-06, "loss": 0.3966, "step": 3953 }, { "epoch": 0.1356897735072066, "grad_norm": 0.8880714964851676, "learning_rate": 9.710239620468817e-06, "loss": 0.3819, "step": 3954 }, { "epoch": 0.13572409059711738, "grad_norm": 0.8576258794686556, "learning_rate": 9.710053152978652e-06, "loss": 0.3496, "step": 3955 }, { "epoch": 0.13575840768702813, "grad_norm": 0.8359662435354225, "learning_rate": 9.709866627301271e-06, "loss": 0.3682, "step": 3956 }, { "epoch": 0.1357927247769389, "grad_norm": 0.9563864609413597, "learning_rate": 9.70968004343898e-06, "loss": 0.3381, "step": 3957 }, { "epoch": 0.13582704186684968, "grad_norm": 0.8226949423914146, "learning_rate": 9.70949340139408e-06, "loss": 0.3449, "step": 3958 }, { "epoch": 0.13586135895676046, "grad_norm": 0.9306059864710731, "learning_rate": 9.709306701168883e-06, "loss": 0.3561, "step": 3959 }, { "epoch": 0.13589567604667124, "grad_norm": 0.7696896706143269, "learning_rate": 9.709119942765692e-06, "loss": 0.3567, "step": 3960 }, { "epoch": 0.13592999313658202, "grad_norm": 0.8269699808162242, "learning_rate": 9.708933126186814e-06, "loss": 0.3741, "step": 3961 }, { "epoch": 0.1359643102264928, "grad_norm": 0.8521133565121838, "learning_rate": 9.708746251434558e-06, "loss": 0.4138, "step": 3962 }, { "epoch": 0.13599862731640358, "grad_norm": 0.8903131530773738, "learning_rate": 9.70855931851123e-06, "loss": 0.3215, "step": 3963 }, { "epoch": 0.13603294440631433, "grad_norm": 0.7289585551808937, "learning_rate": 9.708372327419143e-06, "loss": 0.3631, "step": 3964 }, { "epoch": 0.1360672614962251, "grad_norm": 0.8024792343811505, "learning_rate": 9.708185278160604e-06, "loss": 0.3797, "step": 3965 }, { "epoch": 0.1361015785861359, "grad_norm": 0.8315961634094315, "learning_rate": 9.707998170737926e-06, "loss": 0.3387, "step": 3966 }, { "epoch": 0.13613589567604667, "grad_norm": 0.798479625509827, "learning_rate": 9.707811005153418e-06, "loss": 0.3873, "step": 3967 }, { "epoch": 0.13617021276595745, "grad_norm": 0.8199832801588988, "learning_rate": 9.707623781409396e-06, "loss": 0.3259, "step": 3968 }, { "epoch": 0.13620452985586823, "grad_norm": 0.8830714683532426, "learning_rate": 9.70743649950817e-06, "loss": 0.342, "step": 3969 }, { "epoch": 0.136238846945779, "grad_norm": 0.9586983058281853, "learning_rate": 9.707249159452055e-06, "loss": 0.3652, "step": 3970 }, { "epoch": 0.1362731640356898, "grad_norm": 0.7852720230033471, "learning_rate": 9.707061761243362e-06, "loss": 0.2892, "step": 3971 }, { "epoch": 0.13630748112560054, "grad_norm": 0.7506166142578138, "learning_rate": 9.706874304884411e-06, "loss": 0.3795, "step": 3972 }, { "epoch": 0.13634179821551132, "grad_norm": 0.7827229407536205, "learning_rate": 9.706686790377515e-06, "loss": 0.3466, "step": 3973 }, { "epoch": 0.1363761153054221, "grad_norm": 0.8262181252351442, "learning_rate": 9.706499217724993e-06, "loss": 0.3641, "step": 3974 }, { "epoch": 0.13641043239533288, "grad_norm": 0.8337685912412836, "learning_rate": 9.706311586929159e-06, "loss": 0.3431, "step": 3975 }, { "epoch": 0.13644474948524365, "grad_norm": 0.8332339744559485, "learning_rate": 9.706123897992333e-06, "loss": 0.3621, "step": 3976 }, { "epoch": 0.13647906657515443, "grad_norm": 0.7767419969479045, "learning_rate": 9.70593615091683e-06, "loss": 0.3859, "step": 3977 }, { "epoch": 0.1365133836650652, "grad_norm": 0.8086623526821602, "learning_rate": 9.705748345704977e-06, "loss": 0.3047, "step": 3978 }, { "epoch": 0.13654770075497596, "grad_norm": 0.8894623793260831, "learning_rate": 9.705560482359086e-06, "loss": 0.3538, "step": 3979 }, { "epoch": 0.13658201784488674, "grad_norm": 0.9350448575628919, "learning_rate": 9.705372560881482e-06, "loss": 0.3395, "step": 3980 }, { "epoch": 0.13661633493479752, "grad_norm": 0.7816805451399472, "learning_rate": 9.705184581274488e-06, "loss": 0.3406, "step": 3981 }, { "epoch": 0.1366506520247083, "grad_norm": 0.8818613750254991, "learning_rate": 9.704996543540424e-06, "loss": 0.3091, "step": 3982 }, { "epoch": 0.13668496911461908, "grad_norm": 0.9485320964299029, "learning_rate": 9.704808447681611e-06, "loss": 0.326, "step": 3983 }, { "epoch": 0.13671928620452986, "grad_norm": 0.9454710659639013, "learning_rate": 9.704620293700377e-06, "loss": 0.3734, "step": 3984 }, { "epoch": 0.13675360329444064, "grad_norm": 0.7684520473925459, "learning_rate": 9.704432081599042e-06, "loss": 0.3181, "step": 3985 }, { "epoch": 0.13678792038435142, "grad_norm": 0.7920122903347179, "learning_rate": 9.704243811379937e-06, "loss": 0.4261, "step": 3986 }, { "epoch": 0.13682223747426217, "grad_norm": 0.7817202632512656, "learning_rate": 9.704055483045381e-06, "loss": 0.3157, "step": 3987 }, { "epoch": 0.13685655456417295, "grad_norm": 0.8398710119830114, "learning_rate": 9.703867096597706e-06, "loss": 0.3453, "step": 3988 }, { "epoch": 0.13689087165408373, "grad_norm": 0.8518320228224117, "learning_rate": 9.703678652039235e-06, "loss": 0.3412, "step": 3989 }, { "epoch": 0.1369251887439945, "grad_norm": 0.8592193673796615, "learning_rate": 9.7034901493723e-06, "loss": 0.3614, "step": 3990 }, { "epoch": 0.1369595058339053, "grad_norm": 0.8423610837040311, "learning_rate": 9.703301588599228e-06, "loss": 0.3447, "step": 3991 }, { "epoch": 0.13699382292381607, "grad_norm": 0.7848264010166909, "learning_rate": 9.703112969722347e-06, "loss": 0.3551, "step": 3992 }, { "epoch": 0.13702814001372685, "grad_norm": 0.8096592800430897, "learning_rate": 9.70292429274399e-06, "loss": 0.3568, "step": 3993 }, { "epoch": 0.13706245710363762, "grad_norm": 0.8717967965020196, "learning_rate": 9.702735557666486e-06, "loss": 0.3722, "step": 3994 }, { "epoch": 0.13709677419354838, "grad_norm": 0.7472647493958352, "learning_rate": 9.702546764492166e-06, "loss": 0.291, "step": 3995 }, { "epoch": 0.13713109128345916, "grad_norm": 0.8755681450795659, "learning_rate": 9.702357913223364e-06, "loss": 0.3182, "step": 3996 }, { "epoch": 0.13716540837336993, "grad_norm": 0.8844477987760532, "learning_rate": 9.702169003862415e-06, "loss": 0.376, "step": 3997 }, { "epoch": 0.13719972546328071, "grad_norm": 0.8651843583694343, "learning_rate": 9.701980036411647e-06, "loss": 0.3526, "step": 3998 }, { "epoch": 0.1372340425531915, "grad_norm": 0.854554715343053, "learning_rate": 9.701791010873399e-06, "loss": 0.3448, "step": 3999 }, { "epoch": 0.13726835964310227, "grad_norm": 0.8039751821144666, "learning_rate": 9.701601927250003e-06, "loss": 0.3577, "step": 4000 }, { "epoch": 0.13730267673301305, "grad_norm": 0.8461414751916319, "learning_rate": 9.701412785543798e-06, "loss": 0.424, "step": 4001 }, { "epoch": 0.1373369938229238, "grad_norm": 0.9129534732094868, "learning_rate": 9.70122358575712e-06, "loss": 0.3587, "step": 4002 }, { "epoch": 0.13737131091283458, "grad_norm": 0.7330853922248198, "learning_rate": 9.701034327892304e-06, "loss": 0.3264, "step": 4003 }, { "epoch": 0.13740562800274536, "grad_norm": 0.8151644075527383, "learning_rate": 9.700845011951691e-06, "loss": 0.324, "step": 4004 }, { "epoch": 0.13743994509265614, "grad_norm": 0.777033031023085, "learning_rate": 9.700655637937618e-06, "loss": 0.3869, "step": 4005 }, { "epoch": 0.13747426218256692, "grad_norm": 0.8554314830952717, "learning_rate": 9.700466205852425e-06, "loss": 0.3408, "step": 4006 }, { "epoch": 0.1375085792724777, "grad_norm": 0.7733637033348875, "learning_rate": 9.700276715698453e-06, "loss": 0.3699, "step": 4007 }, { "epoch": 0.13754289636238848, "grad_norm": 1.3874664335122775, "learning_rate": 9.700087167478041e-06, "loss": 0.3929, "step": 4008 }, { "epoch": 0.13757721345229926, "grad_norm": 0.8200289491398897, "learning_rate": 9.699897561193534e-06, "loss": 0.3774, "step": 4009 }, { "epoch": 0.13761153054221, "grad_norm": 0.871311152635403, "learning_rate": 9.69970789684727e-06, "loss": 0.3493, "step": 4010 }, { "epoch": 0.1376458476321208, "grad_norm": 0.8627332708611075, "learning_rate": 9.699518174441596e-06, "loss": 0.3946, "step": 4011 }, { "epoch": 0.13768016472203157, "grad_norm": 0.8265698758138618, "learning_rate": 9.699328393978853e-06, "loss": 0.339, "step": 4012 }, { "epoch": 0.13771448181194235, "grad_norm": 0.7268694931144447, "learning_rate": 9.699138555461387e-06, "loss": 0.3139, "step": 4013 }, { "epoch": 0.13774879890185313, "grad_norm": 0.7528795480892787, "learning_rate": 9.698948658891542e-06, "loss": 0.3659, "step": 4014 }, { "epoch": 0.1377831159917639, "grad_norm": 0.8290587043063125, "learning_rate": 9.698758704271664e-06, "loss": 0.3906, "step": 4015 }, { "epoch": 0.13781743308167468, "grad_norm": 0.8481636806237066, "learning_rate": 9.698568691604102e-06, "loss": 0.3844, "step": 4016 }, { "epoch": 0.13785175017158546, "grad_norm": 0.8097718126120815, "learning_rate": 9.698378620891202e-06, "loss": 0.3417, "step": 4017 }, { "epoch": 0.13788606726149621, "grad_norm": 0.8084597285609907, "learning_rate": 9.698188492135311e-06, "loss": 0.3734, "step": 4018 }, { "epoch": 0.137920384351407, "grad_norm": 0.8157976847979519, "learning_rate": 9.697998305338778e-06, "loss": 0.3691, "step": 4019 }, { "epoch": 0.13795470144131777, "grad_norm": 0.8446720534130019, "learning_rate": 9.697808060503956e-06, "loss": 0.3789, "step": 4020 }, { "epoch": 0.13798901853122855, "grad_norm": 0.7556158905701479, "learning_rate": 9.69761775763319e-06, "loss": 0.3267, "step": 4021 }, { "epoch": 0.13802333562113933, "grad_norm": 0.8512864298201958, "learning_rate": 9.697427396728835e-06, "loss": 0.3813, "step": 4022 }, { "epoch": 0.1380576527110501, "grad_norm": 0.7547093106673317, "learning_rate": 9.697236977793241e-06, "loss": 0.3699, "step": 4023 }, { "epoch": 0.1380919698009609, "grad_norm": 0.7592037954973087, "learning_rate": 9.69704650082876e-06, "loss": 0.3458, "step": 4024 }, { "epoch": 0.13812628689087164, "grad_norm": 0.8772738093622618, "learning_rate": 9.696855965837747e-06, "loss": 0.349, "step": 4025 }, { "epoch": 0.13816060398078242, "grad_norm": 0.8168451619648905, "learning_rate": 9.696665372822552e-06, "loss": 0.3402, "step": 4026 }, { "epoch": 0.1381949210706932, "grad_norm": 0.877834020370345, "learning_rate": 9.696474721785534e-06, "loss": 0.4041, "step": 4027 }, { "epoch": 0.13822923816060398, "grad_norm": 0.8109536421876382, "learning_rate": 9.696284012729045e-06, "loss": 0.3823, "step": 4028 }, { "epoch": 0.13826355525051476, "grad_norm": 0.8741410303467569, "learning_rate": 9.696093245655443e-06, "loss": 0.3136, "step": 4029 }, { "epoch": 0.13829787234042554, "grad_norm": 0.8731355571693147, "learning_rate": 9.695902420567085e-06, "loss": 0.3412, "step": 4030 }, { "epoch": 0.13833218943033632, "grad_norm": 0.8871435194241281, "learning_rate": 9.695711537466326e-06, "loss": 0.3247, "step": 4031 }, { "epoch": 0.1383665065202471, "grad_norm": 0.9329465496939654, "learning_rate": 9.695520596355527e-06, "loss": 0.3562, "step": 4032 }, { "epoch": 0.13840082361015785, "grad_norm": 0.9094839280737423, "learning_rate": 9.695329597237044e-06, "loss": 0.3371, "step": 4033 }, { "epoch": 0.13843514070006863, "grad_norm": 0.7917910616703845, "learning_rate": 9.695138540113241e-06, "loss": 0.3613, "step": 4034 }, { "epoch": 0.1384694577899794, "grad_norm": 0.8564568531780972, "learning_rate": 9.694947424986472e-06, "loss": 0.386, "step": 4035 }, { "epoch": 0.13850377487989018, "grad_norm": 0.796057270717588, "learning_rate": 9.694756251859103e-06, "loss": 0.3285, "step": 4036 }, { "epoch": 0.13853809196980096, "grad_norm": 0.8160248633578262, "learning_rate": 9.694565020733492e-06, "loss": 0.3523, "step": 4037 }, { "epoch": 0.13857240905971174, "grad_norm": 0.8159011598377155, "learning_rate": 9.694373731612007e-06, "loss": 0.3707, "step": 4038 }, { "epoch": 0.13860672614962252, "grad_norm": 0.7105401967931706, "learning_rate": 9.694182384497007e-06, "loss": 0.3173, "step": 4039 }, { "epoch": 0.1386410432395333, "grad_norm": 0.868172188841973, "learning_rate": 9.693990979390854e-06, "loss": 0.3428, "step": 4040 }, { "epoch": 0.13867536032944405, "grad_norm": 0.768177314284304, "learning_rate": 9.693799516295917e-06, "loss": 0.2782, "step": 4041 }, { "epoch": 0.13870967741935483, "grad_norm": 1.3448834966297387, "learning_rate": 9.693607995214559e-06, "loss": 0.3777, "step": 4042 }, { "epoch": 0.1387439945092656, "grad_norm": 0.852996199466307, "learning_rate": 9.693416416149147e-06, "loss": 0.3905, "step": 4043 }, { "epoch": 0.1387783115991764, "grad_norm": 0.8792665141667508, "learning_rate": 9.693224779102045e-06, "loss": 0.3528, "step": 4044 }, { "epoch": 0.13881262868908717, "grad_norm": 0.859858857007085, "learning_rate": 9.693033084075625e-06, "loss": 0.3593, "step": 4045 }, { "epoch": 0.13884694577899795, "grad_norm": 0.8658582141037549, "learning_rate": 9.692841331072252e-06, "loss": 0.3556, "step": 4046 }, { "epoch": 0.13888126286890873, "grad_norm": 0.865989184181208, "learning_rate": 9.692649520094294e-06, "loss": 0.3717, "step": 4047 }, { "epoch": 0.13891557995881948, "grad_norm": 1.254853258032889, "learning_rate": 9.692457651144125e-06, "loss": 0.3131, "step": 4048 }, { "epoch": 0.13894989704873026, "grad_norm": 0.851963915349442, "learning_rate": 9.69226572422411e-06, "loss": 0.3354, "step": 4049 }, { "epoch": 0.13898421413864104, "grad_norm": 0.9287666731326046, "learning_rate": 9.692073739336624e-06, "loss": 0.4382, "step": 4050 }, { "epoch": 0.13901853122855182, "grad_norm": 0.8722835863329863, "learning_rate": 9.691881696484038e-06, "loss": 0.3273, "step": 4051 }, { "epoch": 0.1390528483184626, "grad_norm": 0.909495382113667, "learning_rate": 9.691689595668724e-06, "loss": 0.408, "step": 4052 }, { "epoch": 0.13908716540837338, "grad_norm": 0.9480223735513167, "learning_rate": 9.691497436893054e-06, "loss": 0.312, "step": 4053 }, { "epoch": 0.13912148249828415, "grad_norm": 0.7595281089495806, "learning_rate": 9.691305220159405e-06, "loss": 0.3692, "step": 4054 }, { "epoch": 0.13915579958819493, "grad_norm": 0.8312375205497932, "learning_rate": 9.691112945470147e-06, "loss": 0.3724, "step": 4055 }, { "epoch": 0.13919011667810569, "grad_norm": 0.8235804489108685, "learning_rate": 9.690920612827658e-06, "loss": 0.3661, "step": 4056 }, { "epoch": 0.13922443376801646, "grad_norm": 0.7128743858381507, "learning_rate": 9.690728222234315e-06, "loss": 0.3675, "step": 4057 }, { "epoch": 0.13925875085792724, "grad_norm": 0.7402534635022884, "learning_rate": 9.690535773692493e-06, "loss": 0.3196, "step": 4058 }, { "epoch": 0.13929306794783802, "grad_norm": 0.8338689639255383, "learning_rate": 9.69034326720457e-06, "loss": 0.4003, "step": 4059 }, { "epoch": 0.1393273850377488, "grad_norm": 0.8602347859464977, "learning_rate": 9.690150702772925e-06, "loss": 0.3193, "step": 4060 }, { "epoch": 0.13936170212765958, "grad_norm": 0.826593608004492, "learning_rate": 9.689958080399936e-06, "loss": 0.3523, "step": 4061 }, { "epoch": 0.13939601921757036, "grad_norm": 0.7904170540279831, "learning_rate": 9.689765400087982e-06, "loss": 0.3611, "step": 4062 }, { "epoch": 0.1394303363074811, "grad_norm": 0.8876843241315321, "learning_rate": 9.689572661839445e-06, "loss": 0.3435, "step": 4063 }, { "epoch": 0.1394646533973919, "grad_norm": 0.849165456630105, "learning_rate": 9.689379865656706e-06, "loss": 0.3588, "step": 4064 }, { "epoch": 0.13949897048730267, "grad_norm": 0.7516337581365327, "learning_rate": 9.689187011542145e-06, "loss": 0.3147, "step": 4065 }, { "epoch": 0.13953328757721345, "grad_norm": 0.7830890617631248, "learning_rate": 9.688994099498147e-06, "loss": 0.3949, "step": 4066 }, { "epoch": 0.13956760466712423, "grad_norm": 0.7922895794561192, "learning_rate": 9.688801129527093e-06, "loss": 0.3132, "step": 4067 }, { "epoch": 0.139601921757035, "grad_norm": 0.8104485448968398, "learning_rate": 9.688608101631368e-06, "loss": 0.3726, "step": 4068 }, { "epoch": 0.1396362388469458, "grad_norm": 0.8655722425190336, "learning_rate": 9.688415015813354e-06, "loss": 0.3477, "step": 4069 }, { "epoch": 0.13967055593685657, "grad_norm": 0.8275159512585133, "learning_rate": 9.688221872075441e-06, "loss": 0.3509, "step": 4070 }, { "epoch": 0.13970487302676732, "grad_norm": 0.8885862458460666, "learning_rate": 9.688028670420011e-06, "loss": 0.3712, "step": 4071 }, { "epoch": 0.1397391901166781, "grad_norm": 2.508239697689049, "learning_rate": 9.687835410849453e-06, "loss": 0.3304, "step": 4072 }, { "epoch": 0.13977350720658888, "grad_norm": 0.8734048816981702, "learning_rate": 9.687642093366155e-06, "loss": 0.35, "step": 4073 }, { "epoch": 0.13980782429649966, "grad_norm": 0.8788328757597881, "learning_rate": 9.687448717972503e-06, "loss": 0.3108, "step": 4074 }, { "epoch": 0.13984214138641043, "grad_norm": 0.9694552850071189, "learning_rate": 9.687255284670889e-06, "loss": 0.3387, "step": 4075 }, { "epoch": 0.1398764584763212, "grad_norm": 0.909088048640463, "learning_rate": 9.6870617934637e-06, "loss": 0.3476, "step": 4076 }, { "epoch": 0.139910775566232, "grad_norm": 0.8133516771383801, "learning_rate": 9.686868244353326e-06, "loss": 0.388, "step": 4077 }, { "epoch": 0.13994509265614277, "grad_norm": 0.7795765646544367, "learning_rate": 9.686674637342159e-06, "loss": 0.3463, "step": 4078 }, { "epoch": 0.13997940974605352, "grad_norm": 0.7566074748028111, "learning_rate": 9.686480972432592e-06, "loss": 0.2784, "step": 4079 }, { "epoch": 0.1400137268359643, "grad_norm": 0.8313476893485403, "learning_rate": 9.686287249627018e-06, "loss": 0.4079, "step": 4080 }, { "epoch": 0.14004804392587508, "grad_norm": 0.8251977881004425, "learning_rate": 9.686093468927827e-06, "loss": 0.3647, "step": 4081 }, { "epoch": 0.14008236101578586, "grad_norm": 0.8283681942835577, "learning_rate": 9.685899630337413e-06, "loss": 0.4203, "step": 4082 }, { "epoch": 0.14011667810569664, "grad_norm": 0.7545821591691111, "learning_rate": 9.685705733858175e-06, "loss": 0.3007, "step": 4083 }, { "epoch": 0.14015099519560742, "grad_norm": 0.7630386007808468, "learning_rate": 9.685511779492504e-06, "loss": 0.3517, "step": 4084 }, { "epoch": 0.1401853122855182, "grad_norm": 0.8709538590515021, "learning_rate": 9.6853177672428e-06, "loss": 0.4212, "step": 4085 }, { "epoch": 0.14021962937542895, "grad_norm": 0.8551843697461796, "learning_rate": 9.685123697111456e-06, "loss": 0.3363, "step": 4086 }, { "epoch": 0.14025394646533973, "grad_norm": 0.8731034475218257, "learning_rate": 9.68492956910087e-06, "loss": 0.3562, "step": 4087 }, { "epoch": 0.1402882635552505, "grad_norm": 0.8563130669667102, "learning_rate": 9.684735383213441e-06, "loss": 0.3332, "step": 4088 }, { "epoch": 0.1403225806451613, "grad_norm": 0.8351181565801459, "learning_rate": 9.684541139451572e-06, "loss": 0.3091, "step": 4089 }, { "epoch": 0.14035689773507207, "grad_norm": 0.7510832985160293, "learning_rate": 9.684346837817655e-06, "loss": 0.321, "step": 4090 }, { "epoch": 0.14039121482498285, "grad_norm": 0.7204961752223404, "learning_rate": 9.684152478314094e-06, "loss": 0.3215, "step": 4091 }, { "epoch": 0.14042553191489363, "grad_norm": 0.8077757378553969, "learning_rate": 9.683958060943293e-06, "loss": 0.3195, "step": 4092 }, { "epoch": 0.1404598490048044, "grad_norm": 0.8085719135681624, "learning_rate": 9.683763585707651e-06, "loss": 0.3543, "step": 4093 }, { "epoch": 0.14049416609471516, "grad_norm": 0.9249021105923585, "learning_rate": 9.683569052609569e-06, "loss": 0.3635, "step": 4094 }, { "epoch": 0.14052848318462594, "grad_norm": 0.8323586624737938, "learning_rate": 9.683374461651452e-06, "loss": 0.3475, "step": 4095 }, { "epoch": 0.14056280027453671, "grad_norm": 0.7438781695231008, "learning_rate": 9.683179812835706e-06, "loss": 0.3167, "step": 4096 }, { "epoch": 0.1405971173644475, "grad_norm": 0.7010380210683392, "learning_rate": 9.68298510616473e-06, "loss": 0.3341, "step": 4097 }, { "epoch": 0.14063143445435827, "grad_norm": 0.7548799901144418, "learning_rate": 9.682790341640935e-06, "loss": 0.3467, "step": 4098 }, { "epoch": 0.14066575154426905, "grad_norm": 0.8480762132446322, "learning_rate": 9.682595519266724e-06, "loss": 0.3495, "step": 4099 }, { "epoch": 0.14070006863417983, "grad_norm": 0.7525861945522923, "learning_rate": 9.682400639044507e-06, "loss": 0.3025, "step": 4100 }, { "epoch": 0.1407343857240906, "grad_norm": 0.8031997511168214, "learning_rate": 9.682205700976687e-06, "loss": 0.33, "step": 4101 }, { "epoch": 0.14076870281400136, "grad_norm": 0.7919001044914488, "learning_rate": 9.682010705065674e-06, "loss": 0.3493, "step": 4102 }, { "epoch": 0.14080301990391214, "grad_norm": 0.855290489177912, "learning_rate": 9.68181565131388e-06, "loss": 0.3909, "step": 4103 }, { "epoch": 0.14083733699382292, "grad_norm": 1.0046856528421602, "learning_rate": 9.68162053972371e-06, "loss": 0.3701, "step": 4104 }, { "epoch": 0.1408716540837337, "grad_norm": 0.7746995080502562, "learning_rate": 9.681425370297577e-06, "loss": 0.2955, "step": 4105 }, { "epoch": 0.14090597117364448, "grad_norm": 0.8448463357025212, "learning_rate": 9.681230143037892e-06, "loss": 0.4026, "step": 4106 }, { "epoch": 0.14094028826355526, "grad_norm": 0.9431581738029969, "learning_rate": 9.681034857947064e-06, "loss": 0.3294, "step": 4107 }, { "epoch": 0.14097460535346604, "grad_norm": 0.9509581595462232, "learning_rate": 9.680839515027512e-06, "loss": 0.4397, "step": 4108 }, { "epoch": 0.1410089224433768, "grad_norm": 0.7905851724823858, "learning_rate": 9.680644114281643e-06, "loss": 0.3694, "step": 4109 }, { "epoch": 0.14104323953328757, "grad_norm": 0.8572819288113676, "learning_rate": 9.680448655711873e-06, "loss": 0.3996, "step": 4110 }, { "epoch": 0.14107755662319835, "grad_norm": 0.7925532389301229, "learning_rate": 9.680253139320617e-06, "loss": 0.3274, "step": 4111 }, { "epoch": 0.14111187371310913, "grad_norm": 0.8673712129320142, "learning_rate": 9.680057565110289e-06, "loss": 0.3771, "step": 4112 }, { "epoch": 0.1411461908030199, "grad_norm": 0.7161077940312903, "learning_rate": 9.679861933083308e-06, "loss": 0.3266, "step": 4113 }, { "epoch": 0.14118050789293068, "grad_norm": 0.85743784456514, "learning_rate": 9.679666243242087e-06, "loss": 0.4258, "step": 4114 }, { "epoch": 0.14121482498284146, "grad_norm": 0.8884325616872777, "learning_rate": 9.679470495589048e-06, "loss": 0.3817, "step": 4115 }, { "epoch": 0.14124914207275224, "grad_norm": 0.8500947186823814, "learning_rate": 9.679274690126604e-06, "loss": 0.4093, "step": 4116 }, { "epoch": 0.141283459162663, "grad_norm": 1.4156627229300787, "learning_rate": 9.679078826857178e-06, "loss": 0.3402, "step": 4117 }, { "epoch": 0.14131777625257377, "grad_norm": 0.8462100837805527, "learning_rate": 9.678882905783189e-06, "loss": 0.3791, "step": 4118 }, { "epoch": 0.14135209334248455, "grad_norm": 0.9280852252386729, "learning_rate": 9.678686926907056e-06, "loss": 0.3115, "step": 4119 }, { "epoch": 0.14138641043239533, "grad_norm": 0.8908367927186475, "learning_rate": 9.6784908902312e-06, "loss": 0.3888, "step": 4120 }, { "epoch": 0.1414207275223061, "grad_norm": 0.9638398648179202, "learning_rate": 9.678294795758045e-06, "loss": 0.3811, "step": 4121 }, { "epoch": 0.1414550446122169, "grad_norm": 0.7779020046055469, "learning_rate": 9.678098643490013e-06, "loss": 0.3402, "step": 4122 }, { "epoch": 0.14148936170212767, "grad_norm": 0.9333519883240604, "learning_rate": 9.677902433429525e-06, "loss": 0.357, "step": 4123 }, { "epoch": 0.14152367879203845, "grad_norm": 0.80395229259005, "learning_rate": 9.677706165579005e-06, "loss": 0.3686, "step": 4124 }, { "epoch": 0.1415579958819492, "grad_norm": 0.9049392904110555, "learning_rate": 9.677509839940882e-06, "loss": 0.3197, "step": 4125 }, { "epoch": 0.14159231297185998, "grad_norm": 0.7702160549563951, "learning_rate": 9.677313456517577e-06, "loss": 0.3018, "step": 4126 }, { "epoch": 0.14162663006177076, "grad_norm": 0.7453218830696892, "learning_rate": 9.677117015311517e-06, "loss": 0.3392, "step": 4127 }, { "epoch": 0.14166094715168154, "grad_norm": 1.0032081890396447, "learning_rate": 9.67692051632513e-06, "loss": 0.392, "step": 4128 }, { "epoch": 0.14169526424159232, "grad_norm": 0.7274268624403355, "learning_rate": 9.676723959560844e-06, "loss": 0.3106, "step": 4129 }, { "epoch": 0.1417295813315031, "grad_norm": 0.777983699217554, "learning_rate": 9.676527345021084e-06, "loss": 0.3137, "step": 4130 }, { "epoch": 0.14176389842141388, "grad_norm": 0.9050625869232153, "learning_rate": 9.67633067270828e-06, "loss": 0.3591, "step": 4131 }, { "epoch": 0.14179821551132463, "grad_norm": 0.8530548283108426, "learning_rate": 9.676133942624866e-06, "loss": 0.4358, "step": 4132 }, { "epoch": 0.1418325326012354, "grad_norm": 0.8149878930466851, "learning_rate": 9.675937154773266e-06, "loss": 0.3392, "step": 4133 }, { "epoch": 0.14186684969114619, "grad_norm": 0.9012568639461416, "learning_rate": 9.675740309155915e-06, "loss": 0.418, "step": 4134 }, { "epoch": 0.14190116678105696, "grad_norm": 0.8225449763450987, "learning_rate": 9.675543405775243e-06, "loss": 0.3251, "step": 4135 }, { "epoch": 0.14193548387096774, "grad_norm": 0.8824372993983778, "learning_rate": 9.675346444633686e-06, "loss": 0.3541, "step": 4136 }, { "epoch": 0.14196980096087852, "grad_norm": 0.8117469392846887, "learning_rate": 9.675149425733673e-06, "loss": 0.3938, "step": 4137 }, { "epoch": 0.1420041180507893, "grad_norm": 0.7471630087452911, "learning_rate": 9.67495234907764e-06, "loss": 0.391, "step": 4138 }, { "epoch": 0.14203843514070008, "grad_norm": 0.8562818279883028, "learning_rate": 9.67475521466802e-06, "loss": 0.3111, "step": 4139 }, { "epoch": 0.14207275223061083, "grad_norm": 0.9862857072727697, "learning_rate": 9.67455802250725e-06, "loss": 0.3207, "step": 4140 }, { "epoch": 0.1421070693205216, "grad_norm": 0.7871311212458757, "learning_rate": 9.674360772597766e-06, "loss": 0.4406, "step": 4141 }, { "epoch": 0.1421413864104324, "grad_norm": 0.8331199703766512, "learning_rate": 9.674163464942004e-06, "loss": 0.36, "step": 4142 }, { "epoch": 0.14217570350034317, "grad_norm": 0.8222937615012348, "learning_rate": 9.673966099542402e-06, "loss": 0.3885, "step": 4143 }, { "epoch": 0.14221002059025395, "grad_norm": 0.7345563985873651, "learning_rate": 9.673768676401399e-06, "loss": 0.3351, "step": 4144 }, { "epoch": 0.14224433768016473, "grad_norm": 0.8762541559204964, "learning_rate": 9.673571195521432e-06, "loss": 0.3642, "step": 4145 }, { "epoch": 0.1422786547700755, "grad_norm": 0.8564625605262913, "learning_rate": 9.673373656904943e-06, "loss": 0.3637, "step": 4146 }, { "epoch": 0.1423129718599863, "grad_norm": 0.9333950798111097, "learning_rate": 9.67317606055437e-06, "loss": 0.3734, "step": 4147 }, { "epoch": 0.14234728894989704, "grad_norm": 0.955491187717378, "learning_rate": 9.672978406472155e-06, "loss": 0.3658, "step": 4148 }, { "epoch": 0.14238160603980782, "grad_norm": 0.9626995255896627, "learning_rate": 9.672780694660741e-06, "loss": 0.3036, "step": 4149 }, { "epoch": 0.1424159231297186, "grad_norm": 0.7791132536288968, "learning_rate": 9.672582925122568e-06, "loss": 0.3211, "step": 4150 }, { "epoch": 0.14245024021962938, "grad_norm": 0.899299544160242, "learning_rate": 9.672385097860083e-06, "loss": 0.3868, "step": 4151 }, { "epoch": 0.14248455730954016, "grad_norm": 0.8851935575471184, "learning_rate": 9.672187212875725e-06, "loss": 0.3377, "step": 4152 }, { "epoch": 0.14251887439945093, "grad_norm": 0.8330468515781498, "learning_rate": 9.671989270171943e-06, "loss": 0.3096, "step": 4153 }, { "epoch": 0.1425531914893617, "grad_norm": 0.84200694262363, "learning_rate": 9.67179126975118e-06, "loss": 0.3302, "step": 4154 }, { "epoch": 0.14258750857927247, "grad_norm": 0.8314047962286678, "learning_rate": 9.671593211615881e-06, "loss": 0.2995, "step": 4155 }, { "epoch": 0.14262182566918324, "grad_norm": 0.8491754599796019, "learning_rate": 9.671395095768496e-06, "loss": 0.326, "step": 4156 }, { "epoch": 0.14265614275909402, "grad_norm": 0.8043681073374515, "learning_rate": 9.671196922211472e-06, "loss": 0.3642, "step": 4157 }, { "epoch": 0.1426904598490048, "grad_norm": 0.9572300598222323, "learning_rate": 9.670998690947256e-06, "loss": 0.3908, "step": 4158 }, { "epoch": 0.14272477693891558, "grad_norm": 0.849585782058965, "learning_rate": 9.670800401978295e-06, "loss": 0.3469, "step": 4159 }, { "epoch": 0.14275909402882636, "grad_norm": 0.7504448578302261, "learning_rate": 9.67060205530704e-06, "loss": 0.3574, "step": 4160 }, { "epoch": 0.14279341111873714, "grad_norm": 0.8295923910519732, "learning_rate": 9.670403650935946e-06, "loss": 0.3184, "step": 4161 }, { "epoch": 0.14282772820864792, "grad_norm": 0.7918983454758574, "learning_rate": 9.670205188867456e-06, "loss": 0.2805, "step": 4162 }, { "epoch": 0.14286204529855867, "grad_norm": 0.7896520610667501, "learning_rate": 9.670006669104028e-06, "loss": 0.3721, "step": 4163 }, { "epoch": 0.14289636238846945, "grad_norm": 0.8736469773600777, "learning_rate": 9.669808091648113e-06, "loss": 0.3566, "step": 4164 }, { "epoch": 0.14293067947838023, "grad_norm": 0.7829093032983294, "learning_rate": 9.669609456502162e-06, "loss": 0.3926, "step": 4165 }, { "epoch": 0.142964996568291, "grad_norm": 0.8087533851952897, "learning_rate": 9.66941076366863e-06, "loss": 0.386, "step": 4166 }, { "epoch": 0.1429993136582018, "grad_norm": 0.8973060504121981, "learning_rate": 9.669212013149973e-06, "loss": 0.3804, "step": 4167 }, { "epoch": 0.14303363074811257, "grad_norm": 0.7798508941156381, "learning_rate": 9.669013204948645e-06, "loss": 0.3323, "step": 4168 }, { "epoch": 0.14306794783802335, "grad_norm": 0.8593084248546586, "learning_rate": 9.668814339067104e-06, "loss": 0.3187, "step": 4169 }, { "epoch": 0.1431022649279341, "grad_norm": 0.7897839575206659, "learning_rate": 9.668615415507803e-06, "loss": 0.3397, "step": 4170 }, { "epoch": 0.14313658201784488, "grad_norm": 0.7220957334434934, "learning_rate": 9.668416434273202e-06, "loss": 0.3268, "step": 4171 }, { "epoch": 0.14317089910775566, "grad_norm": 0.8836943841421461, "learning_rate": 9.66821739536576e-06, "loss": 0.3621, "step": 4172 }, { "epoch": 0.14320521619766644, "grad_norm": 0.82291401977347, "learning_rate": 9.668018298787933e-06, "loss": 0.322, "step": 4173 }, { "epoch": 0.14323953328757721, "grad_norm": 0.7685528498611814, "learning_rate": 9.667819144542182e-06, "loss": 0.3616, "step": 4174 }, { "epoch": 0.143273850377488, "grad_norm": 0.8281344242462304, "learning_rate": 9.66761993263097e-06, "loss": 0.3419, "step": 4175 }, { "epoch": 0.14330816746739877, "grad_norm": 0.8078665334723373, "learning_rate": 9.667420663056754e-06, "loss": 0.3277, "step": 4176 }, { "epoch": 0.14334248455730955, "grad_norm": 0.9454865735315648, "learning_rate": 9.667221335822e-06, "loss": 0.3699, "step": 4177 }, { "epoch": 0.1433768016472203, "grad_norm": 0.7938798912684428, "learning_rate": 9.667021950929165e-06, "loss": 0.3681, "step": 4178 }, { "epoch": 0.14341111873713108, "grad_norm": 0.7833973433646674, "learning_rate": 9.666822508380715e-06, "loss": 0.3694, "step": 4179 }, { "epoch": 0.14344543582704186, "grad_norm": 0.7654322942944841, "learning_rate": 9.666623008179114e-06, "loss": 0.3962, "step": 4180 }, { "epoch": 0.14347975291695264, "grad_norm": 0.7933444610552227, "learning_rate": 9.666423450326827e-06, "loss": 0.318, "step": 4181 }, { "epoch": 0.14351407000686342, "grad_norm": 0.7678272794633694, "learning_rate": 9.666223834826318e-06, "loss": 0.2999, "step": 4182 }, { "epoch": 0.1435483870967742, "grad_norm": 0.819245962745865, "learning_rate": 9.666024161680056e-06, "loss": 0.4051, "step": 4183 }, { "epoch": 0.14358270418668498, "grad_norm": 0.8163793724051532, "learning_rate": 9.665824430890504e-06, "loss": 0.3378, "step": 4184 }, { "epoch": 0.14361702127659576, "grad_norm": 0.9317493008414007, "learning_rate": 9.665624642460132e-06, "loss": 0.364, "step": 4185 }, { "epoch": 0.1436513383665065, "grad_norm": 0.9132417633698998, "learning_rate": 9.665424796391405e-06, "loss": 0.3837, "step": 4186 }, { "epoch": 0.1436856554564173, "grad_norm": 0.8277171333437623, "learning_rate": 9.665224892686796e-06, "loss": 0.3401, "step": 4187 }, { "epoch": 0.14371997254632807, "grad_norm": 0.951270975437776, "learning_rate": 9.665024931348772e-06, "loss": 0.3722, "step": 4188 }, { "epoch": 0.14375428963623885, "grad_norm": 0.7767080846095186, "learning_rate": 9.664824912379803e-06, "loss": 0.3602, "step": 4189 }, { "epoch": 0.14378860672614963, "grad_norm": 0.7682513507678636, "learning_rate": 9.664624835782362e-06, "loss": 0.3426, "step": 4190 }, { "epoch": 0.1438229238160604, "grad_norm": 0.831809502321174, "learning_rate": 9.664424701558919e-06, "loss": 0.3096, "step": 4191 }, { "epoch": 0.14385724090597118, "grad_norm": 0.899277691715548, "learning_rate": 9.664224509711948e-06, "loss": 0.3419, "step": 4192 }, { "epoch": 0.14389155799588194, "grad_norm": 0.8839123085608304, "learning_rate": 9.664024260243919e-06, "loss": 0.3164, "step": 4193 }, { "epoch": 0.14392587508579271, "grad_norm": 0.7298187228081533, "learning_rate": 9.663823953157309e-06, "loss": 0.3641, "step": 4194 }, { "epoch": 0.1439601921757035, "grad_norm": 0.8565553508438621, "learning_rate": 9.663623588454593e-06, "loss": 0.3623, "step": 4195 }, { "epoch": 0.14399450926561427, "grad_norm": 1.0883554502509312, "learning_rate": 9.663423166138244e-06, "loss": 0.3241, "step": 4196 }, { "epoch": 0.14402882635552505, "grad_norm": 0.6899866409755547, "learning_rate": 9.663222686210738e-06, "loss": 0.329, "step": 4197 }, { "epoch": 0.14406314344543583, "grad_norm": 0.872499780974478, "learning_rate": 9.663022148674552e-06, "loss": 0.4117, "step": 4198 }, { "epoch": 0.1440974605353466, "grad_norm": 0.7561955048841748, "learning_rate": 9.662821553532166e-06, "loss": 0.3273, "step": 4199 }, { "epoch": 0.1441317776252574, "grad_norm": 0.8467725410077748, "learning_rate": 9.662620900786056e-06, "loss": 0.3051, "step": 4200 }, { "epoch": 0.14416609471516814, "grad_norm": 0.7984635544686257, "learning_rate": 9.662420190438698e-06, "loss": 0.4217, "step": 4201 }, { "epoch": 0.14420041180507892, "grad_norm": 0.9006663489780021, "learning_rate": 9.662219422492576e-06, "loss": 0.353, "step": 4202 }, { "epoch": 0.1442347288949897, "grad_norm": 0.8324446295633383, "learning_rate": 9.662018596950169e-06, "loss": 0.362, "step": 4203 }, { "epoch": 0.14426904598490048, "grad_norm": 1.0773586927170795, "learning_rate": 9.661817713813958e-06, "loss": 0.3486, "step": 4204 }, { "epoch": 0.14430336307481126, "grad_norm": 0.8262059831219438, "learning_rate": 9.661616773086423e-06, "loss": 0.3384, "step": 4205 }, { "epoch": 0.14433768016472204, "grad_norm": 0.819276043548866, "learning_rate": 9.661415774770049e-06, "loss": 0.3339, "step": 4206 }, { "epoch": 0.14437199725463282, "grad_norm": 0.9073661891633396, "learning_rate": 9.661214718867318e-06, "loss": 0.4068, "step": 4207 }, { "epoch": 0.1444063143445436, "grad_norm": 0.8192885401487864, "learning_rate": 9.661013605380712e-06, "loss": 0.3584, "step": 4208 }, { "epoch": 0.14444063143445435, "grad_norm": 0.7867017422541186, "learning_rate": 9.660812434312719e-06, "loss": 0.338, "step": 4209 }, { "epoch": 0.14447494852436513, "grad_norm": 0.8253231205335675, "learning_rate": 9.660611205665819e-06, "loss": 0.3424, "step": 4210 }, { "epoch": 0.1445092656142759, "grad_norm": 0.6773525751437083, "learning_rate": 9.660409919442505e-06, "loss": 0.3528, "step": 4211 }, { "epoch": 0.14454358270418668, "grad_norm": 0.7711298708498971, "learning_rate": 9.660208575645258e-06, "loss": 0.3094, "step": 4212 }, { "epoch": 0.14457789979409746, "grad_norm": 0.8610946639453738, "learning_rate": 9.66000717427657e-06, "loss": 0.3625, "step": 4213 }, { "epoch": 0.14461221688400824, "grad_norm": 0.8355463096357394, "learning_rate": 9.659805715338923e-06, "loss": 0.3862, "step": 4214 }, { "epoch": 0.14464653397391902, "grad_norm": 0.857549319592278, "learning_rate": 9.65960419883481e-06, "loss": 0.336, "step": 4215 }, { "epoch": 0.14468085106382977, "grad_norm": 0.771464892548624, "learning_rate": 9.659402624766721e-06, "loss": 0.2821, "step": 4216 }, { "epoch": 0.14471516815374055, "grad_norm": 0.9202862911615366, "learning_rate": 9.659200993137143e-06, "loss": 0.3844, "step": 4217 }, { "epoch": 0.14474948524365133, "grad_norm": 1.032674979942829, "learning_rate": 9.65899930394857e-06, "loss": 0.3566, "step": 4218 }, { "epoch": 0.1447838023335621, "grad_norm": 0.7570790535844356, "learning_rate": 9.658797557203492e-06, "loss": 0.3105, "step": 4219 }, { "epoch": 0.1448181194234729, "grad_norm": 0.8641537666137403, "learning_rate": 9.658595752904402e-06, "loss": 0.3676, "step": 4220 }, { "epoch": 0.14485243651338367, "grad_norm": 0.8340346839342826, "learning_rate": 9.658393891053795e-06, "loss": 0.3526, "step": 4221 }, { "epoch": 0.14488675360329445, "grad_norm": 0.811703839370573, "learning_rate": 9.65819197165416e-06, "loss": 0.3352, "step": 4222 }, { "epoch": 0.14492107069320523, "grad_norm": 0.7379997262534204, "learning_rate": 9.657989994707995e-06, "loss": 0.3237, "step": 4223 }, { "epoch": 0.14495538778311598, "grad_norm": 0.8239013494847381, "learning_rate": 9.657787960217793e-06, "loss": 0.4125, "step": 4224 }, { "epoch": 0.14498970487302676, "grad_norm": 0.8172470627157724, "learning_rate": 9.657585868186053e-06, "loss": 0.3582, "step": 4225 }, { "epoch": 0.14502402196293754, "grad_norm": 0.7459824915743777, "learning_rate": 9.65738371861527e-06, "loss": 0.325, "step": 4226 }, { "epoch": 0.14505833905284832, "grad_norm": 0.7621563205415913, "learning_rate": 9.65718151150794e-06, "loss": 0.347, "step": 4227 }, { "epoch": 0.1450926561427591, "grad_norm": 0.8350744983893549, "learning_rate": 9.656979246866562e-06, "loss": 0.3976, "step": 4228 }, { "epoch": 0.14512697323266988, "grad_norm": 0.8028700190056046, "learning_rate": 9.656776924693635e-06, "loss": 0.3745, "step": 4229 }, { "epoch": 0.14516129032258066, "grad_norm": 0.9376137168315798, "learning_rate": 9.65657454499166e-06, "loss": 0.3237, "step": 4230 }, { "epoch": 0.14519560741249143, "grad_norm": 0.8868459711099189, "learning_rate": 9.656372107763133e-06, "loss": 0.3458, "step": 4231 }, { "epoch": 0.14522992450240219, "grad_norm": 0.8307399953015782, "learning_rate": 9.656169613010559e-06, "loss": 0.346, "step": 4232 }, { "epoch": 0.14526424159231296, "grad_norm": 0.780705990507259, "learning_rate": 9.655967060736438e-06, "loss": 0.3412, "step": 4233 }, { "epoch": 0.14529855868222374, "grad_norm": 0.8505217918260011, "learning_rate": 9.655764450943272e-06, "loss": 0.3612, "step": 4234 }, { "epoch": 0.14533287577213452, "grad_norm": 0.8340716282380892, "learning_rate": 9.655561783633564e-06, "loss": 0.3393, "step": 4235 }, { "epoch": 0.1453671928620453, "grad_norm": 0.8866977017491356, "learning_rate": 9.655359058809818e-06, "loss": 0.3941, "step": 4236 }, { "epoch": 0.14540150995195608, "grad_norm": 0.7855383535174961, "learning_rate": 9.655156276474539e-06, "loss": 0.3576, "step": 4237 }, { "epoch": 0.14543582704186686, "grad_norm": 0.835938333464289, "learning_rate": 9.65495343663023e-06, "loss": 0.4173, "step": 4238 }, { "epoch": 0.1454701441317776, "grad_norm": 0.840648060760225, "learning_rate": 9.6547505392794e-06, "loss": 0.3036, "step": 4239 }, { "epoch": 0.1455044612216884, "grad_norm": 0.8489158279136724, "learning_rate": 9.654547584424554e-06, "loss": 0.3837, "step": 4240 }, { "epoch": 0.14553877831159917, "grad_norm": 0.9430189823832883, "learning_rate": 9.6543445720682e-06, "loss": 0.3251, "step": 4241 }, { "epoch": 0.14557309540150995, "grad_norm": 0.7577674973997902, "learning_rate": 9.654141502212845e-06, "loss": 0.2957, "step": 4242 }, { "epoch": 0.14560741249142073, "grad_norm": 0.6881873425708337, "learning_rate": 9.653938374860997e-06, "loss": 0.2829, "step": 4243 }, { "epoch": 0.1456417295813315, "grad_norm": 0.8432594985933374, "learning_rate": 9.653735190015165e-06, "loss": 0.3449, "step": 4244 }, { "epoch": 0.1456760466712423, "grad_norm": 0.7302376700209855, "learning_rate": 9.653531947677863e-06, "loss": 0.3396, "step": 4245 }, { "epoch": 0.14571036376115307, "grad_norm": 0.8469322859578724, "learning_rate": 9.653328647851599e-06, "loss": 0.3361, "step": 4246 }, { "epoch": 0.14574468085106382, "grad_norm": 0.8039315343343769, "learning_rate": 9.653125290538883e-06, "loss": 0.3326, "step": 4247 }, { "epoch": 0.1457789979409746, "grad_norm": 0.7735859401587456, "learning_rate": 9.65292187574223e-06, "loss": 0.3246, "step": 4248 }, { "epoch": 0.14581331503088538, "grad_norm": 0.807840129700063, "learning_rate": 9.652718403464154e-06, "loss": 0.3436, "step": 4249 }, { "epoch": 0.14584763212079616, "grad_norm": 0.9152674275019083, "learning_rate": 9.652514873707164e-06, "loss": 0.4047, "step": 4250 }, { "epoch": 0.14588194921070693, "grad_norm": 2.0599549678312075, "learning_rate": 9.652311286473777e-06, "loss": 0.3619, "step": 4251 }, { "epoch": 0.14591626630061771, "grad_norm": 0.8925658773080727, "learning_rate": 9.65210764176651e-06, "loss": 0.4055, "step": 4252 }, { "epoch": 0.1459505833905285, "grad_norm": 0.8097500507914739, "learning_rate": 9.651903939587876e-06, "loss": 0.2781, "step": 4253 }, { "epoch": 0.14598490048043927, "grad_norm": 0.8644723801455926, "learning_rate": 9.651700179940392e-06, "loss": 0.3424, "step": 4254 }, { "epoch": 0.14601921757035002, "grad_norm": 0.8238823216669212, "learning_rate": 9.651496362826576e-06, "loss": 0.3386, "step": 4255 }, { "epoch": 0.1460535346602608, "grad_norm": 0.8212894218923499, "learning_rate": 9.651292488248944e-06, "loss": 0.3514, "step": 4256 }, { "epoch": 0.14608785175017158, "grad_norm": 0.8042267647796472, "learning_rate": 9.65108855621002e-06, "loss": 0.3239, "step": 4257 }, { "epoch": 0.14612216884008236, "grad_norm": 0.7262650438945376, "learning_rate": 9.650884566712317e-06, "loss": 0.329, "step": 4258 }, { "epoch": 0.14615648592999314, "grad_norm": 0.8308110881187458, "learning_rate": 9.650680519758357e-06, "loss": 0.3494, "step": 4259 }, { "epoch": 0.14619080301990392, "grad_norm": 0.8699056086448038, "learning_rate": 9.650476415350662e-06, "loss": 0.374, "step": 4260 }, { "epoch": 0.1462251201098147, "grad_norm": 0.7511456537895502, "learning_rate": 9.650272253491753e-06, "loss": 0.3375, "step": 4261 }, { "epoch": 0.14625943719972545, "grad_norm": 0.9166364714596205, "learning_rate": 9.650068034184153e-06, "loss": 0.3591, "step": 4262 }, { "epoch": 0.14629375428963623, "grad_norm": 0.8478903243974613, "learning_rate": 9.649863757430383e-06, "loss": 0.3776, "step": 4263 }, { "epoch": 0.146328071379547, "grad_norm": 0.7965573299736922, "learning_rate": 9.649659423232966e-06, "loss": 0.3621, "step": 4264 }, { "epoch": 0.1463623884694578, "grad_norm": 0.9562636852008639, "learning_rate": 9.64945503159443e-06, "loss": 0.352, "step": 4265 }, { "epoch": 0.14639670555936857, "grad_norm": 0.7803687275182734, "learning_rate": 9.649250582517298e-06, "loss": 0.3631, "step": 4266 }, { "epoch": 0.14643102264927935, "grad_norm": 0.7560707640831501, "learning_rate": 9.649046076004093e-06, "loss": 0.3588, "step": 4267 }, { "epoch": 0.14646533973919013, "grad_norm": 0.7684663674600504, "learning_rate": 9.648841512057345e-06, "loss": 0.3699, "step": 4268 }, { "epoch": 0.1464996568291009, "grad_norm": 0.8327898669412447, "learning_rate": 9.64863689067958e-06, "loss": 0.3685, "step": 4269 }, { "epoch": 0.14653397391901166, "grad_norm": 0.7404843174790923, "learning_rate": 9.648432211873328e-06, "loss": 0.3611, "step": 4270 }, { "epoch": 0.14656829100892244, "grad_norm": 0.8301536065378115, "learning_rate": 9.648227475641115e-06, "loss": 0.409, "step": 4271 }, { "epoch": 0.14660260809883321, "grad_norm": 0.9035465258853828, "learning_rate": 9.648022681985469e-06, "loss": 0.3869, "step": 4272 }, { "epoch": 0.146636925188744, "grad_norm": 0.7891172967613277, "learning_rate": 9.647817830908924e-06, "loss": 0.3658, "step": 4273 }, { "epoch": 0.14667124227865477, "grad_norm": 0.8205596734894742, "learning_rate": 9.647612922414008e-06, "loss": 0.3741, "step": 4274 }, { "epoch": 0.14670555936856555, "grad_norm": 0.8828264502416665, "learning_rate": 9.647407956503253e-06, "loss": 0.3328, "step": 4275 }, { "epoch": 0.14673987645847633, "grad_norm": 0.7821013829802218, "learning_rate": 9.64720293317919e-06, "loss": 0.4075, "step": 4276 }, { "epoch": 0.14677419354838708, "grad_norm": 0.8507273172104673, "learning_rate": 9.646997852444354e-06, "loss": 0.3585, "step": 4277 }, { "epoch": 0.14680851063829786, "grad_norm": 0.7403731050893028, "learning_rate": 9.646792714301275e-06, "loss": 0.3153, "step": 4278 }, { "epoch": 0.14684282772820864, "grad_norm": 0.8837642051791802, "learning_rate": 9.646587518752492e-06, "loss": 0.2917, "step": 4279 }, { "epoch": 0.14687714481811942, "grad_norm": 0.8723171487259133, "learning_rate": 9.646382265800537e-06, "loss": 0.3593, "step": 4280 }, { "epoch": 0.1469114619080302, "grad_norm": 0.9198689388437716, "learning_rate": 9.646176955447945e-06, "loss": 0.354, "step": 4281 }, { "epoch": 0.14694577899794098, "grad_norm": 0.7848754697497654, "learning_rate": 9.645971587697256e-06, "loss": 0.3421, "step": 4282 }, { "epoch": 0.14698009608785176, "grad_norm": 0.7393242351590595, "learning_rate": 9.645766162551003e-06, "loss": 0.3546, "step": 4283 }, { "epoch": 0.14701441317776254, "grad_norm": 0.8649898932616422, "learning_rate": 9.645560680011728e-06, "loss": 0.3769, "step": 4284 }, { "epoch": 0.1470487302676733, "grad_norm": 0.8403841843778374, "learning_rate": 9.645355140081964e-06, "loss": 0.418, "step": 4285 }, { "epoch": 0.14708304735758407, "grad_norm": 0.8822250164316242, "learning_rate": 9.645149542764255e-06, "loss": 0.3179, "step": 4286 }, { "epoch": 0.14711736444749485, "grad_norm": 0.8604388042425948, "learning_rate": 9.644943888061139e-06, "loss": 0.3877, "step": 4287 }, { "epoch": 0.14715168153740563, "grad_norm": 0.8846747530123639, "learning_rate": 9.644738175975155e-06, "loss": 0.3993, "step": 4288 }, { "epoch": 0.1471859986273164, "grad_norm": 0.7386893505066845, "learning_rate": 9.644532406508846e-06, "loss": 0.3525, "step": 4289 }, { "epoch": 0.14722031571722718, "grad_norm": 0.7909974765140532, "learning_rate": 9.644326579664756e-06, "loss": 0.2874, "step": 4290 }, { "epoch": 0.14725463280713796, "grad_norm": 0.7918910835863991, "learning_rate": 9.644120695445425e-06, "loss": 0.3355, "step": 4291 }, { "epoch": 0.14728894989704874, "grad_norm": 0.7289494466032496, "learning_rate": 9.643914753853398e-06, "loss": 0.3204, "step": 4292 }, { "epoch": 0.1473232669869595, "grad_norm": 0.816308007748621, "learning_rate": 9.643708754891218e-06, "loss": 0.3503, "step": 4293 }, { "epoch": 0.14735758407687027, "grad_norm": 0.918008265430801, "learning_rate": 9.643502698561432e-06, "loss": 0.374, "step": 4294 }, { "epoch": 0.14739190116678105, "grad_norm": 0.838009221221612, "learning_rate": 9.643296584866582e-06, "loss": 0.3492, "step": 4295 }, { "epoch": 0.14742621825669183, "grad_norm": 0.8927796146226128, "learning_rate": 9.643090413809215e-06, "loss": 0.3628, "step": 4296 }, { "epoch": 0.1474605353466026, "grad_norm": 0.8295649699223046, "learning_rate": 9.642884185391882e-06, "loss": 0.3343, "step": 4297 }, { "epoch": 0.1474948524365134, "grad_norm": 0.8741817402067998, "learning_rate": 9.642677899617126e-06, "loss": 0.3965, "step": 4298 }, { "epoch": 0.14752916952642417, "grad_norm": 0.9971367924006183, "learning_rate": 9.642471556487499e-06, "loss": 0.3585, "step": 4299 }, { "epoch": 0.14756348661633492, "grad_norm": 0.8629412174596747, "learning_rate": 9.642265156005547e-06, "loss": 0.3904, "step": 4300 }, { "epoch": 0.1475978037062457, "grad_norm": 0.7310619457099866, "learning_rate": 9.642058698173822e-06, "loss": 0.4039, "step": 4301 }, { "epoch": 0.14763212079615648, "grad_norm": 0.8120741923738276, "learning_rate": 9.641852182994875e-06, "loss": 0.3524, "step": 4302 }, { "epoch": 0.14766643788606726, "grad_norm": 0.7895784484020488, "learning_rate": 9.641645610471255e-06, "loss": 0.3406, "step": 4303 }, { "epoch": 0.14770075497597804, "grad_norm": 0.8269768910517433, "learning_rate": 9.641438980605516e-06, "loss": 0.3991, "step": 4304 }, { "epoch": 0.14773507206588882, "grad_norm": 0.7526618802118966, "learning_rate": 9.641232293400208e-06, "loss": 0.3596, "step": 4305 }, { "epoch": 0.1477693891557996, "grad_norm": 0.8285470154186937, "learning_rate": 9.641025548857888e-06, "loss": 0.3869, "step": 4306 }, { "epoch": 0.14780370624571038, "grad_norm": 0.8873871747992212, "learning_rate": 9.640818746981107e-06, "loss": 0.413, "step": 4307 }, { "epoch": 0.14783802333562113, "grad_norm": 0.7572449527867418, "learning_rate": 9.640611887772422e-06, "loss": 0.3298, "step": 4308 }, { "epoch": 0.1478723404255319, "grad_norm": 0.6987463602088806, "learning_rate": 9.640404971234385e-06, "loss": 0.3261, "step": 4309 }, { "epoch": 0.14790665751544269, "grad_norm": 0.7665784685995557, "learning_rate": 9.640197997369559e-06, "loss": 0.3268, "step": 4310 }, { "epoch": 0.14794097460535346, "grad_norm": 0.7333103526142961, "learning_rate": 9.639990966180493e-06, "loss": 0.3355, "step": 4311 }, { "epoch": 0.14797529169526424, "grad_norm": 0.7444969240729827, "learning_rate": 9.63978387766975e-06, "loss": 0.3247, "step": 4312 }, { "epoch": 0.14800960878517502, "grad_norm": 0.7650740391559632, "learning_rate": 9.639576731839888e-06, "loss": 0.3373, "step": 4313 }, { "epoch": 0.1480439258750858, "grad_norm": 0.8370004295281066, "learning_rate": 9.639369528693462e-06, "loss": 0.3779, "step": 4314 }, { "epoch": 0.14807824296499658, "grad_norm": 0.838005915906798, "learning_rate": 9.639162268233036e-06, "loss": 0.3522, "step": 4315 }, { "epoch": 0.14811256005490733, "grad_norm": 2.284157335450502, "learning_rate": 9.638954950461169e-06, "loss": 0.4025, "step": 4316 }, { "epoch": 0.1481468771448181, "grad_norm": 0.7455905584915035, "learning_rate": 9.638747575380421e-06, "loss": 0.296, "step": 4317 }, { "epoch": 0.1481811942347289, "grad_norm": 0.9464322754386217, "learning_rate": 9.638540142993355e-06, "loss": 0.357, "step": 4318 }, { "epoch": 0.14821551132463967, "grad_norm": 0.7372631175012065, "learning_rate": 9.638332653302536e-06, "loss": 0.3362, "step": 4319 }, { "epoch": 0.14824982841455045, "grad_norm": 0.9059355627931354, "learning_rate": 9.638125106310523e-06, "loss": 0.326, "step": 4320 }, { "epoch": 0.14828414550446123, "grad_norm": 0.8214512022282464, "learning_rate": 9.637917502019881e-06, "loss": 0.2928, "step": 4321 }, { "epoch": 0.148318462594372, "grad_norm": 0.7417816060530762, "learning_rate": 9.637709840433175e-06, "loss": 0.3179, "step": 4322 }, { "epoch": 0.14835277968428276, "grad_norm": 0.8012451433157761, "learning_rate": 9.637502121552974e-06, "loss": 0.3715, "step": 4323 }, { "epoch": 0.14838709677419354, "grad_norm": 0.7451693608292279, "learning_rate": 9.637294345381838e-06, "loss": 0.3377, "step": 4324 }, { "epoch": 0.14842141386410432, "grad_norm": 0.7827308282153376, "learning_rate": 9.63708651192234e-06, "loss": 0.3213, "step": 4325 }, { "epoch": 0.1484557309540151, "grad_norm": 0.7197608147710333, "learning_rate": 9.636878621177042e-06, "loss": 0.3016, "step": 4326 }, { "epoch": 0.14849004804392588, "grad_norm": 0.8109102045510482, "learning_rate": 9.636670673148516e-06, "loss": 0.3748, "step": 4327 }, { "epoch": 0.14852436513383666, "grad_norm": 0.8627391172690211, "learning_rate": 9.636462667839329e-06, "loss": 0.3878, "step": 4328 }, { "epoch": 0.14855868222374743, "grad_norm": 0.8866946085250315, "learning_rate": 9.636254605252052e-06, "loss": 0.3254, "step": 4329 }, { "epoch": 0.14859299931365821, "grad_norm": 0.7913521585748899, "learning_rate": 9.636046485389254e-06, "loss": 0.3295, "step": 4330 }, { "epoch": 0.14862731640356897, "grad_norm": 0.7919909465098259, "learning_rate": 9.635838308253507e-06, "loss": 0.39, "step": 4331 }, { "epoch": 0.14866163349347974, "grad_norm": 0.9087356311113893, "learning_rate": 9.635630073847383e-06, "loss": 0.3651, "step": 4332 }, { "epoch": 0.14869595058339052, "grad_norm": 0.9268157287264317, "learning_rate": 9.635421782173453e-06, "loss": 0.4142, "step": 4333 }, { "epoch": 0.1487302676733013, "grad_norm": 0.7578463281154298, "learning_rate": 9.635213433234292e-06, "loss": 0.348, "step": 4334 }, { "epoch": 0.14876458476321208, "grad_norm": 1.2946433777226136, "learning_rate": 9.635005027032474e-06, "loss": 0.3333, "step": 4335 }, { "epoch": 0.14879890185312286, "grad_norm": 0.8564406216367172, "learning_rate": 9.634796563570572e-06, "loss": 0.3678, "step": 4336 }, { "epoch": 0.14883321894303364, "grad_norm": 0.7671508802202849, "learning_rate": 9.634588042851161e-06, "loss": 0.3458, "step": 4337 }, { "epoch": 0.14886753603294442, "grad_norm": 0.8680062031655567, "learning_rate": 9.63437946487682e-06, "loss": 0.3462, "step": 4338 }, { "epoch": 0.14890185312285517, "grad_norm": 0.7636853795246537, "learning_rate": 9.634170829650123e-06, "loss": 0.3983, "step": 4339 }, { "epoch": 0.14893617021276595, "grad_norm": 0.766569472398937, "learning_rate": 9.633962137173647e-06, "loss": 0.3293, "step": 4340 }, { "epoch": 0.14897048730267673, "grad_norm": 0.8053048433754284, "learning_rate": 9.633753387449974e-06, "loss": 0.3561, "step": 4341 }, { "epoch": 0.1490048043925875, "grad_norm": 0.9430251080064879, "learning_rate": 9.63354458048168e-06, "loss": 0.3651, "step": 4342 }, { "epoch": 0.1490391214824983, "grad_norm": 0.8193602274729542, "learning_rate": 9.633335716271342e-06, "loss": 0.343, "step": 4343 }, { "epoch": 0.14907343857240907, "grad_norm": 0.7777169715942889, "learning_rate": 9.633126794821545e-06, "loss": 0.3452, "step": 4344 }, { "epoch": 0.14910775566231985, "grad_norm": 0.7342501635430856, "learning_rate": 9.63291781613487e-06, "loss": 0.3568, "step": 4345 }, { "epoch": 0.1491420727522306, "grad_norm": 0.8904207144652211, "learning_rate": 9.632708780213895e-06, "loss": 0.3555, "step": 4346 }, { "epoch": 0.14917638984214138, "grad_norm": 0.8628966949199132, "learning_rate": 9.632499687061202e-06, "loss": 0.3583, "step": 4347 }, { "epoch": 0.14921070693205216, "grad_norm": 0.9026976484408903, "learning_rate": 9.632290536679381e-06, "loss": 0.3767, "step": 4348 }, { "epoch": 0.14924502402196294, "grad_norm": 0.8557537605563728, "learning_rate": 9.632081329071008e-06, "loss": 0.3869, "step": 4349 }, { "epoch": 0.14927934111187371, "grad_norm": 0.8177467609934082, "learning_rate": 9.631872064238672e-06, "loss": 0.3613, "step": 4350 }, { "epoch": 0.1493136582017845, "grad_norm": 0.9499496877976343, "learning_rate": 9.631662742184957e-06, "loss": 0.3276, "step": 4351 }, { "epoch": 0.14934797529169527, "grad_norm": 0.8261785801513952, "learning_rate": 9.631453362912448e-06, "loss": 0.4045, "step": 4352 }, { "epoch": 0.14938229238160605, "grad_norm": 0.826459415708172, "learning_rate": 9.631243926423732e-06, "loss": 0.3685, "step": 4353 }, { "epoch": 0.1494166094715168, "grad_norm": 0.8172900554608525, "learning_rate": 9.631034432721399e-06, "loss": 0.3279, "step": 4354 }, { "epoch": 0.14945092656142758, "grad_norm": 0.7868499499626388, "learning_rate": 9.630824881808032e-06, "loss": 0.393, "step": 4355 }, { "epoch": 0.14948524365133836, "grad_norm": 0.8387636850165544, "learning_rate": 9.630615273686223e-06, "loss": 0.3481, "step": 4356 }, { "epoch": 0.14951956074124914, "grad_norm": 0.7250166521968685, "learning_rate": 9.630405608358564e-06, "loss": 0.2984, "step": 4357 }, { "epoch": 0.14955387783115992, "grad_norm": 0.9087559747238748, "learning_rate": 9.63019588582764e-06, "loss": 0.3382, "step": 4358 }, { "epoch": 0.1495881949210707, "grad_norm": 0.8483035796331516, "learning_rate": 9.629986106096043e-06, "loss": 0.3315, "step": 4359 }, { "epoch": 0.14962251201098148, "grad_norm": 0.9170159254395153, "learning_rate": 9.629776269166367e-06, "loss": 0.3352, "step": 4360 }, { "epoch": 0.14965682910089226, "grad_norm": 0.8603160283233571, "learning_rate": 9.629566375041204e-06, "loss": 0.3593, "step": 4361 }, { "epoch": 0.149691146190803, "grad_norm": 0.8449722294948127, "learning_rate": 9.629356423723146e-06, "loss": 0.3405, "step": 4362 }, { "epoch": 0.1497254632807138, "grad_norm": 0.9330861170288719, "learning_rate": 9.629146415214785e-06, "loss": 0.3651, "step": 4363 }, { "epoch": 0.14975978037062457, "grad_norm": 0.8206136264233947, "learning_rate": 9.628936349518719e-06, "loss": 0.3858, "step": 4364 }, { "epoch": 0.14979409746053535, "grad_norm": 0.9289224890711952, "learning_rate": 9.628726226637539e-06, "loss": 0.2971, "step": 4365 }, { "epoch": 0.14982841455044613, "grad_norm": 0.8177747234256394, "learning_rate": 9.628516046573844e-06, "loss": 0.3832, "step": 4366 }, { "epoch": 0.1498627316403569, "grad_norm": 0.7652128835119913, "learning_rate": 9.62830580933023e-06, "loss": 0.3312, "step": 4367 }, { "epoch": 0.14989704873026768, "grad_norm": 0.8062347826158278, "learning_rate": 9.628095514909292e-06, "loss": 0.3483, "step": 4368 }, { "epoch": 0.14993136582017844, "grad_norm": 0.7693934158043706, "learning_rate": 9.627885163313633e-06, "loss": 0.3327, "step": 4369 }, { "epoch": 0.14996568291008922, "grad_norm": 0.8318672528730423, "learning_rate": 9.627674754545847e-06, "loss": 0.307, "step": 4370 }, { "epoch": 0.15, "grad_norm": 0.8706104903349177, "learning_rate": 9.627464288608532e-06, "loss": 0.3951, "step": 4371 }, { "epoch": 0.15003431708991077, "grad_norm": 0.8782947820276429, "learning_rate": 9.627253765504295e-06, "loss": 0.3298, "step": 4372 }, { "epoch": 0.15006863417982155, "grad_norm": 0.8125183058404396, "learning_rate": 9.62704318523573e-06, "loss": 0.3427, "step": 4373 }, { "epoch": 0.15010295126973233, "grad_norm": 0.7644938482034483, "learning_rate": 9.626832547805443e-06, "loss": 0.3275, "step": 4374 }, { "epoch": 0.1501372683596431, "grad_norm": 0.8496950207028109, "learning_rate": 9.626621853216031e-06, "loss": 0.3375, "step": 4375 }, { "epoch": 0.1501715854495539, "grad_norm": 0.830041939380299, "learning_rate": 9.626411101470103e-06, "loss": 0.3181, "step": 4376 }, { "epoch": 0.15020590253946464, "grad_norm": 0.8227065932968298, "learning_rate": 9.62620029257026e-06, "loss": 0.367, "step": 4377 }, { "epoch": 0.15024021962937542, "grad_norm": 0.9485369838539014, "learning_rate": 9.625989426519104e-06, "loss": 0.364, "step": 4378 }, { "epoch": 0.1502745367192862, "grad_norm": 0.7400551192024957, "learning_rate": 9.625778503319242e-06, "loss": 0.3329, "step": 4379 }, { "epoch": 0.15030885380919698, "grad_norm": 0.8459186988266515, "learning_rate": 9.625567522973281e-06, "loss": 0.3693, "step": 4380 }, { "epoch": 0.15034317089910776, "grad_norm": 0.9025060288652811, "learning_rate": 9.625356485483826e-06, "loss": 0.3589, "step": 4381 }, { "epoch": 0.15037748798901854, "grad_norm": 0.8391405077280943, "learning_rate": 9.625145390853484e-06, "loss": 0.3526, "step": 4382 }, { "epoch": 0.15041180507892932, "grad_norm": 1.0000027643918579, "learning_rate": 9.624934239084864e-06, "loss": 0.3554, "step": 4383 }, { "epoch": 0.15044612216884007, "grad_norm": 0.8360936808259034, "learning_rate": 9.624723030180572e-06, "loss": 0.3304, "step": 4384 }, { "epoch": 0.15048043925875085, "grad_norm": 0.7877245809764742, "learning_rate": 9.62451176414322e-06, "loss": 0.3436, "step": 4385 }, { "epoch": 0.15051475634866163, "grad_norm": 0.8683182480961004, "learning_rate": 9.624300440975415e-06, "loss": 0.3801, "step": 4386 }, { "epoch": 0.1505490734385724, "grad_norm": 0.8512268958503397, "learning_rate": 9.624089060679771e-06, "loss": 0.3277, "step": 4387 }, { "epoch": 0.15058339052848319, "grad_norm": 0.7894241102980697, "learning_rate": 9.623877623258899e-06, "loss": 0.3503, "step": 4388 }, { "epoch": 0.15061770761839396, "grad_norm": 0.8549394925661281, "learning_rate": 9.623666128715409e-06, "loss": 0.3563, "step": 4389 }, { "epoch": 0.15065202470830474, "grad_norm": 0.8836989032809346, "learning_rate": 9.623454577051913e-06, "loss": 0.3032, "step": 4390 }, { "epoch": 0.15068634179821552, "grad_norm": 0.7778709611456287, "learning_rate": 9.623242968271029e-06, "loss": 0.3532, "step": 4391 }, { "epoch": 0.15072065888812627, "grad_norm": 0.8198751276773196, "learning_rate": 9.623031302375365e-06, "loss": 0.3539, "step": 4392 }, { "epoch": 0.15075497597803705, "grad_norm": 0.7905384929650096, "learning_rate": 9.622819579367542e-06, "loss": 0.3823, "step": 4393 }, { "epoch": 0.15078929306794783, "grad_norm": 0.8261706191280568, "learning_rate": 9.622607799250173e-06, "loss": 0.3161, "step": 4394 }, { "epoch": 0.1508236101578586, "grad_norm": 0.8232516992503169, "learning_rate": 9.622395962025872e-06, "loss": 0.4106, "step": 4395 }, { "epoch": 0.1508579272477694, "grad_norm": 0.9403678124025933, "learning_rate": 9.622184067697257e-06, "loss": 0.3306, "step": 4396 }, { "epoch": 0.15089224433768017, "grad_norm": 0.8948969928571304, "learning_rate": 9.62197211626695e-06, "loss": 0.3465, "step": 4397 }, { "epoch": 0.15092656142759095, "grad_norm": 0.871859581875667, "learning_rate": 9.621760107737565e-06, "loss": 0.4036, "step": 4398 }, { "epoch": 0.15096087851750173, "grad_norm": 0.8340989787832089, "learning_rate": 9.62154804211172e-06, "loss": 0.3263, "step": 4399 }, { "epoch": 0.15099519560741248, "grad_norm": 0.7790452068159773, "learning_rate": 9.62133591939204e-06, "loss": 0.355, "step": 4400 }, { "epoch": 0.15102951269732326, "grad_norm": 0.8344329525335404, "learning_rate": 9.621123739581142e-06, "loss": 0.4425, "step": 4401 }, { "epoch": 0.15106382978723404, "grad_norm": 0.8654621878389446, "learning_rate": 9.62091150268165e-06, "loss": 0.3606, "step": 4402 }, { "epoch": 0.15109814687714482, "grad_norm": 0.7374336702778416, "learning_rate": 9.62069920869618e-06, "loss": 0.3544, "step": 4403 }, { "epoch": 0.1511324639670556, "grad_norm": 0.8237473879430223, "learning_rate": 9.62048685762736e-06, "loss": 0.3785, "step": 4404 }, { "epoch": 0.15116678105696638, "grad_norm": 0.9077638528539411, "learning_rate": 9.620274449477814e-06, "loss": 0.3489, "step": 4405 }, { "epoch": 0.15120109814687716, "grad_norm": 0.7986917935417698, "learning_rate": 9.620061984250162e-06, "loss": 0.4109, "step": 4406 }, { "epoch": 0.1512354152367879, "grad_norm": 0.8997918578067088, "learning_rate": 9.619849461947029e-06, "loss": 0.3482, "step": 4407 }, { "epoch": 0.15126973232669869, "grad_norm": 0.9542692781597149, "learning_rate": 9.619636882571043e-06, "loss": 0.362, "step": 4408 }, { "epoch": 0.15130404941660947, "grad_norm": 0.7977593926378349, "learning_rate": 9.61942424612483e-06, "loss": 0.2878, "step": 4409 }, { "epoch": 0.15133836650652024, "grad_norm": 0.7900918413652684, "learning_rate": 9.619211552611016e-06, "loss": 0.3777, "step": 4410 }, { "epoch": 0.15137268359643102, "grad_norm": 0.8852936506651254, "learning_rate": 9.61899880203223e-06, "loss": 0.4218, "step": 4411 }, { "epoch": 0.1514070006863418, "grad_norm": 0.7246026322968814, "learning_rate": 9.618785994391097e-06, "loss": 0.3399, "step": 4412 }, { "epoch": 0.15144131777625258, "grad_norm": 0.7313572152328026, "learning_rate": 9.618573129690249e-06, "loss": 0.2951, "step": 4413 }, { "epoch": 0.15147563486616336, "grad_norm": 0.9008316810493703, "learning_rate": 9.618360207932316e-06, "loss": 0.3494, "step": 4414 }, { "epoch": 0.1515099519560741, "grad_norm": 0.6640707898877738, "learning_rate": 9.618147229119926e-06, "loss": 0.3071, "step": 4415 }, { "epoch": 0.1515442690459849, "grad_norm": 0.7624662274694367, "learning_rate": 9.61793419325571e-06, "loss": 0.3113, "step": 4416 }, { "epoch": 0.15157858613589567, "grad_norm": 0.8756299856612796, "learning_rate": 9.617721100342301e-06, "loss": 0.3545, "step": 4417 }, { "epoch": 0.15161290322580645, "grad_norm": 0.8625106970484305, "learning_rate": 9.617507950382334e-06, "loss": 0.4225, "step": 4418 }, { "epoch": 0.15164722031571723, "grad_norm": 0.7958421832458416, "learning_rate": 9.617294743378438e-06, "loss": 0.3319, "step": 4419 }, { "epoch": 0.151681537405628, "grad_norm": 0.7631718267894285, "learning_rate": 9.617081479333249e-06, "loss": 0.3124, "step": 4420 }, { "epoch": 0.1517158544955388, "grad_norm": 1.0867735975900947, "learning_rate": 9.616868158249402e-06, "loss": 0.3741, "step": 4421 }, { "epoch": 0.15175017158544957, "grad_norm": 0.9090480384322238, "learning_rate": 9.616654780129531e-06, "loss": 0.3104, "step": 4422 }, { "epoch": 0.15178448867536032, "grad_norm": 0.8061526011736457, "learning_rate": 9.616441344976272e-06, "loss": 0.3107, "step": 4423 }, { "epoch": 0.1518188057652711, "grad_norm": 0.8632652174719804, "learning_rate": 9.616227852792263e-06, "loss": 0.3494, "step": 4424 }, { "epoch": 0.15185312285518188, "grad_norm": 0.8403548301803088, "learning_rate": 9.616014303580141e-06, "loss": 0.4048, "step": 4425 }, { "epoch": 0.15188743994509266, "grad_norm": 0.8898073486310037, "learning_rate": 9.615800697342544e-06, "loss": 0.3331, "step": 4426 }, { "epoch": 0.15192175703500344, "grad_norm": 0.9888832866520522, "learning_rate": 9.615587034082111e-06, "loss": 0.349, "step": 4427 }, { "epoch": 0.15195607412491421, "grad_norm": 0.7816668724347716, "learning_rate": 9.61537331380148e-06, "loss": 0.2904, "step": 4428 }, { "epoch": 0.151990391214825, "grad_norm": 0.9594769986104847, "learning_rate": 9.615159536503296e-06, "loss": 0.3033, "step": 4429 }, { "epoch": 0.15202470830473575, "grad_norm": 0.8345320928285809, "learning_rate": 9.614945702190194e-06, "loss": 0.3833, "step": 4430 }, { "epoch": 0.15205902539464652, "grad_norm": 0.8034383747011887, "learning_rate": 9.61473181086482e-06, "loss": 0.3898, "step": 4431 }, { "epoch": 0.1520933424845573, "grad_norm": 0.8019366313452284, "learning_rate": 9.614517862529814e-06, "loss": 0.3209, "step": 4432 }, { "epoch": 0.15212765957446808, "grad_norm": 0.8288820058887698, "learning_rate": 9.614303857187819e-06, "loss": 0.3625, "step": 4433 }, { "epoch": 0.15216197666437886, "grad_norm": 0.871095192376594, "learning_rate": 9.61408979484148e-06, "loss": 0.3797, "step": 4434 }, { "epoch": 0.15219629375428964, "grad_norm": 0.8457109183603262, "learning_rate": 9.613875675493442e-06, "loss": 0.382, "step": 4435 }, { "epoch": 0.15223061084420042, "grad_norm": 0.9121948572906236, "learning_rate": 9.613661499146349e-06, "loss": 0.4205, "step": 4436 }, { "epoch": 0.1522649279341112, "grad_norm": 1.1509640884901573, "learning_rate": 9.613447265802846e-06, "loss": 0.4244, "step": 4437 }, { "epoch": 0.15229924502402195, "grad_norm": 0.7895959121192443, "learning_rate": 9.613232975465582e-06, "loss": 0.3648, "step": 4438 }, { "epoch": 0.15233356211393273, "grad_norm": 0.8222458420535945, "learning_rate": 9.613018628137203e-06, "loss": 0.3212, "step": 4439 }, { "epoch": 0.1523678792038435, "grad_norm": 0.8420512488733028, "learning_rate": 9.612804223820355e-06, "loss": 0.4244, "step": 4440 }, { "epoch": 0.1524021962937543, "grad_norm": 0.7490383390292967, "learning_rate": 9.612589762517692e-06, "loss": 0.3305, "step": 4441 }, { "epoch": 0.15243651338366507, "grad_norm": 0.8092670901806712, "learning_rate": 9.612375244231858e-06, "loss": 0.3577, "step": 4442 }, { "epoch": 0.15247083047357585, "grad_norm": 0.741666103080091, "learning_rate": 9.612160668965507e-06, "loss": 0.3094, "step": 4443 }, { "epoch": 0.15250514756348663, "grad_norm": 0.7335659628804264, "learning_rate": 9.611946036721287e-06, "loss": 0.3314, "step": 4444 }, { "epoch": 0.1525394646533974, "grad_norm": 0.689734082789997, "learning_rate": 9.611731347501851e-06, "loss": 0.2957, "step": 4445 }, { "epoch": 0.15257378174330816, "grad_norm": 0.7847271281235809, "learning_rate": 9.611516601309851e-06, "loss": 0.3066, "step": 4446 }, { "epoch": 0.15260809883321894, "grad_norm": 0.7642871241524313, "learning_rate": 9.61130179814794e-06, "loss": 0.3407, "step": 4447 }, { "epoch": 0.15264241592312972, "grad_norm": 0.7225606770252699, "learning_rate": 9.611086938018772e-06, "loss": 0.3679, "step": 4448 }, { "epoch": 0.1526767330130405, "grad_norm": 0.8408252763601689, "learning_rate": 9.610872020925e-06, "loss": 0.3692, "step": 4449 }, { "epoch": 0.15271105010295127, "grad_norm": 0.8011337358752548, "learning_rate": 9.61065704686928e-06, "loss": 0.3295, "step": 4450 }, { "epoch": 0.15274536719286205, "grad_norm": 0.7806361946246266, "learning_rate": 9.610442015854268e-06, "loss": 0.3107, "step": 4451 }, { "epoch": 0.15277968428277283, "grad_norm": 0.8789326416089842, "learning_rate": 9.610226927882621e-06, "loss": 0.3515, "step": 4452 }, { "epoch": 0.15281400137268358, "grad_norm": 0.8324181009563213, "learning_rate": 9.610011782956995e-06, "loss": 0.3618, "step": 4453 }, { "epoch": 0.15284831846259436, "grad_norm": 0.945501630797121, "learning_rate": 9.609796581080048e-06, "loss": 0.3972, "step": 4454 }, { "epoch": 0.15288263555250514, "grad_norm": 0.8023551441610992, "learning_rate": 9.609581322254437e-06, "loss": 0.3587, "step": 4455 }, { "epoch": 0.15291695264241592, "grad_norm": 0.8533935525410625, "learning_rate": 9.609366006482826e-06, "loss": 0.3314, "step": 4456 }, { "epoch": 0.1529512697323267, "grad_norm": 0.8824706961295052, "learning_rate": 9.60915063376787e-06, "loss": 0.3523, "step": 4457 }, { "epoch": 0.15298558682223748, "grad_norm": 0.7937811119514798, "learning_rate": 9.608935204112233e-06, "loss": 0.3416, "step": 4458 }, { "epoch": 0.15301990391214826, "grad_norm": 0.8055585814989668, "learning_rate": 9.608719717518572e-06, "loss": 0.3104, "step": 4459 }, { "epoch": 0.15305422100205904, "grad_norm": 0.7902315885738851, "learning_rate": 9.608504173989555e-06, "loss": 0.3644, "step": 4460 }, { "epoch": 0.1530885380919698, "grad_norm": 0.9929162136264774, "learning_rate": 9.608288573527841e-06, "loss": 0.3273, "step": 4461 }, { "epoch": 0.15312285518188057, "grad_norm": 0.797944670941328, "learning_rate": 9.608072916136094e-06, "loss": 0.3602, "step": 4462 }, { "epoch": 0.15315717227179135, "grad_norm": 0.7756661155286696, "learning_rate": 9.60785720181698e-06, "loss": 0.3244, "step": 4463 }, { "epoch": 0.15319148936170213, "grad_norm": 0.89997811731816, "learning_rate": 9.607641430573162e-06, "loss": 0.3402, "step": 4464 }, { "epoch": 0.1532258064516129, "grad_norm": 0.8193574269507506, "learning_rate": 9.607425602407305e-06, "loss": 0.3221, "step": 4465 }, { "epoch": 0.15326012354152369, "grad_norm": 0.8287432207682263, "learning_rate": 9.607209717322079e-06, "loss": 0.3586, "step": 4466 }, { "epoch": 0.15329444063143446, "grad_norm": 0.7918206403238793, "learning_rate": 9.606993775320145e-06, "loss": 0.3411, "step": 4467 }, { "epoch": 0.15332875772134524, "grad_norm": 0.8980199782195064, "learning_rate": 9.606777776404177e-06, "loss": 0.3815, "step": 4468 }, { "epoch": 0.153363074811256, "grad_norm": 0.8485704891678583, "learning_rate": 9.60656172057684e-06, "loss": 0.366, "step": 4469 }, { "epoch": 0.15339739190116677, "grad_norm": 0.767292171840128, "learning_rate": 9.606345607840802e-06, "loss": 0.3982, "step": 4470 }, { "epoch": 0.15343170899107755, "grad_norm": 0.8360191589019272, "learning_rate": 9.606129438198736e-06, "loss": 0.3239, "step": 4471 }, { "epoch": 0.15346602608098833, "grad_norm": 0.7697308233511646, "learning_rate": 9.605913211653311e-06, "loss": 0.3161, "step": 4472 }, { "epoch": 0.1535003431708991, "grad_norm": 0.9870426469344775, "learning_rate": 9.605696928207197e-06, "loss": 0.3886, "step": 4473 }, { "epoch": 0.1535346602608099, "grad_norm": 0.7439789538220378, "learning_rate": 9.605480587863068e-06, "loss": 0.3069, "step": 4474 }, { "epoch": 0.15356897735072067, "grad_norm": 0.7977869382674827, "learning_rate": 9.605264190623595e-06, "loss": 0.3299, "step": 4475 }, { "epoch": 0.15360329444063142, "grad_norm": 0.8209323652648627, "learning_rate": 9.605047736491453e-06, "loss": 0.3533, "step": 4476 }, { "epoch": 0.1536376115305422, "grad_norm": 0.7852674180228731, "learning_rate": 9.604831225469316e-06, "loss": 0.3013, "step": 4477 }, { "epoch": 0.15367192862045298, "grad_norm": 0.8283582076904854, "learning_rate": 9.604614657559857e-06, "loss": 0.3229, "step": 4478 }, { "epoch": 0.15370624571036376, "grad_norm": 0.748471037850166, "learning_rate": 9.604398032765752e-06, "loss": 0.3173, "step": 4479 }, { "epoch": 0.15374056280027454, "grad_norm": 0.7920481838201667, "learning_rate": 9.604181351089678e-06, "loss": 0.3374, "step": 4480 }, { "epoch": 0.15377487989018532, "grad_norm": 0.8025715842881563, "learning_rate": 9.603964612534312e-06, "loss": 0.2983, "step": 4481 }, { "epoch": 0.1538091969800961, "grad_norm": 0.7713326255591956, "learning_rate": 9.60374781710233e-06, "loss": 0.3148, "step": 4482 }, { "epoch": 0.15384351407000688, "grad_norm": 0.8413956362762328, "learning_rate": 9.603530964796411e-06, "loss": 0.3321, "step": 4483 }, { "epoch": 0.15387783115991763, "grad_norm": 0.9030627191418457, "learning_rate": 9.603314055619234e-06, "loss": 0.3366, "step": 4484 }, { "epoch": 0.1539121482498284, "grad_norm": 0.8607212912669423, "learning_rate": 9.603097089573481e-06, "loss": 0.3321, "step": 4485 }, { "epoch": 0.15394646533973919, "grad_norm": 0.8068898292941449, "learning_rate": 9.602880066661828e-06, "loss": 0.3404, "step": 4486 }, { "epoch": 0.15398078242964996, "grad_norm": 0.6666467175161969, "learning_rate": 9.602662986886958e-06, "loss": 0.3091, "step": 4487 }, { "epoch": 0.15401509951956074, "grad_norm": 0.8507472992723986, "learning_rate": 9.602445850251553e-06, "loss": 0.3531, "step": 4488 }, { "epoch": 0.15404941660947152, "grad_norm": 0.8328571578192319, "learning_rate": 9.602228656758296e-06, "loss": 0.3597, "step": 4489 }, { "epoch": 0.1540837336993823, "grad_norm": 0.8157091506621601, "learning_rate": 9.602011406409871e-06, "loss": 0.3023, "step": 4490 }, { "epoch": 0.15411805078929308, "grad_norm": 0.8739945543459737, "learning_rate": 9.601794099208958e-06, "loss": 0.3614, "step": 4491 }, { "epoch": 0.15415236787920383, "grad_norm": 0.8527463490922559, "learning_rate": 9.601576735158247e-06, "loss": 0.362, "step": 4492 }, { "epoch": 0.1541866849691146, "grad_norm": 0.9243859965155333, "learning_rate": 9.601359314260418e-06, "loss": 0.3122, "step": 4493 }, { "epoch": 0.1542210020590254, "grad_norm": 0.84788372537038, "learning_rate": 9.601141836518159e-06, "loss": 0.3652, "step": 4494 }, { "epoch": 0.15425531914893617, "grad_norm": 0.8682548724373464, "learning_rate": 9.60092430193416e-06, "loss": 0.2822, "step": 4495 }, { "epoch": 0.15428963623884695, "grad_norm": 0.8246678485549664, "learning_rate": 9.600706710511103e-06, "loss": 0.3244, "step": 4496 }, { "epoch": 0.15432395332875773, "grad_norm": 0.8159478650894026, "learning_rate": 9.600489062251679e-06, "loss": 0.3355, "step": 4497 }, { "epoch": 0.1543582704186685, "grad_norm": 0.8190180950207403, "learning_rate": 9.600271357158575e-06, "loss": 0.3279, "step": 4498 }, { "epoch": 0.15439258750857926, "grad_norm": 0.7849049575456004, "learning_rate": 9.600053595234483e-06, "loss": 0.2905, "step": 4499 }, { "epoch": 0.15442690459849004, "grad_norm": 0.7849267144050783, "learning_rate": 9.599835776482092e-06, "loss": 0.3718, "step": 4500 }, { "epoch": 0.15446122168840082, "grad_norm": 0.7940923885208818, "learning_rate": 9.599617900904094e-06, "loss": 0.3543, "step": 4501 }, { "epoch": 0.1544955387783116, "grad_norm": 0.7498082352809774, "learning_rate": 9.599399968503178e-06, "loss": 0.3215, "step": 4502 }, { "epoch": 0.15452985586822238, "grad_norm": 0.8156209970970169, "learning_rate": 9.599181979282037e-06, "loss": 0.3709, "step": 4503 }, { "epoch": 0.15456417295813316, "grad_norm": 0.8716620172844723, "learning_rate": 9.598963933243366e-06, "loss": 0.3669, "step": 4504 }, { "epoch": 0.15459849004804394, "grad_norm": 0.848670793872528, "learning_rate": 9.598745830389858e-06, "loss": 0.3205, "step": 4505 }, { "epoch": 0.15463280713795471, "grad_norm": 0.754515771832073, "learning_rate": 9.598527670724207e-06, "loss": 0.3595, "step": 4506 }, { "epoch": 0.15466712422786547, "grad_norm": 0.8252032088405225, "learning_rate": 9.598309454249106e-06, "loss": 0.3301, "step": 4507 }, { "epoch": 0.15470144131777624, "grad_norm": 0.8032624480260291, "learning_rate": 9.598091180967253e-06, "loss": 0.3474, "step": 4508 }, { "epoch": 0.15473575840768702, "grad_norm": 0.8414722040733481, "learning_rate": 9.597872850881347e-06, "loss": 0.3083, "step": 4509 }, { "epoch": 0.1547700754975978, "grad_norm": 0.7784068743357652, "learning_rate": 9.59765446399408e-06, "loss": 0.36, "step": 4510 }, { "epoch": 0.15480439258750858, "grad_norm": 0.8291153262226986, "learning_rate": 9.597436020308153e-06, "loss": 0.3301, "step": 4511 }, { "epoch": 0.15483870967741936, "grad_norm": 0.8639677190357216, "learning_rate": 9.597217519826264e-06, "loss": 0.4212, "step": 4512 }, { "epoch": 0.15487302676733014, "grad_norm": 0.8460504400982191, "learning_rate": 9.596998962551114e-06, "loss": 0.3578, "step": 4513 }, { "epoch": 0.1549073438572409, "grad_norm": 0.8712143942161257, "learning_rate": 9.596780348485399e-06, "loss": 0.3312, "step": 4514 }, { "epoch": 0.15494166094715167, "grad_norm": 0.9963705607993372, "learning_rate": 9.596561677631823e-06, "loss": 0.3372, "step": 4515 }, { "epoch": 0.15497597803706245, "grad_norm": 0.7973317264187012, "learning_rate": 9.596342949993087e-06, "loss": 0.3886, "step": 4516 }, { "epoch": 0.15501029512697323, "grad_norm": 0.7840705749975726, "learning_rate": 9.596124165571894e-06, "loss": 0.372, "step": 4517 }, { "epoch": 0.155044612216884, "grad_norm": 0.8738580889725557, "learning_rate": 9.595905324370941e-06, "loss": 0.3202, "step": 4518 }, { "epoch": 0.1550789293067948, "grad_norm": 0.8511738414678715, "learning_rate": 9.59568642639294e-06, "loss": 0.4148, "step": 4519 }, { "epoch": 0.15511324639670557, "grad_norm": 0.8382489474176139, "learning_rate": 9.59546747164059e-06, "loss": 0.3309, "step": 4520 }, { "epoch": 0.15514756348661635, "grad_norm": 0.7720381757051008, "learning_rate": 9.595248460116598e-06, "loss": 0.318, "step": 4521 }, { "epoch": 0.1551818805765271, "grad_norm": 0.7469317738325385, "learning_rate": 9.595029391823668e-06, "loss": 0.3686, "step": 4522 }, { "epoch": 0.15521619766643788, "grad_norm": 0.7841188060734449, "learning_rate": 9.594810266764506e-06, "loss": 0.3788, "step": 4523 }, { "epoch": 0.15525051475634866, "grad_norm": 0.9251702088800301, "learning_rate": 9.594591084941823e-06, "loss": 0.4392, "step": 4524 }, { "epoch": 0.15528483184625944, "grad_norm": 0.8551100211699671, "learning_rate": 9.594371846358324e-06, "loss": 0.3586, "step": 4525 }, { "epoch": 0.15531914893617021, "grad_norm": 0.8405982586266029, "learning_rate": 9.594152551016715e-06, "loss": 0.3328, "step": 4526 }, { "epoch": 0.155353466026081, "grad_norm": 0.8450413154358858, "learning_rate": 9.593933198919708e-06, "loss": 0.3616, "step": 4527 }, { "epoch": 0.15538778311599177, "grad_norm": 0.8078707874410306, "learning_rate": 9.593713790070014e-06, "loss": 0.401, "step": 4528 }, { "epoch": 0.15542210020590255, "grad_norm": 0.8676312430993062, "learning_rate": 9.593494324470339e-06, "loss": 0.3553, "step": 4529 }, { "epoch": 0.1554564172958133, "grad_norm": 0.7947936664261186, "learning_rate": 9.593274802123398e-06, "loss": 0.3145, "step": 4530 }, { "epoch": 0.15549073438572408, "grad_norm": 0.7710271390443137, "learning_rate": 9.593055223031905e-06, "loss": 0.3418, "step": 4531 }, { "epoch": 0.15552505147563486, "grad_norm": 0.8754882043547788, "learning_rate": 9.592835587198565e-06, "loss": 0.3338, "step": 4532 }, { "epoch": 0.15555936856554564, "grad_norm": 0.9006554233593159, "learning_rate": 9.5926158946261e-06, "loss": 0.4158, "step": 4533 }, { "epoch": 0.15559368565545642, "grad_norm": 0.8824350154683703, "learning_rate": 9.592396145317217e-06, "loss": 0.415, "step": 4534 }, { "epoch": 0.1556280027453672, "grad_norm": 0.8390921092240242, "learning_rate": 9.592176339274638e-06, "loss": 0.3561, "step": 4535 }, { "epoch": 0.15566231983527798, "grad_norm": 0.8197036631269755, "learning_rate": 9.59195647650107e-06, "loss": 0.3125, "step": 4536 }, { "epoch": 0.15569663692518873, "grad_norm": 0.7510800707786364, "learning_rate": 9.591736556999235e-06, "loss": 0.2891, "step": 4537 }, { "epoch": 0.1557309540150995, "grad_norm": 0.8234495716526417, "learning_rate": 9.591516580771849e-06, "loss": 0.3596, "step": 4538 }, { "epoch": 0.1557652711050103, "grad_norm": 0.7880611642933403, "learning_rate": 9.591296547821626e-06, "loss": 0.3028, "step": 4539 }, { "epoch": 0.15579958819492107, "grad_norm": 0.7852682012733868, "learning_rate": 9.59107645815129e-06, "loss": 0.3888, "step": 4540 }, { "epoch": 0.15583390528483185, "grad_norm": 0.7914531836077278, "learning_rate": 9.590856311763557e-06, "loss": 0.3333, "step": 4541 }, { "epoch": 0.15586822237474263, "grad_norm": 0.8326774048159997, "learning_rate": 9.590636108661147e-06, "loss": 0.3342, "step": 4542 }, { "epoch": 0.1559025394646534, "grad_norm": 0.8087503028615912, "learning_rate": 9.590415848846778e-06, "loss": 0.4527, "step": 4543 }, { "epoch": 0.15593685655456418, "grad_norm": 0.8051360285784726, "learning_rate": 9.590195532323175e-06, "loss": 0.2875, "step": 4544 }, { "epoch": 0.15597117364447494, "grad_norm": 0.9707433582369213, "learning_rate": 9.589975159093057e-06, "loss": 0.3293, "step": 4545 }, { "epoch": 0.15600549073438572, "grad_norm": 1.012034495373145, "learning_rate": 9.589754729159146e-06, "loss": 0.3176, "step": 4546 }, { "epoch": 0.1560398078242965, "grad_norm": 0.9089171811155808, "learning_rate": 9.589534242524167e-06, "loss": 0.3372, "step": 4547 }, { "epoch": 0.15607412491420727, "grad_norm": 0.7743882092855018, "learning_rate": 9.589313699190845e-06, "loss": 0.3596, "step": 4548 }, { "epoch": 0.15610844200411805, "grad_norm": 0.8657053761090502, "learning_rate": 9.589093099161902e-06, "loss": 0.3284, "step": 4549 }, { "epoch": 0.15614275909402883, "grad_norm": 0.6958780482739507, "learning_rate": 9.588872442440065e-06, "loss": 0.3051, "step": 4550 }, { "epoch": 0.1561770761839396, "grad_norm": 0.7559815745082555, "learning_rate": 9.588651729028057e-06, "loss": 0.354, "step": 4551 }, { "epoch": 0.1562113932738504, "grad_norm": 0.8401000344487253, "learning_rate": 9.588430958928608e-06, "loss": 0.3234, "step": 4552 }, { "epoch": 0.15624571036376114, "grad_norm": 0.7892391650136825, "learning_rate": 9.588210132144444e-06, "loss": 0.4136, "step": 4553 }, { "epoch": 0.15628002745367192, "grad_norm": 0.7913541884763323, "learning_rate": 9.587989248678292e-06, "loss": 0.3411, "step": 4554 }, { "epoch": 0.1563143445435827, "grad_norm": 0.8453472793590263, "learning_rate": 9.587768308532885e-06, "loss": 0.3581, "step": 4555 }, { "epoch": 0.15634866163349348, "grad_norm": 0.8139832412010319, "learning_rate": 9.587547311710948e-06, "loss": 0.3864, "step": 4556 }, { "epoch": 0.15638297872340426, "grad_norm": 0.8513298616580625, "learning_rate": 9.58732625821521e-06, "loss": 0.3482, "step": 4557 }, { "epoch": 0.15641729581331504, "grad_norm": 0.7900954155354969, "learning_rate": 9.587105148048407e-06, "loss": 0.3271, "step": 4558 }, { "epoch": 0.15645161290322582, "grad_norm": 0.8249112469758564, "learning_rate": 9.586883981213268e-06, "loss": 0.3029, "step": 4559 }, { "epoch": 0.15648592999313657, "grad_norm": 0.711826775948581, "learning_rate": 9.586662757712524e-06, "loss": 0.3096, "step": 4560 }, { "epoch": 0.15652024708304735, "grad_norm": 0.8115536399623513, "learning_rate": 9.58644147754891e-06, "loss": 0.3631, "step": 4561 }, { "epoch": 0.15655456417295813, "grad_norm": 0.8707424310522114, "learning_rate": 9.586220140725157e-06, "loss": 0.3231, "step": 4562 }, { "epoch": 0.1565888812628689, "grad_norm": 0.808953616638321, "learning_rate": 9.585998747244003e-06, "loss": 0.3645, "step": 4563 }, { "epoch": 0.15662319835277969, "grad_norm": 0.8742340408543338, "learning_rate": 9.585777297108181e-06, "loss": 0.3384, "step": 4564 }, { "epoch": 0.15665751544269046, "grad_norm": 1.0053026680748278, "learning_rate": 9.585555790320426e-06, "loss": 0.3219, "step": 4565 }, { "epoch": 0.15669183253260124, "grad_norm": 1.0846344611950594, "learning_rate": 9.585334226883476e-06, "loss": 0.3682, "step": 4566 }, { "epoch": 0.15672614962251202, "grad_norm": 0.8547192769509238, "learning_rate": 9.585112606800068e-06, "loss": 0.3996, "step": 4567 }, { "epoch": 0.15676046671242277, "grad_norm": 0.8340099472121136, "learning_rate": 9.584890930072942e-06, "loss": 0.3467, "step": 4568 }, { "epoch": 0.15679478380233355, "grad_norm": 0.8915352220454823, "learning_rate": 9.58466919670483e-06, "loss": 0.3822, "step": 4569 }, { "epoch": 0.15682910089224433, "grad_norm": 0.8070688913150531, "learning_rate": 9.584447406698476e-06, "loss": 0.3737, "step": 4570 }, { "epoch": 0.1568634179821551, "grad_norm": 0.7930251206984544, "learning_rate": 9.58422556005662e-06, "loss": 0.375, "step": 4571 }, { "epoch": 0.1568977350720659, "grad_norm": 0.7391810838498337, "learning_rate": 9.584003656782002e-06, "loss": 0.3428, "step": 4572 }, { "epoch": 0.15693205216197667, "grad_norm": 0.7825324948180061, "learning_rate": 9.583781696877363e-06, "loss": 0.3794, "step": 4573 }, { "epoch": 0.15696636925188745, "grad_norm": 0.7660332770704992, "learning_rate": 9.583559680345444e-06, "loss": 0.3567, "step": 4574 }, { "epoch": 0.15700068634179823, "grad_norm": 0.8934511525670293, "learning_rate": 9.583337607188992e-06, "loss": 0.331, "step": 4575 }, { "epoch": 0.15703500343170898, "grad_norm": 0.8493806581908679, "learning_rate": 9.583115477410745e-06, "loss": 0.4077, "step": 4576 }, { "epoch": 0.15706932052161976, "grad_norm": 0.8003123546862577, "learning_rate": 9.582893291013451e-06, "loss": 0.3734, "step": 4577 }, { "epoch": 0.15710363761153054, "grad_norm": 0.7599786939809187, "learning_rate": 9.582671047999852e-06, "loss": 0.3294, "step": 4578 }, { "epoch": 0.15713795470144132, "grad_norm": 0.7991806244147093, "learning_rate": 9.582448748372697e-06, "loss": 0.3573, "step": 4579 }, { "epoch": 0.1571722717913521, "grad_norm": 0.8248757292572277, "learning_rate": 9.58222639213473e-06, "loss": 0.3686, "step": 4580 }, { "epoch": 0.15720658888126288, "grad_norm": 0.7732572975935562, "learning_rate": 9.582003979288698e-06, "loss": 0.3444, "step": 4581 }, { "epoch": 0.15724090597117366, "grad_norm": 0.8616466258758628, "learning_rate": 9.581781509837349e-06, "loss": 0.3911, "step": 4582 }, { "epoch": 0.1572752230610844, "grad_norm": 0.7811671684754479, "learning_rate": 9.58155898378343e-06, "loss": 0.3603, "step": 4583 }, { "epoch": 0.1573095401509952, "grad_norm": 0.8517843716832894, "learning_rate": 9.581336401129692e-06, "loss": 0.3343, "step": 4584 }, { "epoch": 0.15734385724090597, "grad_norm": 0.8262077849411479, "learning_rate": 9.581113761878885e-06, "loss": 0.3847, "step": 4585 }, { "epoch": 0.15737817433081674, "grad_norm": 0.8037151710179142, "learning_rate": 9.580891066033757e-06, "loss": 0.3239, "step": 4586 }, { "epoch": 0.15741249142072752, "grad_norm": 0.794642962711105, "learning_rate": 9.580668313597063e-06, "loss": 0.3906, "step": 4587 }, { "epoch": 0.1574468085106383, "grad_norm": 0.8066033864705908, "learning_rate": 9.58044550457155e-06, "loss": 0.3694, "step": 4588 }, { "epoch": 0.15748112560054908, "grad_norm": 0.8260726885884229, "learning_rate": 9.580222638959975e-06, "loss": 0.296, "step": 4589 }, { "epoch": 0.15751544269045986, "grad_norm": 0.8201634017923038, "learning_rate": 9.57999971676509e-06, "loss": 0.326, "step": 4590 }, { "epoch": 0.1575497597803706, "grad_norm": 0.7938375174836142, "learning_rate": 9.579776737989646e-06, "loss": 0.387, "step": 4591 }, { "epoch": 0.1575840768702814, "grad_norm": 0.9655366366508094, "learning_rate": 9.5795537026364e-06, "loss": 0.3585, "step": 4592 }, { "epoch": 0.15761839396019217, "grad_norm": 0.8297190577448678, "learning_rate": 9.57933061070811e-06, "loss": 0.3828, "step": 4593 }, { "epoch": 0.15765271105010295, "grad_norm": 0.9045837922399964, "learning_rate": 9.579107462207526e-06, "loss": 0.3838, "step": 4594 }, { "epoch": 0.15768702814001373, "grad_norm": 0.7781846513211368, "learning_rate": 9.578884257137409e-06, "loss": 0.307, "step": 4595 }, { "epoch": 0.1577213452299245, "grad_norm": 0.9901071320773623, "learning_rate": 9.578660995500516e-06, "loss": 0.3064, "step": 4596 }, { "epoch": 0.1577556623198353, "grad_norm": 0.8930879872566925, "learning_rate": 9.578437677299604e-06, "loss": 0.3627, "step": 4597 }, { "epoch": 0.15778997940974607, "grad_norm": 0.8017199756308947, "learning_rate": 9.578214302537432e-06, "loss": 0.3052, "step": 4598 }, { "epoch": 0.15782429649965682, "grad_norm": 0.7965403986095997, "learning_rate": 9.577990871216761e-06, "loss": 0.3843, "step": 4599 }, { "epoch": 0.1578586135895676, "grad_norm": 0.8734018060674619, "learning_rate": 9.57776738334035e-06, "loss": 0.3531, "step": 4600 }, { "epoch": 0.15789293067947838, "grad_norm": 0.7364905855272008, "learning_rate": 9.57754383891096e-06, "loss": 0.3354, "step": 4601 }, { "epoch": 0.15792724776938916, "grad_norm": 0.8052104556113794, "learning_rate": 9.577320237931354e-06, "loss": 0.3472, "step": 4602 }, { "epoch": 0.15796156485929994, "grad_norm": 0.7701392264072531, "learning_rate": 9.57709658040429e-06, "loss": 0.35, "step": 4603 }, { "epoch": 0.15799588194921071, "grad_norm": 0.8440920071867566, "learning_rate": 9.576872866332537e-06, "loss": 0.3448, "step": 4604 }, { "epoch": 0.1580301990391215, "grad_norm": 0.7912035637052842, "learning_rate": 9.576649095718855e-06, "loss": 0.342, "step": 4605 }, { "epoch": 0.15806451612903225, "grad_norm": 0.8108957003482766, "learning_rate": 9.57642526856601e-06, "loss": 0.339, "step": 4606 }, { "epoch": 0.15809883321894302, "grad_norm": 0.8872521625580522, "learning_rate": 9.576201384876764e-06, "loss": 0.3149, "step": 4607 }, { "epoch": 0.1581331503088538, "grad_norm": 0.8325172116593847, "learning_rate": 9.575977444653888e-06, "loss": 0.3316, "step": 4608 }, { "epoch": 0.15816746739876458, "grad_norm": 0.8825155795750278, "learning_rate": 9.575753447900145e-06, "loss": 0.3579, "step": 4609 }, { "epoch": 0.15820178448867536, "grad_norm": 0.7068917502369464, "learning_rate": 9.5755293946183e-06, "loss": 0.2831, "step": 4610 }, { "epoch": 0.15823610157858614, "grad_norm": 0.804822908416548, "learning_rate": 9.575305284811129e-06, "loss": 0.3288, "step": 4611 }, { "epoch": 0.15827041866849692, "grad_norm": 0.9220415610711058, "learning_rate": 9.575081118481394e-06, "loss": 0.3531, "step": 4612 }, { "epoch": 0.1583047357584077, "grad_norm": 0.8954819354496065, "learning_rate": 9.574856895631862e-06, "loss": 0.3624, "step": 4613 }, { "epoch": 0.15833905284831845, "grad_norm": 0.8827521500947654, "learning_rate": 9.57463261626531e-06, "loss": 0.3471, "step": 4614 }, { "epoch": 0.15837336993822923, "grad_norm": 0.7297303328592912, "learning_rate": 9.574408280384507e-06, "loss": 0.3327, "step": 4615 }, { "epoch": 0.15840768702814, "grad_norm": 0.7697206793055974, "learning_rate": 9.574183887992219e-06, "loss": 0.3427, "step": 4616 }, { "epoch": 0.1584420041180508, "grad_norm": 0.8456160687809294, "learning_rate": 9.573959439091226e-06, "loss": 0.4093, "step": 4617 }, { "epoch": 0.15847632120796157, "grad_norm": 0.8287555314312804, "learning_rate": 9.573734933684293e-06, "loss": 0.3162, "step": 4618 }, { "epoch": 0.15851063829787235, "grad_norm": 1.019262252905813, "learning_rate": 9.5735103717742e-06, "loss": 0.3681, "step": 4619 }, { "epoch": 0.15854495538778313, "grad_norm": 0.8289642769375634, "learning_rate": 9.573285753363717e-06, "loss": 0.3035, "step": 4620 }, { "epoch": 0.15857927247769388, "grad_norm": 0.7421288309357991, "learning_rate": 9.57306107845562e-06, "loss": 0.3531, "step": 4621 }, { "epoch": 0.15861358956760466, "grad_norm": 0.9034510280792877, "learning_rate": 9.572836347052688e-06, "loss": 0.3295, "step": 4622 }, { "epoch": 0.15864790665751544, "grad_norm": 0.7214704676828544, "learning_rate": 9.572611559157693e-06, "loss": 0.3244, "step": 4623 }, { "epoch": 0.15868222374742622, "grad_norm": 0.9871347152984835, "learning_rate": 9.572386714773413e-06, "loss": 0.4238, "step": 4624 }, { "epoch": 0.158716540837337, "grad_norm": 0.7589822918882357, "learning_rate": 9.572161813902625e-06, "loss": 0.2971, "step": 4625 }, { "epoch": 0.15875085792724777, "grad_norm": 0.8556236897081804, "learning_rate": 9.57193685654811e-06, "loss": 0.3553, "step": 4626 }, { "epoch": 0.15878517501715855, "grad_norm": 0.8506830328836746, "learning_rate": 9.571711842712646e-06, "loss": 0.344, "step": 4627 }, { "epoch": 0.15881949210706933, "grad_norm": 0.8916406872291467, "learning_rate": 9.57148677239901e-06, "loss": 0.3676, "step": 4628 }, { "epoch": 0.15885380919698008, "grad_norm": 0.7509394913694909, "learning_rate": 9.571261645609986e-06, "loss": 0.3415, "step": 4629 }, { "epoch": 0.15888812628689086, "grad_norm": 0.791915747799871, "learning_rate": 9.571036462348354e-06, "loss": 0.3582, "step": 4630 }, { "epoch": 0.15892244337680164, "grad_norm": 0.8226223831119953, "learning_rate": 9.570811222616896e-06, "loss": 0.3778, "step": 4631 }, { "epoch": 0.15895676046671242, "grad_norm": 0.8124460378976525, "learning_rate": 9.570585926418395e-06, "loss": 0.3611, "step": 4632 }, { "epoch": 0.1589910775566232, "grad_norm": 0.8334812673230401, "learning_rate": 9.570360573755632e-06, "loss": 0.3147, "step": 4633 }, { "epoch": 0.15902539464653398, "grad_norm": 0.8725072895820113, "learning_rate": 9.570135164631393e-06, "loss": 0.3572, "step": 4634 }, { "epoch": 0.15905971173644476, "grad_norm": 0.8262378713621241, "learning_rate": 9.569909699048462e-06, "loss": 0.3485, "step": 4635 }, { "epoch": 0.15909402882635554, "grad_norm": 0.8607841136862127, "learning_rate": 9.569684177009625e-06, "loss": 0.377, "step": 4636 }, { "epoch": 0.1591283459162663, "grad_norm": 0.7960663510153896, "learning_rate": 9.569458598517669e-06, "loss": 0.3229, "step": 4637 }, { "epoch": 0.15916266300617707, "grad_norm": 0.775381708861982, "learning_rate": 9.56923296357538e-06, "loss": 0.3416, "step": 4638 }, { "epoch": 0.15919698009608785, "grad_norm": 0.784629109622759, "learning_rate": 9.569007272185542e-06, "loss": 0.315, "step": 4639 }, { "epoch": 0.15923129718599863, "grad_norm": 0.7866482883142695, "learning_rate": 9.568781524350949e-06, "loss": 0.3449, "step": 4640 }, { "epoch": 0.1592656142759094, "grad_norm": 0.8254281022489749, "learning_rate": 9.568555720074384e-06, "loss": 0.3261, "step": 4641 }, { "epoch": 0.15929993136582019, "grad_norm": 0.8679931219247844, "learning_rate": 9.568329859358643e-06, "loss": 0.3579, "step": 4642 }, { "epoch": 0.15933424845573096, "grad_norm": 0.9607615180306538, "learning_rate": 9.56810394220651e-06, "loss": 0.3119, "step": 4643 }, { "epoch": 0.15936856554564172, "grad_norm": 0.8281343510155292, "learning_rate": 9.56787796862078e-06, "loss": 0.3491, "step": 4644 }, { "epoch": 0.1594028826355525, "grad_norm": 0.7766497996597523, "learning_rate": 9.567651938604242e-06, "loss": 0.32, "step": 4645 }, { "epoch": 0.15943719972546327, "grad_norm": 0.7527193181425116, "learning_rate": 9.567425852159691e-06, "loss": 0.3324, "step": 4646 }, { "epoch": 0.15947151681537405, "grad_norm": 0.7566238427392501, "learning_rate": 9.567199709289918e-06, "loss": 0.2959, "step": 4647 }, { "epoch": 0.15950583390528483, "grad_norm": 0.8325254820782306, "learning_rate": 9.566973509997717e-06, "loss": 0.3754, "step": 4648 }, { "epoch": 0.1595401509951956, "grad_norm": 0.8124746164251765, "learning_rate": 9.566747254285884e-06, "loss": 0.3003, "step": 4649 }, { "epoch": 0.1595744680851064, "grad_norm": 0.7646927779631457, "learning_rate": 9.566520942157212e-06, "loss": 0.3181, "step": 4650 }, { "epoch": 0.15960878517501717, "grad_norm": 0.8281370380137214, "learning_rate": 9.566294573614499e-06, "loss": 0.3372, "step": 4651 }, { "epoch": 0.15964310226492792, "grad_norm": 0.821711378804115, "learning_rate": 9.566068148660539e-06, "loss": 0.3769, "step": 4652 }, { "epoch": 0.1596774193548387, "grad_norm": 0.8457068932850343, "learning_rate": 9.565841667298132e-06, "loss": 0.3383, "step": 4653 }, { "epoch": 0.15971173644474948, "grad_norm": 0.8933547586669635, "learning_rate": 9.565615129530072e-06, "loss": 0.356, "step": 4654 }, { "epoch": 0.15974605353466026, "grad_norm": 0.7767787356917261, "learning_rate": 9.565388535359164e-06, "loss": 0.3893, "step": 4655 }, { "epoch": 0.15978037062457104, "grad_norm": 0.8211906501780234, "learning_rate": 9.5651618847882e-06, "loss": 0.3927, "step": 4656 }, { "epoch": 0.15981468771448182, "grad_norm": 0.8326391032380355, "learning_rate": 9.564935177819984e-06, "loss": 0.3512, "step": 4657 }, { "epoch": 0.1598490048043926, "grad_norm": 0.8268692354680796, "learning_rate": 9.564708414457318e-06, "loss": 0.322, "step": 4658 }, { "epoch": 0.15988332189430338, "grad_norm": 0.8751102400419275, "learning_rate": 9.564481594702999e-06, "loss": 0.3402, "step": 4659 }, { "epoch": 0.15991763898421413, "grad_norm": 0.7478103488369331, "learning_rate": 9.564254718559833e-06, "loss": 0.3365, "step": 4660 }, { "epoch": 0.1599519560741249, "grad_norm": 0.9302121935286832, "learning_rate": 9.564027786030621e-06, "loss": 0.3438, "step": 4661 }, { "epoch": 0.15998627316403569, "grad_norm": 0.8112942026672165, "learning_rate": 9.563800797118166e-06, "loss": 0.3298, "step": 4662 }, { "epoch": 0.16002059025394647, "grad_norm": 0.7908054616707924, "learning_rate": 9.563573751825274e-06, "loss": 0.2679, "step": 4663 }, { "epoch": 0.16005490734385724, "grad_norm": 0.8985875211462684, "learning_rate": 9.563346650154748e-06, "loss": 0.3247, "step": 4664 }, { "epoch": 0.16008922443376802, "grad_norm": 0.7559281288850158, "learning_rate": 9.563119492109397e-06, "loss": 0.3014, "step": 4665 }, { "epoch": 0.1601235415236788, "grad_norm": 0.7541016807707405, "learning_rate": 9.562892277692021e-06, "loss": 0.2877, "step": 4666 }, { "epoch": 0.16015785861358955, "grad_norm": 0.8424401261900176, "learning_rate": 9.562665006905434e-06, "loss": 0.3263, "step": 4667 }, { "epoch": 0.16019217570350033, "grad_norm": 0.8669431323301002, "learning_rate": 9.562437679752439e-06, "loss": 0.3791, "step": 4668 }, { "epoch": 0.1602264927934111, "grad_norm": 0.8835510233445256, "learning_rate": 9.562210296235845e-06, "loss": 0.3633, "step": 4669 }, { "epoch": 0.1602608098833219, "grad_norm": 0.9218355293030539, "learning_rate": 9.561982856358463e-06, "loss": 0.3603, "step": 4670 }, { "epoch": 0.16029512697323267, "grad_norm": 0.7716476620756441, "learning_rate": 9.561755360123102e-06, "loss": 0.3186, "step": 4671 }, { "epoch": 0.16032944406314345, "grad_norm": 0.7853561822883516, "learning_rate": 9.56152780753257e-06, "loss": 0.3473, "step": 4672 }, { "epoch": 0.16036376115305423, "grad_norm": 0.8890934238925008, "learning_rate": 9.561300198589681e-06, "loss": 0.3576, "step": 4673 }, { "epoch": 0.160398078242965, "grad_norm": 0.7294073283390667, "learning_rate": 9.561072533297247e-06, "loss": 0.2841, "step": 4674 }, { "epoch": 0.16043239533287576, "grad_norm": 0.8034734528370561, "learning_rate": 9.560844811658079e-06, "loss": 0.3623, "step": 4675 }, { "epoch": 0.16046671242278654, "grad_norm": 0.8654425688275827, "learning_rate": 9.56061703367499e-06, "loss": 0.3712, "step": 4676 }, { "epoch": 0.16050102951269732, "grad_norm": 1.00719518790754, "learning_rate": 9.560389199350796e-06, "loss": 0.3691, "step": 4677 }, { "epoch": 0.1605353466026081, "grad_norm": 0.8105800641746416, "learning_rate": 9.56016130868831e-06, "loss": 0.3734, "step": 4678 }, { "epoch": 0.16056966369251888, "grad_norm": 0.8691765209569814, "learning_rate": 9.559933361690346e-06, "loss": 0.3813, "step": 4679 }, { "epoch": 0.16060398078242966, "grad_norm": 0.9177315339141239, "learning_rate": 9.559705358359722e-06, "loss": 0.3597, "step": 4680 }, { "epoch": 0.16063829787234044, "grad_norm": 0.8649856671026072, "learning_rate": 9.559477298699258e-06, "loss": 0.3449, "step": 4681 }, { "epoch": 0.16067261496225121, "grad_norm": 0.8179240819303507, "learning_rate": 9.559249182711765e-06, "loss": 0.3201, "step": 4682 }, { "epoch": 0.16070693205216197, "grad_norm": 0.7632925736230045, "learning_rate": 9.559021010400064e-06, "loss": 0.384, "step": 4683 }, { "epoch": 0.16074124914207275, "grad_norm": 0.7662522695873107, "learning_rate": 9.558792781766974e-06, "loss": 0.3256, "step": 4684 }, { "epoch": 0.16077556623198352, "grad_norm": 0.8381428794261273, "learning_rate": 9.558564496815315e-06, "loss": 0.3385, "step": 4685 }, { "epoch": 0.1608098833218943, "grad_norm": 0.7628248862324599, "learning_rate": 9.558336155547905e-06, "loss": 0.3189, "step": 4686 }, { "epoch": 0.16084420041180508, "grad_norm": 0.8397514949506893, "learning_rate": 9.558107757967567e-06, "loss": 0.3108, "step": 4687 }, { "epoch": 0.16087851750171586, "grad_norm": 0.8064210398322066, "learning_rate": 9.557879304077121e-06, "loss": 0.406, "step": 4688 }, { "epoch": 0.16091283459162664, "grad_norm": 0.8103615418753828, "learning_rate": 9.557650793879392e-06, "loss": 0.2953, "step": 4689 }, { "epoch": 0.1609471516815374, "grad_norm": 0.7952363617096511, "learning_rate": 9.5574222273772e-06, "loss": 0.3507, "step": 4690 }, { "epoch": 0.16098146877144817, "grad_norm": 0.919435555729005, "learning_rate": 9.557193604573369e-06, "loss": 0.3523, "step": 4691 }, { "epoch": 0.16101578586135895, "grad_norm": 3.1130788575397306, "learning_rate": 9.556964925470726e-06, "loss": 0.3209, "step": 4692 }, { "epoch": 0.16105010295126973, "grad_norm": 0.7778822090892162, "learning_rate": 9.556736190072092e-06, "loss": 0.369, "step": 4693 }, { "epoch": 0.1610844200411805, "grad_norm": 0.7616910648670229, "learning_rate": 9.556507398380296e-06, "loss": 0.3724, "step": 4694 }, { "epoch": 0.1611187371310913, "grad_norm": 0.7572603332959087, "learning_rate": 9.556278550398165e-06, "loss": 0.3311, "step": 4695 }, { "epoch": 0.16115305422100207, "grad_norm": 0.8824475999109581, "learning_rate": 9.556049646128522e-06, "loss": 0.3448, "step": 4696 }, { "epoch": 0.16118737131091285, "grad_norm": 0.7176899826229693, "learning_rate": 9.555820685574199e-06, "loss": 0.3503, "step": 4697 }, { "epoch": 0.1612216884008236, "grad_norm": 0.8721175394978998, "learning_rate": 9.555591668738024e-06, "loss": 0.2807, "step": 4698 }, { "epoch": 0.16125600549073438, "grad_norm": 0.767672876661267, "learning_rate": 9.555362595622824e-06, "loss": 0.3883, "step": 4699 }, { "epoch": 0.16129032258064516, "grad_norm": 0.8203706282993998, "learning_rate": 9.55513346623143e-06, "loss": 0.323, "step": 4700 }, { "epoch": 0.16132463967055594, "grad_norm": 0.8117693950560531, "learning_rate": 9.554904280566672e-06, "loss": 0.3642, "step": 4701 }, { "epoch": 0.16135895676046672, "grad_norm": 0.8589690389067957, "learning_rate": 9.554675038631384e-06, "loss": 0.3425, "step": 4702 }, { "epoch": 0.1613932738503775, "grad_norm": 0.7891910431602425, "learning_rate": 9.554445740428395e-06, "loss": 0.4083, "step": 4703 }, { "epoch": 0.16142759094028827, "grad_norm": 0.839194736139386, "learning_rate": 9.55421638596054e-06, "loss": 0.3304, "step": 4704 }, { "epoch": 0.16146190803019905, "grad_norm": 0.7912204592386285, "learning_rate": 9.55398697523065e-06, "loss": 0.339, "step": 4705 }, { "epoch": 0.1614962251201098, "grad_norm": 0.8881801110926544, "learning_rate": 9.553757508241561e-06, "loss": 0.321, "step": 4706 }, { "epoch": 0.16153054221002058, "grad_norm": 1.0427574242894766, "learning_rate": 9.553527984996106e-06, "loss": 0.35, "step": 4707 }, { "epoch": 0.16156485929993136, "grad_norm": 0.7106416787983022, "learning_rate": 9.553298405497123e-06, "loss": 0.3283, "step": 4708 }, { "epoch": 0.16159917638984214, "grad_norm": 0.7845790718835058, "learning_rate": 9.553068769747446e-06, "loss": 0.3241, "step": 4709 }, { "epoch": 0.16163349347975292, "grad_norm": 0.9199943963479337, "learning_rate": 9.552839077749914e-06, "loss": 0.362, "step": 4710 }, { "epoch": 0.1616678105696637, "grad_norm": 0.8331737499624006, "learning_rate": 9.552609329507364e-06, "loss": 0.4068, "step": 4711 }, { "epoch": 0.16170212765957448, "grad_norm": 0.7368374244694618, "learning_rate": 9.55237952502263e-06, "loss": 0.2983, "step": 4712 }, { "epoch": 0.16173644474948523, "grad_norm": 0.8286340050670608, "learning_rate": 9.552149664298557e-06, "loss": 0.3268, "step": 4713 }, { "epoch": 0.161770761839396, "grad_norm": 0.7650165018729329, "learning_rate": 9.55191974733798e-06, "loss": 0.3537, "step": 4714 }, { "epoch": 0.1618050789293068, "grad_norm": 0.8048293328421433, "learning_rate": 9.551689774143745e-06, "loss": 0.3279, "step": 4715 }, { "epoch": 0.16183939601921757, "grad_norm": 0.7379435393300394, "learning_rate": 9.551459744718687e-06, "loss": 0.3579, "step": 4716 }, { "epoch": 0.16187371310912835, "grad_norm": 0.833984708041095, "learning_rate": 9.551229659065652e-06, "loss": 0.3184, "step": 4717 }, { "epoch": 0.16190803019903913, "grad_norm": 0.7781306608342972, "learning_rate": 9.55099951718748e-06, "loss": 0.3518, "step": 4718 }, { "epoch": 0.1619423472889499, "grad_norm": 0.8866169639739102, "learning_rate": 9.550769319087015e-06, "loss": 0.4599, "step": 4719 }, { "epoch": 0.16197666437886069, "grad_norm": 0.7832182915639522, "learning_rate": 9.5505390647671e-06, "loss": 0.3581, "step": 4720 }, { "epoch": 0.16201098146877144, "grad_norm": 0.7961926412281934, "learning_rate": 9.550308754230582e-06, "loss": 0.3089, "step": 4721 }, { "epoch": 0.16204529855868222, "grad_norm": 0.8474186002467107, "learning_rate": 9.550078387480302e-06, "loss": 0.3438, "step": 4722 }, { "epoch": 0.162079615648593, "grad_norm": 1.043045458392431, "learning_rate": 9.54984796451911e-06, "loss": 0.3843, "step": 4723 }, { "epoch": 0.16211393273850377, "grad_norm": 0.7553657626425755, "learning_rate": 9.54961748534985e-06, "loss": 0.3455, "step": 4724 }, { "epoch": 0.16214824982841455, "grad_norm": 0.8234113879037566, "learning_rate": 9.549386949975372e-06, "loss": 0.371, "step": 4725 }, { "epoch": 0.16218256691832533, "grad_norm": 0.7815953395316044, "learning_rate": 9.549156358398522e-06, "loss": 0.3412, "step": 4726 }, { "epoch": 0.1622168840082361, "grad_norm": 0.8203011159903748, "learning_rate": 9.548925710622148e-06, "loss": 0.3162, "step": 4727 }, { "epoch": 0.16225120109814686, "grad_norm": 0.7764139802918955, "learning_rate": 9.5486950066491e-06, "loss": 0.3327, "step": 4728 }, { "epoch": 0.16228551818805764, "grad_norm": 0.8760201435990489, "learning_rate": 9.54846424648223e-06, "loss": 0.3634, "step": 4729 }, { "epoch": 0.16231983527796842, "grad_norm": 0.9327454225633871, "learning_rate": 9.548233430124386e-06, "loss": 0.3445, "step": 4730 }, { "epoch": 0.1623541523678792, "grad_norm": 0.7899749882980164, "learning_rate": 9.548002557578422e-06, "loss": 0.3545, "step": 4731 }, { "epoch": 0.16238846945778998, "grad_norm": 0.8409315189547458, "learning_rate": 9.547771628847187e-06, "loss": 0.2902, "step": 4732 }, { "epoch": 0.16242278654770076, "grad_norm": 0.8339988939995171, "learning_rate": 9.547540643933538e-06, "loss": 0.3346, "step": 4733 }, { "epoch": 0.16245710363761154, "grad_norm": 0.8185933249234124, "learning_rate": 9.547309602840325e-06, "loss": 0.3706, "step": 4734 }, { "epoch": 0.16249142072752232, "grad_norm": 0.814908921251915, "learning_rate": 9.547078505570402e-06, "loss": 0.3637, "step": 4735 }, { "epoch": 0.16252573781743307, "grad_norm": 0.7736625456604638, "learning_rate": 9.546847352126628e-06, "loss": 0.3606, "step": 4736 }, { "epoch": 0.16256005490734385, "grad_norm": 0.7910238995723453, "learning_rate": 9.546616142511855e-06, "loss": 0.3434, "step": 4737 }, { "epoch": 0.16259437199725463, "grad_norm": 0.828115974207723, "learning_rate": 9.54638487672894e-06, "loss": 0.4038, "step": 4738 }, { "epoch": 0.1626286890871654, "grad_norm": 0.7370173181764158, "learning_rate": 9.54615355478074e-06, "loss": 0.3303, "step": 4739 }, { "epoch": 0.16266300617707619, "grad_norm": 0.8225964349499488, "learning_rate": 9.545922176670115e-06, "loss": 0.3315, "step": 4740 }, { "epoch": 0.16269732326698697, "grad_norm": 0.7911651134433942, "learning_rate": 9.54569074239992e-06, "loss": 0.3019, "step": 4741 }, { "epoch": 0.16273164035689774, "grad_norm": 0.7927786050993216, "learning_rate": 9.545459251973016e-06, "loss": 0.3715, "step": 4742 }, { "epoch": 0.16276595744680852, "grad_norm": 0.7763611662324954, "learning_rate": 9.545227705392262e-06, "loss": 0.3101, "step": 4743 }, { "epoch": 0.16280027453671927, "grad_norm": 0.8540089158936266, "learning_rate": 9.54499610266052e-06, "loss": 0.3492, "step": 4744 }, { "epoch": 0.16283459162663005, "grad_norm": 0.8960332930498356, "learning_rate": 9.54476444378065e-06, "loss": 0.365, "step": 4745 }, { "epoch": 0.16286890871654083, "grad_norm": 0.7826369875059851, "learning_rate": 9.544532728755514e-06, "loss": 0.3539, "step": 4746 }, { "epoch": 0.1629032258064516, "grad_norm": 0.8384466184457201, "learning_rate": 9.544300957587973e-06, "loss": 0.3538, "step": 4747 }, { "epoch": 0.1629375428963624, "grad_norm": 0.8153686885007774, "learning_rate": 9.544069130280893e-06, "loss": 0.321, "step": 4748 }, { "epoch": 0.16297185998627317, "grad_norm": 0.8425352526612662, "learning_rate": 9.543837246837137e-06, "loss": 0.3474, "step": 4749 }, { "epoch": 0.16300617707618395, "grad_norm": 0.7328472939099812, "learning_rate": 9.543605307259568e-06, "loss": 0.3051, "step": 4750 }, { "epoch": 0.1630404941660947, "grad_norm": 0.8607626064047931, "learning_rate": 9.543373311551054e-06, "loss": 0.3282, "step": 4751 }, { "epoch": 0.16307481125600548, "grad_norm": 0.8006381220474551, "learning_rate": 9.54314125971446e-06, "loss": 0.3325, "step": 4752 }, { "epoch": 0.16310912834591626, "grad_norm": 0.852389667444167, "learning_rate": 9.542909151752654e-06, "loss": 0.3319, "step": 4753 }, { "epoch": 0.16314344543582704, "grad_norm": 0.8478179916186276, "learning_rate": 9.5426769876685e-06, "loss": 0.3269, "step": 4754 }, { "epoch": 0.16317776252573782, "grad_norm": 0.9464765837271125, "learning_rate": 9.542444767464867e-06, "loss": 0.3161, "step": 4755 }, { "epoch": 0.1632120796156486, "grad_norm": 0.8051832859223419, "learning_rate": 9.542212491144625e-06, "loss": 0.3392, "step": 4756 }, { "epoch": 0.16324639670555938, "grad_norm": 0.7825687922679807, "learning_rate": 9.541980158710645e-06, "loss": 0.3352, "step": 4757 }, { "epoch": 0.16328071379547016, "grad_norm": 0.8227242734941501, "learning_rate": 9.541747770165794e-06, "loss": 0.2978, "step": 4758 }, { "epoch": 0.1633150308853809, "grad_norm": 0.8234493941607828, "learning_rate": 9.541515325512947e-06, "loss": 0.332, "step": 4759 }, { "epoch": 0.1633493479752917, "grad_norm": 0.7915242655088034, "learning_rate": 9.541282824754971e-06, "loss": 0.3372, "step": 4760 }, { "epoch": 0.16338366506520247, "grad_norm": 0.7576933168481177, "learning_rate": 9.54105026789474e-06, "loss": 0.2834, "step": 4761 }, { "epoch": 0.16341798215511324, "grad_norm": 0.7824687063891149, "learning_rate": 9.54081765493513e-06, "loss": 0.3743, "step": 4762 }, { "epoch": 0.16345229924502402, "grad_norm": 0.9531623018374334, "learning_rate": 9.540584985879009e-06, "loss": 0.4091, "step": 4763 }, { "epoch": 0.1634866163349348, "grad_norm": 0.7663304769885798, "learning_rate": 9.540352260729255e-06, "loss": 0.3315, "step": 4764 }, { "epoch": 0.16352093342484558, "grad_norm": 0.7977235271299082, "learning_rate": 9.540119479488743e-06, "loss": 0.3361, "step": 4765 }, { "epoch": 0.16355525051475636, "grad_norm": 1.1071315475925219, "learning_rate": 9.539886642160348e-06, "loss": 0.3466, "step": 4766 }, { "epoch": 0.1635895676046671, "grad_norm": 1.0665553895796667, "learning_rate": 9.539653748746947e-06, "loss": 0.357, "step": 4767 }, { "epoch": 0.1636238846945779, "grad_norm": 0.847851846303009, "learning_rate": 9.539420799251416e-06, "loss": 0.3094, "step": 4768 }, { "epoch": 0.16365820178448867, "grad_norm": 0.7550453878077739, "learning_rate": 9.539187793676634e-06, "loss": 0.3384, "step": 4769 }, { "epoch": 0.16369251887439945, "grad_norm": 0.7600448949308272, "learning_rate": 9.538954732025479e-06, "loss": 0.3755, "step": 4770 }, { "epoch": 0.16372683596431023, "grad_norm": 0.8728988083178885, "learning_rate": 9.538721614300828e-06, "loss": 0.2707, "step": 4771 }, { "epoch": 0.163761153054221, "grad_norm": 0.6837169303276204, "learning_rate": 9.538488440505565e-06, "loss": 0.3352, "step": 4772 }, { "epoch": 0.1637954701441318, "grad_norm": 0.8000952395397006, "learning_rate": 9.538255210642569e-06, "loss": 0.3153, "step": 4773 }, { "epoch": 0.16382978723404254, "grad_norm": 0.7545320228576291, "learning_rate": 9.53802192471472e-06, "loss": 0.3261, "step": 4774 }, { "epoch": 0.16386410432395332, "grad_norm": 0.8742949798483428, "learning_rate": 9.537788582724903e-06, "loss": 0.3644, "step": 4775 }, { "epoch": 0.1638984214138641, "grad_norm": 0.8044796245605201, "learning_rate": 9.537555184675995e-06, "loss": 0.2942, "step": 4776 }, { "epoch": 0.16393273850377488, "grad_norm": 0.9412493245437039, "learning_rate": 9.537321730570886e-06, "loss": 0.3575, "step": 4777 }, { "epoch": 0.16396705559368566, "grad_norm": 0.7944903002888425, "learning_rate": 9.537088220412457e-06, "loss": 0.3373, "step": 4778 }, { "epoch": 0.16400137268359644, "grad_norm": 1.1315585407503987, "learning_rate": 9.53685465420359e-06, "loss": 0.3486, "step": 4779 }, { "epoch": 0.16403568977350722, "grad_norm": 0.8534627817792472, "learning_rate": 9.536621031947176e-06, "loss": 0.3651, "step": 4780 }, { "epoch": 0.164070006863418, "grad_norm": 0.8026855352791733, "learning_rate": 9.536387353646096e-06, "loss": 0.2999, "step": 4781 }, { "epoch": 0.16410432395332875, "grad_norm": 0.8355404270536484, "learning_rate": 9.536153619303242e-06, "loss": 0.3858, "step": 4782 }, { "epoch": 0.16413864104323952, "grad_norm": 0.8180699017478485, "learning_rate": 9.535919828921496e-06, "loss": 0.3728, "step": 4783 }, { "epoch": 0.1641729581331503, "grad_norm": 0.7703917514075569, "learning_rate": 9.53568598250375e-06, "loss": 0.3407, "step": 4784 }, { "epoch": 0.16420727522306108, "grad_norm": 0.768515096340196, "learning_rate": 9.535452080052892e-06, "loss": 0.3287, "step": 4785 }, { "epoch": 0.16424159231297186, "grad_norm": 0.7225296258618804, "learning_rate": 9.53521812157181e-06, "loss": 0.2844, "step": 4786 }, { "epoch": 0.16427590940288264, "grad_norm": 0.8886996694873812, "learning_rate": 9.534984107063398e-06, "loss": 0.369, "step": 4787 }, { "epoch": 0.16431022649279342, "grad_norm": 0.8212278506482317, "learning_rate": 9.534750036530543e-06, "loss": 0.3521, "step": 4788 }, { "epoch": 0.1643445435827042, "grad_norm": 0.7492121090397337, "learning_rate": 9.53451590997614e-06, "loss": 0.3733, "step": 4789 }, { "epoch": 0.16437886067261495, "grad_norm": 0.7807754740904937, "learning_rate": 9.534281727403078e-06, "loss": 0.3231, "step": 4790 }, { "epoch": 0.16441317776252573, "grad_norm": 0.8049302497917585, "learning_rate": 9.534047488814251e-06, "loss": 0.3465, "step": 4791 }, { "epoch": 0.1644474948524365, "grad_norm": 0.847750177064445, "learning_rate": 9.533813194212555e-06, "loss": 0.3547, "step": 4792 }, { "epoch": 0.1644818119423473, "grad_norm": 0.836747075387184, "learning_rate": 9.533578843600883e-06, "loss": 0.3351, "step": 4793 }, { "epoch": 0.16451612903225807, "grad_norm": 0.7490293584684147, "learning_rate": 9.533344436982128e-06, "loss": 0.3158, "step": 4794 }, { "epoch": 0.16455044612216885, "grad_norm": 0.7812272480369489, "learning_rate": 9.53310997435919e-06, "loss": 0.3195, "step": 4795 }, { "epoch": 0.16458476321207963, "grad_norm": 0.7574918539721223, "learning_rate": 9.532875455734962e-06, "loss": 0.3147, "step": 4796 }, { "epoch": 0.16461908030199038, "grad_norm": 0.9108605340034065, "learning_rate": 9.532640881112346e-06, "loss": 0.3353, "step": 4797 }, { "epoch": 0.16465339739190116, "grad_norm": 0.7739070063944312, "learning_rate": 9.532406250494233e-06, "loss": 0.3475, "step": 4798 }, { "epoch": 0.16468771448181194, "grad_norm": 0.7213281078521598, "learning_rate": 9.532171563883525e-06, "loss": 0.2751, "step": 4799 }, { "epoch": 0.16472203157172272, "grad_norm": 0.8561352155589865, "learning_rate": 9.531936821283121e-06, "loss": 0.3274, "step": 4800 }, { "epoch": 0.1647563486616335, "grad_norm": 0.8136085345544708, "learning_rate": 9.531702022695924e-06, "loss": 0.3521, "step": 4801 }, { "epoch": 0.16479066575154427, "grad_norm": 0.7926668719113505, "learning_rate": 9.53146716812483e-06, "loss": 0.3924, "step": 4802 }, { "epoch": 0.16482498284145505, "grad_norm": 0.8847701463017431, "learning_rate": 9.531232257572742e-06, "loss": 0.3249, "step": 4803 }, { "epoch": 0.16485929993136583, "grad_norm": 0.7884965999358589, "learning_rate": 9.530997291042563e-06, "loss": 0.312, "step": 4804 }, { "epoch": 0.16489361702127658, "grad_norm": 0.7664702000174205, "learning_rate": 9.530762268537196e-06, "loss": 0.3353, "step": 4805 }, { "epoch": 0.16492793411118736, "grad_norm": 0.8018625278382542, "learning_rate": 9.530527190059542e-06, "loss": 0.3642, "step": 4806 }, { "epoch": 0.16496225120109814, "grad_norm": 0.7329584216896466, "learning_rate": 9.530292055612508e-06, "loss": 0.3732, "step": 4807 }, { "epoch": 0.16499656829100892, "grad_norm": 0.7634426880196034, "learning_rate": 9.530056865198997e-06, "loss": 0.3949, "step": 4808 }, { "epoch": 0.1650308853809197, "grad_norm": 0.7706977739929426, "learning_rate": 9.529821618821914e-06, "loss": 0.3916, "step": 4809 }, { "epoch": 0.16506520247083048, "grad_norm": 0.9156801145482019, "learning_rate": 9.529586316484167e-06, "loss": 0.3633, "step": 4810 }, { "epoch": 0.16509951956074126, "grad_norm": 0.7571318742415286, "learning_rate": 9.529350958188663e-06, "loss": 0.3815, "step": 4811 }, { "epoch": 0.16513383665065204, "grad_norm": 0.767846604895316, "learning_rate": 9.529115543938307e-06, "loss": 0.3065, "step": 4812 }, { "epoch": 0.1651681537405628, "grad_norm": 0.7381200340488476, "learning_rate": 9.528880073736008e-06, "loss": 0.3049, "step": 4813 }, { "epoch": 0.16520247083047357, "grad_norm": 0.8574411519218241, "learning_rate": 9.528644547584679e-06, "loss": 0.321, "step": 4814 }, { "epoch": 0.16523678792038435, "grad_norm": 0.8147928460875076, "learning_rate": 9.528408965487226e-06, "loss": 0.3617, "step": 4815 }, { "epoch": 0.16527110501029513, "grad_norm": 0.7715371542961418, "learning_rate": 9.528173327446557e-06, "loss": 0.3616, "step": 4816 }, { "epoch": 0.1653054221002059, "grad_norm": 0.810732680555384, "learning_rate": 9.527937633465587e-06, "loss": 0.378, "step": 4817 }, { "epoch": 0.16533973919011669, "grad_norm": 0.7912726160567829, "learning_rate": 9.527701883547228e-06, "loss": 0.4143, "step": 4818 }, { "epoch": 0.16537405628002746, "grad_norm": 0.8439090286703, "learning_rate": 9.527466077694389e-06, "loss": 0.3872, "step": 4819 }, { "epoch": 0.16540837336993822, "grad_norm": 0.7214459575687963, "learning_rate": 9.527230215909989e-06, "loss": 0.3432, "step": 4820 }, { "epoch": 0.165442690459849, "grad_norm": 0.8671026157202842, "learning_rate": 9.526994298196935e-06, "loss": 0.3477, "step": 4821 }, { "epoch": 0.16547700754975977, "grad_norm": 0.7991411515860991, "learning_rate": 9.526758324558143e-06, "loss": 0.3695, "step": 4822 }, { "epoch": 0.16551132463967055, "grad_norm": 0.8231936261872933, "learning_rate": 9.526522294996532e-06, "loss": 0.2916, "step": 4823 }, { "epoch": 0.16554564172958133, "grad_norm": 0.7618997822742937, "learning_rate": 9.526286209515015e-06, "loss": 0.3462, "step": 4824 }, { "epoch": 0.1655799588194921, "grad_norm": 0.7936061732749873, "learning_rate": 9.526050068116508e-06, "loss": 0.3112, "step": 4825 }, { "epoch": 0.1656142759094029, "grad_norm": 0.8891496901353237, "learning_rate": 9.525813870803931e-06, "loss": 0.3365, "step": 4826 }, { "epoch": 0.16564859299931367, "grad_norm": 0.8083132744002419, "learning_rate": 9.525577617580198e-06, "loss": 0.3404, "step": 4827 }, { "epoch": 0.16568291008922442, "grad_norm": 0.790050634910445, "learning_rate": 9.52534130844823e-06, "loss": 0.303, "step": 4828 }, { "epoch": 0.1657172271791352, "grad_norm": 0.7652234068328398, "learning_rate": 9.525104943410949e-06, "loss": 0.3121, "step": 4829 }, { "epoch": 0.16575154426904598, "grad_norm": 0.8693337969870677, "learning_rate": 9.52486852247127e-06, "loss": 0.3853, "step": 4830 }, { "epoch": 0.16578586135895676, "grad_norm": 0.7439080726572007, "learning_rate": 9.524632045632116e-06, "loss": 0.334, "step": 4831 }, { "epoch": 0.16582017844886754, "grad_norm": 0.8881012419732411, "learning_rate": 9.52439551289641e-06, "loss": 0.391, "step": 4832 }, { "epoch": 0.16585449553877832, "grad_norm": 0.8855993938463562, "learning_rate": 9.52415892426707e-06, "loss": 0.3988, "step": 4833 }, { "epoch": 0.1658888126286891, "grad_norm": 0.9285656622336455, "learning_rate": 9.523922279747023e-06, "loss": 0.3223, "step": 4834 }, { "epoch": 0.16592312971859988, "grad_norm": 0.8417809765768095, "learning_rate": 9.523685579339188e-06, "loss": 0.3897, "step": 4835 }, { "epoch": 0.16595744680851063, "grad_norm": 0.8274477508732689, "learning_rate": 9.523448823046494e-06, "loss": 0.297, "step": 4836 }, { "epoch": 0.1659917638984214, "grad_norm": 0.8065516512549216, "learning_rate": 9.523212010871863e-06, "loss": 0.3322, "step": 4837 }, { "epoch": 0.1660260809883322, "grad_norm": 0.8474305672422727, "learning_rate": 9.52297514281822e-06, "loss": 0.3277, "step": 4838 }, { "epoch": 0.16606039807824297, "grad_norm": 0.7392245520615667, "learning_rate": 9.522738218888494e-06, "loss": 0.367, "step": 4839 }, { "epoch": 0.16609471516815374, "grad_norm": 0.7239303819795556, "learning_rate": 9.52250123908561e-06, "loss": 0.2994, "step": 4840 }, { "epoch": 0.16612903225806452, "grad_norm": 0.8526951286743789, "learning_rate": 9.522264203412498e-06, "loss": 0.3259, "step": 4841 }, { "epoch": 0.1661633493479753, "grad_norm": 0.8736234796857664, "learning_rate": 9.522027111872082e-06, "loss": 0.375, "step": 4842 }, { "epoch": 0.16619766643788605, "grad_norm": 0.8771255640837398, "learning_rate": 9.521789964467292e-06, "loss": 0.3806, "step": 4843 }, { "epoch": 0.16623198352779683, "grad_norm": 0.7982160029987346, "learning_rate": 9.52155276120106e-06, "loss": 0.4044, "step": 4844 }, { "epoch": 0.1662663006177076, "grad_norm": 0.8551768712083877, "learning_rate": 9.521315502076315e-06, "loss": 0.3895, "step": 4845 }, { "epoch": 0.1663006177076184, "grad_norm": 0.7745016858263446, "learning_rate": 9.521078187095989e-06, "loss": 0.3127, "step": 4846 }, { "epoch": 0.16633493479752917, "grad_norm": 0.7409005199983952, "learning_rate": 9.520840816263013e-06, "loss": 0.3479, "step": 4847 }, { "epoch": 0.16636925188743995, "grad_norm": 0.9567105707324618, "learning_rate": 9.52060338958032e-06, "loss": 0.3404, "step": 4848 }, { "epoch": 0.16640356897735073, "grad_norm": 0.7503906658355428, "learning_rate": 9.520365907050841e-06, "loss": 0.3279, "step": 4849 }, { "epoch": 0.1664378860672615, "grad_norm": 0.8897929561876523, "learning_rate": 9.520128368677513e-06, "loss": 0.3188, "step": 4850 }, { "epoch": 0.16647220315717226, "grad_norm": 0.9150703447790733, "learning_rate": 9.519890774463267e-06, "loss": 0.3823, "step": 4851 }, { "epoch": 0.16650652024708304, "grad_norm": 0.7930155584346814, "learning_rate": 9.519653124411042e-06, "loss": 0.3129, "step": 4852 }, { "epoch": 0.16654083733699382, "grad_norm": 0.9096119572742523, "learning_rate": 9.519415418523772e-06, "loss": 0.3656, "step": 4853 }, { "epoch": 0.1665751544269046, "grad_norm": 0.81133040532315, "learning_rate": 9.519177656804393e-06, "loss": 0.3559, "step": 4854 }, { "epoch": 0.16660947151681538, "grad_norm": 0.8405826050009426, "learning_rate": 9.518939839255843e-06, "loss": 0.3429, "step": 4855 }, { "epoch": 0.16664378860672616, "grad_norm": 0.8071301730692605, "learning_rate": 9.51870196588106e-06, "loss": 0.319, "step": 4856 }, { "epoch": 0.16667810569663694, "grad_norm": 0.8161782324371469, "learning_rate": 9.518464036682983e-06, "loss": 0.2955, "step": 4857 }, { "epoch": 0.1667124227865477, "grad_norm": 0.7554730396621208, "learning_rate": 9.51822605166455e-06, "loss": 0.3059, "step": 4858 }, { "epoch": 0.16674673987645847, "grad_norm": 0.7655839449838975, "learning_rate": 9.517988010828703e-06, "loss": 0.3255, "step": 4859 }, { "epoch": 0.16678105696636925, "grad_norm": 0.8421424183385552, "learning_rate": 9.51774991417838e-06, "loss": 0.356, "step": 4860 }, { "epoch": 0.16681537405628002, "grad_norm": 0.8649930311334901, "learning_rate": 9.517511761716524e-06, "loss": 0.3374, "step": 4861 }, { "epoch": 0.1668496911461908, "grad_norm": 0.7595706744808994, "learning_rate": 9.517273553446078e-06, "loss": 0.3118, "step": 4862 }, { "epoch": 0.16688400823610158, "grad_norm": 0.8425063370223558, "learning_rate": 9.517035289369985e-06, "loss": 0.3491, "step": 4863 }, { "epoch": 0.16691832532601236, "grad_norm": 0.8451056242504416, "learning_rate": 9.516796969491185e-06, "loss": 0.3359, "step": 4864 }, { "epoch": 0.16695264241592314, "grad_norm": 0.7870714011188295, "learning_rate": 9.516558593812625e-06, "loss": 0.2992, "step": 4865 }, { "epoch": 0.1669869595058339, "grad_norm": 0.8306202080741668, "learning_rate": 9.516320162337251e-06, "loss": 0.3373, "step": 4866 }, { "epoch": 0.16702127659574467, "grad_norm": 0.8969133398376488, "learning_rate": 9.516081675068007e-06, "loss": 0.3932, "step": 4867 }, { "epoch": 0.16705559368565545, "grad_norm": 0.9310055807556554, "learning_rate": 9.515843132007837e-06, "loss": 0.3947, "step": 4868 }, { "epoch": 0.16708991077556623, "grad_norm": 0.9361903949498714, "learning_rate": 9.515604533159691e-06, "loss": 0.3392, "step": 4869 }, { "epoch": 0.167124227865477, "grad_norm": 0.9248955190469484, "learning_rate": 9.515365878526516e-06, "loss": 0.3478, "step": 4870 }, { "epoch": 0.1671585449553878, "grad_norm": 0.7704069245616976, "learning_rate": 9.51512716811126e-06, "loss": 0.3012, "step": 4871 }, { "epoch": 0.16719286204529857, "grad_norm": 0.731025608337145, "learning_rate": 9.51488840191687e-06, "loss": 0.2645, "step": 4872 }, { "epoch": 0.16722717913520935, "grad_norm": 0.8790927924456028, "learning_rate": 9.514649579946301e-06, "loss": 0.3036, "step": 4873 }, { "epoch": 0.1672614962251201, "grad_norm": 0.6615607143658528, "learning_rate": 9.5144107022025e-06, "loss": 0.3239, "step": 4874 }, { "epoch": 0.16729581331503088, "grad_norm": 0.8085608240479981, "learning_rate": 9.514171768688414e-06, "loss": 0.3955, "step": 4875 }, { "epoch": 0.16733013040494166, "grad_norm": 0.8294042130074262, "learning_rate": 9.513932779407002e-06, "loss": 0.3246, "step": 4876 }, { "epoch": 0.16736444749485244, "grad_norm": 0.7785119355109881, "learning_rate": 9.513693734361212e-06, "loss": 0.416, "step": 4877 }, { "epoch": 0.16739876458476322, "grad_norm": 0.8254289729048988, "learning_rate": 9.513454633553999e-06, "loss": 0.3849, "step": 4878 }, { "epoch": 0.167433081674674, "grad_norm": 0.8058440452655087, "learning_rate": 9.513215476988315e-06, "loss": 0.3328, "step": 4879 }, { "epoch": 0.16746739876458477, "grad_norm": 0.9256914230715155, "learning_rate": 9.512976264667118e-06, "loss": 0.3506, "step": 4880 }, { "epoch": 0.16750171585449553, "grad_norm": 0.9561553340464591, "learning_rate": 9.51273699659336e-06, "loss": 0.3553, "step": 4881 }, { "epoch": 0.1675360329444063, "grad_norm": 0.7495477823036469, "learning_rate": 9.512497672769997e-06, "loss": 0.3573, "step": 4882 }, { "epoch": 0.16757035003431708, "grad_norm": 0.8080772801782757, "learning_rate": 9.512258293199988e-06, "loss": 0.3292, "step": 4883 }, { "epoch": 0.16760466712422786, "grad_norm": 0.771594662905187, "learning_rate": 9.512018857886286e-06, "loss": 0.3043, "step": 4884 }, { "epoch": 0.16763898421413864, "grad_norm": 1.0491634646482473, "learning_rate": 9.511779366831853e-06, "loss": 0.3256, "step": 4885 }, { "epoch": 0.16767330130404942, "grad_norm": 0.7628138463700315, "learning_rate": 9.511539820039646e-06, "loss": 0.3266, "step": 4886 }, { "epoch": 0.1677076183939602, "grad_norm": 0.7876783383352315, "learning_rate": 9.511300217512624e-06, "loss": 0.3451, "step": 4887 }, { "epoch": 0.16774193548387098, "grad_norm": 0.7368869064815654, "learning_rate": 9.511060559253747e-06, "loss": 0.2993, "step": 4888 }, { "epoch": 0.16777625257378173, "grad_norm": 0.789607358963453, "learning_rate": 9.510820845265977e-06, "loss": 0.3656, "step": 4889 }, { "epoch": 0.1678105696636925, "grad_norm": 0.732350569465227, "learning_rate": 9.510581075552276e-06, "loss": 0.3221, "step": 4890 }, { "epoch": 0.1678448867536033, "grad_norm": 0.7905314064769655, "learning_rate": 9.510341250115602e-06, "loss": 0.3631, "step": 4891 }, { "epoch": 0.16787920384351407, "grad_norm": 0.7683803636820273, "learning_rate": 9.510101368958922e-06, "loss": 0.3052, "step": 4892 }, { "epoch": 0.16791352093342485, "grad_norm": 1.1262968490418797, "learning_rate": 9.509861432085196e-06, "loss": 0.3617, "step": 4893 }, { "epoch": 0.16794783802333563, "grad_norm": 0.8401266190060297, "learning_rate": 9.509621439497392e-06, "loss": 0.308, "step": 4894 }, { "epoch": 0.1679821551132464, "grad_norm": 0.9015491892568022, "learning_rate": 9.509381391198474e-06, "loss": 0.3009, "step": 4895 }, { "epoch": 0.16801647220315719, "grad_norm": 0.8021788449891144, "learning_rate": 9.509141287191404e-06, "loss": 0.2784, "step": 4896 }, { "epoch": 0.16805078929306794, "grad_norm": 0.8973570452251662, "learning_rate": 9.50890112747915e-06, "loss": 0.3465, "step": 4897 }, { "epoch": 0.16808510638297872, "grad_norm": 0.7286416284727715, "learning_rate": 9.508660912064683e-06, "loss": 0.3093, "step": 4898 }, { "epoch": 0.1681194234728895, "grad_norm": 0.8658061580726023, "learning_rate": 9.508420640950965e-06, "loss": 0.3319, "step": 4899 }, { "epoch": 0.16815374056280027, "grad_norm": 1.1905533267169626, "learning_rate": 9.508180314140964e-06, "loss": 0.3489, "step": 4900 }, { "epoch": 0.16818805765271105, "grad_norm": 0.9433053976674426, "learning_rate": 9.507939931637653e-06, "loss": 0.3386, "step": 4901 }, { "epoch": 0.16822237474262183, "grad_norm": 0.8462535930186288, "learning_rate": 9.507699493444e-06, "loss": 0.3555, "step": 4902 }, { "epoch": 0.1682566918325326, "grad_norm": 0.8054682168502115, "learning_rate": 9.507458999562975e-06, "loss": 0.3699, "step": 4903 }, { "epoch": 0.16829100892244336, "grad_norm": 0.8647838439195948, "learning_rate": 9.507218449997551e-06, "loss": 0.3368, "step": 4904 }, { "epoch": 0.16832532601235414, "grad_norm": 0.7286786077404734, "learning_rate": 9.506977844750696e-06, "loss": 0.2797, "step": 4905 }, { "epoch": 0.16835964310226492, "grad_norm": 0.8556677601881248, "learning_rate": 9.506737183825385e-06, "loss": 0.3886, "step": 4906 }, { "epoch": 0.1683939601921757, "grad_norm": 0.7672240806442276, "learning_rate": 9.50649646722459e-06, "loss": 0.2795, "step": 4907 }, { "epoch": 0.16842827728208648, "grad_norm": 0.775515184888408, "learning_rate": 9.506255694951284e-06, "loss": 0.3503, "step": 4908 }, { "epoch": 0.16846259437199726, "grad_norm": 0.7852040224071696, "learning_rate": 9.506014867008446e-06, "loss": 0.3459, "step": 4909 }, { "epoch": 0.16849691146190804, "grad_norm": 0.854277898620031, "learning_rate": 9.505773983399046e-06, "loss": 0.3345, "step": 4910 }, { "epoch": 0.16853122855181882, "grad_norm": 0.8645833655889498, "learning_rate": 9.50553304412606e-06, "loss": 0.3137, "step": 4911 }, { "epoch": 0.16856554564172957, "grad_norm": 0.8673541676384868, "learning_rate": 9.50529204919247e-06, "loss": 0.3599, "step": 4912 }, { "epoch": 0.16859986273164035, "grad_norm": 0.8476520917868515, "learning_rate": 9.505050998601246e-06, "loss": 0.3837, "step": 4913 }, { "epoch": 0.16863417982155113, "grad_norm": 0.9056412578822751, "learning_rate": 9.504809892355372e-06, "loss": 0.3426, "step": 4914 }, { "epoch": 0.1686684969114619, "grad_norm": 0.8763713154766297, "learning_rate": 9.504568730457822e-06, "loss": 0.3696, "step": 4915 }, { "epoch": 0.1687028140013727, "grad_norm": 0.7915503845089845, "learning_rate": 9.504327512911577e-06, "loss": 0.3643, "step": 4916 }, { "epoch": 0.16873713109128347, "grad_norm": 0.7776188793589165, "learning_rate": 9.504086239719617e-06, "loss": 0.3817, "step": 4917 }, { "epoch": 0.16877144818119424, "grad_norm": 0.789645760336156, "learning_rate": 9.503844910884924e-06, "loss": 0.3415, "step": 4918 }, { "epoch": 0.16880576527110502, "grad_norm": 0.786019863253275, "learning_rate": 9.503603526410477e-06, "loss": 0.3434, "step": 4919 }, { "epoch": 0.16884008236101578, "grad_norm": 0.8544169595983369, "learning_rate": 9.50336208629926e-06, "loss": 0.3422, "step": 4920 }, { "epoch": 0.16887439945092655, "grad_norm": 0.8291027827254928, "learning_rate": 9.503120590554254e-06, "loss": 0.3758, "step": 4921 }, { "epoch": 0.16890871654083733, "grad_norm": 0.8620140938965267, "learning_rate": 9.502879039178444e-06, "loss": 0.3593, "step": 4922 }, { "epoch": 0.1689430336307481, "grad_norm": 0.7891362938674321, "learning_rate": 9.502637432174813e-06, "loss": 0.3178, "step": 4923 }, { "epoch": 0.1689773507206589, "grad_norm": 0.7940965244744548, "learning_rate": 9.502395769546346e-06, "loss": 0.3474, "step": 4924 }, { "epoch": 0.16901166781056967, "grad_norm": 0.8400067956939039, "learning_rate": 9.502154051296028e-06, "loss": 0.3528, "step": 4925 }, { "epoch": 0.16904598490048045, "grad_norm": 0.8315685100183904, "learning_rate": 9.501912277426849e-06, "loss": 0.3927, "step": 4926 }, { "epoch": 0.1690803019903912, "grad_norm": 1.8328947508088569, "learning_rate": 9.501670447941789e-06, "loss": 0.3465, "step": 4927 }, { "epoch": 0.16911461908030198, "grad_norm": 0.7370625244604669, "learning_rate": 9.501428562843839e-06, "loss": 0.3309, "step": 4928 }, { "epoch": 0.16914893617021276, "grad_norm": 0.7833282419402637, "learning_rate": 9.50118662213599e-06, "loss": 0.3583, "step": 4929 }, { "epoch": 0.16918325326012354, "grad_norm": 0.7947416364922788, "learning_rate": 9.500944625821224e-06, "loss": 0.2961, "step": 4930 }, { "epoch": 0.16921757035003432, "grad_norm": 0.7489647318763974, "learning_rate": 9.500702573902538e-06, "loss": 0.329, "step": 4931 }, { "epoch": 0.1692518874399451, "grad_norm": 0.9193875385081165, "learning_rate": 9.500460466382918e-06, "loss": 0.3412, "step": 4932 }, { "epoch": 0.16928620452985588, "grad_norm": 0.7981900255845759, "learning_rate": 9.500218303265356e-06, "loss": 0.2832, "step": 4933 }, { "epoch": 0.16932052161976666, "grad_norm": 0.7812980952312554, "learning_rate": 9.499976084552845e-06, "loss": 0.3184, "step": 4934 }, { "epoch": 0.1693548387096774, "grad_norm": 0.8086867840085389, "learning_rate": 9.499733810248373e-06, "loss": 0.3512, "step": 4935 }, { "epoch": 0.1693891557995882, "grad_norm": 0.8092393288373076, "learning_rate": 9.499491480354939e-06, "loss": 0.336, "step": 4936 }, { "epoch": 0.16942347288949897, "grad_norm": 0.9517331551055258, "learning_rate": 9.499249094875531e-06, "loss": 0.3359, "step": 4937 }, { "epoch": 0.16945778997940975, "grad_norm": 0.9053515723029385, "learning_rate": 9.499006653813148e-06, "loss": 0.3333, "step": 4938 }, { "epoch": 0.16949210706932052, "grad_norm": 0.8001110114353182, "learning_rate": 9.498764157170783e-06, "loss": 0.3232, "step": 4939 }, { "epoch": 0.1695264241592313, "grad_norm": 0.8115752538191218, "learning_rate": 9.49852160495143e-06, "loss": 0.3335, "step": 4940 }, { "epoch": 0.16956074124914208, "grad_norm": 0.7591548477200953, "learning_rate": 9.498278997158089e-06, "loss": 0.3578, "step": 4941 }, { "epoch": 0.16959505833905286, "grad_norm": 0.9165736171951242, "learning_rate": 9.498036333793755e-06, "loss": 0.3993, "step": 4942 }, { "epoch": 0.1696293754289636, "grad_norm": 0.8205457919738803, "learning_rate": 9.497793614861426e-06, "loss": 0.3319, "step": 4943 }, { "epoch": 0.1696636925188744, "grad_norm": 0.8912282077473054, "learning_rate": 9.497550840364103e-06, "loss": 0.3614, "step": 4944 }, { "epoch": 0.16969800960878517, "grad_norm": 0.7970201880505271, "learning_rate": 9.497308010304779e-06, "loss": 0.3344, "step": 4945 }, { "epoch": 0.16973232669869595, "grad_norm": 0.7493917319110871, "learning_rate": 9.497065124686462e-06, "loss": 0.3157, "step": 4946 }, { "epoch": 0.16976664378860673, "grad_norm": 0.8020095898712546, "learning_rate": 9.496822183512147e-06, "loss": 0.3241, "step": 4947 }, { "epoch": 0.1698009608785175, "grad_norm": 0.8309174698223173, "learning_rate": 9.496579186784835e-06, "loss": 0.3502, "step": 4948 }, { "epoch": 0.1698352779684283, "grad_norm": 0.7523956379611771, "learning_rate": 9.49633613450753e-06, "loss": 0.3772, "step": 4949 }, { "epoch": 0.16986959505833904, "grad_norm": 0.849607876021488, "learning_rate": 9.496093026683237e-06, "loss": 0.3451, "step": 4950 }, { "epoch": 0.16990391214824982, "grad_norm": 0.8115587918072842, "learning_rate": 9.495849863314954e-06, "loss": 0.3193, "step": 4951 }, { "epoch": 0.1699382292381606, "grad_norm": 0.8749876034266881, "learning_rate": 9.495606644405687e-06, "loss": 0.3604, "step": 4952 }, { "epoch": 0.16997254632807138, "grad_norm": 0.8670581101832476, "learning_rate": 9.495363369958444e-06, "loss": 0.3175, "step": 4953 }, { "epoch": 0.17000686341798216, "grad_norm": 0.7477518538857831, "learning_rate": 9.495120039976226e-06, "loss": 0.2885, "step": 4954 }, { "epoch": 0.17004118050789294, "grad_norm": 0.8247572630196304, "learning_rate": 9.494876654462042e-06, "loss": 0.2927, "step": 4955 }, { "epoch": 0.17007549759780372, "grad_norm": 0.7854071608272083, "learning_rate": 9.494633213418895e-06, "loss": 0.2965, "step": 4956 }, { "epoch": 0.1701098146877145, "grad_norm": 0.7891821746833548, "learning_rate": 9.494389716849797e-06, "loss": 0.3015, "step": 4957 }, { "epoch": 0.17014413177762525, "grad_norm": 0.8698185908659808, "learning_rate": 9.494146164757754e-06, "loss": 0.3166, "step": 4958 }, { "epoch": 0.17017844886753603, "grad_norm": 0.790841421810714, "learning_rate": 9.493902557145773e-06, "loss": 0.3045, "step": 4959 }, { "epoch": 0.1702127659574468, "grad_norm": 0.7714791536445694, "learning_rate": 9.493658894016869e-06, "loss": 0.3376, "step": 4960 }, { "epoch": 0.17024708304735758, "grad_norm": 0.8690641620118176, "learning_rate": 9.493415175374044e-06, "loss": 0.3742, "step": 4961 }, { "epoch": 0.17028140013726836, "grad_norm": 0.7572474636023133, "learning_rate": 9.493171401220317e-06, "loss": 0.2946, "step": 4962 }, { "epoch": 0.17031571722717914, "grad_norm": 0.7649123807354133, "learning_rate": 9.492927571558692e-06, "loss": 0.2982, "step": 4963 }, { "epoch": 0.17035003431708992, "grad_norm": 0.8401540803201066, "learning_rate": 9.492683686392188e-06, "loss": 0.3502, "step": 4964 }, { "epoch": 0.17038435140700067, "grad_norm": 0.7675111437438937, "learning_rate": 9.492439745723813e-06, "loss": 0.3603, "step": 4965 }, { "epoch": 0.17041866849691145, "grad_norm": 0.7425677514926582, "learning_rate": 9.492195749556585e-06, "loss": 0.3391, "step": 4966 }, { "epoch": 0.17045298558682223, "grad_norm": 0.8243558655468176, "learning_rate": 9.491951697893516e-06, "loss": 0.2994, "step": 4967 }, { "epoch": 0.170487302676733, "grad_norm": 0.8973648558313343, "learning_rate": 9.491707590737618e-06, "loss": 0.396, "step": 4968 }, { "epoch": 0.1705216197666438, "grad_norm": 0.7801032544724688, "learning_rate": 9.491463428091911e-06, "loss": 0.3451, "step": 4969 }, { "epoch": 0.17055593685655457, "grad_norm": 0.8624991348523775, "learning_rate": 9.49121920995941e-06, "loss": 0.3686, "step": 4970 }, { "epoch": 0.17059025394646535, "grad_norm": 0.8221786980049548, "learning_rate": 9.490974936343132e-06, "loss": 0.3433, "step": 4971 }, { "epoch": 0.17062457103637613, "grad_norm": 0.7703356401165911, "learning_rate": 9.490730607246095e-06, "loss": 0.4258, "step": 4972 }, { "epoch": 0.17065888812628688, "grad_norm": 0.8950199126021653, "learning_rate": 9.490486222671316e-06, "loss": 0.3326, "step": 4973 }, { "epoch": 0.17069320521619766, "grad_norm": 0.9149597008715633, "learning_rate": 9.490241782621816e-06, "loss": 0.3337, "step": 4974 }, { "epoch": 0.17072752230610844, "grad_norm": 0.7284365765884652, "learning_rate": 9.489997287100614e-06, "loss": 0.3536, "step": 4975 }, { "epoch": 0.17076183939601922, "grad_norm": 0.8208798238821193, "learning_rate": 9.48975273611073e-06, "loss": 0.3141, "step": 4976 }, { "epoch": 0.17079615648593, "grad_norm": 0.8024606423305204, "learning_rate": 9.489508129655186e-06, "loss": 0.3371, "step": 4977 }, { "epoch": 0.17083047357584077, "grad_norm": 0.7688125810163218, "learning_rate": 9.489263467737002e-06, "loss": 0.3339, "step": 4978 }, { "epoch": 0.17086479066575155, "grad_norm": 0.7929835323363356, "learning_rate": 9.489018750359203e-06, "loss": 0.3333, "step": 4979 }, { "epoch": 0.17089910775566233, "grad_norm": 1.119027345149844, "learning_rate": 9.488773977524811e-06, "loss": 0.341, "step": 4980 }, { "epoch": 0.17093342484557308, "grad_norm": 0.9162215751368312, "learning_rate": 9.48852914923685e-06, "loss": 0.3363, "step": 4981 }, { "epoch": 0.17096774193548386, "grad_norm": 0.8152775881848903, "learning_rate": 9.488284265498344e-06, "loss": 0.3345, "step": 4982 }, { "epoch": 0.17100205902539464, "grad_norm": 0.8775369419794188, "learning_rate": 9.488039326312319e-06, "loss": 0.2963, "step": 4983 }, { "epoch": 0.17103637611530542, "grad_norm": 0.9087740652866446, "learning_rate": 9.4877943316818e-06, "loss": 0.3034, "step": 4984 }, { "epoch": 0.1710706932052162, "grad_norm": 0.6851939052193177, "learning_rate": 9.487549281609814e-06, "loss": 0.3182, "step": 4985 }, { "epoch": 0.17110501029512698, "grad_norm": 0.7803234421784438, "learning_rate": 9.48730417609939e-06, "loss": 0.3156, "step": 4986 }, { "epoch": 0.17113932738503776, "grad_norm": 0.9559842578316814, "learning_rate": 9.487059015153554e-06, "loss": 0.3136, "step": 4987 }, { "epoch": 0.1711736444749485, "grad_norm": 0.7632678175109928, "learning_rate": 9.486813798775335e-06, "loss": 0.3216, "step": 4988 }, { "epoch": 0.1712079615648593, "grad_norm": 0.7415932313178711, "learning_rate": 9.486568526967763e-06, "loss": 0.2877, "step": 4989 }, { "epoch": 0.17124227865477007, "grad_norm": 0.7950141603219479, "learning_rate": 9.486323199733866e-06, "loss": 0.329, "step": 4990 }, { "epoch": 0.17127659574468085, "grad_norm": 0.7872739504024059, "learning_rate": 9.48607781707668e-06, "loss": 0.3256, "step": 4991 }, { "epoch": 0.17131091283459163, "grad_norm": 0.6715530786291051, "learning_rate": 9.48583237899923e-06, "loss": 0.3577, "step": 4992 }, { "epoch": 0.1713452299245024, "grad_norm": 0.7993037323534885, "learning_rate": 9.485586885504551e-06, "loss": 0.3282, "step": 4993 }, { "epoch": 0.17137954701441319, "grad_norm": 0.8097890175230693, "learning_rate": 9.485341336595676e-06, "loss": 0.3322, "step": 4994 }, { "epoch": 0.17141386410432397, "grad_norm": 0.8952261784756731, "learning_rate": 9.485095732275639e-06, "loss": 0.3774, "step": 4995 }, { "epoch": 0.17144818119423472, "grad_norm": 0.8050672009724003, "learning_rate": 9.484850072547474e-06, "loss": 0.3632, "step": 4996 }, { "epoch": 0.1714824982841455, "grad_norm": 0.7729868669165777, "learning_rate": 9.484604357414214e-06, "loss": 0.3432, "step": 4997 }, { "epoch": 0.17151681537405628, "grad_norm": 0.7618328700471758, "learning_rate": 9.484358586878894e-06, "loss": 0.3392, "step": 4998 }, { "epoch": 0.17155113246396705, "grad_norm": 0.9640680361369354, "learning_rate": 9.484112760944554e-06, "loss": 0.3313, "step": 4999 }, { "epoch": 0.17158544955387783, "grad_norm": 0.9221377638493128, "learning_rate": 9.48386687961423e-06, "loss": 0.3333, "step": 5000 }, { "epoch": 0.1716197666437886, "grad_norm": 0.8601038629282732, "learning_rate": 9.483620942890956e-06, "loss": 0.3126, "step": 5001 }, { "epoch": 0.1716540837336994, "grad_norm": 0.7806299239351961, "learning_rate": 9.483374950777773e-06, "loss": 0.4001, "step": 5002 }, { "epoch": 0.17168840082361017, "grad_norm": 0.7454225378491193, "learning_rate": 9.483128903277721e-06, "loss": 0.3382, "step": 5003 }, { "epoch": 0.17172271791352092, "grad_norm": 0.861408988269326, "learning_rate": 9.482882800393837e-06, "loss": 0.338, "step": 5004 }, { "epoch": 0.1717570350034317, "grad_norm": 0.8408751845046478, "learning_rate": 9.482636642129163e-06, "loss": 0.356, "step": 5005 }, { "epoch": 0.17179135209334248, "grad_norm": 0.8009594664087161, "learning_rate": 9.48239042848674e-06, "loss": 0.3477, "step": 5006 }, { "epoch": 0.17182566918325326, "grad_norm": 0.8831486601817581, "learning_rate": 9.482144159469608e-06, "loss": 0.3252, "step": 5007 }, { "epoch": 0.17185998627316404, "grad_norm": 0.7937748229299787, "learning_rate": 9.481897835080811e-06, "loss": 0.3554, "step": 5008 }, { "epoch": 0.17189430336307482, "grad_norm": 0.8234794760563007, "learning_rate": 9.481651455323393e-06, "loss": 0.3241, "step": 5009 }, { "epoch": 0.1719286204529856, "grad_norm": 0.8492426954394083, "learning_rate": 9.481405020200395e-06, "loss": 0.3349, "step": 5010 }, { "epoch": 0.17196293754289635, "grad_norm": 0.8498583266025357, "learning_rate": 9.481158529714863e-06, "loss": 0.3549, "step": 5011 }, { "epoch": 0.17199725463280713, "grad_norm": 0.8402344656668124, "learning_rate": 9.480911983869842e-06, "loss": 0.3319, "step": 5012 }, { "epoch": 0.1720315717227179, "grad_norm": 0.8683987351177452, "learning_rate": 9.480665382668377e-06, "loss": 0.3403, "step": 5013 }, { "epoch": 0.1720658888126287, "grad_norm": 0.7214861293458698, "learning_rate": 9.480418726113516e-06, "loss": 0.3213, "step": 5014 }, { "epoch": 0.17210020590253947, "grad_norm": 0.9174947930488738, "learning_rate": 9.480172014208305e-06, "loss": 0.342, "step": 5015 }, { "epoch": 0.17213452299245025, "grad_norm": 0.697196495702306, "learning_rate": 9.479925246955792e-06, "loss": 0.2736, "step": 5016 }, { "epoch": 0.17216884008236102, "grad_norm": 0.8391858128836207, "learning_rate": 9.479678424359027e-06, "loss": 0.3217, "step": 5017 }, { "epoch": 0.1722031571722718, "grad_norm": 0.8057148180871894, "learning_rate": 9.479431546421057e-06, "loss": 0.327, "step": 5018 }, { "epoch": 0.17223747426218255, "grad_norm": 0.7915767167227221, "learning_rate": 9.479184613144931e-06, "loss": 0.3175, "step": 5019 }, { "epoch": 0.17227179135209333, "grad_norm": 0.8216325100996532, "learning_rate": 9.478937624533704e-06, "loss": 0.3169, "step": 5020 }, { "epoch": 0.1723061084420041, "grad_norm": 0.7536408965717774, "learning_rate": 9.478690580590421e-06, "loss": 0.3044, "step": 5021 }, { "epoch": 0.1723404255319149, "grad_norm": 0.8335787066575676, "learning_rate": 9.47844348131814e-06, "loss": 0.343, "step": 5022 }, { "epoch": 0.17237474262182567, "grad_norm": 0.7199481694801975, "learning_rate": 9.478196326719913e-06, "loss": 0.3019, "step": 5023 }, { "epoch": 0.17240905971173645, "grad_norm": 0.8017082666036462, "learning_rate": 9.47794911679879e-06, "loss": 0.3355, "step": 5024 }, { "epoch": 0.17244337680164723, "grad_norm": 0.9313857832052201, "learning_rate": 9.477701851557826e-06, "loss": 0.3819, "step": 5025 }, { "epoch": 0.172477693891558, "grad_norm": 0.8385953373705802, "learning_rate": 9.477454531000073e-06, "loss": 0.3067, "step": 5026 }, { "epoch": 0.17251201098146876, "grad_norm": 0.8310244622239378, "learning_rate": 9.477207155128594e-06, "loss": 0.3134, "step": 5027 }, { "epoch": 0.17254632807137954, "grad_norm": 0.9489774928499891, "learning_rate": 9.47695972394644e-06, "loss": 0.3603, "step": 5028 }, { "epoch": 0.17258064516129032, "grad_norm": 0.858475305525128, "learning_rate": 9.476712237456667e-06, "loss": 0.3356, "step": 5029 }, { "epoch": 0.1726149622512011, "grad_norm": 0.8520319929794933, "learning_rate": 9.476464695662333e-06, "loss": 0.3154, "step": 5030 }, { "epoch": 0.17264927934111188, "grad_norm": 0.7780272024048618, "learning_rate": 9.476217098566497e-06, "loss": 0.3138, "step": 5031 }, { "epoch": 0.17268359643102266, "grad_norm": 0.8068759440504428, "learning_rate": 9.47596944617222e-06, "loss": 0.3385, "step": 5032 }, { "epoch": 0.17271791352093344, "grad_norm": 0.6755085787720756, "learning_rate": 9.475721738482557e-06, "loss": 0.2926, "step": 5033 }, { "epoch": 0.1727522306108442, "grad_norm": 0.823722698349116, "learning_rate": 9.47547397550057e-06, "loss": 0.3437, "step": 5034 }, { "epoch": 0.17278654770075497, "grad_norm": 0.7688624266196674, "learning_rate": 9.47522615722932e-06, "loss": 0.3218, "step": 5035 }, { "epoch": 0.17282086479066575, "grad_norm": 0.8999610095279122, "learning_rate": 9.474978283671869e-06, "loss": 0.337, "step": 5036 }, { "epoch": 0.17285518188057652, "grad_norm": 0.8042668627603997, "learning_rate": 9.474730354831278e-06, "loss": 0.3235, "step": 5037 }, { "epoch": 0.1728894989704873, "grad_norm": 0.7311310048536752, "learning_rate": 9.474482370710611e-06, "loss": 0.3459, "step": 5038 }, { "epoch": 0.17292381606039808, "grad_norm": 0.7199064516730168, "learning_rate": 9.47423433131293e-06, "loss": 0.3438, "step": 5039 }, { "epoch": 0.17295813315030886, "grad_norm": 0.8179826419817622, "learning_rate": 9.473986236641301e-06, "loss": 0.3729, "step": 5040 }, { "epoch": 0.17299245024021964, "grad_norm": 0.8670592976299606, "learning_rate": 9.473738086698789e-06, "loss": 0.348, "step": 5041 }, { "epoch": 0.1730267673301304, "grad_norm": 0.7558411772949132, "learning_rate": 9.473489881488459e-06, "loss": 0.2861, "step": 5042 }, { "epoch": 0.17306108442004117, "grad_norm": 0.7849300257586157, "learning_rate": 9.473241621013374e-06, "loss": 0.3328, "step": 5043 }, { "epoch": 0.17309540150995195, "grad_norm": 0.8683792041652159, "learning_rate": 9.472993305276608e-06, "loss": 0.2996, "step": 5044 }, { "epoch": 0.17312971859986273, "grad_norm": 0.8147988433053716, "learning_rate": 9.472744934281222e-06, "loss": 0.3881, "step": 5045 }, { "epoch": 0.1731640356897735, "grad_norm": 0.7431111465174297, "learning_rate": 9.472496508030289e-06, "loss": 0.325, "step": 5046 }, { "epoch": 0.1731983527796843, "grad_norm": 0.796188214074599, "learning_rate": 9.472248026526875e-06, "loss": 0.3126, "step": 5047 }, { "epoch": 0.17323266986959507, "grad_norm": 0.8410394888848023, "learning_rate": 9.471999489774051e-06, "loss": 0.355, "step": 5048 }, { "epoch": 0.17326698695950585, "grad_norm": 0.8746031047412061, "learning_rate": 9.471750897774886e-06, "loss": 0.4112, "step": 5049 }, { "epoch": 0.1733013040494166, "grad_norm": 0.7910267249808561, "learning_rate": 9.471502250532454e-06, "loss": 0.3232, "step": 5050 }, { "epoch": 0.17333562113932738, "grad_norm": 0.8499805467499623, "learning_rate": 9.471253548049824e-06, "loss": 0.3327, "step": 5051 }, { "epoch": 0.17336993822923816, "grad_norm": 0.7612331615943626, "learning_rate": 9.471004790330069e-06, "loss": 0.3053, "step": 5052 }, { "epoch": 0.17340425531914894, "grad_norm": 0.8123422826787849, "learning_rate": 9.470755977376262e-06, "loss": 0.3732, "step": 5053 }, { "epoch": 0.17343857240905972, "grad_norm": 0.8512298575813463, "learning_rate": 9.47050710919148e-06, "loss": 0.3319, "step": 5054 }, { "epoch": 0.1734728894989705, "grad_norm": 0.7294102873435403, "learning_rate": 9.470258185778793e-06, "loss": 0.3185, "step": 5055 }, { "epoch": 0.17350720658888127, "grad_norm": 0.8330431679221036, "learning_rate": 9.470009207141277e-06, "loss": 0.2964, "step": 5056 }, { "epoch": 0.17354152367879203, "grad_norm": 0.8328189114565103, "learning_rate": 9.469760173282012e-06, "loss": 0.3383, "step": 5057 }, { "epoch": 0.1735758407687028, "grad_norm": 0.7484790657898583, "learning_rate": 9.469511084204069e-06, "loss": 0.318, "step": 5058 }, { "epoch": 0.17361015785861358, "grad_norm": 0.8916931558146075, "learning_rate": 9.469261939910528e-06, "loss": 0.3258, "step": 5059 }, { "epoch": 0.17364447494852436, "grad_norm": 0.8577213298998845, "learning_rate": 9.469012740404464e-06, "loss": 0.3158, "step": 5060 }, { "epoch": 0.17367879203843514, "grad_norm": 0.8033612046921177, "learning_rate": 9.468763485688962e-06, "loss": 0.3637, "step": 5061 }, { "epoch": 0.17371310912834592, "grad_norm": 0.7516309141117945, "learning_rate": 9.468514175767095e-06, "loss": 0.3293, "step": 5062 }, { "epoch": 0.1737474262182567, "grad_norm": 0.7432680151861505, "learning_rate": 9.468264810641944e-06, "loss": 0.3368, "step": 5063 }, { "epoch": 0.17378174330816748, "grad_norm": 0.7481172086287203, "learning_rate": 9.468015390316591e-06, "loss": 0.3408, "step": 5064 }, { "epoch": 0.17381606039807823, "grad_norm": 0.7724854130570632, "learning_rate": 9.46776591479412e-06, "loss": 0.3281, "step": 5065 }, { "epoch": 0.173850377487989, "grad_norm": 0.8379314356363884, "learning_rate": 9.467516384077607e-06, "loss": 0.3901, "step": 5066 }, { "epoch": 0.1738846945778998, "grad_norm": 0.8043375823791874, "learning_rate": 9.46726679817014e-06, "loss": 0.3779, "step": 5067 }, { "epoch": 0.17391901166781057, "grad_norm": 0.8418307733038519, "learning_rate": 9.467017157074799e-06, "loss": 0.3348, "step": 5068 }, { "epoch": 0.17395332875772135, "grad_norm": 0.8910572133089165, "learning_rate": 9.466767460794668e-06, "loss": 0.3527, "step": 5069 }, { "epoch": 0.17398764584763213, "grad_norm": 0.7511270902244729, "learning_rate": 9.466517709332834e-06, "loss": 0.3337, "step": 5070 }, { "epoch": 0.1740219629375429, "grad_norm": 0.8445613902560039, "learning_rate": 9.466267902692381e-06, "loss": 0.3139, "step": 5071 }, { "epoch": 0.17405628002745366, "grad_norm": 0.8462973509733795, "learning_rate": 9.466018040876395e-06, "loss": 0.3374, "step": 5072 }, { "epoch": 0.17409059711736444, "grad_norm": 0.858909782551154, "learning_rate": 9.465768123887963e-06, "loss": 0.3808, "step": 5073 }, { "epoch": 0.17412491420727522, "grad_norm": 0.8176684181363804, "learning_rate": 9.465518151730173e-06, "loss": 0.3679, "step": 5074 }, { "epoch": 0.174159231297186, "grad_norm": 0.7337870636202785, "learning_rate": 9.46526812440611e-06, "loss": 0.3124, "step": 5075 }, { "epoch": 0.17419354838709677, "grad_norm": 0.7345131476654401, "learning_rate": 9.465018041918869e-06, "loss": 0.3151, "step": 5076 }, { "epoch": 0.17422786547700755, "grad_norm": 0.6948735092672508, "learning_rate": 9.464767904271533e-06, "loss": 0.349, "step": 5077 }, { "epoch": 0.17426218256691833, "grad_norm": 0.8773955482103423, "learning_rate": 9.464517711467196e-06, "loss": 0.3695, "step": 5078 }, { "epoch": 0.1742964996568291, "grad_norm": 0.8016375958933014, "learning_rate": 9.46426746350895e-06, "loss": 0.3758, "step": 5079 }, { "epoch": 0.17433081674673986, "grad_norm": 0.8311386227975167, "learning_rate": 9.46401716039988e-06, "loss": 0.3263, "step": 5080 }, { "epoch": 0.17436513383665064, "grad_norm": 0.8137332605868367, "learning_rate": 9.463766802143085e-06, "loss": 0.3028, "step": 5081 }, { "epoch": 0.17439945092656142, "grad_norm": 0.7645881890792932, "learning_rate": 9.463516388741655e-06, "loss": 0.3164, "step": 5082 }, { "epoch": 0.1744337680164722, "grad_norm": 0.7520189800379958, "learning_rate": 9.463265920198685e-06, "loss": 0.3096, "step": 5083 }, { "epoch": 0.17446808510638298, "grad_norm": 0.7760112710294518, "learning_rate": 9.463015396517268e-06, "loss": 0.4298, "step": 5084 }, { "epoch": 0.17450240219629376, "grad_norm": 0.8095162821515727, "learning_rate": 9.462764817700498e-06, "loss": 0.3762, "step": 5085 }, { "epoch": 0.17453671928620454, "grad_norm": 0.8375780941637182, "learning_rate": 9.462514183751471e-06, "loss": 0.3477, "step": 5086 }, { "epoch": 0.17457103637611532, "grad_norm": 0.8675768461502347, "learning_rate": 9.462263494673285e-06, "loss": 0.3772, "step": 5087 }, { "epoch": 0.17460535346602607, "grad_norm": 0.7423015132502803, "learning_rate": 9.462012750469038e-06, "loss": 0.3168, "step": 5088 }, { "epoch": 0.17463967055593685, "grad_norm": 0.8004259425284441, "learning_rate": 9.461761951141824e-06, "loss": 0.3263, "step": 5089 }, { "epoch": 0.17467398764584763, "grad_norm": 0.8318667855758214, "learning_rate": 9.461511096694743e-06, "loss": 0.3539, "step": 5090 }, { "epoch": 0.1747083047357584, "grad_norm": 0.8433209542678629, "learning_rate": 9.461260187130894e-06, "loss": 0.3616, "step": 5091 }, { "epoch": 0.1747426218256692, "grad_norm": 0.7879315504668305, "learning_rate": 9.461009222453377e-06, "loss": 0.3441, "step": 5092 }, { "epoch": 0.17477693891557997, "grad_norm": 0.8219927925509767, "learning_rate": 9.460758202665292e-06, "loss": 0.3663, "step": 5093 }, { "epoch": 0.17481125600549074, "grad_norm": 0.8049693230607801, "learning_rate": 9.460507127769739e-06, "loss": 0.3081, "step": 5094 }, { "epoch": 0.1748455730954015, "grad_norm": 0.8696175000774512, "learning_rate": 9.460255997769821e-06, "loss": 0.3625, "step": 5095 }, { "epoch": 0.17487989018531228, "grad_norm": 0.7716465184724055, "learning_rate": 9.460004812668641e-06, "loss": 0.3085, "step": 5096 }, { "epoch": 0.17491420727522305, "grad_norm": 0.8850365738258734, "learning_rate": 9.459753572469303e-06, "loss": 0.3082, "step": 5097 }, { "epoch": 0.17494852436513383, "grad_norm": 0.7622552571231589, "learning_rate": 9.459502277174907e-06, "loss": 0.3246, "step": 5098 }, { "epoch": 0.1749828414550446, "grad_norm": 0.7893402918650159, "learning_rate": 9.459250926788561e-06, "loss": 0.3677, "step": 5099 }, { "epoch": 0.1750171585449554, "grad_norm": 0.7783203698080109, "learning_rate": 9.458999521313367e-06, "loss": 0.3541, "step": 5100 }, { "epoch": 0.17505147563486617, "grad_norm": 0.7801053050825534, "learning_rate": 9.458748060752435e-06, "loss": 0.3315, "step": 5101 }, { "epoch": 0.17508579272477695, "grad_norm": 0.8341548408286433, "learning_rate": 9.458496545108868e-06, "loss": 0.3349, "step": 5102 }, { "epoch": 0.1751201098146877, "grad_norm": 0.8675253351703988, "learning_rate": 9.458244974385773e-06, "loss": 0.3638, "step": 5103 }, { "epoch": 0.17515442690459848, "grad_norm": 0.8654172781492915, "learning_rate": 9.457993348586261e-06, "loss": 0.3756, "step": 5104 }, { "epoch": 0.17518874399450926, "grad_norm": 0.7315365008467162, "learning_rate": 9.457741667713438e-06, "loss": 0.303, "step": 5105 }, { "epoch": 0.17522306108442004, "grad_norm": 0.8094596627483707, "learning_rate": 9.457489931770413e-06, "loss": 0.3699, "step": 5106 }, { "epoch": 0.17525737817433082, "grad_norm": 0.7755569200973337, "learning_rate": 9.457238140760298e-06, "loss": 0.3535, "step": 5107 }, { "epoch": 0.1752916952642416, "grad_norm": 0.7512571613018294, "learning_rate": 9.4569862946862e-06, "loss": 0.3035, "step": 5108 }, { "epoch": 0.17532601235415238, "grad_norm": 0.8108336904692421, "learning_rate": 9.456734393551237e-06, "loss": 0.3715, "step": 5109 }, { "epoch": 0.17536032944406316, "grad_norm": 0.7863125441152216, "learning_rate": 9.456482437358514e-06, "loss": 0.3792, "step": 5110 }, { "epoch": 0.1753946465339739, "grad_norm": 0.7882976672304557, "learning_rate": 9.456230426111145e-06, "loss": 0.3417, "step": 5111 }, { "epoch": 0.1754289636238847, "grad_norm": 0.9649152728831045, "learning_rate": 9.455978359812248e-06, "loss": 0.3959, "step": 5112 }, { "epoch": 0.17546328071379547, "grad_norm": 1.0971182214773858, "learning_rate": 9.455726238464931e-06, "loss": 0.3661, "step": 5113 }, { "epoch": 0.17549759780370625, "grad_norm": 0.7564205983188709, "learning_rate": 9.455474062072313e-06, "loss": 0.3333, "step": 5114 }, { "epoch": 0.17553191489361702, "grad_norm": 0.7520712521662312, "learning_rate": 9.455221830637506e-06, "loss": 0.3192, "step": 5115 }, { "epoch": 0.1755662319835278, "grad_norm": 0.7702978028436008, "learning_rate": 9.454969544163629e-06, "loss": 0.3744, "step": 5116 }, { "epoch": 0.17560054907343858, "grad_norm": 0.7605244738759127, "learning_rate": 9.454717202653796e-06, "loss": 0.3554, "step": 5117 }, { "epoch": 0.17563486616334933, "grad_norm": 0.7253112371477546, "learning_rate": 9.454464806111126e-06, "loss": 0.3085, "step": 5118 }, { "epoch": 0.1756691832532601, "grad_norm": 0.8989462319789591, "learning_rate": 9.454212354538736e-06, "loss": 0.3381, "step": 5119 }, { "epoch": 0.1757035003431709, "grad_norm": 0.7839863800324979, "learning_rate": 9.453959847939746e-06, "loss": 0.2957, "step": 5120 }, { "epoch": 0.17573781743308167, "grad_norm": 0.8622977381748275, "learning_rate": 9.453707286317275e-06, "loss": 0.3296, "step": 5121 }, { "epoch": 0.17577213452299245, "grad_norm": 0.9266838598084033, "learning_rate": 9.453454669674444e-06, "loss": 0.3628, "step": 5122 }, { "epoch": 0.17580645161290323, "grad_norm": 0.8542750701902304, "learning_rate": 9.45320199801437e-06, "loss": 0.4191, "step": 5123 }, { "epoch": 0.175840768702814, "grad_norm": 0.8114394866640464, "learning_rate": 9.452949271340179e-06, "loss": 0.3259, "step": 5124 }, { "epoch": 0.1758750857927248, "grad_norm": 0.8003526357621783, "learning_rate": 9.452696489654992e-06, "loss": 0.3667, "step": 5125 }, { "epoch": 0.17590940288263554, "grad_norm": 0.8320133828383965, "learning_rate": 9.452443652961931e-06, "loss": 0.359, "step": 5126 }, { "epoch": 0.17594371997254632, "grad_norm": 0.8938196229461148, "learning_rate": 9.452190761264119e-06, "loss": 0.3518, "step": 5127 }, { "epoch": 0.1759780370624571, "grad_norm": 0.7909830201600941, "learning_rate": 9.451937814564682e-06, "loss": 0.316, "step": 5128 }, { "epoch": 0.17601235415236788, "grad_norm": 0.8512549025842815, "learning_rate": 9.451684812866743e-06, "loss": 0.3199, "step": 5129 }, { "epoch": 0.17604667124227866, "grad_norm": 0.7517408003811016, "learning_rate": 9.451431756173428e-06, "loss": 0.3166, "step": 5130 }, { "epoch": 0.17608098833218944, "grad_norm": 0.7855163639674765, "learning_rate": 9.451178644487864e-06, "loss": 0.3101, "step": 5131 }, { "epoch": 0.17611530542210022, "grad_norm": 0.8316986712847266, "learning_rate": 9.450925477813177e-06, "loss": 0.335, "step": 5132 }, { "epoch": 0.176149622512011, "grad_norm": 0.8043821592712189, "learning_rate": 9.450672256152495e-06, "loss": 0.3033, "step": 5133 }, { "epoch": 0.17618393960192175, "grad_norm": 0.8998388201557067, "learning_rate": 9.450418979508947e-06, "loss": 0.4303, "step": 5134 }, { "epoch": 0.17621825669183253, "grad_norm": 0.7157158630847961, "learning_rate": 9.450165647885661e-06, "loss": 0.3508, "step": 5135 }, { "epoch": 0.1762525737817433, "grad_norm": 0.7687869280439353, "learning_rate": 9.449912261285766e-06, "loss": 0.3767, "step": 5136 }, { "epoch": 0.17628689087165408, "grad_norm": 0.8118604340247696, "learning_rate": 9.449658819712395e-06, "loss": 0.3628, "step": 5137 }, { "epoch": 0.17632120796156486, "grad_norm": 0.8931527260772312, "learning_rate": 9.449405323168675e-06, "loss": 0.4452, "step": 5138 }, { "epoch": 0.17635552505147564, "grad_norm": 0.7988290579732564, "learning_rate": 9.44915177165774e-06, "loss": 0.3297, "step": 5139 }, { "epoch": 0.17638984214138642, "grad_norm": 0.7875669818759612, "learning_rate": 9.448898165182722e-06, "loss": 0.369, "step": 5140 }, { "epoch": 0.17642415923129717, "grad_norm": 0.7619022188009344, "learning_rate": 9.448644503746755e-06, "loss": 0.3201, "step": 5141 }, { "epoch": 0.17645847632120795, "grad_norm": 0.8191706849885854, "learning_rate": 9.448390787352971e-06, "loss": 0.3787, "step": 5142 }, { "epoch": 0.17649279341111873, "grad_norm": 0.8564557128412865, "learning_rate": 9.448137016004506e-06, "loss": 0.428, "step": 5143 }, { "epoch": 0.1765271105010295, "grad_norm": 0.8056288610191991, "learning_rate": 9.447883189704494e-06, "loss": 0.3774, "step": 5144 }, { "epoch": 0.1765614275909403, "grad_norm": 0.7582406955891109, "learning_rate": 9.44762930845607e-06, "loss": 0.3277, "step": 5145 }, { "epoch": 0.17659574468085107, "grad_norm": 0.8658364181551216, "learning_rate": 9.447375372262371e-06, "loss": 0.3699, "step": 5146 }, { "epoch": 0.17663006177076185, "grad_norm": 0.814976369374906, "learning_rate": 9.447121381126536e-06, "loss": 0.3192, "step": 5147 }, { "epoch": 0.17666437886067263, "grad_norm": 0.8925091179242155, "learning_rate": 9.446867335051701e-06, "loss": 0.3265, "step": 5148 }, { "epoch": 0.17669869595058338, "grad_norm": 0.7953172106132924, "learning_rate": 9.446613234041005e-06, "loss": 0.3414, "step": 5149 }, { "epoch": 0.17673301304049416, "grad_norm": 0.715565313398788, "learning_rate": 9.446359078097584e-06, "loss": 0.3126, "step": 5150 }, { "epoch": 0.17676733013040494, "grad_norm": 0.808182618375402, "learning_rate": 9.446104867224583e-06, "loss": 0.3537, "step": 5151 }, { "epoch": 0.17680164722031572, "grad_norm": 0.7424816517269274, "learning_rate": 9.44585060142514e-06, "loss": 0.3051, "step": 5152 }, { "epoch": 0.1768359643102265, "grad_norm": 0.8095428856500811, "learning_rate": 9.445596280702396e-06, "loss": 0.3519, "step": 5153 }, { "epoch": 0.17687028140013727, "grad_norm": 0.8216641797497121, "learning_rate": 9.445341905059491e-06, "loss": 0.4002, "step": 5154 }, { "epoch": 0.17690459849004805, "grad_norm": 0.8456207885850436, "learning_rate": 9.445087474499573e-06, "loss": 0.3097, "step": 5155 }, { "epoch": 0.17693891557995883, "grad_norm": 0.8315932434210151, "learning_rate": 9.444832989025778e-06, "loss": 0.3971, "step": 5156 }, { "epoch": 0.17697323266986958, "grad_norm": 0.7567178629909799, "learning_rate": 9.444578448641257e-06, "loss": 0.3086, "step": 5157 }, { "epoch": 0.17700754975978036, "grad_norm": 0.8263943950264263, "learning_rate": 9.444323853349148e-06, "loss": 0.3222, "step": 5158 }, { "epoch": 0.17704186684969114, "grad_norm": 1.132490736028843, "learning_rate": 9.4440692031526e-06, "loss": 0.3844, "step": 5159 }, { "epoch": 0.17707618393960192, "grad_norm": 0.8703205193711263, "learning_rate": 9.443814498054762e-06, "loss": 0.403, "step": 5160 }, { "epoch": 0.1771105010295127, "grad_norm": 0.8304782979458967, "learning_rate": 9.443559738058774e-06, "loss": 0.3974, "step": 5161 }, { "epoch": 0.17714481811942348, "grad_norm": 0.9501575804546688, "learning_rate": 9.443304923167786e-06, "loss": 0.3771, "step": 5162 }, { "epoch": 0.17717913520933426, "grad_norm": 0.8283551244397818, "learning_rate": 9.443050053384945e-06, "loss": 0.3704, "step": 5163 }, { "epoch": 0.177213452299245, "grad_norm": 0.9843269696633814, "learning_rate": 9.442795128713402e-06, "loss": 0.3876, "step": 5164 }, { "epoch": 0.1772477693891558, "grad_norm": 0.8431979493308173, "learning_rate": 9.442540149156306e-06, "loss": 0.389, "step": 5165 }, { "epoch": 0.17728208647906657, "grad_norm": 0.8311527954557988, "learning_rate": 9.442285114716804e-06, "loss": 0.3304, "step": 5166 }, { "epoch": 0.17731640356897735, "grad_norm": 0.7207921607077145, "learning_rate": 9.44203002539805e-06, "loss": 0.3441, "step": 5167 }, { "epoch": 0.17735072065888813, "grad_norm": 0.8246299939226023, "learning_rate": 9.441774881203194e-06, "loss": 0.3465, "step": 5168 }, { "epoch": 0.1773850377487989, "grad_norm": 0.7213512336606809, "learning_rate": 9.441519682135387e-06, "loss": 0.3065, "step": 5169 }, { "epoch": 0.1774193548387097, "grad_norm": 0.732114900324479, "learning_rate": 9.441264428197783e-06, "loss": 0.3016, "step": 5170 }, { "epoch": 0.17745367192862047, "grad_norm": 0.8339949389967654, "learning_rate": 9.441009119393535e-06, "loss": 0.3311, "step": 5171 }, { "epoch": 0.17748798901853122, "grad_norm": 0.8219550802285637, "learning_rate": 9.440753755725798e-06, "loss": 0.2849, "step": 5172 }, { "epoch": 0.177522306108442, "grad_norm": 0.8418993846712198, "learning_rate": 9.440498337197726e-06, "loss": 0.3453, "step": 5173 }, { "epoch": 0.17755662319835278, "grad_norm": 0.7114431696403632, "learning_rate": 9.440242863812471e-06, "loss": 0.364, "step": 5174 }, { "epoch": 0.17759094028826355, "grad_norm": 0.7959471566579718, "learning_rate": 9.439987335573195e-06, "loss": 0.3236, "step": 5175 }, { "epoch": 0.17762525737817433, "grad_norm": 0.8185064389678806, "learning_rate": 9.43973175248305e-06, "loss": 0.3236, "step": 5176 }, { "epoch": 0.1776595744680851, "grad_norm": 0.8390308389005446, "learning_rate": 9.439476114545198e-06, "loss": 0.3388, "step": 5177 }, { "epoch": 0.1776938915579959, "grad_norm": 0.7406921266753828, "learning_rate": 9.439220421762794e-06, "loss": 0.2893, "step": 5178 }, { "epoch": 0.17772820864790667, "grad_norm": 0.766670803110431, "learning_rate": 9.438964674138995e-06, "loss": 0.2785, "step": 5179 }, { "epoch": 0.17776252573781742, "grad_norm": 0.872800778588855, "learning_rate": 9.438708871676964e-06, "loss": 0.3571, "step": 5180 }, { "epoch": 0.1777968428277282, "grad_norm": 0.7665119154558292, "learning_rate": 9.43845301437986e-06, "loss": 0.3328, "step": 5181 }, { "epoch": 0.17783115991763898, "grad_norm": 0.7742965300514265, "learning_rate": 9.438197102250845e-06, "loss": 0.2988, "step": 5182 }, { "epoch": 0.17786547700754976, "grad_norm": 0.9130002128550919, "learning_rate": 9.437941135293078e-06, "loss": 0.3091, "step": 5183 }, { "epoch": 0.17789979409746054, "grad_norm": 0.7746560568436992, "learning_rate": 9.437685113509723e-06, "loss": 0.3415, "step": 5184 }, { "epoch": 0.17793411118737132, "grad_norm": 0.8710890182410338, "learning_rate": 9.437429036903943e-06, "loss": 0.3266, "step": 5185 }, { "epoch": 0.1779684282772821, "grad_norm": 0.7937577073715698, "learning_rate": 9.4371729054789e-06, "loss": 0.3213, "step": 5186 }, { "epoch": 0.17800274536719285, "grad_norm": 0.6800801275095041, "learning_rate": 9.436916719237757e-06, "loss": 0.314, "step": 5187 }, { "epoch": 0.17803706245710363, "grad_norm": 0.8365614760066143, "learning_rate": 9.436660478183683e-06, "loss": 0.3397, "step": 5188 }, { "epoch": 0.1780713795470144, "grad_norm": 0.7852837189072153, "learning_rate": 9.43640418231984e-06, "loss": 0.3315, "step": 5189 }, { "epoch": 0.1781056966369252, "grad_norm": 0.7791187236654813, "learning_rate": 9.436147831649399e-06, "loss": 0.3401, "step": 5190 }, { "epoch": 0.17814001372683597, "grad_norm": 0.8536237558504887, "learning_rate": 9.435891426175521e-06, "loss": 0.4297, "step": 5191 }, { "epoch": 0.17817433081674675, "grad_norm": 0.7250497313618757, "learning_rate": 9.435634965901376e-06, "loss": 0.3186, "step": 5192 }, { "epoch": 0.17820864790665752, "grad_norm": 0.9181146080463297, "learning_rate": 9.435378450830133e-06, "loss": 0.3184, "step": 5193 }, { "epoch": 0.1782429649965683, "grad_norm": 0.7253537418635014, "learning_rate": 9.43512188096496e-06, "loss": 0.2901, "step": 5194 }, { "epoch": 0.17827728208647906, "grad_norm": 0.8922902951661462, "learning_rate": 9.434865256309027e-06, "loss": 0.3126, "step": 5195 }, { "epoch": 0.17831159917638983, "grad_norm": 0.7622445359751494, "learning_rate": 9.434608576865505e-06, "loss": 0.3174, "step": 5196 }, { "epoch": 0.1783459162663006, "grad_norm": 0.7661747300401297, "learning_rate": 9.434351842637563e-06, "loss": 0.3382, "step": 5197 }, { "epoch": 0.1783802333562114, "grad_norm": 0.8211091744021422, "learning_rate": 9.434095053628376e-06, "loss": 0.3427, "step": 5198 }, { "epoch": 0.17841455044612217, "grad_norm": 0.7786172044930443, "learning_rate": 9.433838209841113e-06, "loss": 0.3368, "step": 5199 }, { "epoch": 0.17844886753603295, "grad_norm": 0.8454424863250848, "learning_rate": 9.433581311278947e-06, "loss": 0.3573, "step": 5200 }, { "epoch": 0.17848318462594373, "grad_norm": 0.7765457880844754, "learning_rate": 9.433324357945055e-06, "loss": 0.321, "step": 5201 }, { "epoch": 0.17851750171585448, "grad_norm": 0.8245771595558385, "learning_rate": 9.433067349842608e-06, "loss": 0.3219, "step": 5202 }, { "epoch": 0.17855181880576526, "grad_norm": 0.759883798884455, "learning_rate": 9.432810286974784e-06, "loss": 0.355, "step": 5203 }, { "epoch": 0.17858613589567604, "grad_norm": 0.7836732566065816, "learning_rate": 9.432553169344756e-06, "loss": 0.3441, "step": 5204 }, { "epoch": 0.17862045298558682, "grad_norm": 0.7939267243011884, "learning_rate": 9.432295996955701e-06, "loss": 0.334, "step": 5205 }, { "epoch": 0.1786547700754976, "grad_norm": 0.820332676698042, "learning_rate": 9.432038769810798e-06, "loss": 0.3466, "step": 5206 }, { "epoch": 0.17868908716540838, "grad_norm": 0.78643229009043, "learning_rate": 9.43178148791322e-06, "loss": 0.3285, "step": 5207 }, { "epoch": 0.17872340425531916, "grad_norm": 0.7505432394677205, "learning_rate": 9.431524151266152e-06, "loss": 0.3318, "step": 5208 }, { "epoch": 0.17875772134522994, "grad_norm": 0.8571835823800646, "learning_rate": 9.431266759872768e-06, "loss": 0.3383, "step": 5209 }, { "epoch": 0.1787920384351407, "grad_norm": 0.773928902680004, "learning_rate": 9.431009313736251e-06, "loss": 0.3546, "step": 5210 }, { "epoch": 0.17882635552505147, "grad_norm": 0.9422914423190286, "learning_rate": 9.43075181285978e-06, "loss": 0.322, "step": 5211 }, { "epoch": 0.17886067261496225, "grad_norm": 0.8277111456855035, "learning_rate": 9.430494257246534e-06, "loss": 0.342, "step": 5212 }, { "epoch": 0.17889498970487303, "grad_norm": 0.7551406078442578, "learning_rate": 9.430236646899699e-06, "loss": 0.3271, "step": 5213 }, { "epoch": 0.1789293067947838, "grad_norm": 0.6573328329293051, "learning_rate": 9.429978981822455e-06, "loss": 0.3116, "step": 5214 }, { "epoch": 0.17896362388469458, "grad_norm": 0.7996173492339036, "learning_rate": 9.429721262017985e-06, "loss": 0.3218, "step": 5215 }, { "epoch": 0.17899794097460536, "grad_norm": 0.7483946738776964, "learning_rate": 9.429463487489473e-06, "loss": 0.3284, "step": 5216 }, { "epoch": 0.17903225806451614, "grad_norm": 0.7302262788964946, "learning_rate": 9.429205658240104e-06, "loss": 0.3039, "step": 5217 }, { "epoch": 0.1790665751544269, "grad_norm": 0.7898267751621116, "learning_rate": 9.428947774273063e-06, "loss": 0.3331, "step": 5218 }, { "epoch": 0.17910089224433767, "grad_norm": 0.730824925019658, "learning_rate": 9.428689835591538e-06, "loss": 0.3205, "step": 5219 }, { "epoch": 0.17913520933424845, "grad_norm": 0.9491978730647018, "learning_rate": 9.428431842198711e-06, "loss": 0.3413, "step": 5220 }, { "epoch": 0.17916952642415923, "grad_norm": 0.7431226311123149, "learning_rate": 9.428173794097772e-06, "loss": 0.2722, "step": 5221 }, { "epoch": 0.17920384351407, "grad_norm": 0.8313065924191178, "learning_rate": 9.427915691291909e-06, "loss": 0.2873, "step": 5222 }, { "epoch": 0.1792381606039808, "grad_norm": 0.8192856901916804, "learning_rate": 9.42765753378431e-06, "loss": 0.3695, "step": 5223 }, { "epoch": 0.17927247769389157, "grad_norm": 0.8230659832700332, "learning_rate": 9.427399321578165e-06, "loss": 0.3857, "step": 5224 }, { "epoch": 0.17930679478380232, "grad_norm": 0.8042721424003211, "learning_rate": 9.427141054676663e-06, "loss": 0.3148, "step": 5225 }, { "epoch": 0.1793411118737131, "grad_norm": 0.8962939450185808, "learning_rate": 9.426882733082994e-06, "loss": 0.3825, "step": 5226 }, { "epoch": 0.17937542896362388, "grad_norm": 0.815505723506025, "learning_rate": 9.42662435680035e-06, "loss": 0.3897, "step": 5227 }, { "epoch": 0.17940974605353466, "grad_norm": 0.8164578836613182, "learning_rate": 9.426365925831924e-06, "loss": 0.352, "step": 5228 }, { "epoch": 0.17944406314344544, "grad_norm": 0.7868028464049177, "learning_rate": 9.426107440180907e-06, "loss": 0.3521, "step": 5229 }, { "epoch": 0.17947838023335622, "grad_norm": 0.7555514565862528, "learning_rate": 9.425848899850493e-06, "loss": 0.3076, "step": 5230 }, { "epoch": 0.179512697323267, "grad_norm": 0.8043397260437397, "learning_rate": 9.425590304843876e-06, "loss": 0.3324, "step": 5231 }, { "epoch": 0.17954701441317777, "grad_norm": 0.7434465472233245, "learning_rate": 9.42533165516425e-06, "loss": 0.3141, "step": 5232 }, { "epoch": 0.17958133150308853, "grad_norm": 0.8459292953132014, "learning_rate": 9.425072950814811e-06, "loss": 0.3381, "step": 5233 }, { "epoch": 0.1796156485929993, "grad_norm": 0.8636882966040884, "learning_rate": 9.424814191798754e-06, "loss": 0.3279, "step": 5234 }, { "epoch": 0.17964996568291008, "grad_norm": 0.8336063013745831, "learning_rate": 9.424555378119278e-06, "loss": 0.3239, "step": 5235 }, { "epoch": 0.17968428277282086, "grad_norm": 0.7280710559510587, "learning_rate": 9.424296509779579e-06, "loss": 0.3054, "step": 5236 }, { "epoch": 0.17971859986273164, "grad_norm": 0.8759113863158128, "learning_rate": 9.424037586782855e-06, "loss": 0.4215, "step": 5237 }, { "epoch": 0.17975291695264242, "grad_norm": 0.8733455300961938, "learning_rate": 9.423778609132303e-06, "loss": 0.3198, "step": 5238 }, { "epoch": 0.1797872340425532, "grad_norm": 0.8791042864370506, "learning_rate": 9.423519576831125e-06, "loss": 0.3444, "step": 5239 }, { "epoch": 0.17982155113246398, "grad_norm": 0.7688176136512628, "learning_rate": 9.42326048988252e-06, "loss": 0.3214, "step": 5240 }, { "epoch": 0.17985586822237473, "grad_norm": 0.7407113432651851, "learning_rate": 9.423001348289687e-06, "loss": 0.2811, "step": 5241 }, { "epoch": 0.1798901853122855, "grad_norm": 0.8090591350390112, "learning_rate": 9.42274215205583e-06, "loss": 0.3819, "step": 5242 }, { "epoch": 0.1799245024021963, "grad_norm": 0.8347613716358092, "learning_rate": 9.42248290118415e-06, "loss": 0.3255, "step": 5243 }, { "epoch": 0.17995881949210707, "grad_norm": 0.7461264028300875, "learning_rate": 9.422223595677852e-06, "loss": 0.2848, "step": 5244 }, { "epoch": 0.17999313658201785, "grad_norm": 0.8003406941427585, "learning_rate": 9.421964235540136e-06, "loss": 0.3503, "step": 5245 }, { "epoch": 0.18002745367192863, "grad_norm": 0.8342423511177284, "learning_rate": 9.421704820774207e-06, "loss": 0.4038, "step": 5246 }, { "epoch": 0.1800617707618394, "grad_norm": 0.8635616550047513, "learning_rate": 9.421445351383269e-06, "loss": 0.3689, "step": 5247 }, { "epoch": 0.18009608785175016, "grad_norm": 0.8226262996414772, "learning_rate": 9.42118582737053e-06, "loss": 0.3141, "step": 5248 }, { "epoch": 0.18013040494166094, "grad_norm": 0.7828175080332498, "learning_rate": 9.420926248739195e-06, "loss": 0.2922, "step": 5249 }, { "epoch": 0.18016472203157172, "grad_norm": 0.7924699495823002, "learning_rate": 9.42066661549247e-06, "loss": 0.3013, "step": 5250 }, { "epoch": 0.1801990391214825, "grad_norm": 0.8212827519720588, "learning_rate": 9.420406927633564e-06, "loss": 0.3409, "step": 5251 }, { "epoch": 0.18023335621139328, "grad_norm": 0.8253011994789063, "learning_rate": 9.420147185165682e-06, "loss": 0.3734, "step": 5252 }, { "epoch": 0.18026767330130405, "grad_norm": 0.791200739971604, "learning_rate": 9.419887388092037e-06, "loss": 0.362, "step": 5253 }, { "epoch": 0.18030199039121483, "grad_norm": 0.7962027451230174, "learning_rate": 9.419627536415834e-06, "loss": 0.3333, "step": 5254 }, { "epoch": 0.1803363074811256, "grad_norm": 0.8281503036817237, "learning_rate": 9.419367630140288e-06, "loss": 0.3486, "step": 5255 }, { "epoch": 0.18037062457103636, "grad_norm": 0.85708064131251, "learning_rate": 9.419107669268606e-06, "loss": 0.3701, "step": 5256 }, { "epoch": 0.18040494166094714, "grad_norm": 0.7640471816928135, "learning_rate": 9.418847653804e-06, "loss": 0.331, "step": 5257 }, { "epoch": 0.18043925875085792, "grad_norm": 0.9134015391013883, "learning_rate": 9.418587583749685e-06, "loss": 0.3564, "step": 5258 }, { "epoch": 0.1804735758407687, "grad_norm": 0.7235051609633061, "learning_rate": 9.41832745910887e-06, "loss": 0.2723, "step": 5259 }, { "epoch": 0.18050789293067948, "grad_norm": 0.7811721123305555, "learning_rate": 9.418067279884773e-06, "loss": 0.3962, "step": 5260 }, { "epoch": 0.18054221002059026, "grad_norm": 0.7349926115094079, "learning_rate": 9.417807046080606e-06, "loss": 0.3257, "step": 5261 }, { "epoch": 0.18057652711050104, "grad_norm": 0.8743150871187213, "learning_rate": 9.417546757699582e-06, "loss": 0.336, "step": 5262 }, { "epoch": 0.18061084420041182, "grad_norm": 0.7587596388894434, "learning_rate": 9.417286414744918e-06, "loss": 0.2812, "step": 5263 }, { "epoch": 0.18064516129032257, "grad_norm": 0.8180429471844699, "learning_rate": 9.41702601721983e-06, "loss": 0.3702, "step": 5264 }, { "epoch": 0.18067947838023335, "grad_norm": 0.743157047821368, "learning_rate": 9.416765565127537e-06, "loss": 0.327, "step": 5265 }, { "epoch": 0.18071379547014413, "grad_norm": 0.9118981162090877, "learning_rate": 9.416505058471254e-06, "loss": 0.3714, "step": 5266 }, { "epoch": 0.1807481125600549, "grad_norm": 0.7600055178578877, "learning_rate": 9.4162444972542e-06, "loss": 0.2917, "step": 5267 }, { "epoch": 0.1807824296499657, "grad_norm": 0.9203681675724491, "learning_rate": 9.415983881479593e-06, "loss": 0.378, "step": 5268 }, { "epoch": 0.18081674673987647, "grad_norm": 0.7882649344713965, "learning_rate": 9.415723211150657e-06, "loss": 0.268, "step": 5269 }, { "epoch": 0.18085106382978725, "grad_norm": 0.7720549168383586, "learning_rate": 9.415462486270606e-06, "loss": 0.2968, "step": 5270 }, { "epoch": 0.180885380919698, "grad_norm": 0.9294900804634193, "learning_rate": 9.415201706842664e-06, "loss": 0.3097, "step": 5271 }, { "epoch": 0.18091969800960878, "grad_norm": 0.8273147713432465, "learning_rate": 9.414940872870054e-06, "loss": 0.3607, "step": 5272 }, { "epoch": 0.18095401509951956, "grad_norm": 0.8225067273862475, "learning_rate": 9.414679984355996e-06, "loss": 0.3231, "step": 5273 }, { "epoch": 0.18098833218943033, "grad_norm": 1.0591660720457716, "learning_rate": 9.414419041303715e-06, "loss": 0.3229, "step": 5274 }, { "epoch": 0.1810226492793411, "grad_norm": 1.0197431106098938, "learning_rate": 9.414158043716431e-06, "loss": 0.3298, "step": 5275 }, { "epoch": 0.1810569663692519, "grad_norm": 0.7877055106640192, "learning_rate": 9.413896991597374e-06, "loss": 0.3481, "step": 5276 }, { "epoch": 0.18109128345916267, "grad_norm": 1.0745208509269437, "learning_rate": 9.413635884949764e-06, "loss": 0.4326, "step": 5277 }, { "epoch": 0.18112560054907345, "grad_norm": 0.8751634604302324, "learning_rate": 9.413374723776827e-06, "loss": 0.2942, "step": 5278 }, { "epoch": 0.1811599176389842, "grad_norm": 0.7619753199806546, "learning_rate": 9.413113508081792e-06, "loss": 0.3172, "step": 5279 }, { "epoch": 0.18119423472889498, "grad_norm": 0.8471383900348929, "learning_rate": 9.412852237867885e-06, "loss": 0.3924, "step": 5280 }, { "epoch": 0.18122855181880576, "grad_norm": 0.7823594346279384, "learning_rate": 9.412590913138334e-06, "loss": 0.3511, "step": 5281 }, { "epoch": 0.18126286890871654, "grad_norm": 0.792881059105552, "learning_rate": 9.412329533896367e-06, "loss": 0.4038, "step": 5282 }, { "epoch": 0.18129718599862732, "grad_norm": 0.7497353814642768, "learning_rate": 9.41206810014521e-06, "loss": 0.3303, "step": 5283 }, { "epoch": 0.1813315030885381, "grad_norm": 0.819287935290151, "learning_rate": 9.411806611888099e-06, "loss": 0.2825, "step": 5284 }, { "epoch": 0.18136582017844888, "grad_norm": 0.7866105569287832, "learning_rate": 9.411545069128259e-06, "loss": 0.3473, "step": 5285 }, { "epoch": 0.18140013726835966, "grad_norm": 0.9464548679980269, "learning_rate": 9.411283471868924e-06, "loss": 0.3511, "step": 5286 }, { "epoch": 0.1814344543582704, "grad_norm": 0.7853033785324202, "learning_rate": 9.411021820113325e-06, "loss": 0.3848, "step": 5287 }, { "epoch": 0.1814687714481812, "grad_norm": 0.929869950537133, "learning_rate": 9.410760113864695e-06, "loss": 0.3461, "step": 5288 }, { "epoch": 0.18150308853809197, "grad_norm": 0.816676520577672, "learning_rate": 9.410498353126264e-06, "loss": 0.3161, "step": 5289 }, { "epoch": 0.18153740562800275, "grad_norm": 0.7104329903622102, "learning_rate": 9.41023653790127e-06, "loss": 0.2987, "step": 5290 }, { "epoch": 0.18157172271791353, "grad_norm": 0.807982201567139, "learning_rate": 9.409974668192942e-06, "loss": 0.3281, "step": 5291 }, { "epoch": 0.1816060398078243, "grad_norm": 0.7820252876961132, "learning_rate": 9.409712744004522e-06, "loss": 0.3507, "step": 5292 }, { "epoch": 0.18164035689773508, "grad_norm": 0.7583375395580212, "learning_rate": 9.40945076533924e-06, "loss": 0.3559, "step": 5293 }, { "epoch": 0.18167467398764583, "grad_norm": 0.8277425626861419, "learning_rate": 9.409188732200337e-06, "loss": 0.3383, "step": 5294 }, { "epoch": 0.18170899107755661, "grad_norm": 0.873456971680246, "learning_rate": 9.408926644591046e-06, "loss": 0.3422, "step": 5295 }, { "epoch": 0.1817433081674674, "grad_norm": 0.8706784325093139, "learning_rate": 9.408664502514609e-06, "loss": 0.4106, "step": 5296 }, { "epoch": 0.18177762525737817, "grad_norm": 0.8637625713732461, "learning_rate": 9.40840230597426e-06, "loss": 0.2922, "step": 5297 }, { "epoch": 0.18181194234728895, "grad_norm": 0.7969423595788442, "learning_rate": 9.408140054973241e-06, "loss": 0.3611, "step": 5298 }, { "epoch": 0.18184625943719973, "grad_norm": 0.8102057312655588, "learning_rate": 9.40787774951479e-06, "loss": 0.359, "step": 5299 }, { "epoch": 0.1818805765271105, "grad_norm": 0.7909680396237937, "learning_rate": 9.407615389602149e-06, "loss": 0.3613, "step": 5300 }, { "epoch": 0.1819148936170213, "grad_norm": 0.7665917713281591, "learning_rate": 9.40735297523856e-06, "loss": 0.3393, "step": 5301 }, { "epoch": 0.18194921070693204, "grad_norm": 0.8935379166766794, "learning_rate": 9.407090506427261e-06, "loss": 0.3564, "step": 5302 }, { "epoch": 0.18198352779684282, "grad_norm": 0.7652631797649749, "learning_rate": 9.406827983171498e-06, "loss": 0.3664, "step": 5303 }, { "epoch": 0.1820178448867536, "grad_norm": 0.8353734843211232, "learning_rate": 9.406565405474513e-06, "loss": 0.3705, "step": 5304 }, { "epoch": 0.18205216197666438, "grad_norm": 0.786708256094731, "learning_rate": 9.40630277333955e-06, "loss": 0.377, "step": 5305 }, { "epoch": 0.18208647906657516, "grad_norm": 0.9788267508118924, "learning_rate": 9.406040086769854e-06, "loss": 0.4015, "step": 5306 }, { "epoch": 0.18212079615648594, "grad_norm": 0.7514924478124709, "learning_rate": 9.405777345768669e-06, "loss": 0.42, "step": 5307 }, { "epoch": 0.18215511324639672, "grad_norm": 0.9379612864490504, "learning_rate": 9.405514550339244e-06, "loss": 0.3757, "step": 5308 }, { "epoch": 0.18218943033630747, "grad_norm": 0.7724674469353222, "learning_rate": 9.40525170048482e-06, "loss": 0.3041, "step": 5309 }, { "epoch": 0.18222374742621825, "grad_norm": 0.7467897848748835, "learning_rate": 9.404988796208649e-06, "loss": 0.3474, "step": 5310 }, { "epoch": 0.18225806451612903, "grad_norm": 0.7947844559718586, "learning_rate": 9.404725837513976e-06, "loss": 0.3675, "step": 5311 }, { "epoch": 0.1822923816060398, "grad_norm": 0.8230477944254005, "learning_rate": 9.404462824404051e-06, "loss": 0.2719, "step": 5312 }, { "epoch": 0.18232669869595058, "grad_norm": 0.8114090082355921, "learning_rate": 9.404199756882122e-06, "loss": 0.3213, "step": 5313 }, { "epoch": 0.18236101578586136, "grad_norm": 0.8006744558300949, "learning_rate": 9.403936634951442e-06, "loss": 0.3245, "step": 5314 }, { "epoch": 0.18239533287577214, "grad_norm": 0.7668897140302676, "learning_rate": 9.403673458615258e-06, "loss": 0.3095, "step": 5315 }, { "epoch": 0.18242964996568292, "grad_norm": 0.7833740360312192, "learning_rate": 9.403410227876823e-06, "loss": 0.2934, "step": 5316 }, { "epoch": 0.18246396705559367, "grad_norm": 0.9157958528888563, "learning_rate": 9.403146942739387e-06, "loss": 0.3649, "step": 5317 }, { "epoch": 0.18249828414550445, "grad_norm": 0.7604561302213619, "learning_rate": 9.402883603206207e-06, "loss": 0.3111, "step": 5318 }, { "epoch": 0.18253260123541523, "grad_norm": 0.8502083080374363, "learning_rate": 9.40262020928053e-06, "loss": 0.3419, "step": 5319 }, { "epoch": 0.182566918325326, "grad_norm": 0.8568892463838907, "learning_rate": 9.402356760965616e-06, "loss": 0.341, "step": 5320 }, { "epoch": 0.1826012354152368, "grad_norm": 0.7741998095120405, "learning_rate": 9.402093258264716e-06, "loss": 0.3296, "step": 5321 }, { "epoch": 0.18263555250514757, "grad_norm": 0.8394453268051991, "learning_rate": 9.401829701181087e-06, "loss": 0.3833, "step": 5322 }, { "epoch": 0.18266986959505835, "grad_norm": 0.9166706389964981, "learning_rate": 9.401566089717982e-06, "loss": 0.3431, "step": 5323 }, { "epoch": 0.18270418668496913, "grad_norm": 0.8120195697464214, "learning_rate": 9.401302423878661e-06, "loss": 0.3033, "step": 5324 }, { "epoch": 0.18273850377487988, "grad_norm": 0.8598932747880459, "learning_rate": 9.40103870366638e-06, "loss": 0.3925, "step": 5325 }, { "epoch": 0.18277282086479066, "grad_norm": 0.8755707545802415, "learning_rate": 9.400774929084397e-06, "loss": 0.3281, "step": 5326 }, { "epoch": 0.18280713795470144, "grad_norm": 0.7949937692422717, "learning_rate": 9.40051110013597e-06, "loss": 0.3457, "step": 5327 }, { "epoch": 0.18284145504461222, "grad_norm": 0.8020293371657189, "learning_rate": 9.400247216824358e-06, "loss": 0.3261, "step": 5328 }, { "epoch": 0.182875772134523, "grad_norm": 0.7892976382411808, "learning_rate": 9.399983279152825e-06, "loss": 0.3574, "step": 5329 }, { "epoch": 0.18291008922443378, "grad_norm": 0.8037769289670901, "learning_rate": 9.399719287124626e-06, "loss": 0.4293, "step": 5330 }, { "epoch": 0.18294440631434455, "grad_norm": 0.762059763071615, "learning_rate": 9.399455240743026e-06, "loss": 0.3962, "step": 5331 }, { "epoch": 0.1829787234042553, "grad_norm": 0.8492253703725827, "learning_rate": 9.399191140011284e-06, "loss": 0.3825, "step": 5332 }, { "epoch": 0.18301304049416608, "grad_norm": 0.8296828626503258, "learning_rate": 9.398926984932664e-06, "loss": 0.3527, "step": 5333 }, { "epoch": 0.18304735758407686, "grad_norm": 0.8018942197606034, "learning_rate": 9.398662775510432e-06, "loss": 0.3626, "step": 5334 }, { "epoch": 0.18308167467398764, "grad_norm": 0.8577881465647018, "learning_rate": 9.39839851174785e-06, "loss": 0.3688, "step": 5335 }, { "epoch": 0.18311599176389842, "grad_norm": 0.8308218788932957, "learning_rate": 9.398134193648181e-06, "loss": 0.3253, "step": 5336 }, { "epoch": 0.1831503088538092, "grad_norm": 0.8221996159968226, "learning_rate": 9.39786982121469e-06, "loss": 0.3018, "step": 5337 }, { "epoch": 0.18318462594371998, "grad_norm": 0.845920695665865, "learning_rate": 9.397605394450648e-06, "loss": 0.3826, "step": 5338 }, { "epoch": 0.18321894303363076, "grad_norm": 0.817612261016402, "learning_rate": 9.397340913359318e-06, "loss": 0.3245, "step": 5339 }, { "epoch": 0.1832532601235415, "grad_norm": 0.808051343658686, "learning_rate": 9.397076377943966e-06, "loss": 0.3961, "step": 5340 }, { "epoch": 0.1832875772134523, "grad_norm": 0.8147116050466595, "learning_rate": 9.396811788207864e-06, "loss": 0.3661, "step": 5341 }, { "epoch": 0.18332189430336307, "grad_norm": 0.8193023851675182, "learning_rate": 9.396547144154278e-06, "loss": 0.3488, "step": 5342 }, { "epoch": 0.18335621139327385, "grad_norm": 0.7967393067772244, "learning_rate": 9.396282445786477e-06, "loss": 0.3039, "step": 5343 }, { "epoch": 0.18339052848318463, "grad_norm": 0.7715074245799178, "learning_rate": 9.396017693107732e-06, "loss": 0.3241, "step": 5344 }, { "epoch": 0.1834248455730954, "grad_norm": 0.7602783867376337, "learning_rate": 9.395752886121315e-06, "loss": 0.324, "step": 5345 }, { "epoch": 0.1834591626630062, "grad_norm": 0.7433793387672174, "learning_rate": 9.395488024830495e-06, "loss": 0.3166, "step": 5346 }, { "epoch": 0.18349347975291697, "grad_norm": 0.7189371851753031, "learning_rate": 9.395223109238545e-06, "loss": 0.2825, "step": 5347 }, { "epoch": 0.18352779684282772, "grad_norm": 0.8195648898796899, "learning_rate": 9.394958139348738e-06, "loss": 0.3287, "step": 5348 }, { "epoch": 0.1835621139327385, "grad_norm": 0.8468479515984865, "learning_rate": 9.394693115164345e-06, "loss": 0.3707, "step": 5349 }, { "epoch": 0.18359643102264928, "grad_norm": 0.8394893911586373, "learning_rate": 9.394428036688647e-06, "loss": 0.3659, "step": 5350 }, { "epoch": 0.18363074811256005, "grad_norm": 0.9282903107767366, "learning_rate": 9.394162903924911e-06, "loss": 0.3552, "step": 5351 }, { "epoch": 0.18366506520247083, "grad_norm": 0.9969636747329415, "learning_rate": 9.393897716876416e-06, "loss": 0.364, "step": 5352 }, { "epoch": 0.1836993822923816, "grad_norm": 0.7981971581245901, "learning_rate": 9.393632475546437e-06, "loss": 0.3438, "step": 5353 }, { "epoch": 0.1837336993822924, "grad_norm": 0.8858992538783874, "learning_rate": 9.393367179938252e-06, "loss": 0.3403, "step": 5354 }, { "epoch": 0.18376801647220314, "grad_norm": 0.9024887602858939, "learning_rate": 9.393101830055138e-06, "loss": 0.3924, "step": 5355 }, { "epoch": 0.18380233356211392, "grad_norm": 0.84737267044901, "learning_rate": 9.392836425900371e-06, "loss": 0.3103, "step": 5356 }, { "epoch": 0.1838366506520247, "grad_norm": 0.7518685022430942, "learning_rate": 9.392570967477233e-06, "loss": 0.3996, "step": 5357 }, { "epoch": 0.18387096774193548, "grad_norm": 0.9935821841317188, "learning_rate": 9.392305454789001e-06, "loss": 0.3775, "step": 5358 }, { "epoch": 0.18390528483184626, "grad_norm": 0.8641212919026783, "learning_rate": 9.392039887838957e-06, "loss": 0.3821, "step": 5359 }, { "epoch": 0.18393960192175704, "grad_norm": 0.7615550114441165, "learning_rate": 9.39177426663038e-06, "loss": 0.383, "step": 5360 }, { "epoch": 0.18397391901166782, "grad_norm": 0.806227233935286, "learning_rate": 9.391508591166552e-06, "loss": 0.3749, "step": 5361 }, { "epoch": 0.1840082361015786, "grad_norm": 0.7880120652100913, "learning_rate": 9.391242861450757e-06, "loss": 0.3147, "step": 5362 }, { "epoch": 0.18404255319148935, "grad_norm": 0.8386365963125599, "learning_rate": 9.390977077486275e-06, "loss": 0.3863, "step": 5363 }, { "epoch": 0.18407687028140013, "grad_norm": 0.7596000177272441, "learning_rate": 9.39071123927639e-06, "loss": 0.3538, "step": 5364 }, { "epoch": 0.1841111873713109, "grad_norm": 0.8651150344747865, "learning_rate": 9.390445346824388e-06, "loss": 0.346, "step": 5365 }, { "epoch": 0.1841455044612217, "grad_norm": 0.8341693872969563, "learning_rate": 9.390179400133553e-06, "loss": 0.3224, "step": 5366 }, { "epoch": 0.18417982155113247, "grad_norm": 0.7685338175169024, "learning_rate": 9.389913399207167e-06, "loss": 0.3938, "step": 5367 }, { "epoch": 0.18421413864104325, "grad_norm": 0.7675605668843977, "learning_rate": 9.389647344048523e-06, "loss": 0.2913, "step": 5368 }, { "epoch": 0.18424845573095402, "grad_norm": 0.8213121468320482, "learning_rate": 9.389381234660901e-06, "loss": 0.3777, "step": 5369 }, { "epoch": 0.1842827728208648, "grad_norm": 0.7640691197106297, "learning_rate": 9.389115071047593e-06, "loss": 0.3175, "step": 5370 }, { "epoch": 0.18431708991077556, "grad_norm": 0.8082479691255318, "learning_rate": 9.388848853211883e-06, "loss": 0.3166, "step": 5371 }, { "epoch": 0.18435140700068633, "grad_norm": 0.8759939903177097, "learning_rate": 9.388582581157065e-06, "loss": 0.342, "step": 5372 }, { "epoch": 0.18438572409059711, "grad_norm": 0.7874053897571073, "learning_rate": 9.388316254886425e-06, "loss": 0.3302, "step": 5373 }, { "epoch": 0.1844200411805079, "grad_norm": 0.9597867233546419, "learning_rate": 9.388049874403253e-06, "loss": 0.2887, "step": 5374 }, { "epoch": 0.18445435827041867, "grad_norm": 0.7849852348415368, "learning_rate": 9.387783439710843e-06, "loss": 0.325, "step": 5375 }, { "epoch": 0.18448867536032945, "grad_norm": 0.8859502572698498, "learning_rate": 9.387516950812483e-06, "loss": 0.2952, "step": 5376 }, { "epoch": 0.18452299245024023, "grad_norm": 0.797291761957372, "learning_rate": 9.387250407711466e-06, "loss": 0.3669, "step": 5377 }, { "epoch": 0.18455730954015098, "grad_norm": 0.8291917128537798, "learning_rate": 9.386983810411085e-06, "loss": 0.366, "step": 5378 }, { "epoch": 0.18459162663006176, "grad_norm": 0.8206310253035759, "learning_rate": 9.386717158914634e-06, "loss": 0.3832, "step": 5379 }, { "epoch": 0.18462594371997254, "grad_norm": 0.8718396796010983, "learning_rate": 9.386450453225408e-06, "loss": 0.3536, "step": 5380 }, { "epoch": 0.18466026080988332, "grad_norm": 0.8300418145057094, "learning_rate": 9.3861836933467e-06, "loss": 0.3427, "step": 5381 }, { "epoch": 0.1846945778997941, "grad_norm": 0.8525915259259376, "learning_rate": 9.385916879281805e-06, "loss": 0.2979, "step": 5382 }, { "epoch": 0.18472889498970488, "grad_norm": 0.760107379458393, "learning_rate": 9.385650011034023e-06, "loss": 0.3324, "step": 5383 }, { "epoch": 0.18476321207961566, "grad_norm": 0.9308443273188308, "learning_rate": 9.385383088606646e-06, "loss": 0.3397, "step": 5384 }, { "epoch": 0.18479752916952644, "grad_norm": 0.7614606838328665, "learning_rate": 9.385116112002975e-06, "loss": 0.3051, "step": 5385 }, { "epoch": 0.1848318462594372, "grad_norm": 0.8007502919778955, "learning_rate": 9.384849081226307e-06, "loss": 0.3447, "step": 5386 }, { "epoch": 0.18486616334934797, "grad_norm": 0.8189636592467192, "learning_rate": 9.38458199627994e-06, "loss": 0.348, "step": 5387 }, { "epoch": 0.18490048043925875, "grad_norm": 0.8595508450280033, "learning_rate": 9.384314857167176e-06, "loss": 0.3612, "step": 5388 }, { "epoch": 0.18493479752916953, "grad_norm": 0.7952120697441104, "learning_rate": 9.38404766389131e-06, "loss": 0.362, "step": 5389 }, { "epoch": 0.1849691146190803, "grad_norm": 0.8877258799022031, "learning_rate": 9.38378041645565e-06, "loss": 0.3652, "step": 5390 }, { "epoch": 0.18500343170899108, "grad_norm": 0.9454164389734214, "learning_rate": 9.383513114863493e-06, "loss": 0.3396, "step": 5391 }, { "epoch": 0.18503774879890186, "grad_norm": 0.7589363475483245, "learning_rate": 9.383245759118142e-06, "loss": 0.3025, "step": 5392 }, { "epoch": 0.18507206588881264, "grad_norm": 0.7759885952060936, "learning_rate": 9.3829783492229e-06, "loss": 0.324, "step": 5393 }, { "epoch": 0.1851063829787234, "grad_norm": 0.8215380061818558, "learning_rate": 9.382710885181071e-06, "loss": 0.277, "step": 5394 }, { "epoch": 0.18514070006863417, "grad_norm": 0.784471759884982, "learning_rate": 9.382443366995958e-06, "loss": 0.3388, "step": 5395 }, { "epoch": 0.18517501715854495, "grad_norm": 0.6884073952877423, "learning_rate": 9.382175794670868e-06, "loss": 0.3249, "step": 5396 }, { "epoch": 0.18520933424845573, "grad_norm": 0.8217029471017816, "learning_rate": 9.381908168209104e-06, "loss": 0.3118, "step": 5397 }, { "epoch": 0.1852436513383665, "grad_norm": 0.8560918795059923, "learning_rate": 9.381640487613972e-06, "loss": 0.3782, "step": 5398 }, { "epoch": 0.1852779684282773, "grad_norm": 0.7526096852796607, "learning_rate": 9.381372752888782e-06, "loss": 0.3462, "step": 5399 }, { "epoch": 0.18531228551818807, "grad_norm": 0.6896255821901339, "learning_rate": 9.38110496403684e-06, "loss": 0.3056, "step": 5400 }, { "epoch": 0.18534660260809882, "grad_norm": 0.8131660568918009, "learning_rate": 9.380837121061455e-06, "loss": 0.3328, "step": 5401 }, { "epoch": 0.1853809196980096, "grad_norm": 0.8503969317653218, "learning_rate": 9.380569223965931e-06, "loss": 0.2948, "step": 5402 }, { "epoch": 0.18541523678792038, "grad_norm": 0.9816777957102659, "learning_rate": 9.380301272753586e-06, "loss": 0.3687, "step": 5403 }, { "epoch": 0.18544955387783116, "grad_norm": 0.831983823797925, "learning_rate": 9.380033267427724e-06, "loss": 0.3351, "step": 5404 }, { "epoch": 0.18548387096774194, "grad_norm": 0.8042749996729954, "learning_rate": 9.379765207991656e-06, "loss": 0.3084, "step": 5405 }, { "epoch": 0.18551818805765272, "grad_norm": 0.855725631267933, "learning_rate": 9.379497094448697e-06, "loss": 0.3749, "step": 5406 }, { "epoch": 0.1855525051475635, "grad_norm": 0.8194153146751161, "learning_rate": 9.379228926802158e-06, "loss": 0.4003, "step": 5407 }, { "epoch": 0.18558682223747427, "grad_norm": 1.0771094067289113, "learning_rate": 9.378960705055348e-06, "loss": 0.3202, "step": 5408 }, { "epoch": 0.18562113932738503, "grad_norm": 0.8479568346442012, "learning_rate": 9.378692429211588e-06, "loss": 0.3931, "step": 5409 }, { "epoch": 0.1856554564172958, "grad_norm": 0.7222438293209331, "learning_rate": 9.378424099274186e-06, "loss": 0.3102, "step": 5410 }, { "epoch": 0.18568977350720658, "grad_norm": 0.8034569708732597, "learning_rate": 9.37815571524646e-06, "loss": 0.303, "step": 5411 }, { "epoch": 0.18572409059711736, "grad_norm": 0.7603313616622672, "learning_rate": 9.377887277131723e-06, "loss": 0.3156, "step": 5412 }, { "epoch": 0.18575840768702814, "grad_norm": 0.7637527083337099, "learning_rate": 9.377618784933293e-06, "loss": 0.3846, "step": 5413 }, { "epoch": 0.18579272477693892, "grad_norm": 0.9847635743108017, "learning_rate": 9.377350238654488e-06, "loss": 0.2989, "step": 5414 }, { "epoch": 0.1858270418668497, "grad_norm": 0.7626910599481626, "learning_rate": 9.377081638298623e-06, "loss": 0.3075, "step": 5415 }, { "epoch": 0.18586135895676045, "grad_norm": 0.7520949213941797, "learning_rate": 9.376812983869018e-06, "loss": 0.2979, "step": 5416 }, { "epoch": 0.18589567604667123, "grad_norm": 0.7357488854167873, "learning_rate": 9.376544275368993e-06, "loss": 0.3649, "step": 5417 }, { "epoch": 0.185929993136582, "grad_norm": 0.8345779682724427, "learning_rate": 9.376275512801863e-06, "loss": 0.3428, "step": 5418 }, { "epoch": 0.1859643102264928, "grad_norm": 0.734916429753342, "learning_rate": 9.376006696170954e-06, "loss": 0.2699, "step": 5419 }, { "epoch": 0.18599862731640357, "grad_norm": 0.8075777809549459, "learning_rate": 9.375737825479583e-06, "loss": 0.2786, "step": 5420 }, { "epoch": 0.18603294440631435, "grad_norm": 0.7694771422565135, "learning_rate": 9.375468900731073e-06, "loss": 0.3344, "step": 5421 }, { "epoch": 0.18606726149622513, "grad_norm": 0.8270340928697497, "learning_rate": 9.375199921928746e-06, "loss": 0.3453, "step": 5422 }, { "epoch": 0.1861015785861359, "grad_norm": 0.8613872593586046, "learning_rate": 9.374930889075923e-06, "loss": 0.3026, "step": 5423 }, { "epoch": 0.18613589567604666, "grad_norm": 0.889056772633939, "learning_rate": 9.374661802175932e-06, "loss": 0.358, "step": 5424 }, { "epoch": 0.18617021276595744, "grad_norm": 0.7377303728738337, "learning_rate": 9.374392661232094e-06, "loss": 0.3426, "step": 5425 }, { "epoch": 0.18620452985586822, "grad_norm": 0.802605116707136, "learning_rate": 9.374123466247736e-06, "loss": 0.3171, "step": 5426 }, { "epoch": 0.186238846945779, "grad_norm": 0.782338986176343, "learning_rate": 9.373854217226181e-06, "loss": 0.3657, "step": 5427 }, { "epoch": 0.18627316403568978, "grad_norm": 0.8100770757513295, "learning_rate": 9.373584914170756e-06, "loss": 0.3724, "step": 5428 }, { "epoch": 0.18630748112560055, "grad_norm": 0.7531820762446592, "learning_rate": 9.37331555708479e-06, "loss": 0.3261, "step": 5429 }, { "epoch": 0.18634179821551133, "grad_norm": 0.7402200513958652, "learning_rate": 9.37304614597161e-06, "loss": 0.3316, "step": 5430 }, { "epoch": 0.1863761153054221, "grad_norm": 0.7261072925607935, "learning_rate": 9.372776680834541e-06, "loss": 0.294, "step": 5431 }, { "epoch": 0.18641043239533286, "grad_norm": 0.723435561718127, "learning_rate": 9.372507161676915e-06, "loss": 0.2943, "step": 5432 }, { "epoch": 0.18644474948524364, "grad_norm": 0.9352164234046983, "learning_rate": 9.37223758850206e-06, "loss": 0.3132, "step": 5433 }, { "epoch": 0.18647906657515442, "grad_norm": 0.7329538789765059, "learning_rate": 9.371967961313309e-06, "loss": 0.2791, "step": 5434 }, { "epoch": 0.1865133836650652, "grad_norm": 0.7613250993598961, "learning_rate": 9.37169828011399e-06, "loss": 0.2975, "step": 5435 }, { "epoch": 0.18654770075497598, "grad_norm": 0.8584465686258954, "learning_rate": 9.371428544907435e-06, "loss": 0.3351, "step": 5436 }, { "epoch": 0.18658201784488676, "grad_norm": 1.042336148913441, "learning_rate": 9.371158755696979e-06, "loss": 0.3606, "step": 5437 }, { "epoch": 0.18661633493479754, "grad_norm": 0.7549941985517857, "learning_rate": 9.37088891248595e-06, "loss": 0.3653, "step": 5438 }, { "epoch": 0.1866506520247083, "grad_norm": 0.8432686802303613, "learning_rate": 9.370619015277686e-06, "loss": 0.3319, "step": 5439 }, { "epoch": 0.18668496911461907, "grad_norm": 0.7189598646236625, "learning_rate": 9.37034906407552e-06, "loss": 0.3324, "step": 5440 }, { "epoch": 0.18671928620452985, "grad_norm": 0.8693062460640408, "learning_rate": 9.370079058882786e-06, "loss": 0.3588, "step": 5441 }, { "epoch": 0.18675360329444063, "grad_norm": 0.7836113408744317, "learning_rate": 9.369808999702821e-06, "loss": 0.3563, "step": 5442 }, { "epoch": 0.1867879203843514, "grad_norm": 0.8133135649184563, "learning_rate": 9.36953888653896e-06, "loss": 0.3268, "step": 5443 }, { "epoch": 0.1868222374742622, "grad_norm": 0.8627788906581401, "learning_rate": 9.36926871939454e-06, "loss": 0.3225, "step": 5444 }, { "epoch": 0.18685655456417297, "grad_norm": 0.8715090279114563, "learning_rate": 9.3689984982729e-06, "loss": 0.3495, "step": 5445 }, { "epoch": 0.18689087165408375, "grad_norm": 0.7654346441618977, "learning_rate": 9.368728223177377e-06, "loss": 0.3901, "step": 5446 }, { "epoch": 0.1869251887439945, "grad_norm": 0.8639499559260462, "learning_rate": 9.368457894111309e-06, "loss": 0.3732, "step": 5447 }, { "epoch": 0.18695950583390528, "grad_norm": 0.7764577114898387, "learning_rate": 9.36818751107804e-06, "loss": 0.3624, "step": 5448 }, { "epoch": 0.18699382292381606, "grad_norm": 0.7470842981180017, "learning_rate": 9.367917074080904e-06, "loss": 0.3397, "step": 5449 }, { "epoch": 0.18702814001372683, "grad_norm": 0.7454378281159266, "learning_rate": 9.367646583123247e-06, "loss": 0.3099, "step": 5450 }, { "epoch": 0.1870624571036376, "grad_norm": 0.749062628278511, "learning_rate": 9.367376038208407e-06, "loss": 0.3177, "step": 5451 }, { "epoch": 0.1870967741935484, "grad_norm": 0.7355256960216783, "learning_rate": 9.367105439339728e-06, "loss": 0.2991, "step": 5452 }, { "epoch": 0.18713109128345917, "grad_norm": 0.7804041229454853, "learning_rate": 9.366834786520554e-06, "loss": 0.3186, "step": 5453 }, { "epoch": 0.18716540837336995, "grad_norm": 0.678426469252713, "learning_rate": 9.366564079754226e-06, "loss": 0.3074, "step": 5454 }, { "epoch": 0.1871997254632807, "grad_norm": 0.875004383072505, "learning_rate": 9.36629331904409e-06, "loss": 0.3277, "step": 5455 }, { "epoch": 0.18723404255319148, "grad_norm": 0.7943363956785898, "learning_rate": 9.366022504393493e-06, "loss": 0.3354, "step": 5456 }, { "epoch": 0.18726835964310226, "grad_norm": 0.7620253055316538, "learning_rate": 9.365751635805775e-06, "loss": 0.3427, "step": 5457 }, { "epoch": 0.18730267673301304, "grad_norm": 0.8253693463303957, "learning_rate": 9.365480713284289e-06, "loss": 0.3423, "step": 5458 }, { "epoch": 0.18733699382292382, "grad_norm": 0.8422040968855503, "learning_rate": 9.365209736832376e-06, "loss": 0.3152, "step": 5459 }, { "epoch": 0.1873713109128346, "grad_norm": 0.8415436007320911, "learning_rate": 9.364938706453386e-06, "loss": 0.3314, "step": 5460 }, { "epoch": 0.18740562800274538, "grad_norm": 0.8338898376276408, "learning_rate": 9.364667622150668e-06, "loss": 0.3265, "step": 5461 }, { "epoch": 0.18743994509265613, "grad_norm": 0.758572251296471, "learning_rate": 9.36439648392757e-06, "loss": 0.3067, "step": 5462 }, { "epoch": 0.1874742621825669, "grad_norm": 0.7993404201060011, "learning_rate": 9.364125291787442e-06, "loss": 0.3209, "step": 5463 }, { "epoch": 0.1875085792724777, "grad_norm": 0.8457557331142121, "learning_rate": 9.363854045733634e-06, "loss": 0.3055, "step": 5464 }, { "epoch": 0.18754289636238847, "grad_norm": 0.7572970070899463, "learning_rate": 9.363582745769496e-06, "loss": 0.3259, "step": 5465 }, { "epoch": 0.18757721345229925, "grad_norm": 0.9062964271789375, "learning_rate": 9.363311391898383e-06, "loss": 0.3915, "step": 5466 }, { "epoch": 0.18761153054221003, "grad_norm": 0.8782905740479273, "learning_rate": 9.363039984123642e-06, "loss": 0.3405, "step": 5467 }, { "epoch": 0.1876458476321208, "grad_norm": 0.8389514229825137, "learning_rate": 9.362768522448631e-06, "loss": 0.3339, "step": 5468 }, { "epoch": 0.18768016472203158, "grad_norm": 0.7304146405638664, "learning_rate": 9.362497006876702e-06, "loss": 0.3529, "step": 5469 }, { "epoch": 0.18771448181194234, "grad_norm": 0.7398306424781153, "learning_rate": 9.362225437411207e-06, "loss": 0.3248, "step": 5470 }, { "epoch": 0.18774879890185311, "grad_norm": 0.9020256209930789, "learning_rate": 9.361953814055503e-06, "loss": 0.3562, "step": 5471 }, { "epoch": 0.1877831159917639, "grad_norm": 0.7984310846269397, "learning_rate": 9.361682136812946e-06, "loss": 0.3385, "step": 5472 }, { "epoch": 0.18781743308167467, "grad_norm": 0.7480219788708568, "learning_rate": 9.36141040568689e-06, "loss": 0.3324, "step": 5473 }, { "epoch": 0.18785175017158545, "grad_norm": 0.7753378441651848, "learning_rate": 9.361138620680693e-06, "loss": 0.293, "step": 5474 }, { "epoch": 0.18788606726149623, "grad_norm": 0.770371201598822, "learning_rate": 9.360866781797715e-06, "loss": 0.3072, "step": 5475 }, { "epoch": 0.187920384351407, "grad_norm": 0.78354581384445, "learning_rate": 9.360594889041311e-06, "loss": 0.3518, "step": 5476 }, { "epoch": 0.1879547014413178, "grad_norm": 0.8405283176214701, "learning_rate": 9.360322942414842e-06, "loss": 0.3786, "step": 5477 }, { "epoch": 0.18798901853122854, "grad_norm": 0.8521292971698567, "learning_rate": 9.360050941921664e-06, "loss": 0.3369, "step": 5478 }, { "epoch": 0.18802333562113932, "grad_norm": 0.8741857608145303, "learning_rate": 9.359778887565144e-06, "loss": 0.3823, "step": 5479 }, { "epoch": 0.1880576527110501, "grad_norm": 0.8273397634097787, "learning_rate": 9.359506779348635e-06, "loss": 0.3477, "step": 5480 }, { "epoch": 0.18809196980096088, "grad_norm": 0.9336051862631871, "learning_rate": 9.359234617275504e-06, "loss": 0.3403, "step": 5481 }, { "epoch": 0.18812628689087166, "grad_norm": 0.8685347716393869, "learning_rate": 9.358962401349112e-06, "loss": 0.3591, "step": 5482 }, { "epoch": 0.18816060398078244, "grad_norm": 0.7561472981065008, "learning_rate": 9.358690131572821e-06, "loss": 0.3158, "step": 5483 }, { "epoch": 0.18819492107069322, "grad_norm": 0.8409228543398407, "learning_rate": 9.358417807949995e-06, "loss": 0.3403, "step": 5484 }, { "epoch": 0.18822923816060397, "grad_norm": 0.7453792230822894, "learning_rate": 9.358145430484e-06, "loss": 0.2956, "step": 5485 }, { "epoch": 0.18826355525051475, "grad_norm": 1.2064915402113008, "learning_rate": 9.357872999178199e-06, "loss": 0.3509, "step": 5486 }, { "epoch": 0.18829787234042553, "grad_norm": 0.8919194398934259, "learning_rate": 9.357600514035956e-06, "loss": 0.3545, "step": 5487 }, { "epoch": 0.1883321894303363, "grad_norm": 0.8176009649860679, "learning_rate": 9.35732797506064e-06, "loss": 0.3419, "step": 5488 }, { "epoch": 0.18836650652024708, "grad_norm": 0.8606408318109816, "learning_rate": 9.357055382255617e-06, "loss": 0.4157, "step": 5489 }, { "epoch": 0.18840082361015786, "grad_norm": 0.7853638094984138, "learning_rate": 9.356782735624256e-06, "loss": 0.3042, "step": 5490 }, { "epoch": 0.18843514070006864, "grad_norm": 0.8154038030474545, "learning_rate": 9.356510035169923e-06, "loss": 0.3514, "step": 5491 }, { "epoch": 0.18846945778997942, "grad_norm": 0.860762655994869, "learning_rate": 9.356237280895986e-06, "loss": 0.3824, "step": 5492 }, { "epoch": 0.18850377487989017, "grad_norm": 0.8193703986375228, "learning_rate": 9.355964472805819e-06, "loss": 0.3127, "step": 5493 }, { "epoch": 0.18853809196980095, "grad_norm": 0.866541462409693, "learning_rate": 9.355691610902787e-06, "loss": 0.3646, "step": 5494 }, { "epoch": 0.18857240905971173, "grad_norm": 0.7505778902647429, "learning_rate": 9.355418695190266e-06, "loss": 0.3222, "step": 5495 }, { "epoch": 0.1886067261496225, "grad_norm": 0.7799249131815299, "learning_rate": 9.355145725671623e-06, "loss": 0.3222, "step": 5496 }, { "epoch": 0.1886410432395333, "grad_norm": 0.940447562927523, "learning_rate": 9.354872702350232e-06, "loss": 0.372, "step": 5497 }, { "epoch": 0.18867536032944407, "grad_norm": 0.838692113387927, "learning_rate": 9.354599625229466e-06, "loss": 0.3337, "step": 5498 }, { "epoch": 0.18870967741935485, "grad_norm": 0.855243620306165, "learning_rate": 9.3543264943127e-06, "loss": 0.343, "step": 5499 }, { "epoch": 0.18874399450926563, "grad_norm": 0.7725014916194319, "learning_rate": 9.354053309603306e-06, "loss": 0.2921, "step": 5500 }, { "epoch": 0.18877831159917638, "grad_norm": 0.8104864359728302, "learning_rate": 9.35378007110466e-06, "loss": 0.3055, "step": 5501 }, { "epoch": 0.18881262868908716, "grad_norm": 0.7681404770981575, "learning_rate": 9.353506778820138e-06, "loss": 0.3504, "step": 5502 }, { "epoch": 0.18884694577899794, "grad_norm": 0.8023187522298422, "learning_rate": 9.353233432753115e-06, "loss": 0.3669, "step": 5503 }, { "epoch": 0.18888126286890872, "grad_norm": 0.8220873763009313, "learning_rate": 9.352960032906966e-06, "loss": 0.2945, "step": 5504 }, { "epoch": 0.1889155799588195, "grad_norm": 0.7575675511760678, "learning_rate": 9.352686579285073e-06, "loss": 0.3318, "step": 5505 }, { "epoch": 0.18894989704873028, "grad_norm": 0.830565248852289, "learning_rate": 9.352413071890811e-06, "loss": 0.3314, "step": 5506 }, { "epoch": 0.18898421413864105, "grad_norm": 0.772859350631907, "learning_rate": 9.352139510727562e-06, "loss": 0.2894, "step": 5507 }, { "epoch": 0.1890185312285518, "grad_norm": 1.0400580649201743, "learning_rate": 9.351865895798701e-06, "loss": 0.2669, "step": 5508 }, { "epoch": 0.18905284831846259, "grad_norm": 0.8172193293154782, "learning_rate": 9.351592227107612e-06, "loss": 0.3392, "step": 5509 }, { "epoch": 0.18908716540837336, "grad_norm": 0.7912334671698447, "learning_rate": 9.351318504657674e-06, "loss": 0.3361, "step": 5510 }, { "epoch": 0.18912148249828414, "grad_norm": 0.9222103476491504, "learning_rate": 9.35104472845227e-06, "loss": 0.3266, "step": 5511 }, { "epoch": 0.18915579958819492, "grad_norm": 0.8703347947936875, "learning_rate": 9.350770898494781e-06, "loss": 0.3233, "step": 5512 }, { "epoch": 0.1891901166781057, "grad_norm": 0.8491256347820167, "learning_rate": 9.350497014788589e-06, "loss": 0.3329, "step": 5513 }, { "epoch": 0.18922443376801648, "grad_norm": 0.9529495244585605, "learning_rate": 9.35022307733708e-06, "loss": 0.3449, "step": 5514 }, { "epoch": 0.18925875085792726, "grad_norm": 1.2240758089354946, "learning_rate": 9.349949086143634e-06, "loss": 0.3177, "step": 5515 }, { "epoch": 0.189293067947838, "grad_norm": 0.7419068040040366, "learning_rate": 9.34967504121164e-06, "loss": 0.2983, "step": 5516 }, { "epoch": 0.1893273850377488, "grad_norm": 0.823084115683537, "learning_rate": 9.349400942544485e-06, "loss": 0.3302, "step": 5517 }, { "epoch": 0.18936170212765957, "grad_norm": 0.7975403726624284, "learning_rate": 9.349126790145548e-06, "loss": 0.346, "step": 5518 }, { "epoch": 0.18939601921757035, "grad_norm": 0.9793560324265547, "learning_rate": 9.348852584018223e-06, "loss": 0.3418, "step": 5519 }, { "epoch": 0.18943033630748113, "grad_norm": 0.8114691012308346, "learning_rate": 9.348578324165893e-06, "loss": 0.3778, "step": 5520 }, { "epoch": 0.1894646533973919, "grad_norm": 0.8454472288438437, "learning_rate": 9.348304010591948e-06, "loss": 0.3311, "step": 5521 }, { "epoch": 0.1894989704873027, "grad_norm": 0.918059405178791, "learning_rate": 9.348029643299777e-06, "loss": 0.3945, "step": 5522 }, { "epoch": 0.18953328757721344, "grad_norm": 0.7979802008098931, "learning_rate": 9.34775522229277e-06, "loss": 0.3336, "step": 5523 }, { "epoch": 0.18956760466712422, "grad_norm": 0.8692148267768529, "learning_rate": 9.347480747574313e-06, "loss": 0.3565, "step": 5524 }, { "epoch": 0.189601921757035, "grad_norm": 0.8304819381372021, "learning_rate": 9.347206219147804e-06, "loss": 0.3089, "step": 5525 }, { "epoch": 0.18963623884694578, "grad_norm": 0.8111619311565633, "learning_rate": 9.346931637016627e-06, "loss": 0.339, "step": 5526 }, { "epoch": 0.18967055593685656, "grad_norm": 0.7560607080138244, "learning_rate": 9.34665700118418e-06, "loss": 0.3485, "step": 5527 }, { "epoch": 0.18970487302676733, "grad_norm": 0.8698470149670379, "learning_rate": 9.346382311653852e-06, "loss": 0.3467, "step": 5528 }, { "epoch": 0.1897391901166781, "grad_norm": 0.8178854824137503, "learning_rate": 9.34610756842904e-06, "loss": 0.3208, "step": 5529 }, { "epoch": 0.1897735072065889, "grad_norm": 0.7937159456171232, "learning_rate": 9.345832771513133e-06, "loss": 0.3447, "step": 5530 }, { "epoch": 0.18980782429649964, "grad_norm": 0.8512566174797496, "learning_rate": 9.345557920909531e-06, "loss": 0.3271, "step": 5531 }, { "epoch": 0.18984214138641042, "grad_norm": 0.8012590111858591, "learning_rate": 9.345283016621628e-06, "loss": 0.4155, "step": 5532 }, { "epoch": 0.1898764584763212, "grad_norm": 0.8457999060702213, "learning_rate": 9.345008058652818e-06, "loss": 0.3279, "step": 5533 }, { "epoch": 0.18991077556623198, "grad_norm": 0.821431457453206, "learning_rate": 9.344733047006499e-06, "loss": 0.2901, "step": 5534 }, { "epoch": 0.18994509265614276, "grad_norm": 0.8281219311517425, "learning_rate": 9.34445798168607e-06, "loss": 0.3406, "step": 5535 }, { "epoch": 0.18997940974605354, "grad_norm": 0.7297949665003676, "learning_rate": 9.344182862694925e-06, "loss": 0.309, "step": 5536 }, { "epoch": 0.19001372683596432, "grad_norm": 0.911615217635545, "learning_rate": 9.34390769003647e-06, "loss": 0.3227, "step": 5537 }, { "epoch": 0.1900480439258751, "grad_norm": 0.7540250322353722, "learning_rate": 9.343632463714097e-06, "loss": 0.293, "step": 5538 }, { "epoch": 0.19008236101578585, "grad_norm": 0.7660498137404436, "learning_rate": 9.34335718373121e-06, "loss": 0.3971, "step": 5539 }, { "epoch": 0.19011667810569663, "grad_norm": 0.8577850973943277, "learning_rate": 9.343081850091207e-06, "loss": 0.3231, "step": 5540 }, { "epoch": 0.1901509951956074, "grad_norm": 1.030231582847118, "learning_rate": 9.342806462797494e-06, "loss": 0.3739, "step": 5541 }, { "epoch": 0.1901853122855182, "grad_norm": 0.7319863759404278, "learning_rate": 9.34253102185347e-06, "loss": 0.3595, "step": 5542 }, { "epoch": 0.19021962937542897, "grad_norm": 0.7898266609174569, "learning_rate": 9.342255527262537e-06, "loss": 0.3951, "step": 5543 }, { "epoch": 0.19025394646533975, "grad_norm": 0.7973254962482962, "learning_rate": 9.3419799790281e-06, "loss": 0.3103, "step": 5544 }, { "epoch": 0.19028826355525053, "grad_norm": 0.8107637955735951, "learning_rate": 9.341704377153563e-06, "loss": 0.3434, "step": 5545 }, { "epoch": 0.19032258064516128, "grad_norm": 0.766768032544219, "learning_rate": 9.34142872164233e-06, "loss": 0.3246, "step": 5546 }, { "epoch": 0.19035689773507206, "grad_norm": 0.826122646452263, "learning_rate": 9.341153012497806e-06, "loss": 0.3194, "step": 5547 }, { "epoch": 0.19039121482498284, "grad_norm": 0.7728236617654094, "learning_rate": 9.340877249723396e-06, "loss": 0.3078, "step": 5548 }, { "epoch": 0.19042553191489361, "grad_norm": 0.8348585080390432, "learning_rate": 9.340601433322512e-06, "loss": 0.2911, "step": 5549 }, { "epoch": 0.1904598490048044, "grad_norm": 0.8929124426482425, "learning_rate": 9.340325563298558e-06, "loss": 0.3603, "step": 5550 }, { "epoch": 0.19049416609471517, "grad_norm": 0.8001210367492071, "learning_rate": 9.34004963965494e-06, "loss": 0.3498, "step": 5551 }, { "epoch": 0.19052848318462595, "grad_norm": 0.830965405282256, "learning_rate": 9.33977366239507e-06, "loss": 0.3523, "step": 5552 }, { "epoch": 0.19056280027453673, "grad_norm": 0.8347515669324251, "learning_rate": 9.339497631522356e-06, "loss": 0.3725, "step": 5553 }, { "epoch": 0.19059711736444748, "grad_norm": 0.727706522423244, "learning_rate": 9.339221547040208e-06, "loss": 0.3026, "step": 5554 }, { "epoch": 0.19063143445435826, "grad_norm": 0.8612448299539014, "learning_rate": 9.338945408952036e-06, "loss": 0.3337, "step": 5555 }, { "epoch": 0.19066575154426904, "grad_norm": 0.7370913548471918, "learning_rate": 9.338669217261253e-06, "loss": 0.3167, "step": 5556 }, { "epoch": 0.19070006863417982, "grad_norm": 0.8149468030341318, "learning_rate": 9.338392971971271e-06, "loss": 0.326, "step": 5557 }, { "epoch": 0.1907343857240906, "grad_norm": 0.7377936496926044, "learning_rate": 9.3381166730855e-06, "loss": 0.3049, "step": 5558 }, { "epoch": 0.19076870281400138, "grad_norm": 0.872040471970503, "learning_rate": 9.337840320607356e-06, "loss": 0.3609, "step": 5559 }, { "epoch": 0.19080301990391216, "grad_norm": 0.8668597261883848, "learning_rate": 9.337563914540252e-06, "loss": 0.3184, "step": 5560 }, { "epoch": 0.19083733699382294, "grad_norm": 0.8411791936676162, "learning_rate": 9.337287454887605e-06, "loss": 0.3698, "step": 5561 }, { "epoch": 0.1908716540837337, "grad_norm": 0.7253352870302295, "learning_rate": 9.337010941652827e-06, "loss": 0.2924, "step": 5562 }, { "epoch": 0.19090597117364447, "grad_norm": 0.9422191986851415, "learning_rate": 9.336734374839333e-06, "loss": 0.3218, "step": 5563 }, { "epoch": 0.19094028826355525, "grad_norm": 0.7391363169029249, "learning_rate": 9.336457754450545e-06, "loss": 0.3416, "step": 5564 }, { "epoch": 0.19097460535346603, "grad_norm": 0.7785355278511003, "learning_rate": 9.336181080489877e-06, "loss": 0.3468, "step": 5565 }, { "epoch": 0.1910089224433768, "grad_norm": 0.7852568261298415, "learning_rate": 9.335904352960746e-06, "loss": 0.3197, "step": 5566 }, { "epoch": 0.19104323953328758, "grad_norm": 0.7548294100566462, "learning_rate": 9.335627571866573e-06, "loss": 0.3955, "step": 5567 }, { "epoch": 0.19107755662319836, "grad_norm": 0.8817513201419824, "learning_rate": 9.335350737210774e-06, "loss": 0.3662, "step": 5568 }, { "epoch": 0.19111187371310911, "grad_norm": 0.7665420929002247, "learning_rate": 9.335073848996774e-06, "loss": 0.2974, "step": 5569 }, { "epoch": 0.1911461908030199, "grad_norm": 0.7576791419124972, "learning_rate": 9.334796907227989e-06, "loss": 0.3179, "step": 5570 }, { "epoch": 0.19118050789293067, "grad_norm": 0.7769481725564247, "learning_rate": 9.334519911907842e-06, "loss": 0.3657, "step": 5571 }, { "epoch": 0.19121482498284145, "grad_norm": 0.7627207873606553, "learning_rate": 9.334242863039755e-06, "loss": 0.2978, "step": 5572 }, { "epoch": 0.19124914207275223, "grad_norm": 0.8092690875891908, "learning_rate": 9.333965760627154e-06, "loss": 0.342, "step": 5573 }, { "epoch": 0.191283459162663, "grad_norm": 0.8231726164837416, "learning_rate": 9.333688604673454e-06, "loss": 0.3738, "step": 5574 }, { "epoch": 0.1913177762525738, "grad_norm": 0.7095367644594579, "learning_rate": 9.333411395182086e-06, "loss": 0.3044, "step": 5575 }, { "epoch": 0.19135209334248457, "grad_norm": 0.830059770611102, "learning_rate": 9.333134132156474e-06, "loss": 0.3775, "step": 5576 }, { "epoch": 0.19138641043239532, "grad_norm": 0.8980696397069721, "learning_rate": 9.33285681560004e-06, "loss": 0.3603, "step": 5577 }, { "epoch": 0.1914207275223061, "grad_norm": 0.8017627010974013, "learning_rate": 9.332579445516212e-06, "loss": 0.3598, "step": 5578 }, { "epoch": 0.19145504461221688, "grad_norm": 0.849180014405782, "learning_rate": 9.332302021908417e-06, "loss": 0.3444, "step": 5579 }, { "epoch": 0.19148936170212766, "grad_norm": 0.7591529632491405, "learning_rate": 9.332024544780079e-06, "loss": 0.3086, "step": 5580 }, { "epoch": 0.19152367879203844, "grad_norm": 0.8626162663573286, "learning_rate": 9.331747014134631e-06, "loss": 0.3637, "step": 5581 }, { "epoch": 0.19155799588194922, "grad_norm": 0.9312529537016042, "learning_rate": 9.331469429975497e-06, "loss": 0.3282, "step": 5582 }, { "epoch": 0.19159231297186, "grad_norm": 0.8685515522841049, "learning_rate": 9.33119179230611e-06, "loss": 0.3567, "step": 5583 }, { "epoch": 0.19162663006177078, "grad_norm": 0.7905796135112862, "learning_rate": 9.330914101129897e-06, "loss": 0.3542, "step": 5584 }, { "epoch": 0.19166094715168153, "grad_norm": 0.7891773441025396, "learning_rate": 9.33063635645029e-06, "loss": 0.3294, "step": 5585 }, { "epoch": 0.1916952642415923, "grad_norm": 0.8079619625152121, "learning_rate": 9.33035855827072e-06, "loss": 0.3773, "step": 5586 }, { "epoch": 0.19172958133150309, "grad_norm": 0.7703773047572383, "learning_rate": 9.33008070659462e-06, "loss": 0.3058, "step": 5587 }, { "epoch": 0.19176389842141386, "grad_norm": 0.8839044253510859, "learning_rate": 9.32980280142542e-06, "loss": 0.3357, "step": 5588 }, { "epoch": 0.19179821551132464, "grad_norm": 0.8196886579829653, "learning_rate": 9.329524842766552e-06, "loss": 0.3273, "step": 5589 }, { "epoch": 0.19183253260123542, "grad_norm": 0.8104117284697521, "learning_rate": 9.329246830621456e-06, "loss": 0.3313, "step": 5590 }, { "epoch": 0.1918668496911462, "grad_norm": 0.7562646252496692, "learning_rate": 9.328968764993562e-06, "loss": 0.295, "step": 5591 }, { "epoch": 0.19190116678105695, "grad_norm": 0.9254755662849725, "learning_rate": 9.328690645886304e-06, "loss": 0.3348, "step": 5592 }, { "epoch": 0.19193548387096773, "grad_norm": 0.8029298553485403, "learning_rate": 9.328412473303122e-06, "loss": 0.3497, "step": 5593 }, { "epoch": 0.1919698009608785, "grad_norm": 0.9071721175575298, "learning_rate": 9.32813424724745e-06, "loss": 0.3917, "step": 5594 }, { "epoch": 0.1920041180507893, "grad_norm": 0.7964045502000838, "learning_rate": 9.327855967722726e-06, "loss": 0.3444, "step": 5595 }, { "epoch": 0.19203843514070007, "grad_norm": 0.79060691251203, "learning_rate": 9.327577634732385e-06, "loss": 0.309, "step": 5596 }, { "epoch": 0.19207275223061085, "grad_norm": 0.781354113456758, "learning_rate": 9.32729924827987e-06, "loss": 0.3123, "step": 5597 }, { "epoch": 0.19210706932052163, "grad_norm": 0.808813073053702, "learning_rate": 9.327020808368616e-06, "loss": 0.3531, "step": 5598 }, { "epoch": 0.1921413864104324, "grad_norm": 0.7892439405823403, "learning_rate": 9.326742315002066e-06, "loss": 0.3426, "step": 5599 }, { "epoch": 0.19217570350034316, "grad_norm": 0.8030246251392336, "learning_rate": 9.32646376818366e-06, "loss": 0.339, "step": 5600 }, { "epoch": 0.19221002059025394, "grad_norm": 0.7866065534521014, "learning_rate": 9.326185167916838e-06, "loss": 0.2999, "step": 5601 }, { "epoch": 0.19224433768016472, "grad_norm": 0.9334992287479147, "learning_rate": 9.32590651420504e-06, "loss": 0.3653, "step": 5602 }, { "epoch": 0.1922786547700755, "grad_norm": 0.8876709591171529, "learning_rate": 9.325627807051713e-06, "loss": 0.357, "step": 5603 }, { "epoch": 0.19231297185998628, "grad_norm": 0.9031562910790711, "learning_rate": 9.325349046460295e-06, "loss": 0.339, "step": 5604 }, { "epoch": 0.19234728894989706, "grad_norm": 0.816796064911517, "learning_rate": 9.325070232434234e-06, "loss": 0.344, "step": 5605 }, { "epoch": 0.19238160603980783, "grad_norm": 0.7341661696456009, "learning_rate": 9.324791364976974e-06, "loss": 0.3573, "step": 5606 }, { "epoch": 0.1924159231297186, "grad_norm": 0.7819843666793793, "learning_rate": 9.324512444091957e-06, "loss": 0.334, "step": 5607 }, { "epoch": 0.19245024021962936, "grad_norm": 0.8387142336491409, "learning_rate": 9.324233469782633e-06, "loss": 0.3577, "step": 5608 }, { "epoch": 0.19248455730954014, "grad_norm": 0.810325694275504, "learning_rate": 9.323954442052443e-06, "loss": 0.3525, "step": 5609 }, { "epoch": 0.19251887439945092, "grad_norm": 0.7850387184922623, "learning_rate": 9.323675360904838e-06, "loss": 0.4083, "step": 5610 }, { "epoch": 0.1925531914893617, "grad_norm": 0.7896784949450331, "learning_rate": 9.323396226343267e-06, "loss": 0.303, "step": 5611 }, { "epoch": 0.19258750857927248, "grad_norm": 1.134918700162361, "learning_rate": 9.323117038371174e-06, "loss": 0.3581, "step": 5612 }, { "epoch": 0.19262182566918326, "grad_norm": 0.8373826533471727, "learning_rate": 9.32283779699201e-06, "loss": 0.3705, "step": 5613 }, { "epoch": 0.19265614275909404, "grad_norm": 0.975815152372837, "learning_rate": 9.322558502209227e-06, "loss": 0.3703, "step": 5614 }, { "epoch": 0.1926904598490048, "grad_norm": 0.8573246528995097, "learning_rate": 9.322279154026272e-06, "loss": 0.3345, "step": 5615 }, { "epoch": 0.19272477693891557, "grad_norm": 0.8086497161332622, "learning_rate": 9.321999752446598e-06, "loss": 0.3642, "step": 5616 }, { "epoch": 0.19275909402882635, "grad_norm": 0.8511468155686113, "learning_rate": 9.321720297473655e-06, "loss": 0.3313, "step": 5617 }, { "epoch": 0.19279341111873713, "grad_norm": 0.7638256464007046, "learning_rate": 9.321440789110898e-06, "loss": 0.3083, "step": 5618 }, { "epoch": 0.1928277282086479, "grad_norm": 0.8593156814252431, "learning_rate": 9.321161227361776e-06, "loss": 0.3168, "step": 5619 }, { "epoch": 0.1928620452985587, "grad_norm": 0.8968747404855433, "learning_rate": 9.320881612229746e-06, "loss": 0.3907, "step": 5620 }, { "epoch": 0.19289636238846947, "grad_norm": 0.7229218838164532, "learning_rate": 9.320601943718261e-06, "loss": 0.3039, "step": 5621 }, { "epoch": 0.19293067947838025, "grad_norm": 0.8267477258371024, "learning_rate": 9.320322221830778e-06, "loss": 0.3451, "step": 5622 }, { "epoch": 0.192964996568291, "grad_norm": 0.7981185849735685, "learning_rate": 9.32004244657075e-06, "loss": 0.4041, "step": 5623 }, { "epoch": 0.19299931365820178, "grad_norm": 0.9008287681425089, "learning_rate": 9.319762617941634e-06, "loss": 0.3358, "step": 5624 }, { "epoch": 0.19303363074811256, "grad_norm": 0.8835493253774916, "learning_rate": 9.319482735946887e-06, "loss": 0.3241, "step": 5625 }, { "epoch": 0.19306794783802333, "grad_norm": 0.7845799608208529, "learning_rate": 9.319202800589967e-06, "loss": 0.3311, "step": 5626 }, { "epoch": 0.19310226492793411, "grad_norm": 0.8031526300122624, "learning_rate": 9.31892281187433e-06, "loss": 0.3279, "step": 5627 }, { "epoch": 0.1931365820178449, "grad_norm": 0.8131932667862406, "learning_rate": 9.318642769803442e-06, "loss": 0.3245, "step": 5628 }, { "epoch": 0.19317089910775567, "grad_norm": 0.8216094106807381, "learning_rate": 9.318362674380754e-06, "loss": 0.3603, "step": 5629 }, { "epoch": 0.19320521619766645, "grad_norm": 0.8223997206522703, "learning_rate": 9.318082525609731e-06, "loss": 0.3397, "step": 5630 }, { "epoch": 0.1932395332875772, "grad_norm": 1.2183745195727675, "learning_rate": 9.317802323493834e-06, "loss": 0.348, "step": 5631 }, { "epoch": 0.19327385037748798, "grad_norm": 0.8421236019513769, "learning_rate": 9.31752206803652e-06, "loss": 0.2933, "step": 5632 }, { "epoch": 0.19330816746739876, "grad_norm": 0.8074122292718855, "learning_rate": 9.317241759241259e-06, "loss": 0.3223, "step": 5633 }, { "epoch": 0.19334248455730954, "grad_norm": 0.8861102209924105, "learning_rate": 9.316961397111506e-06, "loss": 0.4084, "step": 5634 }, { "epoch": 0.19337680164722032, "grad_norm": 0.8409153929461712, "learning_rate": 9.316680981650727e-06, "loss": 0.3505, "step": 5635 }, { "epoch": 0.1934111187371311, "grad_norm": 0.7861019023998971, "learning_rate": 9.316400512862391e-06, "loss": 0.2883, "step": 5636 }, { "epoch": 0.19344543582704188, "grad_norm": 0.8232937777842633, "learning_rate": 9.316119990749957e-06, "loss": 0.3719, "step": 5637 }, { "epoch": 0.19347975291695263, "grad_norm": 0.7590734128992871, "learning_rate": 9.315839415316893e-06, "loss": 0.3469, "step": 5638 }, { "epoch": 0.1935140700068634, "grad_norm": 0.9017919568285149, "learning_rate": 9.315558786566665e-06, "loss": 0.2771, "step": 5639 }, { "epoch": 0.1935483870967742, "grad_norm": 0.7933509149270227, "learning_rate": 9.31527810450274e-06, "loss": 0.3305, "step": 5640 }, { "epoch": 0.19358270418668497, "grad_norm": 0.824140102556545, "learning_rate": 9.314997369128585e-06, "loss": 0.3541, "step": 5641 }, { "epoch": 0.19361702127659575, "grad_norm": 0.9847761688553333, "learning_rate": 9.314716580447667e-06, "loss": 0.3069, "step": 5642 }, { "epoch": 0.19365133836650653, "grad_norm": 0.7595466414851538, "learning_rate": 9.314435738463457e-06, "loss": 0.3372, "step": 5643 }, { "epoch": 0.1936856554564173, "grad_norm": 0.7763807005979058, "learning_rate": 9.314154843179424e-06, "loss": 0.3545, "step": 5644 }, { "epoch": 0.19371997254632808, "grad_norm": 0.7148488600197651, "learning_rate": 9.313873894599036e-06, "loss": 0.358, "step": 5645 }, { "epoch": 0.19375428963623884, "grad_norm": 0.7899501898485372, "learning_rate": 9.313592892725767e-06, "loss": 0.3149, "step": 5646 }, { "epoch": 0.19378860672614961, "grad_norm": 0.80872373384594, "learning_rate": 9.313311837563088e-06, "loss": 0.4108, "step": 5647 }, { "epoch": 0.1938229238160604, "grad_norm": 0.7903098910350705, "learning_rate": 9.313030729114468e-06, "loss": 0.3749, "step": 5648 }, { "epoch": 0.19385724090597117, "grad_norm": 0.845134141468744, "learning_rate": 9.312749567383384e-06, "loss": 0.3377, "step": 5649 }, { "epoch": 0.19389155799588195, "grad_norm": 0.9189372595453398, "learning_rate": 9.312468352373306e-06, "loss": 0.3762, "step": 5650 }, { "epoch": 0.19392587508579273, "grad_norm": 0.7727253485063511, "learning_rate": 9.312187084087708e-06, "loss": 0.342, "step": 5651 }, { "epoch": 0.1939601921757035, "grad_norm": 1.070308696219316, "learning_rate": 9.311905762530067e-06, "loss": 0.3408, "step": 5652 }, { "epoch": 0.19399450926561426, "grad_norm": 0.7339028723439752, "learning_rate": 9.31162438770386e-06, "loss": 0.281, "step": 5653 }, { "epoch": 0.19402882635552504, "grad_norm": 0.7614690282085734, "learning_rate": 9.311342959612557e-06, "loss": 0.261, "step": 5654 }, { "epoch": 0.19406314344543582, "grad_norm": 0.7833792076134393, "learning_rate": 9.31106147825964e-06, "loss": 0.2626, "step": 5655 }, { "epoch": 0.1940974605353466, "grad_norm": 0.8107762457387915, "learning_rate": 9.310779943648585e-06, "loss": 0.3175, "step": 5656 }, { "epoch": 0.19413177762525738, "grad_norm": 0.8111135116208494, "learning_rate": 9.310498355782869e-06, "loss": 0.295, "step": 5657 }, { "epoch": 0.19416609471516816, "grad_norm": 0.8339029335271735, "learning_rate": 9.310216714665972e-06, "loss": 0.3676, "step": 5658 }, { "epoch": 0.19420041180507894, "grad_norm": 0.7605507456932374, "learning_rate": 9.309935020301372e-06, "loss": 0.3283, "step": 5659 }, { "epoch": 0.19423472889498972, "grad_norm": 0.7764875261915951, "learning_rate": 9.309653272692548e-06, "loss": 0.3105, "step": 5660 }, { "epoch": 0.19426904598490047, "grad_norm": 0.8455104114828065, "learning_rate": 9.309371471842986e-06, "loss": 0.28, "step": 5661 }, { "epoch": 0.19430336307481125, "grad_norm": 0.808673109242246, "learning_rate": 9.309089617756162e-06, "loss": 0.3168, "step": 5662 }, { "epoch": 0.19433768016472203, "grad_norm": 0.8573193819048016, "learning_rate": 9.30880771043556e-06, "loss": 0.2943, "step": 5663 }, { "epoch": 0.1943719972546328, "grad_norm": 0.8780993827254906, "learning_rate": 9.30852574988466e-06, "loss": 0.3965, "step": 5664 }, { "epoch": 0.19440631434454358, "grad_norm": 0.80068303251255, "learning_rate": 9.308243736106952e-06, "loss": 0.3047, "step": 5665 }, { "epoch": 0.19444063143445436, "grad_norm": 0.7813489977987431, "learning_rate": 9.307961669105912e-06, "loss": 0.3225, "step": 5666 }, { "epoch": 0.19447494852436514, "grad_norm": 0.7955226907318268, "learning_rate": 9.307679548885029e-06, "loss": 0.326, "step": 5667 }, { "epoch": 0.19450926561427592, "grad_norm": 0.7960692522890447, "learning_rate": 9.30739737544779e-06, "loss": 0.34, "step": 5668 }, { "epoch": 0.19454358270418667, "grad_norm": 0.9725509757332834, "learning_rate": 9.307115148797675e-06, "loss": 0.3518, "step": 5669 }, { "epoch": 0.19457789979409745, "grad_norm": 0.804436387040443, "learning_rate": 9.306832868938174e-06, "loss": 0.3893, "step": 5670 }, { "epoch": 0.19461221688400823, "grad_norm": 0.7886533601240181, "learning_rate": 9.306550535872777e-06, "loss": 0.3543, "step": 5671 }, { "epoch": 0.194646533973919, "grad_norm": 0.7973808620217431, "learning_rate": 9.306268149604967e-06, "loss": 0.3802, "step": 5672 }, { "epoch": 0.1946808510638298, "grad_norm": 0.6979430216431927, "learning_rate": 9.305985710138234e-06, "loss": 0.295, "step": 5673 }, { "epoch": 0.19471516815374057, "grad_norm": 0.836950424710843, "learning_rate": 9.30570321747607e-06, "loss": 0.3281, "step": 5674 }, { "epoch": 0.19474948524365135, "grad_norm": 0.7680913718149561, "learning_rate": 9.30542067162196e-06, "loss": 0.3081, "step": 5675 }, { "epoch": 0.1947838023335621, "grad_norm": 0.7900416937762347, "learning_rate": 9.305138072579401e-06, "loss": 0.3229, "step": 5676 }, { "epoch": 0.19481811942347288, "grad_norm": 0.8279338108829257, "learning_rate": 9.304855420351877e-06, "loss": 0.3345, "step": 5677 }, { "epoch": 0.19485243651338366, "grad_norm": 0.8478241120086428, "learning_rate": 9.304572714942886e-06, "loss": 0.3811, "step": 5678 }, { "epoch": 0.19488675360329444, "grad_norm": 0.8392624980279636, "learning_rate": 9.304289956355915e-06, "loss": 0.3736, "step": 5679 }, { "epoch": 0.19492107069320522, "grad_norm": 0.8470497599528127, "learning_rate": 9.304007144594462e-06, "loss": 0.3905, "step": 5680 }, { "epoch": 0.194955387783116, "grad_norm": 0.7279752012227764, "learning_rate": 9.303724279662016e-06, "loss": 0.2985, "step": 5681 }, { "epoch": 0.19498970487302678, "grad_norm": 0.775661109647253, "learning_rate": 9.303441361562079e-06, "loss": 0.3327, "step": 5682 }, { "epoch": 0.19502402196293755, "grad_norm": 0.900212751432175, "learning_rate": 9.303158390298136e-06, "loss": 0.3753, "step": 5683 }, { "epoch": 0.1950583390528483, "grad_norm": 0.9023586967238456, "learning_rate": 9.302875365873691e-06, "loss": 0.3219, "step": 5684 }, { "epoch": 0.19509265614275909, "grad_norm": 0.8141875689810835, "learning_rate": 9.302592288292237e-06, "loss": 0.3395, "step": 5685 }, { "epoch": 0.19512697323266986, "grad_norm": 0.8336690901002645, "learning_rate": 9.302309157557271e-06, "loss": 0.3602, "step": 5686 }, { "epoch": 0.19516129032258064, "grad_norm": 0.753665429711838, "learning_rate": 9.302025973672293e-06, "loss": 0.3044, "step": 5687 }, { "epoch": 0.19519560741249142, "grad_norm": 0.7376965123634607, "learning_rate": 9.301742736640798e-06, "loss": 0.3238, "step": 5688 }, { "epoch": 0.1952299245024022, "grad_norm": 0.8736046405705521, "learning_rate": 9.301459446466288e-06, "loss": 0.3094, "step": 5689 }, { "epoch": 0.19526424159231298, "grad_norm": 0.8230596952021708, "learning_rate": 9.301176103152261e-06, "loss": 0.3054, "step": 5690 }, { "epoch": 0.19529855868222376, "grad_norm": 0.7958026800304303, "learning_rate": 9.300892706702218e-06, "loss": 0.3835, "step": 5691 }, { "epoch": 0.1953328757721345, "grad_norm": 0.7954048427529151, "learning_rate": 9.30060925711966e-06, "loss": 0.3373, "step": 5692 }, { "epoch": 0.1953671928620453, "grad_norm": 0.924965302742821, "learning_rate": 9.300325754408088e-06, "loss": 0.3528, "step": 5693 }, { "epoch": 0.19540150995195607, "grad_norm": 0.8132103851121111, "learning_rate": 9.300042198571007e-06, "loss": 0.3294, "step": 5694 }, { "epoch": 0.19543582704186685, "grad_norm": 0.7554756973646174, "learning_rate": 9.299758589611915e-06, "loss": 0.2923, "step": 5695 }, { "epoch": 0.19547014413177763, "grad_norm": 0.832137554538738, "learning_rate": 9.299474927534322e-06, "loss": 0.3109, "step": 5696 }, { "epoch": 0.1955044612216884, "grad_norm": 0.8034986153538518, "learning_rate": 9.299191212341727e-06, "loss": 0.3115, "step": 5697 }, { "epoch": 0.1955387783115992, "grad_norm": 0.7554860648189257, "learning_rate": 9.298907444037637e-06, "loss": 0.3133, "step": 5698 }, { "epoch": 0.19557309540150994, "grad_norm": 0.7934671783476293, "learning_rate": 9.298623622625558e-06, "loss": 0.3263, "step": 5699 }, { "epoch": 0.19560741249142072, "grad_norm": 0.7812920647526068, "learning_rate": 9.298339748108995e-06, "loss": 0.2789, "step": 5700 }, { "epoch": 0.1956417295813315, "grad_norm": 0.8375872917157313, "learning_rate": 9.298055820491456e-06, "loss": 0.3704, "step": 5701 }, { "epoch": 0.19567604667124228, "grad_norm": 0.8321565158671685, "learning_rate": 9.297771839776448e-06, "loss": 0.3522, "step": 5702 }, { "epoch": 0.19571036376115306, "grad_norm": 0.7153750262315766, "learning_rate": 9.297487805967482e-06, "loss": 0.2729, "step": 5703 }, { "epoch": 0.19574468085106383, "grad_norm": 0.7962322032560156, "learning_rate": 9.297203719068063e-06, "loss": 0.3393, "step": 5704 }, { "epoch": 0.19577899794097461, "grad_norm": 0.8203658837845177, "learning_rate": 9.296919579081702e-06, "loss": 0.3647, "step": 5705 }, { "epoch": 0.1958133150308854, "grad_norm": 0.6803351693158446, "learning_rate": 9.29663538601191e-06, "loss": 0.3024, "step": 5706 }, { "epoch": 0.19584763212079614, "grad_norm": 0.7162106952552109, "learning_rate": 9.296351139862196e-06, "loss": 0.3882, "step": 5707 }, { "epoch": 0.19588194921070692, "grad_norm": 0.8115106810854446, "learning_rate": 9.296066840636072e-06, "loss": 0.315, "step": 5708 }, { "epoch": 0.1959162663006177, "grad_norm": 0.9401392736479548, "learning_rate": 9.295782488337053e-06, "loss": 0.2958, "step": 5709 }, { "epoch": 0.19595058339052848, "grad_norm": 0.7612775822506731, "learning_rate": 9.29549808296865e-06, "loss": 0.3365, "step": 5710 }, { "epoch": 0.19598490048043926, "grad_norm": 0.7638610285445647, "learning_rate": 9.295213624534374e-06, "loss": 0.334, "step": 5711 }, { "epoch": 0.19601921757035004, "grad_norm": 0.7435856769583983, "learning_rate": 9.294929113037744e-06, "loss": 0.3381, "step": 5712 }, { "epoch": 0.19605353466026082, "grad_norm": 0.8235691450590312, "learning_rate": 9.29464454848227e-06, "loss": 0.3326, "step": 5713 }, { "epoch": 0.1960878517501716, "grad_norm": 0.8039813838901954, "learning_rate": 9.29435993087147e-06, "loss": 0.2986, "step": 5714 }, { "epoch": 0.19612216884008235, "grad_norm": 0.8521124731142952, "learning_rate": 9.294075260208862e-06, "loss": 0.2995, "step": 5715 }, { "epoch": 0.19615648592999313, "grad_norm": 0.8106528084994589, "learning_rate": 9.293790536497958e-06, "loss": 0.3164, "step": 5716 }, { "epoch": 0.1961908030199039, "grad_norm": 0.7122321880666269, "learning_rate": 9.293505759742279e-06, "loss": 0.2836, "step": 5717 }, { "epoch": 0.1962251201098147, "grad_norm": 0.806249335339689, "learning_rate": 9.29322092994534e-06, "loss": 0.3128, "step": 5718 }, { "epoch": 0.19625943719972547, "grad_norm": 0.8264063981325601, "learning_rate": 9.292936047110665e-06, "loss": 0.3408, "step": 5719 }, { "epoch": 0.19629375428963625, "grad_norm": 0.9912362422682762, "learning_rate": 9.292651111241768e-06, "loss": 0.3586, "step": 5720 }, { "epoch": 0.19632807137954703, "grad_norm": 0.8262681478780003, "learning_rate": 9.292366122342174e-06, "loss": 0.2748, "step": 5721 }, { "epoch": 0.19636238846945778, "grad_norm": 0.8201135887052771, "learning_rate": 9.292081080415399e-06, "loss": 0.3254, "step": 5722 }, { "epoch": 0.19639670555936856, "grad_norm": 0.7537229964142791, "learning_rate": 9.291795985464966e-06, "loss": 0.306, "step": 5723 }, { "epoch": 0.19643102264927934, "grad_norm": 0.8827854864500216, "learning_rate": 9.291510837494397e-06, "loss": 0.2794, "step": 5724 }, { "epoch": 0.19646533973919011, "grad_norm": 0.7352755127397864, "learning_rate": 9.291225636507218e-06, "loss": 0.302, "step": 5725 }, { "epoch": 0.1964996568291009, "grad_norm": 0.8894603013047245, "learning_rate": 9.290940382506947e-06, "loss": 0.3592, "step": 5726 }, { "epoch": 0.19653397391901167, "grad_norm": 0.8602473650183281, "learning_rate": 9.29065507549711e-06, "loss": 0.3666, "step": 5727 }, { "epoch": 0.19656829100892245, "grad_norm": 0.8511688843740022, "learning_rate": 9.290369715481233e-06, "loss": 0.3191, "step": 5728 }, { "epoch": 0.19660260809883323, "grad_norm": 0.7896906886113214, "learning_rate": 9.29008430246284e-06, "loss": 0.3068, "step": 5729 }, { "epoch": 0.19663692518874398, "grad_norm": 0.8585323448971915, "learning_rate": 9.289798836445455e-06, "loss": 0.3404, "step": 5730 }, { "epoch": 0.19667124227865476, "grad_norm": 0.7888537590493875, "learning_rate": 9.28951331743261e-06, "loss": 0.3214, "step": 5731 }, { "epoch": 0.19670555936856554, "grad_norm": 0.9419824409104492, "learning_rate": 9.289227745427828e-06, "loss": 0.3752, "step": 5732 }, { "epoch": 0.19673987645847632, "grad_norm": 0.7991897891914094, "learning_rate": 9.28894212043464e-06, "loss": 0.284, "step": 5733 }, { "epoch": 0.1967741935483871, "grad_norm": 0.7013636692679165, "learning_rate": 9.288656442456567e-06, "loss": 0.3021, "step": 5734 }, { "epoch": 0.19680851063829788, "grad_norm": 0.8964951293977486, "learning_rate": 9.288370711497147e-06, "loss": 0.4047, "step": 5735 }, { "epoch": 0.19684282772820866, "grad_norm": 0.8255228996106272, "learning_rate": 9.288084927559908e-06, "loss": 0.3811, "step": 5736 }, { "epoch": 0.19687714481811944, "grad_norm": 0.7458758617550253, "learning_rate": 9.287799090648377e-06, "loss": 0.2807, "step": 5737 }, { "epoch": 0.1969114619080302, "grad_norm": 0.8557774099631678, "learning_rate": 9.28751320076609e-06, "loss": 0.3854, "step": 5738 }, { "epoch": 0.19694577899794097, "grad_norm": 0.7684848408332627, "learning_rate": 9.287227257916574e-06, "loss": 0.3326, "step": 5739 }, { "epoch": 0.19698009608785175, "grad_norm": 0.8386096491719753, "learning_rate": 9.286941262103364e-06, "loss": 0.3754, "step": 5740 }, { "epoch": 0.19701441317776253, "grad_norm": 0.6956502957684976, "learning_rate": 9.286655213329992e-06, "loss": 0.315, "step": 5741 }, { "epoch": 0.1970487302676733, "grad_norm": 0.7878070001581358, "learning_rate": 9.286369111599992e-06, "loss": 0.2904, "step": 5742 }, { "epoch": 0.19708304735758408, "grad_norm": 0.8162115852095874, "learning_rate": 9.286082956916902e-06, "loss": 0.3751, "step": 5743 }, { "epoch": 0.19711736444749486, "grad_norm": 0.7520126783929388, "learning_rate": 9.285796749284253e-06, "loss": 0.332, "step": 5744 }, { "epoch": 0.19715168153740562, "grad_norm": 0.9150697910231357, "learning_rate": 9.28551048870558e-06, "loss": 0.3656, "step": 5745 }, { "epoch": 0.1971859986273164, "grad_norm": 0.7835881787761729, "learning_rate": 9.285224175184424e-06, "loss": 0.339, "step": 5746 }, { "epoch": 0.19722031571722717, "grad_norm": 0.7329630199578808, "learning_rate": 9.284937808724318e-06, "loss": 0.3337, "step": 5747 }, { "epoch": 0.19725463280713795, "grad_norm": 0.875767849200955, "learning_rate": 9.284651389328801e-06, "loss": 0.314, "step": 5748 }, { "epoch": 0.19728894989704873, "grad_norm": 0.8460689196077757, "learning_rate": 9.284364917001411e-06, "loss": 0.3421, "step": 5749 }, { "epoch": 0.1973232669869595, "grad_norm": 0.7779286570333751, "learning_rate": 9.28407839174569e-06, "loss": 0.307, "step": 5750 }, { "epoch": 0.1973575840768703, "grad_norm": 0.8637664116031321, "learning_rate": 9.283791813565172e-06, "loss": 0.3328, "step": 5751 }, { "epoch": 0.19739190116678107, "grad_norm": 0.7467941272375586, "learning_rate": 9.283505182463403e-06, "loss": 0.3181, "step": 5752 }, { "epoch": 0.19742621825669182, "grad_norm": 0.7061850000484584, "learning_rate": 9.283218498443921e-06, "loss": 0.3038, "step": 5753 }, { "epoch": 0.1974605353466026, "grad_norm": 0.7492413450649269, "learning_rate": 9.282931761510267e-06, "loss": 0.2983, "step": 5754 }, { "epoch": 0.19749485243651338, "grad_norm": 0.8175390710856411, "learning_rate": 9.282644971665987e-06, "loss": 0.3178, "step": 5755 }, { "epoch": 0.19752916952642416, "grad_norm": 0.8405495099000745, "learning_rate": 9.28235812891462e-06, "loss": 0.3154, "step": 5756 }, { "epoch": 0.19756348661633494, "grad_norm": 0.8821955307468573, "learning_rate": 9.28207123325971e-06, "loss": 0.3482, "step": 5757 }, { "epoch": 0.19759780370624572, "grad_norm": 0.7931081893064026, "learning_rate": 9.281784284704803e-06, "loss": 0.3392, "step": 5758 }, { "epoch": 0.1976321207961565, "grad_norm": 0.75280108428993, "learning_rate": 9.281497283253445e-06, "loss": 0.4147, "step": 5759 }, { "epoch": 0.19766643788606725, "grad_norm": 0.8679532769472621, "learning_rate": 9.281210228909176e-06, "loss": 0.361, "step": 5760 }, { "epoch": 0.19770075497597803, "grad_norm": 0.8585741294200421, "learning_rate": 9.280923121675548e-06, "loss": 0.3104, "step": 5761 }, { "epoch": 0.1977350720658888, "grad_norm": 0.7148150903758592, "learning_rate": 9.280635961556108e-06, "loss": 0.3288, "step": 5762 }, { "epoch": 0.19776938915579959, "grad_norm": 0.8571139822669689, "learning_rate": 9.2803487485544e-06, "loss": 0.3152, "step": 5763 }, { "epoch": 0.19780370624571036, "grad_norm": 0.8094638239488063, "learning_rate": 9.280061482673972e-06, "loss": 0.3131, "step": 5764 }, { "epoch": 0.19783802333562114, "grad_norm": 0.8274854268821905, "learning_rate": 9.279774163918376e-06, "loss": 0.3722, "step": 5765 }, { "epoch": 0.19787234042553192, "grad_norm": 0.7937069840251126, "learning_rate": 9.279486792291161e-06, "loss": 0.3021, "step": 5766 }, { "epoch": 0.1979066575154427, "grad_norm": 0.8382410631242307, "learning_rate": 9.279199367795873e-06, "loss": 0.3421, "step": 5767 }, { "epoch": 0.19794097460535345, "grad_norm": 0.8258626631524318, "learning_rate": 9.27891189043607e-06, "loss": 0.3004, "step": 5768 }, { "epoch": 0.19797529169526423, "grad_norm": 0.7592010068187315, "learning_rate": 9.278624360215296e-06, "loss": 0.3383, "step": 5769 }, { "epoch": 0.198009608785175, "grad_norm": 0.7467034176859711, "learning_rate": 9.278336777137107e-06, "loss": 0.2967, "step": 5770 }, { "epoch": 0.1980439258750858, "grad_norm": 0.871759084857858, "learning_rate": 9.278049141205055e-06, "loss": 0.3691, "step": 5771 }, { "epoch": 0.19807824296499657, "grad_norm": 0.8637679252988698, "learning_rate": 9.277761452422694e-06, "loss": 0.3219, "step": 5772 }, { "epoch": 0.19811256005490735, "grad_norm": 0.8635867596979705, "learning_rate": 9.277473710793578e-06, "loss": 0.3767, "step": 5773 }, { "epoch": 0.19814687714481813, "grad_norm": 0.7921629245553321, "learning_rate": 9.277185916321261e-06, "loss": 0.37, "step": 5774 }, { "epoch": 0.1981811942347289, "grad_norm": 0.7711545210322397, "learning_rate": 9.2768980690093e-06, "loss": 0.3146, "step": 5775 }, { "epoch": 0.19821551132463966, "grad_norm": 0.7465570831746741, "learning_rate": 9.276610168861247e-06, "loss": 0.3481, "step": 5776 }, { "epoch": 0.19824982841455044, "grad_norm": 0.7612182598859318, "learning_rate": 9.276322215880665e-06, "loss": 0.3421, "step": 5777 }, { "epoch": 0.19828414550446122, "grad_norm": 0.7610870004271076, "learning_rate": 9.276034210071105e-06, "loss": 0.3735, "step": 5778 }, { "epoch": 0.198318462594372, "grad_norm": 0.7968744712113793, "learning_rate": 9.275746151436127e-06, "loss": 0.3789, "step": 5779 }, { "epoch": 0.19835277968428278, "grad_norm": 0.8278392040911685, "learning_rate": 9.275458039979292e-06, "loss": 0.4107, "step": 5780 }, { "epoch": 0.19838709677419356, "grad_norm": 0.7893488959397602, "learning_rate": 9.275169875704158e-06, "loss": 0.3012, "step": 5781 }, { "epoch": 0.19842141386410433, "grad_norm": 0.9295244656365881, "learning_rate": 9.274881658614283e-06, "loss": 0.3161, "step": 5782 }, { "epoch": 0.19845573095401509, "grad_norm": 0.8238293788656869, "learning_rate": 9.274593388713232e-06, "loss": 0.3262, "step": 5783 }, { "epoch": 0.19849004804392587, "grad_norm": 0.6888219915245974, "learning_rate": 9.27430506600456e-06, "loss": 0.3106, "step": 5784 }, { "epoch": 0.19852436513383664, "grad_norm": 0.7870390728690753, "learning_rate": 9.274016690491835e-06, "loss": 0.2935, "step": 5785 }, { "epoch": 0.19855868222374742, "grad_norm": 0.8244313255034067, "learning_rate": 9.273728262178614e-06, "loss": 0.3195, "step": 5786 }, { "epoch": 0.1985929993136582, "grad_norm": 0.9141862515723004, "learning_rate": 9.273439781068466e-06, "loss": 0.3268, "step": 5787 }, { "epoch": 0.19862731640356898, "grad_norm": 0.7997836615560534, "learning_rate": 9.273151247164948e-06, "loss": 0.3473, "step": 5788 }, { "epoch": 0.19866163349347976, "grad_norm": 0.8411037615910608, "learning_rate": 9.272862660471632e-06, "loss": 0.3305, "step": 5789 }, { "epoch": 0.19869595058339054, "grad_norm": 0.864032897297336, "learning_rate": 9.272574020992077e-06, "loss": 0.3677, "step": 5790 }, { "epoch": 0.1987302676733013, "grad_norm": 0.8945821316591269, "learning_rate": 9.272285328729853e-06, "loss": 0.3763, "step": 5791 }, { "epoch": 0.19876458476321207, "grad_norm": 0.7358534667415905, "learning_rate": 9.271996583688525e-06, "loss": 0.317, "step": 5792 }, { "epoch": 0.19879890185312285, "grad_norm": 0.8495856181403026, "learning_rate": 9.27170778587166e-06, "loss": 0.3454, "step": 5793 }, { "epoch": 0.19883321894303363, "grad_norm": 0.9411842335912335, "learning_rate": 9.271418935282825e-06, "loss": 0.3871, "step": 5794 }, { "epoch": 0.1988675360329444, "grad_norm": 0.7933881471718156, "learning_rate": 9.271130031925589e-06, "loss": 0.3553, "step": 5795 }, { "epoch": 0.1989018531228552, "grad_norm": 0.7856568568489429, "learning_rate": 9.270841075803521e-06, "loss": 0.3032, "step": 5796 }, { "epoch": 0.19893617021276597, "grad_norm": 0.8664938632211883, "learning_rate": 9.27055206692019e-06, "loss": 0.3451, "step": 5797 }, { "epoch": 0.19897048730267675, "grad_norm": 0.7512808053130836, "learning_rate": 9.270263005279167e-06, "loss": 0.3242, "step": 5798 }, { "epoch": 0.1990048043925875, "grad_norm": 0.7008452025926217, "learning_rate": 9.269973890884025e-06, "loss": 0.348, "step": 5799 }, { "epoch": 0.19903912148249828, "grad_norm": 0.8582059585206399, "learning_rate": 9.269684723738333e-06, "loss": 0.3782, "step": 5800 }, { "epoch": 0.19907343857240906, "grad_norm": 0.807322950947253, "learning_rate": 9.269395503845664e-06, "loss": 0.3482, "step": 5801 }, { "epoch": 0.19910775566231984, "grad_norm": 0.7391386839306976, "learning_rate": 9.269106231209592e-06, "loss": 0.3539, "step": 5802 }, { "epoch": 0.19914207275223061, "grad_norm": 0.7786898599630129, "learning_rate": 9.268816905833689e-06, "loss": 0.3302, "step": 5803 }, { "epoch": 0.1991763898421414, "grad_norm": 0.6890985329093026, "learning_rate": 9.26852752772153e-06, "loss": 0.3294, "step": 5804 }, { "epoch": 0.19921070693205217, "grad_norm": 0.8425495171375404, "learning_rate": 9.26823809687669e-06, "loss": 0.351, "step": 5805 }, { "epoch": 0.19924502402196292, "grad_norm": 0.7066976626688742, "learning_rate": 9.267948613302746e-06, "loss": 0.3092, "step": 5806 }, { "epoch": 0.1992793411118737, "grad_norm": 0.8431918700057148, "learning_rate": 9.267659077003271e-06, "loss": 0.3746, "step": 5807 }, { "epoch": 0.19931365820178448, "grad_norm": 0.9052298964566703, "learning_rate": 9.267369487981846e-06, "loss": 0.2999, "step": 5808 }, { "epoch": 0.19934797529169526, "grad_norm": 0.8239333366805368, "learning_rate": 9.267079846242044e-06, "loss": 0.331, "step": 5809 }, { "epoch": 0.19938229238160604, "grad_norm": 0.8165552111810043, "learning_rate": 9.266790151787445e-06, "loss": 0.3226, "step": 5810 }, { "epoch": 0.19941660947151682, "grad_norm": 0.9191461242639599, "learning_rate": 9.26650040462163e-06, "loss": 0.3004, "step": 5811 }, { "epoch": 0.1994509265614276, "grad_norm": 0.8695741763060433, "learning_rate": 9.266210604748175e-06, "loss": 0.3818, "step": 5812 }, { "epoch": 0.19948524365133838, "grad_norm": 0.7014792949380599, "learning_rate": 9.265920752170665e-06, "loss": 0.3089, "step": 5813 }, { "epoch": 0.19951956074124913, "grad_norm": 0.953199695354632, "learning_rate": 9.265630846892675e-06, "loss": 0.3317, "step": 5814 }, { "epoch": 0.1995538778311599, "grad_norm": 0.8375657441967373, "learning_rate": 9.265340888917792e-06, "loss": 0.3164, "step": 5815 }, { "epoch": 0.1995881949210707, "grad_norm": 0.8343567427527101, "learning_rate": 9.265050878249591e-06, "loss": 0.326, "step": 5816 }, { "epoch": 0.19962251201098147, "grad_norm": 0.6941392752185888, "learning_rate": 9.264760814891662e-06, "loss": 0.3634, "step": 5817 }, { "epoch": 0.19965682910089225, "grad_norm": 0.8120320221716777, "learning_rate": 9.264470698847585e-06, "loss": 0.3288, "step": 5818 }, { "epoch": 0.19969114619080303, "grad_norm": 0.7404434437336757, "learning_rate": 9.264180530120942e-06, "loss": 0.3803, "step": 5819 }, { "epoch": 0.1997254632807138, "grad_norm": 0.8054190954081394, "learning_rate": 9.263890308715322e-06, "loss": 0.3687, "step": 5820 }, { "epoch": 0.19975978037062458, "grad_norm": 0.8380603191456162, "learning_rate": 9.263600034634309e-06, "loss": 0.3443, "step": 5821 }, { "epoch": 0.19979409746053534, "grad_norm": 0.7984076178208189, "learning_rate": 9.263309707881488e-06, "loss": 0.3449, "step": 5822 }, { "epoch": 0.19982841455044612, "grad_norm": 0.7684530754829542, "learning_rate": 9.263019328460447e-06, "loss": 0.3047, "step": 5823 }, { "epoch": 0.1998627316403569, "grad_norm": 0.8088546970786998, "learning_rate": 9.262728896374771e-06, "loss": 0.3442, "step": 5824 }, { "epoch": 0.19989704873026767, "grad_norm": 0.6858094649313152, "learning_rate": 9.26243841162805e-06, "loss": 0.3147, "step": 5825 }, { "epoch": 0.19993136582017845, "grad_norm": 0.8109334334852726, "learning_rate": 9.262147874223871e-06, "loss": 0.3577, "step": 5826 }, { "epoch": 0.19996568291008923, "grad_norm": 0.8476491832526345, "learning_rate": 9.261857284165824e-06, "loss": 0.3939, "step": 5827 }, { "epoch": 0.2, "grad_norm": 0.7333948370907388, "learning_rate": 9.2615666414575e-06, "loss": 0.3179, "step": 5828 }, { "epoch": 0.20003431708991076, "grad_norm": 0.7288885459510167, "learning_rate": 9.261275946102489e-06, "loss": 0.3476, "step": 5829 }, { "epoch": 0.20006863417982154, "grad_norm": 0.7875713153881381, "learning_rate": 9.260985198104381e-06, "loss": 0.3123, "step": 5830 }, { "epoch": 0.20010295126973232, "grad_norm": 0.7184678964592834, "learning_rate": 9.26069439746677e-06, "loss": 0.3422, "step": 5831 }, { "epoch": 0.2001372683596431, "grad_norm": 0.7468465792932506, "learning_rate": 9.260403544193246e-06, "loss": 0.3067, "step": 5832 }, { "epoch": 0.20017158544955388, "grad_norm": 0.8646282943090343, "learning_rate": 9.260112638287404e-06, "loss": 0.3546, "step": 5833 }, { "epoch": 0.20020590253946466, "grad_norm": 0.9349344270163686, "learning_rate": 9.259821679752835e-06, "loss": 0.3985, "step": 5834 }, { "epoch": 0.20024021962937544, "grad_norm": 0.7370249547448201, "learning_rate": 9.25953066859314e-06, "loss": 0.2751, "step": 5835 }, { "epoch": 0.20027453671928622, "grad_norm": 0.8278317711914077, "learning_rate": 9.259239604811907e-06, "loss": 0.341, "step": 5836 }, { "epoch": 0.20030885380919697, "grad_norm": 0.8394215778398033, "learning_rate": 9.258948488412735e-06, "loss": 0.3723, "step": 5837 }, { "epoch": 0.20034317089910775, "grad_norm": 0.8252406473954569, "learning_rate": 9.258657319399218e-06, "loss": 0.334, "step": 5838 }, { "epoch": 0.20037748798901853, "grad_norm": 0.8485344072675817, "learning_rate": 9.258366097774958e-06, "loss": 0.3775, "step": 5839 }, { "epoch": 0.2004118050789293, "grad_norm": 0.7509620556012091, "learning_rate": 9.258074823543549e-06, "loss": 0.2907, "step": 5840 }, { "epoch": 0.20044612216884009, "grad_norm": 0.7421542834856132, "learning_rate": 9.257783496708587e-06, "loss": 0.3257, "step": 5841 }, { "epoch": 0.20048043925875086, "grad_norm": 0.7882834105820984, "learning_rate": 9.257492117273677e-06, "loss": 0.3297, "step": 5842 }, { "epoch": 0.20051475634866164, "grad_norm": 0.7902861991622792, "learning_rate": 9.257200685242414e-06, "loss": 0.4144, "step": 5843 }, { "epoch": 0.20054907343857242, "grad_norm": 0.7898535742703299, "learning_rate": 9.2569092006184e-06, "loss": 0.3251, "step": 5844 }, { "epoch": 0.20058339052848317, "grad_norm": 0.7221059077272434, "learning_rate": 9.256617663405239e-06, "loss": 0.3, "step": 5845 }, { "epoch": 0.20061770761839395, "grad_norm": 0.6841950328281028, "learning_rate": 9.256326073606525e-06, "loss": 0.3131, "step": 5846 }, { "epoch": 0.20065202470830473, "grad_norm": 0.7709543646124011, "learning_rate": 9.256034431225868e-06, "loss": 0.2996, "step": 5847 }, { "epoch": 0.2006863417982155, "grad_norm": 0.7885960398861159, "learning_rate": 9.255742736266866e-06, "loss": 0.3399, "step": 5848 }, { "epoch": 0.2007206588881263, "grad_norm": 0.6908680747593334, "learning_rate": 9.255450988733126e-06, "loss": 0.3214, "step": 5849 }, { "epoch": 0.20075497597803707, "grad_norm": 0.9081780921292799, "learning_rate": 9.255159188628249e-06, "loss": 0.4162, "step": 5850 }, { "epoch": 0.20078929306794785, "grad_norm": 0.820239770676233, "learning_rate": 9.254867335955842e-06, "loss": 0.3195, "step": 5851 }, { "epoch": 0.2008236101578586, "grad_norm": 0.7758796080931476, "learning_rate": 9.254575430719507e-06, "loss": 0.4043, "step": 5852 }, { "epoch": 0.20085792724776938, "grad_norm": 0.8153379915064416, "learning_rate": 9.254283472922856e-06, "loss": 0.3515, "step": 5853 }, { "epoch": 0.20089224433768016, "grad_norm": 0.7167181044054614, "learning_rate": 9.253991462569492e-06, "loss": 0.306, "step": 5854 }, { "epoch": 0.20092656142759094, "grad_norm": 0.7367116126335573, "learning_rate": 9.253699399663023e-06, "loss": 0.3182, "step": 5855 }, { "epoch": 0.20096087851750172, "grad_norm": 0.7961252480375484, "learning_rate": 9.253407284207056e-06, "loss": 0.3301, "step": 5856 }, { "epoch": 0.2009951956074125, "grad_norm": 0.8063419918751569, "learning_rate": 9.253115116205203e-06, "loss": 0.2942, "step": 5857 }, { "epoch": 0.20102951269732328, "grad_norm": 0.7425408020800652, "learning_rate": 9.25282289566107e-06, "loss": 0.328, "step": 5858 }, { "epoch": 0.20106382978723406, "grad_norm": 1.5843017311726106, "learning_rate": 9.25253062257827e-06, "loss": 0.3302, "step": 5859 }, { "epoch": 0.2010981468771448, "grad_norm": 0.7623198458392597, "learning_rate": 9.25223829696041e-06, "loss": 0.3225, "step": 5860 }, { "epoch": 0.20113246396705559, "grad_norm": 0.7142814518494035, "learning_rate": 9.251945918811105e-06, "loss": 0.312, "step": 5861 }, { "epoch": 0.20116678105696637, "grad_norm": 0.876290915558521, "learning_rate": 9.251653488133966e-06, "loss": 0.3313, "step": 5862 }, { "epoch": 0.20120109814687714, "grad_norm": 0.7958604632992411, "learning_rate": 9.251361004932604e-06, "loss": 0.3317, "step": 5863 }, { "epoch": 0.20123541523678792, "grad_norm": 0.8304451355929227, "learning_rate": 9.251068469210633e-06, "loss": 0.3878, "step": 5864 }, { "epoch": 0.2012697323266987, "grad_norm": 0.8035202793687058, "learning_rate": 9.250775880971668e-06, "loss": 0.323, "step": 5865 }, { "epoch": 0.20130404941660948, "grad_norm": 0.7403935251600731, "learning_rate": 9.250483240219324e-06, "loss": 0.2845, "step": 5866 }, { "epoch": 0.20133836650652023, "grad_norm": 0.8130693407518784, "learning_rate": 9.250190546957213e-06, "loss": 0.2962, "step": 5867 }, { "epoch": 0.201372683596431, "grad_norm": 0.8477377217453944, "learning_rate": 9.249897801188954e-06, "loss": 0.3067, "step": 5868 }, { "epoch": 0.2014070006863418, "grad_norm": 0.829703045165903, "learning_rate": 9.249605002918163e-06, "loss": 0.306, "step": 5869 }, { "epoch": 0.20144131777625257, "grad_norm": 0.7584910287082934, "learning_rate": 9.249312152148455e-06, "loss": 0.3145, "step": 5870 }, { "epoch": 0.20147563486616335, "grad_norm": 1.0203577395266024, "learning_rate": 9.249019248883451e-06, "loss": 0.3039, "step": 5871 }, { "epoch": 0.20150995195607413, "grad_norm": 0.8301346027019901, "learning_rate": 9.248726293126769e-06, "loss": 0.3067, "step": 5872 }, { "epoch": 0.2015442690459849, "grad_norm": 0.7337237745072763, "learning_rate": 9.248433284882025e-06, "loss": 0.2919, "step": 5873 }, { "epoch": 0.2015785861358957, "grad_norm": 0.7170366811550772, "learning_rate": 9.248140224152843e-06, "loss": 0.2891, "step": 5874 }, { "epoch": 0.20161290322580644, "grad_norm": 0.7942526792207968, "learning_rate": 9.24784711094284e-06, "loss": 0.335, "step": 5875 }, { "epoch": 0.20164722031571722, "grad_norm": 0.7893164656106602, "learning_rate": 9.247553945255639e-06, "loss": 0.3213, "step": 5876 }, { "epoch": 0.201681537405628, "grad_norm": 0.8275425808853809, "learning_rate": 9.247260727094862e-06, "loss": 0.4026, "step": 5877 }, { "epoch": 0.20171585449553878, "grad_norm": 0.7986887501213956, "learning_rate": 9.246967456464129e-06, "loss": 0.4035, "step": 5878 }, { "epoch": 0.20175017158544956, "grad_norm": 0.7736030152397553, "learning_rate": 9.246674133367065e-06, "loss": 0.3537, "step": 5879 }, { "epoch": 0.20178448867536034, "grad_norm": 0.738096108340393, "learning_rate": 9.246380757807293e-06, "loss": 0.2955, "step": 5880 }, { "epoch": 0.20181880576527111, "grad_norm": 0.813561317720725, "learning_rate": 9.246087329788438e-06, "loss": 0.3477, "step": 5881 }, { "epoch": 0.2018531228551819, "grad_norm": 0.8221645764882806, "learning_rate": 9.245793849314125e-06, "loss": 0.3288, "step": 5882 }, { "epoch": 0.20188743994509264, "grad_norm": 0.7748693102173441, "learning_rate": 9.245500316387978e-06, "loss": 0.3283, "step": 5883 }, { "epoch": 0.20192175703500342, "grad_norm": 0.7697042555216937, "learning_rate": 9.245206731013625e-06, "loss": 0.3415, "step": 5884 }, { "epoch": 0.2019560741249142, "grad_norm": 0.6949943224749587, "learning_rate": 9.244913093194692e-06, "loss": 0.3458, "step": 5885 }, { "epoch": 0.20199039121482498, "grad_norm": 0.8154896429100711, "learning_rate": 9.244619402934806e-06, "loss": 0.2965, "step": 5886 }, { "epoch": 0.20202470830473576, "grad_norm": 0.7934628684583684, "learning_rate": 9.244325660237599e-06, "loss": 0.3155, "step": 5887 }, { "epoch": 0.20205902539464654, "grad_norm": 0.8098240282058633, "learning_rate": 9.244031865106694e-06, "loss": 0.3517, "step": 5888 }, { "epoch": 0.20209334248455732, "grad_norm": 0.986778250558912, "learning_rate": 9.243738017545725e-06, "loss": 0.3432, "step": 5889 }, { "epoch": 0.20212765957446807, "grad_norm": 0.9539525065995252, "learning_rate": 9.24344411755832e-06, "loss": 0.3259, "step": 5890 }, { "epoch": 0.20216197666437885, "grad_norm": 0.8478250557937071, "learning_rate": 9.243150165148109e-06, "loss": 0.3609, "step": 5891 }, { "epoch": 0.20219629375428963, "grad_norm": 0.8681102112584538, "learning_rate": 9.242856160318726e-06, "loss": 0.3399, "step": 5892 }, { "epoch": 0.2022306108442004, "grad_norm": 0.8539632053544965, "learning_rate": 9.242562103073803e-06, "loss": 0.3527, "step": 5893 }, { "epoch": 0.2022649279341112, "grad_norm": 0.8219422023370037, "learning_rate": 9.242267993416969e-06, "loss": 0.3045, "step": 5894 }, { "epoch": 0.20229924502402197, "grad_norm": 0.7558544543104883, "learning_rate": 9.241973831351862e-06, "loss": 0.2865, "step": 5895 }, { "epoch": 0.20233356211393275, "grad_norm": 0.7643447084674455, "learning_rate": 9.241679616882113e-06, "loss": 0.2933, "step": 5896 }, { "epoch": 0.20236787920384353, "grad_norm": 0.7664826904034764, "learning_rate": 9.241385350011358e-06, "loss": 0.3094, "step": 5897 }, { "epoch": 0.20240219629375428, "grad_norm": 0.8400270824307353, "learning_rate": 9.241091030743231e-06, "loss": 0.337, "step": 5898 }, { "epoch": 0.20243651338366506, "grad_norm": 0.7749877826079673, "learning_rate": 9.24079665908137e-06, "loss": 0.3081, "step": 5899 }, { "epoch": 0.20247083047357584, "grad_norm": 0.832990745493857, "learning_rate": 9.24050223502941e-06, "loss": 0.3598, "step": 5900 }, { "epoch": 0.20250514756348661, "grad_norm": 0.7885473682812336, "learning_rate": 9.240207758590989e-06, "loss": 0.3195, "step": 5901 }, { "epoch": 0.2025394646533974, "grad_norm": 0.7358871507072469, "learning_rate": 9.239913229769745e-06, "loss": 0.3272, "step": 5902 }, { "epoch": 0.20257378174330817, "grad_norm": 0.8338779917910151, "learning_rate": 9.239618648569316e-06, "loss": 0.3834, "step": 5903 }, { "epoch": 0.20260809883321895, "grad_norm": 0.7446795848934818, "learning_rate": 9.23932401499334e-06, "loss": 0.3181, "step": 5904 }, { "epoch": 0.20264241592312973, "grad_norm": 0.7976363326929555, "learning_rate": 9.239029329045461e-06, "loss": 0.3171, "step": 5905 }, { "epoch": 0.20267673301304048, "grad_norm": 0.8197797721584781, "learning_rate": 9.238734590729317e-06, "loss": 0.3765, "step": 5906 }, { "epoch": 0.20271105010295126, "grad_norm": 0.8889424996638599, "learning_rate": 9.238439800048548e-06, "loss": 0.3308, "step": 5907 }, { "epoch": 0.20274536719286204, "grad_norm": 0.8247942906471211, "learning_rate": 9.238144957006797e-06, "loss": 0.3411, "step": 5908 }, { "epoch": 0.20277968428277282, "grad_norm": 0.8677440313132186, "learning_rate": 9.237850061607705e-06, "loss": 0.3391, "step": 5909 }, { "epoch": 0.2028140013726836, "grad_norm": 0.7874504511837694, "learning_rate": 9.237555113854918e-06, "loss": 0.29, "step": 5910 }, { "epoch": 0.20284831846259438, "grad_norm": 0.8423401093184169, "learning_rate": 9.237260113752077e-06, "loss": 0.286, "step": 5911 }, { "epoch": 0.20288263555250516, "grad_norm": 0.8181274183987086, "learning_rate": 9.236965061302828e-06, "loss": 0.3687, "step": 5912 }, { "epoch": 0.2029169526424159, "grad_norm": 0.8956169294589482, "learning_rate": 9.236669956510815e-06, "loss": 0.3462, "step": 5913 }, { "epoch": 0.2029512697323267, "grad_norm": 0.8150448129395629, "learning_rate": 9.236374799379684e-06, "loss": 0.3313, "step": 5914 }, { "epoch": 0.20298558682223747, "grad_norm": 0.8283140755606151, "learning_rate": 9.236079589913083e-06, "loss": 0.3061, "step": 5915 }, { "epoch": 0.20301990391214825, "grad_norm": 0.8059975866733382, "learning_rate": 9.235784328114657e-06, "loss": 0.3617, "step": 5916 }, { "epoch": 0.20305422100205903, "grad_norm": 0.8019951025105847, "learning_rate": 9.235489013988053e-06, "loss": 0.3629, "step": 5917 }, { "epoch": 0.2030885380919698, "grad_norm": 0.7871035022674241, "learning_rate": 9.23519364753692e-06, "loss": 0.3787, "step": 5918 }, { "epoch": 0.20312285518188058, "grad_norm": 0.7385095400588485, "learning_rate": 9.23489822876491e-06, "loss": 0.2858, "step": 5919 }, { "epoch": 0.20315717227179136, "grad_norm": 0.8308601620715179, "learning_rate": 9.234602757675666e-06, "loss": 0.3005, "step": 5920 }, { "epoch": 0.20319148936170212, "grad_norm": 0.8929640676205332, "learning_rate": 9.234307234272845e-06, "loss": 0.3482, "step": 5921 }, { "epoch": 0.2032258064516129, "grad_norm": 0.9823518230598935, "learning_rate": 9.234011658560094e-06, "loss": 0.3196, "step": 5922 }, { "epoch": 0.20326012354152367, "grad_norm": 0.7322697419389361, "learning_rate": 9.233716030541065e-06, "loss": 0.3591, "step": 5923 }, { "epoch": 0.20329444063143445, "grad_norm": 0.8509535405217549, "learning_rate": 9.233420350219411e-06, "loss": 0.3427, "step": 5924 }, { "epoch": 0.20332875772134523, "grad_norm": 0.7738508380123315, "learning_rate": 9.233124617598783e-06, "loss": 0.3131, "step": 5925 }, { "epoch": 0.203363074811256, "grad_norm": 0.80515621824489, "learning_rate": 9.232828832682836e-06, "loss": 0.2816, "step": 5926 }, { "epoch": 0.2033973919011668, "grad_norm": 0.8794899098553525, "learning_rate": 9.232532995475225e-06, "loss": 0.3949, "step": 5927 }, { "epoch": 0.20343170899107757, "grad_norm": 0.8230782310869289, "learning_rate": 9.232237105979603e-06, "loss": 0.2909, "step": 5928 }, { "epoch": 0.20346602608098832, "grad_norm": 0.8320457525648094, "learning_rate": 9.231941164199625e-06, "loss": 0.3208, "step": 5929 }, { "epoch": 0.2035003431708991, "grad_norm": 0.8278502389582061, "learning_rate": 9.231645170138949e-06, "loss": 0.3533, "step": 5930 }, { "epoch": 0.20353466026080988, "grad_norm": 0.810212894831109, "learning_rate": 9.23134912380123e-06, "loss": 0.2819, "step": 5931 }, { "epoch": 0.20356897735072066, "grad_norm": 0.7141362188009442, "learning_rate": 9.231053025190126e-06, "loss": 0.3892, "step": 5932 }, { "epoch": 0.20360329444063144, "grad_norm": 0.7722537082674198, "learning_rate": 9.230756874309295e-06, "loss": 0.3567, "step": 5933 }, { "epoch": 0.20363761153054222, "grad_norm": 0.793204943867182, "learning_rate": 9.230460671162395e-06, "loss": 0.3912, "step": 5934 }, { "epoch": 0.203671928620453, "grad_norm": 0.786352298550395, "learning_rate": 9.230164415753085e-06, "loss": 0.313, "step": 5935 }, { "epoch": 0.20370624571036375, "grad_norm": 0.7750366273942567, "learning_rate": 9.229868108085028e-06, "loss": 0.3458, "step": 5936 }, { "epoch": 0.20374056280027453, "grad_norm": 0.7599466853036936, "learning_rate": 9.229571748161879e-06, "loss": 0.3839, "step": 5937 }, { "epoch": 0.2037748798901853, "grad_norm": 0.7940155968393077, "learning_rate": 9.229275335987304e-06, "loss": 0.2914, "step": 5938 }, { "epoch": 0.20380919698009609, "grad_norm": 0.7954611830461957, "learning_rate": 9.228978871564962e-06, "loss": 0.3066, "step": 5939 }, { "epoch": 0.20384351407000686, "grad_norm": 0.8064593304977055, "learning_rate": 9.228682354898517e-06, "loss": 0.3409, "step": 5940 }, { "epoch": 0.20387783115991764, "grad_norm": 0.7293110231528475, "learning_rate": 9.228385785991633e-06, "loss": 0.3124, "step": 5941 }, { "epoch": 0.20391214824982842, "grad_norm": 0.694995460395242, "learning_rate": 9.22808916484797e-06, "loss": 0.3149, "step": 5942 }, { "epoch": 0.2039464653397392, "grad_norm": 0.7438508246726374, "learning_rate": 9.227792491471198e-06, "loss": 0.338, "step": 5943 }, { "epoch": 0.20398078242964995, "grad_norm": 0.7633232676878957, "learning_rate": 9.227495765864976e-06, "loss": 0.3776, "step": 5944 }, { "epoch": 0.20401509951956073, "grad_norm": 0.7417250697611446, "learning_rate": 9.227198988032975e-06, "loss": 0.343, "step": 5945 }, { "epoch": 0.2040494166094715, "grad_norm": 0.8034367897158975, "learning_rate": 9.226902157978856e-06, "loss": 0.3664, "step": 5946 }, { "epoch": 0.2040837336993823, "grad_norm": 0.7570890776907405, "learning_rate": 9.226605275706291e-06, "loss": 0.35, "step": 5947 }, { "epoch": 0.20411805078929307, "grad_norm": 0.9119978910891439, "learning_rate": 9.226308341218946e-06, "loss": 0.3863, "step": 5948 }, { "epoch": 0.20415236787920385, "grad_norm": 0.7419470210278365, "learning_rate": 9.226011354520488e-06, "loss": 0.3197, "step": 5949 }, { "epoch": 0.20418668496911463, "grad_norm": 0.8140837470023606, "learning_rate": 9.225714315614587e-06, "loss": 0.3363, "step": 5950 }, { "epoch": 0.2042210020590254, "grad_norm": 0.6727512842451786, "learning_rate": 9.225417224504912e-06, "loss": 0.2593, "step": 5951 }, { "epoch": 0.20425531914893616, "grad_norm": 0.8138802207118615, "learning_rate": 9.225120081195134e-06, "loss": 0.3218, "step": 5952 }, { "epoch": 0.20428963623884694, "grad_norm": 0.7047989943583128, "learning_rate": 9.224822885688922e-06, "loss": 0.3046, "step": 5953 }, { "epoch": 0.20432395332875772, "grad_norm": 0.8999426140008301, "learning_rate": 9.224525637989949e-06, "loss": 0.3578, "step": 5954 }, { "epoch": 0.2043582704186685, "grad_norm": 0.744013313597707, "learning_rate": 9.224228338101889e-06, "loss": 0.3427, "step": 5955 }, { "epoch": 0.20439258750857928, "grad_norm": 0.8124215981745522, "learning_rate": 9.223930986028411e-06, "loss": 0.3702, "step": 5956 }, { "epoch": 0.20442690459849006, "grad_norm": 0.7867184607309264, "learning_rate": 9.223633581773192e-06, "loss": 0.3708, "step": 5957 }, { "epoch": 0.20446122168840083, "grad_norm": 1.0231758078347535, "learning_rate": 9.223336125339904e-06, "loss": 0.4319, "step": 5958 }, { "epoch": 0.2044955387783116, "grad_norm": 0.7607383489982048, "learning_rate": 9.223038616732221e-06, "loss": 0.3024, "step": 5959 }, { "epoch": 0.20452985586822237, "grad_norm": 0.8517418875754031, "learning_rate": 9.22274105595382e-06, "loss": 0.3277, "step": 5960 }, { "epoch": 0.20456417295813314, "grad_norm": 0.8287433918563869, "learning_rate": 9.222443443008378e-06, "loss": 0.3532, "step": 5961 }, { "epoch": 0.20459849004804392, "grad_norm": 0.7578996395584335, "learning_rate": 9.222145777899567e-06, "loss": 0.3276, "step": 5962 }, { "epoch": 0.2046328071379547, "grad_norm": 0.7232062129260389, "learning_rate": 9.22184806063107e-06, "loss": 0.3276, "step": 5963 }, { "epoch": 0.20466712422786548, "grad_norm": 0.763389445616266, "learning_rate": 9.221550291206563e-06, "loss": 0.3133, "step": 5964 }, { "epoch": 0.20470144131777626, "grad_norm": 0.7904244216222973, "learning_rate": 9.22125246962972e-06, "loss": 0.2784, "step": 5965 }, { "epoch": 0.20473575840768704, "grad_norm": 0.7116124162404537, "learning_rate": 9.220954595904227e-06, "loss": 0.3214, "step": 5966 }, { "epoch": 0.2047700754975978, "grad_norm": 0.7780860062111585, "learning_rate": 9.220656670033762e-06, "loss": 0.343, "step": 5967 }, { "epoch": 0.20480439258750857, "grad_norm": 0.7697332397220615, "learning_rate": 9.220358692022004e-06, "loss": 0.3617, "step": 5968 }, { "epoch": 0.20483870967741935, "grad_norm": 0.9156151275969839, "learning_rate": 9.220060661872633e-06, "loss": 0.357, "step": 5969 }, { "epoch": 0.20487302676733013, "grad_norm": 0.7529985536529312, "learning_rate": 9.219762579589335e-06, "loss": 0.3193, "step": 5970 }, { "epoch": 0.2049073438572409, "grad_norm": 0.7509223840395911, "learning_rate": 9.219464445175789e-06, "loss": 0.2923, "step": 5971 }, { "epoch": 0.2049416609471517, "grad_norm": 0.8751797041322509, "learning_rate": 9.21916625863568e-06, "loss": 0.3231, "step": 5972 }, { "epoch": 0.20497597803706247, "grad_norm": 0.7477864216463755, "learning_rate": 9.21886801997269e-06, "loss": 0.297, "step": 5973 }, { "epoch": 0.20501029512697325, "grad_norm": 0.756449706171722, "learning_rate": 9.218569729190505e-06, "loss": 0.2739, "step": 5974 }, { "epoch": 0.205044612216884, "grad_norm": 0.7246142004971516, "learning_rate": 9.21827138629281e-06, "loss": 0.2983, "step": 5975 }, { "epoch": 0.20507892930679478, "grad_norm": 0.9296549175707977, "learning_rate": 9.217972991283289e-06, "loss": 0.339, "step": 5976 }, { "epoch": 0.20511324639670556, "grad_norm": 0.8715101504138822, "learning_rate": 9.21767454416563e-06, "loss": 0.3735, "step": 5977 }, { "epoch": 0.20514756348661634, "grad_norm": 0.7724119150802357, "learning_rate": 9.217376044943518e-06, "loss": 0.3325, "step": 5978 }, { "epoch": 0.20518188057652711, "grad_norm": 0.8548036350268593, "learning_rate": 9.217077493620641e-06, "loss": 0.3625, "step": 5979 }, { "epoch": 0.2052161976664379, "grad_norm": 0.7255660545319167, "learning_rate": 9.21677889020069e-06, "loss": 0.3197, "step": 5980 }, { "epoch": 0.20525051475634867, "grad_norm": 0.8948116182159541, "learning_rate": 9.216480234687354e-06, "loss": 0.3609, "step": 5981 }, { "epoch": 0.20528483184625942, "grad_norm": 0.828752179248975, "learning_rate": 9.216181527084318e-06, "loss": 0.4293, "step": 5982 }, { "epoch": 0.2053191489361702, "grad_norm": 0.8158039196271647, "learning_rate": 9.215882767395274e-06, "loss": 0.3111, "step": 5983 }, { "epoch": 0.20535346602608098, "grad_norm": 0.7576961127438452, "learning_rate": 9.215583955623917e-06, "loss": 0.3042, "step": 5984 }, { "epoch": 0.20538778311599176, "grad_norm": 0.8106444686847104, "learning_rate": 9.215285091773933e-06, "loss": 0.3068, "step": 5985 }, { "epoch": 0.20542210020590254, "grad_norm": 0.8887100155915516, "learning_rate": 9.214986175849016e-06, "loss": 0.3608, "step": 5986 }, { "epoch": 0.20545641729581332, "grad_norm": 0.9024537680042052, "learning_rate": 9.214687207852859e-06, "loss": 0.3194, "step": 5987 }, { "epoch": 0.2054907343857241, "grad_norm": 0.9865318800953514, "learning_rate": 9.214388187789156e-06, "loss": 0.3832, "step": 5988 }, { "epoch": 0.20552505147563488, "grad_norm": 0.7964583921690122, "learning_rate": 9.2140891156616e-06, "loss": 0.2871, "step": 5989 }, { "epoch": 0.20555936856554563, "grad_norm": 0.7892197176826939, "learning_rate": 9.213789991473887e-06, "loss": 0.3482, "step": 5990 }, { "epoch": 0.2055936856554564, "grad_norm": 0.8206250877289083, "learning_rate": 9.213490815229711e-06, "loss": 0.3281, "step": 5991 }, { "epoch": 0.2056280027453672, "grad_norm": 0.9291909024287905, "learning_rate": 9.213191586932769e-06, "loss": 0.3605, "step": 5992 }, { "epoch": 0.20566231983527797, "grad_norm": 0.8037257261990879, "learning_rate": 9.212892306586756e-06, "loss": 0.3596, "step": 5993 }, { "epoch": 0.20569663692518875, "grad_norm": 0.7317589190837962, "learning_rate": 9.212592974195371e-06, "loss": 0.3603, "step": 5994 }, { "epoch": 0.20573095401509953, "grad_norm": 0.836450148949187, "learning_rate": 9.212293589762311e-06, "loss": 0.3381, "step": 5995 }, { "epoch": 0.2057652711050103, "grad_norm": 0.8060421499386773, "learning_rate": 9.211994153291275e-06, "loss": 0.345, "step": 5996 }, { "epoch": 0.20579958819492106, "grad_norm": 0.9898633118928114, "learning_rate": 9.211694664785962e-06, "loss": 0.3094, "step": 5997 }, { "epoch": 0.20583390528483184, "grad_norm": 0.7674300480669946, "learning_rate": 9.211395124250071e-06, "loss": 0.3221, "step": 5998 }, { "epoch": 0.20586822237474262, "grad_norm": 0.6954616278576002, "learning_rate": 9.211095531687304e-06, "loss": 0.3389, "step": 5999 }, { "epoch": 0.2059025394646534, "grad_norm": 0.793788737669909, "learning_rate": 9.210795887101361e-06, "loss": 0.3234, "step": 6000 }, { "epoch": 0.20593685655456417, "grad_norm": 0.9300832233145225, "learning_rate": 9.210496190495943e-06, "loss": 0.3626, "step": 6001 }, { "epoch": 0.20597117364447495, "grad_norm": 0.7179639701694052, "learning_rate": 9.210196441874755e-06, "loss": 0.32, "step": 6002 }, { "epoch": 0.20600549073438573, "grad_norm": 0.7293987979806266, "learning_rate": 9.209896641241497e-06, "loss": 0.3251, "step": 6003 }, { "epoch": 0.2060398078242965, "grad_norm": 0.8755026764663034, "learning_rate": 9.209596788599878e-06, "loss": 0.3603, "step": 6004 }, { "epoch": 0.20607412491420726, "grad_norm": 0.8218529907854608, "learning_rate": 9.209296883953596e-06, "loss": 0.3659, "step": 6005 }, { "epoch": 0.20610844200411804, "grad_norm": 0.8082999038543061, "learning_rate": 9.208996927306358e-06, "loss": 0.3286, "step": 6006 }, { "epoch": 0.20614275909402882, "grad_norm": 0.7563265385911364, "learning_rate": 9.20869691866187e-06, "loss": 0.3325, "step": 6007 }, { "epoch": 0.2061770761839396, "grad_norm": 0.7538907878560203, "learning_rate": 9.20839685802384e-06, "loss": 0.3246, "step": 6008 }, { "epoch": 0.20621139327385038, "grad_norm": 0.7695378586275913, "learning_rate": 9.208096745395973e-06, "loss": 0.3321, "step": 6009 }, { "epoch": 0.20624571036376116, "grad_norm": 0.7894352250042195, "learning_rate": 9.207796580781977e-06, "loss": 0.2944, "step": 6010 }, { "epoch": 0.20628002745367194, "grad_norm": 0.8149814892149568, "learning_rate": 9.207496364185558e-06, "loss": 0.316, "step": 6011 }, { "epoch": 0.20631434454358272, "grad_norm": 0.8624598015345185, "learning_rate": 9.20719609561043e-06, "loss": 0.3242, "step": 6012 }, { "epoch": 0.20634866163349347, "grad_norm": 0.8128786222143247, "learning_rate": 9.206895775060296e-06, "loss": 0.3243, "step": 6013 }, { "epoch": 0.20638297872340425, "grad_norm": 0.6907537984775345, "learning_rate": 9.206595402538871e-06, "loss": 0.271, "step": 6014 }, { "epoch": 0.20641729581331503, "grad_norm": 0.842911081935703, "learning_rate": 9.206294978049866e-06, "loss": 0.3356, "step": 6015 }, { "epoch": 0.2064516129032258, "grad_norm": 0.8162873277490785, "learning_rate": 9.205994501596989e-06, "loss": 0.3334, "step": 6016 }, { "epoch": 0.20648592999313659, "grad_norm": 0.6957614238738931, "learning_rate": 9.205693973183954e-06, "loss": 0.3439, "step": 6017 }, { "epoch": 0.20652024708304736, "grad_norm": 0.8618265825205169, "learning_rate": 9.205393392814473e-06, "loss": 0.3699, "step": 6018 }, { "epoch": 0.20655456417295814, "grad_norm": 0.8555665721712634, "learning_rate": 9.20509276049226e-06, "loss": 0.3684, "step": 6019 }, { "epoch": 0.2065888812628689, "grad_norm": 0.8153189217611806, "learning_rate": 9.204792076221027e-06, "loss": 0.3412, "step": 6020 }, { "epoch": 0.20662319835277967, "grad_norm": 0.7567693668849497, "learning_rate": 9.204491340004492e-06, "loss": 0.3332, "step": 6021 }, { "epoch": 0.20665751544269045, "grad_norm": 0.8107563617145273, "learning_rate": 9.204190551846368e-06, "loss": 0.3112, "step": 6022 }, { "epoch": 0.20669183253260123, "grad_norm": 0.8620740846775072, "learning_rate": 9.203889711750371e-06, "loss": 0.3565, "step": 6023 }, { "epoch": 0.206726149622512, "grad_norm": 0.8670570671336756, "learning_rate": 9.203588819720219e-06, "loss": 0.2999, "step": 6024 }, { "epoch": 0.2067604667124228, "grad_norm": 0.8027730808063224, "learning_rate": 9.203287875759627e-06, "loss": 0.335, "step": 6025 }, { "epoch": 0.20679478380233357, "grad_norm": 0.7751556280145387, "learning_rate": 9.202986879872313e-06, "loss": 0.296, "step": 6026 }, { "epoch": 0.20682910089224435, "grad_norm": 0.8350029690595749, "learning_rate": 9.202685832061999e-06, "loss": 0.3943, "step": 6027 }, { "epoch": 0.2068634179821551, "grad_norm": 0.7725929029195777, "learning_rate": 9.202384732332399e-06, "loss": 0.348, "step": 6028 }, { "epoch": 0.20689773507206588, "grad_norm": 0.767767952120209, "learning_rate": 9.202083580687235e-06, "loss": 0.3236, "step": 6029 }, { "epoch": 0.20693205216197666, "grad_norm": 0.7426506021869733, "learning_rate": 9.20178237713023e-06, "loss": 0.3507, "step": 6030 }, { "epoch": 0.20696636925188744, "grad_norm": 0.8405572034784289, "learning_rate": 9.201481121665102e-06, "loss": 0.3747, "step": 6031 }, { "epoch": 0.20700068634179822, "grad_norm": 0.9376709552416853, "learning_rate": 9.201179814295571e-06, "loss": 0.4237, "step": 6032 }, { "epoch": 0.207035003431709, "grad_norm": 0.776485294355017, "learning_rate": 9.20087845502536e-06, "loss": 0.3481, "step": 6033 }, { "epoch": 0.20706932052161978, "grad_norm": 0.746407711563684, "learning_rate": 9.200577043858199e-06, "loss": 0.3458, "step": 6034 }, { "epoch": 0.20710363761153056, "grad_norm": 0.7870934247753304, "learning_rate": 9.200275580797802e-06, "loss": 0.3836, "step": 6035 }, { "epoch": 0.2071379547014413, "grad_norm": 0.8662837040653077, "learning_rate": 9.199974065847898e-06, "loss": 0.3574, "step": 6036 }, { "epoch": 0.2071722717913521, "grad_norm": 0.7586963101056947, "learning_rate": 9.199672499012211e-06, "loss": 0.3396, "step": 6037 }, { "epoch": 0.20720658888126287, "grad_norm": 0.7200836486558292, "learning_rate": 9.199370880294469e-06, "loss": 0.295, "step": 6038 }, { "epoch": 0.20724090597117364, "grad_norm": 0.7994298435439376, "learning_rate": 9.199069209698393e-06, "loss": 0.3646, "step": 6039 }, { "epoch": 0.20727522306108442, "grad_norm": 0.8293626335482873, "learning_rate": 9.198767487227714e-06, "loss": 0.3041, "step": 6040 }, { "epoch": 0.2073095401509952, "grad_norm": 0.7200438486416214, "learning_rate": 9.198465712886157e-06, "loss": 0.3029, "step": 6041 }, { "epoch": 0.20734385724090598, "grad_norm": 0.8498202132291183, "learning_rate": 9.198163886677452e-06, "loss": 0.3164, "step": 6042 }, { "epoch": 0.20737817433081673, "grad_norm": 0.928067360538817, "learning_rate": 9.197862008605324e-06, "loss": 0.3086, "step": 6043 }, { "epoch": 0.2074124914207275, "grad_norm": 0.7249912093442754, "learning_rate": 9.197560078673508e-06, "loss": 0.2532, "step": 6044 }, { "epoch": 0.2074468085106383, "grad_norm": 0.7712507422875408, "learning_rate": 9.197258096885732e-06, "loss": 0.3325, "step": 6045 }, { "epoch": 0.20748112560054907, "grad_norm": 0.9392804115787411, "learning_rate": 9.196956063245724e-06, "loss": 0.3509, "step": 6046 }, { "epoch": 0.20751544269045985, "grad_norm": 0.9576580432577092, "learning_rate": 9.196653977757217e-06, "loss": 0.2851, "step": 6047 }, { "epoch": 0.20754975978037063, "grad_norm": 0.7972247275980804, "learning_rate": 9.196351840423942e-06, "loss": 0.3522, "step": 6048 }, { "epoch": 0.2075840768702814, "grad_norm": 0.7884344105227692, "learning_rate": 9.196049651249634e-06, "loss": 0.3642, "step": 6049 }, { "epoch": 0.2076183939601922, "grad_norm": 0.7589816839669249, "learning_rate": 9.195747410238025e-06, "loss": 0.323, "step": 6050 }, { "epoch": 0.20765271105010294, "grad_norm": 0.841943199917009, "learning_rate": 9.195445117392847e-06, "loss": 0.3895, "step": 6051 }, { "epoch": 0.20768702814001372, "grad_norm": 0.8041388577788381, "learning_rate": 9.195142772717838e-06, "loss": 0.3584, "step": 6052 }, { "epoch": 0.2077213452299245, "grad_norm": 0.7894015107305619, "learning_rate": 9.194840376216729e-06, "loss": 0.3422, "step": 6053 }, { "epoch": 0.20775566231983528, "grad_norm": 0.851457781025252, "learning_rate": 9.19453792789326e-06, "loss": 0.3081, "step": 6054 }, { "epoch": 0.20778997940974606, "grad_norm": 0.7633922407428354, "learning_rate": 9.194235427751164e-06, "loss": 0.342, "step": 6055 }, { "epoch": 0.20782429649965684, "grad_norm": 0.6635821902498366, "learning_rate": 9.193932875794177e-06, "loss": 0.3534, "step": 6056 }, { "epoch": 0.20785861358956761, "grad_norm": 0.8486308355362945, "learning_rate": 9.193630272026042e-06, "loss": 0.3422, "step": 6057 }, { "epoch": 0.2078929306794784, "grad_norm": 0.7495019832683254, "learning_rate": 9.193327616450494e-06, "loss": 0.3121, "step": 6058 }, { "epoch": 0.20792724776938915, "grad_norm": 0.8937305110615602, "learning_rate": 9.193024909071272e-06, "loss": 0.3277, "step": 6059 }, { "epoch": 0.20796156485929992, "grad_norm": 0.7542838079467614, "learning_rate": 9.192722149892115e-06, "loss": 0.337, "step": 6060 }, { "epoch": 0.2079958819492107, "grad_norm": 0.8588680902475112, "learning_rate": 9.192419338916767e-06, "loss": 0.3844, "step": 6061 }, { "epoch": 0.20803019903912148, "grad_norm": 0.7927345065941199, "learning_rate": 9.192116476148963e-06, "loss": 0.3148, "step": 6062 }, { "epoch": 0.20806451612903226, "grad_norm": 0.7768696216281085, "learning_rate": 9.191813561592447e-06, "loss": 0.3478, "step": 6063 }, { "epoch": 0.20809883321894304, "grad_norm": 0.7860719490965569, "learning_rate": 9.191510595250963e-06, "loss": 0.3299, "step": 6064 }, { "epoch": 0.20813315030885382, "grad_norm": 0.7992511658390286, "learning_rate": 9.191207577128252e-06, "loss": 0.3094, "step": 6065 }, { "epoch": 0.20816746739876457, "grad_norm": 0.6917026757951853, "learning_rate": 9.190904507228058e-06, "loss": 0.3012, "step": 6066 }, { "epoch": 0.20820178448867535, "grad_norm": 0.8490779775692111, "learning_rate": 9.190601385554125e-06, "loss": 0.3103, "step": 6067 }, { "epoch": 0.20823610157858613, "grad_norm": 0.8763771327763805, "learning_rate": 9.190298212110198e-06, "loss": 0.3688, "step": 6068 }, { "epoch": 0.2082704186684969, "grad_norm": 0.7659305222560252, "learning_rate": 9.189994986900022e-06, "loss": 0.3102, "step": 6069 }, { "epoch": 0.2083047357584077, "grad_norm": 0.7600683806579662, "learning_rate": 9.189691709927342e-06, "loss": 0.3842, "step": 6070 }, { "epoch": 0.20833905284831847, "grad_norm": 0.866064910968009, "learning_rate": 9.189388381195907e-06, "loss": 0.3204, "step": 6071 }, { "epoch": 0.20837336993822925, "grad_norm": 0.838366478870542, "learning_rate": 9.189085000709462e-06, "loss": 0.4038, "step": 6072 }, { "epoch": 0.20840768702814003, "grad_norm": 0.7126363787409858, "learning_rate": 9.188781568471757e-06, "loss": 0.2861, "step": 6073 }, { "epoch": 0.20844200411805078, "grad_norm": 0.8157707136341625, "learning_rate": 9.188478084486537e-06, "loss": 0.346, "step": 6074 }, { "epoch": 0.20847632120796156, "grad_norm": 0.7141574017775698, "learning_rate": 9.188174548757556e-06, "loss": 0.3541, "step": 6075 }, { "epoch": 0.20851063829787234, "grad_norm": 0.7972295953609435, "learning_rate": 9.18787096128856e-06, "loss": 0.3233, "step": 6076 }, { "epoch": 0.20854495538778312, "grad_norm": 0.8950719462228282, "learning_rate": 9.1875673220833e-06, "loss": 0.3294, "step": 6077 }, { "epoch": 0.2085792724776939, "grad_norm": 0.7574468990418718, "learning_rate": 9.18726363114553e-06, "loss": 0.3309, "step": 6078 }, { "epoch": 0.20861358956760467, "grad_norm": 0.7882941990342622, "learning_rate": 9.186959888479e-06, "loss": 0.3334, "step": 6079 }, { "epoch": 0.20864790665751545, "grad_norm": 0.7885696900523995, "learning_rate": 9.18665609408746e-06, "loss": 0.2991, "step": 6080 }, { "epoch": 0.20868222374742623, "grad_norm": 0.8463390595904994, "learning_rate": 9.186352247974666e-06, "loss": 0.3448, "step": 6081 }, { "epoch": 0.20871654083733698, "grad_norm": 0.7518356368903465, "learning_rate": 9.18604835014437e-06, "loss": 0.3126, "step": 6082 }, { "epoch": 0.20875085792724776, "grad_norm": 0.7709642887981178, "learning_rate": 9.185744400600329e-06, "loss": 0.3201, "step": 6083 }, { "epoch": 0.20878517501715854, "grad_norm": 0.6654214032151283, "learning_rate": 9.185440399346295e-06, "loss": 0.3025, "step": 6084 }, { "epoch": 0.20881949210706932, "grad_norm": 0.8428191352806982, "learning_rate": 9.185136346386022e-06, "loss": 0.338, "step": 6085 }, { "epoch": 0.2088538091969801, "grad_norm": 1.086250927024614, "learning_rate": 9.184832241723274e-06, "loss": 0.3447, "step": 6086 }, { "epoch": 0.20888812628689088, "grad_norm": 0.8197650018015133, "learning_rate": 9.184528085361798e-06, "loss": 0.3331, "step": 6087 }, { "epoch": 0.20892244337680166, "grad_norm": 0.7893622367138041, "learning_rate": 9.18422387730536e-06, "loss": 0.3291, "step": 6088 }, { "epoch": 0.2089567604667124, "grad_norm": 0.7991316435992192, "learning_rate": 9.183919617557713e-06, "loss": 0.3697, "step": 6089 }, { "epoch": 0.2089910775566232, "grad_norm": 0.6987871513870736, "learning_rate": 9.183615306122616e-06, "loss": 0.3095, "step": 6090 }, { "epoch": 0.20902539464653397, "grad_norm": 0.7960785648570204, "learning_rate": 9.18331094300383e-06, "loss": 0.3647, "step": 6091 }, { "epoch": 0.20905971173644475, "grad_norm": 0.7963981775025928, "learning_rate": 9.183006528205116e-06, "loss": 0.3251, "step": 6092 }, { "epoch": 0.20909402882635553, "grad_norm": 0.7605853035880145, "learning_rate": 9.182702061730231e-06, "loss": 0.2907, "step": 6093 }, { "epoch": 0.2091283459162663, "grad_norm": 0.7929251577498838, "learning_rate": 9.18239754358294e-06, "loss": 0.3185, "step": 6094 }, { "epoch": 0.20916266300617709, "grad_norm": 0.7800436849800099, "learning_rate": 9.182092973767002e-06, "loss": 0.3043, "step": 6095 }, { "epoch": 0.20919698009608786, "grad_norm": 0.8537170879893603, "learning_rate": 9.181788352286184e-06, "loss": 0.3277, "step": 6096 }, { "epoch": 0.20923129718599862, "grad_norm": 0.8471081778170381, "learning_rate": 9.181483679144244e-06, "loss": 0.3541, "step": 6097 }, { "epoch": 0.2092656142759094, "grad_norm": 0.8105470825655364, "learning_rate": 9.18117895434495e-06, "loss": 0.3788, "step": 6098 }, { "epoch": 0.20929993136582017, "grad_norm": 0.755006207831851, "learning_rate": 9.180874177892063e-06, "loss": 0.3242, "step": 6099 }, { "epoch": 0.20933424845573095, "grad_norm": 0.7358969144478077, "learning_rate": 9.180569349789352e-06, "loss": 0.3469, "step": 6100 }, { "epoch": 0.20936856554564173, "grad_norm": 0.7751446787300761, "learning_rate": 9.180264470040579e-06, "loss": 0.2823, "step": 6101 }, { "epoch": 0.2094028826355525, "grad_norm": 0.7608610785609552, "learning_rate": 9.179959538649511e-06, "loss": 0.3306, "step": 6102 }, { "epoch": 0.2094371997254633, "grad_norm": 0.8111366519011135, "learning_rate": 9.17965455561992e-06, "loss": 0.3128, "step": 6103 }, { "epoch": 0.20947151681537404, "grad_norm": 0.7601825993066765, "learning_rate": 9.179349520955567e-06, "loss": 0.3175, "step": 6104 }, { "epoch": 0.20950583390528482, "grad_norm": 0.7707586556094664, "learning_rate": 9.179044434660225e-06, "loss": 0.3465, "step": 6105 }, { "epoch": 0.2095401509951956, "grad_norm": 0.7856199963187451, "learning_rate": 9.17873929673766e-06, "loss": 0.329, "step": 6106 }, { "epoch": 0.20957446808510638, "grad_norm": 0.9472144370101528, "learning_rate": 9.178434107191645e-06, "loss": 0.3177, "step": 6107 }, { "epoch": 0.20960878517501716, "grad_norm": 0.7105496356626204, "learning_rate": 9.178128866025946e-06, "loss": 0.3028, "step": 6108 }, { "epoch": 0.20964310226492794, "grad_norm": 0.968966236728223, "learning_rate": 9.177823573244338e-06, "loss": 0.3288, "step": 6109 }, { "epoch": 0.20967741935483872, "grad_norm": 0.837504036012917, "learning_rate": 9.177518228850588e-06, "loss": 0.3359, "step": 6110 }, { "epoch": 0.2097117364447495, "grad_norm": 0.7012513424443655, "learning_rate": 9.177212832848473e-06, "loss": 0.3398, "step": 6111 }, { "epoch": 0.20974605353466025, "grad_norm": 0.7459488482761453, "learning_rate": 9.176907385241763e-06, "loss": 0.3215, "step": 6112 }, { "epoch": 0.20978037062457103, "grad_norm": 0.747818616014636, "learning_rate": 9.176601886034231e-06, "loss": 0.3422, "step": 6113 }, { "epoch": 0.2098146877144818, "grad_norm": 0.8168226611971283, "learning_rate": 9.176296335229653e-06, "loss": 0.371, "step": 6114 }, { "epoch": 0.20984900480439259, "grad_norm": 0.7643698392528556, "learning_rate": 9.175990732831804e-06, "loss": 0.3047, "step": 6115 }, { "epoch": 0.20988332189430337, "grad_norm": 0.7921217270023329, "learning_rate": 9.175685078844456e-06, "loss": 0.3616, "step": 6116 }, { "epoch": 0.20991763898421414, "grad_norm": 0.743932063925517, "learning_rate": 9.175379373271388e-06, "loss": 0.349, "step": 6117 }, { "epoch": 0.20995195607412492, "grad_norm": 0.727544593426867, "learning_rate": 9.175073616116376e-06, "loss": 0.3164, "step": 6118 }, { "epoch": 0.2099862731640357, "grad_norm": 0.7535004344091235, "learning_rate": 9.174767807383198e-06, "loss": 0.3774, "step": 6119 }, { "epoch": 0.21002059025394645, "grad_norm": 0.7694513771376602, "learning_rate": 9.174461947075631e-06, "loss": 0.327, "step": 6120 }, { "epoch": 0.21005490734385723, "grad_norm": 0.7451472627516998, "learning_rate": 9.174156035197452e-06, "loss": 0.3437, "step": 6121 }, { "epoch": 0.210089224433768, "grad_norm": 0.7787720636269791, "learning_rate": 9.173850071752444e-06, "loss": 0.3617, "step": 6122 }, { "epoch": 0.2101235415236788, "grad_norm": 0.7528640053799818, "learning_rate": 9.173544056744384e-06, "loss": 0.3698, "step": 6123 }, { "epoch": 0.21015785861358957, "grad_norm": 0.7405852926048564, "learning_rate": 9.173237990177052e-06, "loss": 0.3259, "step": 6124 }, { "epoch": 0.21019217570350035, "grad_norm": 0.746198657964269, "learning_rate": 9.172931872054229e-06, "loss": 0.3507, "step": 6125 }, { "epoch": 0.21022649279341113, "grad_norm": 0.7856756826054041, "learning_rate": 9.1726257023797e-06, "loss": 0.3255, "step": 6126 }, { "epoch": 0.21026080988332188, "grad_norm": 0.8114244755988693, "learning_rate": 9.172319481157246e-06, "loss": 0.3231, "step": 6127 }, { "epoch": 0.21029512697323266, "grad_norm": 0.7510103865026064, "learning_rate": 9.172013208390646e-06, "loss": 0.3084, "step": 6128 }, { "epoch": 0.21032944406314344, "grad_norm": 0.757487245902033, "learning_rate": 9.171706884083692e-06, "loss": 0.3124, "step": 6129 }, { "epoch": 0.21036376115305422, "grad_norm": 0.7402066920380178, "learning_rate": 9.171400508240158e-06, "loss": 0.2727, "step": 6130 }, { "epoch": 0.210398078242965, "grad_norm": 0.8224505816930062, "learning_rate": 9.171094080863836e-06, "loss": 0.2914, "step": 6131 }, { "epoch": 0.21043239533287578, "grad_norm": 0.7658652547407925, "learning_rate": 9.17078760195851e-06, "loss": 0.3832, "step": 6132 }, { "epoch": 0.21046671242278656, "grad_norm": 0.8654375959176874, "learning_rate": 9.170481071527966e-06, "loss": 0.3247, "step": 6133 }, { "epoch": 0.21050102951269734, "grad_norm": 0.7026777665390492, "learning_rate": 9.17017448957599e-06, "loss": 0.3182, "step": 6134 }, { "epoch": 0.2105353466026081, "grad_norm": 0.9273525875076948, "learning_rate": 9.16986785610637e-06, "loss": 0.3232, "step": 6135 }, { "epoch": 0.21056966369251887, "grad_norm": 0.8104327731190822, "learning_rate": 9.169561171122896e-06, "loss": 0.4183, "step": 6136 }, { "epoch": 0.21060398078242965, "grad_norm": 0.692121185110494, "learning_rate": 9.169254434629354e-06, "loss": 0.3506, "step": 6137 }, { "epoch": 0.21063829787234042, "grad_norm": 0.7386068242338258, "learning_rate": 9.168947646629534e-06, "loss": 0.3217, "step": 6138 }, { "epoch": 0.2106726149622512, "grad_norm": 0.7493554865742172, "learning_rate": 9.168640807127224e-06, "loss": 0.2962, "step": 6139 }, { "epoch": 0.21070693205216198, "grad_norm": 0.858491930407094, "learning_rate": 9.168333916126221e-06, "loss": 0.3395, "step": 6140 }, { "epoch": 0.21074124914207276, "grad_norm": 1.0272569134844474, "learning_rate": 9.16802697363031e-06, "loss": 0.3237, "step": 6141 }, { "epoch": 0.21077556623198354, "grad_norm": 0.817812043748447, "learning_rate": 9.167719979643288e-06, "loss": 0.3042, "step": 6142 }, { "epoch": 0.2108098833218943, "grad_norm": 0.7287706510242602, "learning_rate": 9.167412934168942e-06, "loss": 0.3266, "step": 6143 }, { "epoch": 0.21084420041180507, "grad_norm": 0.8770460881914185, "learning_rate": 9.167105837211068e-06, "loss": 0.355, "step": 6144 }, { "epoch": 0.21087851750171585, "grad_norm": 0.7852913405742441, "learning_rate": 9.16679868877346e-06, "loss": 0.3535, "step": 6145 }, { "epoch": 0.21091283459162663, "grad_norm": 0.8397427876863666, "learning_rate": 9.166491488859912e-06, "loss": 0.3703, "step": 6146 }, { "epoch": 0.2109471516815374, "grad_norm": 0.7937175778867275, "learning_rate": 9.16618423747422e-06, "loss": 0.3409, "step": 6147 }, { "epoch": 0.2109814687714482, "grad_norm": 0.8162021080613954, "learning_rate": 9.16587693462018e-06, "loss": 0.3294, "step": 6148 }, { "epoch": 0.21101578586135897, "grad_norm": 0.8370771453060414, "learning_rate": 9.165569580301585e-06, "loss": 0.3141, "step": 6149 }, { "epoch": 0.21105010295126972, "grad_norm": 0.7977329206322701, "learning_rate": 9.165262174522236e-06, "loss": 0.3136, "step": 6150 }, { "epoch": 0.2110844200411805, "grad_norm": 0.9775181330275992, "learning_rate": 9.164954717285928e-06, "loss": 0.4035, "step": 6151 }, { "epoch": 0.21111873713109128, "grad_norm": 0.8370059129543492, "learning_rate": 9.164647208596462e-06, "loss": 0.3823, "step": 6152 }, { "epoch": 0.21115305422100206, "grad_norm": 0.8591588318393767, "learning_rate": 9.164339648457634e-06, "loss": 0.341, "step": 6153 }, { "epoch": 0.21118737131091284, "grad_norm": 0.8365253068979738, "learning_rate": 9.164032036873245e-06, "loss": 0.4091, "step": 6154 }, { "epoch": 0.21122168840082362, "grad_norm": 0.7997902112906587, "learning_rate": 9.163724373847096e-06, "loss": 0.3394, "step": 6155 }, { "epoch": 0.2112560054907344, "grad_norm": 0.8214788297028213, "learning_rate": 9.163416659382986e-06, "loss": 0.3567, "step": 6156 }, { "epoch": 0.21129032258064517, "grad_norm": 0.7640103325078305, "learning_rate": 9.163108893484716e-06, "loss": 0.3085, "step": 6157 }, { "epoch": 0.21132463967055592, "grad_norm": 0.7662779228212854, "learning_rate": 9.162801076156091e-06, "loss": 0.3256, "step": 6158 }, { "epoch": 0.2113589567604667, "grad_norm": 0.8025385408544441, "learning_rate": 9.162493207400911e-06, "loss": 0.3595, "step": 6159 }, { "epoch": 0.21139327385037748, "grad_norm": 0.8781031750299816, "learning_rate": 9.162185287222981e-06, "loss": 0.334, "step": 6160 }, { "epoch": 0.21142759094028826, "grad_norm": 0.8635469300887154, "learning_rate": 9.161877315626104e-06, "loss": 0.3287, "step": 6161 }, { "epoch": 0.21146190803019904, "grad_norm": 0.8410922701002965, "learning_rate": 9.161569292614085e-06, "loss": 0.3445, "step": 6162 }, { "epoch": 0.21149622512010982, "grad_norm": 0.7336416361950572, "learning_rate": 9.161261218190732e-06, "loss": 0.299, "step": 6163 }, { "epoch": 0.2115305422100206, "grad_norm": 0.8389474679149272, "learning_rate": 9.160953092359845e-06, "loss": 0.3854, "step": 6164 }, { "epoch": 0.21156485929993138, "grad_norm": 0.809534010671223, "learning_rate": 9.160644915125233e-06, "loss": 0.348, "step": 6165 }, { "epoch": 0.21159917638984213, "grad_norm": 0.7651549784058695, "learning_rate": 9.160336686490708e-06, "loss": 0.354, "step": 6166 }, { "epoch": 0.2116334934797529, "grad_norm": 0.7985090758052199, "learning_rate": 9.16002840646007e-06, "loss": 0.2865, "step": 6167 }, { "epoch": 0.2116678105696637, "grad_norm": 0.7869438529834702, "learning_rate": 9.159720075037134e-06, "loss": 0.4301, "step": 6168 }, { "epoch": 0.21170212765957447, "grad_norm": 0.7047717148980972, "learning_rate": 9.159411692225703e-06, "loss": 0.3554, "step": 6169 }, { "epoch": 0.21173644474948525, "grad_norm": 0.83007711597283, "learning_rate": 9.159103258029592e-06, "loss": 0.3629, "step": 6170 }, { "epoch": 0.21177076183939603, "grad_norm": 0.860227150210335, "learning_rate": 9.158794772452609e-06, "loss": 0.3322, "step": 6171 }, { "epoch": 0.2118050789293068, "grad_norm": 0.8931634899171965, "learning_rate": 9.158486235498565e-06, "loss": 0.3534, "step": 6172 }, { "epoch": 0.21183939601921756, "grad_norm": 0.7732141250391018, "learning_rate": 9.158177647171271e-06, "loss": 0.3271, "step": 6173 }, { "epoch": 0.21187371310912834, "grad_norm": 0.8036556070615963, "learning_rate": 9.157869007474542e-06, "loss": 0.3241, "step": 6174 }, { "epoch": 0.21190803019903912, "grad_norm": 0.8448668374589114, "learning_rate": 9.157560316412189e-06, "loss": 0.3711, "step": 6175 }, { "epoch": 0.2119423472889499, "grad_norm": 0.8222983153242287, "learning_rate": 9.157251573988025e-06, "loss": 0.2879, "step": 6176 }, { "epoch": 0.21197666437886067, "grad_norm": 0.8023694675749363, "learning_rate": 9.156942780205865e-06, "loss": 0.3166, "step": 6177 }, { "epoch": 0.21201098146877145, "grad_norm": 0.7866956043948424, "learning_rate": 9.156633935069522e-06, "loss": 0.3514, "step": 6178 }, { "epoch": 0.21204529855868223, "grad_norm": 0.7708376875423499, "learning_rate": 9.156325038582814e-06, "loss": 0.3401, "step": 6179 }, { "epoch": 0.212079615648593, "grad_norm": 0.8731590394705473, "learning_rate": 9.156016090749555e-06, "loss": 0.3147, "step": 6180 }, { "epoch": 0.21211393273850376, "grad_norm": 0.8375997405445094, "learning_rate": 9.155707091573564e-06, "loss": 0.3166, "step": 6181 }, { "epoch": 0.21214824982841454, "grad_norm": 0.7653994071243018, "learning_rate": 9.155398041058657e-06, "loss": 0.3138, "step": 6182 }, { "epoch": 0.21218256691832532, "grad_norm": 0.9485455859811006, "learning_rate": 9.15508893920865e-06, "loss": 0.3325, "step": 6183 }, { "epoch": 0.2122168840082361, "grad_norm": 0.729894271024018, "learning_rate": 9.154779786027366e-06, "loss": 0.2847, "step": 6184 }, { "epoch": 0.21225120109814688, "grad_norm": 0.8323543765838306, "learning_rate": 9.15447058151862e-06, "loss": 0.3245, "step": 6185 }, { "epoch": 0.21228551818805766, "grad_norm": 0.8658939717772834, "learning_rate": 9.154161325686234e-06, "loss": 0.4034, "step": 6186 }, { "epoch": 0.21231983527796844, "grad_norm": 0.7792734801443854, "learning_rate": 9.153852018534029e-06, "loss": 0.2892, "step": 6187 }, { "epoch": 0.21235415236787922, "grad_norm": 0.7946329009769459, "learning_rate": 9.153542660065824e-06, "loss": 0.3882, "step": 6188 }, { "epoch": 0.21238846945778997, "grad_norm": 0.700122719871284, "learning_rate": 9.153233250285443e-06, "loss": 0.2957, "step": 6189 }, { "epoch": 0.21242278654770075, "grad_norm": 0.8025332951283489, "learning_rate": 9.152923789196705e-06, "loss": 0.3915, "step": 6190 }, { "epoch": 0.21245710363761153, "grad_norm": 0.7516862887747253, "learning_rate": 9.15261427680344e-06, "loss": 0.3474, "step": 6191 }, { "epoch": 0.2124914207275223, "grad_norm": 0.7905902624970148, "learning_rate": 9.152304713109464e-06, "loss": 0.3553, "step": 6192 }, { "epoch": 0.21252573781743309, "grad_norm": 0.7885008311691364, "learning_rate": 9.151995098118605e-06, "loss": 0.3404, "step": 6193 }, { "epoch": 0.21256005490734386, "grad_norm": 0.7774447543641008, "learning_rate": 9.151685431834686e-06, "loss": 0.3098, "step": 6194 }, { "epoch": 0.21259437199725464, "grad_norm": 0.862736028191909, "learning_rate": 9.151375714261534e-06, "loss": 0.3597, "step": 6195 }, { "epoch": 0.2126286890871654, "grad_norm": 0.6932912588254856, "learning_rate": 9.151065945402977e-06, "loss": 0.2801, "step": 6196 }, { "epoch": 0.21266300617707617, "grad_norm": 0.7685216840272858, "learning_rate": 9.150756125262839e-06, "loss": 0.3119, "step": 6197 }, { "epoch": 0.21269732326698695, "grad_norm": 0.7981635294647061, "learning_rate": 9.15044625384495e-06, "loss": 0.3423, "step": 6198 }, { "epoch": 0.21273164035689773, "grad_norm": 0.6953547585018097, "learning_rate": 9.150136331153134e-06, "loss": 0.2981, "step": 6199 }, { "epoch": 0.2127659574468085, "grad_norm": 0.7413374385079955, "learning_rate": 9.149826357191223e-06, "loss": 0.3245, "step": 6200 }, { "epoch": 0.2128002745367193, "grad_norm": 0.7398470890247963, "learning_rate": 9.149516331963045e-06, "loss": 0.3647, "step": 6201 }, { "epoch": 0.21283459162663007, "grad_norm": 0.7875490857928332, "learning_rate": 9.149206255472432e-06, "loss": 0.2754, "step": 6202 }, { "epoch": 0.21286890871654085, "grad_norm": 0.8403029154129739, "learning_rate": 9.148896127723213e-06, "loss": 0.4126, "step": 6203 }, { "epoch": 0.2129032258064516, "grad_norm": 0.7661901844411623, "learning_rate": 9.148585948719217e-06, "loss": 0.3358, "step": 6204 }, { "epoch": 0.21293754289636238, "grad_norm": 0.7431382752357658, "learning_rate": 9.148275718464281e-06, "loss": 0.3589, "step": 6205 }, { "epoch": 0.21297185998627316, "grad_norm": 0.7969880578551658, "learning_rate": 9.147965436962234e-06, "loss": 0.445, "step": 6206 }, { "epoch": 0.21300617707618394, "grad_norm": 0.715265812500586, "learning_rate": 9.14765510421691e-06, "loss": 0.2709, "step": 6207 }, { "epoch": 0.21304049416609472, "grad_norm": 0.7589359170605428, "learning_rate": 9.147344720232144e-06, "loss": 0.3603, "step": 6208 }, { "epoch": 0.2130748112560055, "grad_norm": 0.864830888136238, "learning_rate": 9.147034285011769e-06, "loss": 0.3029, "step": 6209 }, { "epoch": 0.21310912834591628, "grad_norm": 0.8088553151984814, "learning_rate": 9.14672379855962e-06, "loss": 0.3234, "step": 6210 }, { "epoch": 0.21314344543582703, "grad_norm": 0.8633373200340195, "learning_rate": 9.146413260879532e-06, "loss": 0.305, "step": 6211 }, { "epoch": 0.2131777625257378, "grad_norm": 0.8086891449490765, "learning_rate": 9.146102671975344e-06, "loss": 0.3356, "step": 6212 }, { "epoch": 0.2132120796156486, "grad_norm": 0.8576178325436494, "learning_rate": 9.145792031850891e-06, "loss": 0.364, "step": 6213 }, { "epoch": 0.21324639670555937, "grad_norm": 0.7676352505304377, "learning_rate": 9.14548134051001e-06, "loss": 0.354, "step": 6214 }, { "epoch": 0.21328071379547014, "grad_norm": 0.7935749124585044, "learning_rate": 9.14517059795654e-06, "loss": 0.3225, "step": 6215 }, { "epoch": 0.21331503088538092, "grad_norm": 0.8030926906422179, "learning_rate": 9.144859804194323e-06, "loss": 0.3234, "step": 6216 }, { "epoch": 0.2133493479752917, "grad_norm": 0.7731071682149316, "learning_rate": 9.144548959227193e-06, "loss": 0.288, "step": 6217 }, { "epoch": 0.21338366506520248, "grad_norm": 0.7594474192456012, "learning_rate": 9.144238063058993e-06, "loss": 0.3381, "step": 6218 }, { "epoch": 0.21341798215511323, "grad_norm": 0.8112996345965264, "learning_rate": 9.143927115693565e-06, "loss": 0.3581, "step": 6219 }, { "epoch": 0.213452299245024, "grad_norm": 0.8149804428156158, "learning_rate": 9.143616117134747e-06, "loss": 0.3035, "step": 6220 }, { "epoch": 0.2134866163349348, "grad_norm": 0.8110671010023923, "learning_rate": 9.143305067386383e-06, "loss": 0.3611, "step": 6221 }, { "epoch": 0.21352093342484557, "grad_norm": 0.8156930819936318, "learning_rate": 9.142993966452315e-06, "loss": 0.3647, "step": 6222 }, { "epoch": 0.21355525051475635, "grad_norm": 0.917177356220547, "learning_rate": 9.142682814336388e-06, "loss": 0.3482, "step": 6223 }, { "epoch": 0.21358956760466713, "grad_norm": 0.8226364936664897, "learning_rate": 9.142371611042446e-06, "loss": 0.3348, "step": 6224 }, { "epoch": 0.2136238846945779, "grad_norm": 0.8125590063798093, "learning_rate": 9.14206035657433e-06, "loss": 0.3048, "step": 6225 }, { "epoch": 0.2136582017844887, "grad_norm": 0.753152562832452, "learning_rate": 9.14174905093589e-06, "loss": 0.2783, "step": 6226 }, { "epoch": 0.21369251887439944, "grad_norm": 0.9730594538643632, "learning_rate": 9.141437694130966e-06, "loss": 0.3116, "step": 6227 }, { "epoch": 0.21372683596431022, "grad_norm": 0.7611039227342877, "learning_rate": 9.141126286163409e-06, "loss": 0.3704, "step": 6228 }, { "epoch": 0.213761153054221, "grad_norm": 0.8447638704749365, "learning_rate": 9.140814827037065e-06, "loss": 0.361, "step": 6229 }, { "epoch": 0.21379547014413178, "grad_norm": 0.7778627664031409, "learning_rate": 9.140503316755782e-06, "loss": 0.3423, "step": 6230 }, { "epoch": 0.21382978723404256, "grad_norm": 0.8042081233750875, "learning_rate": 9.140191755323407e-06, "loss": 0.3055, "step": 6231 }, { "epoch": 0.21386410432395334, "grad_norm": 0.7155241403078176, "learning_rate": 9.13988014274379e-06, "loss": 0.3816, "step": 6232 }, { "epoch": 0.21389842141386411, "grad_norm": 0.7915111164014369, "learning_rate": 9.139568479020781e-06, "loss": 0.3373, "step": 6233 }, { "epoch": 0.21393273850377487, "grad_norm": 0.8228167079046294, "learning_rate": 9.139256764158231e-06, "loss": 0.3408, "step": 6234 }, { "epoch": 0.21396705559368565, "grad_norm": 0.8452268354179587, "learning_rate": 9.138944998159988e-06, "loss": 0.3675, "step": 6235 }, { "epoch": 0.21400137268359642, "grad_norm": 0.7519796281460783, "learning_rate": 9.138633181029904e-06, "loss": 0.2923, "step": 6236 }, { "epoch": 0.2140356897735072, "grad_norm": 0.8568992219291476, "learning_rate": 9.138321312771835e-06, "loss": 0.3381, "step": 6237 }, { "epoch": 0.21407000686341798, "grad_norm": 0.7682709053074535, "learning_rate": 9.138009393389628e-06, "loss": 0.357, "step": 6238 }, { "epoch": 0.21410432395332876, "grad_norm": 0.8179587159054676, "learning_rate": 9.137697422887142e-06, "loss": 0.4009, "step": 6239 }, { "epoch": 0.21413864104323954, "grad_norm": 0.6987421292096938, "learning_rate": 9.137385401268226e-06, "loss": 0.3144, "step": 6240 }, { "epoch": 0.21417295813315032, "grad_norm": 0.7955265432299771, "learning_rate": 9.137073328536738e-06, "loss": 0.3363, "step": 6241 }, { "epoch": 0.21420727522306107, "grad_norm": 0.7904146535154647, "learning_rate": 9.136761204696534e-06, "loss": 0.3341, "step": 6242 }, { "epoch": 0.21424159231297185, "grad_norm": 0.80267065143971, "learning_rate": 9.136449029751467e-06, "loss": 0.3964, "step": 6243 }, { "epoch": 0.21427590940288263, "grad_norm": 0.762977599976564, "learning_rate": 9.136136803705395e-06, "loss": 0.3151, "step": 6244 }, { "epoch": 0.2143102264927934, "grad_norm": 0.85793368427765, "learning_rate": 9.135824526562174e-06, "loss": 0.3436, "step": 6245 }, { "epoch": 0.2143445435827042, "grad_norm": 0.8274606869471453, "learning_rate": 9.135512198325665e-06, "loss": 0.3089, "step": 6246 }, { "epoch": 0.21437886067261497, "grad_norm": 0.8579373490372398, "learning_rate": 9.135199818999722e-06, "loss": 0.3278, "step": 6247 }, { "epoch": 0.21441317776252575, "grad_norm": 0.852784070998739, "learning_rate": 9.134887388588207e-06, "loss": 0.3487, "step": 6248 }, { "epoch": 0.21444749485243653, "grad_norm": 0.8165120611874758, "learning_rate": 9.13457490709498e-06, "loss": 0.2901, "step": 6249 }, { "epoch": 0.21448181194234728, "grad_norm": 0.8646082062649036, "learning_rate": 9.134262374523901e-06, "loss": 0.3707, "step": 6250 }, { "epoch": 0.21451612903225806, "grad_norm": 0.8092627286298899, "learning_rate": 9.133949790878828e-06, "loss": 0.3272, "step": 6251 }, { "epoch": 0.21455044612216884, "grad_norm": 0.931101493649595, "learning_rate": 9.133637156163629e-06, "loss": 0.3909, "step": 6252 }, { "epoch": 0.21458476321207962, "grad_norm": 0.7785392808003321, "learning_rate": 9.13332447038216e-06, "loss": 0.3433, "step": 6253 }, { "epoch": 0.2146190803019904, "grad_norm": 0.8340384724067191, "learning_rate": 9.133011733538287e-06, "loss": 0.3504, "step": 6254 }, { "epoch": 0.21465339739190117, "grad_norm": 0.9052075963529037, "learning_rate": 9.132698945635869e-06, "loss": 0.3363, "step": 6255 }, { "epoch": 0.21468771448181195, "grad_norm": 0.8176721197315767, "learning_rate": 9.132386106678778e-06, "loss": 0.3097, "step": 6256 }, { "epoch": 0.2147220315717227, "grad_norm": 0.8514009594768842, "learning_rate": 9.132073216670873e-06, "loss": 0.3135, "step": 6257 }, { "epoch": 0.21475634866163348, "grad_norm": 0.9774506798341015, "learning_rate": 9.13176027561602e-06, "loss": 0.3294, "step": 6258 }, { "epoch": 0.21479066575154426, "grad_norm": 0.8033342197390343, "learning_rate": 9.131447283518088e-06, "loss": 0.3546, "step": 6259 }, { "epoch": 0.21482498284145504, "grad_norm": 1.0583956970123876, "learning_rate": 9.13113424038094e-06, "loss": 0.3355, "step": 6260 }, { "epoch": 0.21485929993136582, "grad_norm": 0.7259232295987011, "learning_rate": 9.130821146208446e-06, "loss": 0.313, "step": 6261 }, { "epoch": 0.2148936170212766, "grad_norm": 0.8132465851298912, "learning_rate": 9.130508001004473e-06, "loss": 0.4487, "step": 6262 }, { "epoch": 0.21492793411118738, "grad_norm": 0.818853506767345, "learning_rate": 9.130194804772888e-06, "loss": 0.3482, "step": 6263 }, { "epoch": 0.21496225120109816, "grad_norm": 0.7839760620259467, "learning_rate": 9.129881557517562e-06, "loss": 0.2749, "step": 6264 }, { "epoch": 0.2149965682910089, "grad_norm": 0.7960636680770111, "learning_rate": 9.129568259242366e-06, "loss": 0.3157, "step": 6265 }, { "epoch": 0.2150308853809197, "grad_norm": 0.8086815673092679, "learning_rate": 9.129254909951166e-06, "loss": 0.3209, "step": 6266 }, { "epoch": 0.21506520247083047, "grad_norm": 0.8091764471417574, "learning_rate": 9.128941509647838e-06, "loss": 0.3897, "step": 6267 }, { "epoch": 0.21509951956074125, "grad_norm": 1.0582823400771841, "learning_rate": 9.12862805833625e-06, "loss": 0.3321, "step": 6268 }, { "epoch": 0.21513383665065203, "grad_norm": 0.7931403862902273, "learning_rate": 9.128314556020277e-06, "loss": 0.3038, "step": 6269 }, { "epoch": 0.2151681537405628, "grad_norm": 0.7778543009981654, "learning_rate": 9.128001002703791e-06, "loss": 0.317, "step": 6270 }, { "epoch": 0.21520247083047359, "grad_norm": 0.7473598310299929, "learning_rate": 9.127687398390663e-06, "loss": 0.3195, "step": 6271 }, { "epoch": 0.21523678792038436, "grad_norm": 0.7206154804110788, "learning_rate": 9.12737374308477e-06, "loss": 0.3036, "step": 6272 }, { "epoch": 0.21527110501029512, "grad_norm": 0.7734712400134566, "learning_rate": 9.12706003678999e-06, "loss": 0.3352, "step": 6273 }, { "epoch": 0.2153054221002059, "grad_norm": 0.8546233359238192, "learning_rate": 9.126746279510193e-06, "loss": 0.3429, "step": 6274 }, { "epoch": 0.21533973919011667, "grad_norm": 0.7124819798634612, "learning_rate": 9.126432471249256e-06, "loss": 0.3037, "step": 6275 }, { "epoch": 0.21537405628002745, "grad_norm": 0.6989247988040069, "learning_rate": 9.126118612011057e-06, "loss": 0.3245, "step": 6276 }, { "epoch": 0.21540837336993823, "grad_norm": 0.7520628124899489, "learning_rate": 9.125804701799475e-06, "loss": 0.3086, "step": 6277 }, { "epoch": 0.215442690459849, "grad_norm": 0.7810069646843407, "learning_rate": 9.125490740618384e-06, "loss": 0.2945, "step": 6278 }, { "epoch": 0.2154770075497598, "grad_norm": 0.7491534665802752, "learning_rate": 9.125176728471665e-06, "loss": 0.3161, "step": 6279 }, { "epoch": 0.21551132463967054, "grad_norm": 0.666943047877125, "learning_rate": 9.124862665363198e-06, "loss": 0.2729, "step": 6280 }, { "epoch": 0.21554564172958132, "grad_norm": 0.8024861747147354, "learning_rate": 9.12454855129686e-06, "loss": 0.343, "step": 6281 }, { "epoch": 0.2155799588194921, "grad_norm": 0.7978572983732772, "learning_rate": 9.124234386276536e-06, "loss": 0.3028, "step": 6282 }, { "epoch": 0.21561427590940288, "grad_norm": 1.3060247618857808, "learning_rate": 9.123920170306103e-06, "loss": 0.365, "step": 6283 }, { "epoch": 0.21564859299931366, "grad_norm": 0.7825237932198313, "learning_rate": 9.123605903389445e-06, "loss": 0.3458, "step": 6284 }, { "epoch": 0.21568291008922444, "grad_norm": 0.7612764826845423, "learning_rate": 9.123291585530442e-06, "loss": 0.3259, "step": 6285 }, { "epoch": 0.21571722717913522, "grad_norm": 0.7847565480946036, "learning_rate": 9.122977216732983e-06, "loss": 0.3359, "step": 6286 }, { "epoch": 0.215751544269046, "grad_norm": 0.8311155683756603, "learning_rate": 9.122662797000943e-06, "loss": 0.3611, "step": 6287 }, { "epoch": 0.21578586135895675, "grad_norm": 0.7543897433433787, "learning_rate": 9.122348326338214e-06, "loss": 0.3797, "step": 6288 }, { "epoch": 0.21582017844886753, "grad_norm": 0.690248231797033, "learning_rate": 9.122033804748675e-06, "loss": 0.3038, "step": 6289 }, { "epoch": 0.2158544955387783, "grad_norm": 0.7650186781753375, "learning_rate": 9.121719232236215e-06, "loss": 0.3344, "step": 6290 }, { "epoch": 0.2158888126286891, "grad_norm": 0.855392801697274, "learning_rate": 9.121404608804719e-06, "loss": 0.3434, "step": 6291 }, { "epoch": 0.21592312971859987, "grad_norm": 0.9057997348941035, "learning_rate": 9.121089934458074e-06, "loss": 0.3414, "step": 6292 }, { "epoch": 0.21595744680851064, "grad_norm": 0.7608947462083647, "learning_rate": 9.12077520920017e-06, "loss": 0.2648, "step": 6293 }, { "epoch": 0.21599176389842142, "grad_norm": 0.8013359202659763, "learning_rate": 9.12046043303489e-06, "loss": 0.3331, "step": 6294 }, { "epoch": 0.2160260809883322, "grad_norm": 0.8415482355501664, "learning_rate": 9.120145605966126e-06, "loss": 0.3369, "step": 6295 }, { "epoch": 0.21606039807824295, "grad_norm": 0.8173239265774075, "learning_rate": 9.119830727997766e-06, "loss": 0.3567, "step": 6296 }, { "epoch": 0.21609471516815373, "grad_norm": 0.7568223463999005, "learning_rate": 9.119515799133702e-06, "loss": 0.3537, "step": 6297 }, { "epoch": 0.2161290322580645, "grad_norm": 0.845589338251542, "learning_rate": 9.119200819377822e-06, "loss": 0.3385, "step": 6298 }, { "epoch": 0.2161633493479753, "grad_norm": 0.7829752399141577, "learning_rate": 9.11888578873402e-06, "loss": 0.3039, "step": 6299 }, { "epoch": 0.21619766643788607, "grad_norm": 0.895319453977994, "learning_rate": 9.118570707206184e-06, "loss": 0.3829, "step": 6300 }, { "epoch": 0.21623198352779685, "grad_norm": 0.7530536162079032, "learning_rate": 9.118255574798212e-06, "loss": 0.3734, "step": 6301 }, { "epoch": 0.21626630061770763, "grad_norm": 0.8359558698758339, "learning_rate": 9.11794039151399e-06, "loss": 0.3655, "step": 6302 }, { "epoch": 0.21630061770761838, "grad_norm": 0.8191705908767158, "learning_rate": 9.117625157357418e-06, "loss": 0.3409, "step": 6303 }, { "epoch": 0.21633493479752916, "grad_norm": 0.8015854507348855, "learning_rate": 9.117309872332388e-06, "loss": 0.3703, "step": 6304 }, { "epoch": 0.21636925188743994, "grad_norm": 0.7954129513317468, "learning_rate": 9.116994536442794e-06, "loss": 0.3647, "step": 6305 }, { "epoch": 0.21640356897735072, "grad_norm": 0.7024270253631765, "learning_rate": 9.11667914969253e-06, "loss": 0.362, "step": 6306 }, { "epoch": 0.2164378860672615, "grad_norm": 0.7949684425214515, "learning_rate": 9.116363712085498e-06, "loss": 0.3441, "step": 6307 }, { "epoch": 0.21647220315717228, "grad_norm": 0.7855549088180499, "learning_rate": 9.11604822362559e-06, "loss": 0.3391, "step": 6308 }, { "epoch": 0.21650652024708306, "grad_norm": 0.8426062745499452, "learning_rate": 9.115732684316708e-06, "loss": 0.3315, "step": 6309 }, { "epoch": 0.21654083733699384, "grad_norm": 0.8196684772103446, "learning_rate": 9.115417094162744e-06, "loss": 0.3433, "step": 6310 }, { "epoch": 0.2165751544269046, "grad_norm": 0.7264885396021151, "learning_rate": 9.1151014531676e-06, "loss": 0.3103, "step": 6311 }, { "epoch": 0.21660947151681537, "grad_norm": 0.86797496517197, "learning_rate": 9.114785761335177e-06, "loss": 0.3439, "step": 6312 }, { "epoch": 0.21664378860672615, "grad_norm": 0.7959659614108284, "learning_rate": 9.114470018669372e-06, "loss": 0.2987, "step": 6313 }, { "epoch": 0.21667810569663692, "grad_norm": 0.7978754570253623, "learning_rate": 9.114154225174087e-06, "loss": 0.3178, "step": 6314 }, { "epoch": 0.2167124227865477, "grad_norm": 0.8352077321562593, "learning_rate": 9.113838380853225e-06, "loss": 0.3759, "step": 6315 }, { "epoch": 0.21674673987645848, "grad_norm": 0.7831601971067659, "learning_rate": 9.113522485710683e-06, "loss": 0.3523, "step": 6316 }, { "epoch": 0.21678105696636926, "grad_norm": 0.7312586322048179, "learning_rate": 9.113206539750367e-06, "loss": 0.2719, "step": 6317 }, { "epoch": 0.21681537405628, "grad_norm": 0.6741606841027122, "learning_rate": 9.112890542976183e-06, "loss": 0.3019, "step": 6318 }, { "epoch": 0.2168496911461908, "grad_norm": 0.7463765056369536, "learning_rate": 9.112574495392027e-06, "loss": 0.3358, "step": 6319 }, { "epoch": 0.21688400823610157, "grad_norm": 0.7686806527997091, "learning_rate": 9.112258397001812e-06, "loss": 0.3236, "step": 6320 }, { "epoch": 0.21691832532601235, "grad_norm": 0.8027575437404875, "learning_rate": 9.111942247809438e-06, "loss": 0.3362, "step": 6321 }, { "epoch": 0.21695264241592313, "grad_norm": 0.8442950086901293, "learning_rate": 9.11162604781881e-06, "loss": 0.3132, "step": 6322 }, { "epoch": 0.2169869595058339, "grad_norm": 0.8186584472842655, "learning_rate": 9.111309797033836e-06, "loss": 0.2909, "step": 6323 }, { "epoch": 0.2170212765957447, "grad_norm": 0.7787729948365644, "learning_rate": 9.110993495458425e-06, "loss": 0.2869, "step": 6324 }, { "epoch": 0.21705559368565547, "grad_norm": 0.7270822627862252, "learning_rate": 9.11067714309648e-06, "loss": 0.3144, "step": 6325 }, { "epoch": 0.21708991077556622, "grad_norm": 0.8004245207091766, "learning_rate": 9.110360739951912e-06, "loss": 0.3265, "step": 6326 }, { "epoch": 0.217124227865477, "grad_norm": 0.7925163677468249, "learning_rate": 9.11004428602863e-06, "loss": 0.283, "step": 6327 }, { "epoch": 0.21715854495538778, "grad_norm": 0.7795223261397208, "learning_rate": 9.109727781330542e-06, "loss": 0.3506, "step": 6328 }, { "epoch": 0.21719286204529856, "grad_norm": 0.7558558397703768, "learning_rate": 9.10941122586156e-06, "loss": 0.3539, "step": 6329 }, { "epoch": 0.21722717913520934, "grad_norm": 0.8309672641067516, "learning_rate": 9.109094619625591e-06, "loss": 0.3288, "step": 6330 }, { "epoch": 0.21726149622512012, "grad_norm": 0.7985419830760888, "learning_rate": 9.10877796262655e-06, "loss": 0.3269, "step": 6331 }, { "epoch": 0.2172958133150309, "grad_norm": 0.6962188229819788, "learning_rate": 9.108461254868349e-06, "loss": 0.2817, "step": 6332 }, { "epoch": 0.21733013040494167, "grad_norm": 0.8843345652426238, "learning_rate": 9.108144496354897e-06, "loss": 0.3376, "step": 6333 }, { "epoch": 0.21736444749485243, "grad_norm": 0.9014095069143456, "learning_rate": 9.107827687090111e-06, "loss": 0.3674, "step": 6334 }, { "epoch": 0.2173987645847632, "grad_norm": 0.7664263849595718, "learning_rate": 9.1075108270779e-06, "loss": 0.3545, "step": 6335 }, { "epoch": 0.21743308167467398, "grad_norm": 0.8011602219425779, "learning_rate": 9.107193916322186e-06, "loss": 0.3626, "step": 6336 }, { "epoch": 0.21746739876458476, "grad_norm": 0.7163639462818472, "learning_rate": 9.106876954826877e-06, "loss": 0.297, "step": 6337 }, { "epoch": 0.21750171585449554, "grad_norm": 0.8360423794639772, "learning_rate": 9.106559942595894e-06, "loss": 0.3163, "step": 6338 }, { "epoch": 0.21753603294440632, "grad_norm": 0.7196948525610721, "learning_rate": 9.106242879633147e-06, "loss": 0.3403, "step": 6339 }, { "epoch": 0.2175703500343171, "grad_norm": 0.8005788337591173, "learning_rate": 9.105925765942558e-06, "loss": 0.3749, "step": 6340 }, { "epoch": 0.21760466712422785, "grad_norm": 0.7787013538958483, "learning_rate": 9.105608601528044e-06, "loss": 0.4252, "step": 6341 }, { "epoch": 0.21763898421413863, "grad_norm": 0.7196142774551937, "learning_rate": 9.105291386393523e-06, "loss": 0.2767, "step": 6342 }, { "epoch": 0.2176733013040494, "grad_norm": 0.8507432066925747, "learning_rate": 9.104974120542913e-06, "loss": 0.3773, "step": 6343 }, { "epoch": 0.2177076183939602, "grad_norm": 0.753389789338039, "learning_rate": 9.104656803980132e-06, "loss": 0.2644, "step": 6344 }, { "epoch": 0.21774193548387097, "grad_norm": 0.8442712481877942, "learning_rate": 9.104339436709104e-06, "loss": 0.3402, "step": 6345 }, { "epoch": 0.21777625257378175, "grad_norm": 0.8765773671479268, "learning_rate": 9.104022018733746e-06, "loss": 0.4055, "step": 6346 }, { "epoch": 0.21781056966369253, "grad_norm": 0.7037459162516108, "learning_rate": 9.103704550057981e-06, "loss": 0.4002, "step": 6347 }, { "epoch": 0.2178448867536033, "grad_norm": 0.8356892142161395, "learning_rate": 9.103387030685731e-06, "loss": 0.3168, "step": 6348 }, { "epoch": 0.21787920384351406, "grad_norm": 0.7929236616801226, "learning_rate": 9.103069460620917e-06, "loss": 0.3185, "step": 6349 }, { "epoch": 0.21791352093342484, "grad_norm": 0.8756530542634968, "learning_rate": 9.102751839867465e-06, "loss": 0.3285, "step": 6350 }, { "epoch": 0.21794783802333562, "grad_norm": 0.7446807388611604, "learning_rate": 9.102434168429297e-06, "loss": 0.3132, "step": 6351 }, { "epoch": 0.2179821551132464, "grad_norm": 0.8015069159573928, "learning_rate": 9.102116446310338e-06, "loss": 0.3375, "step": 6352 }, { "epoch": 0.21801647220315717, "grad_norm": 0.7442347669405007, "learning_rate": 9.101798673514512e-06, "loss": 0.2942, "step": 6353 }, { "epoch": 0.21805078929306795, "grad_norm": 0.7826090917396058, "learning_rate": 9.101480850045748e-06, "loss": 0.3475, "step": 6354 }, { "epoch": 0.21808510638297873, "grad_norm": 0.7629067373002283, "learning_rate": 9.101162975907966e-06, "loss": 0.3038, "step": 6355 }, { "epoch": 0.2181194234728895, "grad_norm": 0.731465030602298, "learning_rate": 9.1008450511051e-06, "loss": 0.3294, "step": 6356 }, { "epoch": 0.21815374056280026, "grad_norm": 0.7266689253158592, "learning_rate": 9.100527075641075e-06, "loss": 0.3292, "step": 6357 }, { "epoch": 0.21818805765271104, "grad_norm": 0.8461382647098661, "learning_rate": 9.100209049519817e-06, "loss": 0.3438, "step": 6358 }, { "epoch": 0.21822237474262182, "grad_norm": 0.8675105373282476, "learning_rate": 9.099890972745257e-06, "loss": 0.3728, "step": 6359 }, { "epoch": 0.2182566918325326, "grad_norm": 0.7520778481821677, "learning_rate": 9.099572845321324e-06, "loss": 0.3044, "step": 6360 }, { "epoch": 0.21829100892244338, "grad_norm": 0.6820046819449653, "learning_rate": 9.09925466725195e-06, "loss": 0.2821, "step": 6361 }, { "epoch": 0.21832532601235416, "grad_norm": 0.8382990117235342, "learning_rate": 9.09893643854106e-06, "loss": 0.3188, "step": 6362 }, { "epoch": 0.21835964310226494, "grad_norm": 0.698325747606041, "learning_rate": 9.098618159192592e-06, "loss": 0.3357, "step": 6363 }, { "epoch": 0.2183939601921757, "grad_norm": 0.8373037631638252, "learning_rate": 9.098299829210472e-06, "loss": 0.334, "step": 6364 }, { "epoch": 0.21842827728208647, "grad_norm": 0.8342558749795893, "learning_rate": 9.09798144859864e-06, "loss": 0.3448, "step": 6365 }, { "epoch": 0.21846259437199725, "grad_norm": 0.8334634561973601, "learning_rate": 9.097663017361023e-06, "loss": 0.3743, "step": 6366 }, { "epoch": 0.21849691146190803, "grad_norm": 0.8366144227800411, "learning_rate": 9.097344535501558e-06, "loss": 0.3672, "step": 6367 }, { "epoch": 0.2185312285518188, "grad_norm": 0.7889631745241483, "learning_rate": 9.097026003024176e-06, "loss": 0.3104, "step": 6368 }, { "epoch": 0.21856554564172959, "grad_norm": 0.8143894318575542, "learning_rate": 9.096707419932816e-06, "loss": 0.3312, "step": 6369 }, { "epoch": 0.21859986273164037, "grad_norm": 0.8206368557511715, "learning_rate": 9.096388786231412e-06, "loss": 0.3238, "step": 6370 }, { "epoch": 0.21863417982155114, "grad_norm": 0.8070649404114801, "learning_rate": 9.0960701019239e-06, "loss": 0.3382, "step": 6371 }, { "epoch": 0.2186684969114619, "grad_norm": 0.7753293078128406, "learning_rate": 9.095751367014219e-06, "loss": 0.3065, "step": 6372 }, { "epoch": 0.21870281400137268, "grad_norm": 0.8951895505543095, "learning_rate": 9.095432581506303e-06, "loss": 0.3684, "step": 6373 }, { "epoch": 0.21873713109128345, "grad_norm": 0.9093999266218691, "learning_rate": 9.095113745404092e-06, "loss": 0.3117, "step": 6374 }, { "epoch": 0.21877144818119423, "grad_norm": 0.7497046999420189, "learning_rate": 9.094794858711526e-06, "loss": 0.3527, "step": 6375 }, { "epoch": 0.218805765271105, "grad_norm": 0.711304631811939, "learning_rate": 9.094475921432544e-06, "loss": 0.3327, "step": 6376 }, { "epoch": 0.2188400823610158, "grad_norm": 0.7979060061917348, "learning_rate": 9.094156933571086e-06, "loss": 0.3262, "step": 6377 }, { "epoch": 0.21887439945092657, "grad_norm": 0.8355634488651548, "learning_rate": 9.093837895131092e-06, "loss": 0.3201, "step": 6378 }, { "epoch": 0.21890871654083735, "grad_norm": 0.8352944671710694, "learning_rate": 9.093518806116504e-06, "loss": 0.3473, "step": 6379 }, { "epoch": 0.2189430336307481, "grad_norm": 0.7946994955439016, "learning_rate": 9.093199666531263e-06, "loss": 0.2978, "step": 6380 }, { "epoch": 0.21897735072065888, "grad_norm": 0.7095646071405942, "learning_rate": 9.092880476379313e-06, "loss": 0.3267, "step": 6381 }, { "epoch": 0.21901166781056966, "grad_norm": 0.7831364508649629, "learning_rate": 9.092561235664597e-06, "loss": 0.2865, "step": 6382 }, { "epoch": 0.21904598490048044, "grad_norm": 0.7558701896670301, "learning_rate": 9.092241944391056e-06, "loss": 0.3408, "step": 6383 }, { "epoch": 0.21908030199039122, "grad_norm": 1.0364212280957639, "learning_rate": 9.091922602562639e-06, "loss": 0.4123, "step": 6384 }, { "epoch": 0.219114619080302, "grad_norm": 0.7914689210339287, "learning_rate": 9.091603210183289e-06, "loss": 0.2976, "step": 6385 }, { "epoch": 0.21914893617021278, "grad_norm": 0.772093284672834, "learning_rate": 9.09128376725695e-06, "loss": 0.3402, "step": 6386 }, { "epoch": 0.21918325326012353, "grad_norm": 1.1765828917776928, "learning_rate": 9.090964273787572e-06, "loss": 0.3065, "step": 6387 }, { "epoch": 0.2192175703500343, "grad_norm": 0.8058463040639253, "learning_rate": 9.090644729779099e-06, "loss": 0.3305, "step": 6388 }, { "epoch": 0.2192518874399451, "grad_norm": 0.7930826307089781, "learning_rate": 9.09032513523548e-06, "loss": 0.3714, "step": 6389 }, { "epoch": 0.21928620452985587, "grad_norm": 0.7786256307147637, "learning_rate": 9.090005490160663e-06, "loss": 0.2892, "step": 6390 }, { "epoch": 0.21932052161976665, "grad_norm": 0.7732315292332663, "learning_rate": 9.089685794558597e-06, "loss": 0.3396, "step": 6391 }, { "epoch": 0.21935483870967742, "grad_norm": 0.7713628965552627, "learning_rate": 9.089366048433229e-06, "loss": 0.344, "step": 6392 }, { "epoch": 0.2193891557995882, "grad_norm": 0.7574818999453482, "learning_rate": 9.089046251788512e-06, "loss": 0.3463, "step": 6393 }, { "epoch": 0.21942347288949898, "grad_norm": 0.8456326347838403, "learning_rate": 9.088726404628397e-06, "loss": 0.3942, "step": 6394 }, { "epoch": 0.21945778997940973, "grad_norm": 0.8311659256924407, "learning_rate": 9.088406506956834e-06, "loss": 0.3368, "step": 6395 }, { "epoch": 0.2194921070693205, "grad_norm": 0.7724695234308477, "learning_rate": 9.088086558777774e-06, "loss": 0.3587, "step": 6396 }, { "epoch": 0.2195264241592313, "grad_norm": 0.8600379098142213, "learning_rate": 9.087766560095173e-06, "loss": 0.356, "step": 6397 }, { "epoch": 0.21956074124914207, "grad_norm": 0.7534773092192851, "learning_rate": 9.08744651091298e-06, "loss": 0.2918, "step": 6398 }, { "epoch": 0.21959505833905285, "grad_norm": 0.7560743339615463, "learning_rate": 9.087126411235152e-06, "loss": 0.3449, "step": 6399 }, { "epoch": 0.21962937542896363, "grad_norm": 0.7954282356345637, "learning_rate": 9.086806261065641e-06, "loss": 0.4018, "step": 6400 }, { "epoch": 0.2196636925188744, "grad_norm": 0.7653383572589095, "learning_rate": 9.086486060408406e-06, "loss": 0.3033, "step": 6401 }, { "epoch": 0.2196980096087852, "grad_norm": 0.7737540000347006, "learning_rate": 9.086165809267398e-06, "loss": 0.3036, "step": 6402 }, { "epoch": 0.21973232669869594, "grad_norm": 0.7196008414405878, "learning_rate": 9.085845507646576e-06, "loss": 0.2997, "step": 6403 }, { "epoch": 0.21976664378860672, "grad_norm": 0.8491580938429153, "learning_rate": 9.085525155549896e-06, "loss": 0.2947, "step": 6404 }, { "epoch": 0.2198009608785175, "grad_norm": 0.7093015401276137, "learning_rate": 9.085204752981316e-06, "loss": 0.3075, "step": 6405 }, { "epoch": 0.21983527796842828, "grad_norm": 0.6897684438024104, "learning_rate": 9.084884299944795e-06, "loss": 0.3217, "step": 6406 }, { "epoch": 0.21986959505833906, "grad_norm": 0.8084698305302852, "learning_rate": 9.08456379644429e-06, "loss": 0.3619, "step": 6407 }, { "epoch": 0.21990391214824984, "grad_norm": 0.857265454270725, "learning_rate": 9.084243242483762e-06, "loss": 0.3064, "step": 6408 }, { "epoch": 0.21993822923816062, "grad_norm": 0.7592864408429073, "learning_rate": 9.083922638067171e-06, "loss": 0.274, "step": 6409 }, { "epoch": 0.21997254632807137, "grad_norm": 0.7502416155609265, "learning_rate": 9.083601983198476e-06, "loss": 0.3301, "step": 6410 }, { "epoch": 0.22000686341798215, "grad_norm": 0.7258745997262394, "learning_rate": 9.08328127788164e-06, "loss": 0.3122, "step": 6411 }, { "epoch": 0.22004118050789293, "grad_norm": 0.7881299306502934, "learning_rate": 9.082960522120626e-06, "loss": 0.3176, "step": 6412 }, { "epoch": 0.2200754975978037, "grad_norm": 0.7571999683648012, "learning_rate": 9.082639715919393e-06, "loss": 0.2714, "step": 6413 }, { "epoch": 0.22010981468771448, "grad_norm": 0.8306986717770303, "learning_rate": 9.082318859281906e-06, "loss": 0.3505, "step": 6414 }, { "epoch": 0.22014413177762526, "grad_norm": 0.8162331370309612, "learning_rate": 9.081997952212129e-06, "loss": 0.343, "step": 6415 }, { "epoch": 0.22017844886753604, "grad_norm": 0.7509451700735986, "learning_rate": 9.081676994714028e-06, "loss": 0.2849, "step": 6416 }, { "epoch": 0.22021276595744682, "grad_norm": 0.7729299123256287, "learning_rate": 9.081355986791565e-06, "loss": 0.3784, "step": 6417 }, { "epoch": 0.22024708304735757, "grad_norm": 0.9004656750264449, "learning_rate": 9.081034928448709e-06, "loss": 0.3823, "step": 6418 }, { "epoch": 0.22028140013726835, "grad_norm": 0.812293079857694, "learning_rate": 9.080713819689423e-06, "loss": 0.3076, "step": 6419 }, { "epoch": 0.22031571722717913, "grad_norm": 0.725793303490481, "learning_rate": 9.080392660517676e-06, "loss": 0.2891, "step": 6420 }, { "epoch": 0.2203500343170899, "grad_norm": 0.9125798767330467, "learning_rate": 9.080071450937436e-06, "loss": 0.3612, "step": 6421 }, { "epoch": 0.2203843514070007, "grad_norm": 0.8612501875027637, "learning_rate": 9.079750190952667e-06, "loss": 0.3679, "step": 6422 }, { "epoch": 0.22041866849691147, "grad_norm": 0.8714069082936573, "learning_rate": 9.079428880567342e-06, "loss": 0.3218, "step": 6423 }, { "epoch": 0.22045298558682225, "grad_norm": 0.8603903320021161, "learning_rate": 9.079107519785432e-06, "loss": 0.2923, "step": 6424 }, { "epoch": 0.22048730267673303, "grad_norm": 0.7380903374398539, "learning_rate": 9.078786108610902e-06, "loss": 0.3164, "step": 6425 }, { "epoch": 0.22052161976664378, "grad_norm": 0.7936431764438051, "learning_rate": 9.078464647047724e-06, "loss": 0.3094, "step": 6426 }, { "epoch": 0.22055593685655456, "grad_norm": 0.7985288580571908, "learning_rate": 9.078143135099873e-06, "loss": 0.3347, "step": 6427 }, { "epoch": 0.22059025394646534, "grad_norm": 0.7297640067966685, "learning_rate": 9.077821572771316e-06, "loss": 0.311, "step": 6428 }, { "epoch": 0.22062457103637612, "grad_norm": 0.8618300009167101, "learning_rate": 9.07749996006603e-06, "loss": 0.3856, "step": 6429 }, { "epoch": 0.2206588881262869, "grad_norm": 0.784760160557215, "learning_rate": 9.077178296987984e-06, "loss": 0.307, "step": 6430 }, { "epoch": 0.22069320521619767, "grad_norm": 0.8381023019761762, "learning_rate": 9.076856583541156e-06, "loss": 0.3482, "step": 6431 }, { "epoch": 0.22072752230610845, "grad_norm": 0.754140221993634, "learning_rate": 9.076534819729516e-06, "loss": 0.2648, "step": 6432 }, { "epoch": 0.2207618393960192, "grad_norm": 0.7828643514548363, "learning_rate": 9.076213005557041e-06, "loss": 0.3561, "step": 6433 }, { "epoch": 0.22079615648592998, "grad_norm": 0.8929580679877758, "learning_rate": 9.075891141027707e-06, "loss": 0.312, "step": 6434 }, { "epoch": 0.22083047357584076, "grad_norm": 0.869520934212167, "learning_rate": 9.075569226145491e-06, "loss": 0.3684, "step": 6435 }, { "epoch": 0.22086479066575154, "grad_norm": 0.7403213970182327, "learning_rate": 9.075247260914369e-06, "loss": 0.3318, "step": 6436 }, { "epoch": 0.22089910775566232, "grad_norm": 0.9021009749002704, "learning_rate": 9.074925245338316e-06, "loss": 0.4591, "step": 6437 }, { "epoch": 0.2209334248455731, "grad_norm": 0.7663847758297787, "learning_rate": 9.074603179421313e-06, "loss": 0.351, "step": 6438 }, { "epoch": 0.22096774193548388, "grad_norm": 0.7997843395539762, "learning_rate": 9.07428106316734e-06, "loss": 0.3368, "step": 6439 }, { "epoch": 0.22100205902539466, "grad_norm": 0.8972486439598959, "learning_rate": 9.073958896580375e-06, "loss": 0.3182, "step": 6440 }, { "epoch": 0.2210363761153054, "grad_norm": 0.823267945039784, "learning_rate": 9.073636679664394e-06, "loss": 0.3318, "step": 6441 }, { "epoch": 0.2210706932052162, "grad_norm": 0.6926978557311825, "learning_rate": 9.073314412423385e-06, "loss": 0.3029, "step": 6442 }, { "epoch": 0.22110501029512697, "grad_norm": 0.7687980100183638, "learning_rate": 9.072992094861323e-06, "loss": 0.3264, "step": 6443 }, { "epoch": 0.22113932738503775, "grad_norm": 0.9127796941449693, "learning_rate": 9.072669726982196e-06, "loss": 0.3056, "step": 6444 }, { "epoch": 0.22117364447494853, "grad_norm": 0.7358848381124945, "learning_rate": 9.072347308789979e-06, "loss": 0.3032, "step": 6445 }, { "epoch": 0.2212079615648593, "grad_norm": 0.8905870069588944, "learning_rate": 9.07202484028866e-06, "loss": 0.3269, "step": 6446 }, { "epoch": 0.22124227865477009, "grad_norm": 0.730655862922173, "learning_rate": 9.071702321482223e-06, "loss": 0.3321, "step": 6447 }, { "epoch": 0.22127659574468084, "grad_norm": 0.6829901198160047, "learning_rate": 9.071379752374649e-06, "loss": 0.3022, "step": 6448 }, { "epoch": 0.22131091283459162, "grad_norm": 0.8146289729446194, "learning_rate": 9.071057132969925e-06, "loss": 0.4105, "step": 6449 }, { "epoch": 0.2213452299245024, "grad_norm": 0.7278871241173211, "learning_rate": 9.070734463272037e-06, "loss": 0.3073, "step": 6450 }, { "epoch": 0.22137954701441317, "grad_norm": 0.7394048063008872, "learning_rate": 9.070411743284971e-06, "loss": 0.3275, "step": 6451 }, { "epoch": 0.22141386410432395, "grad_norm": 0.7544553902589941, "learning_rate": 9.070088973012713e-06, "loss": 0.2776, "step": 6452 }, { "epoch": 0.22144818119423473, "grad_norm": 0.8110978732148281, "learning_rate": 9.069766152459252e-06, "loss": 0.5, "step": 6453 }, { "epoch": 0.2214824982841455, "grad_norm": 0.7446684603500118, "learning_rate": 9.069443281628574e-06, "loss": 0.2797, "step": 6454 }, { "epoch": 0.2215168153740563, "grad_norm": 0.6686267396339456, "learning_rate": 9.06912036052467e-06, "loss": 0.3121, "step": 6455 }, { "epoch": 0.22155113246396704, "grad_norm": 0.8937226878235347, "learning_rate": 9.068797389151527e-06, "loss": 0.2845, "step": 6456 }, { "epoch": 0.22158544955387782, "grad_norm": 0.7790034490397908, "learning_rate": 9.068474367513135e-06, "loss": 0.3451, "step": 6457 }, { "epoch": 0.2216197666437886, "grad_norm": 0.7486021366987012, "learning_rate": 9.068151295613487e-06, "loss": 0.2628, "step": 6458 }, { "epoch": 0.22165408373369938, "grad_norm": 0.7749968421988477, "learning_rate": 9.067828173456574e-06, "loss": 0.3108, "step": 6459 }, { "epoch": 0.22168840082361016, "grad_norm": 0.7877528007763336, "learning_rate": 9.067505001046385e-06, "loss": 0.3276, "step": 6460 }, { "epoch": 0.22172271791352094, "grad_norm": 0.907827506514252, "learning_rate": 9.067181778386914e-06, "loss": 0.3741, "step": 6461 }, { "epoch": 0.22175703500343172, "grad_norm": 0.8427059894256157, "learning_rate": 9.066858505482154e-06, "loss": 0.3544, "step": 6462 }, { "epoch": 0.2217913520933425, "grad_norm": 0.7692426085538755, "learning_rate": 9.0665351823361e-06, "loss": 0.3117, "step": 6463 }, { "epoch": 0.22182566918325325, "grad_norm": 0.7966720559138668, "learning_rate": 9.066211808952743e-06, "loss": 0.3406, "step": 6464 }, { "epoch": 0.22185998627316403, "grad_norm": 0.8470893492767579, "learning_rate": 9.065888385336082e-06, "loss": 0.3177, "step": 6465 }, { "epoch": 0.2218943033630748, "grad_norm": 0.7909277057037633, "learning_rate": 9.065564911490108e-06, "loss": 0.3337, "step": 6466 }, { "epoch": 0.2219286204529856, "grad_norm": 0.8104906441819281, "learning_rate": 9.06524138741882e-06, "loss": 0.3544, "step": 6467 }, { "epoch": 0.22196293754289637, "grad_norm": 0.8814688013613301, "learning_rate": 9.064917813126215e-06, "loss": 0.374, "step": 6468 }, { "epoch": 0.22199725463280714, "grad_norm": 0.772831626895203, "learning_rate": 9.064594188616289e-06, "loss": 0.3159, "step": 6469 }, { "epoch": 0.22203157172271792, "grad_norm": 0.8667875401544196, "learning_rate": 9.064270513893041e-06, "loss": 0.3374, "step": 6470 }, { "epoch": 0.22206588881262868, "grad_norm": 0.7731521985987191, "learning_rate": 9.063946788960469e-06, "loss": 0.3387, "step": 6471 }, { "epoch": 0.22210020590253945, "grad_norm": 0.7307062388897559, "learning_rate": 9.063623013822573e-06, "loss": 0.3117, "step": 6472 }, { "epoch": 0.22213452299245023, "grad_norm": 0.7877790128104437, "learning_rate": 9.063299188483353e-06, "loss": 0.3277, "step": 6473 }, { "epoch": 0.222168840082361, "grad_norm": 0.8058015700695745, "learning_rate": 9.062975312946808e-06, "loss": 0.3778, "step": 6474 }, { "epoch": 0.2222031571722718, "grad_norm": 0.7632092504481065, "learning_rate": 9.06265138721694e-06, "loss": 0.3178, "step": 6475 }, { "epoch": 0.22223747426218257, "grad_norm": 0.7182199345212268, "learning_rate": 9.06232741129775e-06, "loss": 0.3222, "step": 6476 }, { "epoch": 0.22227179135209335, "grad_norm": 0.8692546085485747, "learning_rate": 9.062003385193243e-06, "loss": 0.3062, "step": 6477 }, { "epoch": 0.22230610844200413, "grad_norm": 0.8940153260303849, "learning_rate": 9.061679308907418e-06, "loss": 0.2959, "step": 6478 }, { "epoch": 0.22234042553191488, "grad_norm": 0.7849635140768157, "learning_rate": 9.061355182444283e-06, "loss": 0.3096, "step": 6479 }, { "epoch": 0.22237474262182566, "grad_norm": 0.8809519270968815, "learning_rate": 9.061031005807837e-06, "loss": 0.3613, "step": 6480 }, { "epoch": 0.22240905971173644, "grad_norm": 0.747668068817433, "learning_rate": 9.06070677900209e-06, "loss": 0.2875, "step": 6481 }, { "epoch": 0.22244337680164722, "grad_norm": 0.8461020786882969, "learning_rate": 9.060382502031044e-06, "loss": 0.3305, "step": 6482 }, { "epoch": 0.222477693891558, "grad_norm": 0.8282341259109194, "learning_rate": 9.060058174898705e-06, "loss": 0.3348, "step": 6483 }, { "epoch": 0.22251201098146878, "grad_norm": 0.7886998815813604, "learning_rate": 9.059733797609082e-06, "loss": 0.3004, "step": 6484 }, { "epoch": 0.22254632807137956, "grad_norm": 0.8334813355682326, "learning_rate": 9.059409370166181e-06, "loss": 0.3053, "step": 6485 }, { "epoch": 0.22258064516129034, "grad_norm": 0.8339206770396221, "learning_rate": 9.05908489257401e-06, "loss": 0.3401, "step": 6486 }, { "epoch": 0.2226149622512011, "grad_norm": 0.8491320290028338, "learning_rate": 9.058760364836578e-06, "loss": 0.3502, "step": 6487 }, { "epoch": 0.22264927934111187, "grad_norm": 0.812399690963179, "learning_rate": 9.058435786957893e-06, "loss": 0.3396, "step": 6488 }, { "epoch": 0.22268359643102265, "grad_norm": 0.8783650412992617, "learning_rate": 9.058111158941967e-06, "loss": 0.3075, "step": 6489 }, { "epoch": 0.22271791352093342, "grad_norm": 0.7795578029146538, "learning_rate": 9.057786480792808e-06, "loss": 0.3565, "step": 6490 }, { "epoch": 0.2227522306108442, "grad_norm": 0.8485807060703273, "learning_rate": 9.057461752514427e-06, "loss": 0.298, "step": 6491 }, { "epoch": 0.22278654770075498, "grad_norm": 0.7751242350459959, "learning_rate": 9.057136974110837e-06, "loss": 0.3154, "step": 6492 }, { "epoch": 0.22282086479066576, "grad_norm": 0.7587661560238087, "learning_rate": 9.056812145586052e-06, "loss": 0.2791, "step": 6493 }, { "epoch": 0.22285518188057651, "grad_norm": 0.6823980105305762, "learning_rate": 9.05648726694408e-06, "loss": 0.3107, "step": 6494 }, { "epoch": 0.2228894989704873, "grad_norm": 0.7616457643283483, "learning_rate": 9.056162338188939e-06, "loss": 0.3532, "step": 6495 }, { "epoch": 0.22292381606039807, "grad_norm": 0.8014873523606512, "learning_rate": 9.05583735932464e-06, "loss": 0.3742, "step": 6496 }, { "epoch": 0.22295813315030885, "grad_norm": 0.781340932042068, "learning_rate": 9.055512330355199e-06, "loss": 0.2946, "step": 6497 }, { "epoch": 0.22299245024021963, "grad_norm": 0.8037137799628208, "learning_rate": 9.055187251284632e-06, "loss": 0.3308, "step": 6498 }, { "epoch": 0.2230267673301304, "grad_norm": 0.7530724740258614, "learning_rate": 9.054862122116955e-06, "loss": 0.3049, "step": 6499 }, { "epoch": 0.2230610844200412, "grad_norm": 0.8014372950960164, "learning_rate": 9.054536942856183e-06, "loss": 0.3426, "step": 6500 }, { "epoch": 0.22309540150995197, "grad_norm": 0.8485837270146428, "learning_rate": 9.054211713506335e-06, "loss": 0.3418, "step": 6501 }, { "epoch": 0.22312971859986272, "grad_norm": 0.8669863919798649, "learning_rate": 9.053886434071429e-06, "loss": 0.2988, "step": 6502 }, { "epoch": 0.2231640356897735, "grad_norm": 0.8532766966795786, "learning_rate": 9.05356110455548e-06, "loss": 0.3698, "step": 6503 }, { "epoch": 0.22319835277968428, "grad_norm": 0.7666821630821633, "learning_rate": 9.05323572496251e-06, "loss": 0.3255, "step": 6504 }, { "epoch": 0.22323266986959506, "grad_norm": 0.7969943526862019, "learning_rate": 9.05291029529654e-06, "loss": 0.3215, "step": 6505 }, { "epoch": 0.22326698695950584, "grad_norm": 0.7530718071436393, "learning_rate": 9.052584815561587e-06, "loss": 0.2836, "step": 6506 }, { "epoch": 0.22330130404941662, "grad_norm": 0.8418602329288858, "learning_rate": 9.052259285761673e-06, "loss": 0.3775, "step": 6507 }, { "epoch": 0.2233356211393274, "grad_norm": 0.7846067812986041, "learning_rate": 9.051933705900822e-06, "loss": 0.333, "step": 6508 }, { "epoch": 0.22336993822923817, "grad_norm": 0.727168081974985, "learning_rate": 9.051608075983053e-06, "loss": 0.2878, "step": 6509 }, { "epoch": 0.22340425531914893, "grad_norm": 0.8099196999634882, "learning_rate": 9.051282396012389e-06, "loss": 0.3731, "step": 6510 }, { "epoch": 0.2234385724090597, "grad_norm": 0.7896842239240774, "learning_rate": 9.050956665992857e-06, "loss": 0.3762, "step": 6511 }, { "epoch": 0.22347288949897048, "grad_norm": 0.8283988624527127, "learning_rate": 9.050630885928475e-06, "loss": 0.323, "step": 6512 }, { "epoch": 0.22350720658888126, "grad_norm": 0.786527656864121, "learning_rate": 9.050305055823273e-06, "loss": 0.3285, "step": 6513 }, { "epoch": 0.22354152367879204, "grad_norm": 0.7916727210972517, "learning_rate": 9.049979175681275e-06, "loss": 0.3816, "step": 6514 }, { "epoch": 0.22357584076870282, "grad_norm": 0.8137527458478803, "learning_rate": 9.049653245506505e-06, "loss": 0.3586, "step": 6515 }, { "epoch": 0.2236101578586136, "grad_norm": 0.7870768404912815, "learning_rate": 9.04932726530299e-06, "loss": 0.3491, "step": 6516 }, { "epoch": 0.22364447494852435, "grad_norm": 0.7987878067277059, "learning_rate": 9.049001235074759e-06, "loss": 0.3071, "step": 6517 }, { "epoch": 0.22367879203843513, "grad_norm": 0.717508327199572, "learning_rate": 9.048675154825837e-06, "loss": 0.3453, "step": 6518 }, { "epoch": 0.2237131091283459, "grad_norm": 0.7809644416424952, "learning_rate": 9.048349024560254e-06, "loss": 0.3623, "step": 6519 }, { "epoch": 0.2237474262182567, "grad_norm": 0.8954375997672365, "learning_rate": 9.048022844282039e-06, "loss": 0.3843, "step": 6520 }, { "epoch": 0.22378174330816747, "grad_norm": 0.8415385042029884, "learning_rate": 9.047696613995222e-06, "loss": 0.3118, "step": 6521 }, { "epoch": 0.22381606039807825, "grad_norm": 0.7433276838563837, "learning_rate": 9.047370333703833e-06, "loss": 0.2825, "step": 6522 }, { "epoch": 0.22385037748798903, "grad_norm": 0.7860473255381998, "learning_rate": 9.0470440034119e-06, "loss": 0.326, "step": 6523 }, { "epoch": 0.2238846945778998, "grad_norm": 0.7242065451284208, "learning_rate": 9.046717623123458e-06, "loss": 0.3032, "step": 6524 }, { "epoch": 0.22391901166781056, "grad_norm": 0.7498260587779124, "learning_rate": 9.046391192842538e-06, "loss": 0.3272, "step": 6525 }, { "epoch": 0.22395332875772134, "grad_norm": 0.8871910035040514, "learning_rate": 9.046064712573173e-06, "loss": 0.3585, "step": 6526 }, { "epoch": 0.22398764584763212, "grad_norm": 0.7471796686518212, "learning_rate": 9.045738182319394e-06, "loss": 0.3026, "step": 6527 }, { "epoch": 0.2240219629375429, "grad_norm": 0.7855039670174887, "learning_rate": 9.045411602085239e-06, "loss": 0.3633, "step": 6528 }, { "epoch": 0.22405628002745367, "grad_norm": 0.7478617635991774, "learning_rate": 9.045084971874738e-06, "loss": 0.2878, "step": 6529 }, { "epoch": 0.22409059711736445, "grad_norm": 0.8099332425282143, "learning_rate": 9.044758291691928e-06, "loss": 0.3267, "step": 6530 }, { "epoch": 0.22412491420727523, "grad_norm": 0.8186286809348917, "learning_rate": 9.044431561540847e-06, "loss": 0.3606, "step": 6531 }, { "epoch": 0.224159231297186, "grad_norm": 0.8989203416991769, "learning_rate": 9.044104781425529e-06, "loss": 0.3159, "step": 6532 }, { "epoch": 0.22419354838709676, "grad_norm": 0.7988371682430511, "learning_rate": 9.04377795135001e-06, "loss": 0.3202, "step": 6533 }, { "epoch": 0.22422786547700754, "grad_norm": 0.9057793224965677, "learning_rate": 9.04345107131833e-06, "loss": 0.3027, "step": 6534 }, { "epoch": 0.22426218256691832, "grad_norm": 0.9206052300001207, "learning_rate": 9.043124141334527e-06, "loss": 0.3568, "step": 6535 }, { "epoch": 0.2242964996568291, "grad_norm": 0.7998944909171709, "learning_rate": 9.042797161402637e-06, "loss": 0.2783, "step": 6536 }, { "epoch": 0.22433081674673988, "grad_norm": 0.7103232010285336, "learning_rate": 9.042470131526703e-06, "loss": 0.2797, "step": 6537 }, { "epoch": 0.22436513383665066, "grad_norm": 0.7858219861058376, "learning_rate": 9.042143051710764e-06, "loss": 0.3376, "step": 6538 }, { "epoch": 0.22439945092656144, "grad_norm": 0.7701974529612964, "learning_rate": 9.041815921958859e-06, "loss": 0.352, "step": 6539 }, { "epoch": 0.2244337680164722, "grad_norm": 0.7713706444329975, "learning_rate": 9.041488742275031e-06, "loss": 0.3058, "step": 6540 }, { "epoch": 0.22446808510638297, "grad_norm": 0.819365760487639, "learning_rate": 9.041161512663321e-06, "loss": 0.366, "step": 6541 }, { "epoch": 0.22450240219629375, "grad_norm": 0.7458621286116685, "learning_rate": 9.040834233127773e-06, "loss": 0.2898, "step": 6542 }, { "epoch": 0.22453671928620453, "grad_norm": 0.9345187136956198, "learning_rate": 9.04050690367243e-06, "loss": 0.3417, "step": 6543 }, { "epoch": 0.2245710363761153, "grad_norm": 0.8450308636073403, "learning_rate": 9.040179524301334e-06, "loss": 0.3063, "step": 6544 }, { "epoch": 0.2246053534660261, "grad_norm": 0.7666821282145365, "learning_rate": 9.039852095018528e-06, "loss": 0.3109, "step": 6545 }, { "epoch": 0.22463967055593687, "grad_norm": 0.7823125909703293, "learning_rate": 9.039524615828063e-06, "loss": 0.3229, "step": 6546 }, { "epoch": 0.22467398764584764, "grad_norm": 0.8486646037261498, "learning_rate": 9.03919708673398e-06, "loss": 0.3101, "step": 6547 }, { "epoch": 0.2247083047357584, "grad_norm": 0.7698593753120625, "learning_rate": 9.038869507740327e-06, "loss": 0.3515, "step": 6548 }, { "epoch": 0.22474262182566918, "grad_norm": 0.7659151917664231, "learning_rate": 9.038541878851147e-06, "loss": 0.3068, "step": 6549 }, { "epoch": 0.22477693891557995, "grad_norm": 0.7087058733875743, "learning_rate": 9.038214200070493e-06, "loss": 0.3249, "step": 6550 }, { "epoch": 0.22481125600549073, "grad_norm": 0.7713946099601666, "learning_rate": 9.037886471402411e-06, "loss": 0.3222, "step": 6551 }, { "epoch": 0.2248455730954015, "grad_norm": 0.7394906414397874, "learning_rate": 9.037558692850948e-06, "loss": 0.3116, "step": 6552 }, { "epoch": 0.2248798901853123, "grad_norm": 0.8096128889721984, "learning_rate": 9.037230864420154e-06, "loss": 0.2974, "step": 6553 }, { "epoch": 0.22491420727522307, "grad_norm": 0.7954369562693301, "learning_rate": 9.036902986114082e-06, "loss": 0.3714, "step": 6554 }, { "epoch": 0.22494852436513382, "grad_norm": 0.7192000783079658, "learning_rate": 9.03657505793678e-06, "loss": 0.3147, "step": 6555 }, { "epoch": 0.2249828414550446, "grad_norm": 0.7915810171063576, "learning_rate": 9.036247079892297e-06, "loss": 0.3476, "step": 6556 }, { "epoch": 0.22501715854495538, "grad_norm": 0.7823071012947422, "learning_rate": 9.03591905198469e-06, "loss": 0.2876, "step": 6557 }, { "epoch": 0.22505147563486616, "grad_norm": 0.8011972976917195, "learning_rate": 9.035590974218007e-06, "loss": 0.3, "step": 6558 }, { "epoch": 0.22508579272477694, "grad_norm": 0.7973493950430214, "learning_rate": 9.035262846596303e-06, "loss": 0.2693, "step": 6559 }, { "epoch": 0.22512010981468772, "grad_norm": 0.8365136468646144, "learning_rate": 9.03493466912363e-06, "loss": 0.3487, "step": 6560 }, { "epoch": 0.2251544269045985, "grad_norm": 0.8608025797386375, "learning_rate": 9.034606441804043e-06, "loss": 0.4462, "step": 6561 }, { "epoch": 0.22518874399450928, "grad_norm": 0.9124656358444106, "learning_rate": 9.0342781646416e-06, "loss": 0.3356, "step": 6562 }, { "epoch": 0.22522306108442003, "grad_norm": 0.7973260072906808, "learning_rate": 9.033949837640352e-06, "loss": 0.3394, "step": 6563 }, { "epoch": 0.2252573781743308, "grad_norm": 0.7827401060172895, "learning_rate": 9.033621460804357e-06, "loss": 0.2917, "step": 6564 }, { "epoch": 0.2252916952642416, "grad_norm": 0.8338468593808971, "learning_rate": 9.033293034137672e-06, "loss": 0.3286, "step": 6565 }, { "epoch": 0.22532601235415237, "grad_norm": 0.8316922217024129, "learning_rate": 9.032964557644354e-06, "loss": 0.3063, "step": 6566 }, { "epoch": 0.22536032944406315, "grad_norm": 0.7682568353203931, "learning_rate": 9.03263603132846e-06, "loss": 0.3347, "step": 6567 }, { "epoch": 0.22539464653397392, "grad_norm": 0.8085873693407094, "learning_rate": 9.03230745519405e-06, "loss": 0.3376, "step": 6568 }, { "epoch": 0.2254289636238847, "grad_norm": 0.7258040551149747, "learning_rate": 9.031978829245183e-06, "loss": 0.34, "step": 6569 }, { "epoch": 0.22546328071379548, "grad_norm": 0.7840231074615996, "learning_rate": 9.031650153485918e-06, "loss": 0.3467, "step": 6570 }, { "epoch": 0.22549759780370623, "grad_norm": 0.768918341126954, "learning_rate": 9.031321427920317e-06, "loss": 0.3301, "step": 6571 }, { "epoch": 0.225531914893617, "grad_norm": 0.9104760462428811, "learning_rate": 9.030992652552438e-06, "loss": 0.3511, "step": 6572 }, { "epoch": 0.2255662319835278, "grad_norm": 0.7733466094340808, "learning_rate": 9.030663827386345e-06, "loss": 0.315, "step": 6573 }, { "epoch": 0.22560054907343857, "grad_norm": 0.894966321132475, "learning_rate": 9.0303349524261e-06, "loss": 0.311, "step": 6574 }, { "epoch": 0.22563486616334935, "grad_norm": 0.7814069465694865, "learning_rate": 9.030006027675764e-06, "loss": 0.3534, "step": 6575 }, { "epoch": 0.22566918325326013, "grad_norm": 0.8744812524771965, "learning_rate": 9.029677053139404e-06, "loss": 0.3415, "step": 6576 }, { "epoch": 0.2257035003431709, "grad_norm": 0.7641669703066655, "learning_rate": 9.029348028821082e-06, "loss": 0.3348, "step": 6577 }, { "epoch": 0.22573781743308166, "grad_norm": 0.8332224747338453, "learning_rate": 9.029018954724862e-06, "loss": 0.3555, "step": 6578 }, { "epoch": 0.22577213452299244, "grad_norm": 0.8289538379541117, "learning_rate": 9.028689830854811e-06, "loss": 0.3507, "step": 6579 }, { "epoch": 0.22580645161290322, "grad_norm": 0.7603839754684816, "learning_rate": 9.028360657214994e-06, "loss": 0.3039, "step": 6580 }, { "epoch": 0.225840768702814, "grad_norm": 0.8092420164559522, "learning_rate": 9.028031433809478e-06, "loss": 0.348, "step": 6581 }, { "epoch": 0.22587508579272478, "grad_norm": 0.6967102136622783, "learning_rate": 9.027702160642328e-06, "loss": 0.255, "step": 6582 }, { "epoch": 0.22590940288263556, "grad_norm": 0.7536685501393874, "learning_rate": 9.027372837717615e-06, "loss": 0.3647, "step": 6583 }, { "epoch": 0.22594371997254634, "grad_norm": 0.7730824586377533, "learning_rate": 9.027043465039407e-06, "loss": 0.326, "step": 6584 }, { "epoch": 0.22597803706245712, "grad_norm": 0.7714979421153119, "learning_rate": 9.02671404261177e-06, "loss": 0.3067, "step": 6585 }, { "epoch": 0.22601235415236787, "grad_norm": 0.7518395117434115, "learning_rate": 9.026384570438777e-06, "loss": 0.3633, "step": 6586 }, { "epoch": 0.22604667124227865, "grad_norm": 0.7306243532557412, "learning_rate": 9.026055048524498e-06, "loss": 0.2997, "step": 6587 }, { "epoch": 0.22608098833218943, "grad_norm": 0.7161341105871665, "learning_rate": 9.025725476873002e-06, "loss": 0.3234, "step": 6588 }, { "epoch": 0.2261153054221002, "grad_norm": 0.7866536192105015, "learning_rate": 9.02539585548836e-06, "loss": 0.349, "step": 6589 }, { "epoch": 0.22614962251201098, "grad_norm": 0.8252143104055941, "learning_rate": 9.025066184374647e-06, "loss": 0.3107, "step": 6590 }, { "epoch": 0.22618393960192176, "grad_norm": 0.7320395650224761, "learning_rate": 9.024736463535933e-06, "loss": 0.3337, "step": 6591 }, { "epoch": 0.22621825669183254, "grad_norm": 0.8647094688688682, "learning_rate": 9.024406692976293e-06, "loss": 0.3693, "step": 6592 }, { "epoch": 0.22625257378174332, "grad_norm": 0.7683842133659973, "learning_rate": 9.024076872699799e-06, "loss": 0.3448, "step": 6593 }, { "epoch": 0.22628689087165407, "grad_norm": 0.8839651838441194, "learning_rate": 9.023747002710527e-06, "loss": 0.3266, "step": 6594 }, { "epoch": 0.22632120796156485, "grad_norm": 0.7553412173327818, "learning_rate": 9.023417083012555e-06, "loss": 0.3443, "step": 6595 }, { "epoch": 0.22635552505147563, "grad_norm": 0.8181065224891427, "learning_rate": 9.023087113609954e-06, "loss": 0.316, "step": 6596 }, { "epoch": 0.2263898421413864, "grad_norm": 0.7341986427034254, "learning_rate": 9.0227570945068e-06, "loss": 0.3319, "step": 6597 }, { "epoch": 0.2264241592312972, "grad_norm": 0.8401935400516108, "learning_rate": 9.022427025707175e-06, "loss": 0.3168, "step": 6598 }, { "epoch": 0.22645847632120797, "grad_norm": 0.7624738658320298, "learning_rate": 9.022096907215152e-06, "loss": 0.3078, "step": 6599 }, { "epoch": 0.22649279341111875, "grad_norm": 0.7350610722293313, "learning_rate": 9.021766739034813e-06, "loss": 0.2898, "step": 6600 }, { "epoch": 0.2265271105010295, "grad_norm": 0.8327991621756803, "learning_rate": 9.021436521170233e-06, "loss": 0.3262, "step": 6601 }, { "epoch": 0.22656142759094028, "grad_norm": 0.8574775610734802, "learning_rate": 9.021106253625494e-06, "loss": 0.3572, "step": 6602 }, { "epoch": 0.22659574468085106, "grad_norm": 0.7032237302297962, "learning_rate": 9.020775936404676e-06, "loss": 0.2742, "step": 6603 }, { "epoch": 0.22663006177076184, "grad_norm": 0.8393264080950381, "learning_rate": 9.020445569511857e-06, "loss": 0.3159, "step": 6604 }, { "epoch": 0.22666437886067262, "grad_norm": 0.7687131500749294, "learning_rate": 9.020115152951122e-06, "loss": 0.3071, "step": 6605 }, { "epoch": 0.2266986959505834, "grad_norm": 0.813316535418161, "learning_rate": 9.019784686726552e-06, "loss": 0.2887, "step": 6606 }, { "epoch": 0.22673301304049417, "grad_norm": 0.7139744995742788, "learning_rate": 9.019454170842227e-06, "loss": 0.2677, "step": 6607 }, { "epoch": 0.22676733013040495, "grad_norm": 0.7993025351313648, "learning_rate": 9.019123605302234e-06, "loss": 0.3771, "step": 6608 }, { "epoch": 0.2268016472203157, "grad_norm": 0.7462864157963618, "learning_rate": 9.018792990110653e-06, "loss": 0.3382, "step": 6609 }, { "epoch": 0.22683596431022648, "grad_norm": 0.7381863710092814, "learning_rate": 9.018462325271571e-06, "loss": 0.2999, "step": 6610 }, { "epoch": 0.22687028140013726, "grad_norm": 0.7603578796719884, "learning_rate": 9.01813161078907e-06, "loss": 0.3814, "step": 6611 }, { "epoch": 0.22690459849004804, "grad_norm": 0.7693163485454011, "learning_rate": 9.017800846667241e-06, "loss": 0.3071, "step": 6612 }, { "epoch": 0.22693891557995882, "grad_norm": 0.8350913668342885, "learning_rate": 9.017470032910165e-06, "loss": 0.3232, "step": 6613 }, { "epoch": 0.2269732326698696, "grad_norm": 0.78943101832961, "learning_rate": 9.017139169521932e-06, "loss": 0.3326, "step": 6614 }, { "epoch": 0.22700754975978038, "grad_norm": 0.7838307612672512, "learning_rate": 9.016808256506627e-06, "loss": 0.3134, "step": 6615 }, { "epoch": 0.22704186684969116, "grad_norm": 0.7944555596792872, "learning_rate": 9.016477293868338e-06, "loss": 0.3231, "step": 6616 }, { "epoch": 0.2270761839396019, "grad_norm": 0.7924800453667278, "learning_rate": 9.016146281611156e-06, "loss": 0.3397, "step": 6617 }, { "epoch": 0.2271105010295127, "grad_norm": 0.8845231789748026, "learning_rate": 9.01581521973917e-06, "loss": 0.3364, "step": 6618 }, { "epoch": 0.22714481811942347, "grad_norm": 0.8539212674630269, "learning_rate": 9.015484108256467e-06, "loss": 0.355, "step": 6619 }, { "epoch": 0.22717913520933425, "grad_norm": 0.7377508410409751, "learning_rate": 9.015152947167141e-06, "loss": 0.2957, "step": 6620 }, { "epoch": 0.22721345229924503, "grad_norm": 0.8539094362885692, "learning_rate": 9.01482173647528e-06, "loss": 0.3589, "step": 6621 }, { "epoch": 0.2272477693891558, "grad_norm": 0.770596765226211, "learning_rate": 9.014490476184978e-06, "loss": 0.3468, "step": 6622 }, { "epoch": 0.2272820864790666, "grad_norm": 0.7733243179885424, "learning_rate": 9.014159166300327e-06, "loss": 0.2949, "step": 6623 }, { "epoch": 0.22731640356897734, "grad_norm": 0.7743620188397946, "learning_rate": 9.013827806825419e-06, "loss": 0.2974, "step": 6624 }, { "epoch": 0.22735072065888812, "grad_norm": 0.7968896178371802, "learning_rate": 9.013496397764348e-06, "loss": 0.3114, "step": 6625 }, { "epoch": 0.2273850377487989, "grad_norm": 0.8682901228867935, "learning_rate": 9.013164939121209e-06, "loss": 0.3906, "step": 6626 }, { "epoch": 0.22741935483870968, "grad_norm": 0.7638584995200856, "learning_rate": 9.012833430900097e-06, "loss": 0.2769, "step": 6627 }, { "epoch": 0.22745367192862045, "grad_norm": 0.8433539919786467, "learning_rate": 9.012501873105104e-06, "loss": 0.321, "step": 6628 }, { "epoch": 0.22748798901853123, "grad_norm": 0.8663216269868054, "learning_rate": 9.01217026574033e-06, "loss": 0.335, "step": 6629 }, { "epoch": 0.227522306108442, "grad_norm": 0.78428366579253, "learning_rate": 9.01183860880987e-06, "loss": 0.3692, "step": 6630 }, { "epoch": 0.2275566231983528, "grad_norm": 0.7756805788186997, "learning_rate": 9.011506902317822e-06, "loss": 0.3142, "step": 6631 }, { "epoch": 0.22759094028826354, "grad_norm": 0.869658122737735, "learning_rate": 9.011175146268281e-06, "loss": 0.332, "step": 6632 }, { "epoch": 0.22762525737817432, "grad_norm": 0.7857672899228154, "learning_rate": 9.01084334066535e-06, "loss": 0.3448, "step": 6633 }, { "epoch": 0.2276595744680851, "grad_norm": 0.825014192954458, "learning_rate": 9.010511485513124e-06, "loss": 0.3628, "step": 6634 }, { "epoch": 0.22769389155799588, "grad_norm": 0.7446542655690267, "learning_rate": 9.010179580815706e-06, "loss": 0.3424, "step": 6635 }, { "epoch": 0.22772820864790666, "grad_norm": 0.7670871286982861, "learning_rate": 9.009847626577193e-06, "loss": 0.2774, "step": 6636 }, { "epoch": 0.22776252573781744, "grad_norm": 0.7958032156407995, "learning_rate": 9.009515622801688e-06, "loss": 0.2932, "step": 6637 }, { "epoch": 0.22779684282772822, "grad_norm": 0.9341186359964958, "learning_rate": 9.009183569493293e-06, "loss": 0.3157, "step": 6638 }, { "epoch": 0.227831159917639, "grad_norm": 0.8234089508793018, "learning_rate": 9.008851466656108e-06, "loss": 0.3428, "step": 6639 }, { "epoch": 0.22786547700754975, "grad_norm": 0.7581890311834778, "learning_rate": 9.008519314294239e-06, "loss": 0.2944, "step": 6640 }, { "epoch": 0.22789979409746053, "grad_norm": 0.8642938650950727, "learning_rate": 9.008187112411786e-06, "loss": 0.3519, "step": 6641 }, { "epoch": 0.2279341111873713, "grad_norm": 0.876924980456301, "learning_rate": 9.007854861012855e-06, "loss": 0.3098, "step": 6642 }, { "epoch": 0.2279684282772821, "grad_norm": 0.778060861753991, "learning_rate": 9.00752256010155e-06, "loss": 0.326, "step": 6643 }, { "epoch": 0.22800274536719287, "grad_norm": 0.9203505792461453, "learning_rate": 9.007190209681973e-06, "loss": 0.344, "step": 6644 }, { "epoch": 0.22803706245710365, "grad_norm": 0.865259147358003, "learning_rate": 9.006857809758237e-06, "loss": 0.3253, "step": 6645 }, { "epoch": 0.22807137954701442, "grad_norm": 0.8004287221420848, "learning_rate": 9.006525360334442e-06, "loss": 0.4218, "step": 6646 }, { "epoch": 0.22810569663692518, "grad_norm": 0.7347982382766238, "learning_rate": 9.006192861414698e-06, "loss": 0.3348, "step": 6647 }, { "epoch": 0.22814001372683596, "grad_norm": 0.8472463238210195, "learning_rate": 9.005860313003111e-06, "loss": 0.3329, "step": 6648 }, { "epoch": 0.22817433081674673, "grad_norm": 0.7613524680644186, "learning_rate": 9.005527715103792e-06, "loss": 0.2761, "step": 6649 }, { "epoch": 0.2282086479066575, "grad_norm": 0.7774637375698217, "learning_rate": 9.005195067720846e-06, "loss": 0.3107, "step": 6650 }, { "epoch": 0.2282429649965683, "grad_norm": 0.7829984824020386, "learning_rate": 9.004862370858387e-06, "loss": 0.3251, "step": 6651 }, { "epoch": 0.22827728208647907, "grad_norm": 0.7784870306028822, "learning_rate": 9.004529624520522e-06, "loss": 0.311, "step": 6652 }, { "epoch": 0.22831159917638985, "grad_norm": 0.847917369426145, "learning_rate": 9.00419682871136e-06, "loss": 0.3722, "step": 6653 }, { "epoch": 0.22834591626630063, "grad_norm": 0.8267063622434693, "learning_rate": 9.003863983435017e-06, "loss": 0.3542, "step": 6654 }, { "epoch": 0.22838023335621138, "grad_norm": 0.8449562920801137, "learning_rate": 9.003531088695602e-06, "loss": 0.3583, "step": 6655 }, { "epoch": 0.22841455044612216, "grad_norm": 0.756477234615408, "learning_rate": 9.003198144497227e-06, "loss": 0.3988, "step": 6656 }, { "epoch": 0.22844886753603294, "grad_norm": 0.7939551049443513, "learning_rate": 9.002865150844007e-06, "loss": 0.309, "step": 6657 }, { "epoch": 0.22848318462594372, "grad_norm": 0.7103210109509251, "learning_rate": 9.002532107740056e-06, "loss": 0.2722, "step": 6658 }, { "epoch": 0.2285175017158545, "grad_norm": 0.8380313842408176, "learning_rate": 9.002199015189487e-06, "loss": 0.3051, "step": 6659 }, { "epoch": 0.22855181880576528, "grad_norm": 0.7901027534537671, "learning_rate": 9.001865873196413e-06, "loss": 0.3206, "step": 6660 }, { "epoch": 0.22858613589567606, "grad_norm": 0.789021883002162, "learning_rate": 9.001532681764955e-06, "loss": 0.3237, "step": 6661 }, { "epoch": 0.2286204529855868, "grad_norm": 0.7773105907761914, "learning_rate": 9.001199440899225e-06, "loss": 0.3432, "step": 6662 }, { "epoch": 0.2286547700754976, "grad_norm": 0.7691532540441803, "learning_rate": 9.00086615060334e-06, "loss": 0.2881, "step": 6663 }, { "epoch": 0.22868908716540837, "grad_norm": 0.9014722843187712, "learning_rate": 9.000532810881419e-06, "loss": 0.32, "step": 6664 }, { "epoch": 0.22872340425531915, "grad_norm": 0.797156224654163, "learning_rate": 9.000199421737579e-06, "loss": 0.3465, "step": 6665 }, { "epoch": 0.22875772134522993, "grad_norm": 0.8440747465715582, "learning_rate": 8.999865983175939e-06, "loss": 0.3385, "step": 6666 }, { "epoch": 0.2287920384351407, "grad_norm": 0.7614849227518585, "learning_rate": 8.999532495200616e-06, "loss": 0.3192, "step": 6667 }, { "epoch": 0.22882635552505148, "grad_norm": 0.7595704977669451, "learning_rate": 8.999198957815736e-06, "loss": 0.2984, "step": 6668 }, { "epoch": 0.22886067261496226, "grad_norm": 0.8682181990815536, "learning_rate": 8.998865371025414e-06, "loss": 0.3057, "step": 6669 }, { "epoch": 0.22889498970487301, "grad_norm": 0.7738409757936373, "learning_rate": 8.99853173483377e-06, "loss": 0.3326, "step": 6670 }, { "epoch": 0.2289293067947838, "grad_norm": 0.830888751277481, "learning_rate": 8.998198049244931e-06, "loss": 0.3031, "step": 6671 }, { "epoch": 0.22896362388469457, "grad_norm": 0.7699603888693324, "learning_rate": 8.997864314263015e-06, "loss": 0.3045, "step": 6672 }, { "epoch": 0.22899794097460535, "grad_norm": 0.8757458886111898, "learning_rate": 8.997530529892148e-06, "loss": 0.3595, "step": 6673 }, { "epoch": 0.22903225806451613, "grad_norm": 0.7666562926756878, "learning_rate": 8.997196696136452e-06, "loss": 0.3066, "step": 6674 }, { "epoch": 0.2290665751544269, "grad_norm": 0.7102480212472227, "learning_rate": 8.99686281300005e-06, "loss": 0.3418, "step": 6675 }, { "epoch": 0.2291008922443377, "grad_norm": 0.8900272190911288, "learning_rate": 8.996528880487068e-06, "loss": 0.3794, "step": 6676 }, { "epoch": 0.22913520933424847, "grad_norm": 0.7634133506655245, "learning_rate": 8.996194898601632e-06, "loss": 0.3285, "step": 6677 }, { "epoch": 0.22916952642415922, "grad_norm": 0.7837002710059436, "learning_rate": 8.995860867347867e-06, "loss": 0.3722, "step": 6678 }, { "epoch": 0.22920384351407, "grad_norm": 0.7933387014139431, "learning_rate": 8.995526786729899e-06, "loss": 0.3237, "step": 6679 }, { "epoch": 0.22923816060398078, "grad_norm": 0.7684930988101363, "learning_rate": 8.995192656751857e-06, "loss": 0.322, "step": 6680 }, { "epoch": 0.22927247769389156, "grad_norm": 0.797065584873615, "learning_rate": 8.994858477417867e-06, "loss": 0.3478, "step": 6681 }, { "epoch": 0.22930679478380234, "grad_norm": 0.8569221912332549, "learning_rate": 8.994524248732057e-06, "loss": 0.3041, "step": 6682 }, { "epoch": 0.22934111187371312, "grad_norm": 0.7364191183689411, "learning_rate": 8.994189970698559e-06, "loss": 0.2962, "step": 6683 }, { "epoch": 0.2293754289636239, "grad_norm": 0.7903736612404587, "learning_rate": 8.993855643321499e-06, "loss": 0.3264, "step": 6684 }, { "epoch": 0.22940974605353465, "grad_norm": 0.8147873219778439, "learning_rate": 8.99352126660501e-06, "loss": 0.3367, "step": 6685 }, { "epoch": 0.22944406314344543, "grad_norm": 0.7976661527985522, "learning_rate": 8.99318684055322e-06, "loss": 0.3383, "step": 6686 }, { "epoch": 0.2294783802333562, "grad_norm": 0.789934983925065, "learning_rate": 8.992852365170264e-06, "loss": 0.3008, "step": 6687 }, { "epoch": 0.22951269732326698, "grad_norm": 0.8725618374205829, "learning_rate": 8.992517840460274e-06, "loss": 0.327, "step": 6688 }, { "epoch": 0.22954701441317776, "grad_norm": 0.871748061922385, "learning_rate": 8.992183266427377e-06, "loss": 0.2708, "step": 6689 }, { "epoch": 0.22958133150308854, "grad_norm": 0.8264102189755984, "learning_rate": 8.991848643075712e-06, "loss": 0.3073, "step": 6690 }, { "epoch": 0.22961564859299932, "grad_norm": 0.8008220772968239, "learning_rate": 8.99151397040941e-06, "loss": 0.3648, "step": 6691 }, { "epoch": 0.2296499656829101, "grad_norm": 0.86959047540281, "learning_rate": 8.991179248432609e-06, "loss": 0.3558, "step": 6692 }, { "epoch": 0.22968428277282085, "grad_norm": 0.7253688025070645, "learning_rate": 8.99084447714944e-06, "loss": 0.3042, "step": 6693 }, { "epoch": 0.22971859986273163, "grad_norm": 0.7889937218857186, "learning_rate": 8.99050965656404e-06, "loss": 0.3747, "step": 6694 }, { "epoch": 0.2297529169526424, "grad_norm": 0.8179385313535779, "learning_rate": 8.990174786680545e-06, "loss": 0.3415, "step": 6695 }, { "epoch": 0.2297872340425532, "grad_norm": 0.7552839702342872, "learning_rate": 8.989839867503096e-06, "loss": 0.3736, "step": 6696 }, { "epoch": 0.22982155113246397, "grad_norm": 0.8652977408113935, "learning_rate": 8.989504899035824e-06, "loss": 0.3667, "step": 6697 }, { "epoch": 0.22985586822237475, "grad_norm": 0.7587251277111101, "learning_rate": 8.98916988128287e-06, "loss": 0.3402, "step": 6698 }, { "epoch": 0.22989018531228553, "grad_norm": 0.8231123860416797, "learning_rate": 8.988834814248375e-06, "loss": 0.3175, "step": 6699 }, { "epoch": 0.2299245024021963, "grad_norm": 0.8295826801058005, "learning_rate": 8.988499697936476e-06, "loss": 0.3198, "step": 6700 }, { "epoch": 0.22995881949210706, "grad_norm": 0.8138190961177525, "learning_rate": 8.988164532351313e-06, "loss": 0.3143, "step": 6701 }, { "epoch": 0.22999313658201784, "grad_norm": 0.8849557309528864, "learning_rate": 8.987829317497027e-06, "loss": 0.394, "step": 6702 }, { "epoch": 0.23002745367192862, "grad_norm": 0.8580615661008234, "learning_rate": 8.987494053377758e-06, "loss": 0.3225, "step": 6703 }, { "epoch": 0.2300617707618394, "grad_norm": 0.7783622007513766, "learning_rate": 8.98715873999765e-06, "loss": 0.3562, "step": 6704 }, { "epoch": 0.23009608785175018, "grad_norm": 0.8469739820251756, "learning_rate": 8.986823377360845e-06, "loss": 0.3231, "step": 6705 }, { "epoch": 0.23013040494166095, "grad_norm": 0.8298467354974002, "learning_rate": 8.986487965471485e-06, "loss": 0.2839, "step": 6706 }, { "epoch": 0.23016472203157173, "grad_norm": 0.8319601987588428, "learning_rate": 8.986152504333714e-06, "loss": 0.2695, "step": 6707 }, { "epoch": 0.23019903912148248, "grad_norm": 0.7419133024988376, "learning_rate": 8.985816993951674e-06, "loss": 0.299, "step": 6708 }, { "epoch": 0.23023335621139326, "grad_norm": 0.8348336475722761, "learning_rate": 8.985481434329515e-06, "loss": 0.3948, "step": 6709 }, { "epoch": 0.23026767330130404, "grad_norm": 0.7829163326057171, "learning_rate": 8.98514582547138e-06, "loss": 0.3087, "step": 6710 }, { "epoch": 0.23030199039121482, "grad_norm": 0.913612305177125, "learning_rate": 8.984810167381413e-06, "loss": 0.3876, "step": 6711 }, { "epoch": 0.2303363074811256, "grad_norm": 0.7834305727841328, "learning_rate": 8.98447446006376e-06, "loss": 0.3369, "step": 6712 }, { "epoch": 0.23037062457103638, "grad_norm": 0.9006342891696706, "learning_rate": 8.984138703522574e-06, "loss": 0.3761, "step": 6713 }, { "epoch": 0.23040494166094716, "grad_norm": 0.7723874182840127, "learning_rate": 8.983802897761999e-06, "loss": 0.3075, "step": 6714 }, { "epoch": 0.23043925875085794, "grad_norm": 0.8605991724741471, "learning_rate": 8.983467042786183e-06, "loss": 0.3319, "step": 6715 }, { "epoch": 0.2304735758407687, "grad_norm": 0.850183306565034, "learning_rate": 8.983131138599275e-06, "loss": 0.3494, "step": 6716 }, { "epoch": 0.23050789293067947, "grad_norm": 0.8435905077048158, "learning_rate": 8.982795185205427e-06, "loss": 0.3325, "step": 6717 }, { "epoch": 0.23054221002059025, "grad_norm": 0.8614917671703822, "learning_rate": 8.982459182608787e-06, "loss": 0.3239, "step": 6718 }, { "epoch": 0.23057652711050103, "grad_norm": 0.8182814307459672, "learning_rate": 8.982123130813506e-06, "loss": 0.383, "step": 6719 }, { "epoch": 0.2306108442004118, "grad_norm": 0.8356182162446204, "learning_rate": 8.981787029823739e-06, "loss": 0.3274, "step": 6720 }, { "epoch": 0.2306451612903226, "grad_norm": 0.8232922602038741, "learning_rate": 8.981450879643635e-06, "loss": 0.3926, "step": 6721 }, { "epoch": 0.23067947838023337, "grad_norm": 0.86640889196056, "learning_rate": 8.981114680277347e-06, "loss": 0.3324, "step": 6722 }, { "epoch": 0.23071379547014415, "grad_norm": 0.8635392040700686, "learning_rate": 8.98077843172903e-06, "loss": 0.31, "step": 6723 }, { "epoch": 0.2307481125600549, "grad_norm": 0.7151359257739269, "learning_rate": 8.980442134002834e-06, "loss": 0.311, "step": 6724 }, { "epoch": 0.23078242964996568, "grad_norm": 0.854748451026468, "learning_rate": 8.980105787102918e-06, "loss": 0.3231, "step": 6725 }, { "epoch": 0.23081674673987645, "grad_norm": 0.7930465196751711, "learning_rate": 8.979769391033433e-06, "loss": 0.3131, "step": 6726 }, { "epoch": 0.23085106382978723, "grad_norm": 0.7593531391027658, "learning_rate": 8.979432945798539e-06, "loss": 0.3231, "step": 6727 }, { "epoch": 0.230885380919698, "grad_norm": 0.7430730643221126, "learning_rate": 8.97909645140239e-06, "loss": 0.3461, "step": 6728 }, { "epoch": 0.2309196980096088, "grad_norm": 0.8682762985940342, "learning_rate": 8.978759907849144e-06, "loss": 0.366, "step": 6729 }, { "epoch": 0.23095401509951957, "grad_norm": 0.726625848183057, "learning_rate": 8.978423315142958e-06, "loss": 0.3262, "step": 6730 }, { "epoch": 0.23098833218943032, "grad_norm": 0.8154423696047278, "learning_rate": 8.978086673287989e-06, "loss": 0.3446, "step": 6731 }, { "epoch": 0.2310226492793411, "grad_norm": 0.759874988219107, "learning_rate": 8.9777499822884e-06, "loss": 0.3237, "step": 6732 }, { "epoch": 0.23105696636925188, "grad_norm": 0.7798031089796144, "learning_rate": 8.977413242148344e-06, "loss": 0.3377, "step": 6733 }, { "epoch": 0.23109128345916266, "grad_norm": 0.7972085120487945, "learning_rate": 8.977076452871988e-06, "loss": 0.3042, "step": 6734 }, { "epoch": 0.23112560054907344, "grad_norm": 0.7428080791260631, "learning_rate": 8.976739614463487e-06, "loss": 0.337, "step": 6735 }, { "epoch": 0.23115991763898422, "grad_norm": 0.7902097109384102, "learning_rate": 8.976402726927006e-06, "loss": 0.3575, "step": 6736 }, { "epoch": 0.231194234728895, "grad_norm": 0.8576398385914927, "learning_rate": 8.976065790266703e-06, "loss": 0.3187, "step": 6737 }, { "epoch": 0.23122855181880578, "grad_norm": 0.7435868320390198, "learning_rate": 8.975728804486746e-06, "loss": 0.34, "step": 6738 }, { "epoch": 0.23126286890871653, "grad_norm": 0.8357733269385993, "learning_rate": 8.975391769591292e-06, "loss": 0.3238, "step": 6739 }, { "epoch": 0.2312971859986273, "grad_norm": 0.8897782432081162, "learning_rate": 8.97505468558451e-06, "loss": 0.359, "step": 6740 }, { "epoch": 0.2313315030885381, "grad_norm": 0.7921399009401928, "learning_rate": 8.97471755247056e-06, "loss": 0.3173, "step": 6741 }, { "epoch": 0.23136582017844887, "grad_norm": 0.7999154944130216, "learning_rate": 8.97438037025361e-06, "loss": 0.3605, "step": 6742 }, { "epoch": 0.23140013726835965, "grad_norm": 0.8498158887136412, "learning_rate": 8.974043138937823e-06, "loss": 0.308, "step": 6743 }, { "epoch": 0.23143445435827043, "grad_norm": 0.8619826106335511, "learning_rate": 8.973705858527368e-06, "loss": 0.2892, "step": 6744 }, { "epoch": 0.2314687714481812, "grad_norm": 0.6869673114020158, "learning_rate": 8.973368529026408e-06, "loss": 0.2717, "step": 6745 }, { "epoch": 0.23150308853809198, "grad_norm": 0.8066208811605466, "learning_rate": 8.973031150439113e-06, "loss": 0.3759, "step": 6746 }, { "epoch": 0.23153740562800273, "grad_norm": 0.7680230952995496, "learning_rate": 8.97269372276965e-06, "loss": 0.2985, "step": 6747 }, { "epoch": 0.23157172271791351, "grad_norm": 0.7956585174433509, "learning_rate": 8.972356246022189e-06, "loss": 0.3005, "step": 6748 }, { "epoch": 0.2316060398078243, "grad_norm": 0.8428300472847602, "learning_rate": 8.972018720200898e-06, "loss": 0.3294, "step": 6749 }, { "epoch": 0.23164035689773507, "grad_norm": 0.9835791345105264, "learning_rate": 8.971681145309944e-06, "loss": 0.3269, "step": 6750 }, { "epoch": 0.23167467398764585, "grad_norm": 0.8864437597280767, "learning_rate": 8.971343521353503e-06, "loss": 0.315, "step": 6751 }, { "epoch": 0.23170899107755663, "grad_norm": 0.7921551186297058, "learning_rate": 8.971005848335741e-06, "loss": 0.3232, "step": 6752 }, { "epoch": 0.2317433081674674, "grad_norm": 0.8426806887758024, "learning_rate": 8.970668126260831e-06, "loss": 0.3204, "step": 6753 }, { "epoch": 0.23177762525737816, "grad_norm": 0.7731700731606936, "learning_rate": 8.970330355132947e-06, "loss": 0.288, "step": 6754 }, { "epoch": 0.23181194234728894, "grad_norm": 0.7586747540152712, "learning_rate": 8.96999253495626e-06, "loss": 0.285, "step": 6755 }, { "epoch": 0.23184625943719972, "grad_norm": 0.8592652151172473, "learning_rate": 8.969654665734946e-06, "loss": 0.326, "step": 6756 }, { "epoch": 0.2318805765271105, "grad_norm": 0.8260649511138425, "learning_rate": 8.969316747473173e-06, "loss": 0.2898, "step": 6757 }, { "epoch": 0.23191489361702128, "grad_norm": 0.7742036965433373, "learning_rate": 8.968978780175122e-06, "loss": 0.3288, "step": 6758 }, { "epoch": 0.23194921070693206, "grad_norm": 0.8282298989670056, "learning_rate": 8.968640763844964e-06, "loss": 0.3919, "step": 6759 }, { "epoch": 0.23198352779684284, "grad_norm": 0.8690966760246265, "learning_rate": 8.968302698486878e-06, "loss": 0.3665, "step": 6760 }, { "epoch": 0.23201784488675362, "grad_norm": 0.7999846878981891, "learning_rate": 8.967964584105038e-06, "loss": 0.3671, "step": 6761 }, { "epoch": 0.23205216197666437, "grad_norm": 0.6978961262978792, "learning_rate": 8.967626420703621e-06, "loss": 0.2876, "step": 6762 }, { "epoch": 0.23208647906657515, "grad_norm": 0.6558187714004617, "learning_rate": 8.967288208286806e-06, "loss": 0.2806, "step": 6763 }, { "epoch": 0.23212079615648593, "grad_norm": 0.8022351158229581, "learning_rate": 8.966949946858771e-06, "loss": 0.3025, "step": 6764 }, { "epoch": 0.2321551132463967, "grad_norm": 0.7249899350974987, "learning_rate": 8.966611636423694e-06, "loss": 0.2672, "step": 6765 }, { "epoch": 0.23218943033630748, "grad_norm": 0.8125911241694351, "learning_rate": 8.966273276985754e-06, "loss": 0.3088, "step": 6766 }, { "epoch": 0.23222374742621826, "grad_norm": 0.8030434568175864, "learning_rate": 8.965934868549134e-06, "loss": 0.3418, "step": 6767 }, { "epoch": 0.23225806451612904, "grad_norm": 0.7750939376148903, "learning_rate": 8.96559641111801e-06, "loss": 0.3334, "step": 6768 }, { "epoch": 0.23229238160603982, "grad_norm": 0.7754619660929122, "learning_rate": 8.965257904696568e-06, "loss": 0.3807, "step": 6769 }, { "epoch": 0.23232669869595057, "grad_norm": 0.7440697980616332, "learning_rate": 8.964919349288986e-06, "loss": 0.2929, "step": 6770 }, { "epoch": 0.23236101578586135, "grad_norm": 0.7921822129467732, "learning_rate": 8.96458074489945e-06, "loss": 0.3427, "step": 6771 }, { "epoch": 0.23239533287577213, "grad_norm": 0.7771518954363863, "learning_rate": 8.964242091532138e-06, "loss": 0.332, "step": 6772 }, { "epoch": 0.2324296499656829, "grad_norm": 0.8944399511918828, "learning_rate": 8.96390338919124e-06, "loss": 0.3366, "step": 6773 }, { "epoch": 0.2324639670555937, "grad_norm": 0.8206410270842822, "learning_rate": 8.963564637880934e-06, "loss": 0.3117, "step": 6774 }, { "epoch": 0.23249828414550447, "grad_norm": 0.7934265558725905, "learning_rate": 8.96322583760541e-06, "loss": 0.3177, "step": 6775 }, { "epoch": 0.23253260123541525, "grad_norm": 0.8618821311885196, "learning_rate": 8.962886988368852e-06, "loss": 0.3391, "step": 6776 }, { "epoch": 0.232566918325326, "grad_norm": 0.6910129053670443, "learning_rate": 8.962548090175445e-06, "loss": 0.3218, "step": 6777 }, { "epoch": 0.23260123541523678, "grad_norm": 0.7485711690804253, "learning_rate": 8.962209143029375e-06, "loss": 0.2927, "step": 6778 }, { "epoch": 0.23263555250514756, "grad_norm": 0.7649646228500229, "learning_rate": 8.961870146934832e-06, "loss": 0.3399, "step": 6779 }, { "epoch": 0.23266986959505834, "grad_norm": 0.819832661147729, "learning_rate": 8.961531101896002e-06, "loss": 0.3382, "step": 6780 }, { "epoch": 0.23270418668496912, "grad_norm": 0.7554662011434685, "learning_rate": 8.961192007917074e-06, "loss": 0.3272, "step": 6781 }, { "epoch": 0.2327385037748799, "grad_norm": 0.7493532248727477, "learning_rate": 8.960852865002236e-06, "loss": 0.337, "step": 6782 }, { "epoch": 0.23277282086479067, "grad_norm": 0.780338024275008, "learning_rate": 8.96051367315568e-06, "loss": 0.3413, "step": 6783 }, { "epoch": 0.23280713795470145, "grad_norm": 0.747918913613302, "learning_rate": 8.960174432381595e-06, "loss": 0.3076, "step": 6784 }, { "epoch": 0.2328414550446122, "grad_norm": 0.740941831531622, "learning_rate": 8.959835142684173e-06, "loss": 0.3228, "step": 6785 }, { "epoch": 0.23287577213452298, "grad_norm": 0.7671894684549133, "learning_rate": 8.959495804067602e-06, "loss": 0.3883, "step": 6786 }, { "epoch": 0.23291008922443376, "grad_norm": 0.6656337621933955, "learning_rate": 8.959156416536077e-06, "loss": 0.3499, "step": 6787 }, { "epoch": 0.23294440631434454, "grad_norm": 0.7661939373957939, "learning_rate": 8.958816980093792e-06, "loss": 0.3962, "step": 6788 }, { "epoch": 0.23297872340425532, "grad_norm": 0.8236095137224235, "learning_rate": 8.958477494744939e-06, "loss": 0.3243, "step": 6789 }, { "epoch": 0.2330130404941661, "grad_norm": 0.794067708295045, "learning_rate": 8.95813796049371e-06, "loss": 0.3704, "step": 6790 }, { "epoch": 0.23304735758407688, "grad_norm": 0.78261778571273, "learning_rate": 8.957798377344301e-06, "loss": 0.3826, "step": 6791 }, { "epoch": 0.23308167467398763, "grad_norm": 0.7619672599290014, "learning_rate": 8.95745874530091e-06, "loss": 0.2965, "step": 6792 }, { "epoch": 0.2331159917638984, "grad_norm": 0.7812887409617294, "learning_rate": 8.957119064367727e-06, "loss": 0.2806, "step": 6793 }, { "epoch": 0.2331503088538092, "grad_norm": 0.8413233321485535, "learning_rate": 8.956779334548952e-06, "loss": 0.3416, "step": 6794 }, { "epoch": 0.23318462594371997, "grad_norm": 0.8235263795081317, "learning_rate": 8.956439555848785e-06, "loss": 0.3188, "step": 6795 }, { "epoch": 0.23321894303363075, "grad_norm": 0.8480593508566995, "learning_rate": 8.956099728271416e-06, "loss": 0.4028, "step": 6796 }, { "epoch": 0.23325326012354153, "grad_norm": 0.8197227446801935, "learning_rate": 8.95575985182105e-06, "loss": 0.3036, "step": 6797 }, { "epoch": 0.2332875772134523, "grad_norm": 0.7774368818087184, "learning_rate": 8.955419926501881e-06, "loss": 0.2882, "step": 6798 }, { "epoch": 0.2333218943033631, "grad_norm": 0.840699207206129, "learning_rate": 8.95507995231811e-06, "loss": 0.3728, "step": 6799 }, { "epoch": 0.23335621139327384, "grad_norm": 0.7641665461514617, "learning_rate": 8.95473992927394e-06, "loss": 0.3421, "step": 6800 }, { "epoch": 0.23339052848318462, "grad_norm": 0.8429598334398817, "learning_rate": 8.954399857373569e-06, "loss": 0.3299, "step": 6801 }, { "epoch": 0.2334248455730954, "grad_norm": 0.8640323941972802, "learning_rate": 8.954059736621195e-06, "loss": 0.354, "step": 6802 }, { "epoch": 0.23345916266300618, "grad_norm": 0.8131265464616884, "learning_rate": 8.953719567021025e-06, "loss": 0.3194, "step": 6803 }, { "epoch": 0.23349347975291695, "grad_norm": 0.7736499766442794, "learning_rate": 8.953379348577259e-06, "loss": 0.372, "step": 6804 }, { "epoch": 0.23352779684282773, "grad_norm": 0.8220864876034213, "learning_rate": 8.9530390812941e-06, "loss": 0.3094, "step": 6805 }, { "epoch": 0.2335621139327385, "grad_norm": 0.8679702731486352, "learning_rate": 8.952698765175752e-06, "loss": 0.3764, "step": 6806 }, { "epoch": 0.2335964310226493, "grad_norm": 0.7615586978012516, "learning_rate": 8.95235840022642e-06, "loss": 0.2888, "step": 6807 }, { "epoch": 0.23363074811256004, "grad_norm": 0.7456524523499188, "learning_rate": 8.952017986450306e-06, "loss": 0.3303, "step": 6808 }, { "epoch": 0.23366506520247082, "grad_norm": 0.8799267864908962, "learning_rate": 8.951677523851619e-06, "loss": 0.3304, "step": 6809 }, { "epoch": 0.2336993822923816, "grad_norm": 0.7555881168909846, "learning_rate": 8.951337012434564e-06, "loss": 0.337, "step": 6810 }, { "epoch": 0.23373369938229238, "grad_norm": 0.8798364251569898, "learning_rate": 8.950996452203345e-06, "loss": 0.3073, "step": 6811 }, { "epoch": 0.23376801647220316, "grad_norm": 0.9023469966988021, "learning_rate": 8.950655843162172e-06, "loss": 0.301, "step": 6812 }, { "epoch": 0.23380233356211394, "grad_norm": 0.7158998827362983, "learning_rate": 8.950315185315253e-06, "loss": 0.3063, "step": 6813 }, { "epoch": 0.23383665065202472, "grad_norm": 0.9044525406919387, "learning_rate": 8.949974478666795e-06, "loss": 0.2978, "step": 6814 }, { "epoch": 0.23387096774193547, "grad_norm": 0.9201720739822459, "learning_rate": 8.949633723221008e-06, "loss": 0.3246, "step": 6815 }, { "epoch": 0.23390528483184625, "grad_norm": 0.715717621639304, "learning_rate": 8.949292918982102e-06, "loss": 0.3732, "step": 6816 }, { "epoch": 0.23393960192175703, "grad_norm": 1.0054691297088356, "learning_rate": 8.948952065954284e-06, "loss": 0.3772, "step": 6817 }, { "epoch": 0.2339739190116678, "grad_norm": 0.8435566877161687, "learning_rate": 8.948611164141767e-06, "loss": 0.352, "step": 6818 }, { "epoch": 0.2340082361015786, "grad_norm": 0.758838594522675, "learning_rate": 8.948270213548766e-06, "loss": 0.3243, "step": 6819 }, { "epoch": 0.23404255319148937, "grad_norm": 0.8001004147568623, "learning_rate": 8.947929214179488e-06, "loss": 0.3343, "step": 6820 }, { "epoch": 0.23407687028140015, "grad_norm": 0.7720659760489916, "learning_rate": 8.947588166038145e-06, "loss": 0.3001, "step": 6821 }, { "epoch": 0.23411118737131092, "grad_norm": 0.7919903428916631, "learning_rate": 8.947247069128956e-06, "loss": 0.2852, "step": 6822 }, { "epoch": 0.23414550446122168, "grad_norm": 0.8055134394388342, "learning_rate": 8.94690592345613e-06, "loss": 0.3348, "step": 6823 }, { "epoch": 0.23417982155113246, "grad_norm": 0.7673506997136218, "learning_rate": 8.946564729023883e-06, "loss": 0.2825, "step": 6824 }, { "epoch": 0.23421413864104323, "grad_norm": 0.8394956650392326, "learning_rate": 8.946223485836432e-06, "loss": 0.3012, "step": 6825 }, { "epoch": 0.234248455730954, "grad_norm": 0.7039002552661434, "learning_rate": 8.945882193897987e-06, "loss": 0.3491, "step": 6826 }, { "epoch": 0.2342827728208648, "grad_norm": 0.7342051603400452, "learning_rate": 8.945540853212771e-06, "loss": 0.2756, "step": 6827 }, { "epoch": 0.23431708991077557, "grad_norm": 0.7612357506808348, "learning_rate": 8.945199463784997e-06, "loss": 0.3384, "step": 6828 }, { "epoch": 0.23435140700068635, "grad_norm": 0.8165191133627046, "learning_rate": 8.944858025618884e-06, "loss": 0.3114, "step": 6829 }, { "epoch": 0.23438572409059713, "grad_norm": 0.7408450131591413, "learning_rate": 8.944516538718647e-06, "loss": 0.3832, "step": 6830 }, { "epoch": 0.23442004118050788, "grad_norm": 0.8672722698482287, "learning_rate": 8.944175003088509e-06, "loss": 0.3642, "step": 6831 }, { "epoch": 0.23445435827041866, "grad_norm": 0.7719564678851774, "learning_rate": 8.943833418732687e-06, "loss": 0.3728, "step": 6832 }, { "epoch": 0.23448867536032944, "grad_norm": 0.769569557173389, "learning_rate": 8.9434917856554e-06, "loss": 0.331, "step": 6833 }, { "epoch": 0.23452299245024022, "grad_norm": 0.8726152174294853, "learning_rate": 8.943150103860872e-06, "loss": 0.3876, "step": 6834 }, { "epoch": 0.234557309540151, "grad_norm": 0.7715215792634414, "learning_rate": 8.94280837335332e-06, "loss": 0.2835, "step": 6835 }, { "epoch": 0.23459162663006178, "grad_norm": 0.7873212263637521, "learning_rate": 8.942466594136968e-06, "loss": 0.334, "step": 6836 }, { "epoch": 0.23462594371997256, "grad_norm": 0.8137088612121719, "learning_rate": 8.942124766216035e-06, "loss": 0.3211, "step": 6837 }, { "epoch": 0.2346602608098833, "grad_norm": 0.8096029937925846, "learning_rate": 8.94178288959475e-06, "loss": 0.3013, "step": 6838 }, { "epoch": 0.2346945778997941, "grad_norm": 0.7769409233089022, "learning_rate": 8.941440964277332e-06, "loss": 0.2969, "step": 6839 }, { "epoch": 0.23472889498970487, "grad_norm": 0.8664626883100466, "learning_rate": 8.941098990268007e-06, "loss": 0.3537, "step": 6840 }, { "epoch": 0.23476321207961565, "grad_norm": 0.7995384146901454, "learning_rate": 8.940756967570998e-06, "loss": 0.3123, "step": 6841 }, { "epoch": 0.23479752916952643, "grad_norm": 0.8296545424558518, "learning_rate": 8.94041489619053e-06, "loss": 0.3371, "step": 6842 }, { "epoch": 0.2348318462594372, "grad_norm": 0.9030708802737136, "learning_rate": 8.940072776130832e-06, "loss": 0.3771, "step": 6843 }, { "epoch": 0.23486616334934798, "grad_norm": 0.7669973463719071, "learning_rate": 8.93973060739613e-06, "loss": 0.3061, "step": 6844 }, { "epoch": 0.23490048043925876, "grad_norm": 0.6788725887143677, "learning_rate": 8.939388389990645e-06, "loss": 0.3615, "step": 6845 }, { "epoch": 0.23493479752916951, "grad_norm": 0.7967307862515836, "learning_rate": 8.93904612391861e-06, "loss": 0.3264, "step": 6846 }, { "epoch": 0.2349691146190803, "grad_norm": 0.7934775723631223, "learning_rate": 8.938703809184257e-06, "loss": 0.349, "step": 6847 }, { "epoch": 0.23500343170899107, "grad_norm": 0.7064379305448927, "learning_rate": 8.938361445791808e-06, "loss": 0.2532, "step": 6848 }, { "epoch": 0.23503774879890185, "grad_norm": 0.7957982522910434, "learning_rate": 8.938019033745495e-06, "loss": 0.2784, "step": 6849 }, { "epoch": 0.23507206588881263, "grad_norm": 0.713058875736685, "learning_rate": 8.937676573049548e-06, "loss": 0.3133, "step": 6850 }, { "epoch": 0.2351063829787234, "grad_norm": 0.8700973648060825, "learning_rate": 8.937334063708197e-06, "loss": 0.3042, "step": 6851 }, { "epoch": 0.2351407000686342, "grad_norm": 0.8200565749414523, "learning_rate": 8.936991505725674e-06, "loss": 0.3563, "step": 6852 }, { "epoch": 0.23517501715854497, "grad_norm": 0.911193767322038, "learning_rate": 8.936648899106213e-06, "loss": 0.4191, "step": 6853 }, { "epoch": 0.23520933424845572, "grad_norm": 0.8209315433364479, "learning_rate": 8.936306243854042e-06, "loss": 0.4188, "step": 6854 }, { "epoch": 0.2352436513383665, "grad_norm": 0.8599225937773224, "learning_rate": 8.9359635399734e-06, "loss": 0.3437, "step": 6855 }, { "epoch": 0.23527796842827728, "grad_norm": 0.8761367284082586, "learning_rate": 8.935620787468516e-06, "loss": 0.2922, "step": 6856 }, { "epoch": 0.23531228551818806, "grad_norm": 0.8461773005624917, "learning_rate": 8.935277986343624e-06, "loss": 0.3432, "step": 6857 }, { "epoch": 0.23534660260809884, "grad_norm": 0.8413486572481547, "learning_rate": 8.934935136602962e-06, "loss": 0.2995, "step": 6858 }, { "epoch": 0.23538091969800962, "grad_norm": 0.8145538176849246, "learning_rate": 8.934592238250763e-06, "loss": 0.3134, "step": 6859 }, { "epoch": 0.2354152367879204, "grad_norm": 0.7968317313712118, "learning_rate": 8.934249291291265e-06, "loss": 0.3349, "step": 6860 }, { "epoch": 0.23544955387783115, "grad_norm": 0.7549412660167414, "learning_rate": 8.933906295728705e-06, "loss": 0.3035, "step": 6861 }, { "epoch": 0.23548387096774193, "grad_norm": 1.0124202581358634, "learning_rate": 8.933563251567319e-06, "loss": 0.3353, "step": 6862 }, { "epoch": 0.2355181880576527, "grad_norm": 0.8388790588930883, "learning_rate": 8.933220158811345e-06, "loss": 0.3054, "step": 6863 }, { "epoch": 0.23555250514756348, "grad_norm": 0.823045075514075, "learning_rate": 8.932877017465019e-06, "loss": 0.3683, "step": 6864 }, { "epoch": 0.23558682223747426, "grad_norm": 0.8171645416439408, "learning_rate": 8.932533827532585e-06, "loss": 0.3214, "step": 6865 }, { "epoch": 0.23562113932738504, "grad_norm": 0.7288262018664682, "learning_rate": 8.93219058901828e-06, "loss": 0.3258, "step": 6866 }, { "epoch": 0.23565545641729582, "grad_norm": 0.8173192728850929, "learning_rate": 8.931847301926344e-06, "loss": 0.3373, "step": 6867 }, { "epoch": 0.2356897735072066, "grad_norm": 0.7196857433233838, "learning_rate": 8.931503966261021e-06, "loss": 0.3678, "step": 6868 }, { "epoch": 0.23572409059711735, "grad_norm": 0.9818358261504806, "learning_rate": 8.931160582026547e-06, "loss": 0.3236, "step": 6869 }, { "epoch": 0.23575840768702813, "grad_norm": 0.7691650580941194, "learning_rate": 8.93081714922717e-06, "loss": 0.3734, "step": 6870 }, { "epoch": 0.2357927247769389, "grad_norm": 0.9128695587669556, "learning_rate": 8.93047366786713e-06, "loss": 0.369, "step": 6871 }, { "epoch": 0.2358270418668497, "grad_norm": 0.7665342981635478, "learning_rate": 8.930130137950667e-06, "loss": 0.3157, "step": 6872 }, { "epoch": 0.23586135895676047, "grad_norm": 0.7967829885963932, "learning_rate": 8.929786559482032e-06, "loss": 0.3084, "step": 6873 }, { "epoch": 0.23589567604667125, "grad_norm": 0.7959049458869247, "learning_rate": 8.929442932465464e-06, "loss": 0.3506, "step": 6874 }, { "epoch": 0.23592999313658203, "grad_norm": 0.7926466751555838, "learning_rate": 8.929099256905211e-06, "loss": 0.3819, "step": 6875 }, { "epoch": 0.2359643102264928, "grad_norm": 0.7659815770899566, "learning_rate": 8.928755532805515e-06, "loss": 0.2931, "step": 6876 }, { "epoch": 0.23599862731640356, "grad_norm": 0.772636323052561, "learning_rate": 8.928411760170627e-06, "loss": 0.3485, "step": 6877 }, { "epoch": 0.23603294440631434, "grad_norm": 0.836507071152659, "learning_rate": 8.928067939004792e-06, "loss": 0.3553, "step": 6878 }, { "epoch": 0.23606726149622512, "grad_norm": 0.8924117589214827, "learning_rate": 8.927724069312255e-06, "loss": 0.2896, "step": 6879 }, { "epoch": 0.2361015785861359, "grad_norm": 0.7590743632031132, "learning_rate": 8.92738015109727e-06, "loss": 0.3329, "step": 6880 }, { "epoch": 0.23613589567604668, "grad_norm": 0.9032843812064678, "learning_rate": 8.92703618436408e-06, "loss": 0.3485, "step": 6881 }, { "epoch": 0.23617021276595745, "grad_norm": 0.8106219233788795, "learning_rate": 8.926692169116937e-06, "loss": 0.3328, "step": 6882 }, { "epoch": 0.23620452985586823, "grad_norm": 0.7065493004540343, "learning_rate": 8.92634810536009e-06, "loss": 0.2841, "step": 6883 }, { "epoch": 0.23623884694577899, "grad_norm": 0.7389528806796525, "learning_rate": 8.92600399309779e-06, "loss": 0.3072, "step": 6884 }, { "epoch": 0.23627316403568976, "grad_norm": 0.8223166339682898, "learning_rate": 8.925659832334287e-06, "loss": 0.2892, "step": 6885 }, { "epoch": 0.23630748112560054, "grad_norm": 0.804733969642366, "learning_rate": 8.925315623073835e-06, "loss": 0.3581, "step": 6886 }, { "epoch": 0.23634179821551132, "grad_norm": 0.7880208737985928, "learning_rate": 8.924971365320684e-06, "loss": 0.3735, "step": 6887 }, { "epoch": 0.2363761153054221, "grad_norm": 0.8398836856333822, "learning_rate": 8.924627059079088e-06, "loss": 0.3175, "step": 6888 }, { "epoch": 0.23641043239533288, "grad_norm": 0.8204216621074129, "learning_rate": 8.9242827043533e-06, "loss": 0.2756, "step": 6889 }, { "epoch": 0.23644474948524366, "grad_norm": 0.8475066134829741, "learning_rate": 8.923938301147575e-06, "loss": 0.304, "step": 6890 }, { "epoch": 0.23647906657515444, "grad_norm": 0.7605482591552177, "learning_rate": 8.923593849466168e-06, "loss": 0.3637, "step": 6891 }, { "epoch": 0.2365133836650652, "grad_norm": 0.7448248297516027, "learning_rate": 8.923249349313332e-06, "loss": 0.3429, "step": 6892 }, { "epoch": 0.23654770075497597, "grad_norm": 0.7212859462112869, "learning_rate": 8.922904800693326e-06, "loss": 0.2914, "step": 6893 }, { "epoch": 0.23658201784488675, "grad_norm": 0.788539517407865, "learning_rate": 8.922560203610402e-06, "loss": 0.3281, "step": 6894 }, { "epoch": 0.23661633493479753, "grad_norm": 0.7620379664601367, "learning_rate": 8.922215558068823e-06, "loss": 0.3145, "step": 6895 }, { "epoch": 0.2366506520247083, "grad_norm": 0.7500820451483194, "learning_rate": 8.921870864072841e-06, "loss": 0.3217, "step": 6896 }, { "epoch": 0.2366849691146191, "grad_norm": 0.7421362449541651, "learning_rate": 8.92152612162672e-06, "loss": 0.3476, "step": 6897 }, { "epoch": 0.23671928620452987, "grad_norm": 0.7843428912191293, "learning_rate": 8.921181330734714e-06, "loss": 0.3433, "step": 6898 }, { "epoch": 0.23675360329444062, "grad_norm": 0.7711056414924781, "learning_rate": 8.920836491401083e-06, "loss": 0.3538, "step": 6899 }, { "epoch": 0.2367879203843514, "grad_norm": 0.8955537587825814, "learning_rate": 8.92049160363009e-06, "loss": 0.3705, "step": 6900 }, { "epoch": 0.23682223747426218, "grad_norm": 0.8004831096661155, "learning_rate": 8.920146667425994e-06, "loss": 0.3149, "step": 6901 }, { "epoch": 0.23685655456417296, "grad_norm": 0.7909560025434959, "learning_rate": 8.919801682793057e-06, "loss": 0.3259, "step": 6902 }, { "epoch": 0.23689087165408373, "grad_norm": 0.8291024660019312, "learning_rate": 8.919456649735541e-06, "loss": 0.3681, "step": 6903 }, { "epoch": 0.2369251887439945, "grad_norm": 0.8548895968573865, "learning_rate": 8.919111568257705e-06, "loss": 0.3236, "step": 6904 }, { "epoch": 0.2369595058339053, "grad_norm": 0.8735345568459211, "learning_rate": 8.918766438363816e-06, "loss": 0.3604, "step": 6905 }, { "epoch": 0.23699382292381607, "grad_norm": 0.7635085223091683, "learning_rate": 8.918421260058136e-06, "loss": 0.3081, "step": 6906 }, { "epoch": 0.23702814001372682, "grad_norm": 0.7918158935203244, "learning_rate": 8.91807603334493e-06, "loss": 0.3521, "step": 6907 }, { "epoch": 0.2370624571036376, "grad_norm": 0.7780611461766681, "learning_rate": 8.917730758228464e-06, "loss": 0.2957, "step": 6908 }, { "epoch": 0.23709677419354838, "grad_norm": 0.8629920422063146, "learning_rate": 8.917385434713002e-06, "loss": 0.3343, "step": 6909 }, { "epoch": 0.23713109128345916, "grad_norm": 0.7112470152607926, "learning_rate": 8.917040062802808e-06, "loss": 0.2964, "step": 6910 }, { "epoch": 0.23716540837336994, "grad_norm": 0.7545689406404823, "learning_rate": 8.916694642502152e-06, "loss": 0.3189, "step": 6911 }, { "epoch": 0.23719972546328072, "grad_norm": 0.7283986578047996, "learning_rate": 8.916349173815301e-06, "loss": 0.2893, "step": 6912 }, { "epoch": 0.2372340425531915, "grad_norm": 0.7495942512239337, "learning_rate": 8.916003656746522e-06, "loss": 0.3111, "step": 6913 }, { "epoch": 0.23726835964310228, "grad_norm": 0.8394836413367748, "learning_rate": 8.915658091300082e-06, "loss": 0.2964, "step": 6914 }, { "epoch": 0.23730267673301303, "grad_norm": 0.833145873492407, "learning_rate": 8.915312477480252e-06, "loss": 0.3434, "step": 6915 }, { "epoch": 0.2373369938229238, "grad_norm": 0.9030463661069003, "learning_rate": 8.914966815291301e-06, "loss": 0.366, "step": 6916 }, { "epoch": 0.2373713109128346, "grad_norm": 0.7890370385956326, "learning_rate": 8.9146211047375e-06, "loss": 0.3222, "step": 6917 }, { "epoch": 0.23740562800274537, "grad_norm": 0.8178739212776606, "learning_rate": 8.91427534582312e-06, "loss": 0.3479, "step": 6918 }, { "epoch": 0.23743994509265615, "grad_norm": 0.77044109749034, "learning_rate": 8.91392953855243e-06, "loss": 0.3377, "step": 6919 }, { "epoch": 0.23747426218256693, "grad_norm": 0.8908768506966965, "learning_rate": 8.913583682929705e-06, "loss": 0.3357, "step": 6920 }, { "epoch": 0.2375085792724777, "grad_norm": 0.7767787024933074, "learning_rate": 8.913237778959216e-06, "loss": 0.3235, "step": 6921 }, { "epoch": 0.23754289636238846, "grad_norm": 0.7641842152274813, "learning_rate": 8.912891826645236e-06, "loss": 0.3778, "step": 6922 }, { "epoch": 0.23757721345229924, "grad_norm": 0.9256583155110873, "learning_rate": 8.91254582599204e-06, "loss": 0.3235, "step": 6923 }, { "epoch": 0.23761153054221001, "grad_norm": 0.8431572732120508, "learning_rate": 8.912199777003902e-06, "loss": 0.3153, "step": 6924 }, { "epoch": 0.2376458476321208, "grad_norm": 0.7997588301011417, "learning_rate": 8.911853679685097e-06, "loss": 0.2994, "step": 6925 }, { "epoch": 0.23768016472203157, "grad_norm": 0.8289193848538805, "learning_rate": 8.911507534039901e-06, "loss": 0.3243, "step": 6926 }, { "epoch": 0.23771448181194235, "grad_norm": 0.7867926455548819, "learning_rate": 8.911161340072588e-06, "loss": 0.3215, "step": 6927 }, { "epoch": 0.23774879890185313, "grad_norm": 0.7707505699624364, "learning_rate": 8.910815097787437e-06, "loss": 0.3244, "step": 6928 }, { "epoch": 0.2377831159917639, "grad_norm": 0.7744197769811635, "learning_rate": 8.910468807188727e-06, "loss": 0.3053, "step": 6929 }, { "epoch": 0.23781743308167466, "grad_norm": 0.769443727595027, "learning_rate": 8.910122468280733e-06, "loss": 0.3661, "step": 6930 }, { "epoch": 0.23785175017158544, "grad_norm": 0.8761952283262497, "learning_rate": 8.909776081067734e-06, "loss": 0.3087, "step": 6931 }, { "epoch": 0.23788606726149622, "grad_norm": 0.9283219517667709, "learning_rate": 8.90942964555401e-06, "loss": 0.374, "step": 6932 }, { "epoch": 0.237920384351407, "grad_norm": 0.7438280982351492, "learning_rate": 8.90908316174384e-06, "loss": 0.2953, "step": 6933 }, { "epoch": 0.23795470144131778, "grad_norm": 0.9074706599438391, "learning_rate": 8.908736629641508e-06, "loss": 0.3662, "step": 6934 }, { "epoch": 0.23798901853122856, "grad_norm": 0.831417611470327, "learning_rate": 8.90839004925129e-06, "loss": 0.3682, "step": 6935 }, { "epoch": 0.23802333562113934, "grad_norm": 0.7800140557394672, "learning_rate": 8.90804342057747e-06, "loss": 0.3155, "step": 6936 }, { "epoch": 0.23805765271105012, "grad_norm": 0.9977876606264416, "learning_rate": 8.90769674362433e-06, "loss": 0.3109, "step": 6937 }, { "epoch": 0.23809196980096087, "grad_norm": 0.8845284961541682, "learning_rate": 8.907350018396153e-06, "loss": 0.3666, "step": 6938 }, { "epoch": 0.23812628689087165, "grad_norm": 0.812316900880827, "learning_rate": 8.907003244897221e-06, "loss": 0.3143, "step": 6939 }, { "epoch": 0.23816060398078243, "grad_norm": 0.7839448808860924, "learning_rate": 8.90665642313182e-06, "loss": 0.3575, "step": 6940 }, { "epoch": 0.2381949210706932, "grad_norm": 0.8487673520814282, "learning_rate": 8.906309553104233e-06, "loss": 0.3557, "step": 6941 }, { "epoch": 0.23822923816060398, "grad_norm": 0.7119465203159216, "learning_rate": 8.905962634818746e-06, "loss": 0.3217, "step": 6942 }, { "epoch": 0.23826355525051476, "grad_norm": 0.7566213564841715, "learning_rate": 8.905615668279643e-06, "loss": 0.3247, "step": 6943 }, { "epoch": 0.23829787234042554, "grad_norm": 0.7957095820117717, "learning_rate": 8.905268653491215e-06, "loss": 0.3699, "step": 6944 }, { "epoch": 0.2383321894303363, "grad_norm": 0.8191551970841275, "learning_rate": 8.904921590457744e-06, "loss": 0.3471, "step": 6945 }, { "epoch": 0.23836650652024707, "grad_norm": 0.8416162211140884, "learning_rate": 8.90457447918352e-06, "loss": 0.302, "step": 6946 }, { "epoch": 0.23840082361015785, "grad_norm": 0.794092606647606, "learning_rate": 8.90422731967283e-06, "loss": 0.2843, "step": 6947 }, { "epoch": 0.23843514070006863, "grad_norm": 0.7949633818732063, "learning_rate": 8.903880111929964e-06, "loss": 0.3392, "step": 6948 }, { "epoch": 0.2384694577899794, "grad_norm": 0.842901438538335, "learning_rate": 8.90353285595921e-06, "loss": 0.3507, "step": 6949 }, { "epoch": 0.2385037748798902, "grad_norm": 0.7710133593360948, "learning_rate": 8.903185551764858e-06, "loss": 0.3496, "step": 6950 }, { "epoch": 0.23853809196980097, "grad_norm": 0.8376405316114448, "learning_rate": 8.902838199351201e-06, "loss": 0.3056, "step": 6951 }, { "epoch": 0.23857240905971175, "grad_norm": 0.7738346850581042, "learning_rate": 8.902490798722527e-06, "loss": 0.2969, "step": 6952 }, { "epoch": 0.2386067261496225, "grad_norm": 0.8557551990248055, "learning_rate": 8.90214334988313e-06, "loss": 0.3295, "step": 6953 }, { "epoch": 0.23864104323953328, "grad_norm": 0.7934159154534802, "learning_rate": 8.901795852837301e-06, "loss": 0.3145, "step": 6954 }, { "epoch": 0.23867536032944406, "grad_norm": 0.804258350561957, "learning_rate": 8.901448307589333e-06, "loss": 0.2716, "step": 6955 }, { "epoch": 0.23870967741935484, "grad_norm": 0.9068169422105616, "learning_rate": 8.90110071414352e-06, "loss": 0.3162, "step": 6956 }, { "epoch": 0.23874399450926562, "grad_norm": 0.871771056402504, "learning_rate": 8.900753072504156e-06, "loss": 0.311, "step": 6957 }, { "epoch": 0.2387783115991764, "grad_norm": 0.6937119508191405, "learning_rate": 8.900405382675537e-06, "loss": 0.2821, "step": 6958 }, { "epoch": 0.23881262868908718, "grad_norm": 0.7210608568510545, "learning_rate": 8.900057644661955e-06, "loss": 0.3352, "step": 6959 }, { "epoch": 0.23884694577899795, "grad_norm": 0.7237144734006988, "learning_rate": 8.89970985846771e-06, "loss": 0.3087, "step": 6960 }, { "epoch": 0.2388812628689087, "grad_norm": 0.7468918324294794, "learning_rate": 8.899362024097094e-06, "loss": 0.2614, "step": 6961 }, { "epoch": 0.23891557995881949, "grad_norm": 0.8702328053425792, "learning_rate": 8.899014141554407e-06, "loss": 0.3686, "step": 6962 }, { "epoch": 0.23894989704873026, "grad_norm": 0.8802758639198882, "learning_rate": 8.898666210843946e-06, "loss": 0.325, "step": 6963 }, { "epoch": 0.23898421413864104, "grad_norm": 0.7766390260334488, "learning_rate": 8.898318231970008e-06, "loss": 0.3655, "step": 6964 }, { "epoch": 0.23901853122855182, "grad_norm": 0.6913248199375198, "learning_rate": 8.897970204936897e-06, "loss": 0.3306, "step": 6965 }, { "epoch": 0.2390528483184626, "grad_norm": 0.7945825887543233, "learning_rate": 8.897622129748906e-06, "loss": 0.3033, "step": 6966 }, { "epoch": 0.23908716540837338, "grad_norm": 0.7617465833728, "learning_rate": 8.897274006410338e-06, "loss": 0.3226, "step": 6967 }, { "epoch": 0.23912148249828413, "grad_norm": 0.8114567704422216, "learning_rate": 8.896925834925493e-06, "loss": 0.3968, "step": 6968 }, { "epoch": 0.2391557995881949, "grad_norm": 0.7783716219576889, "learning_rate": 8.896577615298674e-06, "loss": 0.309, "step": 6969 }, { "epoch": 0.2391901166781057, "grad_norm": 0.7957696133586872, "learning_rate": 8.896229347534182e-06, "loss": 0.3456, "step": 6970 }, { "epoch": 0.23922443376801647, "grad_norm": 0.854088610645964, "learning_rate": 8.895881031636317e-06, "loss": 0.3559, "step": 6971 }, { "epoch": 0.23925875085792725, "grad_norm": 0.8478699956524607, "learning_rate": 8.895532667609386e-06, "loss": 0.3338, "step": 6972 }, { "epoch": 0.23929306794783803, "grad_norm": 0.8428463313355131, "learning_rate": 8.895184255457688e-06, "loss": 0.2858, "step": 6973 }, { "epoch": 0.2393273850377488, "grad_norm": 0.7682449598594577, "learning_rate": 8.894835795185533e-06, "loss": 0.3211, "step": 6974 }, { "epoch": 0.2393617021276596, "grad_norm": 0.8368884640249883, "learning_rate": 8.89448728679722e-06, "loss": 0.3071, "step": 6975 }, { "epoch": 0.23939601921757034, "grad_norm": 0.7382019615041758, "learning_rate": 8.894138730297058e-06, "loss": 0.2836, "step": 6976 }, { "epoch": 0.23943033630748112, "grad_norm": 0.8594144371170206, "learning_rate": 8.893790125689353e-06, "loss": 0.3616, "step": 6977 }, { "epoch": 0.2394646533973919, "grad_norm": 0.9485908402068757, "learning_rate": 8.89344147297841e-06, "loss": 0.3931, "step": 6978 }, { "epoch": 0.23949897048730268, "grad_norm": 0.7507340980287277, "learning_rate": 8.893092772168536e-06, "loss": 0.3387, "step": 6979 }, { "epoch": 0.23953328757721346, "grad_norm": 0.7249473885723272, "learning_rate": 8.892744023264042e-06, "loss": 0.3054, "step": 6980 }, { "epoch": 0.23956760466712423, "grad_norm": 0.8893412746615967, "learning_rate": 8.892395226269232e-06, "loss": 0.3625, "step": 6981 }, { "epoch": 0.239601921757035, "grad_norm": 0.7922084352139331, "learning_rate": 8.892046381188418e-06, "loss": 0.3339, "step": 6982 }, { "epoch": 0.2396362388469458, "grad_norm": 0.8381859141566533, "learning_rate": 8.891697488025908e-06, "loss": 0.3797, "step": 6983 }, { "epoch": 0.23967055593685654, "grad_norm": 0.7955702542421199, "learning_rate": 8.891348546786014e-06, "loss": 0.2926, "step": 6984 }, { "epoch": 0.23970487302676732, "grad_norm": 1.008650862759439, "learning_rate": 8.890999557473044e-06, "loss": 0.3378, "step": 6985 }, { "epoch": 0.2397391901166781, "grad_norm": 0.747397153766693, "learning_rate": 8.890650520091312e-06, "loss": 0.3654, "step": 6986 }, { "epoch": 0.23977350720658888, "grad_norm": 0.8466943285660385, "learning_rate": 8.89030143464513e-06, "loss": 0.3465, "step": 6987 }, { "epoch": 0.23980782429649966, "grad_norm": 0.8633129616612092, "learning_rate": 8.889952301138807e-06, "loss": 0.3115, "step": 6988 }, { "epoch": 0.23984214138641044, "grad_norm": 0.8601539269772861, "learning_rate": 8.88960311957666e-06, "loss": 0.3185, "step": 6989 }, { "epoch": 0.23987645847632122, "grad_norm": 0.8294185981175056, "learning_rate": 8.889253889963e-06, "loss": 0.3331, "step": 6990 }, { "epoch": 0.23991077556623197, "grad_norm": 0.7118303608032678, "learning_rate": 8.888904612302145e-06, "loss": 0.2916, "step": 6991 }, { "epoch": 0.23994509265614275, "grad_norm": 0.8416951835072324, "learning_rate": 8.888555286598406e-06, "loss": 0.3097, "step": 6992 }, { "epoch": 0.23997940974605353, "grad_norm": 0.7741538772205874, "learning_rate": 8.8882059128561e-06, "loss": 0.2888, "step": 6993 }, { "epoch": 0.2400137268359643, "grad_norm": 0.8297081941770182, "learning_rate": 8.887856491079543e-06, "loss": 0.307, "step": 6994 }, { "epoch": 0.2400480439258751, "grad_norm": 0.7611320415977265, "learning_rate": 8.887507021273051e-06, "loss": 0.2971, "step": 6995 }, { "epoch": 0.24008236101578587, "grad_norm": 0.7127174201544376, "learning_rate": 8.887157503440943e-06, "loss": 0.3135, "step": 6996 }, { "epoch": 0.24011667810569665, "grad_norm": 0.9051627314943955, "learning_rate": 8.886807937587539e-06, "loss": 0.3054, "step": 6997 }, { "epoch": 0.24015099519560743, "grad_norm": 0.7368846209526613, "learning_rate": 8.886458323717152e-06, "loss": 0.3058, "step": 6998 }, { "epoch": 0.24018531228551818, "grad_norm": 0.8391892770026903, "learning_rate": 8.886108661834102e-06, "loss": 0.354, "step": 6999 }, { "epoch": 0.24021962937542896, "grad_norm": 0.7209191717727235, "learning_rate": 8.885758951942712e-06, "loss": 0.324, "step": 7000 }, { "epoch": 0.24025394646533973, "grad_norm": 0.7530773938506652, "learning_rate": 8.8854091940473e-06, "loss": 0.3589, "step": 7001 }, { "epoch": 0.24028826355525051, "grad_norm": 0.9158441828633284, "learning_rate": 8.88505938815219e-06, "loss": 0.316, "step": 7002 }, { "epoch": 0.2403225806451613, "grad_norm": 0.7680681679451116, "learning_rate": 8.884709534261696e-06, "loss": 0.3003, "step": 7003 }, { "epoch": 0.24035689773507207, "grad_norm": 0.7621844067915766, "learning_rate": 8.884359632380149e-06, "loss": 0.3314, "step": 7004 }, { "epoch": 0.24039121482498285, "grad_norm": 0.7465320129380653, "learning_rate": 8.884009682511866e-06, "loss": 0.3249, "step": 7005 }, { "epoch": 0.2404255319148936, "grad_norm": 0.6841352332066174, "learning_rate": 8.88365968466117e-06, "loss": 0.3122, "step": 7006 }, { "epoch": 0.24045984900480438, "grad_norm": 0.7755776342215903, "learning_rate": 8.883309638832387e-06, "loss": 0.3658, "step": 7007 }, { "epoch": 0.24049416609471516, "grad_norm": 0.7187701836916346, "learning_rate": 8.882959545029842e-06, "loss": 0.3068, "step": 7008 }, { "epoch": 0.24052848318462594, "grad_norm": 0.867773001855278, "learning_rate": 8.88260940325786e-06, "loss": 0.3347, "step": 7009 }, { "epoch": 0.24056280027453672, "grad_norm": 0.7862738887539674, "learning_rate": 8.882259213520762e-06, "loss": 0.3619, "step": 7010 }, { "epoch": 0.2405971173644475, "grad_norm": 0.7451890950621147, "learning_rate": 8.881908975822881e-06, "loss": 0.3238, "step": 7011 }, { "epoch": 0.24063143445435828, "grad_norm": 0.8688610925071337, "learning_rate": 8.881558690168539e-06, "loss": 0.3402, "step": 7012 }, { "epoch": 0.24066575154426906, "grad_norm": 0.8246334779010603, "learning_rate": 8.881208356562066e-06, "loss": 0.2829, "step": 7013 }, { "epoch": 0.2407000686341798, "grad_norm": 0.7550225714921396, "learning_rate": 8.880857975007786e-06, "loss": 0.2973, "step": 7014 }, { "epoch": 0.2407343857240906, "grad_norm": 0.7677217680202254, "learning_rate": 8.880507545510035e-06, "loss": 0.3193, "step": 7015 }, { "epoch": 0.24076870281400137, "grad_norm": 0.8280205394724377, "learning_rate": 8.880157068073133e-06, "loss": 0.2713, "step": 7016 }, { "epoch": 0.24080301990391215, "grad_norm": 0.7838708092280835, "learning_rate": 8.879806542701416e-06, "loss": 0.324, "step": 7017 }, { "epoch": 0.24083733699382293, "grad_norm": 0.7450753100834919, "learning_rate": 8.879455969399214e-06, "loss": 0.4003, "step": 7018 }, { "epoch": 0.2408716540837337, "grad_norm": 0.7514102663852537, "learning_rate": 8.879105348170855e-06, "loss": 0.322, "step": 7019 }, { "epoch": 0.24090597117364448, "grad_norm": 0.8111613488437908, "learning_rate": 8.878754679020672e-06, "loss": 0.3342, "step": 7020 }, { "epoch": 0.24094028826355526, "grad_norm": 0.7749099201369193, "learning_rate": 8.878403961952997e-06, "loss": 0.3208, "step": 7021 }, { "epoch": 0.24097460535346601, "grad_norm": 0.7717875967323601, "learning_rate": 8.878053196972162e-06, "loss": 0.3382, "step": 7022 }, { "epoch": 0.2410089224433768, "grad_norm": 0.8570423196479877, "learning_rate": 8.877702384082504e-06, "loss": 0.3052, "step": 7023 }, { "epoch": 0.24104323953328757, "grad_norm": 0.68043348126792, "learning_rate": 8.877351523288352e-06, "loss": 0.268, "step": 7024 }, { "epoch": 0.24107755662319835, "grad_norm": 0.7235860441019032, "learning_rate": 8.877000614594042e-06, "loss": 0.2997, "step": 7025 }, { "epoch": 0.24111187371310913, "grad_norm": 0.8008495429859028, "learning_rate": 8.876649658003911e-06, "loss": 0.3407, "step": 7026 }, { "epoch": 0.2411461908030199, "grad_norm": 0.8020190346511192, "learning_rate": 8.876298653522293e-06, "loss": 0.3022, "step": 7027 }, { "epoch": 0.2411805078929307, "grad_norm": 0.6921389248360976, "learning_rate": 8.875947601153524e-06, "loss": 0.3109, "step": 7028 }, { "epoch": 0.24121482498284144, "grad_norm": 0.7899347663289368, "learning_rate": 8.875596500901942e-06, "loss": 0.3791, "step": 7029 }, { "epoch": 0.24124914207275222, "grad_norm": 0.7967249196151953, "learning_rate": 8.875245352771881e-06, "loss": 0.3655, "step": 7030 }, { "epoch": 0.241283459162663, "grad_norm": 0.8241228354582381, "learning_rate": 8.874894156767686e-06, "loss": 0.3633, "step": 7031 }, { "epoch": 0.24131777625257378, "grad_norm": 0.7868956647965538, "learning_rate": 8.874542912893689e-06, "loss": 0.3492, "step": 7032 }, { "epoch": 0.24135209334248456, "grad_norm": 0.8341536556126988, "learning_rate": 8.874191621154232e-06, "loss": 0.3305, "step": 7033 }, { "epoch": 0.24138641043239534, "grad_norm": 0.7839804303412081, "learning_rate": 8.873840281553654e-06, "loss": 0.3546, "step": 7034 }, { "epoch": 0.24142072752230612, "grad_norm": 0.8330796736040458, "learning_rate": 8.873488894096296e-06, "loss": 0.3521, "step": 7035 }, { "epoch": 0.2414550446122169, "grad_norm": 0.8359623616336369, "learning_rate": 8.873137458786498e-06, "loss": 0.3203, "step": 7036 }, { "epoch": 0.24148936170212765, "grad_norm": 0.8203533568125639, "learning_rate": 8.872785975628603e-06, "loss": 0.3368, "step": 7037 }, { "epoch": 0.24152367879203843, "grad_norm": 1.1621019946378475, "learning_rate": 8.872434444626953e-06, "loss": 0.3012, "step": 7038 }, { "epoch": 0.2415579958819492, "grad_norm": 0.8044674940343138, "learning_rate": 8.87208286578589e-06, "loss": 0.3581, "step": 7039 }, { "epoch": 0.24159231297185998, "grad_norm": 0.8683443800614389, "learning_rate": 8.871731239109757e-06, "loss": 0.2945, "step": 7040 }, { "epoch": 0.24162663006177076, "grad_norm": 0.7143951001165414, "learning_rate": 8.871379564602898e-06, "loss": 0.3097, "step": 7041 }, { "epoch": 0.24166094715168154, "grad_norm": 0.72445020957734, "learning_rate": 8.87102784226966e-06, "loss": 0.3701, "step": 7042 }, { "epoch": 0.24169526424159232, "grad_norm": 0.8676804761038619, "learning_rate": 8.870676072114385e-06, "loss": 0.3156, "step": 7043 }, { "epoch": 0.2417295813315031, "grad_norm": 1.1788305609873526, "learning_rate": 8.87032425414142e-06, "loss": 0.3324, "step": 7044 }, { "epoch": 0.24176389842141385, "grad_norm": 0.963337265677573, "learning_rate": 8.869972388355111e-06, "loss": 0.3004, "step": 7045 }, { "epoch": 0.24179821551132463, "grad_norm": 0.8602548905965844, "learning_rate": 8.869620474759806e-06, "loss": 0.3435, "step": 7046 }, { "epoch": 0.2418325326012354, "grad_norm": 0.783329718472297, "learning_rate": 8.869268513359851e-06, "loss": 0.2718, "step": 7047 }, { "epoch": 0.2418668496911462, "grad_norm": 0.9162601073831961, "learning_rate": 8.868916504159595e-06, "loss": 0.3296, "step": 7048 }, { "epoch": 0.24190116678105697, "grad_norm": 0.822682735024009, "learning_rate": 8.868564447163385e-06, "loss": 0.2891, "step": 7049 }, { "epoch": 0.24193548387096775, "grad_norm": 0.8007544541572416, "learning_rate": 8.868212342375573e-06, "loss": 0.334, "step": 7050 }, { "epoch": 0.24196980096087853, "grad_norm": 0.6508192416082788, "learning_rate": 8.867860189800506e-06, "loss": 0.2922, "step": 7051 }, { "epoch": 0.24200411805078928, "grad_norm": 0.7528941905138445, "learning_rate": 8.867507989442539e-06, "loss": 0.3131, "step": 7052 }, { "epoch": 0.24203843514070006, "grad_norm": 0.7884771320647255, "learning_rate": 8.867155741306018e-06, "loss": 0.3359, "step": 7053 }, { "epoch": 0.24207275223061084, "grad_norm": 0.7351520023383643, "learning_rate": 8.866803445395295e-06, "loss": 0.2817, "step": 7054 }, { "epoch": 0.24210706932052162, "grad_norm": 0.8391847222878278, "learning_rate": 8.866451101714723e-06, "loss": 0.3044, "step": 7055 }, { "epoch": 0.2421413864104324, "grad_norm": 0.7707985172827777, "learning_rate": 8.866098710268657e-06, "loss": 0.3094, "step": 7056 }, { "epoch": 0.24217570350034318, "grad_norm": 0.7825432425393152, "learning_rate": 8.865746271061451e-06, "loss": 0.3747, "step": 7057 }, { "epoch": 0.24221002059025395, "grad_norm": 0.7672951537491186, "learning_rate": 8.865393784097455e-06, "loss": 0.3359, "step": 7058 }, { "epoch": 0.24224433768016473, "grad_norm": 0.7963733424500324, "learning_rate": 8.865041249381026e-06, "loss": 0.3033, "step": 7059 }, { "epoch": 0.24227865477007549, "grad_norm": 0.7937418459584924, "learning_rate": 8.864688666916516e-06, "loss": 0.3444, "step": 7060 }, { "epoch": 0.24231297185998626, "grad_norm": 0.7110568916870678, "learning_rate": 8.864336036708286e-06, "loss": 0.3546, "step": 7061 }, { "epoch": 0.24234728894989704, "grad_norm": 0.8049709148533019, "learning_rate": 8.86398335876069e-06, "loss": 0.3312, "step": 7062 }, { "epoch": 0.24238160603980782, "grad_norm": 0.8106944736965798, "learning_rate": 8.863630633078083e-06, "loss": 0.2791, "step": 7063 }, { "epoch": 0.2424159231297186, "grad_norm": 0.7784791211455098, "learning_rate": 8.863277859664825e-06, "loss": 0.3237, "step": 7064 }, { "epoch": 0.24245024021962938, "grad_norm": 0.750616392937096, "learning_rate": 8.862925038525272e-06, "loss": 0.3109, "step": 7065 }, { "epoch": 0.24248455730954016, "grad_norm": 0.8410489499566721, "learning_rate": 8.862572169663785e-06, "loss": 0.3203, "step": 7066 }, { "epoch": 0.24251887439945094, "grad_norm": 0.7793640255261655, "learning_rate": 8.862219253084722e-06, "loss": 0.3257, "step": 7067 }, { "epoch": 0.2425531914893617, "grad_norm": 0.754478438051335, "learning_rate": 8.861866288792442e-06, "loss": 0.322, "step": 7068 }, { "epoch": 0.24258750857927247, "grad_norm": 0.8456703891591667, "learning_rate": 8.861513276791308e-06, "loss": 0.3299, "step": 7069 }, { "epoch": 0.24262182566918325, "grad_norm": 0.8612968496537432, "learning_rate": 8.861160217085677e-06, "loss": 0.3726, "step": 7070 }, { "epoch": 0.24265614275909403, "grad_norm": 0.8266404734292152, "learning_rate": 8.860807109679917e-06, "loss": 0.3576, "step": 7071 }, { "epoch": 0.2426904598490048, "grad_norm": 0.8250347244499117, "learning_rate": 8.860453954578385e-06, "loss": 0.2745, "step": 7072 }, { "epoch": 0.2427247769389156, "grad_norm": 0.8009462309586763, "learning_rate": 8.860100751785444e-06, "loss": 0.2996, "step": 7073 }, { "epoch": 0.24275909402882637, "grad_norm": 0.7986603995965695, "learning_rate": 8.85974750130546e-06, "loss": 0.3859, "step": 7074 }, { "epoch": 0.24279341111873712, "grad_norm": 0.816162628499998, "learning_rate": 8.859394203142794e-06, "loss": 0.3732, "step": 7075 }, { "epoch": 0.2428277282086479, "grad_norm": 0.8582706061935365, "learning_rate": 8.859040857301814e-06, "loss": 0.3287, "step": 7076 }, { "epoch": 0.24286204529855868, "grad_norm": 0.8041250069708621, "learning_rate": 8.858687463786883e-06, "loss": 0.3712, "step": 7077 }, { "epoch": 0.24289636238846946, "grad_norm": 0.7946293699788217, "learning_rate": 8.858334022602367e-06, "loss": 0.3063, "step": 7078 }, { "epoch": 0.24293067947838023, "grad_norm": 0.8296478637125043, "learning_rate": 8.857980533752632e-06, "loss": 0.3138, "step": 7079 }, { "epoch": 0.24296499656829101, "grad_norm": 0.79679404292106, "learning_rate": 8.857626997242046e-06, "loss": 0.3129, "step": 7080 }, { "epoch": 0.2429993136582018, "grad_norm": 0.978549307717914, "learning_rate": 8.857273413074977e-06, "loss": 0.3066, "step": 7081 }, { "epoch": 0.24303363074811257, "grad_norm": 0.7558329797373846, "learning_rate": 8.85691978125579e-06, "loss": 0.2773, "step": 7082 }, { "epoch": 0.24306794783802332, "grad_norm": 0.7971526412337295, "learning_rate": 8.856566101788857e-06, "loss": 0.358, "step": 7083 }, { "epoch": 0.2431022649279341, "grad_norm": 0.7068977292036135, "learning_rate": 8.856212374678546e-06, "loss": 0.3402, "step": 7084 }, { "epoch": 0.24313658201784488, "grad_norm": 0.7517970482940077, "learning_rate": 8.855858599929226e-06, "loss": 0.3217, "step": 7085 }, { "epoch": 0.24317089910775566, "grad_norm": 0.7762213136811854, "learning_rate": 8.85550477754527e-06, "loss": 0.3118, "step": 7086 }, { "epoch": 0.24320521619766644, "grad_norm": 0.8037592221437742, "learning_rate": 8.855150907531047e-06, "loss": 0.3502, "step": 7087 }, { "epoch": 0.24323953328757722, "grad_norm": 0.8052951653316882, "learning_rate": 8.85479698989093e-06, "loss": 0.3208, "step": 7088 }, { "epoch": 0.243273850377488, "grad_norm": 0.7326094699631086, "learning_rate": 8.85444302462929e-06, "loss": 0.2956, "step": 7089 }, { "epoch": 0.24330816746739878, "grad_norm": 0.7879415778027691, "learning_rate": 8.854089011750499e-06, "loss": 0.3027, "step": 7090 }, { "epoch": 0.24334248455730953, "grad_norm": 0.792373552028269, "learning_rate": 8.853734951258933e-06, "loss": 0.3485, "step": 7091 }, { "epoch": 0.2433768016472203, "grad_norm": 0.7344290139370531, "learning_rate": 8.853380843158965e-06, "loss": 0.2887, "step": 7092 }, { "epoch": 0.2434111187371311, "grad_norm": 0.7737734313157253, "learning_rate": 8.853026687454968e-06, "loss": 0.3198, "step": 7093 }, { "epoch": 0.24344543582704187, "grad_norm": 0.7100442526561374, "learning_rate": 8.85267248415132e-06, "loss": 0.2889, "step": 7094 }, { "epoch": 0.24347975291695265, "grad_norm": 0.7744304025852998, "learning_rate": 8.852318233252393e-06, "loss": 0.3312, "step": 7095 }, { "epoch": 0.24351407000686343, "grad_norm": 0.7603737807797851, "learning_rate": 8.851963934762568e-06, "loss": 0.3404, "step": 7096 }, { "epoch": 0.2435483870967742, "grad_norm": 0.7475526987850266, "learning_rate": 8.85160958868622e-06, "loss": 0.3096, "step": 7097 }, { "epoch": 0.24358270418668496, "grad_norm": 0.8511741873707835, "learning_rate": 8.851255195027723e-06, "loss": 0.3494, "step": 7098 }, { "epoch": 0.24361702127659574, "grad_norm": 0.7874432560008072, "learning_rate": 8.85090075379146e-06, "loss": 0.2758, "step": 7099 }, { "epoch": 0.24365133836650651, "grad_norm": 0.7968551396301665, "learning_rate": 8.850546264981808e-06, "loss": 0.3214, "step": 7100 }, { "epoch": 0.2436856554564173, "grad_norm": 0.7703241811515987, "learning_rate": 8.850191728603146e-06, "loss": 0.3021, "step": 7101 }, { "epoch": 0.24371997254632807, "grad_norm": 0.8268085619067761, "learning_rate": 8.849837144659854e-06, "loss": 0.3083, "step": 7102 }, { "epoch": 0.24375428963623885, "grad_norm": 0.7747876408135852, "learning_rate": 8.849482513156312e-06, "loss": 0.3266, "step": 7103 }, { "epoch": 0.24378860672614963, "grad_norm": 0.8208566563310865, "learning_rate": 8.849127834096903e-06, "loss": 0.3204, "step": 7104 }, { "epoch": 0.2438229238160604, "grad_norm": 0.8051998719690308, "learning_rate": 8.848773107486006e-06, "loss": 0.3381, "step": 7105 }, { "epoch": 0.24385724090597116, "grad_norm": 0.9126619822295565, "learning_rate": 8.848418333328004e-06, "loss": 0.3205, "step": 7106 }, { "epoch": 0.24389155799588194, "grad_norm": 0.7972460775804182, "learning_rate": 8.848063511627282e-06, "loss": 0.3039, "step": 7107 }, { "epoch": 0.24392587508579272, "grad_norm": 0.8250040532088548, "learning_rate": 8.84770864238822e-06, "loss": 0.3937, "step": 7108 }, { "epoch": 0.2439601921757035, "grad_norm": 0.7894868361706583, "learning_rate": 8.847353725615205e-06, "loss": 0.2981, "step": 7109 }, { "epoch": 0.24399450926561428, "grad_norm": 0.8215161244445617, "learning_rate": 8.84699876131262e-06, "loss": 0.3268, "step": 7110 }, { "epoch": 0.24402882635552506, "grad_norm": 0.775931542131883, "learning_rate": 8.84664374948485e-06, "loss": 0.3662, "step": 7111 }, { "epoch": 0.24406314344543584, "grad_norm": 0.7901788918180812, "learning_rate": 8.846288690136281e-06, "loss": 0.3421, "step": 7112 }, { "epoch": 0.24409746053534662, "grad_norm": 0.7325761850509575, "learning_rate": 8.845933583271302e-06, "loss": 0.2758, "step": 7113 }, { "epoch": 0.24413177762525737, "grad_norm": 0.773090639029877, "learning_rate": 8.845578428894295e-06, "loss": 0.289, "step": 7114 }, { "epoch": 0.24416609471516815, "grad_norm": 0.749422821655595, "learning_rate": 8.845223227009652e-06, "loss": 0.3027, "step": 7115 }, { "epoch": 0.24420041180507893, "grad_norm": 0.7084011888030662, "learning_rate": 8.844867977621757e-06, "loss": 0.3603, "step": 7116 }, { "epoch": 0.2442347288949897, "grad_norm": 0.8184026097335382, "learning_rate": 8.844512680735002e-06, "loss": 0.3482, "step": 7117 }, { "epoch": 0.24426904598490048, "grad_norm": 0.7953292751517951, "learning_rate": 8.844157336353773e-06, "loss": 0.3275, "step": 7118 }, { "epoch": 0.24430336307481126, "grad_norm": 0.8365014025041948, "learning_rate": 8.843801944482466e-06, "loss": 0.3221, "step": 7119 }, { "epoch": 0.24433768016472204, "grad_norm": 0.7064892000169006, "learning_rate": 8.843446505125464e-06, "loss": 0.2902, "step": 7120 }, { "epoch": 0.2443719972546328, "grad_norm": 0.8924648354343915, "learning_rate": 8.843091018287162e-06, "loss": 0.3183, "step": 7121 }, { "epoch": 0.24440631434454357, "grad_norm": 0.7573842548593148, "learning_rate": 8.842735483971953e-06, "loss": 0.3267, "step": 7122 }, { "epoch": 0.24444063143445435, "grad_norm": 0.7311776582280959, "learning_rate": 8.842379902184226e-06, "loss": 0.2856, "step": 7123 }, { "epoch": 0.24447494852436513, "grad_norm": 0.791564531298588, "learning_rate": 8.842024272928375e-06, "loss": 0.3143, "step": 7124 }, { "epoch": 0.2445092656142759, "grad_norm": 0.9337572097058291, "learning_rate": 8.841668596208791e-06, "loss": 0.331, "step": 7125 }, { "epoch": 0.2445435827041867, "grad_norm": 0.8191148834481043, "learning_rate": 8.841312872029874e-06, "loss": 0.3511, "step": 7126 }, { "epoch": 0.24457789979409747, "grad_norm": 0.7914439661864553, "learning_rate": 8.840957100396015e-06, "loss": 0.3109, "step": 7127 }, { "epoch": 0.24461221688400825, "grad_norm": 0.7457464650740193, "learning_rate": 8.840601281311608e-06, "loss": 0.3128, "step": 7128 }, { "epoch": 0.244646533973919, "grad_norm": 1.4922555645021836, "learning_rate": 8.84024541478105e-06, "loss": 0.3795, "step": 7129 }, { "epoch": 0.24468085106382978, "grad_norm": 0.8104723928685099, "learning_rate": 8.839889500808736e-06, "loss": 0.3555, "step": 7130 }, { "epoch": 0.24471516815374056, "grad_norm": 0.8316696556154417, "learning_rate": 8.839533539399067e-06, "loss": 0.3023, "step": 7131 }, { "epoch": 0.24474948524365134, "grad_norm": 0.7520242414205407, "learning_rate": 8.839177530556436e-06, "loss": 0.2767, "step": 7132 }, { "epoch": 0.24478380233356212, "grad_norm": 0.8028511170043258, "learning_rate": 8.838821474285241e-06, "loss": 0.3455, "step": 7133 }, { "epoch": 0.2448181194234729, "grad_norm": 0.8326886255420192, "learning_rate": 8.838465370589885e-06, "loss": 0.3654, "step": 7134 }, { "epoch": 0.24485243651338368, "grad_norm": 0.745142745538548, "learning_rate": 8.838109219474763e-06, "loss": 0.3564, "step": 7135 }, { "epoch": 0.24488675360329443, "grad_norm": 0.8107857613554416, "learning_rate": 8.837753020944277e-06, "loss": 0.3346, "step": 7136 }, { "epoch": 0.2449210706932052, "grad_norm": 0.7448053854857153, "learning_rate": 8.837396775002826e-06, "loss": 0.3028, "step": 7137 }, { "epoch": 0.24495538778311599, "grad_norm": 0.7945174912686689, "learning_rate": 8.837040481654812e-06, "loss": 0.3492, "step": 7138 }, { "epoch": 0.24498970487302676, "grad_norm": 0.798123537542396, "learning_rate": 8.836684140904638e-06, "loss": 0.328, "step": 7139 }, { "epoch": 0.24502402196293754, "grad_norm": 0.7849075190044784, "learning_rate": 8.836327752756704e-06, "loss": 0.311, "step": 7140 }, { "epoch": 0.24505833905284832, "grad_norm": 0.8122872758685177, "learning_rate": 8.835971317215411e-06, "loss": 0.3516, "step": 7141 }, { "epoch": 0.2450926561427591, "grad_norm": 0.993689456251483, "learning_rate": 8.835614834285166e-06, "loss": 0.3048, "step": 7142 }, { "epoch": 0.24512697323266988, "grad_norm": 0.7982830575101657, "learning_rate": 8.835258303970374e-06, "loss": 0.3318, "step": 7143 }, { "epoch": 0.24516129032258063, "grad_norm": 0.7633877794481524, "learning_rate": 8.834901726275434e-06, "loss": 0.2926, "step": 7144 }, { "epoch": 0.2451956074124914, "grad_norm": 0.7556547933963558, "learning_rate": 8.834545101204757e-06, "loss": 0.3076, "step": 7145 }, { "epoch": 0.2452299245024022, "grad_norm": 0.829822925894211, "learning_rate": 8.834188428762743e-06, "loss": 0.3139, "step": 7146 }, { "epoch": 0.24526424159231297, "grad_norm": 0.6941503979315177, "learning_rate": 8.833831708953803e-06, "loss": 0.282, "step": 7147 }, { "epoch": 0.24529855868222375, "grad_norm": 0.8320753138097936, "learning_rate": 8.83347494178234e-06, "loss": 0.3779, "step": 7148 }, { "epoch": 0.24533287577213453, "grad_norm": 0.8152823361510879, "learning_rate": 8.833118127252766e-06, "loss": 0.3447, "step": 7149 }, { "epoch": 0.2453671928620453, "grad_norm": 0.8888288943055465, "learning_rate": 8.832761265369486e-06, "loss": 0.292, "step": 7150 }, { "epoch": 0.2454015099519561, "grad_norm": 0.8053865558365787, "learning_rate": 8.832404356136908e-06, "loss": 0.2738, "step": 7151 }, { "epoch": 0.24543582704186684, "grad_norm": 0.8064879943856991, "learning_rate": 8.832047399559444e-06, "loss": 0.3699, "step": 7152 }, { "epoch": 0.24547014413177762, "grad_norm": 0.7827894240454896, "learning_rate": 8.831690395641502e-06, "loss": 0.3749, "step": 7153 }, { "epoch": 0.2455044612216884, "grad_norm": 0.7673930064009531, "learning_rate": 8.83133334438749e-06, "loss": 0.3131, "step": 7154 }, { "epoch": 0.24553877831159918, "grad_norm": 0.823141872364538, "learning_rate": 8.830976245801822e-06, "loss": 0.3318, "step": 7155 }, { "epoch": 0.24557309540150996, "grad_norm": 0.7062856191723086, "learning_rate": 8.830619099888912e-06, "loss": 0.3066, "step": 7156 }, { "epoch": 0.24560741249142073, "grad_norm": 0.8055422323250266, "learning_rate": 8.830261906653166e-06, "loss": 0.301, "step": 7157 }, { "epoch": 0.2456417295813315, "grad_norm": 0.7968228308697085, "learning_rate": 8.829904666099001e-06, "loss": 0.3278, "step": 7158 }, { "epoch": 0.24567604667124227, "grad_norm": 0.7771225429222891, "learning_rate": 8.829547378230829e-06, "loss": 0.293, "step": 7159 }, { "epoch": 0.24571036376115304, "grad_norm": 0.7910087122127654, "learning_rate": 8.829190043053065e-06, "loss": 0.3561, "step": 7160 }, { "epoch": 0.24574468085106382, "grad_norm": 0.802625753126897, "learning_rate": 8.828832660570121e-06, "loss": 0.3519, "step": 7161 }, { "epoch": 0.2457789979409746, "grad_norm": 0.7792467299896826, "learning_rate": 8.828475230786414e-06, "loss": 0.3253, "step": 7162 }, { "epoch": 0.24581331503088538, "grad_norm": 0.8522391759380988, "learning_rate": 8.828117753706358e-06, "loss": 0.3379, "step": 7163 }, { "epoch": 0.24584763212079616, "grad_norm": 0.7786195032140694, "learning_rate": 8.827760229334373e-06, "loss": 0.3041, "step": 7164 }, { "epoch": 0.24588194921070694, "grad_norm": 0.7017999677757224, "learning_rate": 8.827402657674871e-06, "loss": 0.3011, "step": 7165 }, { "epoch": 0.24591626630061772, "grad_norm": 0.7619458175553393, "learning_rate": 8.827045038732272e-06, "loss": 0.3106, "step": 7166 }, { "epoch": 0.24595058339052847, "grad_norm": 0.7193746700941677, "learning_rate": 8.826687372510993e-06, "loss": 0.3455, "step": 7167 }, { "epoch": 0.24598490048043925, "grad_norm": 0.9441297031751184, "learning_rate": 8.826329659015453e-06, "loss": 0.3261, "step": 7168 }, { "epoch": 0.24601921757035003, "grad_norm": 0.8274557353047183, "learning_rate": 8.825971898250072e-06, "loss": 0.305, "step": 7169 }, { "epoch": 0.2460535346602608, "grad_norm": 0.8682873771182026, "learning_rate": 8.825614090219267e-06, "loss": 0.3304, "step": 7170 }, { "epoch": 0.2460878517501716, "grad_norm": 0.7898084972336007, "learning_rate": 8.825256234927462e-06, "loss": 0.337, "step": 7171 }, { "epoch": 0.24612216884008237, "grad_norm": 0.7604152824933259, "learning_rate": 8.824898332379075e-06, "loss": 0.2869, "step": 7172 }, { "epoch": 0.24615648592999315, "grad_norm": 0.8345164179674512, "learning_rate": 8.824540382578529e-06, "loss": 0.3373, "step": 7173 }, { "epoch": 0.24619080301990393, "grad_norm": 0.7308671120488112, "learning_rate": 8.824182385530246e-06, "loss": 0.3069, "step": 7174 }, { "epoch": 0.24622512010981468, "grad_norm": 0.886487521286712, "learning_rate": 8.823824341238647e-06, "loss": 0.367, "step": 7175 }, { "epoch": 0.24625943719972546, "grad_norm": 0.7337854669948621, "learning_rate": 8.823466249708159e-06, "loss": 0.318, "step": 7176 }, { "epoch": 0.24629375428963624, "grad_norm": 0.7469060564999087, "learning_rate": 8.823108110943199e-06, "loss": 0.2645, "step": 7177 }, { "epoch": 0.24632807137954701, "grad_norm": 0.7391857769954421, "learning_rate": 8.822749924948198e-06, "loss": 0.2887, "step": 7178 }, { "epoch": 0.2463623884694578, "grad_norm": 0.8607647642197194, "learning_rate": 8.822391691727578e-06, "loss": 0.2731, "step": 7179 }, { "epoch": 0.24639670555936857, "grad_norm": 0.7673688205914081, "learning_rate": 8.822033411285767e-06, "loss": 0.2969, "step": 7180 }, { "epoch": 0.24643102264927935, "grad_norm": 0.7777763043239748, "learning_rate": 8.821675083627187e-06, "loss": 0.33, "step": 7181 }, { "epoch": 0.2464653397391901, "grad_norm": 0.8751328666449975, "learning_rate": 8.821316708756269e-06, "loss": 0.3281, "step": 7182 }, { "epoch": 0.24649965682910088, "grad_norm": 0.6922714258053234, "learning_rate": 8.820958286677436e-06, "loss": 0.3002, "step": 7183 }, { "epoch": 0.24653397391901166, "grad_norm": 0.7501760813286277, "learning_rate": 8.820599817395121e-06, "loss": 0.296, "step": 7184 }, { "epoch": 0.24656829100892244, "grad_norm": 0.7155619882332297, "learning_rate": 8.820241300913748e-06, "loss": 0.2906, "step": 7185 }, { "epoch": 0.24660260809883322, "grad_norm": 0.800462875645286, "learning_rate": 8.819882737237746e-06, "loss": 0.346, "step": 7186 }, { "epoch": 0.246636925188744, "grad_norm": 0.7078391194451671, "learning_rate": 8.819524126371549e-06, "loss": 0.3463, "step": 7187 }, { "epoch": 0.24667124227865478, "grad_norm": 0.7707013532738972, "learning_rate": 8.819165468319585e-06, "loss": 0.2821, "step": 7188 }, { "epoch": 0.24670555936856556, "grad_norm": 0.7920961395939462, "learning_rate": 8.81880676308628e-06, "loss": 0.3028, "step": 7189 }, { "epoch": 0.2467398764584763, "grad_norm": 0.7757515215211693, "learning_rate": 8.818448010676074e-06, "loss": 0.3164, "step": 7190 }, { "epoch": 0.2467741935483871, "grad_norm": 0.7680752559328738, "learning_rate": 8.818089211093393e-06, "loss": 0.4103, "step": 7191 }, { "epoch": 0.24680851063829787, "grad_norm": 0.7899923610921534, "learning_rate": 8.817730364342671e-06, "loss": 0.274, "step": 7192 }, { "epoch": 0.24684282772820865, "grad_norm": 0.8253013018012166, "learning_rate": 8.817371470428343e-06, "loss": 0.3186, "step": 7193 }, { "epoch": 0.24687714481811943, "grad_norm": 0.8620283340786223, "learning_rate": 8.81701252935484e-06, "loss": 0.2803, "step": 7194 }, { "epoch": 0.2469114619080302, "grad_norm": 0.7989800981780847, "learning_rate": 8.816653541126598e-06, "loss": 0.355, "step": 7195 }, { "epoch": 0.24694577899794098, "grad_norm": 1.0395635071826157, "learning_rate": 8.816294505748051e-06, "loss": 0.3266, "step": 7196 }, { "epoch": 0.24698009608785176, "grad_norm": 0.7578973276088807, "learning_rate": 8.815935423223635e-06, "loss": 0.3275, "step": 7197 }, { "epoch": 0.24701441317776252, "grad_norm": 0.8246708376345705, "learning_rate": 8.815576293557784e-06, "loss": 0.422, "step": 7198 }, { "epoch": 0.2470487302676733, "grad_norm": 0.8652505085217079, "learning_rate": 8.815217116754936e-06, "loss": 0.3152, "step": 7199 }, { "epoch": 0.24708304735758407, "grad_norm": 0.7885129242284926, "learning_rate": 8.814857892819533e-06, "loss": 0.3437, "step": 7200 }, { "epoch": 0.24711736444749485, "grad_norm": 0.7714992779118802, "learning_rate": 8.814498621756005e-06, "loss": 0.3364, "step": 7201 }, { "epoch": 0.24715168153740563, "grad_norm": 0.7205921519532362, "learning_rate": 8.814139303568794e-06, "loss": 0.3164, "step": 7202 }, { "epoch": 0.2471859986273164, "grad_norm": 0.7983111891151835, "learning_rate": 8.81377993826234e-06, "loss": 0.3194, "step": 7203 }, { "epoch": 0.2472203157172272, "grad_norm": 0.7980899768452357, "learning_rate": 8.813420525841082e-06, "loss": 0.3337, "step": 7204 }, { "epoch": 0.24725463280713794, "grad_norm": 0.8001279561786326, "learning_rate": 8.813061066309456e-06, "loss": 0.3349, "step": 7205 }, { "epoch": 0.24728894989704872, "grad_norm": 0.8075836715300145, "learning_rate": 8.81270155967191e-06, "loss": 0.3478, "step": 7206 }, { "epoch": 0.2473232669869595, "grad_norm": 0.8725977200577769, "learning_rate": 8.81234200593288e-06, "loss": 0.2805, "step": 7207 }, { "epoch": 0.24735758407687028, "grad_norm": 0.8408543644075467, "learning_rate": 8.811982405096807e-06, "loss": 0.3454, "step": 7208 }, { "epoch": 0.24739190116678106, "grad_norm": 0.9826163876794045, "learning_rate": 8.81162275716814e-06, "loss": 0.4358, "step": 7209 }, { "epoch": 0.24742621825669184, "grad_norm": 0.8075877803544954, "learning_rate": 8.811263062151314e-06, "loss": 0.295, "step": 7210 }, { "epoch": 0.24746053534660262, "grad_norm": 0.8278499432468752, "learning_rate": 8.810903320050776e-06, "loss": 0.3451, "step": 7211 }, { "epoch": 0.2474948524365134, "grad_norm": 0.7081949068700069, "learning_rate": 8.810543530870973e-06, "loss": 0.3263, "step": 7212 }, { "epoch": 0.24752916952642415, "grad_norm": 0.7984020489780764, "learning_rate": 8.810183694616347e-06, "loss": 0.3677, "step": 7213 }, { "epoch": 0.24756348661633493, "grad_norm": 0.8276441995847528, "learning_rate": 8.809823811291343e-06, "loss": 0.3702, "step": 7214 }, { "epoch": 0.2475978037062457, "grad_norm": 0.8094802418937121, "learning_rate": 8.809463880900409e-06, "loss": 0.328, "step": 7215 }, { "epoch": 0.24763212079615649, "grad_norm": 0.7648355274613085, "learning_rate": 8.80910390344799e-06, "loss": 0.2872, "step": 7216 }, { "epoch": 0.24766643788606726, "grad_norm": 0.8439802021378126, "learning_rate": 8.808743878938532e-06, "loss": 0.3514, "step": 7217 }, { "epoch": 0.24770075497597804, "grad_norm": 0.8169555740463013, "learning_rate": 8.808383807376485e-06, "loss": 0.2949, "step": 7218 }, { "epoch": 0.24773507206588882, "grad_norm": 0.7549314771198786, "learning_rate": 8.808023688766294e-06, "loss": 0.3387, "step": 7219 }, { "epoch": 0.2477693891557996, "grad_norm": 0.7347543283496529, "learning_rate": 8.807663523112413e-06, "loss": 0.337, "step": 7220 }, { "epoch": 0.24780370624571035, "grad_norm": 0.7048218628301754, "learning_rate": 8.807303310419285e-06, "loss": 0.324, "step": 7221 }, { "epoch": 0.24783802333562113, "grad_norm": 0.8355672213259622, "learning_rate": 8.806943050691366e-06, "loss": 0.3339, "step": 7222 }, { "epoch": 0.2478723404255319, "grad_norm": 0.785294130983365, "learning_rate": 8.806582743933104e-06, "loss": 0.3728, "step": 7223 }, { "epoch": 0.2479066575154427, "grad_norm": 0.7885763297394562, "learning_rate": 8.80622239014895e-06, "loss": 0.3393, "step": 7224 }, { "epoch": 0.24794097460535347, "grad_norm": 0.7786052172933984, "learning_rate": 8.805861989343356e-06, "loss": 0.2869, "step": 7225 }, { "epoch": 0.24797529169526425, "grad_norm": 0.8444705288514197, "learning_rate": 8.805501541520774e-06, "loss": 0.3413, "step": 7226 }, { "epoch": 0.24800960878517503, "grad_norm": 0.792536470493974, "learning_rate": 8.805141046685656e-06, "loss": 0.3203, "step": 7227 }, { "epoch": 0.24804392587508578, "grad_norm": 0.7350682833935311, "learning_rate": 8.80478050484246e-06, "loss": 0.3113, "step": 7228 }, { "epoch": 0.24807824296499656, "grad_norm": 0.7894394559350159, "learning_rate": 8.804419915995634e-06, "loss": 0.3151, "step": 7229 }, { "epoch": 0.24811256005490734, "grad_norm": 0.881326584238369, "learning_rate": 8.804059280149637e-06, "loss": 0.3274, "step": 7230 }, { "epoch": 0.24814687714481812, "grad_norm": 0.7360097350129383, "learning_rate": 8.803698597308923e-06, "loss": 0.3108, "step": 7231 }, { "epoch": 0.2481811942347289, "grad_norm": 0.7098688974135752, "learning_rate": 8.803337867477946e-06, "loss": 0.2882, "step": 7232 }, { "epoch": 0.24821551132463968, "grad_norm": 0.836620394968694, "learning_rate": 8.802977090661164e-06, "loss": 0.3778, "step": 7233 }, { "epoch": 0.24824982841455046, "grad_norm": 0.7504157351972935, "learning_rate": 8.802616266863034e-06, "loss": 0.3304, "step": 7234 }, { "epoch": 0.24828414550446123, "grad_norm": 0.7462536964861922, "learning_rate": 8.802255396088016e-06, "loss": 0.324, "step": 7235 }, { "epoch": 0.24831846259437199, "grad_norm": 0.7621405846919175, "learning_rate": 8.801894478340562e-06, "loss": 0.3517, "step": 7236 }, { "epoch": 0.24835277968428277, "grad_norm": 0.7355539753548324, "learning_rate": 8.801533513625135e-06, "loss": 0.3182, "step": 7237 }, { "epoch": 0.24838709677419354, "grad_norm": 0.8552486420898439, "learning_rate": 8.801172501946197e-06, "loss": 0.3298, "step": 7238 }, { "epoch": 0.24842141386410432, "grad_norm": 0.7840440456217682, "learning_rate": 8.8008114433082e-06, "loss": 0.3005, "step": 7239 }, { "epoch": 0.2484557309540151, "grad_norm": 0.8016395516189568, "learning_rate": 8.80045033771561e-06, "loss": 0.3096, "step": 7240 }, { "epoch": 0.24849004804392588, "grad_norm": 0.836073855642358, "learning_rate": 8.800089185172889e-06, "loss": 0.3153, "step": 7241 }, { "epoch": 0.24852436513383666, "grad_norm": 0.719164899983333, "learning_rate": 8.799727985684494e-06, "loss": 0.3129, "step": 7242 }, { "epoch": 0.2485586822237474, "grad_norm": 0.8792698727490375, "learning_rate": 8.799366739254892e-06, "loss": 0.3413, "step": 7243 }, { "epoch": 0.2485929993136582, "grad_norm": 0.8095229984927975, "learning_rate": 8.799005445888542e-06, "loss": 0.2929, "step": 7244 }, { "epoch": 0.24862731640356897, "grad_norm": 0.8764228057818694, "learning_rate": 8.79864410558991e-06, "loss": 0.3535, "step": 7245 }, { "epoch": 0.24866163349347975, "grad_norm": 0.8716533677361314, "learning_rate": 8.79828271836346e-06, "loss": 0.3604, "step": 7246 }, { "epoch": 0.24869595058339053, "grad_norm": 0.7459690951817511, "learning_rate": 8.797921284213654e-06, "loss": 0.3316, "step": 7247 }, { "epoch": 0.2487302676733013, "grad_norm": 0.7300636572610746, "learning_rate": 8.797559803144956e-06, "loss": 0.3416, "step": 7248 }, { "epoch": 0.2487645847632121, "grad_norm": 0.7621210482597546, "learning_rate": 8.797198275161838e-06, "loss": 0.3322, "step": 7249 }, { "epoch": 0.24879890185312287, "grad_norm": 0.6439803210272902, "learning_rate": 8.79683670026876e-06, "loss": 0.2741, "step": 7250 }, { "epoch": 0.24883321894303362, "grad_norm": 0.7280216879051644, "learning_rate": 8.796475078470192e-06, "loss": 0.3178, "step": 7251 }, { "epoch": 0.2488675360329444, "grad_norm": 0.8210864954102892, "learning_rate": 8.796113409770602e-06, "loss": 0.2983, "step": 7252 }, { "epoch": 0.24890185312285518, "grad_norm": 0.7958882645161564, "learning_rate": 8.795751694174455e-06, "loss": 0.3367, "step": 7253 }, { "epoch": 0.24893617021276596, "grad_norm": 0.7596889974722156, "learning_rate": 8.795389931686222e-06, "loss": 0.3137, "step": 7254 }, { "epoch": 0.24897048730267674, "grad_norm": 0.8228142482298978, "learning_rate": 8.795028122310371e-06, "loss": 0.3269, "step": 7255 }, { "epoch": 0.24900480439258751, "grad_norm": 0.7315336918779641, "learning_rate": 8.794666266051371e-06, "loss": 0.3633, "step": 7256 }, { "epoch": 0.2490391214824983, "grad_norm": 0.7476138156872331, "learning_rate": 8.794304362913695e-06, "loss": 0.3207, "step": 7257 }, { "epoch": 0.24907343857240907, "grad_norm": 0.8353518755777101, "learning_rate": 8.793942412901813e-06, "loss": 0.3535, "step": 7258 }, { "epoch": 0.24910775566231982, "grad_norm": 0.7792886937718262, "learning_rate": 8.793580416020195e-06, "loss": 0.2993, "step": 7259 }, { "epoch": 0.2491420727522306, "grad_norm": 0.7641661061129454, "learning_rate": 8.793218372273313e-06, "loss": 0.3302, "step": 7260 }, { "epoch": 0.24917638984214138, "grad_norm": 0.7751250406882739, "learning_rate": 8.792856281665641e-06, "loss": 0.3343, "step": 7261 }, { "epoch": 0.24921070693205216, "grad_norm": 0.8160725365911436, "learning_rate": 8.792494144201651e-06, "loss": 0.3067, "step": 7262 }, { "epoch": 0.24924502402196294, "grad_norm": 0.7533686600456784, "learning_rate": 8.792131959885818e-06, "loss": 0.2883, "step": 7263 }, { "epoch": 0.24927934111187372, "grad_norm": 0.8454600335682894, "learning_rate": 8.791769728722617e-06, "loss": 0.3258, "step": 7264 }, { "epoch": 0.2493136582017845, "grad_norm": 0.7923559198998541, "learning_rate": 8.79140745071652e-06, "loss": 0.3255, "step": 7265 }, { "epoch": 0.24934797529169525, "grad_norm": 0.8819165506979861, "learning_rate": 8.791045125872007e-06, "loss": 0.3313, "step": 7266 }, { "epoch": 0.24938229238160603, "grad_norm": 0.7415918468280268, "learning_rate": 8.79068275419355e-06, "loss": 0.349, "step": 7267 }, { "epoch": 0.2494166094715168, "grad_norm": 0.7986996412202486, "learning_rate": 8.790320335685624e-06, "loss": 0.3425, "step": 7268 }, { "epoch": 0.2494509265614276, "grad_norm": 0.7745932078237965, "learning_rate": 8.789957870352713e-06, "loss": 0.3294, "step": 7269 }, { "epoch": 0.24948524365133837, "grad_norm": 0.831666452034527, "learning_rate": 8.789595358199289e-06, "loss": 0.3293, "step": 7270 }, { "epoch": 0.24951956074124915, "grad_norm": 0.872210139606618, "learning_rate": 8.789232799229836e-06, "loss": 0.3512, "step": 7271 }, { "epoch": 0.24955387783115993, "grad_norm": 0.8386025270133954, "learning_rate": 8.788870193448826e-06, "loss": 0.3387, "step": 7272 }, { "epoch": 0.2495881949210707, "grad_norm": 0.8814668629640824, "learning_rate": 8.788507540860745e-06, "loss": 0.3888, "step": 7273 }, { "epoch": 0.24962251201098146, "grad_norm": 0.7801917098179904, "learning_rate": 8.788144841470067e-06, "loss": 0.3172, "step": 7274 }, { "epoch": 0.24965682910089224, "grad_norm": 0.7785346789688229, "learning_rate": 8.78778209528128e-06, "loss": 0.3405, "step": 7275 }, { "epoch": 0.24969114619080301, "grad_norm": 0.7572131265448173, "learning_rate": 8.78741930229886e-06, "loss": 0.3101, "step": 7276 }, { "epoch": 0.2497254632807138, "grad_norm": 0.7945500198656847, "learning_rate": 8.787056462527286e-06, "loss": 0.3512, "step": 7277 }, { "epoch": 0.24975978037062457, "grad_norm": 0.790390150268724, "learning_rate": 8.78669357597105e-06, "loss": 0.3275, "step": 7278 }, { "epoch": 0.24979409746053535, "grad_norm": 0.7752080930121379, "learning_rate": 8.786330642634627e-06, "loss": 0.307, "step": 7279 }, { "epoch": 0.24982841455044613, "grad_norm": 0.7512141531241967, "learning_rate": 8.785967662522505e-06, "loss": 0.2712, "step": 7280 }, { "epoch": 0.2498627316403569, "grad_norm": 0.7237786926022693, "learning_rate": 8.785604635639164e-06, "loss": 0.357, "step": 7281 }, { "epoch": 0.24989704873026766, "grad_norm": 0.7775392307167948, "learning_rate": 8.785241561989094e-06, "loss": 0.2898, "step": 7282 }, { "epoch": 0.24993136582017844, "grad_norm": 0.6857621034181963, "learning_rate": 8.784878441576776e-06, "loss": 0.2861, "step": 7283 }, { "epoch": 0.24996568291008922, "grad_norm": 0.7950289472894743, "learning_rate": 8.784515274406699e-06, "loss": 0.323, "step": 7284 }, { "epoch": 0.25, "grad_norm": 0.7580140540259535, "learning_rate": 8.784152060483346e-06, "loss": 0.2846, "step": 7285 }, { "epoch": 0.25003431708991075, "grad_norm": 0.737065778696035, "learning_rate": 8.783788799811209e-06, "loss": 0.342, "step": 7286 }, { "epoch": 0.25006863417982156, "grad_norm": 0.8141441075294068, "learning_rate": 8.783425492394772e-06, "loss": 0.3666, "step": 7287 }, { "epoch": 0.2501029512697323, "grad_norm": 0.7863513791546776, "learning_rate": 8.783062138238523e-06, "loss": 0.3221, "step": 7288 }, { "epoch": 0.2501372683596431, "grad_norm": 0.7378721739681003, "learning_rate": 8.782698737346952e-06, "loss": 0.3004, "step": 7289 }, { "epoch": 0.25017158544955387, "grad_norm": 0.7914258656060498, "learning_rate": 8.782335289724548e-06, "loss": 0.2949, "step": 7290 }, { "epoch": 0.2502059025394647, "grad_norm": 0.8675173237831643, "learning_rate": 8.781971795375803e-06, "loss": 0.3912, "step": 7291 }, { "epoch": 0.2502402196293754, "grad_norm": 0.8490746271732502, "learning_rate": 8.781608254305206e-06, "loss": 0.3505, "step": 7292 }, { "epoch": 0.2502745367192862, "grad_norm": 0.8531028285487651, "learning_rate": 8.781244666517245e-06, "loss": 0.3018, "step": 7293 }, { "epoch": 0.250308853809197, "grad_norm": 0.7884218997967116, "learning_rate": 8.780881032016416e-06, "loss": 0.3049, "step": 7294 }, { "epoch": 0.25034317089910774, "grad_norm": 0.9212911399221447, "learning_rate": 8.780517350807211e-06, "loss": 0.3097, "step": 7295 }, { "epoch": 0.25037748798901854, "grad_norm": 0.7911943955922601, "learning_rate": 8.78015362289412e-06, "loss": 0.4128, "step": 7296 }, { "epoch": 0.2504118050789293, "grad_norm": 0.6850770204567168, "learning_rate": 8.77978984828164e-06, "loss": 0.2889, "step": 7297 }, { "epoch": 0.2504461221688401, "grad_norm": 0.7270719609221892, "learning_rate": 8.779426026974263e-06, "loss": 0.2872, "step": 7298 }, { "epoch": 0.25048043925875085, "grad_norm": 0.7550455639737629, "learning_rate": 8.779062158976483e-06, "loss": 0.3177, "step": 7299 }, { "epoch": 0.25051475634866166, "grad_norm": 0.8011937124682881, "learning_rate": 8.778698244292797e-06, "loss": 0.3418, "step": 7300 }, { "epoch": 0.2505490734385724, "grad_norm": 0.7575392108112213, "learning_rate": 8.778334282927698e-06, "loss": 0.3419, "step": 7301 }, { "epoch": 0.25058339052848316, "grad_norm": 0.969928231221997, "learning_rate": 8.777970274885686e-06, "loss": 0.3139, "step": 7302 }, { "epoch": 0.25061770761839397, "grad_norm": 0.8560033436233343, "learning_rate": 8.777606220171255e-06, "loss": 0.3064, "step": 7303 }, { "epoch": 0.2506520247083047, "grad_norm": 0.7792103848810618, "learning_rate": 8.777242118788903e-06, "loss": 0.331, "step": 7304 }, { "epoch": 0.25068634179821553, "grad_norm": 0.8197682909518924, "learning_rate": 8.77687797074313e-06, "loss": 0.3043, "step": 7305 }, { "epoch": 0.2507206588881263, "grad_norm": 0.8561002401082135, "learning_rate": 8.77651377603843e-06, "loss": 0.4158, "step": 7306 }, { "epoch": 0.2507549759780371, "grad_norm": 0.8119653942680141, "learning_rate": 8.776149534679309e-06, "loss": 0.3134, "step": 7307 }, { "epoch": 0.25078929306794784, "grad_norm": 0.7278226048668938, "learning_rate": 8.77578524667026e-06, "loss": 0.2613, "step": 7308 }, { "epoch": 0.2508236101578586, "grad_norm": 0.8539662413360418, "learning_rate": 8.775420912015789e-06, "loss": 0.3398, "step": 7309 }, { "epoch": 0.2508579272477694, "grad_norm": 0.793840958351125, "learning_rate": 8.775056530720392e-06, "loss": 0.3475, "step": 7310 }, { "epoch": 0.25089224433768015, "grad_norm": 0.7375741369633737, "learning_rate": 8.774692102788574e-06, "loss": 0.3027, "step": 7311 }, { "epoch": 0.25092656142759096, "grad_norm": 0.8195088210750222, "learning_rate": 8.774327628224837e-06, "loss": 0.3162, "step": 7312 }, { "epoch": 0.2509608785175017, "grad_norm": 0.7969818639116022, "learning_rate": 8.77396310703368e-06, "loss": 0.3253, "step": 7313 }, { "epoch": 0.2509951956074125, "grad_norm": 0.8023850642004609, "learning_rate": 8.77359853921961e-06, "loss": 0.3776, "step": 7314 }, { "epoch": 0.25102951269732326, "grad_norm": 0.8514763845874866, "learning_rate": 8.77323392478713e-06, "loss": 0.3002, "step": 7315 }, { "epoch": 0.251063829787234, "grad_norm": 0.7318027497870789, "learning_rate": 8.772869263740744e-06, "loss": 0.3122, "step": 7316 }, { "epoch": 0.2510981468771448, "grad_norm": 0.7440603211730666, "learning_rate": 8.772504556084957e-06, "loss": 0.3357, "step": 7317 }, { "epoch": 0.2511324639670556, "grad_norm": 0.8137816551847442, "learning_rate": 8.772139801824275e-06, "loss": 0.3013, "step": 7318 }, { "epoch": 0.2511667810569664, "grad_norm": 0.799684982672654, "learning_rate": 8.771775000963203e-06, "loss": 0.2986, "step": 7319 }, { "epoch": 0.25120109814687713, "grad_norm": 0.7392675576221657, "learning_rate": 8.771410153506249e-06, "loss": 0.3904, "step": 7320 }, { "epoch": 0.25123541523678794, "grad_norm": 0.8474054890560575, "learning_rate": 8.771045259457919e-06, "loss": 0.3629, "step": 7321 }, { "epoch": 0.2512697323266987, "grad_norm": 0.7500799498692787, "learning_rate": 8.770680318822721e-06, "loss": 0.3129, "step": 7322 }, { "epoch": 0.2513040494166095, "grad_norm": 0.8341127098311751, "learning_rate": 8.770315331605165e-06, "loss": 0.3354, "step": 7323 }, { "epoch": 0.25133836650652025, "grad_norm": 0.924823480015592, "learning_rate": 8.769950297809758e-06, "loss": 0.3357, "step": 7324 }, { "epoch": 0.251372683596431, "grad_norm": 0.7859869643225444, "learning_rate": 8.76958521744101e-06, "loss": 0.2842, "step": 7325 }, { "epoch": 0.2514070006863418, "grad_norm": 0.8607789754738838, "learning_rate": 8.769220090503431e-06, "loss": 0.3925, "step": 7326 }, { "epoch": 0.25144131777625256, "grad_norm": 0.8920591353611296, "learning_rate": 8.768854917001536e-06, "loss": 0.2953, "step": 7327 }, { "epoch": 0.25147563486616337, "grad_norm": 0.7049972666055261, "learning_rate": 8.768489696939828e-06, "loss": 0.2759, "step": 7328 }, { "epoch": 0.2515099519560741, "grad_norm": 1.2167326371910392, "learning_rate": 8.768124430322826e-06, "loss": 0.3237, "step": 7329 }, { "epoch": 0.2515442690459849, "grad_norm": 0.8872567828757929, "learning_rate": 8.767759117155038e-06, "loss": 0.3746, "step": 7330 }, { "epoch": 0.2515785861358957, "grad_norm": 0.7499339962163937, "learning_rate": 8.76739375744098e-06, "loss": 0.3658, "step": 7331 }, { "epoch": 0.25161290322580643, "grad_norm": 0.7324373174898458, "learning_rate": 8.767028351185167e-06, "loss": 0.2842, "step": 7332 }, { "epoch": 0.25164722031571723, "grad_norm": 0.7939108075846156, "learning_rate": 8.766662898392107e-06, "loss": 0.2815, "step": 7333 }, { "epoch": 0.251681537405628, "grad_norm": 0.6437136535151667, "learning_rate": 8.76629739906632e-06, "loss": 0.3057, "step": 7334 }, { "epoch": 0.2517158544955388, "grad_norm": 0.7373095652144711, "learning_rate": 8.765931853212319e-06, "loss": 0.2792, "step": 7335 }, { "epoch": 0.25175017158544954, "grad_norm": 0.7276714125726614, "learning_rate": 8.765566260834621e-06, "loss": 0.3422, "step": 7336 }, { "epoch": 0.25178448867536035, "grad_norm": 0.7364233464277634, "learning_rate": 8.765200621937742e-06, "loss": 0.3007, "step": 7337 }, { "epoch": 0.2518188057652711, "grad_norm": 0.8291641613415703, "learning_rate": 8.764834936526199e-06, "loss": 0.3535, "step": 7338 }, { "epoch": 0.25185312285518185, "grad_norm": 0.6890921700662388, "learning_rate": 8.76446920460451e-06, "loss": 0.2951, "step": 7339 }, { "epoch": 0.25188743994509266, "grad_norm": 0.7846978605455188, "learning_rate": 8.764103426177193e-06, "loss": 0.2784, "step": 7340 }, { "epoch": 0.2519217570350034, "grad_norm": 0.7564791985714066, "learning_rate": 8.763737601248767e-06, "loss": 0.334, "step": 7341 }, { "epoch": 0.2519560741249142, "grad_norm": 0.6948588038818445, "learning_rate": 8.76337172982375e-06, "loss": 0.3044, "step": 7342 }, { "epoch": 0.25199039121482497, "grad_norm": 0.7190002625203461, "learning_rate": 8.763005811906665e-06, "loss": 0.3438, "step": 7343 }, { "epoch": 0.2520247083047358, "grad_norm": 0.7689926156337868, "learning_rate": 8.762639847502029e-06, "loss": 0.3789, "step": 7344 }, { "epoch": 0.25205902539464653, "grad_norm": 0.8275913005786041, "learning_rate": 8.762273836614363e-06, "loss": 0.3631, "step": 7345 }, { "epoch": 0.25209334248455734, "grad_norm": 0.7738103282703906, "learning_rate": 8.761907779248191e-06, "loss": 0.3131, "step": 7346 }, { "epoch": 0.2521276595744681, "grad_norm": 0.7614177951663709, "learning_rate": 8.761541675408036e-06, "loss": 0.302, "step": 7347 }, { "epoch": 0.25216197666437884, "grad_norm": 0.7312431091699766, "learning_rate": 8.761175525098418e-06, "loss": 0.3276, "step": 7348 }, { "epoch": 0.25219629375428965, "grad_norm": 0.7660170682601425, "learning_rate": 8.76080932832386e-06, "loss": 0.3017, "step": 7349 }, { "epoch": 0.2522306108442004, "grad_norm": 0.7053402169245521, "learning_rate": 8.76044308508889e-06, "loss": 0.3051, "step": 7350 }, { "epoch": 0.2522649279341112, "grad_norm": 0.9168590877120903, "learning_rate": 8.76007679539803e-06, "loss": 0.3123, "step": 7351 }, { "epoch": 0.25229924502402196, "grad_norm": 0.7985749408519017, "learning_rate": 8.759710459255803e-06, "loss": 0.3095, "step": 7352 }, { "epoch": 0.25233356211393276, "grad_norm": 0.7758969488327822, "learning_rate": 8.759344076666737e-06, "loss": 0.3185, "step": 7353 }, { "epoch": 0.2523678792038435, "grad_norm": 0.796495100263322, "learning_rate": 8.758977647635358e-06, "loss": 0.3262, "step": 7354 }, { "epoch": 0.25240219629375427, "grad_norm": 0.7821860884965, "learning_rate": 8.758611172166194e-06, "loss": 0.3193, "step": 7355 }, { "epoch": 0.2524365133836651, "grad_norm": 0.9052729296117422, "learning_rate": 8.758244650263769e-06, "loss": 0.3248, "step": 7356 }, { "epoch": 0.2524708304735758, "grad_norm": 0.722904134401017, "learning_rate": 8.757878081932616e-06, "loss": 0.3402, "step": 7357 }, { "epoch": 0.25250514756348663, "grad_norm": 0.8639286127187775, "learning_rate": 8.757511467177258e-06, "loss": 0.3781, "step": 7358 }, { "epoch": 0.2525394646533974, "grad_norm": 0.794924593205692, "learning_rate": 8.757144806002227e-06, "loss": 0.2937, "step": 7359 }, { "epoch": 0.2525737817433082, "grad_norm": 0.8462254344638999, "learning_rate": 8.756778098412052e-06, "loss": 0.2644, "step": 7360 }, { "epoch": 0.25260809883321894, "grad_norm": 0.8353155506986817, "learning_rate": 8.756411344411267e-06, "loss": 0.3068, "step": 7361 }, { "epoch": 0.2526424159231297, "grad_norm": 0.7258639743717545, "learning_rate": 8.756044544004396e-06, "loss": 0.2939, "step": 7362 }, { "epoch": 0.2526767330130405, "grad_norm": 0.8498016679048834, "learning_rate": 8.755677697195975e-06, "loss": 0.3327, "step": 7363 }, { "epoch": 0.25271105010295125, "grad_norm": 0.8292018704714238, "learning_rate": 8.755310803990534e-06, "loss": 0.3078, "step": 7364 }, { "epoch": 0.25274536719286206, "grad_norm": 0.6941726019845554, "learning_rate": 8.754943864392605e-06, "loss": 0.2897, "step": 7365 }, { "epoch": 0.2527796842827728, "grad_norm": 0.7759664328916666, "learning_rate": 8.754576878406724e-06, "loss": 0.3398, "step": 7366 }, { "epoch": 0.2528140013726836, "grad_norm": 0.8057836351737561, "learning_rate": 8.754209846037424e-06, "loss": 0.3237, "step": 7367 }, { "epoch": 0.25284831846259437, "grad_norm": 0.7734379889194019, "learning_rate": 8.753842767289238e-06, "loss": 0.3176, "step": 7368 }, { "epoch": 0.2528826355525052, "grad_norm": 0.7069811208527123, "learning_rate": 8.7534756421667e-06, "loss": 0.3085, "step": 7369 }, { "epoch": 0.2529169526424159, "grad_norm": 0.7062821628044899, "learning_rate": 8.753108470674346e-06, "loss": 0.2876, "step": 7370 }, { "epoch": 0.2529512697323267, "grad_norm": 0.8202252738881063, "learning_rate": 8.752741252816714e-06, "loss": 0.311, "step": 7371 }, { "epoch": 0.2529855868222375, "grad_norm": 0.8190027026596534, "learning_rate": 8.75237398859834e-06, "loss": 0.2944, "step": 7372 }, { "epoch": 0.25301990391214824, "grad_norm": 0.7260296083070065, "learning_rate": 8.752006678023759e-06, "loss": 0.3535, "step": 7373 }, { "epoch": 0.25305422100205904, "grad_norm": 0.7197259445925926, "learning_rate": 8.75163932109751e-06, "loss": 0.3279, "step": 7374 }, { "epoch": 0.2530885380919698, "grad_norm": 0.7567007232070413, "learning_rate": 8.75127191782413e-06, "loss": 0.2825, "step": 7375 }, { "epoch": 0.2531228551818806, "grad_norm": 0.8026718930421, "learning_rate": 8.75090446820816e-06, "loss": 0.3274, "step": 7376 }, { "epoch": 0.25315717227179135, "grad_norm": 0.7838764038914449, "learning_rate": 8.75053697225414e-06, "loss": 0.3157, "step": 7377 }, { "epoch": 0.2531914893617021, "grad_norm": 1.0801091793749662, "learning_rate": 8.750169429966606e-06, "loss": 0.3094, "step": 7378 }, { "epoch": 0.2532258064516129, "grad_norm": 0.7123232238779729, "learning_rate": 8.749801841350101e-06, "loss": 0.3143, "step": 7379 }, { "epoch": 0.25326012354152366, "grad_norm": 0.7874976130691566, "learning_rate": 8.749434206409169e-06, "loss": 0.3059, "step": 7380 }, { "epoch": 0.25329444063143447, "grad_norm": 0.8674058906985942, "learning_rate": 8.749066525148345e-06, "loss": 0.3456, "step": 7381 }, { "epoch": 0.2533287577213452, "grad_norm": 0.7377450854167275, "learning_rate": 8.748698797572177e-06, "loss": 0.336, "step": 7382 }, { "epoch": 0.25336307481125603, "grad_norm": 0.805104588549896, "learning_rate": 8.748331023685207e-06, "loss": 0.3181, "step": 7383 }, { "epoch": 0.2533973919011668, "grad_norm": 0.7299947757767362, "learning_rate": 8.747963203491977e-06, "loss": 0.2899, "step": 7384 }, { "epoch": 0.25343170899107753, "grad_norm": 0.8344991334359384, "learning_rate": 8.747595336997027e-06, "loss": 0.3304, "step": 7385 }, { "epoch": 0.25346602608098834, "grad_norm": 0.799719015160754, "learning_rate": 8.74722742420491e-06, "loss": 0.2929, "step": 7386 }, { "epoch": 0.2535003431708991, "grad_norm": 0.8511686453024048, "learning_rate": 8.746859465120168e-06, "loss": 0.3211, "step": 7387 }, { "epoch": 0.2535346602608099, "grad_norm": 0.7539584868870962, "learning_rate": 8.746491459747344e-06, "loss": 0.304, "step": 7388 }, { "epoch": 0.25356897735072065, "grad_norm": 0.7452704748520331, "learning_rate": 8.746123408090986e-06, "loss": 0.3289, "step": 7389 }, { "epoch": 0.25360329444063145, "grad_norm": 0.9518273471598319, "learning_rate": 8.74575531015564e-06, "loss": 0.2854, "step": 7390 }, { "epoch": 0.2536376115305422, "grad_norm": 0.7364695842402014, "learning_rate": 8.745387165945856e-06, "loss": 0.3547, "step": 7391 }, { "epoch": 0.253671928620453, "grad_norm": 0.7921216508030392, "learning_rate": 8.745018975466179e-06, "loss": 0.3182, "step": 7392 }, { "epoch": 0.25370624571036376, "grad_norm": 0.718179688899627, "learning_rate": 8.744650738721158e-06, "loss": 0.2971, "step": 7393 }, { "epoch": 0.2537405628002745, "grad_norm": 0.7974849176112848, "learning_rate": 8.744282455715345e-06, "loss": 0.3501, "step": 7394 }, { "epoch": 0.2537748798901853, "grad_norm": 0.9140926905510259, "learning_rate": 8.743914126453287e-06, "loss": 0.3359, "step": 7395 }, { "epoch": 0.2538091969800961, "grad_norm": 0.7719036701476967, "learning_rate": 8.743545750939534e-06, "loss": 0.3424, "step": 7396 }, { "epoch": 0.2538435140700069, "grad_norm": 0.8217221689842585, "learning_rate": 8.74317732917864e-06, "loss": 0.3183, "step": 7397 }, { "epoch": 0.25387783115991763, "grad_norm": 0.9241746229784176, "learning_rate": 8.742808861175152e-06, "loss": 0.3029, "step": 7398 }, { "epoch": 0.25391214824982844, "grad_norm": 0.8890842327904626, "learning_rate": 8.742440346933625e-06, "loss": 0.3112, "step": 7399 }, { "epoch": 0.2539464653397392, "grad_norm": 0.8077558604023232, "learning_rate": 8.74207178645861e-06, "loss": 0.3248, "step": 7400 }, { "epoch": 0.25398078242964994, "grad_norm": 0.8156947148944262, "learning_rate": 8.741703179754663e-06, "loss": 0.34, "step": 7401 }, { "epoch": 0.25401509951956075, "grad_norm": 0.722304383817122, "learning_rate": 8.741334526826335e-06, "loss": 0.2957, "step": 7402 }, { "epoch": 0.2540494166094715, "grad_norm": 0.7451583503502213, "learning_rate": 8.740965827678179e-06, "loss": 0.3252, "step": 7403 }, { "epoch": 0.2540837336993823, "grad_norm": 0.830069773399366, "learning_rate": 8.740597082314754e-06, "loss": 0.3567, "step": 7404 }, { "epoch": 0.25411805078929306, "grad_norm": 0.7463850793803235, "learning_rate": 8.740228290740611e-06, "loss": 0.4279, "step": 7405 }, { "epoch": 0.25415236787920387, "grad_norm": 0.739883759231203, "learning_rate": 8.739859452960312e-06, "loss": 0.3889, "step": 7406 }, { "epoch": 0.2541866849691146, "grad_norm": 0.9584470541835617, "learning_rate": 8.739490568978405e-06, "loss": 0.3529, "step": 7407 }, { "epoch": 0.25422100205902537, "grad_norm": 0.7877644693707864, "learning_rate": 8.739121638799453e-06, "loss": 0.3376, "step": 7408 }, { "epoch": 0.2542553191489362, "grad_norm": 0.8993159968626938, "learning_rate": 8.738752662428014e-06, "loss": 0.3087, "step": 7409 }, { "epoch": 0.25428963623884693, "grad_norm": 0.8130735317546178, "learning_rate": 8.738383639868644e-06, "loss": 0.2851, "step": 7410 }, { "epoch": 0.25432395332875773, "grad_norm": 0.7940181556798769, "learning_rate": 8.738014571125903e-06, "loss": 0.3672, "step": 7411 }, { "epoch": 0.2543582704186685, "grad_norm": 0.8305396329313375, "learning_rate": 8.737645456204351e-06, "loss": 0.3596, "step": 7412 }, { "epoch": 0.2543925875085793, "grad_norm": 0.776149053309586, "learning_rate": 8.737276295108545e-06, "loss": 0.2792, "step": 7413 }, { "epoch": 0.25442690459849004, "grad_norm": 0.7852466606554913, "learning_rate": 8.73690708784305e-06, "loss": 0.2989, "step": 7414 }, { "epoch": 0.2544612216884008, "grad_norm": 0.7825703835985662, "learning_rate": 8.736537834412422e-06, "loss": 0.3207, "step": 7415 }, { "epoch": 0.2544955387783116, "grad_norm": 0.7315499369907262, "learning_rate": 8.736168534821228e-06, "loss": 0.3221, "step": 7416 }, { "epoch": 0.25452985586822235, "grad_norm": 0.7512994553638095, "learning_rate": 8.735799189074026e-06, "loss": 0.2698, "step": 7417 }, { "epoch": 0.25456417295813316, "grad_norm": 0.8094826794092304, "learning_rate": 8.73542979717538e-06, "loss": 0.3807, "step": 7418 }, { "epoch": 0.2545984900480439, "grad_norm": 0.7664281109132861, "learning_rate": 8.735060359129855e-06, "loss": 0.3051, "step": 7419 }, { "epoch": 0.2546328071379547, "grad_norm": 0.801492256337298, "learning_rate": 8.734690874942014e-06, "loss": 0.3278, "step": 7420 }, { "epoch": 0.25466712422786547, "grad_norm": 0.8649187389773351, "learning_rate": 8.734321344616421e-06, "loss": 0.2775, "step": 7421 }, { "epoch": 0.2547014413177763, "grad_norm": 0.8853664400370187, "learning_rate": 8.733951768157643e-06, "loss": 0.3058, "step": 7422 }, { "epoch": 0.25473575840768703, "grad_norm": 0.8006494963699301, "learning_rate": 8.733582145570243e-06, "loss": 0.3377, "step": 7423 }, { "epoch": 0.2547700754975978, "grad_norm": 0.7683004183332246, "learning_rate": 8.73321247685879e-06, "loss": 0.3228, "step": 7424 }, { "epoch": 0.2548043925875086, "grad_norm": 0.796222439326982, "learning_rate": 8.732842762027847e-06, "loss": 0.4, "step": 7425 }, { "epoch": 0.25483870967741934, "grad_norm": 0.8671742218808953, "learning_rate": 8.732473001081984e-06, "loss": 0.3171, "step": 7426 }, { "epoch": 0.25487302676733015, "grad_norm": 0.7607415544347811, "learning_rate": 8.73210319402577e-06, "loss": 0.3392, "step": 7427 }, { "epoch": 0.2549073438572409, "grad_norm": 0.8099995587783992, "learning_rate": 8.731733340863773e-06, "loss": 0.3774, "step": 7428 }, { "epoch": 0.2549416609471517, "grad_norm": 1.177633023830606, "learning_rate": 8.73136344160056e-06, "loss": 0.3585, "step": 7429 }, { "epoch": 0.25497597803706246, "grad_norm": 0.8138541272230908, "learning_rate": 8.730993496240702e-06, "loss": 0.3131, "step": 7430 }, { "epoch": 0.2550102951269732, "grad_norm": 0.707360741176137, "learning_rate": 8.730623504788769e-06, "loss": 0.2818, "step": 7431 }, { "epoch": 0.255044612216884, "grad_norm": 0.8020480242134391, "learning_rate": 8.730253467249332e-06, "loss": 0.3461, "step": 7432 }, { "epoch": 0.25507892930679477, "grad_norm": 0.7707991260842053, "learning_rate": 8.729883383626964e-06, "loss": 0.3394, "step": 7433 }, { "epoch": 0.2551132463967056, "grad_norm": 0.8946299707489083, "learning_rate": 8.729513253926234e-06, "loss": 0.352, "step": 7434 }, { "epoch": 0.2551475634866163, "grad_norm": 0.895434163655199, "learning_rate": 8.729143078151717e-06, "loss": 0.3562, "step": 7435 }, { "epoch": 0.25518188057652713, "grad_norm": 0.8274491139210509, "learning_rate": 8.728772856307984e-06, "loss": 0.339, "step": 7436 }, { "epoch": 0.2552161976664379, "grad_norm": 0.8399211607594499, "learning_rate": 8.728402588399609e-06, "loss": 0.2719, "step": 7437 }, { "epoch": 0.25525051475634863, "grad_norm": 0.8978459609956708, "learning_rate": 8.728032274431168e-06, "loss": 0.3166, "step": 7438 }, { "epoch": 0.25528483184625944, "grad_norm": 0.8610623633169118, "learning_rate": 8.727661914407235e-06, "loss": 0.3061, "step": 7439 }, { "epoch": 0.2553191489361702, "grad_norm": 0.7619377860471086, "learning_rate": 8.727291508332385e-06, "loss": 0.3644, "step": 7440 }, { "epoch": 0.255353466026081, "grad_norm": 0.8530565867704566, "learning_rate": 8.726921056211195e-06, "loss": 0.3001, "step": 7441 }, { "epoch": 0.25538778311599175, "grad_norm": 0.7151928956085442, "learning_rate": 8.726550558048239e-06, "loss": 0.3415, "step": 7442 }, { "epoch": 0.25542210020590256, "grad_norm": 0.7606348951794141, "learning_rate": 8.726180013848096e-06, "loss": 0.3256, "step": 7443 }, { "epoch": 0.2554564172958133, "grad_norm": 0.6544530075912041, "learning_rate": 8.725809423615343e-06, "loss": 0.2731, "step": 7444 }, { "epoch": 0.2554907343857241, "grad_norm": 0.8038448630989486, "learning_rate": 8.725438787354558e-06, "loss": 0.3421, "step": 7445 }, { "epoch": 0.25552505147563487, "grad_norm": 0.8522301306431117, "learning_rate": 8.725068105070322e-06, "loss": 0.3351, "step": 7446 }, { "epoch": 0.2555593685655456, "grad_norm": 0.7942163271766661, "learning_rate": 8.72469737676721e-06, "loss": 0.3202, "step": 7447 }, { "epoch": 0.2555936856554564, "grad_norm": 0.7219443083396326, "learning_rate": 8.724326602449807e-06, "loss": 0.3197, "step": 7448 }, { "epoch": 0.2556280027453672, "grad_norm": 0.7717449518663114, "learning_rate": 8.72395578212269e-06, "loss": 0.3226, "step": 7449 }, { "epoch": 0.255662319835278, "grad_norm": 0.8651684157850805, "learning_rate": 8.72358491579044e-06, "loss": 0.3306, "step": 7450 }, { "epoch": 0.25569663692518874, "grad_norm": 0.8180442827449004, "learning_rate": 8.72321400345764e-06, "loss": 0.3137, "step": 7451 }, { "epoch": 0.25573095401509954, "grad_norm": 0.7509727270211904, "learning_rate": 8.722843045128872e-06, "loss": 0.3216, "step": 7452 }, { "epoch": 0.2557652711050103, "grad_norm": 0.8616240173173986, "learning_rate": 8.722472040808721e-06, "loss": 0.3404, "step": 7453 }, { "epoch": 0.25579958819492105, "grad_norm": 0.8347425360680747, "learning_rate": 8.722100990501766e-06, "loss": 0.3845, "step": 7454 }, { "epoch": 0.25583390528483185, "grad_norm": 0.7520802845289727, "learning_rate": 8.721729894212592e-06, "loss": 0.293, "step": 7455 }, { "epoch": 0.2558682223747426, "grad_norm": 0.9209670793125995, "learning_rate": 8.721358751945785e-06, "loss": 0.2869, "step": 7456 }, { "epoch": 0.2559025394646534, "grad_norm": 0.8048198737436257, "learning_rate": 8.72098756370593e-06, "loss": 0.2937, "step": 7457 }, { "epoch": 0.25593685655456416, "grad_norm": 0.8566977063371383, "learning_rate": 8.72061632949761e-06, "loss": 0.3116, "step": 7458 }, { "epoch": 0.25597117364447497, "grad_norm": 0.8648695530221, "learning_rate": 8.720245049325415e-06, "loss": 0.3105, "step": 7459 }, { "epoch": 0.2560054907343857, "grad_norm": 0.8092976896880105, "learning_rate": 8.719873723193927e-06, "loss": 0.2673, "step": 7460 }, { "epoch": 0.2560398078242965, "grad_norm": 0.7867308375959629, "learning_rate": 8.719502351107739e-06, "loss": 0.3257, "step": 7461 }, { "epoch": 0.2560741249142073, "grad_norm": 0.7810105881324293, "learning_rate": 8.719130933071437e-06, "loss": 0.3658, "step": 7462 }, { "epoch": 0.25610844200411803, "grad_norm": 0.7140781667690231, "learning_rate": 8.718759469089606e-06, "loss": 0.3349, "step": 7463 }, { "epoch": 0.25614275909402884, "grad_norm": 0.776001940960225, "learning_rate": 8.718387959166837e-06, "loss": 0.3456, "step": 7464 }, { "epoch": 0.2561770761839396, "grad_norm": 0.8519655079994232, "learning_rate": 8.71801640330772e-06, "loss": 0.3497, "step": 7465 }, { "epoch": 0.2562113932738504, "grad_norm": 0.7318589242863622, "learning_rate": 8.717644801516847e-06, "loss": 0.2864, "step": 7466 }, { "epoch": 0.25624571036376115, "grad_norm": 0.7690053170808331, "learning_rate": 8.717273153798806e-06, "loss": 0.3036, "step": 7467 }, { "epoch": 0.25628002745367195, "grad_norm": 0.8015046238238628, "learning_rate": 8.71690146015819e-06, "loss": 0.3635, "step": 7468 }, { "epoch": 0.2563143445435827, "grad_norm": 0.7248482834034221, "learning_rate": 8.716529720599586e-06, "loss": 0.3237, "step": 7469 }, { "epoch": 0.25634866163349346, "grad_norm": 0.8122885748537214, "learning_rate": 8.716157935127595e-06, "loss": 0.3865, "step": 7470 }, { "epoch": 0.25638297872340426, "grad_norm": 0.7576638479950749, "learning_rate": 8.715786103746803e-06, "loss": 0.3819, "step": 7471 }, { "epoch": 0.256417295813315, "grad_norm": 0.7173292617183071, "learning_rate": 8.715414226461807e-06, "loss": 0.3016, "step": 7472 }, { "epoch": 0.2564516129032258, "grad_norm": 0.8343853801934047, "learning_rate": 8.7150423032772e-06, "loss": 0.3303, "step": 7473 }, { "epoch": 0.2564859299931366, "grad_norm": 0.9226631416656588, "learning_rate": 8.714670334197575e-06, "loss": 0.3643, "step": 7474 }, { "epoch": 0.2565202470830474, "grad_norm": 0.8109104741581915, "learning_rate": 8.71429831922753e-06, "loss": 0.3665, "step": 7475 }, { "epoch": 0.25655456417295813, "grad_norm": 0.8913737186493728, "learning_rate": 8.71392625837166e-06, "loss": 0.3558, "step": 7476 }, { "epoch": 0.2565888812628689, "grad_norm": 0.9532345090138864, "learning_rate": 8.71355415163456e-06, "loss": 0.3554, "step": 7477 }, { "epoch": 0.2566231983527797, "grad_norm": 0.76288961726146, "learning_rate": 8.71318199902083e-06, "loss": 0.293, "step": 7478 }, { "epoch": 0.25665751544269044, "grad_norm": 0.8174062198899272, "learning_rate": 8.712809800535065e-06, "loss": 0.3339, "step": 7479 }, { "epoch": 0.25669183253260125, "grad_norm": 0.7928958665548255, "learning_rate": 8.712437556181863e-06, "loss": 0.2896, "step": 7480 }, { "epoch": 0.256726149622512, "grad_norm": 0.6670173095974054, "learning_rate": 8.712065265965823e-06, "loss": 0.2971, "step": 7481 }, { "epoch": 0.2567604667124228, "grad_norm": 0.7251923254869622, "learning_rate": 8.711692929891545e-06, "loss": 0.3271, "step": 7482 }, { "epoch": 0.25679478380233356, "grad_norm": 0.850232206297266, "learning_rate": 8.711320547963627e-06, "loss": 0.3188, "step": 7483 }, { "epoch": 0.2568291008922443, "grad_norm": 0.8804185732655182, "learning_rate": 8.710948120186673e-06, "loss": 0.3877, "step": 7484 }, { "epoch": 0.2568634179821551, "grad_norm": 0.7933390725736313, "learning_rate": 8.710575646565281e-06, "loss": 0.3051, "step": 7485 }, { "epoch": 0.25689773507206587, "grad_norm": 0.7753184871700897, "learning_rate": 8.710203127104051e-06, "loss": 0.2981, "step": 7486 }, { "epoch": 0.2569320521619767, "grad_norm": 0.7665897732529194, "learning_rate": 8.709830561807589e-06, "loss": 0.3517, "step": 7487 }, { "epoch": 0.2569663692518874, "grad_norm": 0.7483732058976263, "learning_rate": 8.709457950680494e-06, "loss": 0.3679, "step": 7488 }, { "epoch": 0.25700068634179823, "grad_norm": 0.7989504291398277, "learning_rate": 8.709085293727372e-06, "loss": 0.3423, "step": 7489 }, { "epoch": 0.257035003431709, "grad_norm": 0.8283688375994387, "learning_rate": 8.708712590952825e-06, "loss": 0.3094, "step": 7490 }, { "epoch": 0.2570693205216198, "grad_norm": 0.7910422010547237, "learning_rate": 8.708339842361458e-06, "loss": 0.3359, "step": 7491 }, { "epoch": 0.25710363761153054, "grad_norm": 0.7566611202951083, "learning_rate": 8.707967047957876e-06, "loss": 0.314, "step": 7492 }, { "epoch": 0.2571379547014413, "grad_norm": 0.7473542090542928, "learning_rate": 8.707594207746683e-06, "loss": 0.295, "step": 7493 }, { "epoch": 0.2571722717913521, "grad_norm": 0.7028081206821576, "learning_rate": 8.707221321732488e-06, "loss": 0.2662, "step": 7494 }, { "epoch": 0.25720658888126285, "grad_norm": 0.8428116628270906, "learning_rate": 8.706848389919895e-06, "loss": 0.3062, "step": 7495 }, { "epoch": 0.25724090597117366, "grad_norm": 0.7785809029311839, "learning_rate": 8.706475412313513e-06, "loss": 0.3001, "step": 7496 }, { "epoch": 0.2572752230610844, "grad_norm": 0.7504980274144245, "learning_rate": 8.706102388917948e-06, "loss": 0.3203, "step": 7497 }, { "epoch": 0.2573095401509952, "grad_norm": 0.785790026364864, "learning_rate": 8.705729319737809e-06, "loss": 0.3723, "step": 7498 }, { "epoch": 0.25734385724090597, "grad_norm": 0.8235627127538009, "learning_rate": 8.705356204777702e-06, "loss": 0.3078, "step": 7499 }, { "epoch": 0.2573781743308167, "grad_norm": 0.8231675245451112, "learning_rate": 8.704983044042243e-06, "loss": 0.2575, "step": 7500 }, { "epoch": 0.25741249142072753, "grad_norm": 0.832751056918131, "learning_rate": 8.704609837536035e-06, "loss": 0.3428, "step": 7501 }, { "epoch": 0.2574468085106383, "grad_norm": 0.8059583511947265, "learning_rate": 8.704236585263694e-06, "loss": 0.3233, "step": 7502 }, { "epoch": 0.2574811256005491, "grad_norm": 0.8388400091772329, "learning_rate": 8.703863287229828e-06, "loss": 0.338, "step": 7503 }, { "epoch": 0.25751544269045984, "grad_norm": 0.7887998943056391, "learning_rate": 8.703489943439048e-06, "loss": 0.3107, "step": 7504 }, { "epoch": 0.25754975978037065, "grad_norm": 0.8050532652462116, "learning_rate": 8.70311655389597e-06, "loss": 0.3285, "step": 7505 }, { "epoch": 0.2575840768702814, "grad_norm": 0.7467413036481501, "learning_rate": 8.702743118605202e-06, "loss": 0.289, "step": 7506 }, { "epoch": 0.25761839396019215, "grad_norm": 0.7943049166212943, "learning_rate": 8.702369637571362e-06, "loss": 0.3413, "step": 7507 }, { "epoch": 0.25765271105010296, "grad_norm": 0.9130215463051432, "learning_rate": 8.70199611079906e-06, "loss": 0.3061, "step": 7508 }, { "epoch": 0.2576870281400137, "grad_norm": 0.7189636840172654, "learning_rate": 8.701622538292913e-06, "loss": 0.3534, "step": 7509 }, { "epoch": 0.2577213452299245, "grad_norm": 0.8879788687488094, "learning_rate": 8.701248920057535e-06, "loss": 0.333, "step": 7510 }, { "epoch": 0.25775566231983527, "grad_norm": 0.8403984768916758, "learning_rate": 8.70087525609754e-06, "loss": 0.3397, "step": 7511 }, { "epoch": 0.2577899794097461, "grad_norm": 0.8313825890517416, "learning_rate": 8.700501546417548e-06, "loss": 0.3701, "step": 7512 }, { "epoch": 0.2578242964996568, "grad_norm": 0.730737204615293, "learning_rate": 8.700127791022174e-06, "loss": 0.3041, "step": 7513 }, { "epoch": 0.25785861358956763, "grad_norm": 0.6259058792312586, "learning_rate": 8.699753989916036e-06, "loss": 0.2757, "step": 7514 }, { "epoch": 0.2578929306794784, "grad_norm": 0.7746795547913021, "learning_rate": 8.699380143103749e-06, "loss": 0.3796, "step": 7515 }, { "epoch": 0.25792724776938913, "grad_norm": 0.8091396366438177, "learning_rate": 8.699006250589933e-06, "loss": 0.2663, "step": 7516 }, { "epoch": 0.25796156485929994, "grad_norm": 0.8620363012030898, "learning_rate": 8.698632312379208e-06, "loss": 0.3932, "step": 7517 }, { "epoch": 0.2579958819492107, "grad_norm": 0.888597666758762, "learning_rate": 8.698258328476195e-06, "loss": 0.3983, "step": 7518 }, { "epoch": 0.2580301990391215, "grad_norm": 0.7339074337451276, "learning_rate": 8.697884298885507e-06, "loss": 0.3169, "step": 7519 }, { "epoch": 0.25806451612903225, "grad_norm": 0.9012152288329955, "learning_rate": 8.697510223611775e-06, "loss": 0.3397, "step": 7520 }, { "epoch": 0.25809883321894306, "grad_norm": 0.7471929936965522, "learning_rate": 8.697136102659614e-06, "loss": 0.2961, "step": 7521 }, { "epoch": 0.2581331503088538, "grad_norm": 0.7808232513912832, "learning_rate": 8.696761936033646e-06, "loss": 0.3615, "step": 7522 }, { "epoch": 0.25816746739876456, "grad_norm": 0.8663234174615252, "learning_rate": 8.696387723738494e-06, "loss": 0.2651, "step": 7523 }, { "epoch": 0.25820178448867537, "grad_norm": 0.7491104938488808, "learning_rate": 8.696013465778781e-06, "loss": 0.4148, "step": 7524 }, { "epoch": 0.2582361015785861, "grad_norm": 0.6716500089970046, "learning_rate": 8.695639162159134e-06, "loss": 0.2907, "step": 7525 }, { "epoch": 0.2582704186684969, "grad_norm": 0.7752773927779115, "learning_rate": 8.69526481288417e-06, "loss": 0.3103, "step": 7526 }, { "epoch": 0.2583047357584077, "grad_norm": 0.8144056912363103, "learning_rate": 8.694890417958518e-06, "loss": 0.3249, "step": 7527 }, { "epoch": 0.2583390528483185, "grad_norm": 0.7160795360657373, "learning_rate": 8.694515977386804e-06, "loss": 0.3336, "step": 7528 }, { "epoch": 0.25837336993822924, "grad_norm": 0.7902939915618287, "learning_rate": 8.694141491173653e-06, "loss": 0.2901, "step": 7529 }, { "epoch": 0.25840768702814, "grad_norm": 0.7574825693868867, "learning_rate": 8.693766959323688e-06, "loss": 0.3061, "step": 7530 }, { "epoch": 0.2584420041180508, "grad_norm": 0.7995596677030142, "learning_rate": 8.693392381841541e-06, "loss": 0.3118, "step": 7531 }, { "epoch": 0.25847632120796155, "grad_norm": 0.7621619975154754, "learning_rate": 8.693017758731837e-06, "loss": 0.2742, "step": 7532 }, { "epoch": 0.25851063829787235, "grad_norm": 0.7535759946554771, "learning_rate": 8.692643089999204e-06, "loss": 0.2635, "step": 7533 }, { "epoch": 0.2585449553877831, "grad_norm": 0.7407332364710727, "learning_rate": 8.69226837564827e-06, "loss": 0.3446, "step": 7534 }, { "epoch": 0.2585792724776939, "grad_norm": 0.9320607452703534, "learning_rate": 8.691893615683667e-06, "loss": 0.3133, "step": 7535 }, { "epoch": 0.25861358956760466, "grad_norm": 0.8147726898849734, "learning_rate": 8.69151881011002e-06, "loss": 0.3206, "step": 7536 }, { "epoch": 0.25864790665751547, "grad_norm": 0.8238829334712207, "learning_rate": 8.691143958931964e-06, "loss": 0.3954, "step": 7537 }, { "epoch": 0.2586822237474262, "grad_norm": 0.8404448168766041, "learning_rate": 8.690769062154127e-06, "loss": 0.2993, "step": 7538 }, { "epoch": 0.258716540837337, "grad_norm": 0.7427849475255888, "learning_rate": 8.690394119781142e-06, "loss": 0.3179, "step": 7539 }, { "epoch": 0.2587508579272478, "grad_norm": 0.7527908102733398, "learning_rate": 8.690019131817639e-06, "loss": 0.2967, "step": 7540 }, { "epoch": 0.25878517501715853, "grad_norm": 0.7763331597562444, "learning_rate": 8.689644098268252e-06, "loss": 0.3215, "step": 7541 }, { "epoch": 0.25881949210706934, "grad_norm": 0.6717882970470168, "learning_rate": 8.689269019137617e-06, "loss": 0.2511, "step": 7542 }, { "epoch": 0.2588538091969801, "grad_norm": 0.7406963614951965, "learning_rate": 8.688893894430361e-06, "loss": 0.3407, "step": 7543 }, { "epoch": 0.2588881262868909, "grad_norm": 0.8058916237399714, "learning_rate": 8.688518724151122e-06, "loss": 0.3737, "step": 7544 }, { "epoch": 0.25892244337680165, "grad_norm": 0.7765767064293632, "learning_rate": 8.688143508304536e-06, "loss": 0.3076, "step": 7545 }, { "epoch": 0.2589567604667124, "grad_norm": 0.7492972099872366, "learning_rate": 8.687768246895236e-06, "loss": 0.3586, "step": 7546 }, { "epoch": 0.2589910775566232, "grad_norm": 0.7332578492814358, "learning_rate": 8.68739293992786e-06, "loss": 0.3316, "step": 7547 }, { "epoch": 0.25902539464653396, "grad_norm": 0.797109793925084, "learning_rate": 8.687017587407042e-06, "loss": 0.3506, "step": 7548 }, { "epoch": 0.25905971173644476, "grad_norm": 0.8260889733141165, "learning_rate": 8.68664218933742e-06, "loss": 0.3451, "step": 7549 }, { "epoch": 0.2590940288263555, "grad_norm": 0.874974011661042, "learning_rate": 8.686266745723633e-06, "loss": 0.3208, "step": 7550 }, { "epoch": 0.2591283459162663, "grad_norm": 0.7773093547592305, "learning_rate": 8.685891256570318e-06, "loss": 0.3392, "step": 7551 }, { "epoch": 0.2591626630061771, "grad_norm": 0.9118710385438602, "learning_rate": 8.685515721882114e-06, "loss": 0.3208, "step": 7552 }, { "epoch": 0.2591969800960878, "grad_norm": 0.837725235739301, "learning_rate": 8.685140141663659e-06, "loss": 0.271, "step": 7553 }, { "epoch": 0.25923129718599863, "grad_norm": 0.8300792772567186, "learning_rate": 8.684764515919594e-06, "loss": 0.292, "step": 7554 }, { "epoch": 0.2592656142759094, "grad_norm": 0.8089567826186845, "learning_rate": 8.684388844654562e-06, "loss": 0.3327, "step": 7555 }, { "epoch": 0.2592999313658202, "grad_norm": 0.7470802625017462, "learning_rate": 8.684013127873199e-06, "loss": 0.2804, "step": 7556 }, { "epoch": 0.25933424845573094, "grad_norm": 0.8049640547116598, "learning_rate": 8.683637365580149e-06, "loss": 0.2643, "step": 7557 }, { "epoch": 0.25936856554564175, "grad_norm": 0.7712853954793505, "learning_rate": 8.683261557780056e-06, "loss": 0.3606, "step": 7558 }, { "epoch": 0.2594028826355525, "grad_norm": 0.7713495541158464, "learning_rate": 8.68288570447756e-06, "loss": 0.2903, "step": 7559 }, { "epoch": 0.2594371997254633, "grad_norm": 0.7760970972462907, "learning_rate": 8.682509805677304e-06, "loss": 0.3836, "step": 7560 }, { "epoch": 0.25947151681537406, "grad_norm": 0.8135817502464147, "learning_rate": 8.682133861383933e-06, "loss": 0.3433, "step": 7561 }, { "epoch": 0.2595058339052848, "grad_norm": 0.7875951905705719, "learning_rate": 8.68175787160209e-06, "loss": 0.3, "step": 7562 }, { "epoch": 0.2595401509951956, "grad_norm": 0.7103902750607249, "learning_rate": 8.681381836336421e-06, "loss": 0.304, "step": 7563 }, { "epoch": 0.25957446808510637, "grad_norm": 0.8149817049926721, "learning_rate": 8.681005755591574e-06, "loss": 0.3539, "step": 7564 }, { "epoch": 0.2596087851750172, "grad_norm": 0.9293537659934898, "learning_rate": 8.68062962937219e-06, "loss": 0.3596, "step": 7565 }, { "epoch": 0.2596431022649279, "grad_norm": 0.8500225872956233, "learning_rate": 8.680253457682922e-06, "loss": 0.3231, "step": 7566 }, { "epoch": 0.25967741935483873, "grad_norm": 0.7895296865680755, "learning_rate": 8.679877240528409e-06, "loss": 0.3113, "step": 7567 }, { "epoch": 0.2597117364447495, "grad_norm": 0.7826138515450916, "learning_rate": 8.679500977913304e-06, "loss": 0.3793, "step": 7568 }, { "epoch": 0.25974605353466024, "grad_norm": 0.855179520987931, "learning_rate": 8.679124669842256e-06, "loss": 0.3085, "step": 7569 }, { "epoch": 0.25978037062457104, "grad_norm": 0.8024405661725048, "learning_rate": 8.67874831631991e-06, "loss": 0.3074, "step": 7570 }, { "epoch": 0.2598146877144818, "grad_norm": 0.7856972564112047, "learning_rate": 8.67837191735092e-06, "loss": 0.3282, "step": 7571 }, { "epoch": 0.2598490048043926, "grad_norm": 0.7674601656191243, "learning_rate": 8.677995472939932e-06, "loss": 0.2845, "step": 7572 }, { "epoch": 0.25988332189430335, "grad_norm": 0.7294845113554705, "learning_rate": 8.677618983091598e-06, "loss": 0.303, "step": 7573 }, { "epoch": 0.25991763898421416, "grad_norm": 0.660208667877612, "learning_rate": 8.677242447810568e-06, "loss": 0.2742, "step": 7574 }, { "epoch": 0.2599519560741249, "grad_norm": 0.7561697869439572, "learning_rate": 8.676865867101496e-06, "loss": 0.342, "step": 7575 }, { "epoch": 0.25998627316403566, "grad_norm": 0.8792729520374821, "learning_rate": 8.676489240969034e-06, "loss": 0.2903, "step": 7576 }, { "epoch": 0.26002059025394647, "grad_norm": 0.754214726777421, "learning_rate": 8.676112569417834e-06, "loss": 0.3057, "step": 7577 }, { "epoch": 0.2600549073438572, "grad_norm": 0.791118312685597, "learning_rate": 8.675735852452547e-06, "loss": 0.3199, "step": 7578 }, { "epoch": 0.26008922443376803, "grad_norm": 0.6833995483966427, "learning_rate": 8.67535909007783e-06, "loss": 0.2726, "step": 7579 }, { "epoch": 0.2601235415236788, "grad_norm": 0.8732235764554589, "learning_rate": 8.674982282298336e-06, "loss": 0.3401, "step": 7580 }, { "epoch": 0.2601578586135896, "grad_norm": 0.7701260592047761, "learning_rate": 8.674605429118723e-06, "loss": 0.3416, "step": 7581 }, { "epoch": 0.26019217570350034, "grad_norm": 0.827663868311987, "learning_rate": 8.674228530543643e-06, "loss": 0.3503, "step": 7582 }, { "epoch": 0.26022649279341115, "grad_norm": 0.8986498947105201, "learning_rate": 8.67385158657775e-06, "loss": 0.3563, "step": 7583 }, { "epoch": 0.2602608098833219, "grad_norm": 0.8129927266822521, "learning_rate": 8.673474597225709e-06, "loss": 0.296, "step": 7584 }, { "epoch": 0.26029512697323265, "grad_norm": 0.7597335439611822, "learning_rate": 8.673097562492168e-06, "loss": 0.3237, "step": 7585 }, { "epoch": 0.26032944406314346, "grad_norm": 0.8292556194899069, "learning_rate": 8.672720482381791e-06, "loss": 0.3771, "step": 7586 }, { "epoch": 0.2603637611530542, "grad_norm": 0.7232993979749074, "learning_rate": 8.672343356899234e-06, "loss": 0.3054, "step": 7587 }, { "epoch": 0.260398078242965, "grad_norm": 0.7805892037242059, "learning_rate": 8.671966186049157e-06, "loss": 0.282, "step": 7588 }, { "epoch": 0.26043239533287577, "grad_norm": 0.8237268467895166, "learning_rate": 8.671588969836218e-06, "loss": 0.3459, "step": 7589 }, { "epoch": 0.2604667124227866, "grad_norm": 0.7389664815877584, "learning_rate": 8.671211708265079e-06, "loss": 0.2942, "step": 7590 }, { "epoch": 0.2605010295126973, "grad_norm": 0.8241697489146214, "learning_rate": 8.670834401340397e-06, "loss": 0.3537, "step": 7591 }, { "epoch": 0.2605353466026081, "grad_norm": 0.8082124036511785, "learning_rate": 8.670457049066838e-06, "loss": 0.4259, "step": 7592 }, { "epoch": 0.2605696636925189, "grad_norm": 0.6945647795813333, "learning_rate": 8.670079651449061e-06, "loss": 0.2724, "step": 7593 }, { "epoch": 0.26060398078242963, "grad_norm": 0.8513269695018729, "learning_rate": 8.669702208491729e-06, "loss": 0.3906, "step": 7594 }, { "epoch": 0.26063829787234044, "grad_norm": 0.6898695939485189, "learning_rate": 8.669324720199505e-06, "loss": 0.2842, "step": 7595 }, { "epoch": 0.2606726149622512, "grad_norm": 0.7373694927339176, "learning_rate": 8.668947186577053e-06, "loss": 0.2819, "step": 7596 }, { "epoch": 0.260706932052162, "grad_norm": 0.7224285112229856, "learning_rate": 8.668569607629034e-06, "loss": 0.2845, "step": 7597 }, { "epoch": 0.26074124914207275, "grad_norm": 0.7535651113513783, "learning_rate": 8.668191983360115e-06, "loss": 0.2981, "step": 7598 }, { "epoch": 0.2607755662319835, "grad_norm": 0.7404103130143067, "learning_rate": 8.66781431377496e-06, "loss": 0.3333, "step": 7599 }, { "epoch": 0.2608098833218943, "grad_norm": 0.7122052166508452, "learning_rate": 8.667436598878238e-06, "loss": 0.2626, "step": 7600 }, { "epoch": 0.26084420041180506, "grad_norm": 0.7274680401619653, "learning_rate": 8.667058838674611e-06, "loss": 0.3609, "step": 7601 }, { "epoch": 0.26087851750171587, "grad_norm": 0.710803504016043, "learning_rate": 8.666681033168747e-06, "loss": 0.2754, "step": 7602 }, { "epoch": 0.2609128345916266, "grad_norm": 0.7674001241329014, "learning_rate": 8.666303182365315e-06, "loss": 0.3161, "step": 7603 }, { "epoch": 0.2609471516815374, "grad_norm": 0.7240660191431845, "learning_rate": 8.665925286268982e-06, "loss": 0.3618, "step": 7604 }, { "epoch": 0.2609814687714482, "grad_norm": 0.8187335280450965, "learning_rate": 8.665547344884413e-06, "loss": 0.3321, "step": 7605 }, { "epoch": 0.261015785861359, "grad_norm": 0.8236831502462061, "learning_rate": 8.665169358216283e-06, "loss": 0.3154, "step": 7606 }, { "epoch": 0.26105010295126974, "grad_norm": 0.8095630280968618, "learning_rate": 8.664791326269259e-06, "loss": 0.3539, "step": 7607 }, { "epoch": 0.2610844200411805, "grad_norm": 0.7423374438395264, "learning_rate": 8.664413249048009e-06, "loss": 0.2829, "step": 7608 }, { "epoch": 0.2611187371310913, "grad_norm": 0.6607210290245843, "learning_rate": 8.664035126557206e-06, "loss": 0.2895, "step": 7609 }, { "epoch": 0.26115305422100205, "grad_norm": 0.8231181404625108, "learning_rate": 8.663656958801522e-06, "loss": 0.3469, "step": 7610 }, { "epoch": 0.26118737131091285, "grad_norm": 0.7340864869237739, "learning_rate": 8.663278745785629e-06, "loss": 0.3152, "step": 7611 }, { "epoch": 0.2612216884008236, "grad_norm": 0.7306608328083932, "learning_rate": 8.662900487514195e-06, "loss": 0.2919, "step": 7612 }, { "epoch": 0.2612560054907344, "grad_norm": 0.7682421885830296, "learning_rate": 8.662522183991898e-06, "loss": 0.3583, "step": 7613 }, { "epoch": 0.26129032258064516, "grad_norm": 0.8420896921673545, "learning_rate": 8.662143835223408e-06, "loss": 0.3033, "step": 7614 }, { "epoch": 0.2613246396705559, "grad_norm": 0.678649017044669, "learning_rate": 8.6617654412134e-06, "loss": 0.3148, "step": 7615 }, { "epoch": 0.2613589567604667, "grad_norm": 0.7637725001343182, "learning_rate": 8.66138700196655e-06, "loss": 0.3336, "step": 7616 }, { "epoch": 0.26139327385037747, "grad_norm": 0.7474946213336542, "learning_rate": 8.661008517487534e-06, "loss": 0.3508, "step": 7617 }, { "epoch": 0.2614275909402883, "grad_norm": 0.9011065161194596, "learning_rate": 8.660629987781025e-06, "loss": 0.3344, "step": 7618 }, { "epoch": 0.26146190803019903, "grad_norm": 0.7683388319045106, "learning_rate": 8.6602514128517e-06, "loss": 0.3361, "step": 7619 }, { "epoch": 0.26149622512010984, "grad_norm": 0.7254002528000214, "learning_rate": 8.659872792704235e-06, "loss": 0.3136, "step": 7620 }, { "epoch": 0.2615305422100206, "grad_norm": 0.7545450357333994, "learning_rate": 8.659494127343312e-06, "loss": 0.3057, "step": 7621 }, { "epoch": 0.26156485929993134, "grad_norm": 0.8038149290751491, "learning_rate": 8.659115416773604e-06, "loss": 0.3284, "step": 7622 }, { "epoch": 0.26159917638984215, "grad_norm": 0.9670913650183907, "learning_rate": 8.65873666099979e-06, "loss": 0.281, "step": 7623 }, { "epoch": 0.2616334934797529, "grad_norm": 0.7136929143056691, "learning_rate": 8.658357860026552e-06, "loss": 0.3077, "step": 7624 }, { "epoch": 0.2616678105696637, "grad_norm": 0.7395066027126491, "learning_rate": 8.657979013858567e-06, "loss": 0.3494, "step": 7625 }, { "epoch": 0.26170212765957446, "grad_norm": 0.8365967930789434, "learning_rate": 8.657600122500515e-06, "loss": 0.2766, "step": 7626 }, { "epoch": 0.26173644474948526, "grad_norm": 0.7016436225021054, "learning_rate": 8.65722118595708e-06, "loss": 0.3349, "step": 7627 }, { "epoch": 0.261770761839396, "grad_norm": 0.7727007809703429, "learning_rate": 8.656842204232938e-06, "loss": 0.2898, "step": 7628 }, { "epoch": 0.26180507892930677, "grad_norm": 0.7734336523280326, "learning_rate": 8.656463177332778e-06, "loss": 0.278, "step": 7629 }, { "epoch": 0.2618393960192176, "grad_norm": 0.8449774867314194, "learning_rate": 8.656084105261275e-06, "loss": 0.2719, "step": 7630 }, { "epoch": 0.2618737131091283, "grad_norm": 0.7922005367751417, "learning_rate": 8.655704988023118e-06, "loss": 0.3383, "step": 7631 }, { "epoch": 0.26190803019903913, "grad_norm": 0.8920062411644402, "learning_rate": 8.655325825622987e-06, "loss": 0.3284, "step": 7632 }, { "epoch": 0.2619423472889499, "grad_norm": 0.8191237320680984, "learning_rate": 8.654946618065567e-06, "loss": 0.3205, "step": 7633 }, { "epoch": 0.2619766643788607, "grad_norm": 0.804525983882658, "learning_rate": 8.654567365355542e-06, "loss": 0.2924, "step": 7634 }, { "epoch": 0.26201098146877144, "grad_norm": 0.7793259002733597, "learning_rate": 8.654188067497599e-06, "loss": 0.3633, "step": 7635 }, { "epoch": 0.26204529855868225, "grad_norm": 0.7166276442121544, "learning_rate": 8.653808724496425e-06, "loss": 0.2918, "step": 7636 }, { "epoch": 0.262079615648593, "grad_norm": 0.9022475865773024, "learning_rate": 8.6534293363567e-06, "loss": 0.3638, "step": 7637 }, { "epoch": 0.26211393273850375, "grad_norm": 0.819592498157277, "learning_rate": 8.653049903083117e-06, "loss": 0.2978, "step": 7638 }, { "epoch": 0.26214824982841456, "grad_norm": 0.7549112981731314, "learning_rate": 8.652670424680364e-06, "loss": 0.3174, "step": 7639 }, { "epoch": 0.2621825669183253, "grad_norm": 0.822117703920733, "learning_rate": 8.652290901153122e-06, "loss": 0.3372, "step": 7640 }, { "epoch": 0.2622168840082361, "grad_norm": 0.7288229621016238, "learning_rate": 8.651911332506086e-06, "loss": 0.2756, "step": 7641 }, { "epoch": 0.26225120109814687, "grad_norm": 0.7149155152085019, "learning_rate": 8.651531718743942e-06, "loss": 0.305, "step": 7642 }, { "epoch": 0.2622855181880577, "grad_norm": 0.7560161393352337, "learning_rate": 8.651152059871384e-06, "loss": 0.336, "step": 7643 }, { "epoch": 0.2623198352779684, "grad_norm": 0.7654257998576126, "learning_rate": 8.650772355893095e-06, "loss": 0.268, "step": 7644 }, { "epoch": 0.2623541523678792, "grad_norm": 0.799482519213405, "learning_rate": 8.650392606813774e-06, "loss": 0.3179, "step": 7645 }, { "epoch": 0.26238846945779, "grad_norm": 0.725649184101964, "learning_rate": 8.650012812638107e-06, "loss": 0.3124, "step": 7646 }, { "epoch": 0.26242278654770074, "grad_norm": 0.7831635843137237, "learning_rate": 8.649632973370785e-06, "loss": 0.3454, "step": 7647 }, { "epoch": 0.26245710363761154, "grad_norm": 0.7318111500340956, "learning_rate": 8.649253089016505e-06, "loss": 0.318, "step": 7648 }, { "epoch": 0.2624914207275223, "grad_norm": 0.7948520650791203, "learning_rate": 8.648873159579958e-06, "loss": 0.3616, "step": 7649 }, { "epoch": 0.2625257378174331, "grad_norm": 0.8276102977593095, "learning_rate": 8.648493185065834e-06, "loss": 0.2939, "step": 7650 }, { "epoch": 0.26256005490734385, "grad_norm": 0.8419304912546641, "learning_rate": 8.648113165478835e-06, "loss": 0.3591, "step": 7651 }, { "epoch": 0.2625943719972546, "grad_norm": 0.7937139705734346, "learning_rate": 8.647733100823647e-06, "loss": 0.3265, "step": 7652 }, { "epoch": 0.2626286890871654, "grad_norm": 0.7645185080405358, "learning_rate": 8.647352991104971e-06, "loss": 0.3025, "step": 7653 }, { "epoch": 0.26266300617707616, "grad_norm": 0.8443522206746487, "learning_rate": 8.646972836327503e-06, "loss": 0.372, "step": 7654 }, { "epoch": 0.26269732326698697, "grad_norm": 0.7946148123327198, "learning_rate": 8.646592636495935e-06, "loss": 0.3063, "step": 7655 }, { "epoch": 0.2627316403568977, "grad_norm": 0.756950394718027, "learning_rate": 8.646212391614968e-06, "loss": 0.3115, "step": 7656 }, { "epoch": 0.26276595744680853, "grad_norm": 0.8361899373684146, "learning_rate": 8.645832101689297e-06, "loss": 0.3242, "step": 7657 }, { "epoch": 0.2628002745367193, "grad_norm": 0.8518299626171916, "learning_rate": 8.645451766723622e-06, "loss": 0.3669, "step": 7658 }, { "epoch": 0.2628345916266301, "grad_norm": 0.6286163322186512, "learning_rate": 8.64507138672264e-06, "loss": 0.2794, "step": 7659 }, { "epoch": 0.26286890871654084, "grad_norm": 0.7500667953485229, "learning_rate": 8.64469096169105e-06, "loss": 0.3074, "step": 7660 }, { "epoch": 0.2629032258064516, "grad_norm": 0.8132676545551234, "learning_rate": 8.644310491633554e-06, "loss": 0.362, "step": 7661 }, { "epoch": 0.2629375428963624, "grad_norm": 0.761265614113223, "learning_rate": 8.64392997655485e-06, "loss": 0.3332, "step": 7662 }, { "epoch": 0.26297185998627315, "grad_norm": 0.769366478018233, "learning_rate": 8.64354941645964e-06, "loss": 0.3062, "step": 7663 }, { "epoch": 0.26300617707618396, "grad_norm": 0.9688658812520649, "learning_rate": 8.643168811352624e-06, "loss": 0.3355, "step": 7664 }, { "epoch": 0.2630404941660947, "grad_norm": 0.783564929419458, "learning_rate": 8.642788161238505e-06, "loss": 0.3053, "step": 7665 }, { "epoch": 0.2630748112560055, "grad_norm": 0.760727677669133, "learning_rate": 8.642407466121987e-06, "loss": 0.2984, "step": 7666 }, { "epoch": 0.26310912834591627, "grad_norm": 0.9269050126899951, "learning_rate": 8.64202672600777e-06, "loss": 0.332, "step": 7667 }, { "epoch": 0.263143445435827, "grad_norm": 0.95623477443463, "learning_rate": 8.641645940900558e-06, "loss": 0.2882, "step": 7668 }, { "epoch": 0.2631777625257378, "grad_norm": 0.7521780867665335, "learning_rate": 8.641265110805058e-06, "loss": 0.3113, "step": 7669 }, { "epoch": 0.2632120796156486, "grad_norm": 0.854506817795884, "learning_rate": 8.640884235725973e-06, "loss": 0.2939, "step": 7670 }, { "epoch": 0.2632463967055594, "grad_norm": 0.7524068652749276, "learning_rate": 8.640503315668005e-06, "loss": 0.3023, "step": 7671 }, { "epoch": 0.26328071379547013, "grad_norm": 0.8549032517478569, "learning_rate": 8.640122350635867e-06, "loss": 0.3083, "step": 7672 }, { "epoch": 0.26331503088538094, "grad_norm": 0.8463885127770918, "learning_rate": 8.639741340634258e-06, "loss": 0.3508, "step": 7673 }, { "epoch": 0.2633493479752917, "grad_norm": 0.8178675407258655, "learning_rate": 8.63936028566789e-06, "loss": 0.3434, "step": 7674 }, { "epoch": 0.26338366506520244, "grad_norm": 0.7509004943797595, "learning_rate": 8.638979185741468e-06, "loss": 0.2915, "step": 7675 }, { "epoch": 0.26341798215511325, "grad_norm": 0.8569760752659658, "learning_rate": 8.638598040859702e-06, "loss": 0.3368, "step": 7676 }, { "epoch": 0.263452299245024, "grad_norm": 0.7773360665665112, "learning_rate": 8.638216851027299e-06, "loss": 0.2773, "step": 7677 }, { "epoch": 0.2634866163349348, "grad_norm": 0.7850184053163053, "learning_rate": 8.637835616248968e-06, "loss": 0.2717, "step": 7678 }, { "epoch": 0.26352093342484556, "grad_norm": 0.7253549040293081, "learning_rate": 8.63745433652942e-06, "loss": 0.2716, "step": 7679 }, { "epoch": 0.26355525051475637, "grad_norm": 0.8721439111632169, "learning_rate": 8.637073011873364e-06, "loss": 0.3559, "step": 7680 }, { "epoch": 0.2635895676046671, "grad_norm": 0.7895034235557036, "learning_rate": 8.636691642285511e-06, "loss": 0.3412, "step": 7681 }, { "epoch": 0.2636238846945779, "grad_norm": 0.7893712924157228, "learning_rate": 8.636310227770573e-06, "loss": 0.327, "step": 7682 }, { "epoch": 0.2636582017844887, "grad_norm": 0.8062115157591794, "learning_rate": 8.63592876833326e-06, "loss": 0.3107, "step": 7683 }, { "epoch": 0.26369251887439943, "grad_norm": 0.8611538993648493, "learning_rate": 8.63554726397829e-06, "loss": 0.302, "step": 7684 }, { "epoch": 0.26372683596431024, "grad_norm": 0.8817727745124224, "learning_rate": 8.635165714710369e-06, "loss": 0.3732, "step": 7685 }, { "epoch": 0.263761153054221, "grad_norm": 0.7681054468092949, "learning_rate": 8.634784120534217e-06, "loss": 0.2946, "step": 7686 }, { "epoch": 0.2637954701441318, "grad_norm": 0.8046790793686596, "learning_rate": 8.634402481454542e-06, "loss": 0.3491, "step": 7687 }, { "epoch": 0.26382978723404255, "grad_norm": 0.8034063349330688, "learning_rate": 8.634020797476062e-06, "loss": 0.3108, "step": 7688 }, { "epoch": 0.26386410432395335, "grad_norm": 0.7237223158765524, "learning_rate": 8.633639068603492e-06, "loss": 0.3504, "step": 7689 }, { "epoch": 0.2638984214138641, "grad_norm": 0.7589141219148102, "learning_rate": 8.633257294841549e-06, "loss": 0.2988, "step": 7690 }, { "epoch": 0.26393273850377486, "grad_norm": 0.8086811158349869, "learning_rate": 8.632875476194947e-06, "loss": 0.3391, "step": 7691 }, { "epoch": 0.26396705559368566, "grad_norm": 0.7777418144190964, "learning_rate": 8.632493612668404e-06, "loss": 0.3469, "step": 7692 }, { "epoch": 0.2640013726835964, "grad_norm": 0.7002851548325293, "learning_rate": 8.632111704266638e-06, "loss": 0.3661, "step": 7693 }, { "epoch": 0.2640356897735072, "grad_norm": 0.7514185859262003, "learning_rate": 8.631729750994366e-06, "loss": 0.3138, "step": 7694 }, { "epoch": 0.26407000686341797, "grad_norm": 0.811356517467732, "learning_rate": 8.631347752856307e-06, "loss": 0.3396, "step": 7695 }, { "epoch": 0.2641043239533288, "grad_norm": 0.7923548668385241, "learning_rate": 8.630965709857181e-06, "loss": 0.3281, "step": 7696 }, { "epoch": 0.26413864104323953, "grad_norm": 0.8533670065850151, "learning_rate": 8.630583622001708e-06, "loss": 0.3017, "step": 7697 }, { "epoch": 0.2641729581331503, "grad_norm": 0.8117757227769712, "learning_rate": 8.630201489294605e-06, "loss": 0.3186, "step": 7698 }, { "epoch": 0.2642072752230611, "grad_norm": 1.0475495526250005, "learning_rate": 8.629819311740597e-06, "loss": 0.2984, "step": 7699 }, { "epoch": 0.26424159231297184, "grad_norm": 0.7956242464911247, "learning_rate": 8.629437089344402e-06, "loss": 0.3563, "step": 7700 }, { "epoch": 0.26427590940288265, "grad_norm": 0.8284953579038892, "learning_rate": 8.629054822110742e-06, "loss": 0.3018, "step": 7701 }, { "epoch": 0.2643102264927934, "grad_norm": 0.7696426075136283, "learning_rate": 8.628672510044343e-06, "loss": 0.3022, "step": 7702 }, { "epoch": 0.2643445435827042, "grad_norm": 0.7689881684785189, "learning_rate": 8.628290153149924e-06, "loss": 0.279, "step": 7703 }, { "epoch": 0.26437886067261496, "grad_norm": 0.7640364250006718, "learning_rate": 8.627907751432211e-06, "loss": 0.2804, "step": 7704 }, { "epoch": 0.26441317776252576, "grad_norm": 1.0101578140495013, "learning_rate": 8.627525304895927e-06, "loss": 0.3046, "step": 7705 }, { "epoch": 0.2644474948524365, "grad_norm": 0.7780639348248881, "learning_rate": 8.627142813545799e-06, "loss": 0.3502, "step": 7706 }, { "epoch": 0.26448181194234727, "grad_norm": 0.6961311930848638, "learning_rate": 8.62676027738655e-06, "loss": 0.3146, "step": 7707 }, { "epoch": 0.2645161290322581, "grad_norm": 0.7622524826390716, "learning_rate": 8.626377696422905e-06, "loss": 0.3968, "step": 7708 }, { "epoch": 0.2645504461221688, "grad_norm": 0.7921178250520652, "learning_rate": 8.625995070659592e-06, "loss": 0.334, "step": 7709 }, { "epoch": 0.26458476321207963, "grad_norm": 0.8149170688437528, "learning_rate": 8.625612400101337e-06, "loss": 0.367, "step": 7710 }, { "epoch": 0.2646190803019904, "grad_norm": 0.7747797626988336, "learning_rate": 8.625229684752869e-06, "loss": 0.3235, "step": 7711 }, { "epoch": 0.2646533973919012, "grad_norm": 0.7975283834099466, "learning_rate": 8.624846924618912e-06, "loss": 0.3343, "step": 7712 }, { "epoch": 0.26468771448181194, "grad_norm": 0.7137489379650165, "learning_rate": 8.6244641197042e-06, "loss": 0.2767, "step": 7713 }, { "epoch": 0.2647220315717227, "grad_norm": 0.7903316533218463, "learning_rate": 8.624081270013459e-06, "loss": 0.3248, "step": 7714 }, { "epoch": 0.2647563486616335, "grad_norm": 0.7728565975542876, "learning_rate": 8.62369837555142e-06, "loss": 0.309, "step": 7715 }, { "epoch": 0.26479066575154425, "grad_norm": 0.7733449662003782, "learning_rate": 8.62331543632281e-06, "loss": 0.3612, "step": 7716 }, { "epoch": 0.26482498284145506, "grad_norm": 0.7725050184704785, "learning_rate": 8.622932452332364e-06, "loss": 0.2821, "step": 7717 }, { "epoch": 0.2648592999313658, "grad_norm": 0.9189470262404721, "learning_rate": 8.622549423584812e-06, "loss": 0.3028, "step": 7718 }, { "epoch": 0.2648936170212766, "grad_norm": 0.7683184834241709, "learning_rate": 8.622166350084884e-06, "loss": 0.3519, "step": 7719 }, { "epoch": 0.26492793411118737, "grad_norm": 0.8396113205256823, "learning_rate": 8.621783231837315e-06, "loss": 0.3017, "step": 7720 }, { "epoch": 0.2649622512010981, "grad_norm": 0.7447367423810762, "learning_rate": 8.621400068846835e-06, "loss": 0.2885, "step": 7721 }, { "epoch": 0.2649965682910089, "grad_norm": 0.747437091180753, "learning_rate": 8.621016861118182e-06, "loss": 0.3337, "step": 7722 }, { "epoch": 0.2650308853809197, "grad_norm": 0.7311332589257236, "learning_rate": 8.620633608656084e-06, "loss": 0.3315, "step": 7723 }, { "epoch": 0.2650652024708305, "grad_norm": 0.9292394112087913, "learning_rate": 8.620250311465281e-06, "loss": 0.3394, "step": 7724 }, { "epoch": 0.26509951956074124, "grad_norm": 0.7600213159616152, "learning_rate": 8.619866969550506e-06, "loss": 0.3082, "step": 7725 }, { "epoch": 0.26513383665065204, "grad_norm": 0.7394042488807938, "learning_rate": 8.619483582916495e-06, "loss": 0.3218, "step": 7726 }, { "epoch": 0.2651681537405628, "grad_norm": 0.9427342551513357, "learning_rate": 8.619100151567984e-06, "loss": 0.3004, "step": 7727 }, { "epoch": 0.2652024708304736, "grad_norm": 0.8361694648863021, "learning_rate": 8.61871667550971e-06, "loss": 0.3102, "step": 7728 }, { "epoch": 0.26523678792038435, "grad_norm": 0.8011819086857699, "learning_rate": 8.618333154746411e-06, "loss": 0.401, "step": 7729 }, { "epoch": 0.2652711050102951, "grad_norm": 0.7364208651749711, "learning_rate": 8.617949589282825e-06, "loss": 0.3229, "step": 7730 }, { "epoch": 0.2653054221002059, "grad_norm": 0.8461199424794005, "learning_rate": 8.617565979123688e-06, "loss": 0.353, "step": 7731 }, { "epoch": 0.26533973919011666, "grad_norm": 0.7470934197031661, "learning_rate": 8.617182324273743e-06, "loss": 0.3037, "step": 7732 }, { "epoch": 0.26537405628002747, "grad_norm": 0.7857124902616547, "learning_rate": 8.616798624737725e-06, "loss": 0.3533, "step": 7733 }, { "epoch": 0.2654083733699382, "grad_norm": 0.7557898191547054, "learning_rate": 8.616414880520379e-06, "loss": 0.2882, "step": 7734 }, { "epoch": 0.26544269045984903, "grad_norm": 0.809115042053061, "learning_rate": 8.616031091626444e-06, "loss": 0.28, "step": 7735 }, { "epoch": 0.2654770075497598, "grad_norm": 0.8521273454702577, "learning_rate": 8.61564725806066e-06, "loss": 0.3451, "step": 7736 }, { "epoch": 0.26551132463967053, "grad_norm": 1.0113215593680014, "learning_rate": 8.615263379827769e-06, "loss": 0.3296, "step": 7737 }, { "epoch": 0.26554564172958134, "grad_norm": 0.8531181963574386, "learning_rate": 8.614879456932513e-06, "loss": 0.3281, "step": 7738 }, { "epoch": 0.2655799588194921, "grad_norm": 0.871237563733973, "learning_rate": 8.614495489379638e-06, "loss": 0.3426, "step": 7739 }, { "epoch": 0.2656142759094029, "grad_norm": 0.7960862979468263, "learning_rate": 8.614111477173883e-06, "loss": 0.2951, "step": 7740 }, { "epoch": 0.26564859299931365, "grad_norm": 0.8200619239861947, "learning_rate": 8.613727420319997e-06, "loss": 0.3578, "step": 7741 }, { "epoch": 0.26568291008922446, "grad_norm": 0.7898330127146184, "learning_rate": 8.61334331882272e-06, "loss": 0.3149, "step": 7742 }, { "epoch": 0.2657172271791352, "grad_norm": 0.7480056236892506, "learning_rate": 8.6129591726868e-06, "loss": 0.2959, "step": 7743 }, { "epoch": 0.26575154426904596, "grad_norm": 0.7417669131450877, "learning_rate": 8.612574981916981e-06, "loss": 0.2974, "step": 7744 }, { "epoch": 0.26578586135895677, "grad_norm": 0.7940909270690281, "learning_rate": 8.61219074651801e-06, "loss": 0.2955, "step": 7745 }, { "epoch": 0.2658201784488675, "grad_norm": 0.7460822975375083, "learning_rate": 8.611806466494635e-06, "loss": 0.3068, "step": 7746 }, { "epoch": 0.2658544955387783, "grad_norm": 0.8132247487216163, "learning_rate": 8.6114221418516e-06, "loss": 0.2952, "step": 7747 }, { "epoch": 0.2658888126286891, "grad_norm": 0.7691177754508794, "learning_rate": 8.611037772593656e-06, "loss": 0.3096, "step": 7748 }, { "epoch": 0.2659231297185999, "grad_norm": 0.836427866526895, "learning_rate": 8.610653358725551e-06, "loss": 0.3841, "step": 7749 }, { "epoch": 0.26595744680851063, "grad_norm": 0.8568919409254594, "learning_rate": 8.610268900252033e-06, "loss": 0.3269, "step": 7750 }, { "epoch": 0.26599176389842144, "grad_norm": 0.8682589753704963, "learning_rate": 8.609884397177852e-06, "loss": 0.3588, "step": 7751 }, { "epoch": 0.2660260809883322, "grad_norm": 0.798343622615183, "learning_rate": 8.609499849507759e-06, "loss": 0.2928, "step": 7752 }, { "epoch": 0.26606039807824294, "grad_norm": 0.7830666741009356, "learning_rate": 8.609115257246501e-06, "loss": 0.3313, "step": 7753 }, { "epoch": 0.26609471516815375, "grad_norm": 0.7575862608293359, "learning_rate": 8.608730620398833e-06, "loss": 0.3348, "step": 7754 }, { "epoch": 0.2661290322580645, "grad_norm": 0.7493292207529592, "learning_rate": 8.608345938969507e-06, "loss": 0.3074, "step": 7755 }, { "epoch": 0.2661633493479753, "grad_norm": 0.7941432784884895, "learning_rate": 8.607961212963271e-06, "loss": 0.2818, "step": 7756 }, { "epoch": 0.26619766643788606, "grad_norm": 0.7895173413305039, "learning_rate": 8.607576442384881e-06, "loss": 0.2826, "step": 7757 }, { "epoch": 0.26623198352779687, "grad_norm": 0.8493199921342314, "learning_rate": 8.607191627239093e-06, "loss": 0.3333, "step": 7758 }, { "epoch": 0.2662663006177076, "grad_norm": 0.7793300493369736, "learning_rate": 8.606806767530657e-06, "loss": 0.3312, "step": 7759 }, { "epoch": 0.26630061770761837, "grad_norm": 0.8041900585492273, "learning_rate": 8.606421863264326e-06, "loss": 0.3249, "step": 7760 }, { "epoch": 0.2663349347975292, "grad_norm": 0.777984634765814, "learning_rate": 8.60603691444486e-06, "loss": 0.3065, "step": 7761 }, { "epoch": 0.26636925188743993, "grad_norm": 0.8054123471521006, "learning_rate": 8.605651921077012e-06, "loss": 0.3618, "step": 7762 }, { "epoch": 0.26640356897735074, "grad_norm": 0.7183620461570136, "learning_rate": 8.605266883165539e-06, "loss": 0.2905, "step": 7763 }, { "epoch": 0.2664378860672615, "grad_norm": 0.8012025935835132, "learning_rate": 8.604881800715194e-06, "loss": 0.3258, "step": 7764 }, { "epoch": 0.2664722031571723, "grad_norm": 0.8224808071066113, "learning_rate": 8.604496673730741e-06, "loss": 0.284, "step": 7765 }, { "epoch": 0.26650652024708305, "grad_norm": 0.7827894868498978, "learning_rate": 8.60411150221693e-06, "loss": 0.3471, "step": 7766 }, { "epoch": 0.2665408373369938, "grad_norm": 0.7515182528872165, "learning_rate": 8.603726286178526e-06, "loss": 0.3289, "step": 7767 }, { "epoch": 0.2665751544269046, "grad_norm": 0.7837694543822223, "learning_rate": 8.603341025620285e-06, "loss": 0.3077, "step": 7768 }, { "epoch": 0.26660947151681536, "grad_norm": 0.89556795289208, "learning_rate": 8.602955720546966e-06, "loss": 0.3881, "step": 7769 }, { "epoch": 0.26664378860672616, "grad_norm": 0.7851834111516118, "learning_rate": 8.602570370963332e-06, "loss": 0.2895, "step": 7770 }, { "epoch": 0.2666781056966369, "grad_norm": 0.7409278027549441, "learning_rate": 8.60218497687414e-06, "loss": 0.2645, "step": 7771 }, { "epoch": 0.2667124227865477, "grad_norm": 0.8189551098869037, "learning_rate": 8.601799538284151e-06, "loss": 0.3795, "step": 7772 }, { "epoch": 0.26674673987645847, "grad_norm": 0.8520600288193371, "learning_rate": 8.601414055198129e-06, "loss": 0.3703, "step": 7773 }, { "epoch": 0.2667810569663693, "grad_norm": 0.7710308571040708, "learning_rate": 8.601028527620834e-06, "loss": 0.347, "step": 7774 }, { "epoch": 0.26681537405628003, "grad_norm": 0.764998489118254, "learning_rate": 8.600642955557031e-06, "loss": 0.306, "step": 7775 }, { "epoch": 0.2668496911461908, "grad_norm": 0.7113445116698606, "learning_rate": 8.600257339011482e-06, "loss": 0.277, "step": 7776 }, { "epoch": 0.2668840082361016, "grad_norm": 0.753276899808841, "learning_rate": 8.599871677988952e-06, "loss": 0.2957, "step": 7777 }, { "epoch": 0.26691832532601234, "grad_norm": 0.856993834009339, "learning_rate": 8.599485972494204e-06, "loss": 0.2874, "step": 7778 }, { "epoch": 0.26695264241592315, "grad_norm": 0.7360018002427728, "learning_rate": 8.599100222532002e-06, "loss": 0.272, "step": 7779 }, { "epoch": 0.2669869595058339, "grad_norm": 0.9173954286832826, "learning_rate": 8.598714428107114e-06, "loss": 0.3462, "step": 7780 }, { "epoch": 0.2670212765957447, "grad_norm": 0.8039444621850774, "learning_rate": 8.598328589224306e-06, "loss": 0.327, "step": 7781 }, { "epoch": 0.26705559368565546, "grad_norm": 0.8159268900875855, "learning_rate": 8.597942705888343e-06, "loss": 0.2833, "step": 7782 }, { "epoch": 0.2670899107755662, "grad_norm": 0.7733640379512782, "learning_rate": 8.59755677810399e-06, "loss": 0.2715, "step": 7783 }, { "epoch": 0.267124227865477, "grad_norm": 0.8522546029257227, "learning_rate": 8.59717080587602e-06, "loss": 0.3487, "step": 7784 }, { "epoch": 0.26715854495538777, "grad_norm": 0.7401285288191518, "learning_rate": 8.596784789209198e-06, "loss": 0.3062, "step": 7785 }, { "epoch": 0.2671928620452986, "grad_norm": 0.8642215030926008, "learning_rate": 8.596398728108294e-06, "loss": 0.3316, "step": 7786 }, { "epoch": 0.2672271791352093, "grad_norm": 0.7547976686160711, "learning_rate": 8.596012622578076e-06, "loss": 0.3659, "step": 7787 }, { "epoch": 0.26726149622512013, "grad_norm": 0.7622535452332663, "learning_rate": 8.595626472623314e-06, "loss": 0.3516, "step": 7788 }, { "epoch": 0.2672958133150309, "grad_norm": 0.7529991655397941, "learning_rate": 8.595240278248781e-06, "loss": 0.3181, "step": 7789 }, { "epoch": 0.26733013040494163, "grad_norm": 0.6979891133206854, "learning_rate": 8.594854039459244e-06, "loss": 0.2771, "step": 7790 }, { "epoch": 0.26736444749485244, "grad_norm": 0.781231539096527, "learning_rate": 8.594467756259478e-06, "loss": 0.3131, "step": 7791 }, { "epoch": 0.2673987645847632, "grad_norm": 0.8378961230248391, "learning_rate": 8.594081428654253e-06, "loss": 0.3397, "step": 7792 }, { "epoch": 0.267433081674674, "grad_norm": 0.776644562779122, "learning_rate": 8.59369505664834e-06, "loss": 0.3045, "step": 7793 }, { "epoch": 0.26746739876458475, "grad_norm": 0.8299072351289855, "learning_rate": 8.593308640246517e-06, "loss": 0.3228, "step": 7794 }, { "epoch": 0.26750171585449556, "grad_norm": 0.8712736200594245, "learning_rate": 8.592922179453556e-06, "loss": 0.2781, "step": 7795 }, { "epoch": 0.2675360329444063, "grad_norm": 0.7457688996398094, "learning_rate": 8.59253567427423e-06, "loss": 0.3132, "step": 7796 }, { "epoch": 0.2675703500343171, "grad_norm": 0.750306691648281, "learning_rate": 8.592149124713312e-06, "loss": 0.3024, "step": 7797 }, { "epoch": 0.26760466712422787, "grad_norm": 0.8301612350029198, "learning_rate": 8.59176253077558e-06, "loss": 0.3467, "step": 7798 }, { "epoch": 0.2676389842141386, "grad_norm": 0.8580531840879269, "learning_rate": 8.591375892465811e-06, "loss": 0.3226, "step": 7799 }, { "epoch": 0.2676733013040494, "grad_norm": 0.8031497829161316, "learning_rate": 8.59098920978878e-06, "loss": 0.3356, "step": 7800 }, { "epoch": 0.2677076183939602, "grad_norm": 0.7798320363598215, "learning_rate": 8.590602482749263e-06, "loss": 0.3465, "step": 7801 }, { "epoch": 0.267741935483871, "grad_norm": 0.7612733520865868, "learning_rate": 8.590215711352039e-06, "loss": 0.2733, "step": 7802 }, { "epoch": 0.26777625257378174, "grad_norm": 0.7287867109270202, "learning_rate": 8.589828895601886e-06, "loss": 0.2869, "step": 7803 }, { "epoch": 0.26781056966369254, "grad_norm": 0.835804618892193, "learning_rate": 8.589442035503584e-06, "loss": 0.332, "step": 7804 }, { "epoch": 0.2678448867536033, "grad_norm": 0.6931908315093922, "learning_rate": 8.589055131061907e-06, "loss": 0.3688, "step": 7805 }, { "epoch": 0.26787920384351405, "grad_norm": 0.7056056078264982, "learning_rate": 8.58866818228164e-06, "loss": 0.2636, "step": 7806 }, { "epoch": 0.26791352093342485, "grad_norm": 0.7792108112148084, "learning_rate": 8.588281189167562e-06, "loss": 0.2757, "step": 7807 }, { "epoch": 0.2679478380233356, "grad_norm": 0.7727255620628157, "learning_rate": 8.587894151724451e-06, "loss": 0.3275, "step": 7808 }, { "epoch": 0.2679821551132464, "grad_norm": 0.8003061008873188, "learning_rate": 8.587507069957094e-06, "loss": 0.3463, "step": 7809 }, { "epoch": 0.26801647220315716, "grad_norm": 0.8122115269330104, "learning_rate": 8.587119943870268e-06, "loss": 0.3666, "step": 7810 }, { "epoch": 0.26805078929306797, "grad_norm": 0.7877394829104917, "learning_rate": 8.586732773468757e-06, "loss": 0.3679, "step": 7811 }, { "epoch": 0.2680851063829787, "grad_norm": 0.7111708040711161, "learning_rate": 8.586345558757346e-06, "loss": 0.2668, "step": 7812 }, { "epoch": 0.2681194234728895, "grad_norm": 0.7136140947884201, "learning_rate": 8.585958299740817e-06, "loss": 0.2983, "step": 7813 }, { "epoch": 0.2681537405628003, "grad_norm": 0.7378094480484277, "learning_rate": 8.585570996423953e-06, "loss": 0.2794, "step": 7814 }, { "epoch": 0.26818805765271103, "grad_norm": 0.7061326110053006, "learning_rate": 8.58518364881154e-06, "loss": 0.2667, "step": 7815 }, { "epoch": 0.26822237474262184, "grad_norm": 0.7326163419216891, "learning_rate": 8.584796256908361e-06, "loss": 0.3508, "step": 7816 }, { "epoch": 0.2682566918325326, "grad_norm": 0.7981188228718089, "learning_rate": 8.584408820719205e-06, "loss": 0.3255, "step": 7817 }, { "epoch": 0.2682910089224434, "grad_norm": 0.8169437682953304, "learning_rate": 8.584021340248858e-06, "loss": 0.3292, "step": 7818 }, { "epoch": 0.26832532601235415, "grad_norm": 0.7380819559434555, "learning_rate": 8.583633815502106e-06, "loss": 0.3081, "step": 7819 }, { "epoch": 0.26835964310226496, "grad_norm": 0.7640204177982418, "learning_rate": 8.583246246483735e-06, "loss": 0.3427, "step": 7820 }, { "epoch": 0.2683939601921757, "grad_norm": 0.8484626683545812, "learning_rate": 8.582858633198535e-06, "loss": 0.3296, "step": 7821 }, { "epoch": 0.26842827728208646, "grad_norm": 0.782961833338063, "learning_rate": 8.582470975651293e-06, "loss": 0.3523, "step": 7822 }, { "epoch": 0.26846259437199727, "grad_norm": 0.8624364809355957, "learning_rate": 8.582083273846801e-06, "loss": 0.326, "step": 7823 }, { "epoch": 0.268496911461908, "grad_norm": 0.8432320517122679, "learning_rate": 8.581695527789844e-06, "loss": 0.3166, "step": 7824 }, { "epoch": 0.2685312285518188, "grad_norm": 0.8426814129603317, "learning_rate": 8.581307737485217e-06, "loss": 0.3079, "step": 7825 }, { "epoch": 0.2685655456417296, "grad_norm": 0.8396425440365849, "learning_rate": 8.580919902937706e-06, "loss": 0.3137, "step": 7826 }, { "epoch": 0.2685998627316404, "grad_norm": 0.7131786962959517, "learning_rate": 8.580532024152105e-06, "loss": 0.2975, "step": 7827 }, { "epoch": 0.26863417982155113, "grad_norm": 0.7861844393028791, "learning_rate": 8.580144101133206e-06, "loss": 0.2979, "step": 7828 }, { "epoch": 0.2686684969114619, "grad_norm": 0.7841477957012792, "learning_rate": 8.579756133885802e-06, "loss": 0.2698, "step": 7829 }, { "epoch": 0.2687028140013727, "grad_norm": 0.7980375428929966, "learning_rate": 8.579368122414683e-06, "loss": 0.3466, "step": 7830 }, { "epoch": 0.26873713109128344, "grad_norm": 0.6935436371902817, "learning_rate": 8.578980066724643e-06, "loss": 0.2716, "step": 7831 }, { "epoch": 0.26877144818119425, "grad_norm": 0.7994976818448843, "learning_rate": 8.578591966820477e-06, "loss": 0.3278, "step": 7832 }, { "epoch": 0.268805765271105, "grad_norm": 0.8341848755434962, "learning_rate": 8.578203822706981e-06, "loss": 0.2986, "step": 7833 }, { "epoch": 0.2688400823610158, "grad_norm": 0.7386735227506408, "learning_rate": 8.577815634388948e-06, "loss": 0.3522, "step": 7834 }, { "epoch": 0.26887439945092656, "grad_norm": 0.7102304061478247, "learning_rate": 8.577427401871174e-06, "loss": 0.2925, "step": 7835 }, { "epoch": 0.2689087165408373, "grad_norm": 0.7886468480991997, "learning_rate": 8.577039125158455e-06, "loss": 0.3401, "step": 7836 }, { "epoch": 0.2689430336307481, "grad_norm": 0.9597244840308802, "learning_rate": 8.57665080425559e-06, "loss": 0.3603, "step": 7837 }, { "epoch": 0.26897735072065887, "grad_norm": 0.7862068500839184, "learning_rate": 8.576262439167371e-06, "loss": 0.3501, "step": 7838 }, { "epoch": 0.2690116678105697, "grad_norm": 0.8527582544692833, "learning_rate": 8.575874029898602e-06, "loss": 0.3518, "step": 7839 }, { "epoch": 0.26904598490048043, "grad_norm": 0.7972594605259787, "learning_rate": 8.575485576454078e-06, "loss": 0.2662, "step": 7840 }, { "epoch": 0.26908030199039124, "grad_norm": 0.7577183039530537, "learning_rate": 8.575097078838597e-06, "loss": 0.3403, "step": 7841 }, { "epoch": 0.269114619080302, "grad_norm": 0.7649554646145971, "learning_rate": 8.57470853705696e-06, "loss": 0.3264, "step": 7842 }, { "epoch": 0.2691489361702128, "grad_norm": 0.6937737007469681, "learning_rate": 8.574319951113968e-06, "loss": 0.2749, "step": 7843 }, { "epoch": 0.26918325326012355, "grad_norm": 0.7823771875879371, "learning_rate": 8.573931321014419e-06, "loss": 0.339, "step": 7844 }, { "epoch": 0.2692175703500343, "grad_norm": 0.8480284517073345, "learning_rate": 8.573542646763115e-06, "loss": 0.3023, "step": 7845 }, { "epoch": 0.2692518874399451, "grad_norm": 0.800133881059606, "learning_rate": 8.573153928364859e-06, "loss": 0.2884, "step": 7846 }, { "epoch": 0.26928620452985585, "grad_norm": 0.7349430571931951, "learning_rate": 8.572765165824453e-06, "loss": 0.2944, "step": 7847 }, { "epoch": 0.26932052161976666, "grad_norm": 0.717471905092767, "learning_rate": 8.572376359146697e-06, "loss": 0.3423, "step": 7848 }, { "epoch": 0.2693548387096774, "grad_norm": 0.7521544970735825, "learning_rate": 8.571987508336396e-06, "loss": 0.2827, "step": 7849 }, { "epoch": 0.2693891557995882, "grad_norm": 0.8502310154859058, "learning_rate": 8.571598613398356e-06, "loss": 0.3734, "step": 7850 }, { "epoch": 0.26942347288949897, "grad_norm": 0.7663754990533376, "learning_rate": 8.571209674337378e-06, "loss": 0.3677, "step": 7851 }, { "epoch": 0.2694577899794097, "grad_norm": 0.7404175745090185, "learning_rate": 8.570820691158266e-06, "loss": 0.3397, "step": 7852 }, { "epoch": 0.26949210706932053, "grad_norm": 0.6943680139864921, "learning_rate": 8.57043166386583e-06, "loss": 0.2926, "step": 7853 }, { "epoch": 0.2695264241592313, "grad_norm": 0.7901086314287457, "learning_rate": 8.570042592464872e-06, "loss": 0.3489, "step": 7854 }, { "epoch": 0.2695607412491421, "grad_norm": 0.7763654806775807, "learning_rate": 8.569653476960202e-06, "loss": 0.3515, "step": 7855 }, { "epoch": 0.26959505833905284, "grad_norm": 0.7863110763659917, "learning_rate": 8.569264317356624e-06, "loss": 0.3082, "step": 7856 }, { "epoch": 0.26962937542896365, "grad_norm": 0.7277503580891128, "learning_rate": 8.568875113658945e-06, "loss": 0.2908, "step": 7857 }, { "epoch": 0.2696636925188744, "grad_norm": 0.70150651845635, "learning_rate": 8.568485865871978e-06, "loss": 0.3202, "step": 7858 }, { "epoch": 0.26969800960878515, "grad_norm": 0.7147854679871647, "learning_rate": 8.568096574000528e-06, "loss": 0.3456, "step": 7859 }, { "epoch": 0.26973232669869596, "grad_norm": 0.7696392755047223, "learning_rate": 8.567707238049403e-06, "loss": 0.3351, "step": 7860 }, { "epoch": 0.2697666437886067, "grad_norm": 0.7537983412767771, "learning_rate": 8.567317858023417e-06, "loss": 0.3146, "step": 7861 }, { "epoch": 0.2698009608785175, "grad_norm": 0.7780810785363652, "learning_rate": 8.566928433927375e-06, "loss": 0.3077, "step": 7862 }, { "epoch": 0.26983527796842827, "grad_norm": 0.8335352094920697, "learning_rate": 8.566538965766093e-06, "loss": 0.3337, "step": 7863 }, { "epoch": 0.2698695950583391, "grad_norm": 0.7783088407279198, "learning_rate": 8.56614945354438e-06, "loss": 0.3895, "step": 7864 }, { "epoch": 0.2699039121482498, "grad_norm": 0.8742265068523535, "learning_rate": 8.565759897267047e-06, "loss": 0.3357, "step": 7865 }, { "epoch": 0.2699382292381606, "grad_norm": 0.7690852307996741, "learning_rate": 8.565370296938908e-06, "loss": 0.311, "step": 7866 }, { "epoch": 0.2699725463280714, "grad_norm": 0.784789703381468, "learning_rate": 8.564980652564778e-06, "loss": 0.2876, "step": 7867 }, { "epoch": 0.27000686341798213, "grad_norm": 0.7217676145174129, "learning_rate": 8.564590964149466e-06, "loss": 0.3011, "step": 7868 }, { "epoch": 0.27004118050789294, "grad_norm": 0.8103840238293354, "learning_rate": 8.56420123169779e-06, "loss": 0.2755, "step": 7869 }, { "epoch": 0.2700754975978037, "grad_norm": 0.7431958975770618, "learning_rate": 8.563811455214562e-06, "loss": 0.2695, "step": 7870 }, { "epoch": 0.2701098146877145, "grad_norm": 0.8152624534623545, "learning_rate": 8.563421634704599e-06, "loss": 0.3131, "step": 7871 }, { "epoch": 0.27014413177762525, "grad_norm": 0.7450491440339143, "learning_rate": 8.563031770172716e-06, "loss": 0.2818, "step": 7872 }, { "epoch": 0.27017844886753606, "grad_norm": 0.8301519980884319, "learning_rate": 8.56264186162373e-06, "loss": 0.2902, "step": 7873 }, { "epoch": 0.2702127659574468, "grad_norm": 0.7166904876299338, "learning_rate": 8.562251909062458e-06, "loss": 0.2729, "step": 7874 }, { "epoch": 0.27024708304735756, "grad_norm": 0.7944250923751464, "learning_rate": 8.561861912493717e-06, "loss": 0.3033, "step": 7875 }, { "epoch": 0.27028140013726837, "grad_norm": 0.8371781524705874, "learning_rate": 8.561471871922323e-06, "loss": 0.3464, "step": 7876 }, { "epoch": 0.2703157172271791, "grad_norm": 0.8162386855758753, "learning_rate": 8.561081787353098e-06, "loss": 0.3639, "step": 7877 }, { "epoch": 0.2703500343170899, "grad_norm": 0.7817177672746068, "learning_rate": 8.560691658790859e-06, "loss": 0.3468, "step": 7878 }, { "epoch": 0.2703843514070007, "grad_norm": 0.699506205462741, "learning_rate": 8.560301486240425e-06, "loss": 0.2794, "step": 7879 }, { "epoch": 0.2704186684969115, "grad_norm": 0.8306044496936797, "learning_rate": 8.559911269706618e-06, "loss": 0.3402, "step": 7880 }, { "epoch": 0.27045298558682224, "grad_norm": 0.7888355278213233, "learning_rate": 8.559521009194255e-06, "loss": 0.2829, "step": 7881 }, { "epoch": 0.270487302676733, "grad_norm": 0.8216256719823638, "learning_rate": 8.559130704708164e-06, "loss": 0.2947, "step": 7882 }, { "epoch": 0.2705216197666438, "grad_norm": 0.8108391227444102, "learning_rate": 8.558740356253161e-06, "loss": 0.4056, "step": 7883 }, { "epoch": 0.27055593685655455, "grad_norm": 0.8637480745458361, "learning_rate": 8.558349963834069e-06, "loss": 0.3469, "step": 7884 }, { "epoch": 0.27059025394646535, "grad_norm": 0.7510763468526791, "learning_rate": 8.557959527455713e-06, "loss": 0.3308, "step": 7885 }, { "epoch": 0.2706245710363761, "grad_norm": 0.794433742928572, "learning_rate": 8.557569047122916e-06, "loss": 0.3015, "step": 7886 }, { "epoch": 0.2706588881262869, "grad_norm": 0.869154449120198, "learning_rate": 8.557178522840499e-06, "loss": 0.3153, "step": 7887 }, { "epoch": 0.27069320521619766, "grad_norm": 0.7922047994919929, "learning_rate": 8.556787954613291e-06, "loss": 0.3004, "step": 7888 }, { "epoch": 0.2707275223061084, "grad_norm": 0.7724544591977978, "learning_rate": 8.556397342446112e-06, "loss": 0.2923, "step": 7889 }, { "epoch": 0.2707618393960192, "grad_norm": 0.7349075490364624, "learning_rate": 8.556006686343791e-06, "loss": 0.3516, "step": 7890 }, { "epoch": 0.27079615648593, "grad_norm": 0.7296864839815785, "learning_rate": 8.555615986311155e-06, "loss": 0.2941, "step": 7891 }, { "epoch": 0.2708304735758408, "grad_norm": 0.8150334840827744, "learning_rate": 8.555225242353027e-06, "loss": 0.3125, "step": 7892 }, { "epoch": 0.27086479066575153, "grad_norm": 0.7359456797847498, "learning_rate": 8.554834454474236e-06, "loss": 0.2684, "step": 7893 }, { "epoch": 0.27089910775566234, "grad_norm": 0.7700130057429287, "learning_rate": 8.554443622679612e-06, "loss": 0.3477, "step": 7894 }, { "epoch": 0.2709334248455731, "grad_norm": 0.9262143349497777, "learning_rate": 8.55405274697398e-06, "loss": 0.3314, "step": 7895 }, { "epoch": 0.2709677419354839, "grad_norm": 0.8313402436998232, "learning_rate": 8.55366182736217e-06, "loss": 0.2564, "step": 7896 }, { "epoch": 0.27100205902539465, "grad_norm": 0.8506926315991447, "learning_rate": 8.55327086384901e-06, "loss": 0.3109, "step": 7897 }, { "epoch": 0.2710363761153054, "grad_norm": 0.9218692993330471, "learning_rate": 8.552879856439333e-06, "loss": 0.3317, "step": 7898 }, { "epoch": 0.2710706932052162, "grad_norm": 0.676559011969497, "learning_rate": 8.552488805137968e-06, "loss": 0.3083, "step": 7899 }, { "epoch": 0.27110501029512696, "grad_norm": 0.6564697544796199, "learning_rate": 8.552097709949744e-06, "loss": 0.2834, "step": 7900 }, { "epoch": 0.27113932738503776, "grad_norm": 1.0442774815457618, "learning_rate": 8.551706570879494e-06, "loss": 0.266, "step": 7901 }, { "epoch": 0.2711736444749485, "grad_norm": 0.6477137321406192, "learning_rate": 8.55131538793205e-06, "loss": 0.2887, "step": 7902 }, { "epoch": 0.2712079615648593, "grad_norm": 0.7110777854469301, "learning_rate": 8.550924161112247e-06, "loss": 0.3648, "step": 7903 }, { "epoch": 0.2712422786547701, "grad_norm": 0.7715081045615145, "learning_rate": 8.550532890424913e-06, "loss": 0.3408, "step": 7904 }, { "epoch": 0.2712765957446808, "grad_norm": 0.7194409434129132, "learning_rate": 8.550141575874889e-06, "loss": 0.3334, "step": 7905 }, { "epoch": 0.27131091283459163, "grad_norm": 0.7572184251525529, "learning_rate": 8.549750217467e-06, "loss": 0.3641, "step": 7906 }, { "epoch": 0.2713452299245024, "grad_norm": 0.7758898810567025, "learning_rate": 8.549358815206087e-06, "loss": 0.3514, "step": 7907 }, { "epoch": 0.2713795470144132, "grad_norm": 0.8080903022771522, "learning_rate": 8.548967369096987e-06, "loss": 0.3013, "step": 7908 }, { "epoch": 0.27141386410432394, "grad_norm": 0.7958968607768665, "learning_rate": 8.54857587914453e-06, "loss": 0.3735, "step": 7909 }, { "epoch": 0.27144818119423475, "grad_norm": 0.7845917035095257, "learning_rate": 8.548184345353556e-06, "loss": 0.3405, "step": 7910 }, { "epoch": 0.2714824982841455, "grad_norm": 0.770700126104807, "learning_rate": 8.547792767728903e-06, "loss": 0.2573, "step": 7911 }, { "epoch": 0.27151681537405625, "grad_norm": 0.7768357014314541, "learning_rate": 8.547401146275404e-06, "loss": 0.3417, "step": 7912 }, { "epoch": 0.27155113246396706, "grad_norm": 0.84439130670356, "learning_rate": 8.5470094809979e-06, "loss": 0.3609, "step": 7913 }, { "epoch": 0.2715854495538778, "grad_norm": 0.806515712861437, "learning_rate": 8.54661777190123e-06, "loss": 0.3575, "step": 7914 }, { "epoch": 0.2716197666437886, "grad_norm": 0.881144755815514, "learning_rate": 8.546226018990232e-06, "loss": 0.3759, "step": 7915 }, { "epoch": 0.27165408373369937, "grad_norm": 0.8633246731362876, "learning_rate": 8.545834222269746e-06, "loss": 0.3968, "step": 7916 }, { "epoch": 0.2716884008236102, "grad_norm": 0.7116530192804262, "learning_rate": 8.545442381744613e-06, "loss": 0.2707, "step": 7917 }, { "epoch": 0.27172271791352093, "grad_norm": 0.7744877088163147, "learning_rate": 8.545050497419672e-06, "loss": 0.3479, "step": 7918 }, { "epoch": 0.27175703500343174, "grad_norm": 0.9124404144131079, "learning_rate": 8.544658569299766e-06, "loss": 0.3458, "step": 7919 }, { "epoch": 0.2717913520933425, "grad_norm": 0.7619414526548016, "learning_rate": 8.544266597389735e-06, "loss": 0.3642, "step": 7920 }, { "epoch": 0.27182566918325324, "grad_norm": 0.852055309896225, "learning_rate": 8.543874581694424e-06, "loss": 0.291, "step": 7921 }, { "epoch": 0.27185998627316404, "grad_norm": 0.7925162292538536, "learning_rate": 8.543482522218673e-06, "loss": 0.3532, "step": 7922 }, { "epoch": 0.2718943033630748, "grad_norm": 0.6557335433801618, "learning_rate": 8.543090418967326e-06, "loss": 0.2666, "step": 7923 }, { "epoch": 0.2719286204529856, "grad_norm": 0.7164500752657437, "learning_rate": 8.542698271945228e-06, "loss": 0.36, "step": 7924 }, { "epoch": 0.27196293754289635, "grad_norm": 0.816302930641149, "learning_rate": 8.542306081157226e-06, "loss": 0.3875, "step": 7925 }, { "epoch": 0.27199725463280716, "grad_norm": 0.7527294542836607, "learning_rate": 8.541913846608161e-06, "loss": 0.333, "step": 7926 }, { "epoch": 0.2720315717227179, "grad_norm": 0.7614477701185038, "learning_rate": 8.54152156830288e-06, "loss": 0.2978, "step": 7927 }, { "epoch": 0.27206588881262866, "grad_norm": 0.8190499266126976, "learning_rate": 8.541129246246226e-06, "loss": 0.3026, "step": 7928 }, { "epoch": 0.27210020590253947, "grad_norm": 0.794604125296512, "learning_rate": 8.540736880443052e-06, "loss": 0.3445, "step": 7929 }, { "epoch": 0.2721345229924502, "grad_norm": 0.7494622033066549, "learning_rate": 8.540344470898201e-06, "loss": 0.3314, "step": 7930 }, { "epoch": 0.27216884008236103, "grad_norm": 0.7198634970466544, "learning_rate": 8.539952017616522e-06, "loss": 0.279, "step": 7931 }, { "epoch": 0.2722031571722718, "grad_norm": 0.9123665911256442, "learning_rate": 8.539559520602863e-06, "loss": 0.3092, "step": 7932 }, { "epoch": 0.2722374742621826, "grad_norm": 0.8341012360940868, "learning_rate": 8.539166979862072e-06, "loss": 0.3431, "step": 7933 }, { "epoch": 0.27227179135209334, "grad_norm": 0.6980506127863515, "learning_rate": 8.538774395398999e-06, "loss": 0.3219, "step": 7934 }, { "epoch": 0.2723061084420041, "grad_norm": 0.7821540941868893, "learning_rate": 8.538381767218495e-06, "loss": 0.352, "step": 7935 }, { "epoch": 0.2723404255319149, "grad_norm": 0.7973591890967763, "learning_rate": 8.53798909532541e-06, "loss": 0.294, "step": 7936 }, { "epoch": 0.27237474262182565, "grad_norm": 0.837553252409228, "learning_rate": 8.537596379724594e-06, "loss": 0.3071, "step": 7937 }, { "epoch": 0.27240905971173646, "grad_norm": 0.8527098559780402, "learning_rate": 8.5372036204209e-06, "loss": 0.3053, "step": 7938 }, { "epoch": 0.2724433768016472, "grad_norm": 0.767436071073462, "learning_rate": 8.536810817419178e-06, "loss": 0.321, "step": 7939 }, { "epoch": 0.272477693891558, "grad_norm": 0.7254682278547806, "learning_rate": 8.536417970724283e-06, "loss": 0.2739, "step": 7940 }, { "epoch": 0.27251201098146877, "grad_norm": 0.7404254001199704, "learning_rate": 8.536025080341066e-06, "loss": 0.3093, "step": 7941 }, { "epoch": 0.2725463280713796, "grad_norm": 0.8420553126532592, "learning_rate": 8.535632146274381e-06, "loss": 0.3358, "step": 7942 }, { "epoch": 0.2725806451612903, "grad_norm": 0.8739931199938599, "learning_rate": 8.535239168529086e-06, "loss": 0.2869, "step": 7943 }, { "epoch": 0.2726149622512011, "grad_norm": 0.7013692031726223, "learning_rate": 8.53484614711003e-06, "loss": 0.3177, "step": 7944 }, { "epoch": 0.2726492793411119, "grad_norm": 0.9220795765125254, "learning_rate": 8.534453082022072e-06, "loss": 0.3225, "step": 7945 }, { "epoch": 0.27268359643102263, "grad_norm": 0.7411979641754686, "learning_rate": 8.534059973270069e-06, "loss": 0.287, "step": 7946 }, { "epoch": 0.27271791352093344, "grad_norm": 0.7010272945544743, "learning_rate": 8.533666820858873e-06, "loss": 0.3024, "step": 7947 }, { "epoch": 0.2727522306108442, "grad_norm": 0.8032322292484475, "learning_rate": 8.533273624793343e-06, "loss": 0.3378, "step": 7948 }, { "epoch": 0.272786547700755, "grad_norm": 0.7846952520317088, "learning_rate": 8.532880385078337e-06, "loss": 0.3454, "step": 7949 }, { "epoch": 0.27282086479066575, "grad_norm": 0.8572871750607487, "learning_rate": 8.532487101718714e-06, "loss": 0.3238, "step": 7950 }, { "epoch": 0.2728551818805765, "grad_norm": 0.8656624066976732, "learning_rate": 8.53209377471933e-06, "loss": 0.329, "step": 7951 }, { "epoch": 0.2728894989704873, "grad_norm": 1.0438468048355778, "learning_rate": 8.531700404085047e-06, "loss": 0.2942, "step": 7952 }, { "epoch": 0.27292381606039806, "grad_norm": 0.9930564930647858, "learning_rate": 8.53130698982072e-06, "loss": 0.2806, "step": 7953 }, { "epoch": 0.27295813315030887, "grad_norm": 0.8094665165215578, "learning_rate": 8.530913531931214e-06, "loss": 0.3309, "step": 7954 }, { "epoch": 0.2729924502402196, "grad_norm": 0.6964672929498187, "learning_rate": 8.530520030421389e-06, "loss": 0.2971, "step": 7955 }, { "epoch": 0.2730267673301304, "grad_norm": 0.7453751405595617, "learning_rate": 8.530126485296103e-06, "loss": 0.2973, "step": 7956 }, { "epoch": 0.2730610844200412, "grad_norm": 0.9589384662213292, "learning_rate": 8.529732896560224e-06, "loss": 0.2947, "step": 7957 }, { "epoch": 0.27309540150995193, "grad_norm": 0.7518957304960763, "learning_rate": 8.529339264218606e-06, "loss": 0.2836, "step": 7958 }, { "epoch": 0.27312971859986274, "grad_norm": 0.8126949569663997, "learning_rate": 8.528945588276119e-06, "loss": 0.3155, "step": 7959 }, { "epoch": 0.2731640356897735, "grad_norm": 0.730213836137583, "learning_rate": 8.528551868737622e-06, "loss": 0.291, "step": 7960 }, { "epoch": 0.2731983527796843, "grad_norm": 0.8084671725750143, "learning_rate": 8.528158105607982e-06, "loss": 0.301, "step": 7961 }, { "epoch": 0.27323266986959505, "grad_norm": 0.8658856561780982, "learning_rate": 8.52776429889206e-06, "loss": 0.3622, "step": 7962 }, { "epoch": 0.27326698695950585, "grad_norm": 0.7932126549932773, "learning_rate": 8.527370448594726e-06, "loss": 0.415, "step": 7963 }, { "epoch": 0.2733013040494166, "grad_norm": 0.7552945885461456, "learning_rate": 8.52697655472084e-06, "loss": 0.3191, "step": 7964 }, { "epoch": 0.2733356211393274, "grad_norm": 0.8047647645257863, "learning_rate": 8.52658261727527e-06, "loss": 0.3541, "step": 7965 }, { "epoch": 0.27336993822923816, "grad_norm": 0.8857134078775261, "learning_rate": 8.526188636262886e-06, "loss": 0.3934, "step": 7966 }, { "epoch": 0.2734042553191489, "grad_norm": 0.7486814704307232, "learning_rate": 8.525794611688552e-06, "loss": 0.349, "step": 7967 }, { "epoch": 0.2734385724090597, "grad_norm": 0.7681364744309099, "learning_rate": 8.525400543557136e-06, "loss": 0.3097, "step": 7968 }, { "epoch": 0.2734728894989705, "grad_norm": 0.8991419671643663, "learning_rate": 8.525006431873506e-06, "loss": 0.3248, "step": 7969 }, { "epoch": 0.2735072065888813, "grad_norm": 0.7618162635237407, "learning_rate": 8.524612276642532e-06, "loss": 0.3689, "step": 7970 }, { "epoch": 0.27354152367879203, "grad_norm": 0.7155171424645546, "learning_rate": 8.52421807786908e-06, "loss": 0.2791, "step": 7971 }, { "epoch": 0.27357584076870284, "grad_norm": 0.7086938027634716, "learning_rate": 8.523823835558025e-06, "loss": 0.3144, "step": 7972 }, { "epoch": 0.2736101578586136, "grad_norm": 0.7794150904942623, "learning_rate": 8.523429549714235e-06, "loss": 0.3464, "step": 7973 }, { "epoch": 0.27364447494852434, "grad_norm": 0.7056765528424083, "learning_rate": 8.52303522034258e-06, "loss": 0.2986, "step": 7974 }, { "epoch": 0.27367879203843515, "grad_norm": 0.7428970058917362, "learning_rate": 8.522640847447932e-06, "loss": 0.3294, "step": 7975 }, { "epoch": 0.2737131091283459, "grad_norm": 0.7055079901579188, "learning_rate": 8.522246431035165e-06, "loss": 0.3077, "step": 7976 }, { "epoch": 0.2737474262182567, "grad_norm": 0.8099029572943908, "learning_rate": 8.521851971109149e-06, "loss": 0.3273, "step": 7977 }, { "epoch": 0.27378174330816746, "grad_norm": 0.860549636378933, "learning_rate": 8.521457467674758e-06, "loss": 0.3708, "step": 7978 }, { "epoch": 0.27381606039807826, "grad_norm": 0.8129502281507737, "learning_rate": 8.521062920736865e-06, "loss": 0.2909, "step": 7979 }, { "epoch": 0.273850377487989, "grad_norm": 0.8081299448050029, "learning_rate": 8.520668330300346e-06, "loss": 0.2915, "step": 7980 }, { "epoch": 0.27388469457789977, "grad_norm": 0.8534959481716613, "learning_rate": 8.520273696370072e-06, "loss": 0.3094, "step": 7981 }, { "epoch": 0.2739190116678106, "grad_norm": 0.7564072065617066, "learning_rate": 8.519879018950924e-06, "loss": 0.3012, "step": 7982 }, { "epoch": 0.2739533287577213, "grad_norm": 0.8382028051270881, "learning_rate": 8.519484298047773e-06, "loss": 0.3143, "step": 7983 }, { "epoch": 0.27398764584763213, "grad_norm": 0.8649934198101185, "learning_rate": 8.519089533665496e-06, "loss": 0.3036, "step": 7984 }, { "epoch": 0.2740219629375429, "grad_norm": 0.8274820339031447, "learning_rate": 8.518694725808972e-06, "loss": 0.3615, "step": 7985 }, { "epoch": 0.2740562800274537, "grad_norm": 0.7911629129377635, "learning_rate": 8.518299874483076e-06, "loss": 0.3006, "step": 7986 }, { "epoch": 0.27409059711736444, "grad_norm": 0.8558583909495805, "learning_rate": 8.517904979692689e-06, "loss": 0.376, "step": 7987 }, { "epoch": 0.27412491420727525, "grad_norm": 1.0645730875686306, "learning_rate": 8.517510041442684e-06, "loss": 0.3409, "step": 7988 }, { "epoch": 0.274159231297186, "grad_norm": 0.737001126662327, "learning_rate": 8.517115059737945e-06, "loss": 0.3338, "step": 7989 }, { "epoch": 0.27419354838709675, "grad_norm": 0.7631023651201522, "learning_rate": 8.516720034583351e-06, "loss": 0.2733, "step": 7990 }, { "epoch": 0.27422786547700756, "grad_norm": 0.7907683526761526, "learning_rate": 8.51632496598378e-06, "loss": 0.352, "step": 7991 }, { "epoch": 0.2742621825669183, "grad_norm": 0.7479051865219756, "learning_rate": 8.515929853944115e-06, "loss": 0.301, "step": 7992 }, { "epoch": 0.2742964996568291, "grad_norm": 0.8524159857976917, "learning_rate": 8.515534698469235e-06, "loss": 0.3335, "step": 7993 }, { "epoch": 0.27433081674673987, "grad_norm": 0.7216760686088813, "learning_rate": 8.515139499564023e-06, "loss": 0.2694, "step": 7994 }, { "epoch": 0.2743651338366507, "grad_norm": 0.7542455398802401, "learning_rate": 8.514744257233361e-06, "loss": 0.3271, "step": 7995 }, { "epoch": 0.27439945092656143, "grad_norm": 0.8325318665911849, "learning_rate": 8.514348971482128e-06, "loss": 0.3256, "step": 7996 }, { "epoch": 0.2744337680164722, "grad_norm": 0.6812798969647048, "learning_rate": 8.513953642315214e-06, "loss": 0.2934, "step": 7997 }, { "epoch": 0.274468085106383, "grad_norm": 0.7381808073001427, "learning_rate": 8.5135582697375e-06, "loss": 0.3093, "step": 7998 }, { "epoch": 0.27450240219629374, "grad_norm": 0.7514634115266866, "learning_rate": 8.513162853753869e-06, "loss": 0.3065, "step": 7999 }, { "epoch": 0.27453671928620454, "grad_norm": 0.748292258489637, "learning_rate": 8.512767394369207e-06, "loss": 0.2894, "step": 8000 }, { "epoch": 0.2745710363761153, "grad_norm": 0.7700190390083907, "learning_rate": 8.5123718915884e-06, "loss": 0.3356, "step": 8001 }, { "epoch": 0.2746053534660261, "grad_norm": 0.7905983208449632, "learning_rate": 8.511976345416331e-06, "loss": 0.3185, "step": 8002 }, { "epoch": 0.27463967055593685, "grad_norm": 0.7071404442474044, "learning_rate": 8.51158075585789e-06, "loss": 0.3171, "step": 8003 }, { "epoch": 0.2746739876458476, "grad_norm": 0.6813430623416296, "learning_rate": 8.511185122917965e-06, "loss": 0.3067, "step": 8004 }, { "epoch": 0.2747083047357584, "grad_norm": 0.7019829368086018, "learning_rate": 8.51078944660144e-06, "loss": 0.3142, "step": 8005 }, { "epoch": 0.27474262182566916, "grad_norm": 0.7535707790634871, "learning_rate": 8.510393726913204e-06, "loss": 0.3107, "step": 8006 }, { "epoch": 0.27477693891557997, "grad_norm": 0.8209140181448961, "learning_rate": 8.509997963858147e-06, "loss": 0.3183, "step": 8007 }, { "epoch": 0.2748112560054907, "grad_norm": 0.8133697993978366, "learning_rate": 8.509602157441158e-06, "loss": 0.3058, "step": 8008 }, { "epoch": 0.27484557309540153, "grad_norm": 0.7488913223193496, "learning_rate": 8.509206307667123e-06, "loss": 0.2945, "step": 8009 }, { "epoch": 0.2748798901853123, "grad_norm": 0.7919175401883591, "learning_rate": 8.508810414540938e-06, "loss": 0.3232, "step": 8010 }, { "epoch": 0.2749142072752231, "grad_norm": 0.883351884030027, "learning_rate": 8.508414478067492e-06, "loss": 0.3633, "step": 8011 }, { "epoch": 0.27494852436513384, "grad_norm": 0.7319299087729952, "learning_rate": 8.508018498251673e-06, "loss": 0.3215, "step": 8012 }, { "epoch": 0.2749828414550446, "grad_norm": 0.7387236314964414, "learning_rate": 8.507622475098378e-06, "loss": 0.2832, "step": 8013 }, { "epoch": 0.2750171585449554, "grad_norm": 0.8313312439407806, "learning_rate": 8.507226408612495e-06, "loss": 0.3083, "step": 8014 }, { "epoch": 0.27505147563486615, "grad_norm": 0.7536807370550105, "learning_rate": 8.506830298798919e-06, "loss": 0.3669, "step": 8015 }, { "epoch": 0.27508579272477696, "grad_norm": 0.8216777512047114, "learning_rate": 8.506434145662543e-06, "loss": 0.3201, "step": 8016 }, { "epoch": 0.2751201098146877, "grad_norm": 0.7717125493369114, "learning_rate": 8.506037949208262e-06, "loss": 0.3288, "step": 8017 }, { "epoch": 0.2751544269045985, "grad_norm": 0.8203150704076072, "learning_rate": 8.50564170944097e-06, "loss": 0.4314, "step": 8018 }, { "epoch": 0.27518874399450927, "grad_norm": 0.6975791720861707, "learning_rate": 8.50524542636556e-06, "loss": 0.2767, "step": 8019 }, { "epoch": 0.27522306108442, "grad_norm": 0.7681276131591165, "learning_rate": 8.504849099986931e-06, "loss": 0.3375, "step": 8020 }, { "epoch": 0.2752573781743308, "grad_norm": 0.7631498760738915, "learning_rate": 8.504452730309978e-06, "loss": 0.3705, "step": 8021 }, { "epoch": 0.2752916952642416, "grad_norm": 0.8065159234062047, "learning_rate": 8.504056317339598e-06, "loss": 0.3294, "step": 8022 }, { "epoch": 0.2753260123541524, "grad_norm": 0.82088175901948, "learning_rate": 8.503659861080685e-06, "loss": 0.3701, "step": 8023 }, { "epoch": 0.27536032944406313, "grad_norm": 0.8628703166222271, "learning_rate": 8.50326336153814e-06, "loss": 0.3224, "step": 8024 }, { "epoch": 0.27539464653397394, "grad_norm": 0.7706705128755899, "learning_rate": 8.502866818716861e-06, "loss": 0.2845, "step": 8025 }, { "epoch": 0.2754289636238847, "grad_norm": 0.8110597851946478, "learning_rate": 8.502470232621748e-06, "loss": 0.3145, "step": 8026 }, { "epoch": 0.27546328071379544, "grad_norm": 0.8530871477487161, "learning_rate": 8.502073603257697e-06, "loss": 0.2854, "step": 8027 }, { "epoch": 0.27549759780370625, "grad_norm": 0.794574539310842, "learning_rate": 8.50167693062961e-06, "loss": 0.2931, "step": 8028 }, { "epoch": 0.275531914893617, "grad_norm": 0.7886553959385164, "learning_rate": 8.501280214742386e-06, "loss": 0.3293, "step": 8029 }, { "epoch": 0.2755662319835278, "grad_norm": 0.8717639292063009, "learning_rate": 8.50088345560093e-06, "loss": 0.3094, "step": 8030 }, { "epoch": 0.27560054907343856, "grad_norm": 0.814362827349064, "learning_rate": 8.500486653210138e-06, "loss": 0.3221, "step": 8031 }, { "epoch": 0.27563486616334937, "grad_norm": 0.7640835550485078, "learning_rate": 8.500089807574915e-06, "loss": 0.3204, "step": 8032 }, { "epoch": 0.2756691832532601, "grad_norm": 0.7573623674841746, "learning_rate": 8.499692918700163e-06, "loss": 0.3491, "step": 8033 }, { "epoch": 0.2757035003431709, "grad_norm": 0.8915709242168508, "learning_rate": 8.499295986590787e-06, "loss": 0.2977, "step": 8034 }, { "epoch": 0.2757378174330817, "grad_norm": 1.496068800137765, "learning_rate": 8.498899011251687e-06, "loss": 0.3162, "step": 8035 }, { "epoch": 0.27577213452299243, "grad_norm": 0.789694745808458, "learning_rate": 8.498501992687768e-06, "loss": 0.337, "step": 8036 }, { "epoch": 0.27580645161290324, "grad_norm": 0.7391552887430812, "learning_rate": 8.498104930903939e-06, "loss": 0.3802, "step": 8037 }, { "epoch": 0.275840768702814, "grad_norm": 0.7781679903107331, "learning_rate": 8.497707825905099e-06, "loss": 0.3127, "step": 8038 }, { "epoch": 0.2758750857927248, "grad_norm": 0.8047631755414533, "learning_rate": 8.497310677696158e-06, "loss": 0.3262, "step": 8039 }, { "epoch": 0.27590940288263555, "grad_norm": 0.7271599989564999, "learning_rate": 8.496913486282022e-06, "loss": 0.3812, "step": 8040 }, { "epoch": 0.27594371997254635, "grad_norm": 0.7991125600101411, "learning_rate": 8.496516251667595e-06, "loss": 0.3717, "step": 8041 }, { "epoch": 0.2759780370624571, "grad_norm": 0.7705202003573767, "learning_rate": 8.496118973857787e-06, "loss": 0.2924, "step": 8042 }, { "epoch": 0.27601235415236786, "grad_norm": 0.8307127497629199, "learning_rate": 8.495721652857507e-06, "loss": 0.3102, "step": 8043 }, { "epoch": 0.27604667124227866, "grad_norm": 0.8028278839964514, "learning_rate": 8.495324288671658e-06, "loss": 0.3096, "step": 8044 }, { "epoch": 0.2760809883321894, "grad_norm": 0.8000342757138394, "learning_rate": 8.494926881305154e-06, "loss": 0.3156, "step": 8045 }, { "epoch": 0.2761153054221002, "grad_norm": 0.8428438869983589, "learning_rate": 8.494529430762906e-06, "loss": 0.3807, "step": 8046 }, { "epoch": 0.276149622512011, "grad_norm": 0.8203074999570744, "learning_rate": 8.494131937049817e-06, "loss": 0.2939, "step": 8047 }, { "epoch": 0.2761839396019218, "grad_norm": 0.7944464139752812, "learning_rate": 8.493734400170803e-06, "loss": 0.3073, "step": 8048 }, { "epoch": 0.27621825669183253, "grad_norm": 0.7894880106173933, "learning_rate": 8.493336820130775e-06, "loss": 0.3171, "step": 8049 }, { "epoch": 0.2762525737817433, "grad_norm": 0.6634035349808155, "learning_rate": 8.492939196934641e-06, "loss": 0.2888, "step": 8050 }, { "epoch": 0.2762868908716541, "grad_norm": 0.8862792540002394, "learning_rate": 8.492541530587317e-06, "loss": 0.3081, "step": 8051 }, { "epoch": 0.27632120796156484, "grad_norm": 0.8633380096974866, "learning_rate": 8.492143821093717e-06, "loss": 0.3144, "step": 8052 }, { "epoch": 0.27635552505147565, "grad_norm": 0.7637060447129341, "learning_rate": 8.491746068458749e-06, "loss": 0.3226, "step": 8053 }, { "epoch": 0.2763898421413864, "grad_norm": 0.6507246330210327, "learning_rate": 8.491348272687327e-06, "loss": 0.2376, "step": 8054 }, { "epoch": 0.2764241592312972, "grad_norm": 0.6679029906006222, "learning_rate": 8.49095043378437e-06, "loss": 0.3071, "step": 8055 }, { "epoch": 0.27645847632120796, "grad_norm": 0.7897894352048838, "learning_rate": 8.490552551754792e-06, "loss": 0.2879, "step": 8056 }, { "epoch": 0.27649279341111876, "grad_norm": 0.8396020695837807, "learning_rate": 8.490154626603503e-06, "loss": 0.2901, "step": 8057 }, { "epoch": 0.2765271105010295, "grad_norm": 0.8361865060443643, "learning_rate": 8.489756658335426e-06, "loss": 0.2949, "step": 8058 }, { "epoch": 0.27656142759094027, "grad_norm": 0.7775233458978655, "learning_rate": 8.489358646955473e-06, "loss": 0.3322, "step": 8059 }, { "epoch": 0.2765957446808511, "grad_norm": 0.7798806871348454, "learning_rate": 8.488960592468562e-06, "loss": 0.3456, "step": 8060 }, { "epoch": 0.2766300617707618, "grad_norm": 0.9120238091570574, "learning_rate": 8.48856249487961e-06, "loss": 0.3677, "step": 8061 }, { "epoch": 0.27666437886067263, "grad_norm": 0.7321756940763181, "learning_rate": 8.488164354193538e-06, "loss": 0.2939, "step": 8062 }, { "epoch": 0.2766986959505834, "grad_norm": 0.7630675616869337, "learning_rate": 8.48776617041526e-06, "loss": 0.2985, "step": 8063 }, { "epoch": 0.2767330130404942, "grad_norm": 0.8756247850575477, "learning_rate": 8.487367943549698e-06, "loss": 0.3113, "step": 8064 }, { "epoch": 0.27676733013040494, "grad_norm": 0.80666741888499, "learning_rate": 8.486969673601771e-06, "loss": 0.2882, "step": 8065 }, { "epoch": 0.2768016472203157, "grad_norm": 0.781162149359353, "learning_rate": 8.4865713605764e-06, "loss": 0.2726, "step": 8066 }, { "epoch": 0.2768359643102265, "grad_norm": 0.834319225525489, "learning_rate": 8.486173004478502e-06, "loss": 0.3731, "step": 8067 }, { "epoch": 0.27687028140013725, "grad_norm": 0.7896813875383921, "learning_rate": 8.485774605313003e-06, "loss": 0.3104, "step": 8068 }, { "epoch": 0.27690459849004806, "grad_norm": 0.7733493331416744, "learning_rate": 8.485376163084821e-06, "loss": 0.3005, "step": 8069 }, { "epoch": 0.2769389155799588, "grad_norm": 0.7735237473551211, "learning_rate": 8.484977677798883e-06, "loss": 0.2799, "step": 8070 }, { "epoch": 0.2769732326698696, "grad_norm": 0.7960260458118774, "learning_rate": 8.484579149460106e-06, "loss": 0.3564, "step": 8071 }, { "epoch": 0.27700754975978037, "grad_norm": 0.7034988346251474, "learning_rate": 8.48418057807342e-06, "loss": 0.2788, "step": 8072 }, { "epoch": 0.2770418668496911, "grad_norm": 0.7632210720636948, "learning_rate": 8.483781963643742e-06, "loss": 0.319, "step": 8073 }, { "epoch": 0.2770761839396019, "grad_norm": 0.8228158492191757, "learning_rate": 8.483383306176001e-06, "loss": 0.33, "step": 8074 }, { "epoch": 0.2771105010295127, "grad_norm": 0.8060923687568111, "learning_rate": 8.48298460567512e-06, "loss": 0.3823, "step": 8075 }, { "epoch": 0.2771448181194235, "grad_norm": 0.7834012070472276, "learning_rate": 8.482585862146024e-06, "loss": 0.3768, "step": 8076 }, { "epoch": 0.27717913520933424, "grad_norm": 0.8925327278743947, "learning_rate": 8.48218707559364e-06, "loss": 0.3492, "step": 8077 }, { "epoch": 0.27721345229924504, "grad_norm": 0.8662666685085154, "learning_rate": 8.481788246022896e-06, "loss": 0.3387, "step": 8078 }, { "epoch": 0.2772477693891558, "grad_norm": 0.7939136581189716, "learning_rate": 8.481389373438718e-06, "loss": 0.2987, "step": 8079 }, { "epoch": 0.2772820864790666, "grad_norm": 0.896843256764824, "learning_rate": 8.48099045784603e-06, "loss": 0.3416, "step": 8080 }, { "epoch": 0.27731640356897735, "grad_norm": 0.7706289422141553, "learning_rate": 8.480591499249766e-06, "loss": 0.2797, "step": 8081 }, { "epoch": 0.2773507206588881, "grad_norm": 1.0956902348856168, "learning_rate": 8.48019249765485e-06, "loss": 0.3086, "step": 8082 }, { "epoch": 0.2773850377487989, "grad_norm": 0.8455049081019584, "learning_rate": 8.479793453066216e-06, "loss": 0.3404, "step": 8083 }, { "epoch": 0.27741935483870966, "grad_norm": 0.8136649342833556, "learning_rate": 8.479394365488788e-06, "loss": 0.2735, "step": 8084 }, { "epoch": 0.27745367192862047, "grad_norm": 0.8415624415809397, "learning_rate": 8.478995234927503e-06, "loss": 0.3561, "step": 8085 }, { "epoch": 0.2774879890185312, "grad_norm": 0.8094144866011254, "learning_rate": 8.478596061387286e-06, "loss": 0.3645, "step": 8086 }, { "epoch": 0.27752230610844203, "grad_norm": 0.802604215346413, "learning_rate": 8.47819684487307e-06, "loss": 0.2858, "step": 8087 }, { "epoch": 0.2775566231983528, "grad_norm": 0.8015432110902961, "learning_rate": 8.477797585389786e-06, "loss": 0.339, "step": 8088 }, { "epoch": 0.27759094028826353, "grad_norm": 0.8604532196683198, "learning_rate": 8.477398282942368e-06, "loss": 0.3555, "step": 8089 }, { "epoch": 0.27762525737817434, "grad_norm": 0.8811399056840533, "learning_rate": 8.47699893753575e-06, "loss": 0.3199, "step": 8090 }, { "epoch": 0.2776595744680851, "grad_norm": 0.705229526150059, "learning_rate": 8.476599549174863e-06, "loss": 0.3375, "step": 8091 }, { "epoch": 0.2776938915579959, "grad_norm": 0.7764577240177545, "learning_rate": 8.476200117864642e-06, "loss": 0.3097, "step": 8092 }, { "epoch": 0.27772820864790665, "grad_norm": 0.6836552683718636, "learning_rate": 8.475800643610022e-06, "loss": 0.273, "step": 8093 }, { "epoch": 0.27776252573781746, "grad_norm": 0.7789478106445542, "learning_rate": 8.475401126415937e-06, "loss": 0.3156, "step": 8094 }, { "epoch": 0.2777968428277282, "grad_norm": 0.7919161812225104, "learning_rate": 8.475001566287321e-06, "loss": 0.3319, "step": 8095 }, { "epoch": 0.27783115991763896, "grad_norm": 0.8747540718421842, "learning_rate": 8.474601963229115e-06, "loss": 0.3338, "step": 8096 }, { "epoch": 0.27786547700754977, "grad_norm": 0.7766557820688988, "learning_rate": 8.474202317246251e-06, "loss": 0.3287, "step": 8097 }, { "epoch": 0.2778997940974605, "grad_norm": 0.7034343651825653, "learning_rate": 8.47380262834367e-06, "loss": 0.2941, "step": 8098 }, { "epoch": 0.2779341111873713, "grad_norm": 0.7506286258758618, "learning_rate": 8.473402896526305e-06, "loss": 0.3119, "step": 8099 }, { "epoch": 0.2779684282772821, "grad_norm": 0.7342631538738748, "learning_rate": 8.473003121799097e-06, "loss": 0.296, "step": 8100 }, { "epoch": 0.2780027453671929, "grad_norm": 0.7142956931481724, "learning_rate": 8.472603304166986e-06, "loss": 0.3002, "step": 8101 }, { "epoch": 0.27803706245710363, "grad_norm": 0.7611846238013323, "learning_rate": 8.47220344363491e-06, "loss": 0.3034, "step": 8102 }, { "epoch": 0.2780713795470144, "grad_norm": 0.6886807609864892, "learning_rate": 8.471803540207807e-06, "loss": 0.3412, "step": 8103 }, { "epoch": 0.2781056966369252, "grad_norm": 0.8442935488717417, "learning_rate": 8.471403593890622e-06, "loss": 0.326, "step": 8104 }, { "epoch": 0.27814001372683594, "grad_norm": 0.8610431204114873, "learning_rate": 8.47100360468829e-06, "loss": 0.2916, "step": 8105 }, { "epoch": 0.27817433081674675, "grad_norm": 0.8384508488264073, "learning_rate": 8.470603572605756e-06, "loss": 0.284, "step": 8106 }, { "epoch": 0.2782086479066575, "grad_norm": 0.8635892133166826, "learning_rate": 8.470203497647963e-06, "loss": 0.3094, "step": 8107 }, { "epoch": 0.2782429649965683, "grad_norm": 0.7844644771229665, "learning_rate": 8.46980337981985e-06, "loss": 0.2909, "step": 8108 }, { "epoch": 0.27827728208647906, "grad_norm": 0.8804636492203999, "learning_rate": 8.46940321912636e-06, "loss": 0.3389, "step": 8109 }, { "epoch": 0.27831159917638987, "grad_norm": 0.8560580537608955, "learning_rate": 8.46900301557244e-06, "loss": 0.4051, "step": 8110 }, { "epoch": 0.2783459162663006, "grad_norm": 0.8063086811417137, "learning_rate": 8.468602769163033e-06, "loss": 0.3623, "step": 8111 }, { "epoch": 0.27838023335621137, "grad_norm": 0.8393337681379679, "learning_rate": 8.468202479903081e-06, "loss": 0.3268, "step": 8112 }, { "epoch": 0.2784145504461222, "grad_norm": 0.9057896554604541, "learning_rate": 8.467802147797531e-06, "loss": 0.3618, "step": 8113 }, { "epoch": 0.27844886753603293, "grad_norm": 0.9029041226794251, "learning_rate": 8.467401772851329e-06, "loss": 0.3878, "step": 8114 }, { "epoch": 0.27848318462594374, "grad_norm": 0.7105700243065738, "learning_rate": 8.467001355069421e-06, "loss": 0.3088, "step": 8115 }, { "epoch": 0.2785175017158545, "grad_norm": 0.7043597527906692, "learning_rate": 8.466600894456753e-06, "loss": 0.3423, "step": 8116 }, { "epoch": 0.2785518188057653, "grad_norm": 0.7841746000620183, "learning_rate": 8.466200391018273e-06, "loss": 0.3418, "step": 8117 }, { "epoch": 0.27858613589567605, "grad_norm": 0.863842069457789, "learning_rate": 8.465799844758928e-06, "loss": 0.3416, "step": 8118 }, { "epoch": 0.2786204529855868, "grad_norm": 1.0983800558913368, "learning_rate": 8.465399255683667e-06, "loss": 0.3749, "step": 8119 }, { "epoch": 0.2786547700754976, "grad_norm": 0.85000196725105, "learning_rate": 8.46499862379744e-06, "loss": 0.3661, "step": 8120 }, { "epoch": 0.27868908716540836, "grad_norm": 0.8135193766911767, "learning_rate": 8.464597949105192e-06, "loss": 0.3144, "step": 8121 }, { "epoch": 0.27872340425531916, "grad_norm": 0.7519123669446364, "learning_rate": 8.464197231611877e-06, "loss": 0.339, "step": 8122 }, { "epoch": 0.2787577213452299, "grad_norm": 0.7599435306077201, "learning_rate": 8.463796471322445e-06, "loss": 0.324, "step": 8123 }, { "epoch": 0.2787920384351407, "grad_norm": 0.8032184299779276, "learning_rate": 8.463395668241843e-06, "loss": 0.2901, "step": 8124 }, { "epoch": 0.2788263555250515, "grad_norm": 0.7696514131207614, "learning_rate": 8.462994822375027e-06, "loss": 0.3527, "step": 8125 }, { "epoch": 0.2788606726149622, "grad_norm": 0.7711272990184795, "learning_rate": 8.462593933726949e-06, "loss": 0.3099, "step": 8126 }, { "epoch": 0.27889498970487303, "grad_norm": 0.8112656197857816, "learning_rate": 8.462193002302558e-06, "loss": 0.2752, "step": 8127 }, { "epoch": 0.2789293067947838, "grad_norm": 0.7913192780010911, "learning_rate": 8.46179202810681e-06, "loss": 0.2976, "step": 8128 }, { "epoch": 0.2789636238846946, "grad_norm": 0.7052928449640385, "learning_rate": 8.461391011144657e-06, "loss": 0.3347, "step": 8129 }, { "epoch": 0.27899794097460534, "grad_norm": 0.9074313580635481, "learning_rate": 8.460989951421055e-06, "loss": 0.3227, "step": 8130 }, { "epoch": 0.27903225806451615, "grad_norm": 0.7179193341056378, "learning_rate": 8.460588848940955e-06, "loss": 0.2797, "step": 8131 }, { "epoch": 0.2790665751544269, "grad_norm": 0.7800085024256354, "learning_rate": 8.460187703709316e-06, "loss": 0.3282, "step": 8132 }, { "epoch": 0.2791008922443377, "grad_norm": 0.7640023834836075, "learning_rate": 8.459786515731092e-06, "loss": 0.3231, "step": 8133 }, { "epoch": 0.27913520933424846, "grad_norm": 0.772257493594973, "learning_rate": 8.45938528501124e-06, "loss": 0.3518, "step": 8134 }, { "epoch": 0.2791695264241592, "grad_norm": 0.7391730659033456, "learning_rate": 8.458984011554715e-06, "loss": 0.319, "step": 8135 }, { "epoch": 0.27920384351407, "grad_norm": 0.965586269996495, "learning_rate": 8.458582695366475e-06, "loss": 0.3074, "step": 8136 }, { "epoch": 0.27923816060398077, "grad_norm": 0.744324734671622, "learning_rate": 8.45818133645148e-06, "loss": 0.3063, "step": 8137 }, { "epoch": 0.2792724776938916, "grad_norm": 0.8654285475078168, "learning_rate": 8.457779934814683e-06, "loss": 0.346, "step": 8138 }, { "epoch": 0.2793067947838023, "grad_norm": 0.7561776404019955, "learning_rate": 8.457378490461048e-06, "loss": 0.3136, "step": 8139 }, { "epoch": 0.27934111187371313, "grad_norm": 0.801062576451146, "learning_rate": 8.456977003395532e-06, "loss": 0.2619, "step": 8140 }, { "epoch": 0.2793754289636239, "grad_norm": 0.711397853221166, "learning_rate": 8.456575473623097e-06, "loss": 0.296, "step": 8141 }, { "epoch": 0.27940974605353464, "grad_norm": 0.734258673422992, "learning_rate": 8.456173901148701e-06, "loss": 0.3082, "step": 8142 }, { "epoch": 0.27944406314344544, "grad_norm": 0.7745001151021665, "learning_rate": 8.455772285977308e-06, "loss": 0.2774, "step": 8143 }, { "epoch": 0.2794783802333562, "grad_norm": 0.7090196798324366, "learning_rate": 8.455370628113875e-06, "loss": 0.2932, "step": 8144 }, { "epoch": 0.279512697323267, "grad_norm": 0.8816893900743682, "learning_rate": 8.454968927563367e-06, "loss": 0.3107, "step": 8145 }, { "epoch": 0.27954701441317775, "grad_norm": 0.6641184356026781, "learning_rate": 8.454567184330746e-06, "loss": 0.2963, "step": 8146 }, { "epoch": 0.27958133150308856, "grad_norm": 0.6782700308543943, "learning_rate": 8.454165398420977e-06, "loss": 0.2993, "step": 8147 }, { "epoch": 0.2796156485929993, "grad_norm": 0.8061997119945501, "learning_rate": 8.453763569839019e-06, "loss": 0.2934, "step": 8148 }, { "epoch": 0.27964996568291006, "grad_norm": 0.8457474860437236, "learning_rate": 8.45336169858984e-06, "loss": 0.3138, "step": 8149 }, { "epoch": 0.27968428277282087, "grad_norm": 0.7496328665931059, "learning_rate": 8.452959784678403e-06, "loss": 0.2697, "step": 8150 }, { "epoch": 0.2797185998627316, "grad_norm": 0.7817147946883527, "learning_rate": 8.452557828109674e-06, "loss": 0.3668, "step": 8151 }, { "epoch": 0.2797529169526424, "grad_norm": 0.786439230739705, "learning_rate": 8.45215582888862e-06, "loss": 0.3176, "step": 8152 }, { "epoch": 0.2797872340425532, "grad_norm": 0.8485837823589126, "learning_rate": 8.451753787020203e-06, "loss": 0.3228, "step": 8153 }, { "epoch": 0.279821551132464, "grad_norm": 0.7809653586094844, "learning_rate": 8.451351702509392e-06, "loss": 0.3123, "step": 8154 }, { "epoch": 0.27985586822237474, "grad_norm": 0.7027952317335137, "learning_rate": 8.450949575361156e-06, "loss": 0.3717, "step": 8155 }, { "epoch": 0.27989018531228554, "grad_norm": 0.799141452069514, "learning_rate": 8.450547405580461e-06, "loss": 0.2747, "step": 8156 }, { "epoch": 0.2799245024021963, "grad_norm": 0.7894505872736209, "learning_rate": 8.450145193172276e-06, "loss": 0.3061, "step": 8157 }, { "epoch": 0.27995881949210705, "grad_norm": 0.7471969056760046, "learning_rate": 8.449742938141569e-06, "loss": 0.2909, "step": 8158 }, { "epoch": 0.27999313658201785, "grad_norm": 0.920779639101307, "learning_rate": 8.449340640493311e-06, "loss": 0.3256, "step": 8159 }, { "epoch": 0.2800274536719286, "grad_norm": 0.7979150219917776, "learning_rate": 8.44893830023247e-06, "loss": 0.2965, "step": 8160 }, { "epoch": 0.2800617707618394, "grad_norm": 0.7995245781938395, "learning_rate": 8.448535917364019e-06, "loss": 0.3365, "step": 8161 }, { "epoch": 0.28009608785175016, "grad_norm": 0.7030471609433302, "learning_rate": 8.448133491892925e-06, "loss": 0.3099, "step": 8162 }, { "epoch": 0.28013040494166097, "grad_norm": 0.7558226072025035, "learning_rate": 8.447731023824164e-06, "loss": 0.3071, "step": 8163 }, { "epoch": 0.2801647220315717, "grad_norm": 0.821596022749411, "learning_rate": 8.447328513162705e-06, "loss": 0.3534, "step": 8164 }, { "epoch": 0.2801990391214825, "grad_norm": 0.7553857833243918, "learning_rate": 8.446925959913522e-06, "loss": 0.3265, "step": 8165 }, { "epoch": 0.2802333562113933, "grad_norm": 0.7793524971563113, "learning_rate": 8.446523364081585e-06, "loss": 0.3195, "step": 8166 }, { "epoch": 0.28026767330130403, "grad_norm": 0.8360243491458217, "learning_rate": 8.44612072567187e-06, "loss": 0.2967, "step": 8167 }, { "epoch": 0.28030199039121484, "grad_norm": 0.7308413398623512, "learning_rate": 8.445718044689355e-06, "loss": 0.3182, "step": 8168 }, { "epoch": 0.2803363074811256, "grad_norm": 0.769815117793955, "learning_rate": 8.44531532113901e-06, "loss": 0.3308, "step": 8169 }, { "epoch": 0.2803706245710364, "grad_norm": 0.7587958146345356, "learning_rate": 8.44491255502581e-06, "loss": 0.3497, "step": 8170 }, { "epoch": 0.28040494166094715, "grad_norm": 0.7578244238536264, "learning_rate": 8.444509746354731e-06, "loss": 0.3031, "step": 8171 }, { "epoch": 0.2804392587508579, "grad_norm": 0.7496780774640553, "learning_rate": 8.44410689513075e-06, "loss": 0.2716, "step": 8172 }, { "epoch": 0.2804735758407687, "grad_norm": 0.9119870764044513, "learning_rate": 8.443704001358845e-06, "loss": 0.3736, "step": 8173 }, { "epoch": 0.28050789293067946, "grad_norm": 0.7879010771894045, "learning_rate": 8.443301065043992e-06, "loss": 0.2939, "step": 8174 }, { "epoch": 0.28054221002059027, "grad_norm": 0.7534160691894047, "learning_rate": 8.442898086191169e-06, "loss": 0.3598, "step": 8175 }, { "epoch": 0.280576527110501, "grad_norm": 0.8639722320728023, "learning_rate": 8.442495064805353e-06, "loss": 0.3558, "step": 8176 }, { "epoch": 0.2806108442004118, "grad_norm": 0.8283943939228353, "learning_rate": 8.442092000891524e-06, "loss": 0.3553, "step": 8177 }, { "epoch": 0.2806451612903226, "grad_norm": 0.83427170621867, "learning_rate": 8.441688894454662e-06, "loss": 0.3022, "step": 8178 }, { "epoch": 0.2806794783802334, "grad_norm": 0.786399343945813, "learning_rate": 8.441285745499747e-06, "loss": 0.3123, "step": 8179 }, { "epoch": 0.28071379547014413, "grad_norm": 0.7305803872160391, "learning_rate": 8.440882554031757e-06, "loss": 0.3678, "step": 8180 }, { "epoch": 0.2807481125600549, "grad_norm": 0.7395433935597276, "learning_rate": 8.440479320055677e-06, "loss": 0.3351, "step": 8181 }, { "epoch": 0.2807824296499657, "grad_norm": 0.7830613323275649, "learning_rate": 8.440076043576486e-06, "loss": 0.3229, "step": 8182 }, { "epoch": 0.28081674673987644, "grad_norm": 0.7395886578939269, "learning_rate": 8.439672724599164e-06, "loss": 0.2819, "step": 8183 }, { "epoch": 0.28085106382978725, "grad_norm": 0.816062360104275, "learning_rate": 8.439269363128697e-06, "loss": 0.3013, "step": 8184 }, { "epoch": 0.280885380919698, "grad_norm": 0.7763342646656852, "learning_rate": 8.438865959170068e-06, "loss": 0.2515, "step": 8185 }, { "epoch": 0.2809196980096088, "grad_norm": 0.8052447602209302, "learning_rate": 8.438462512728257e-06, "loss": 0.3663, "step": 8186 }, { "epoch": 0.28095401509951956, "grad_norm": 0.7787677509804883, "learning_rate": 8.438059023808252e-06, "loss": 0.3115, "step": 8187 }, { "epoch": 0.2809883321894303, "grad_norm": 0.9497521372324792, "learning_rate": 8.437655492415036e-06, "loss": 0.2798, "step": 8188 }, { "epoch": 0.2810226492793411, "grad_norm": 0.7526406052164284, "learning_rate": 8.437251918553594e-06, "loss": 0.3129, "step": 8189 }, { "epoch": 0.28105696636925187, "grad_norm": 0.8072694888332729, "learning_rate": 8.436848302228913e-06, "loss": 0.3184, "step": 8190 }, { "epoch": 0.2810912834591627, "grad_norm": 0.7611305315307508, "learning_rate": 8.436444643445976e-06, "loss": 0.2953, "step": 8191 }, { "epoch": 0.28112560054907343, "grad_norm": 0.7216043050064564, "learning_rate": 8.436040942209773e-06, "loss": 0.3244, "step": 8192 }, { "epoch": 0.28115991763898424, "grad_norm": 0.7710809151195529, "learning_rate": 8.435637198525292e-06, "loss": 0.312, "step": 8193 }, { "epoch": 0.281194234728895, "grad_norm": 0.7969380681817367, "learning_rate": 8.435233412397518e-06, "loss": 0.349, "step": 8194 }, { "epoch": 0.28122855181880574, "grad_norm": 0.79399963031623, "learning_rate": 8.434829583831439e-06, "loss": 0.3476, "step": 8195 }, { "epoch": 0.28126286890871655, "grad_norm": 1.1940896590972705, "learning_rate": 8.434425712832046e-06, "loss": 0.31, "step": 8196 }, { "epoch": 0.2812971859986273, "grad_norm": 0.8470126319148353, "learning_rate": 8.434021799404329e-06, "loss": 0.3409, "step": 8197 }, { "epoch": 0.2813315030885381, "grad_norm": 0.7889589906016317, "learning_rate": 8.433617843553273e-06, "loss": 0.3044, "step": 8198 }, { "epoch": 0.28136582017844886, "grad_norm": 0.7846940313839997, "learning_rate": 8.433213845283875e-06, "loss": 0.326, "step": 8199 }, { "epoch": 0.28140013726835966, "grad_norm": 0.8347199003380427, "learning_rate": 8.43280980460112e-06, "loss": 0.34, "step": 8200 }, { "epoch": 0.2814344543582704, "grad_norm": 0.841115999773969, "learning_rate": 8.432405721510003e-06, "loss": 0.3116, "step": 8201 }, { "epoch": 0.2814687714481812, "grad_norm": 0.8380717977145484, "learning_rate": 8.432001596015515e-06, "loss": 0.3874, "step": 8202 }, { "epoch": 0.281503088538092, "grad_norm": 0.7748786047538859, "learning_rate": 8.43159742812265e-06, "loss": 0.3013, "step": 8203 }, { "epoch": 0.2815374056280027, "grad_norm": 0.8155525876022668, "learning_rate": 8.431193217836397e-06, "loss": 0.3351, "step": 8204 }, { "epoch": 0.28157172271791353, "grad_norm": 0.8352333809340977, "learning_rate": 8.430788965161754e-06, "loss": 0.3034, "step": 8205 }, { "epoch": 0.2816060398078243, "grad_norm": 0.7425736262853975, "learning_rate": 8.430384670103714e-06, "loss": 0.284, "step": 8206 }, { "epoch": 0.2816403568977351, "grad_norm": 1.0578036750325344, "learning_rate": 8.42998033266727e-06, "loss": 0.3353, "step": 8207 }, { "epoch": 0.28167467398764584, "grad_norm": 0.7508942187637666, "learning_rate": 8.429575952857418e-06, "loss": 0.3598, "step": 8208 }, { "epoch": 0.28170899107755665, "grad_norm": 0.8230274972408027, "learning_rate": 8.429171530679151e-06, "loss": 0.3391, "step": 8209 }, { "epoch": 0.2817433081674674, "grad_norm": 0.7231526629217717, "learning_rate": 8.428767066137474e-06, "loss": 0.3175, "step": 8210 }, { "epoch": 0.28177762525737815, "grad_norm": 0.812166585280632, "learning_rate": 8.428362559237372e-06, "loss": 0.3094, "step": 8211 }, { "epoch": 0.28181194234728896, "grad_norm": 0.8694773747811446, "learning_rate": 8.42795800998385e-06, "loss": 0.369, "step": 8212 }, { "epoch": 0.2818462594371997, "grad_norm": 0.7263631665692002, "learning_rate": 8.427553418381902e-06, "loss": 0.3013, "step": 8213 }, { "epoch": 0.2818805765271105, "grad_norm": 0.7021916990093069, "learning_rate": 8.427148784436529e-06, "loss": 0.3096, "step": 8214 }, { "epoch": 0.28191489361702127, "grad_norm": 0.6796078734895066, "learning_rate": 8.426744108152726e-06, "loss": 0.3056, "step": 8215 }, { "epoch": 0.2819492107069321, "grad_norm": 0.765523313191745, "learning_rate": 8.426339389535498e-06, "loss": 0.3379, "step": 8216 }, { "epoch": 0.2819835277968428, "grad_norm": 0.8189216100734741, "learning_rate": 8.42593462858984e-06, "loss": 0.3049, "step": 8217 }, { "epoch": 0.2820178448867536, "grad_norm": 0.7830627302496346, "learning_rate": 8.425529825320754e-06, "loss": 0.2999, "step": 8218 }, { "epoch": 0.2820521619766644, "grad_norm": 0.749212193507286, "learning_rate": 8.425124979733241e-06, "loss": 0.3568, "step": 8219 }, { "epoch": 0.28208647906657514, "grad_norm": 0.7992177640028649, "learning_rate": 8.424720091832302e-06, "loss": 0.3148, "step": 8220 }, { "epoch": 0.28212079615648594, "grad_norm": 0.7414420073537984, "learning_rate": 8.424315161622939e-06, "loss": 0.2749, "step": 8221 }, { "epoch": 0.2821551132463967, "grad_norm": 0.7431400194006506, "learning_rate": 8.423910189110154e-06, "loss": 0.272, "step": 8222 }, { "epoch": 0.2821894303363075, "grad_norm": 0.7842002527399509, "learning_rate": 8.423505174298952e-06, "loss": 0.304, "step": 8223 }, { "epoch": 0.28222374742621825, "grad_norm": 0.77655380030209, "learning_rate": 8.423100117194334e-06, "loss": 0.2938, "step": 8224 }, { "epoch": 0.28225806451612906, "grad_norm": 0.7302870122928179, "learning_rate": 8.422695017801305e-06, "loss": 0.3072, "step": 8225 }, { "epoch": 0.2822923816060398, "grad_norm": 0.7194517494542171, "learning_rate": 8.422289876124869e-06, "loss": 0.2797, "step": 8226 }, { "epoch": 0.28232669869595056, "grad_norm": 0.796633834154137, "learning_rate": 8.421884692170033e-06, "loss": 0.2879, "step": 8227 }, { "epoch": 0.28236101578586137, "grad_norm": 0.7006180586857863, "learning_rate": 8.4214794659418e-06, "loss": 0.3218, "step": 8228 }, { "epoch": 0.2823953328757721, "grad_norm": 0.7418737663967773, "learning_rate": 8.421074197445178e-06, "loss": 0.3647, "step": 8229 }, { "epoch": 0.2824296499656829, "grad_norm": 0.6923807767315515, "learning_rate": 8.420668886685173e-06, "loss": 0.3887, "step": 8230 }, { "epoch": 0.2824639670555937, "grad_norm": 1.0821393917872109, "learning_rate": 8.420263533666791e-06, "loss": 0.3083, "step": 8231 }, { "epoch": 0.2824982841455045, "grad_norm": 0.7858228540166272, "learning_rate": 8.419858138395043e-06, "loss": 0.3286, "step": 8232 }, { "epoch": 0.28253260123541524, "grad_norm": 0.8046589309751571, "learning_rate": 8.419452700874932e-06, "loss": 0.3363, "step": 8233 }, { "epoch": 0.282566918325326, "grad_norm": 0.8170950659292234, "learning_rate": 8.41904722111147e-06, "loss": 0.3304, "step": 8234 }, { "epoch": 0.2826012354152368, "grad_norm": 0.7665022493124908, "learning_rate": 8.418641699109668e-06, "loss": 0.2755, "step": 8235 }, { "epoch": 0.28263555250514755, "grad_norm": 0.8171523244924181, "learning_rate": 8.418236134874532e-06, "loss": 0.326, "step": 8236 }, { "epoch": 0.28266986959505835, "grad_norm": 0.8153465717462186, "learning_rate": 8.417830528411073e-06, "loss": 0.3485, "step": 8237 }, { "epoch": 0.2827041866849691, "grad_norm": 0.7848801114030753, "learning_rate": 8.417424879724305e-06, "loss": 0.3161, "step": 8238 }, { "epoch": 0.2827385037748799, "grad_norm": 0.7200065386387767, "learning_rate": 8.417019188819236e-06, "loss": 0.3511, "step": 8239 }, { "epoch": 0.28277282086479066, "grad_norm": 0.8370411389505727, "learning_rate": 8.416613455700877e-06, "loss": 0.2779, "step": 8240 }, { "epoch": 0.2828071379547014, "grad_norm": 0.8638320550012814, "learning_rate": 8.416207680374243e-06, "loss": 0.3754, "step": 8241 }, { "epoch": 0.2828414550446122, "grad_norm": 0.7774952679009044, "learning_rate": 8.415801862844346e-06, "loss": 0.2918, "step": 8242 }, { "epoch": 0.282875772134523, "grad_norm": 0.7871705160763136, "learning_rate": 8.4153960031162e-06, "loss": 0.2786, "step": 8243 }, { "epoch": 0.2829100892244338, "grad_norm": 0.8029947325328354, "learning_rate": 8.414990101194817e-06, "loss": 0.3969, "step": 8244 }, { "epoch": 0.28294440631434453, "grad_norm": 0.7650419196919018, "learning_rate": 8.414584157085215e-06, "loss": 0.3079, "step": 8245 }, { "epoch": 0.28297872340425534, "grad_norm": 0.8242054916084025, "learning_rate": 8.414178170792406e-06, "loss": 0.3366, "step": 8246 }, { "epoch": 0.2830130404941661, "grad_norm": 0.7631624549211206, "learning_rate": 8.413772142321406e-06, "loss": 0.2937, "step": 8247 }, { "epoch": 0.2830473575840769, "grad_norm": 0.789067022418855, "learning_rate": 8.41336607167723e-06, "loss": 0.2946, "step": 8248 }, { "epoch": 0.28308167467398765, "grad_norm": 0.942543596765052, "learning_rate": 8.4129599588649e-06, "loss": 0.3668, "step": 8249 }, { "epoch": 0.2831159917638984, "grad_norm": 0.7902943650972605, "learning_rate": 8.412553803889425e-06, "loss": 0.3218, "step": 8250 }, { "epoch": 0.2831503088538092, "grad_norm": 0.764855825802815, "learning_rate": 8.412147606755827e-06, "loss": 0.2904, "step": 8251 }, { "epoch": 0.28318462594371996, "grad_norm": 0.7931798531059068, "learning_rate": 8.411741367469123e-06, "loss": 0.285, "step": 8252 }, { "epoch": 0.28321894303363077, "grad_norm": 0.901211762966255, "learning_rate": 8.411335086034333e-06, "loss": 0.3581, "step": 8253 }, { "epoch": 0.2832532601235415, "grad_norm": 0.809561775198115, "learning_rate": 8.410928762456476e-06, "loss": 0.3179, "step": 8254 }, { "epoch": 0.2832875772134523, "grad_norm": 0.7559971881064131, "learning_rate": 8.41052239674057e-06, "loss": 0.3038, "step": 8255 }, { "epoch": 0.2833218943033631, "grad_norm": 0.8292907233981164, "learning_rate": 8.410115988891635e-06, "loss": 0.3497, "step": 8256 }, { "epoch": 0.2833562113932738, "grad_norm": 0.7513406461115749, "learning_rate": 8.409709538914694e-06, "loss": 0.304, "step": 8257 }, { "epoch": 0.28339052848318463, "grad_norm": 0.8854193205646042, "learning_rate": 8.409303046814768e-06, "loss": 0.3555, "step": 8258 }, { "epoch": 0.2834248455730954, "grad_norm": 0.7327738831585862, "learning_rate": 8.408896512596876e-06, "loss": 0.2622, "step": 8259 }, { "epoch": 0.2834591626630062, "grad_norm": 0.9791133057936636, "learning_rate": 8.408489936266043e-06, "loss": 0.3034, "step": 8260 }, { "epoch": 0.28349347975291694, "grad_norm": 0.7912045850166294, "learning_rate": 8.408083317827289e-06, "loss": 0.3123, "step": 8261 }, { "epoch": 0.28352779684282775, "grad_norm": 0.7870518965594642, "learning_rate": 8.407676657285641e-06, "loss": 0.3468, "step": 8262 }, { "epoch": 0.2835621139327385, "grad_norm": 0.7357427849466639, "learning_rate": 8.40726995464612e-06, "loss": 0.3204, "step": 8263 }, { "epoch": 0.28359643102264925, "grad_norm": 0.6503395827297839, "learning_rate": 8.406863209913751e-06, "loss": 0.3007, "step": 8264 }, { "epoch": 0.28363074811256006, "grad_norm": 0.7392186254009675, "learning_rate": 8.40645642309356e-06, "loss": 0.2688, "step": 8265 }, { "epoch": 0.2836650652024708, "grad_norm": 0.8484674051575426, "learning_rate": 8.406049594190571e-06, "loss": 0.3282, "step": 8266 }, { "epoch": 0.2836993822923816, "grad_norm": 0.8393526762347784, "learning_rate": 8.40564272320981e-06, "loss": 0.2864, "step": 8267 }, { "epoch": 0.28373369938229237, "grad_norm": 0.7472654571403854, "learning_rate": 8.405235810156304e-06, "loss": 0.3794, "step": 8268 }, { "epoch": 0.2837680164722032, "grad_norm": 0.7707774912033608, "learning_rate": 8.404828855035079e-06, "loss": 0.3149, "step": 8269 }, { "epoch": 0.28380233356211393, "grad_norm": 0.7343328824147832, "learning_rate": 8.404421857851163e-06, "loss": 0.2832, "step": 8270 }, { "epoch": 0.28383665065202474, "grad_norm": 0.7466404221516174, "learning_rate": 8.404014818609586e-06, "loss": 0.3012, "step": 8271 }, { "epoch": 0.2838709677419355, "grad_norm": 0.6943145342609794, "learning_rate": 8.403607737315373e-06, "loss": 0.2637, "step": 8272 }, { "epoch": 0.28390528483184624, "grad_norm": 0.6588527889336481, "learning_rate": 8.403200613973556e-06, "loss": 0.2599, "step": 8273 }, { "epoch": 0.28393960192175705, "grad_norm": 0.7672657042443277, "learning_rate": 8.402793448589162e-06, "loss": 0.2656, "step": 8274 }, { "epoch": 0.2839739190116678, "grad_norm": 0.7742281468553097, "learning_rate": 8.402386241167222e-06, "loss": 0.3295, "step": 8275 }, { "epoch": 0.2840082361015786, "grad_norm": 0.8346992275492446, "learning_rate": 8.401978991712766e-06, "loss": 0.2759, "step": 8276 }, { "epoch": 0.28404255319148936, "grad_norm": 0.8611687552136729, "learning_rate": 8.401571700230828e-06, "loss": 0.3188, "step": 8277 }, { "epoch": 0.28407687028140016, "grad_norm": 0.8085998457675351, "learning_rate": 8.401164366726438e-06, "loss": 0.335, "step": 8278 }, { "epoch": 0.2841111873713109, "grad_norm": 0.8702978791610287, "learning_rate": 8.400756991204627e-06, "loss": 0.3177, "step": 8279 }, { "epoch": 0.28414550446122167, "grad_norm": 0.8100849799333352, "learning_rate": 8.400349573670428e-06, "loss": 0.3078, "step": 8280 }, { "epoch": 0.28417982155113247, "grad_norm": 0.7887450579137303, "learning_rate": 8.399942114128871e-06, "loss": 0.313, "step": 8281 }, { "epoch": 0.2842141386410432, "grad_norm": 0.8120549610675606, "learning_rate": 8.399534612584998e-06, "loss": 0.2644, "step": 8282 }, { "epoch": 0.28424845573095403, "grad_norm": 0.7820821170624129, "learning_rate": 8.399127069043837e-06, "loss": 0.2939, "step": 8283 }, { "epoch": 0.2842827728208648, "grad_norm": 0.7760719242539196, "learning_rate": 8.398719483510423e-06, "loss": 0.3336, "step": 8284 }, { "epoch": 0.2843170899107756, "grad_norm": 0.7460947979514222, "learning_rate": 8.398311855989793e-06, "loss": 0.3286, "step": 8285 }, { "epoch": 0.28435140700068634, "grad_norm": 0.7954156973569253, "learning_rate": 8.397904186486982e-06, "loss": 0.3166, "step": 8286 }, { "epoch": 0.2843857240905971, "grad_norm": 0.8251836973883501, "learning_rate": 8.397496475007025e-06, "loss": 0.2925, "step": 8287 }, { "epoch": 0.2844200411805079, "grad_norm": 0.8004927575020998, "learning_rate": 8.397088721554962e-06, "loss": 0.3384, "step": 8288 }, { "epoch": 0.28445435827041865, "grad_norm": 0.799918745631847, "learning_rate": 8.396680926135827e-06, "loss": 0.3352, "step": 8289 }, { "epoch": 0.28448867536032946, "grad_norm": 0.8732999699121199, "learning_rate": 8.39627308875466e-06, "loss": 0.3012, "step": 8290 }, { "epoch": 0.2845229924502402, "grad_norm": 0.7607466901320197, "learning_rate": 8.3958652094165e-06, "loss": 0.3397, "step": 8291 }, { "epoch": 0.284557309540151, "grad_norm": 0.8204156111535281, "learning_rate": 8.395457288126384e-06, "loss": 0.2944, "step": 8292 }, { "epoch": 0.28459162663006177, "grad_norm": 0.8390524950739787, "learning_rate": 8.39504932488935e-06, "loss": 0.3223, "step": 8293 }, { "epoch": 0.2846259437199726, "grad_norm": 0.8480805165144035, "learning_rate": 8.394641319710441e-06, "loss": 0.3608, "step": 8294 }, { "epoch": 0.2846602608098833, "grad_norm": 0.798939235832193, "learning_rate": 8.394233272594698e-06, "loss": 0.3199, "step": 8295 }, { "epoch": 0.2846945778997941, "grad_norm": 0.8445893016147914, "learning_rate": 8.393825183547157e-06, "loss": 0.3968, "step": 8296 }, { "epoch": 0.2847288949897049, "grad_norm": 0.9579207437241111, "learning_rate": 8.393417052572865e-06, "loss": 0.341, "step": 8297 }, { "epoch": 0.28476321207961564, "grad_norm": 0.7709290670759251, "learning_rate": 8.393008879676861e-06, "loss": 0.3122, "step": 8298 }, { "epoch": 0.28479752916952644, "grad_norm": 0.8037282357325439, "learning_rate": 8.392600664864189e-06, "loss": 0.2918, "step": 8299 }, { "epoch": 0.2848318462594372, "grad_norm": 0.7905141644540212, "learning_rate": 8.39219240813989e-06, "loss": 0.3371, "step": 8300 }, { "epoch": 0.284866163349348, "grad_norm": 0.8015194660617097, "learning_rate": 8.391784109509008e-06, "loss": 0.338, "step": 8301 }, { "epoch": 0.28490048043925875, "grad_norm": 0.8197171666028061, "learning_rate": 8.39137576897659e-06, "loss": 0.3469, "step": 8302 }, { "epoch": 0.2849347975291695, "grad_norm": 0.8556286182597875, "learning_rate": 8.390967386547676e-06, "loss": 0.3128, "step": 8303 }, { "epoch": 0.2849691146190803, "grad_norm": 1.0007612311838472, "learning_rate": 8.390558962227315e-06, "loss": 0.3788, "step": 8304 }, { "epoch": 0.28500343170899106, "grad_norm": 0.746657666134203, "learning_rate": 8.39015049602055e-06, "loss": 0.3286, "step": 8305 }, { "epoch": 0.28503774879890187, "grad_norm": 0.8132421421194712, "learning_rate": 8.389741987932429e-06, "loss": 0.289, "step": 8306 }, { "epoch": 0.2850720658888126, "grad_norm": 0.8094125009597192, "learning_rate": 8.389333437967997e-06, "loss": 0.2797, "step": 8307 }, { "epoch": 0.2851063829787234, "grad_norm": 0.9122056650998142, "learning_rate": 8.388924846132303e-06, "loss": 0.3511, "step": 8308 }, { "epoch": 0.2851407000686342, "grad_norm": 0.7913797378278427, "learning_rate": 8.388516212430393e-06, "loss": 0.3257, "step": 8309 }, { "epoch": 0.28517501715854493, "grad_norm": 0.827353074567523, "learning_rate": 8.388107536867316e-06, "loss": 0.3306, "step": 8310 }, { "epoch": 0.28520933424845574, "grad_norm": 0.7752943249878299, "learning_rate": 8.38769881944812e-06, "loss": 0.3333, "step": 8311 }, { "epoch": 0.2852436513383665, "grad_norm": 0.7924778721462302, "learning_rate": 8.387290060177855e-06, "loss": 0.3017, "step": 8312 }, { "epoch": 0.2852779684282773, "grad_norm": 0.7001125021713727, "learning_rate": 8.38688125906157e-06, "loss": 0.2926, "step": 8313 }, { "epoch": 0.28531228551818805, "grad_norm": 0.882110574986846, "learning_rate": 8.386472416104317e-06, "loss": 0.3772, "step": 8314 }, { "epoch": 0.28534660260809885, "grad_norm": 0.7736336360270758, "learning_rate": 8.386063531311147e-06, "loss": 0.3003, "step": 8315 }, { "epoch": 0.2853809196980096, "grad_norm": 0.7282129686783327, "learning_rate": 8.385654604687106e-06, "loss": 0.2882, "step": 8316 }, { "epoch": 0.28541523678792036, "grad_norm": 1.0628451409314723, "learning_rate": 8.385245636237252e-06, "loss": 0.3447, "step": 8317 }, { "epoch": 0.28544955387783116, "grad_norm": 0.7830737386908624, "learning_rate": 8.384836625966635e-06, "loss": 0.3205, "step": 8318 }, { "epoch": 0.2854838709677419, "grad_norm": 0.6558360386117751, "learning_rate": 8.384427573880307e-06, "loss": 0.2763, "step": 8319 }, { "epoch": 0.2855181880576527, "grad_norm": 0.7458884955308405, "learning_rate": 8.384018479983321e-06, "loss": 0.3145, "step": 8320 }, { "epoch": 0.2855525051475635, "grad_norm": 0.8030143079521102, "learning_rate": 8.383609344280734e-06, "loss": 0.3333, "step": 8321 }, { "epoch": 0.2855868222374743, "grad_norm": 0.7755821217230514, "learning_rate": 8.383200166777599e-06, "loss": 0.3118, "step": 8322 }, { "epoch": 0.28562113932738503, "grad_norm": 0.8307485224443608, "learning_rate": 8.382790947478967e-06, "loss": 0.3424, "step": 8323 }, { "epoch": 0.28565545641729584, "grad_norm": 0.8238871393639362, "learning_rate": 8.3823816863899e-06, "loss": 0.327, "step": 8324 }, { "epoch": 0.2856897735072066, "grad_norm": 0.796223420855483, "learning_rate": 8.381972383515452e-06, "loss": 0.2962, "step": 8325 }, { "epoch": 0.28572409059711734, "grad_norm": 0.9032692803070692, "learning_rate": 8.381563038860674e-06, "loss": 0.3274, "step": 8326 }, { "epoch": 0.28575840768702815, "grad_norm": 0.7314905472200249, "learning_rate": 8.38115365243063e-06, "loss": 0.2912, "step": 8327 }, { "epoch": 0.2857927247769389, "grad_norm": 0.8497344174111124, "learning_rate": 8.380744224230373e-06, "loss": 0.3082, "step": 8328 }, { "epoch": 0.2858270418668497, "grad_norm": 0.7391595195549282, "learning_rate": 8.380334754264963e-06, "loss": 0.3294, "step": 8329 }, { "epoch": 0.28586135895676046, "grad_norm": 1.2232899195337015, "learning_rate": 8.379925242539459e-06, "loss": 0.269, "step": 8330 }, { "epoch": 0.28589567604667127, "grad_norm": 0.7276295392736573, "learning_rate": 8.37951568905892e-06, "loss": 0.3291, "step": 8331 }, { "epoch": 0.285929993136582, "grad_norm": 0.792784283530959, "learning_rate": 8.379106093828405e-06, "loss": 0.3415, "step": 8332 }, { "epoch": 0.28596431022649277, "grad_norm": 0.8817301901592339, "learning_rate": 8.37869645685297e-06, "loss": 0.3147, "step": 8333 }, { "epoch": 0.2859986273164036, "grad_norm": 0.7624752062525343, "learning_rate": 8.378286778137684e-06, "loss": 0.3027, "step": 8334 }, { "epoch": 0.2860329444063143, "grad_norm": 0.7154222333226694, "learning_rate": 8.377877057687601e-06, "loss": 0.2679, "step": 8335 }, { "epoch": 0.28606726149622513, "grad_norm": 0.9125246167864183, "learning_rate": 8.377467295507786e-06, "loss": 0.3491, "step": 8336 }, { "epoch": 0.2861015785861359, "grad_norm": 0.8018888503953484, "learning_rate": 8.3770574916033e-06, "loss": 0.3384, "step": 8337 }, { "epoch": 0.2861358956760467, "grad_norm": 0.879387036306996, "learning_rate": 8.376647645979205e-06, "loss": 0.3549, "step": 8338 }, { "epoch": 0.28617021276595744, "grad_norm": 0.7861129834625816, "learning_rate": 8.376237758640567e-06, "loss": 0.3164, "step": 8339 }, { "epoch": 0.2862045298558682, "grad_norm": 0.8725965411477683, "learning_rate": 8.375827829592447e-06, "loss": 0.3222, "step": 8340 }, { "epoch": 0.286238846945779, "grad_norm": 0.7432133733279344, "learning_rate": 8.375417858839912e-06, "loss": 0.3184, "step": 8341 }, { "epoch": 0.28627316403568975, "grad_norm": 0.8151630166615894, "learning_rate": 8.375007846388021e-06, "loss": 0.2988, "step": 8342 }, { "epoch": 0.28630748112560056, "grad_norm": 1.1685619918029484, "learning_rate": 8.374597792241845e-06, "loss": 0.3383, "step": 8343 }, { "epoch": 0.2863417982155113, "grad_norm": 0.8256195753570231, "learning_rate": 8.374187696406448e-06, "loss": 0.2842, "step": 8344 }, { "epoch": 0.2863761153054221, "grad_norm": 0.7832063743604653, "learning_rate": 8.373777558886895e-06, "loss": 0.2964, "step": 8345 }, { "epoch": 0.28641043239533287, "grad_norm": 0.8171412579230817, "learning_rate": 8.373367379688253e-06, "loss": 0.3085, "step": 8346 }, { "epoch": 0.2864447494852437, "grad_norm": 0.7769488782926989, "learning_rate": 8.372957158815591e-06, "loss": 0.2895, "step": 8347 }, { "epoch": 0.28647906657515443, "grad_norm": 0.827477351172748, "learning_rate": 8.372546896273977e-06, "loss": 0.347, "step": 8348 }, { "epoch": 0.2865133836650652, "grad_norm": 0.7739534388920553, "learning_rate": 8.372136592068475e-06, "loss": 0.3053, "step": 8349 }, { "epoch": 0.286547700754976, "grad_norm": 0.8155074069623836, "learning_rate": 8.37172624620416e-06, "loss": 0.339, "step": 8350 }, { "epoch": 0.28658201784488674, "grad_norm": 0.8494749858729564, "learning_rate": 8.371315858686098e-06, "loss": 0.3298, "step": 8351 }, { "epoch": 0.28661633493479755, "grad_norm": 0.7125320966875657, "learning_rate": 8.370905429519359e-06, "loss": 0.3514, "step": 8352 }, { "epoch": 0.2866506520247083, "grad_norm": 0.8943586956960786, "learning_rate": 8.370494958709011e-06, "loss": 0.3224, "step": 8353 }, { "epoch": 0.2866849691146191, "grad_norm": 0.7183579898443102, "learning_rate": 8.370084446260129e-06, "loss": 0.3495, "step": 8354 }, { "epoch": 0.28671928620452986, "grad_norm": 0.8060555306252096, "learning_rate": 8.369673892177782e-06, "loss": 0.32, "step": 8355 }, { "epoch": 0.2867536032944406, "grad_norm": 0.8181478490482121, "learning_rate": 8.369263296467046e-06, "loss": 0.3282, "step": 8356 }, { "epoch": 0.2867879203843514, "grad_norm": 0.7018525920755748, "learning_rate": 8.368852659132987e-06, "loss": 0.2807, "step": 8357 }, { "epoch": 0.28682223747426216, "grad_norm": 0.7749413723643113, "learning_rate": 8.368441980180682e-06, "loss": 0.3364, "step": 8358 }, { "epoch": 0.28685655456417297, "grad_norm": 0.7679129272664588, "learning_rate": 8.368031259615204e-06, "loss": 0.3671, "step": 8359 }, { "epoch": 0.2868908716540837, "grad_norm": 0.786481103032411, "learning_rate": 8.367620497441627e-06, "loss": 0.3632, "step": 8360 }, { "epoch": 0.28692518874399453, "grad_norm": 0.7512727667107283, "learning_rate": 8.367209693665023e-06, "loss": 0.2773, "step": 8361 }, { "epoch": 0.2869595058339053, "grad_norm": 0.7257829049356941, "learning_rate": 8.36679884829047e-06, "loss": 0.3102, "step": 8362 }, { "epoch": 0.28699382292381603, "grad_norm": 0.7863400167516698, "learning_rate": 8.366387961323043e-06, "loss": 0.304, "step": 8363 }, { "epoch": 0.28702814001372684, "grad_norm": 0.7591370142340227, "learning_rate": 8.365977032767816e-06, "loss": 0.2846, "step": 8364 }, { "epoch": 0.2870624571036376, "grad_norm": 0.8412905305709621, "learning_rate": 8.365566062629869e-06, "loss": 0.2763, "step": 8365 }, { "epoch": 0.2870967741935484, "grad_norm": 0.7218491796151898, "learning_rate": 8.365155050914276e-06, "loss": 0.3301, "step": 8366 }, { "epoch": 0.28713109128345915, "grad_norm": 0.9225109988509778, "learning_rate": 8.364743997626117e-06, "loss": 0.3315, "step": 8367 }, { "epoch": 0.28716540837336996, "grad_norm": 0.7899142360037072, "learning_rate": 8.364332902770466e-06, "loss": 0.2929, "step": 8368 }, { "epoch": 0.2871997254632807, "grad_norm": 0.8387790049988433, "learning_rate": 8.363921766352407e-06, "loss": 0.2817, "step": 8369 }, { "epoch": 0.2872340425531915, "grad_norm": 0.7105365985160021, "learning_rate": 8.363510588377018e-06, "loss": 0.3665, "step": 8370 }, { "epoch": 0.28726835964310227, "grad_norm": 0.8562357827492565, "learning_rate": 8.363099368849374e-06, "loss": 0.3555, "step": 8371 }, { "epoch": 0.287302676733013, "grad_norm": 0.7602721141806191, "learning_rate": 8.362688107774559e-06, "loss": 0.3089, "step": 8372 }, { "epoch": 0.2873369938229238, "grad_norm": 0.7838741057301382, "learning_rate": 8.362276805157655e-06, "loss": 0.3406, "step": 8373 }, { "epoch": 0.2873713109128346, "grad_norm": 0.7881400218810785, "learning_rate": 8.361865461003737e-06, "loss": 0.312, "step": 8374 }, { "epoch": 0.2874056280027454, "grad_norm": 0.7924737854873434, "learning_rate": 8.361454075317894e-06, "loss": 0.3594, "step": 8375 }, { "epoch": 0.28743994509265614, "grad_norm": 0.7363750392864982, "learning_rate": 8.361042648105205e-06, "loss": 0.3144, "step": 8376 }, { "epoch": 0.28747426218256694, "grad_norm": 0.8415065887658443, "learning_rate": 8.36063117937075e-06, "loss": 0.3248, "step": 8377 }, { "epoch": 0.2875085792724777, "grad_norm": 0.728023597999046, "learning_rate": 8.360219669119618e-06, "loss": 0.2735, "step": 8378 }, { "epoch": 0.28754289636238844, "grad_norm": 0.7345935515282529, "learning_rate": 8.359808117356889e-06, "loss": 0.3277, "step": 8379 }, { "epoch": 0.28757721345229925, "grad_norm": 0.7916266539740685, "learning_rate": 8.359396524087646e-06, "loss": 0.3205, "step": 8380 }, { "epoch": 0.28761153054221, "grad_norm": 0.8528074388754441, "learning_rate": 8.358984889316976e-06, "loss": 0.34, "step": 8381 }, { "epoch": 0.2876458476321208, "grad_norm": 0.7488688964328372, "learning_rate": 8.358573213049965e-06, "loss": 0.328, "step": 8382 }, { "epoch": 0.28768016472203156, "grad_norm": 0.8173609675763315, "learning_rate": 8.358161495291698e-06, "loss": 0.3587, "step": 8383 }, { "epoch": 0.28771448181194237, "grad_norm": 0.6980523424097076, "learning_rate": 8.35774973604726e-06, "loss": 0.2698, "step": 8384 }, { "epoch": 0.2877487989018531, "grad_norm": 0.850877349997577, "learning_rate": 8.35733793532174e-06, "loss": 0.3727, "step": 8385 }, { "epoch": 0.28778311599176387, "grad_norm": 0.8241248642568904, "learning_rate": 8.356926093120223e-06, "loss": 0.3041, "step": 8386 }, { "epoch": 0.2878174330816747, "grad_norm": 0.8119857350155962, "learning_rate": 8.356514209447799e-06, "loss": 0.3689, "step": 8387 }, { "epoch": 0.28785175017158543, "grad_norm": 0.7231265769671331, "learning_rate": 8.356102284309553e-06, "loss": 0.262, "step": 8388 }, { "epoch": 0.28788606726149624, "grad_norm": 0.9089360994925944, "learning_rate": 8.355690317710579e-06, "loss": 0.3496, "step": 8389 }, { "epoch": 0.287920384351407, "grad_norm": 0.7184226749081276, "learning_rate": 8.355278309655961e-06, "loss": 0.2622, "step": 8390 }, { "epoch": 0.2879547014413178, "grad_norm": 0.7945284366693053, "learning_rate": 8.354866260150793e-06, "loss": 0.2829, "step": 8391 }, { "epoch": 0.28798901853122855, "grad_norm": 0.8162272727288624, "learning_rate": 8.354454169200163e-06, "loss": 0.3153, "step": 8392 }, { "epoch": 0.28802333562113935, "grad_norm": 0.8119579911777391, "learning_rate": 8.354042036809162e-06, "loss": 0.2963, "step": 8393 }, { "epoch": 0.2880576527110501, "grad_norm": 0.8864196729528697, "learning_rate": 8.353629862982884e-06, "loss": 0.2936, "step": 8394 }, { "epoch": 0.28809196980096086, "grad_norm": 0.7685719678053075, "learning_rate": 8.353217647726416e-06, "loss": 0.2956, "step": 8395 }, { "epoch": 0.28812628689087166, "grad_norm": 0.8090695275496161, "learning_rate": 8.352805391044855e-06, "loss": 0.3035, "step": 8396 }, { "epoch": 0.2881606039807824, "grad_norm": 0.7884854680896849, "learning_rate": 8.352393092943295e-06, "loss": 0.3709, "step": 8397 }, { "epoch": 0.2881949210706932, "grad_norm": 0.7153289661447706, "learning_rate": 8.351980753426822e-06, "loss": 0.3261, "step": 8398 }, { "epoch": 0.288229238160604, "grad_norm": 0.8327769681181226, "learning_rate": 8.35156837250054e-06, "loss": 0.3182, "step": 8399 }, { "epoch": 0.2882635552505148, "grad_norm": 0.7855906298618727, "learning_rate": 8.351155950169533e-06, "loss": 0.3413, "step": 8400 }, { "epoch": 0.28829787234042553, "grad_norm": 0.675322808477657, "learning_rate": 8.350743486438906e-06, "loss": 0.297, "step": 8401 }, { "epoch": 0.2883321894303363, "grad_norm": 0.7759865051571025, "learning_rate": 8.350330981313749e-06, "loss": 0.2992, "step": 8402 }, { "epoch": 0.2883665065202471, "grad_norm": 0.7877922499696348, "learning_rate": 8.349918434799156e-06, "loss": 0.3272, "step": 8403 }, { "epoch": 0.28840082361015784, "grad_norm": 0.6990710053415743, "learning_rate": 8.349505846900228e-06, "loss": 0.3025, "step": 8404 }, { "epoch": 0.28843514070006865, "grad_norm": 0.7670117408519458, "learning_rate": 8.349093217622062e-06, "loss": 0.3107, "step": 8405 }, { "epoch": 0.2884694577899794, "grad_norm": 0.8098673072571361, "learning_rate": 8.34868054696975e-06, "loss": 0.3256, "step": 8406 }, { "epoch": 0.2885037748798902, "grad_norm": 0.7291185216779206, "learning_rate": 8.348267834948397e-06, "loss": 0.3274, "step": 8407 }, { "epoch": 0.28853809196980096, "grad_norm": 0.8651754961394896, "learning_rate": 8.347855081563098e-06, "loss": 0.3482, "step": 8408 }, { "epoch": 0.2885724090597117, "grad_norm": 0.7942368539899887, "learning_rate": 8.347442286818951e-06, "loss": 0.319, "step": 8409 }, { "epoch": 0.2886067261496225, "grad_norm": 0.8412810408022551, "learning_rate": 8.34702945072106e-06, "loss": 0.3369, "step": 8410 }, { "epoch": 0.28864104323953327, "grad_norm": 0.7710947749843416, "learning_rate": 8.346616573274522e-06, "loss": 0.3384, "step": 8411 }, { "epoch": 0.2886753603294441, "grad_norm": 0.7161452653614335, "learning_rate": 8.346203654484437e-06, "loss": 0.297, "step": 8412 }, { "epoch": 0.2887096774193548, "grad_norm": 0.7967806902797179, "learning_rate": 8.345790694355907e-06, "loss": 0.2951, "step": 8413 }, { "epoch": 0.28874399450926563, "grad_norm": 0.9594403609373361, "learning_rate": 8.345377692894034e-06, "loss": 0.2894, "step": 8414 }, { "epoch": 0.2887783115991764, "grad_norm": 0.8562096836516668, "learning_rate": 8.344964650103922e-06, "loss": 0.3246, "step": 8415 }, { "epoch": 0.2888126286890872, "grad_norm": 0.9280560468202017, "learning_rate": 8.344551565990669e-06, "loss": 0.3403, "step": 8416 }, { "epoch": 0.28884694577899794, "grad_norm": 1.026939951653826, "learning_rate": 8.344138440559382e-06, "loss": 0.2956, "step": 8417 }, { "epoch": 0.2888812628689087, "grad_norm": 0.7752143853989566, "learning_rate": 8.343725273815162e-06, "loss": 0.2942, "step": 8418 }, { "epoch": 0.2889155799588195, "grad_norm": 0.8544443138786798, "learning_rate": 8.343312065763116e-06, "loss": 0.337, "step": 8419 }, { "epoch": 0.28894989704873025, "grad_norm": 0.7711539551058466, "learning_rate": 8.342898816408346e-06, "loss": 0.3359, "step": 8420 }, { "epoch": 0.28898421413864106, "grad_norm": 0.732431379941473, "learning_rate": 8.342485525755962e-06, "loss": 0.2915, "step": 8421 }, { "epoch": 0.2890185312285518, "grad_norm": 0.7901014736280093, "learning_rate": 8.342072193811063e-06, "loss": 0.2567, "step": 8422 }, { "epoch": 0.2890528483184626, "grad_norm": 0.8307290036572874, "learning_rate": 8.341658820578758e-06, "loss": 0.3153, "step": 8423 }, { "epoch": 0.28908716540837337, "grad_norm": 0.8127394470804475, "learning_rate": 8.341245406064156e-06, "loss": 0.3774, "step": 8424 }, { "epoch": 0.2891214824982841, "grad_norm": 0.7435778878635879, "learning_rate": 8.340831950272363e-06, "loss": 0.3138, "step": 8425 }, { "epoch": 0.28915579958819493, "grad_norm": 0.7493744762996496, "learning_rate": 8.340418453208485e-06, "loss": 0.3032, "step": 8426 }, { "epoch": 0.2891901166781057, "grad_norm": 0.7760174012991181, "learning_rate": 8.340004914877632e-06, "loss": 0.3562, "step": 8427 }, { "epoch": 0.2892244337680165, "grad_norm": 0.7518457434204654, "learning_rate": 8.339591335284913e-06, "loss": 0.3223, "step": 8428 }, { "epoch": 0.28925875085792724, "grad_norm": 0.7769765067484808, "learning_rate": 8.339177714435435e-06, "loss": 0.2846, "step": 8429 }, { "epoch": 0.28929306794783805, "grad_norm": 0.7375510574396784, "learning_rate": 8.338764052334312e-06, "loss": 0.3063, "step": 8430 }, { "epoch": 0.2893273850377488, "grad_norm": 0.8354757283153389, "learning_rate": 8.33835034898665e-06, "loss": 0.3401, "step": 8431 }, { "epoch": 0.28936170212765955, "grad_norm": 0.7597130292149921, "learning_rate": 8.337936604397561e-06, "loss": 0.2441, "step": 8432 }, { "epoch": 0.28939601921757035, "grad_norm": 0.7327261242937424, "learning_rate": 8.337522818572159e-06, "loss": 0.3095, "step": 8433 }, { "epoch": 0.2894303363074811, "grad_norm": 0.7446075226978585, "learning_rate": 8.337108991515552e-06, "loss": 0.3245, "step": 8434 }, { "epoch": 0.2894646533973919, "grad_norm": 0.9877743907921321, "learning_rate": 8.336695123232854e-06, "loss": 0.362, "step": 8435 }, { "epoch": 0.28949897048730266, "grad_norm": 0.85444788761633, "learning_rate": 8.336281213729179e-06, "loss": 0.3275, "step": 8436 }, { "epoch": 0.28953328757721347, "grad_norm": 0.8329842572375674, "learning_rate": 8.335867263009638e-06, "loss": 0.3654, "step": 8437 }, { "epoch": 0.2895676046671242, "grad_norm": 0.6934702867538596, "learning_rate": 8.335453271079347e-06, "loss": 0.2898, "step": 8438 }, { "epoch": 0.28960192175703503, "grad_norm": 0.7242266652479532, "learning_rate": 8.335039237943419e-06, "loss": 0.3586, "step": 8439 }, { "epoch": 0.2896362388469458, "grad_norm": 0.7883461768901153, "learning_rate": 8.334625163606972e-06, "loss": 0.2916, "step": 8440 }, { "epoch": 0.28967055593685653, "grad_norm": 0.7718260291129172, "learning_rate": 8.334211048075116e-06, "loss": 0.321, "step": 8441 }, { "epoch": 0.28970487302676734, "grad_norm": 0.7695652096917784, "learning_rate": 8.333796891352971e-06, "loss": 0.3187, "step": 8442 }, { "epoch": 0.2897391901166781, "grad_norm": 0.7943054846499915, "learning_rate": 8.333382693445653e-06, "loss": 0.278, "step": 8443 }, { "epoch": 0.2897735072065889, "grad_norm": 0.7946457887598114, "learning_rate": 8.332968454358277e-06, "loss": 0.3477, "step": 8444 }, { "epoch": 0.28980782429649965, "grad_norm": 0.8929425199051335, "learning_rate": 8.332554174095962e-06, "loss": 0.3189, "step": 8445 }, { "epoch": 0.28984214138641046, "grad_norm": 0.7641653566941458, "learning_rate": 8.332139852663823e-06, "loss": 0.2803, "step": 8446 }, { "epoch": 0.2898764584763212, "grad_norm": 0.816697774025897, "learning_rate": 8.331725490066985e-06, "loss": 0.288, "step": 8447 }, { "epoch": 0.28991077556623196, "grad_norm": 0.7774462464049279, "learning_rate": 8.331311086310561e-06, "loss": 0.356, "step": 8448 }, { "epoch": 0.28994509265614277, "grad_norm": 0.7761729117848566, "learning_rate": 8.330896641399673e-06, "loss": 0.3391, "step": 8449 }, { "epoch": 0.2899794097460535, "grad_norm": 0.7434221057317791, "learning_rate": 8.33048215533944e-06, "loss": 0.3104, "step": 8450 }, { "epoch": 0.2900137268359643, "grad_norm": 0.7997806172367143, "learning_rate": 8.330067628134983e-06, "loss": 0.3553, "step": 8451 }, { "epoch": 0.2900480439258751, "grad_norm": 0.7879084089635308, "learning_rate": 8.329653059791425e-06, "loss": 0.3741, "step": 8452 }, { "epoch": 0.2900823610157859, "grad_norm": 0.7355698999549883, "learning_rate": 8.329238450313883e-06, "loss": 0.2966, "step": 8453 }, { "epoch": 0.29011667810569663, "grad_norm": 0.8368466438904661, "learning_rate": 8.328823799707482e-06, "loss": 0.3741, "step": 8454 }, { "epoch": 0.2901509951956074, "grad_norm": 0.831034139687722, "learning_rate": 8.328409107977344e-06, "loss": 0.3177, "step": 8455 }, { "epoch": 0.2901853122855182, "grad_norm": 0.8544171865498833, "learning_rate": 8.327994375128592e-06, "loss": 0.3286, "step": 8456 }, { "epoch": 0.29021962937542894, "grad_norm": 0.7634893531435869, "learning_rate": 8.32757960116635e-06, "loss": 0.3429, "step": 8457 }, { "epoch": 0.29025394646533975, "grad_norm": 0.7334550456590878, "learning_rate": 8.32716478609574e-06, "loss": 0.3575, "step": 8458 }, { "epoch": 0.2902882635552505, "grad_norm": 0.7493520654164428, "learning_rate": 8.32674992992189e-06, "loss": 0.2864, "step": 8459 }, { "epoch": 0.2903225806451613, "grad_norm": 0.824005259724091, "learning_rate": 8.326335032649924e-06, "loss": 0.3658, "step": 8460 }, { "epoch": 0.29035689773507206, "grad_norm": 0.765924640356542, "learning_rate": 8.325920094284964e-06, "loss": 0.2865, "step": 8461 }, { "epoch": 0.29039121482498287, "grad_norm": 0.7529600191120177, "learning_rate": 8.32550511483214e-06, "loss": 0.2865, "step": 8462 }, { "epoch": 0.2904255319148936, "grad_norm": 0.8814318978213872, "learning_rate": 8.32509009429658e-06, "loss": 0.3477, "step": 8463 }, { "epoch": 0.29045984900480437, "grad_norm": 0.6748676037559128, "learning_rate": 8.324675032683405e-06, "loss": 0.3277, "step": 8464 }, { "epoch": 0.2904941660947152, "grad_norm": 0.7908646150395066, "learning_rate": 8.324259929997748e-06, "loss": 0.3585, "step": 8465 }, { "epoch": 0.29052848318462593, "grad_norm": 0.8570705193195208, "learning_rate": 8.323844786244735e-06, "loss": 0.2866, "step": 8466 }, { "epoch": 0.29056280027453674, "grad_norm": 0.7145791323216503, "learning_rate": 8.323429601429495e-06, "loss": 0.2922, "step": 8467 }, { "epoch": 0.2905971173644475, "grad_norm": 0.7314857501880825, "learning_rate": 8.323014375557156e-06, "loss": 0.2902, "step": 8468 }, { "epoch": 0.2906314344543583, "grad_norm": 0.799630279740183, "learning_rate": 8.322599108632849e-06, "loss": 0.2719, "step": 8469 }, { "epoch": 0.29066575154426905, "grad_norm": 0.8411126220036774, "learning_rate": 8.322183800661705e-06, "loss": 0.3824, "step": 8470 }, { "epoch": 0.2907000686341798, "grad_norm": 0.6910159473956462, "learning_rate": 8.321768451648853e-06, "loss": 0.3103, "step": 8471 }, { "epoch": 0.2907343857240906, "grad_norm": 0.7393718551917545, "learning_rate": 8.321353061599424e-06, "loss": 0.3085, "step": 8472 }, { "epoch": 0.29076870281400136, "grad_norm": 0.7866978032947132, "learning_rate": 8.32093763051855e-06, "loss": 0.3421, "step": 8473 }, { "epoch": 0.29080301990391216, "grad_norm": 0.704960485902913, "learning_rate": 8.320522158411364e-06, "loss": 0.3198, "step": 8474 }, { "epoch": 0.2908373369938229, "grad_norm": 0.8969509144481375, "learning_rate": 8.320106645282998e-06, "loss": 0.3587, "step": 8475 }, { "epoch": 0.2908716540837337, "grad_norm": 0.7711864737348775, "learning_rate": 8.319691091138585e-06, "loss": 0.2879, "step": 8476 }, { "epoch": 0.2909059711736445, "grad_norm": 0.8309445572504331, "learning_rate": 8.31927549598326e-06, "loss": 0.3144, "step": 8477 }, { "epoch": 0.2909402882635552, "grad_norm": 0.7827532056134958, "learning_rate": 8.318859859822154e-06, "loss": 0.3694, "step": 8478 }, { "epoch": 0.29097460535346603, "grad_norm": 0.7467689532120602, "learning_rate": 8.318444182660406e-06, "loss": 0.319, "step": 8479 }, { "epoch": 0.2910089224433768, "grad_norm": 0.7254054052363298, "learning_rate": 8.318028464503148e-06, "loss": 0.3475, "step": 8480 }, { "epoch": 0.2910432395332876, "grad_norm": 0.7590886729234212, "learning_rate": 8.317612705355517e-06, "loss": 0.2697, "step": 8481 }, { "epoch": 0.29107755662319834, "grad_norm": 0.7879372196059788, "learning_rate": 8.317196905222648e-06, "loss": 0.3972, "step": 8482 }, { "epoch": 0.29111187371310915, "grad_norm": 0.8245588651497124, "learning_rate": 8.31678106410968e-06, "loss": 0.3048, "step": 8483 }, { "epoch": 0.2911461908030199, "grad_norm": 0.6885769368228646, "learning_rate": 8.316365182021749e-06, "loss": 0.307, "step": 8484 }, { "epoch": 0.2911805078929307, "grad_norm": 0.7918639671922474, "learning_rate": 8.315949258963993e-06, "loss": 0.3436, "step": 8485 }, { "epoch": 0.29121482498284146, "grad_norm": 0.7432378703609155, "learning_rate": 8.315533294941548e-06, "loss": 0.2691, "step": 8486 }, { "epoch": 0.2912491420727522, "grad_norm": 0.7653683667255047, "learning_rate": 8.315117289959557e-06, "loss": 0.3895, "step": 8487 }, { "epoch": 0.291283459162663, "grad_norm": 0.8319740204398759, "learning_rate": 8.314701244023156e-06, "loss": 0.2817, "step": 8488 }, { "epoch": 0.29131777625257377, "grad_norm": 0.869679406044003, "learning_rate": 8.314285157137485e-06, "loss": 0.3273, "step": 8489 }, { "epoch": 0.2913520933424846, "grad_norm": 0.7800584443276699, "learning_rate": 8.313869029307686e-06, "loss": 0.2893, "step": 8490 }, { "epoch": 0.2913864104323953, "grad_norm": 0.9116379539678875, "learning_rate": 8.313452860538899e-06, "loss": 0.3533, "step": 8491 }, { "epoch": 0.29142072752230613, "grad_norm": 0.7820684486269743, "learning_rate": 8.313036650836264e-06, "loss": 0.2679, "step": 8492 }, { "epoch": 0.2914550446122169, "grad_norm": 0.7123520254378819, "learning_rate": 8.312620400204923e-06, "loss": 0.3089, "step": 8493 }, { "epoch": 0.29148936170212764, "grad_norm": 0.7279418052767788, "learning_rate": 8.31220410865002e-06, "loss": 0.2688, "step": 8494 }, { "epoch": 0.29152367879203844, "grad_norm": 0.8077557683841454, "learning_rate": 8.311787776176698e-06, "loss": 0.3162, "step": 8495 }, { "epoch": 0.2915579958819492, "grad_norm": 0.7458187583453766, "learning_rate": 8.311371402790098e-06, "loss": 0.2991, "step": 8496 }, { "epoch": 0.29159231297186, "grad_norm": 0.7419947963330454, "learning_rate": 8.310954988495366e-06, "loss": 0.3306, "step": 8497 }, { "epoch": 0.29162663006177075, "grad_norm": 0.7998848995158299, "learning_rate": 8.310538533297644e-06, "loss": 0.2777, "step": 8498 }, { "epoch": 0.29166094715168156, "grad_norm": 0.8972264193549194, "learning_rate": 8.310122037202079e-06, "loss": 0.3762, "step": 8499 }, { "epoch": 0.2916952642415923, "grad_norm": 0.7501963740383082, "learning_rate": 8.309705500213814e-06, "loss": 0.3496, "step": 8500 }, { "epoch": 0.29172958133150306, "grad_norm": 0.7691047583617884, "learning_rate": 8.309288922337999e-06, "loss": 0.3023, "step": 8501 }, { "epoch": 0.29176389842141387, "grad_norm": 0.8297590569400843, "learning_rate": 8.308872303579776e-06, "loss": 0.3179, "step": 8502 }, { "epoch": 0.2917982155113246, "grad_norm": 0.8406001908893714, "learning_rate": 8.308455643944293e-06, "loss": 0.3375, "step": 8503 }, { "epoch": 0.29183253260123543, "grad_norm": 0.8463370545109299, "learning_rate": 8.308038943436697e-06, "loss": 0.2903, "step": 8504 }, { "epoch": 0.2918668496911462, "grad_norm": 0.7523986315450573, "learning_rate": 8.30762220206214e-06, "loss": 0.3355, "step": 8505 }, { "epoch": 0.291901166781057, "grad_norm": 0.732219563053465, "learning_rate": 8.307205419825764e-06, "loss": 0.2899, "step": 8506 }, { "epoch": 0.29193548387096774, "grad_norm": 0.7673314381428173, "learning_rate": 8.306788596732723e-06, "loss": 0.309, "step": 8507 }, { "epoch": 0.29196980096087854, "grad_norm": 0.8196761620356867, "learning_rate": 8.306371732788162e-06, "loss": 0.2768, "step": 8508 }, { "epoch": 0.2920041180507893, "grad_norm": 0.8231429634069426, "learning_rate": 8.305954827997234e-06, "loss": 0.2919, "step": 8509 }, { "epoch": 0.29203843514070005, "grad_norm": 0.8341397202178433, "learning_rate": 8.305537882365088e-06, "loss": 0.3278, "step": 8510 }, { "epoch": 0.29207275223061085, "grad_norm": 0.7842984469012934, "learning_rate": 8.305120895896876e-06, "loss": 0.3471, "step": 8511 }, { "epoch": 0.2921070693205216, "grad_norm": 0.8756931537949102, "learning_rate": 8.30470386859775e-06, "loss": 0.2803, "step": 8512 }, { "epoch": 0.2921413864104324, "grad_norm": 0.7718245428246266, "learning_rate": 8.304286800472858e-06, "loss": 0.3315, "step": 8513 }, { "epoch": 0.29217570350034316, "grad_norm": 0.8241293340781771, "learning_rate": 8.303869691527358e-06, "loss": 0.3395, "step": 8514 }, { "epoch": 0.29221002059025397, "grad_norm": 0.7974514692734068, "learning_rate": 8.303452541766396e-06, "loss": 0.3139, "step": 8515 }, { "epoch": 0.2922443376801647, "grad_norm": 0.7448440019712282, "learning_rate": 8.303035351195133e-06, "loss": 0.3216, "step": 8516 }, { "epoch": 0.2922786547700755, "grad_norm": 0.775105825028006, "learning_rate": 8.302618119818718e-06, "loss": 0.3689, "step": 8517 }, { "epoch": 0.2923129718599863, "grad_norm": 0.806037762459296, "learning_rate": 8.302200847642306e-06, "loss": 0.3377, "step": 8518 }, { "epoch": 0.29234728894989703, "grad_norm": 0.7801059027941268, "learning_rate": 8.301783534671053e-06, "loss": 0.305, "step": 8519 }, { "epoch": 0.29238160603980784, "grad_norm": 0.8787065385407182, "learning_rate": 8.301366180910114e-06, "loss": 0.3444, "step": 8520 }, { "epoch": 0.2924159231297186, "grad_norm": 0.7446841398106485, "learning_rate": 8.300948786364644e-06, "loss": 0.2764, "step": 8521 }, { "epoch": 0.2924502402196294, "grad_norm": 0.7262650697088783, "learning_rate": 8.300531351039802e-06, "loss": 0.3092, "step": 8522 }, { "epoch": 0.29248455730954015, "grad_norm": 0.7410127280799971, "learning_rate": 8.300113874940742e-06, "loss": 0.2708, "step": 8523 }, { "epoch": 0.2925188743994509, "grad_norm": 0.7668527824345788, "learning_rate": 8.299696358072623e-06, "loss": 0.2756, "step": 8524 }, { "epoch": 0.2925531914893617, "grad_norm": 0.736134978666503, "learning_rate": 8.299278800440602e-06, "loss": 0.3323, "step": 8525 }, { "epoch": 0.29258750857927246, "grad_norm": 0.8214689268012966, "learning_rate": 8.298861202049838e-06, "loss": 0.3462, "step": 8526 }, { "epoch": 0.29262182566918327, "grad_norm": 0.8078545338599622, "learning_rate": 8.298443562905492e-06, "loss": 0.2841, "step": 8527 }, { "epoch": 0.292656142759094, "grad_norm": 0.787887057193217, "learning_rate": 8.298025883012719e-06, "loss": 0.3047, "step": 8528 }, { "epoch": 0.2926904598490048, "grad_norm": 0.7988206583198302, "learning_rate": 8.297608162376682e-06, "loss": 0.3989, "step": 8529 }, { "epoch": 0.2927247769389156, "grad_norm": 0.7350284326211135, "learning_rate": 8.297190401002541e-06, "loss": 0.3406, "step": 8530 }, { "epoch": 0.2927590940288264, "grad_norm": 0.7475794958064044, "learning_rate": 8.296772598895457e-06, "loss": 0.3741, "step": 8531 }, { "epoch": 0.29279341111873713, "grad_norm": 0.8163616894234275, "learning_rate": 8.296354756060589e-06, "loss": 0.331, "step": 8532 }, { "epoch": 0.2928277282086479, "grad_norm": 0.8108783036076022, "learning_rate": 8.295936872503102e-06, "loss": 0.3165, "step": 8533 }, { "epoch": 0.2928620452985587, "grad_norm": 0.849055037379737, "learning_rate": 8.295518948228161e-06, "loss": 0.2674, "step": 8534 }, { "epoch": 0.29289636238846944, "grad_norm": 0.8300914917390626, "learning_rate": 8.295100983240922e-06, "loss": 0.2711, "step": 8535 }, { "epoch": 0.29293067947838025, "grad_norm": 0.8529608063912416, "learning_rate": 8.294682977546555e-06, "loss": 0.3683, "step": 8536 }, { "epoch": 0.292964996568291, "grad_norm": 0.9338681591629511, "learning_rate": 8.294264931150218e-06, "loss": 0.276, "step": 8537 }, { "epoch": 0.2929993136582018, "grad_norm": 0.8033231868741936, "learning_rate": 8.29384684405708e-06, "loss": 0.2871, "step": 8538 }, { "epoch": 0.29303363074811256, "grad_norm": 0.8303988808243572, "learning_rate": 8.293428716272303e-06, "loss": 0.3001, "step": 8539 }, { "epoch": 0.2930679478380233, "grad_norm": 0.7319204891707054, "learning_rate": 8.293010547801056e-06, "loss": 0.2746, "step": 8540 }, { "epoch": 0.2931022649279341, "grad_norm": 0.8077980988698101, "learning_rate": 8.292592338648503e-06, "loss": 0.3234, "step": 8541 }, { "epoch": 0.29313658201784487, "grad_norm": 0.8197123746916306, "learning_rate": 8.29217408881981e-06, "loss": 0.3839, "step": 8542 }, { "epoch": 0.2931708991077557, "grad_norm": 0.7302804395394418, "learning_rate": 8.291755798320144e-06, "loss": 0.328, "step": 8543 }, { "epoch": 0.29320521619766643, "grad_norm": 0.957224612843366, "learning_rate": 8.291337467154672e-06, "loss": 0.3494, "step": 8544 }, { "epoch": 0.29323953328757724, "grad_norm": 0.6749175429372354, "learning_rate": 8.290919095328565e-06, "loss": 0.3261, "step": 8545 }, { "epoch": 0.293273850377488, "grad_norm": 0.73029774757413, "learning_rate": 8.290500682846987e-06, "loss": 0.334, "step": 8546 }, { "epoch": 0.29330816746739874, "grad_norm": 0.8837846631567715, "learning_rate": 8.290082229715111e-06, "loss": 0.3314, "step": 8547 }, { "epoch": 0.29334248455730955, "grad_norm": 0.7598597126714067, "learning_rate": 8.289663735938105e-06, "loss": 0.3049, "step": 8548 }, { "epoch": 0.2933768016472203, "grad_norm": 0.7819443906366148, "learning_rate": 8.289245201521139e-06, "loss": 0.2764, "step": 8549 }, { "epoch": 0.2934111187371311, "grad_norm": 0.8078133246982115, "learning_rate": 8.288826626469383e-06, "loss": 0.2985, "step": 8550 }, { "epoch": 0.29344543582704186, "grad_norm": 0.7550035587913397, "learning_rate": 8.28840801078801e-06, "loss": 0.2989, "step": 8551 }, { "epoch": 0.29347975291695266, "grad_norm": 0.7862861257067418, "learning_rate": 8.287989354482186e-06, "loss": 0.3004, "step": 8552 }, { "epoch": 0.2935140700068634, "grad_norm": 0.8371955628119641, "learning_rate": 8.287570657557091e-06, "loss": 0.3204, "step": 8553 }, { "epoch": 0.29354838709677417, "grad_norm": 0.7512183053968398, "learning_rate": 8.28715192001789e-06, "loss": 0.3344, "step": 8554 }, { "epoch": 0.293582704186685, "grad_norm": 1.3196183270640962, "learning_rate": 8.286733141869764e-06, "loss": 0.2639, "step": 8555 }, { "epoch": 0.2936170212765957, "grad_norm": 0.7872762030600283, "learning_rate": 8.286314323117877e-06, "loss": 0.3357, "step": 8556 }, { "epoch": 0.29365133836650653, "grad_norm": 0.8535979727162473, "learning_rate": 8.285895463767411e-06, "loss": 0.3232, "step": 8557 }, { "epoch": 0.2936856554564173, "grad_norm": 0.9331646760087982, "learning_rate": 8.285476563823537e-06, "loss": 0.3333, "step": 8558 }, { "epoch": 0.2937199725463281, "grad_norm": 0.8152378305053427, "learning_rate": 8.28505762329143e-06, "loss": 0.3673, "step": 8559 }, { "epoch": 0.29375428963623884, "grad_norm": 0.822109713870264, "learning_rate": 8.284638642176266e-06, "loss": 0.2879, "step": 8560 }, { "epoch": 0.29378860672614965, "grad_norm": 0.7326467368653742, "learning_rate": 8.284219620483223e-06, "loss": 0.3051, "step": 8561 }, { "epoch": 0.2938229238160604, "grad_norm": 0.7128501095991776, "learning_rate": 8.283800558217474e-06, "loss": 0.2804, "step": 8562 }, { "epoch": 0.29385724090597115, "grad_norm": 0.7297907233382275, "learning_rate": 8.283381455384198e-06, "loss": 0.2989, "step": 8563 }, { "epoch": 0.29389155799588196, "grad_norm": 0.7181877758166864, "learning_rate": 8.282962311988572e-06, "loss": 0.3195, "step": 8564 }, { "epoch": 0.2939258750857927, "grad_norm": 0.8190808329997615, "learning_rate": 8.282543128035774e-06, "loss": 0.3236, "step": 8565 }, { "epoch": 0.2939601921757035, "grad_norm": 0.7009939435804822, "learning_rate": 8.282123903530983e-06, "loss": 0.2938, "step": 8566 }, { "epoch": 0.29399450926561427, "grad_norm": 0.7536390665597005, "learning_rate": 8.281704638479378e-06, "loss": 0.3766, "step": 8567 }, { "epoch": 0.2940288263555251, "grad_norm": 0.8052356495463616, "learning_rate": 8.281285332886137e-06, "loss": 0.3259, "step": 8568 }, { "epoch": 0.2940631434454358, "grad_norm": 0.8167566938851225, "learning_rate": 8.280865986756443e-06, "loss": 0.3561, "step": 8569 }, { "epoch": 0.2940974605353466, "grad_norm": 0.7770920484020002, "learning_rate": 8.280446600095472e-06, "loss": 0.3569, "step": 8570 }, { "epoch": 0.2941317776252574, "grad_norm": 0.7925743640071315, "learning_rate": 8.280027172908411e-06, "loss": 0.3067, "step": 8571 }, { "epoch": 0.29416609471516814, "grad_norm": 0.7580331519812872, "learning_rate": 8.279607705200438e-06, "loss": 0.2907, "step": 8572 }, { "epoch": 0.29420041180507894, "grad_norm": 0.8524457696665981, "learning_rate": 8.279188196976735e-06, "loss": 0.3346, "step": 8573 }, { "epoch": 0.2942347288949897, "grad_norm": 0.7825404136747832, "learning_rate": 8.278768648242485e-06, "loss": 0.2777, "step": 8574 }, { "epoch": 0.2942690459849005, "grad_norm": 0.694524364386642, "learning_rate": 8.27834905900287e-06, "loss": 0.3198, "step": 8575 }, { "epoch": 0.29430336307481125, "grad_norm": 0.8443096934273874, "learning_rate": 8.277929429263076e-06, "loss": 0.3645, "step": 8576 }, { "epoch": 0.294337680164722, "grad_norm": 0.7753975063600833, "learning_rate": 8.277509759028285e-06, "loss": 0.3264, "step": 8577 }, { "epoch": 0.2943719972546328, "grad_norm": 0.7148979065723603, "learning_rate": 8.277090048303682e-06, "loss": 0.2845, "step": 8578 }, { "epoch": 0.29440631434454356, "grad_norm": 0.6913380293154616, "learning_rate": 8.276670297094453e-06, "loss": 0.283, "step": 8579 }, { "epoch": 0.29444063143445437, "grad_norm": 0.7215602906899363, "learning_rate": 8.276250505405781e-06, "loss": 0.3122, "step": 8580 }, { "epoch": 0.2944749485243651, "grad_norm": 0.7677087120718247, "learning_rate": 8.275830673242856e-06, "loss": 0.4105, "step": 8581 }, { "epoch": 0.29450926561427593, "grad_norm": 0.7248830952477577, "learning_rate": 8.275410800610862e-06, "loss": 0.3223, "step": 8582 }, { "epoch": 0.2945435827041867, "grad_norm": 0.8174640709772996, "learning_rate": 8.274990887514985e-06, "loss": 0.3661, "step": 8583 }, { "epoch": 0.2945778997940975, "grad_norm": 0.8167299845194051, "learning_rate": 8.274570933960413e-06, "loss": 0.3173, "step": 8584 }, { "epoch": 0.29461221688400824, "grad_norm": 0.8285530894043871, "learning_rate": 8.274150939952337e-06, "loss": 0.3095, "step": 8585 }, { "epoch": 0.294646533973919, "grad_norm": 0.8672181733405542, "learning_rate": 8.273730905495943e-06, "loss": 0.3416, "step": 8586 }, { "epoch": 0.2946808510638298, "grad_norm": 0.7874503324892037, "learning_rate": 8.27331083059642e-06, "loss": 0.3051, "step": 8587 }, { "epoch": 0.29471516815374055, "grad_norm": 0.7626184557556702, "learning_rate": 8.272890715258957e-06, "loss": 0.3225, "step": 8588 }, { "epoch": 0.29474948524365135, "grad_norm": 0.6996708660680651, "learning_rate": 8.272470559488745e-06, "loss": 0.3087, "step": 8589 }, { "epoch": 0.2947838023335621, "grad_norm": 0.8240940370842812, "learning_rate": 8.272050363290976e-06, "loss": 0.2968, "step": 8590 }, { "epoch": 0.2948181194234729, "grad_norm": 0.8486762222547733, "learning_rate": 8.271630126670839e-06, "loss": 0.2996, "step": 8591 }, { "epoch": 0.29485243651338366, "grad_norm": 0.8770060725329871, "learning_rate": 8.271209849633527e-06, "loss": 0.3134, "step": 8592 }, { "epoch": 0.2948867536032944, "grad_norm": 0.7517776180391352, "learning_rate": 8.270789532184232e-06, "loss": 0.2741, "step": 8593 }, { "epoch": 0.2949210706932052, "grad_norm": 0.7633871483178559, "learning_rate": 8.270369174328144e-06, "loss": 0.332, "step": 8594 }, { "epoch": 0.294955387783116, "grad_norm": 0.7319464823918491, "learning_rate": 8.269948776070458e-06, "loss": 0.301, "step": 8595 }, { "epoch": 0.2949897048730268, "grad_norm": 0.7741796909915092, "learning_rate": 8.269528337416367e-06, "loss": 0.2731, "step": 8596 }, { "epoch": 0.29502402196293753, "grad_norm": 0.7460407122057084, "learning_rate": 8.269107858371065e-06, "loss": 0.258, "step": 8597 }, { "epoch": 0.29505833905284834, "grad_norm": 0.8374174544377337, "learning_rate": 8.268687338939746e-06, "loss": 0.3246, "step": 8598 }, { "epoch": 0.2950926561427591, "grad_norm": 0.7627707930994233, "learning_rate": 8.268266779127609e-06, "loss": 0.2906, "step": 8599 }, { "epoch": 0.29512697323266984, "grad_norm": 0.7849295227472833, "learning_rate": 8.267846178939845e-06, "loss": 0.3092, "step": 8600 }, { "epoch": 0.29516129032258065, "grad_norm": 0.7954504896673804, "learning_rate": 8.267425538381651e-06, "loss": 0.2843, "step": 8601 }, { "epoch": 0.2951956074124914, "grad_norm": 0.764063505167974, "learning_rate": 8.267004857458224e-06, "loss": 0.301, "step": 8602 }, { "epoch": 0.2952299245024022, "grad_norm": 0.7717582329468952, "learning_rate": 8.26658413617476e-06, "loss": 0.3519, "step": 8603 }, { "epoch": 0.29526424159231296, "grad_norm": 0.8458403279654985, "learning_rate": 8.266163374536457e-06, "loss": 0.3366, "step": 8604 }, { "epoch": 0.29529855868222377, "grad_norm": 0.8464975464525559, "learning_rate": 8.265742572548517e-06, "loss": 0.3293, "step": 8605 }, { "epoch": 0.2953328757721345, "grad_norm": 0.6977874209556303, "learning_rate": 8.26532173021613e-06, "loss": 0.3029, "step": 8606 }, { "epoch": 0.2953671928620453, "grad_norm": 0.745771792725253, "learning_rate": 8.264900847544505e-06, "loss": 0.3286, "step": 8607 }, { "epoch": 0.2954015099519561, "grad_norm": 0.8292437397883579, "learning_rate": 8.264479924538833e-06, "loss": 0.3281, "step": 8608 }, { "epoch": 0.2954358270418668, "grad_norm": 0.8105677506625297, "learning_rate": 8.26405896120432e-06, "loss": 0.2671, "step": 8609 }, { "epoch": 0.29547014413177763, "grad_norm": 0.7706950173355689, "learning_rate": 8.263637957546162e-06, "loss": 0.3151, "step": 8610 }, { "epoch": 0.2955044612216884, "grad_norm": 0.7744778074497003, "learning_rate": 8.263216913569563e-06, "loss": 0.3247, "step": 8611 }, { "epoch": 0.2955387783115992, "grad_norm": 0.7866444055873417, "learning_rate": 8.262795829279724e-06, "loss": 0.2702, "step": 8612 }, { "epoch": 0.29557309540150994, "grad_norm": 0.7799981388376243, "learning_rate": 8.262374704681847e-06, "loss": 0.3122, "step": 8613 }, { "epoch": 0.29560741249142075, "grad_norm": 0.7304262251530783, "learning_rate": 8.261953539781134e-06, "loss": 0.2921, "step": 8614 }, { "epoch": 0.2956417295813315, "grad_norm": 0.7418109709132025, "learning_rate": 8.261532334582786e-06, "loss": 0.267, "step": 8615 }, { "epoch": 0.29567604667124225, "grad_norm": 0.7643528640428572, "learning_rate": 8.26111108909201e-06, "loss": 0.2659, "step": 8616 }, { "epoch": 0.29571036376115306, "grad_norm": 0.7090263047562334, "learning_rate": 8.260689803314008e-06, "loss": 0.3298, "step": 8617 }, { "epoch": 0.2957446808510638, "grad_norm": 0.7000124807972826, "learning_rate": 8.260268477253986e-06, "loss": 0.2582, "step": 8618 }, { "epoch": 0.2957789979409746, "grad_norm": 0.7305418666592733, "learning_rate": 8.259847110917147e-06, "loss": 0.3246, "step": 8619 }, { "epoch": 0.29581331503088537, "grad_norm": 0.7085267079863291, "learning_rate": 8.2594257043087e-06, "loss": 0.2537, "step": 8620 }, { "epoch": 0.2958476321207962, "grad_norm": 0.832853874882523, "learning_rate": 8.259004257433844e-06, "loss": 0.3155, "step": 8621 }, { "epoch": 0.29588194921070693, "grad_norm": 0.6969958813050325, "learning_rate": 8.258582770297793e-06, "loss": 0.3102, "step": 8622 }, { "epoch": 0.2959162663006177, "grad_norm": 0.7632232944939057, "learning_rate": 8.258161242905752e-06, "loss": 0.3262, "step": 8623 }, { "epoch": 0.2959505833905285, "grad_norm": 0.7404558929614008, "learning_rate": 8.257739675262926e-06, "loss": 0.3104, "step": 8624 }, { "epoch": 0.29598490048043924, "grad_norm": 0.884302949772273, "learning_rate": 8.257318067374524e-06, "loss": 0.3315, "step": 8625 }, { "epoch": 0.29601921757035005, "grad_norm": 0.8923678785426077, "learning_rate": 8.256896419245757e-06, "loss": 0.3745, "step": 8626 }, { "epoch": 0.2960535346602608, "grad_norm": 0.7887771207211889, "learning_rate": 8.25647473088183e-06, "loss": 0.2845, "step": 8627 }, { "epoch": 0.2960878517501716, "grad_norm": 0.8115091499328944, "learning_rate": 8.256053002287955e-06, "loss": 0.3069, "step": 8628 }, { "epoch": 0.29612216884008236, "grad_norm": 0.7858801245996184, "learning_rate": 8.25563123346934e-06, "loss": 0.3116, "step": 8629 }, { "epoch": 0.29615648592999316, "grad_norm": 0.7408129104643535, "learning_rate": 8.2552094244312e-06, "loss": 0.3004, "step": 8630 }, { "epoch": 0.2961908030199039, "grad_norm": 0.8253236788147896, "learning_rate": 8.25478757517874e-06, "loss": 0.3129, "step": 8631 }, { "epoch": 0.29622512010981467, "grad_norm": 0.7312763584336616, "learning_rate": 8.254365685717177e-06, "loss": 0.3319, "step": 8632 }, { "epoch": 0.2962594371997255, "grad_norm": 0.7470745400434721, "learning_rate": 8.253943756051717e-06, "loss": 0.2582, "step": 8633 }, { "epoch": 0.2962937542896362, "grad_norm": 0.6794969043483944, "learning_rate": 8.253521786187578e-06, "loss": 0.2985, "step": 8634 }, { "epoch": 0.29632807137954703, "grad_norm": 0.6821518313427204, "learning_rate": 8.253099776129969e-06, "loss": 0.3706, "step": 8635 }, { "epoch": 0.2963623884694578, "grad_norm": 0.8309918682115416, "learning_rate": 8.252677725884105e-06, "loss": 0.2959, "step": 8636 }, { "epoch": 0.2963967055593686, "grad_norm": 0.7609006071575076, "learning_rate": 8.2522556354552e-06, "loss": 0.3662, "step": 8637 }, { "epoch": 0.29643102264927934, "grad_norm": 0.8107183758859946, "learning_rate": 8.25183350484847e-06, "loss": 0.3326, "step": 8638 }, { "epoch": 0.2964653397391901, "grad_norm": 0.7459907337376035, "learning_rate": 8.251411334069126e-06, "loss": 0.3112, "step": 8639 }, { "epoch": 0.2964996568291009, "grad_norm": 0.7598777658720482, "learning_rate": 8.250989123122388e-06, "loss": 0.3086, "step": 8640 }, { "epoch": 0.29653397391901165, "grad_norm": 0.8067124729552052, "learning_rate": 8.250566872013467e-06, "loss": 0.3295, "step": 8641 }, { "epoch": 0.29656829100892246, "grad_norm": 0.7101146401302448, "learning_rate": 8.250144580747584e-06, "loss": 0.2757, "step": 8642 }, { "epoch": 0.2966026080988332, "grad_norm": 0.7573040771289029, "learning_rate": 8.249722249329954e-06, "loss": 0.3029, "step": 8643 }, { "epoch": 0.296636925188744, "grad_norm": 0.7772500653911678, "learning_rate": 8.249299877765794e-06, "loss": 0.3177, "step": 8644 }, { "epoch": 0.29667124227865477, "grad_norm": 0.7299438159386449, "learning_rate": 8.248877466060323e-06, "loss": 0.2969, "step": 8645 }, { "epoch": 0.2967055593685655, "grad_norm": 0.7340177283431897, "learning_rate": 8.248455014218759e-06, "loss": 0.3167, "step": 8646 }, { "epoch": 0.2967398764584763, "grad_norm": 0.8017081239423307, "learning_rate": 8.24803252224632e-06, "loss": 0.3222, "step": 8647 }, { "epoch": 0.2967741935483871, "grad_norm": 0.7335179566124213, "learning_rate": 8.247609990148224e-06, "loss": 0.3164, "step": 8648 }, { "epoch": 0.2968085106382979, "grad_norm": 0.7252758033603298, "learning_rate": 8.247187417929696e-06, "loss": 0.285, "step": 8649 }, { "epoch": 0.29684282772820864, "grad_norm": 0.6821709770825016, "learning_rate": 8.24676480559595e-06, "loss": 0.2895, "step": 8650 }, { "epoch": 0.29687714481811944, "grad_norm": 0.7482399640154981, "learning_rate": 8.246342153152214e-06, "loss": 0.33, "step": 8651 }, { "epoch": 0.2969114619080302, "grad_norm": 0.7941314088128771, "learning_rate": 8.245919460603703e-06, "loss": 0.2957, "step": 8652 }, { "epoch": 0.296945778997941, "grad_norm": 0.7703092584981881, "learning_rate": 8.245496727955643e-06, "loss": 0.3233, "step": 8653 }, { "epoch": 0.29698009608785175, "grad_norm": 0.7754739828109103, "learning_rate": 8.245073955213254e-06, "loss": 0.3026, "step": 8654 }, { "epoch": 0.2970144131777625, "grad_norm": 0.8410875272573467, "learning_rate": 8.24465114238176e-06, "loss": 0.32, "step": 8655 }, { "epoch": 0.2970487302676733, "grad_norm": 0.7485376437753654, "learning_rate": 8.244228289466382e-06, "loss": 0.3035, "step": 8656 }, { "epoch": 0.29708304735758406, "grad_norm": 0.7076312110138084, "learning_rate": 8.243805396472349e-06, "loss": 0.277, "step": 8657 }, { "epoch": 0.29711736444749487, "grad_norm": 0.746022699496641, "learning_rate": 8.24338246340488e-06, "loss": 0.3001, "step": 8658 }, { "epoch": 0.2971516815374056, "grad_norm": 0.688839879904209, "learning_rate": 8.242959490269204e-06, "loss": 0.3202, "step": 8659 }, { "epoch": 0.29718599862731643, "grad_norm": 0.8401929723145538, "learning_rate": 8.242536477070541e-06, "loss": 0.3269, "step": 8660 }, { "epoch": 0.2972203157172272, "grad_norm": 0.7392173097689549, "learning_rate": 8.242113423814124e-06, "loss": 0.3302, "step": 8661 }, { "epoch": 0.29725463280713793, "grad_norm": 0.839913371223718, "learning_rate": 8.241690330505173e-06, "loss": 0.3651, "step": 8662 }, { "epoch": 0.29728894989704874, "grad_norm": 0.736712765582071, "learning_rate": 8.24126719714892e-06, "loss": 0.2868, "step": 8663 }, { "epoch": 0.2973232669869595, "grad_norm": 0.8998914033981225, "learning_rate": 8.240844023750588e-06, "loss": 0.3399, "step": 8664 }, { "epoch": 0.2973575840768703, "grad_norm": 0.8166352420148069, "learning_rate": 8.240420810315407e-06, "loss": 0.2922, "step": 8665 }, { "epoch": 0.29739190116678105, "grad_norm": 0.9251030417493318, "learning_rate": 8.239997556848605e-06, "loss": 0.3028, "step": 8666 }, { "epoch": 0.29742621825669185, "grad_norm": 0.8511538460137603, "learning_rate": 8.23957426335541e-06, "loss": 0.2921, "step": 8667 }, { "epoch": 0.2974605353466026, "grad_norm": 0.7994006077226677, "learning_rate": 8.239150929841054e-06, "loss": 0.3542, "step": 8668 }, { "epoch": 0.29749485243651336, "grad_norm": 1.0197614237828614, "learning_rate": 8.238727556310762e-06, "loss": 0.3376, "step": 8669 }, { "epoch": 0.29752916952642416, "grad_norm": 0.7907536087905878, "learning_rate": 8.238304142769768e-06, "loss": 0.273, "step": 8670 }, { "epoch": 0.2975634866163349, "grad_norm": 0.7698560032505916, "learning_rate": 8.2378806892233e-06, "loss": 0.3775, "step": 8671 }, { "epoch": 0.2975978037062457, "grad_norm": 0.7431499631994041, "learning_rate": 8.237457195676594e-06, "loss": 0.2989, "step": 8672 }, { "epoch": 0.2976321207961565, "grad_norm": 0.7291134565510708, "learning_rate": 8.237033662134879e-06, "loss": 0.286, "step": 8673 }, { "epoch": 0.2976664378860673, "grad_norm": 0.762634568771628, "learning_rate": 8.236610088603386e-06, "loss": 0.3136, "step": 8674 }, { "epoch": 0.29770075497597803, "grad_norm": 0.7658247319522901, "learning_rate": 8.236186475087349e-06, "loss": 0.3158, "step": 8675 }, { "epoch": 0.29773507206588884, "grad_norm": 0.7255457974492704, "learning_rate": 8.235762821591999e-06, "loss": 0.3106, "step": 8676 }, { "epoch": 0.2977693891557996, "grad_norm": 0.8449664459742354, "learning_rate": 8.235339128122575e-06, "loss": 0.352, "step": 8677 }, { "epoch": 0.29780370624571034, "grad_norm": 0.7683006447720845, "learning_rate": 8.234915394684308e-06, "loss": 0.32, "step": 8678 }, { "epoch": 0.29783802333562115, "grad_norm": 0.8230208695029999, "learning_rate": 8.234491621282432e-06, "loss": 0.339, "step": 8679 }, { "epoch": 0.2978723404255319, "grad_norm": 0.8241186386817189, "learning_rate": 8.234067807922181e-06, "loss": 0.3508, "step": 8680 }, { "epoch": 0.2979066575154427, "grad_norm": 0.7287555630043231, "learning_rate": 8.233643954608796e-06, "loss": 0.3172, "step": 8681 }, { "epoch": 0.29794097460535346, "grad_norm": 0.7452870465510538, "learning_rate": 8.233220061347509e-06, "loss": 0.3323, "step": 8682 }, { "epoch": 0.29797529169526427, "grad_norm": 0.7104380261861549, "learning_rate": 8.232796128143557e-06, "loss": 0.2966, "step": 8683 }, { "epoch": 0.298009608785175, "grad_norm": 0.783485824612664, "learning_rate": 8.232372155002179e-06, "loss": 0.3295, "step": 8684 }, { "epoch": 0.29804392587508577, "grad_norm": 0.7617462335872506, "learning_rate": 8.23194814192861e-06, "loss": 0.3103, "step": 8685 }, { "epoch": 0.2980782429649966, "grad_norm": 0.7737817878044847, "learning_rate": 8.231524088928091e-06, "loss": 0.2933, "step": 8686 }, { "epoch": 0.2981125600549073, "grad_norm": 0.7736187464541083, "learning_rate": 8.231099996005858e-06, "loss": 0.3843, "step": 8687 }, { "epoch": 0.29814687714481813, "grad_norm": 0.7010550678165609, "learning_rate": 8.230675863167155e-06, "loss": 0.3162, "step": 8688 }, { "epoch": 0.2981811942347289, "grad_norm": 0.7793196968231669, "learning_rate": 8.230251690417215e-06, "loss": 0.2954, "step": 8689 }, { "epoch": 0.2982155113246397, "grad_norm": 0.8371918421059441, "learning_rate": 8.229827477761283e-06, "loss": 0.2861, "step": 8690 }, { "epoch": 0.29824982841455044, "grad_norm": 0.7193725889743813, "learning_rate": 8.229403225204598e-06, "loss": 0.3081, "step": 8691 }, { "epoch": 0.2982841455044612, "grad_norm": 0.713097004903885, "learning_rate": 8.2289789327524e-06, "loss": 0.3225, "step": 8692 }, { "epoch": 0.298318462594372, "grad_norm": 0.7725303339497064, "learning_rate": 8.228554600409933e-06, "loss": 0.3623, "step": 8693 }, { "epoch": 0.29835277968428275, "grad_norm": 0.8035450986523381, "learning_rate": 8.228130228182438e-06, "loss": 0.3474, "step": 8694 }, { "epoch": 0.29838709677419356, "grad_norm": 0.7938774735878625, "learning_rate": 8.227705816075158e-06, "loss": 0.3385, "step": 8695 }, { "epoch": 0.2984214138641043, "grad_norm": 0.7805083976980814, "learning_rate": 8.227281364093334e-06, "loss": 0.3708, "step": 8696 }, { "epoch": 0.2984557309540151, "grad_norm": 0.7306205115126319, "learning_rate": 8.226856872242214e-06, "loss": 0.2818, "step": 8697 }, { "epoch": 0.29849004804392587, "grad_norm": 0.8052952467700014, "learning_rate": 8.226432340527037e-06, "loss": 0.3499, "step": 8698 }, { "epoch": 0.2985243651338367, "grad_norm": 0.734810709537079, "learning_rate": 8.226007768953053e-06, "loss": 0.3785, "step": 8699 }, { "epoch": 0.29855868222374743, "grad_norm": 0.8182006167717177, "learning_rate": 8.225583157525504e-06, "loss": 0.3265, "step": 8700 }, { "epoch": 0.2985929993136582, "grad_norm": 0.7980203409785814, "learning_rate": 8.225158506249633e-06, "loss": 0.3448, "step": 8701 }, { "epoch": 0.298627316403569, "grad_norm": 0.873761214754205, "learning_rate": 8.22473381513069e-06, "loss": 0.3962, "step": 8702 }, { "epoch": 0.29866163349347974, "grad_norm": 0.8341873148750785, "learning_rate": 8.224309084173921e-06, "loss": 0.3048, "step": 8703 }, { "epoch": 0.29869595058339055, "grad_norm": 0.7437429171941214, "learning_rate": 8.223884313384571e-06, "loss": 0.3105, "step": 8704 }, { "epoch": 0.2987302676733013, "grad_norm": 0.7610556645565688, "learning_rate": 8.22345950276789e-06, "loss": 0.306, "step": 8705 }, { "epoch": 0.2987645847632121, "grad_norm": 0.8753321202866821, "learning_rate": 8.223034652329125e-06, "loss": 0.3493, "step": 8706 }, { "epoch": 0.29879890185312286, "grad_norm": 0.7799021406798785, "learning_rate": 8.222609762073526e-06, "loss": 0.3147, "step": 8707 }, { "epoch": 0.2988332189430336, "grad_norm": 0.7634128288571619, "learning_rate": 8.222184832006337e-06, "loss": 0.3251, "step": 8708 }, { "epoch": 0.2988675360329444, "grad_norm": 0.7035005125362237, "learning_rate": 8.221759862132815e-06, "loss": 0.3248, "step": 8709 }, { "epoch": 0.29890185312285517, "grad_norm": 0.9483093289940632, "learning_rate": 8.221334852458203e-06, "loss": 0.2965, "step": 8710 }, { "epoch": 0.298936170212766, "grad_norm": 0.8098047726280184, "learning_rate": 8.220909802987757e-06, "loss": 0.2851, "step": 8711 }, { "epoch": 0.2989704873026767, "grad_norm": 0.7007187610927441, "learning_rate": 8.220484713726724e-06, "loss": 0.2687, "step": 8712 }, { "epoch": 0.29900480439258753, "grad_norm": 0.7879940911970232, "learning_rate": 8.220059584680357e-06, "loss": 0.3732, "step": 8713 }, { "epoch": 0.2990391214824983, "grad_norm": 0.801513843822603, "learning_rate": 8.21963441585391e-06, "loss": 0.3334, "step": 8714 }, { "epoch": 0.29907343857240903, "grad_norm": 0.7889272145681803, "learning_rate": 8.219209207252632e-06, "loss": 0.3538, "step": 8715 }, { "epoch": 0.29910775566231984, "grad_norm": 0.7522554361617461, "learning_rate": 8.218783958881778e-06, "loss": 0.3637, "step": 8716 }, { "epoch": 0.2991420727522306, "grad_norm": 0.9741500500089122, "learning_rate": 8.2183586707466e-06, "loss": 0.3419, "step": 8717 }, { "epoch": 0.2991763898421414, "grad_norm": 0.7462215037958436, "learning_rate": 8.217933342852354e-06, "loss": 0.2909, "step": 8718 }, { "epoch": 0.29921070693205215, "grad_norm": 0.8570586641491202, "learning_rate": 8.217507975204294e-06, "loss": 0.3051, "step": 8719 }, { "epoch": 0.29924502402196296, "grad_norm": 0.7500441479721474, "learning_rate": 8.217082567807671e-06, "loss": 0.3191, "step": 8720 }, { "epoch": 0.2992793411118737, "grad_norm": 0.8718333039036621, "learning_rate": 8.216657120667746e-06, "loss": 0.3469, "step": 8721 }, { "epoch": 0.2993136582017845, "grad_norm": 0.762231158580206, "learning_rate": 8.216231633789773e-06, "loss": 0.276, "step": 8722 }, { "epoch": 0.29934797529169527, "grad_norm": 0.7249904790582326, "learning_rate": 8.215806107179006e-06, "loss": 0.3646, "step": 8723 }, { "epoch": 0.299382292381606, "grad_norm": 0.9032157142193492, "learning_rate": 8.215380540840704e-06, "loss": 0.2673, "step": 8724 }, { "epoch": 0.2994166094715168, "grad_norm": 0.9786667273636118, "learning_rate": 8.214954934780125e-06, "loss": 0.2986, "step": 8725 }, { "epoch": 0.2994509265614276, "grad_norm": 0.9870980103107762, "learning_rate": 8.214529289002527e-06, "loss": 0.3293, "step": 8726 }, { "epoch": 0.2994852436513384, "grad_norm": 0.8082689079646996, "learning_rate": 8.214103603513167e-06, "loss": 0.3059, "step": 8727 }, { "epoch": 0.29951956074124914, "grad_norm": 0.7891301627102698, "learning_rate": 8.213677878317304e-06, "loss": 0.3156, "step": 8728 }, { "epoch": 0.29955387783115994, "grad_norm": 0.6694567781350371, "learning_rate": 8.213252113420199e-06, "loss": 0.3151, "step": 8729 }, { "epoch": 0.2995881949210707, "grad_norm": 0.736913552510971, "learning_rate": 8.212826308827106e-06, "loss": 0.2874, "step": 8730 }, { "epoch": 0.29962251201098145, "grad_norm": 0.7132853745349232, "learning_rate": 8.212400464543293e-06, "loss": 0.2491, "step": 8731 }, { "epoch": 0.29965682910089225, "grad_norm": 0.711356205509102, "learning_rate": 8.211974580574018e-06, "loss": 0.2781, "step": 8732 }, { "epoch": 0.299691146190803, "grad_norm": 0.7468986677839871, "learning_rate": 8.211548656924541e-06, "loss": 0.2891, "step": 8733 }, { "epoch": 0.2997254632807138, "grad_norm": 0.77146387841248, "learning_rate": 8.211122693600124e-06, "loss": 0.2938, "step": 8734 }, { "epoch": 0.29975978037062456, "grad_norm": 0.8689819688245893, "learning_rate": 8.210696690606031e-06, "loss": 0.2984, "step": 8735 }, { "epoch": 0.29979409746053537, "grad_norm": 0.813273413358163, "learning_rate": 8.210270647947524e-06, "loss": 0.3336, "step": 8736 }, { "epoch": 0.2998284145504461, "grad_norm": 0.7563067847702587, "learning_rate": 8.209844565629865e-06, "loss": 0.2907, "step": 8737 }, { "epoch": 0.29986273164035687, "grad_norm": 0.8219618048203835, "learning_rate": 8.20941844365832e-06, "loss": 0.391, "step": 8738 }, { "epoch": 0.2998970487302677, "grad_norm": 0.7239508986110075, "learning_rate": 8.20899228203815e-06, "loss": 0.2655, "step": 8739 }, { "epoch": 0.29993136582017843, "grad_norm": 0.794221462865364, "learning_rate": 8.208566080774622e-06, "loss": 0.3179, "step": 8740 }, { "epoch": 0.29996568291008924, "grad_norm": 0.739466249013881, "learning_rate": 8.208139839873002e-06, "loss": 0.2819, "step": 8741 }, { "epoch": 0.3, "grad_norm": 0.8953121892468225, "learning_rate": 8.207713559338554e-06, "loss": 0.2242, "step": 8742 }, { "epoch": 0.3000343170899108, "grad_norm": 0.9170901073348432, "learning_rate": 8.207287239176544e-06, "loss": 0.3857, "step": 8743 }, { "epoch": 0.30006863417982155, "grad_norm": 0.7497281282893687, "learning_rate": 8.20686087939224e-06, "loss": 0.2702, "step": 8744 }, { "epoch": 0.30010295126973235, "grad_norm": 0.7781451119146399, "learning_rate": 8.206434479990911e-06, "loss": 0.3001, "step": 8745 }, { "epoch": 0.3001372683596431, "grad_norm": 0.8209802497916434, "learning_rate": 8.20600804097782e-06, "loss": 0.3024, "step": 8746 }, { "epoch": 0.30017158544955386, "grad_norm": 0.8274198106265146, "learning_rate": 8.205581562358236e-06, "loss": 0.3056, "step": 8747 }, { "epoch": 0.30020590253946466, "grad_norm": 0.7853910822711767, "learning_rate": 8.205155044137433e-06, "loss": 0.3512, "step": 8748 }, { "epoch": 0.3002402196293754, "grad_norm": 0.766684722681269, "learning_rate": 8.204728486320675e-06, "loss": 0.3212, "step": 8749 }, { "epoch": 0.3002745367192862, "grad_norm": 0.7667611764957617, "learning_rate": 8.20430188891323e-06, "loss": 0.335, "step": 8750 }, { "epoch": 0.300308853809197, "grad_norm": 0.9147675905109495, "learning_rate": 8.203875251920375e-06, "loss": 0.3248, "step": 8751 }, { "epoch": 0.3003431708991078, "grad_norm": 0.7974272891824001, "learning_rate": 8.203448575347375e-06, "loss": 0.3029, "step": 8752 }, { "epoch": 0.30037748798901853, "grad_norm": 0.8256580059355769, "learning_rate": 8.203021859199502e-06, "loss": 0.3101, "step": 8753 }, { "epoch": 0.3004118050789293, "grad_norm": 0.7381201996390078, "learning_rate": 8.202595103482028e-06, "loss": 0.332, "step": 8754 }, { "epoch": 0.3004461221688401, "grad_norm": 0.7762079107989149, "learning_rate": 8.202168308200227e-06, "loss": 0.2997, "step": 8755 }, { "epoch": 0.30048043925875084, "grad_norm": 0.8327919388559083, "learning_rate": 8.201741473359369e-06, "loss": 0.3804, "step": 8756 }, { "epoch": 0.30051475634866165, "grad_norm": 0.7226860613239774, "learning_rate": 8.201314598964728e-06, "loss": 0.3205, "step": 8757 }, { "epoch": 0.3005490734385724, "grad_norm": 0.7063808841752369, "learning_rate": 8.200887685021576e-06, "loss": 0.27, "step": 8758 }, { "epoch": 0.3005833905284832, "grad_norm": 0.8190360041912919, "learning_rate": 8.200460731535191e-06, "loss": 0.326, "step": 8759 }, { "epoch": 0.30061770761839396, "grad_norm": 0.8322476970662354, "learning_rate": 8.200033738510843e-06, "loss": 0.2836, "step": 8760 }, { "epoch": 0.3006520247083047, "grad_norm": 0.8140547816152249, "learning_rate": 8.199606705953813e-06, "loss": 0.339, "step": 8761 }, { "epoch": 0.3006863417982155, "grad_norm": 0.8242596666680295, "learning_rate": 8.199179633869368e-06, "loss": 0.3391, "step": 8762 }, { "epoch": 0.30072065888812627, "grad_norm": 0.7956375138136337, "learning_rate": 8.19875252226279e-06, "loss": 0.2987, "step": 8763 }, { "epoch": 0.3007549759780371, "grad_norm": 0.7328187469768196, "learning_rate": 8.198325371139355e-06, "loss": 0.3322, "step": 8764 }, { "epoch": 0.3007892930679478, "grad_norm": 0.7319709077354934, "learning_rate": 8.197898180504338e-06, "loss": 0.3556, "step": 8765 }, { "epoch": 0.30082361015785863, "grad_norm": 0.81657272583101, "learning_rate": 8.197470950363019e-06, "loss": 0.3733, "step": 8766 }, { "epoch": 0.3008579272477694, "grad_norm": 0.850596490577531, "learning_rate": 8.197043680720671e-06, "loss": 0.2876, "step": 8767 }, { "epoch": 0.30089224433768014, "grad_norm": 0.7449454236498628, "learning_rate": 8.196616371582577e-06, "loss": 0.3116, "step": 8768 }, { "epoch": 0.30092656142759094, "grad_norm": 0.8321810935253419, "learning_rate": 8.196189022954015e-06, "loss": 0.3125, "step": 8769 }, { "epoch": 0.3009608785175017, "grad_norm": 0.785872543187124, "learning_rate": 8.195761634840264e-06, "loss": 0.3679, "step": 8770 }, { "epoch": 0.3009951956074125, "grad_norm": 0.7707033309966153, "learning_rate": 8.195334207246606e-06, "loss": 0.3261, "step": 8771 }, { "epoch": 0.30102951269732325, "grad_norm": 1.069537403782072, "learning_rate": 8.194906740178316e-06, "loss": 0.3091, "step": 8772 }, { "epoch": 0.30106382978723406, "grad_norm": 0.8565352001317366, "learning_rate": 8.19447923364068e-06, "loss": 0.2559, "step": 8773 }, { "epoch": 0.3010981468771448, "grad_norm": 0.7775274884653889, "learning_rate": 8.194051687638978e-06, "loss": 0.3733, "step": 8774 }, { "epoch": 0.3011324639670556, "grad_norm": 0.7997827074363585, "learning_rate": 8.193624102178489e-06, "loss": 0.2862, "step": 8775 }, { "epoch": 0.30116678105696637, "grad_norm": 0.6953624286524699, "learning_rate": 8.193196477264497e-06, "loss": 0.239, "step": 8776 }, { "epoch": 0.3012010981468771, "grad_norm": 0.6851344860980376, "learning_rate": 8.19276881290229e-06, "loss": 0.2482, "step": 8777 }, { "epoch": 0.30123541523678793, "grad_norm": 0.8153409827238485, "learning_rate": 8.192341109097142e-06, "loss": 0.3231, "step": 8778 }, { "epoch": 0.3012697323266987, "grad_norm": 0.8082252706681233, "learning_rate": 8.191913365854344e-06, "loss": 0.2759, "step": 8779 }, { "epoch": 0.3013040494166095, "grad_norm": 0.8586303314244175, "learning_rate": 8.191485583179177e-06, "loss": 0.3763, "step": 8780 }, { "epoch": 0.30133836650652024, "grad_norm": 0.7664161769211968, "learning_rate": 8.191057761076929e-06, "loss": 0.342, "step": 8781 }, { "epoch": 0.30137268359643105, "grad_norm": 0.8253673751454041, "learning_rate": 8.190629899552878e-06, "loss": 0.3436, "step": 8782 }, { "epoch": 0.3014070006863418, "grad_norm": 0.813755186530058, "learning_rate": 8.190201998612318e-06, "loss": 0.2964, "step": 8783 }, { "epoch": 0.30144131777625255, "grad_norm": 0.7880367159030061, "learning_rate": 8.189774058260532e-06, "loss": 0.2546, "step": 8784 }, { "epoch": 0.30147563486616336, "grad_norm": 0.7627342208109663, "learning_rate": 8.189346078502804e-06, "loss": 0.3402, "step": 8785 }, { "epoch": 0.3015099519560741, "grad_norm": 0.8278150592076565, "learning_rate": 8.188918059344426e-06, "loss": 0.2939, "step": 8786 }, { "epoch": 0.3015442690459849, "grad_norm": 0.8923692735417432, "learning_rate": 8.18849000079068e-06, "loss": 0.2879, "step": 8787 }, { "epoch": 0.30157858613589567, "grad_norm": 0.7564595528841555, "learning_rate": 8.18806190284686e-06, "loss": 0.2845, "step": 8788 }, { "epoch": 0.3016129032258065, "grad_norm": 0.7845709853858885, "learning_rate": 8.187633765518252e-06, "loss": 0.3617, "step": 8789 }, { "epoch": 0.3016472203157172, "grad_norm": 0.7543933555350378, "learning_rate": 8.187205588810146e-06, "loss": 0.2769, "step": 8790 }, { "epoch": 0.301681537405628, "grad_norm": 0.7272766724693924, "learning_rate": 8.186777372727829e-06, "loss": 0.2766, "step": 8791 }, { "epoch": 0.3017158544955388, "grad_norm": 1.0724335848330904, "learning_rate": 8.186349117276593e-06, "loss": 0.3118, "step": 8792 }, { "epoch": 0.30175017158544953, "grad_norm": 0.8342070864570464, "learning_rate": 8.18592082246173e-06, "loss": 0.3337, "step": 8793 }, { "epoch": 0.30178448867536034, "grad_norm": 0.7568048817768611, "learning_rate": 8.185492488288529e-06, "loss": 0.3088, "step": 8794 }, { "epoch": 0.3018188057652711, "grad_norm": 0.7214381554425332, "learning_rate": 8.185064114762282e-06, "loss": 0.3073, "step": 8795 }, { "epoch": 0.3018531228551819, "grad_norm": 0.890845357108138, "learning_rate": 8.184635701888282e-06, "loss": 0.3491, "step": 8796 }, { "epoch": 0.30188743994509265, "grad_norm": 0.8109372319761027, "learning_rate": 8.18420724967182e-06, "loss": 0.2748, "step": 8797 }, { "epoch": 0.30192175703500346, "grad_norm": 0.7510334215388553, "learning_rate": 8.18377875811819e-06, "loss": 0.2799, "step": 8798 }, { "epoch": 0.3019560741249142, "grad_norm": 0.705902644678121, "learning_rate": 8.183350227232687e-06, "loss": 0.3042, "step": 8799 }, { "epoch": 0.30199039121482496, "grad_norm": 0.7391227480186894, "learning_rate": 8.1829216570206e-06, "loss": 0.3096, "step": 8800 }, { "epoch": 0.30202470830473577, "grad_norm": 0.7545601537301981, "learning_rate": 8.18249304748723e-06, "loss": 0.2947, "step": 8801 }, { "epoch": 0.3020590253946465, "grad_norm": 0.733826875553146, "learning_rate": 8.182064398637868e-06, "loss": 0.2567, "step": 8802 }, { "epoch": 0.3020933424845573, "grad_norm": 0.7791290547320789, "learning_rate": 8.18163571047781e-06, "loss": 0.305, "step": 8803 }, { "epoch": 0.3021276595744681, "grad_norm": 0.7863765589130022, "learning_rate": 8.18120698301235e-06, "loss": 0.4035, "step": 8804 }, { "epoch": 0.3021619766643789, "grad_norm": 0.8182019177578836, "learning_rate": 8.18077821624679e-06, "loss": 0.2979, "step": 8805 }, { "epoch": 0.30219629375428964, "grad_norm": 0.770534619065984, "learning_rate": 8.180349410186424e-06, "loss": 0.3158, "step": 8806 }, { "epoch": 0.3022306108442004, "grad_norm": 0.8284344132843114, "learning_rate": 8.179920564836546e-06, "loss": 0.3393, "step": 8807 }, { "epoch": 0.3022649279341112, "grad_norm": 0.710313814723915, "learning_rate": 8.179491680202458e-06, "loss": 0.3136, "step": 8808 }, { "epoch": 0.30229924502402195, "grad_norm": 0.8096808916549466, "learning_rate": 8.179062756289458e-06, "loss": 0.3358, "step": 8809 }, { "epoch": 0.30233356211393275, "grad_norm": 0.7116785124483973, "learning_rate": 8.178633793102844e-06, "loss": 0.307, "step": 8810 }, { "epoch": 0.3023678792038435, "grad_norm": 0.6520872759135077, "learning_rate": 8.178204790647914e-06, "loss": 0.3146, "step": 8811 }, { "epoch": 0.3024021962937543, "grad_norm": 0.7037071330207334, "learning_rate": 8.17777574892997e-06, "loss": 0.3531, "step": 8812 }, { "epoch": 0.30243651338366506, "grad_norm": 0.7625394496805918, "learning_rate": 8.177346667954312e-06, "loss": 0.3053, "step": 8813 }, { "epoch": 0.3024708304735758, "grad_norm": 0.835961255490165, "learning_rate": 8.17691754772624e-06, "loss": 0.3476, "step": 8814 }, { "epoch": 0.3025051475634866, "grad_norm": 0.8655247481510229, "learning_rate": 8.176488388251056e-06, "loss": 0.33, "step": 8815 }, { "epoch": 0.30253946465339737, "grad_norm": 0.6327425987966899, "learning_rate": 8.17605918953406e-06, "loss": 0.3033, "step": 8816 }, { "epoch": 0.3025737817433082, "grad_norm": 0.8170926519157472, "learning_rate": 8.175629951580556e-06, "loss": 0.359, "step": 8817 }, { "epoch": 0.30260809883321893, "grad_norm": 0.7878087461311947, "learning_rate": 8.17520067439585e-06, "loss": 0.3165, "step": 8818 }, { "epoch": 0.30264241592312974, "grad_norm": 0.7728353744068484, "learning_rate": 8.174771357985237e-06, "loss": 0.2691, "step": 8819 }, { "epoch": 0.3026767330130405, "grad_norm": 0.7341611627710187, "learning_rate": 8.174342002354027e-06, "loss": 0.2872, "step": 8820 }, { "epoch": 0.3027110501029513, "grad_norm": 0.6839876665932166, "learning_rate": 8.173912607507521e-06, "loss": 0.3174, "step": 8821 }, { "epoch": 0.30274536719286205, "grad_norm": 0.7272617539821488, "learning_rate": 8.173483173451029e-06, "loss": 0.2749, "step": 8822 }, { "epoch": 0.3027796842827728, "grad_norm": 0.7927748922105617, "learning_rate": 8.17305370018985e-06, "loss": 0.3379, "step": 8823 }, { "epoch": 0.3028140013726836, "grad_norm": 0.8376074956550175, "learning_rate": 8.17262418772929e-06, "loss": 0.3517, "step": 8824 }, { "epoch": 0.30284831846259436, "grad_norm": 0.8340663641809174, "learning_rate": 8.17219463607466e-06, "loss": 0.293, "step": 8825 }, { "epoch": 0.30288263555250516, "grad_norm": 0.7729546327927401, "learning_rate": 8.171765045231262e-06, "loss": 0.2868, "step": 8826 }, { "epoch": 0.3029169526424159, "grad_norm": 0.8835361011255564, "learning_rate": 8.171335415204405e-06, "loss": 0.3498, "step": 8827 }, { "epoch": 0.3029512697323267, "grad_norm": 0.700811578027142, "learning_rate": 8.170905745999398e-06, "loss": 0.2395, "step": 8828 }, { "epoch": 0.3029855868222375, "grad_norm": 0.8301707749202927, "learning_rate": 8.170476037621546e-06, "loss": 0.3401, "step": 8829 }, { "epoch": 0.3030199039121482, "grad_norm": 0.6897645960516673, "learning_rate": 8.170046290076159e-06, "loss": 0.3294, "step": 8830 }, { "epoch": 0.30305422100205903, "grad_norm": 0.8544669500205672, "learning_rate": 8.169616503368547e-06, "loss": 0.3384, "step": 8831 }, { "epoch": 0.3030885380919698, "grad_norm": 0.7272441998905042, "learning_rate": 8.169186677504017e-06, "loss": 0.2818, "step": 8832 }, { "epoch": 0.3031228551818806, "grad_norm": 0.7909725965935818, "learning_rate": 8.168756812487881e-06, "loss": 0.3938, "step": 8833 }, { "epoch": 0.30315717227179134, "grad_norm": 0.7652137613810917, "learning_rate": 8.16832690832545e-06, "loss": 0.2763, "step": 8834 }, { "epoch": 0.30319148936170215, "grad_norm": 0.8539518185044085, "learning_rate": 8.167896965022032e-06, "loss": 0.3466, "step": 8835 }, { "epoch": 0.3032258064516129, "grad_norm": 0.7867343616416534, "learning_rate": 8.167466982582942e-06, "loss": 0.2919, "step": 8836 }, { "epoch": 0.30326012354152365, "grad_norm": 0.9690179415096624, "learning_rate": 8.16703696101349e-06, "loss": 0.3094, "step": 8837 }, { "epoch": 0.30329444063143446, "grad_norm": 0.8252937390189774, "learning_rate": 8.166606900318989e-06, "loss": 0.3795, "step": 8838 }, { "epoch": 0.3033287577213452, "grad_norm": 0.7465881963013861, "learning_rate": 8.16617680050475e-06, "loss": 0.3098, "step": 8839 }, { "epoch": 0.303363074811256, "grad_norm": 0.7130533775010762, "learning_rate": 8.165746661576091e-06, "loss": 0.254, "step": 8840 }, { "epoch": 0.30339739190116677, "grad_norm": 0.7555338679990528, "learning_rate": 8.165316483538321e-06, "loss": 0.33, "step": 8841 }, { "epoch": 0.3034317089910776, "grad_norm": 0.7826787118550043, "learning_rate": 8.164886266396756e-06, "loss": 0.3421, "step": 8842 }, { "epoch": 0.3034660260809883, "grad_norm": 0.8503800071499196, "learning_rate": 8.164456010156712e-06, "loss": 0.3078, "step": 8843 }, { "epoch": 0.30350034317089913, "grad_norm": 0.777504187400108, "learning_rate": 8.164025714823503e-06, "loss": 0.3316, "step": 8844 }, { "epoch": 0.3035346602608099, "grad_norm": 0.8091306700064959, "learning_rate": 8.163595380402445e-06, "loss": 0.3131, "step": 8845 }, { "epoch": 0.30356897735072064, "grad_norm": 0.7495285358743826, "learning_rate": 8.163165006898854e-06, "loss": 0.3111, "step": 8846 }, { "epoch": 0.30360329444063144, "grad_norm": 0.8610580172897372, "learning_rate": 8.162734594318049e-06, "loss": 0.3413, "step": 8847 }, { "epoch": 0.3036376115305422, "grad_norm": 0.865379989983423, "learning_rate": 8.162304142665343e-06, "loss": 0.3106, "step": 8848 }, { "epoch": 0.303671928620453, "grad_norm": 0.7650861946686546, "learning_rate": 8.161873651946057e-06, "loss": 0.2944, "step": 8849 }, { "epoch": 0.30370624571036375, "grad_norm": 0.7088609903875015, "learning_rate": 8.16144312216551e-06, "loss": 0.2633, "step": 8850 }, { "epoch": 0.30374056280027456, "grad_norm": 0.743321075940635, "learning_rate": 8.161012553329018e-06, "loss": 0.3144, "step": 8851 }, { "epoch": 0.3037748798901853, "grad_norm": 0.8498863920374364, "learning_rate": 8.160581945441902e-06, "loss": 0.3785, "step": 8852 }, { "epoch": 0.30380919698009606, "grad_norm": 0.7911366484281989, "learning_rate": 8.16015129850948e-06, "loss": 0.3048, "step": 8853 }, { "epoch": 0.30384351407000687, "grad_norm": 0.8344665358476554, "learning_rate": 8.159720612537074e-06, "loss": 0.2902, "step": 8854 }, { "epoch": 0.3038778311599176, "grad_norm": 0.8324143704953004, "learning_rate": 8.159289887530002e-06, "loss": 0.3173, "step": 8855 }, { "epoch": 0.30391214824982843, "grad_norm": 0.8561829269213249, "learning_rate": 8.158859123493587e-06, "loss": 0.2986, "step": 8856 }, { "epoch": 0.3039464653397392, "grad_norm": 0.7599579926031405, "learning_rate": 8.158428320433151e-06, "loss": 0.2731, "step": 8857 }, { "epoch": 0.30398078242965, "grad_norm": 0.8198830987376803, "learning_rate": 8.157997478354016e-06, "loss": 0.3427, "step": 8858 }, { "epoch": 0.30401509951956074, "grad_norm": 0.7501891509056247, "learning_rate": 8.157566597261503e-06, "loss": 0.3125, "step": 8859 }, { "epoch": 0.3040494166094715, "grad_norm": 0.7835230704317572, "learning_rate": 8.15713567716094e-06, "loss": 0.2988, "step": 8860 }, { "epoch": 0.3040837336993823, "grad_norm": 0.814078041480552, "learning_rate": 8.156704718057641e-06, "loss": 0.338, "step": 8861 }, { "epoch": 0.30411805078929305, "grad_norm": 0.7355019632009795, "learning_rate": 8.156273719956939e-06, "loss": 0.3158, "step": 8862 }, { "epoch": 0.30415236787920386, "grad_norm": 0.7995315246610659, "learning_rate": 8.155842682864154e-06, "loss": 0.2987, "step": 8863 }, { "epoch": 0.3041866849691146, "grad_norm": 0.8236283901816067, "learning_rate": 8.155411606784612e-06, "loss": 0.2927, "step": 8864 }, { "epoch": 0.3042210020590254, "grad_norm": 0.880943546512108, "learning_rate": 8.154980491723638e-06, "loss": 0.3044, "step": 8865 }, { "epoch": 0.30425531914893617, "grad_norm": 0.780290323547041, "learning_rate": 8.15454933768656e-06, "loss": 0.3404, "step": 8866 }, { "epoch": 0.30428963623884697, "grad_norm": 0.8550029957806676, "learning_rate": 8.154118144678702e-06, "loss": 0.3753, "step": 8867 }, { "epoch": 0.3043239533287577, "grad_norm": 0.76262319902256, "learning_rate": 8.15368691270539e-06, "loss": 0.3185, "step": 8868 }, { "epoch": 0.3043582704186685, "grad_norm": 0.6229663259677531, "learning_rate": 8.153255641771955e-06, "loss": 0.2947, "step": 8869 }, { "epoch": 0.3043925875085793, "grad_norm": 0.6788004005656364, "learning_rate": 8.152824331883723e-06, "loss": 0.2835, "step": 8870 }, { "epoch": 0.30442690459849003, "grad_norm": 0.7761465967475782, "learning_rate": 8.15239298304602e-06, "loss": 0.359, "step": 8871 }, { "epoch": 0.30446122168840084, "grad_norm": 0.7297414977056262, "learning_rate": 8.15196159526418e-06, "loss": 0.2892, "step": 8872 }, { "epoch": 0.3044955387783116, "grad_norm": 0.8413605730220812, "learning_rate": 8.151530168543527e-06, "loss": 0.3118, "step": 8873 }, { "epoch": 0.3045298558682224, "grad_norm": 0.7770263659448127, "learning_rate": 8.151098702889393e-06, "loss": 0.3197, "step": 8874 }, { "epoch": 0.30456417295813315, "grad_norm": 0.896868410026266, "learning_rate": 8.150667198307109e-06, "loss": 0.3602, "step": 8875 }, { "epoch": 0.3045984900480439, "grad_norm": 0.6824469995271528, "learning_rate": 8.150235654802006e-06, "loss": 0.3004, "step": 8876 }, { "epoch": 0.3046328071379547, "grad_norm": 0.8259853544720338, "learning_rate": 8.149804072379413e-06, "loss": 0.3323, "step": 8877 }, { "epoch": 0.30466712422786546, "grad_norm": 0.7864797081681169, "learning_rate": 8.149372451044664e-06, "loss": 0.3133, "step": 8878 }, { "epoch": 0.30470144131777627, "grad_norm": 0.8061300442745307, "learning_rate": 8.14894079080309e-06, "loss": 0.3335, "step": 8879 }, { "epoch": 0.304735758407687, "grad_norm": 0.8028504604334794, "learning_rate": 8.148509091660023e-06, "loss": 0.3276, "step": 8880 }, { "epoch": 0.3047700754975978, "grad_norm": 0.7463445006365342, "learning_rate": 8.148077353620798e-06, "loss": 0.2924, "step": 8881 }, { "epoch": 0.3048043925875086, "grad_norm": 0.7638181170492601, "learning_rate": 8.147645576690748e-06, "loss": 0.2802, "step": 8882 }, { "epoch": 0.30483870967741933, "grad_norm": 0.7863088028529875, "learning_rate": 8.147213760875205e-06, "loss": 0.3604, "step": 8883 }, { "epoch": 0.30487302676733014, "grad_norm": 0.727792146816714, "learning_rate": 8.146781906179506e-06, "loss": 0.2862, "step": 8884 }, { "epoch": 0.3049073438572409, "grad_norm": 0.7565531835537939, "learning_rate": 8.146350012608985e-06, "loss": 0.3077, "step": 8885 }, { "epoch": 0.3049416609471517, "grad_norm": 0.700514962748532, "learning_rate": 8.14591808016898e-06, "loss": 0.3125, "step": 8886 }, { "epoch": 0.30497597803706245, "grad_norm": 0.7313676867643395, "learning_rate": 8.145486108864823e-06, "loss": 0.2876, "step": 8887 }, { "epoch": 0.30501029512697325, "grad_norm": 0.8535814062254375, "learning_rate": 8.145054098701853e-06, "loss": 0.3653, "step": 8888 }, { "epoch": 0.305044612216884, "grad_norm": 0.7691552422270354, "learning_rate": 8.144622049685406e-06, "loss": 0.2654, "step": 8889 }, { "epoch": 0.3050789293067948, "grad_norm": 0.834367155901881, "learning_rate": 8.144189961820822e-06, "loss": 0.3146, "step": 8890 }, { "epoch": 0.30511324639670556, "grad_norm": 0.721232278151863, "learning_rate": 8.143757835113432e-06, "loss": 0.3123, "step": 8891 }, { "epoch": 0.3051475634866163, "grad_norm": 0.9103159869721575, "learning_rate": 8.143325669568583e-06, "loss": 0.3053, "step": 8892 }, { "epoch": 0.3051818805765271, "grad_norm": 0.8183078637632071, "learning_rate": 8.14289346519161e-06, "loss": 0.3028, "step": 8893 }, { "epoch": 0.30521619766643787, "grad_norm": 0.8786959785524318, "learning_rate": 8.14246122198785e-06, "loss": 0.3034, "step": 8894 }, { "epoch": 0.3052505147563487, "grad_norm": 0.7378155307011864, "learning_rate": 8.142028939962647e-06, "loss": 0.3189, "step": 8895 }, { "epoch": 0.30528483184625943, "grad_norm": 0.800210198183177, "learning_rate": 8.14159661912134e-06, "loss": 0.3037, "step": 8896 }, { "epoch": 0.30531914893617024, "grad_norm": 0.6601686442454924, "learning_rate": 8.141164259469269e-06, "loss": 0.2848, "step": 8897 }, { "epoch": 0.305353466026081, "grad_norm": 0.7727706730039119, "learning_rate": 8.140731861011777e-06, "loss": 0.3226, "step": 8898 }, { "epoch": 0.30538778311599174, "grad_norm": 0.7594358027002345, "learning_rate": 8.140299423754203e-06, "loss": 0.2775, "step": 8899 }, { "epoch": 0.30542210020590255, "grad_norm": 0.7613586027767433, "learning_rate": 8.139866947701892e-06, "loss": 0.2857, "step": 8900 }, { "epoch": 0.3054564172958133, "grad_norm": 0.758755837533724, "learning_rate": 8.139434432860186e-06, "loss": 0.3086, "step": 8901 }, { "epoch": 0.3054907343857241, "grad_norm": 0.7620723406124353, "learning_rate": 8.139001879234425e-06, "loss": 0.3514, "step": 8902 }, { "epoch": 0.30552505147563486, "grad_norm": 0.8148016710030631, "learning_rate": 8.138569286829958e-06, "loss": 0.3235, "step": 8903 }, { "epoch": 0.30555936856554566, "grad_norm": 0.804462595575445, "learning_rate": 8.138136655652125e-06, "loss": 0.2805, "step": 8904 }, { "epoch": 0.3055936856554564, "grad_norm": 0.7398458989594211, "learning_rate": 8.137703985706273e-06, "loss": 0.3474, "step": 8905 }, { "epoch": 0.30562800274536717, "grad_norm": 0.8194998076151035, "learning_rate": 8.137271276997748e-06, "loss": 0.3189, "step": 8906 }, { "epoch": 0.305662319835278, "grad_norm": 0.706725465599236, "learning_rate": 8.136838529531892e-06, "loss": 0.2575, "step": 8907 }, { "epoch": 0.3056966369251887, "grad_norm": 0.7407438145100147, "learning_rate": 8.136405743314055e-06, "loss": 0.3505, "step": 8908 }, { "epoch": 0.30573095401509953, "grad_norm": 1.0061886403001215, "learning_rate": 8.13597291834958e-06, "loss": 0.3175, "step": 8909 }, { "epoch": 0.3057652711050103, "grad_norm": 0.6737818181844316, "learning_rate": 8.135540054643816e-06, "loss": 0.2967, "step": 8910 }, { "epoch": 0.3057995881949211, "grad_norm": 0.826092563332995, "learning_rate": 8.135107152202112e-06, "loss": 0.3432, "step": 8911 }, { "epoch": 0.30583390528483184, "grad_norm": 0.7255733814631711, "learning_rate": 8.134674211029813e-06, "loss": 0.3045, "step": 8912 }, { "epoch": 0.30586822237474265, "grad_norm": 0.7920521937663682, "learning_rate": 8.134241231132269e-06, "loss": 0.3176, "step": 8913 }, { "epoch": 0.3059025394646534, "grad_norm": 0.8367875568961215, "learning_rate": 8.133808212514829e-06, "loss": 0.2802, "step": 8914 }, { "epoch": 0.30593685655456415, "grad_norm": 0.7276797816285142, "learning_rate": 8.133375155182842e-06, "loss": 0.3109, "step": 8915 }, { "epoch": 0.30597117364447496, "grad_norm": 0.8499212282612668, "learning_rate": 8.132942059141658e-06, "loss": 0.3472, "step": 8916 }, { "epoch": 0.3060054907343857, "grad_norm": 0.7835262769364089, "learning_rate": 8.132508924396628e-06, "loss": 0.3027, "step": 8917 }, { "epoch": 0.3060398078242965, "grad_norm": 0.7704127087877073, "learning_rate": 8.132075750953102e-06, "loss": 0.3169, "step": 8918 }, { "epoch": 0.30607412491420727, "grad_norm": 0.7519638062142783, "learning_rate": 8.131642538816431e-06, "loss": 0.33, "step": 8919 }, { "epoch": 0.3061084420041181, "grad_norm": 0.7134972316157355, "learning_rate": 8.131209287991968e-06, "loss": 0.3087, "step": 8920 }, { "epoch": 0.3061427590940288, "grad_norm": 0.7162293924531172, "learning_rate": 8.130775998485066e-06, "loss": 0.3207, "step": 8921 }, { "epoch": 0.3061770761839396, "grad_norm": 0.8153904031584208, "learning_rate": 8.130342670301075e-06, "loss": 0.3342, "step": 8922 }, { "epoch": 0.3062113932738504, "grad_norm": 0.7711289195829906, "learning_rate": 8.129909303445352e-06, "loss": 0.3622, "step": 8923 }, { "epoch": 0.30624571036376114, "grad_norm": 0.7225306386525056, "learning_rate": 8.129475897923246e-06, "loss": 0.2978, "step": 8924 }, { "epoch": 0.30628002745367194, "grad_norm": 0.6759884873647151, "learning_rate": 8.129042453740116e-06, "loss": 0.2767, "step": 8925 }, { "epoch": 0.3063143445435827, "grad_norm": 0.7489543581605991, "learning_rate": 8.128608970901315e-06, "loss": 0.3474, "step": 8926 }, { "epoch": 0.3063486616334935, "grad_norm": 0.8660279377930645, "learning_rate": 8.128175449412196e-06, "loss": 0.3055, "step": 8927 }, { "epoch": 0.30638297872340425, "grad_norm": 0.7776233607291282, "learning_rate": 8.127741889278118e-06, "loss": 0.2787, "step": 8928 }, { "epoch": 0.306417295813315, "grad_norm": 0.7498909395778138, "learning_rate": 8.127308290504435e-06, "loss": 0.2666, "step": 8929 }, { "epoch": 0.3064516129032258, "grad_norm": 0.8517873667427697, "learning_rate": 8.126874653096504e-06, "loss": 0.2927, "step": 8930 }, { "epoch": 0.30648592999313656, "grad_norm": 0.7872726694803874, "learning_rate": 8.12644097705968e-06, "loss": 0.3216, "step": 8931 }, { "epoch": 0.30652024708304737, "grad_norm": 0.8072730263611354, "learning_rate": 8.126007262399325e-06, "loss": 0.3227, "step": 8932 }, { "epoch": 0.3065545641729581, "grad_norm": 0.8813887998705533, "learning_rate": 8.125573509120795e-06, "loss": 0.3308, "step": 8933 }, { "epoch": 0.30658888126286893, "grad_norm": 0.8904419861648063, "learning_rate": 8.125139717229449e-06, "loss": 0.3287, "step": 8934 }, { "epoch": 0.3066231983527797, "grad_norm": 0.783316979065689, "learning_rate": 8.124705886730645e-06, "loss": 0.2985, "step": 8935 }, { "epoch": 0.3066575154426905, "grad_norm": 0.8249185269245368, "learning_rate": 8.12427201762974e-06, "loss": 0.2837, "step": 8936 }, { "epoch": 0.30669183253260124, "grad_norm": 0.7948927546955522, "learning_rate": 8.1238381099321e-06, "loss": 0.2939, "step": 8937 }, { "epoch": 0.306726149622512, "grad_norm": 0.9042705153333639, "learning_rate": 8.123404163643081e-06, "loss": 0.3289, "step": 8938 }, { "epoch": 0.3067604667124228, "grad_norm": 0.741606903150996, "learning_rate": 8.122970178768046e-06, "loss": 0.3002, "step": 8939 }, { "epoch": 0.30679478380233355, "grad_norm": 0.8912283691203585, "learning_rate": 8.122536155312355e-06, "loss": 0.3365, "step": 8940 }, { "epoch": 0.30682910089224436, "grad_norm": 0.80101684034306, "learning_rate": 8.12210209328137e-06, "loss": 0.3372, "step": 8941 }, { "epoch": 0.3068634179821551, "grad_norm": 0.7061880843973544, "learning_rate": 8.121667992680453e-06, "loss": 0.2704, "step": 8942 }, { "epoch": 0.3068977350720659, "grad_norm": 0.7500256747276093, "learning_rate": 8.12123385351497e-06, "loss": 0.3388, "step": 8943 }, { "epoch": 0.30693205216197667, "grad_norm": 0.7114858777742902, "learning_rate": 8.12079967579028e-06, "loss": 0.2751, "step": 8944 }, { "epoch": 0.3069663692518874, "grad_norm": 0.7388677850140014, "learning_rate": 8.120365459511747e-06, "loss": 0.3004, "step": 8945 }, { "epoch": 0.3070006863417982, "grad_norm": 0.8989659283552738, "learning_rate": 8.119931204684739e-06, "loss": 0.3029, "step": 8946 }, { "epoch": 0.307035003431709, "grad_norm": 0.8682959881156016, "learning_rate": 8.119496911314618e-06, "loss": 0.3006, "step": 8947 }, { "epoch": 0.3070693205216198, "grad_norm": 0.739281628379459, "learning_rate": 8.11906257940675e-06, "loss": 0.3222, "step": 8948 }, { "epoch": 0.30710363761153053, "grad_norm": 0.7017594244089086, "learning_rate": 8.118628208966498e-06, "loss": 0.3076, "step": 8949 }, { "epoch": 0.30713795470144134, "grad_norm": 0.8366903792298639, "learning_rate": 8.118193799999233e-06, "loss": 0.3145, "step": 8950 }, { "epoch": 0.3071722717913521, "grad_norm": 0.7674804712689121, "learning_rate": 8.117759352510317e-06, "loss": 0.3611, "step": 8951 }, { "epoch": 0.30720658888126284, "grad_norm": 0.8818065766964284, "learning_rate": 8.11732486650512e-06, "loss": 0.3414, "step": 8952 }, { "epoch": 0.30724090597117365, "grad_norm": 0.6690208612045107, "learning_rate": 8.11689034198901e-06, "loss": 0.2782, "step": 8953 }, { "epoch": 0.3072752230610844, "grad_norm": 0.7748255394818788, "learning_rate": 8.116455778967353e-06, "loss": 0.3562, "step": 8954 }, { "epoch": 0.3073095401509952, "grad_norm": 0.7991509632960916, "learning_rate": 8.116021177445517e-06, "loss": 0.2675, "step": 8955 }, { "epoch": 0.30734385724090596, "grad_norm": 0.8183888714000002, "learning_rate": 8.115586537428875e-06, "loss": 0.3032, "step": 8956 }, { "epoch": 0.30737817433081677, "grad_norm": 0.8305089630440758, "learning_rate": 8.115151858922792e-06, "loss": 0.3316, "step": 8957 }, { "epoch": 0.3074124914207275, "grad_norm": 0.7685250339733859, "learning_rate": 8.114717141932639e-06, "loss": 0.3367, "step": 8958 }, { "epoch": 0.3074468085106383, "grad_norm": 0.7581560911657554, "learning_rate": 8.114282386463788e-06, "loss": 0.2875, "step": 8959 }, { "epoch": 0.3074811256005491, "grad_norm": 0.7796872604909351, "learning_rate": 8.113847592521609e-06, "loss": 0.3003, "step": 8960 }, { "epoch": 0.30751544269045983, "grad_norm": 0.8234901060186816, "learning_rate": 8.113412760111474e-06, "loss": 0.3206, "step": 8961 }, { "epoch": 0.30754975978037064, "grad_norm": 0.8221617643282534, "learning_rate": 8.112977889238753e-06, "loss": 0.3123, "step": 8962 }, { "epoch": 0.3075840768702814, "grad_norm": 0.7626081906858746, "learning_rate": 8.112542979908819e-06, "loss": 0.2794, "step": 8963 }, { "epoch": 0.3076183939601922, "grad_norm": 0.7630942963901984, "learning_rate": 8.112108032127047e-06, "loss": 0.3519, "step": 8964 }, { "epoch": 0.30765271105010294, "grad_norm": 0.8202508976676662, "learning_rate": 8.11167304589881e-06, "loss": 0.3113, "step": 8965 }, { "epoch": 0.30768702814001375, "grad_norm": 0.8352719455120948, "learning_rate": 8.111238021229477e-06, "loss": 0.3622, "step": 8966 }, { "epoch": 0.3077213452299245, "grad_norm": 0.7726377914629278, "learning_rate": 8.110802958124428e-06, "loss": 0.3097, "step": 8967 }, { "epoch": 0.30775566231983525, "grad_norm": 0.7990474521792094, "learning_rate": 8.110367856589035e-06, "loss": 0.3016, "step": 8968 }, { "epoch": 0.30778997940974606, "grad_norm": 0.7645221509976626, "learning_rate": 8.109932716628672e-06, "loss": 0.3255, "step": 8969 }, { "epoch": 0.3078242964996568, "grad_norm": 0.7702368273571367, "learning_rate": 8.109497538248717e-06, "loss": 0.3112, "step": 8970 }, { "epoch": 0.3078586135895676, "grad_norm": 0.7637744042459319, "learning_rate": 8.109062321454544e-06, "loss": 0.3574, "step": 8971 }, { "epoch": 0.30789293067947837, "grad_norm": 0.7661749450855264, "learning_rate": 8.108627066251534e-06, "loss": 0.302, "step": 8972 }, { "epoch": 0.3079272477693892, "grad_norm": 0.6738660385584202, "learning_rate": 8.108191772645057e-06, "loss": 0.2532, "step": 8973 }, { "epoch": 0.30796156485929993, "grad_norm": 0.7331693611505868, "learning_rate": 8.107756440640497e-06, "loss": 0.2936, "step": 8974 }, { "epoch": 0.3079958819492107, "grad_norm": 0.8367766451935702, "learning_rate": 8.10732107024323e-06, "loss": 0.2825, "step": 8975 }, { "epoch": 0.3080301990391215, "grad_norm": 0.8666978747064623, "learning_rate": 8.106885661458633e-06, "loss": 0.4222, "step": 8976 }, { "epoch": 0.30806451612903224, "grad_norm": 0.8771233661279326, "learning_rate": 8.106450214292084e-06, "loss": 0.3033, "step": 8977 }, { "epoch": 0.30809883321894305, "grad_norm": 0.7330067254276265, "learning_rate": 8.106014728748965e-06, "loss": 0.2991, "step": 8978 }, { "epoch": 0.3081331503088538, "grad_norm": 0.7622608636140149, "learning_rate": 8.105579204834659e-06, "loss": 0.3112, "step": 8979 }, { "epoch": 0.3081674673987646, "grad_norm": 0.9456354770938769, "learning_rate": 8.105143642554537e-06, "loss": 0.3099, "step": 8980 }, { "epoch": 0.30820178448867536, "grad_norm": 0.8001108662901915, "learning_rate": 8.10470804191399e-06, "loss": 0.3379, "step": 8981 }, { "epoch": 0.30823610157858616, "grad_norm": 0.767673796720805, "learning_rate": 8.104272402918394e-06, "loss": 0.3538, "step": 8982 }, { "epoch": 0.3082704186684969, "grad_norm": 0.7824993888544328, "learning_rate": 8.10383672557313e-06, "loss": 0.3291, "step": 8983 }, { "epoch": 0.30830473575840767, "grad_norm": 0.7824891631621557, "learning_rate": 8.103401009883584e-06, "loss": 0.3052, "step": 8984 }, { "epoch": 0.3083390528483185, "grad_norm": 0.7006177518839737, "learning_rate": 8.102965255855137e-06, "loss": 0.3084, "step": 8985 }, { "epoch": 0.3083733699382292, "grad_norm": 0.8008759882936294, "learning_rate": 8.102529463493171e-06, "loss": 0.3177, "step": 8986 }, { "epoch": 0.30840768702814003, "grad_norm": 0.7264845975480146, "learning_rate": 8.102093632803069e-06, "loss": 0.2966, "step": 8987 }, { "epoch": 0.3084420041180508, "grad_norm": 0.8226264852991807, "learning_rate": 8.10165776379022e-06, "loss": 0.2957, "step": 8988 }, { "epoch": 0.3084763212079616, "grad_norm": 0.7967525384229082, "learning_rate": 8.101221856460004e-06, "loss": 0.281, "step": 8989 }, { "epoch": 0.30851063829787234, "grad_norm": 0.7432806635839748, "learning_rate": 8.100785910817807e-06, "loss": 0.3264, "step": 8990 }, { "epoch": 0.3085449553877831, "grad_norm": 0.7319462494901723, "learning_rate": 8.100349926869016e-06, "loss": 0.3025, "step": 8991 }, { "epoch": 0.3085792724776939, "grad_norm": 0.8022397052253687, "learning_rate": 8.099913904619017e-06, "loss": 0.3061, "step": 8992 }, { "epoch": 0.30861358956760465, "grad_norm": 0.8157924103247478, "learning_rate": 8.099477844073194e-06, "loss": 0.2738, "step": 8993 }, { "epoch": 0.30864790665751546, "grad_norm": 0.7649398440821802, "learning_rate": 8.099041745236936e-06, "loss": 0.3216, "step": 8994 }, { "epoch": 0.3086822237474262, "grad_norm": 1.0615807048056218, "learning_rate": 8.098605608115633e-06, "loss": 0.3124, "step": 8995 }, { "epoch": 0.308716540837337, "grad_norm": 0.8291271032676366, "learning_rate": 8.098169432714667e-06, "loss": 0.3375, "step": 8996 }, { "epoch": 0.30875085792724777, "grad_norm": 0.8326010453007345, "learning_rate": 8.097733219039433e-06, "loss": 0.3975, "step": 8997 }, { "epoch": 0.3087851750171585, "grad_norm": 0.7177898983156954, "learning_rate": 8.097296967095314e-06, "loss": 0.2945, "step": 8998 }, { "epoch": 0.3088194921070693, "grad_norm": 0.7735029496096362, "learning_rate": 8.096860676887704e-06, "loss": 0.3259, "step": 8999 }, { "epoch": 0.3088538091969801, "grad_norm": 0.72842172109312, "learning_rate": 8.096424348421988e-06, "loss": 0.2852, "step": 9000 }, { "epoch": 0.3088881262868909, "grad_norm": 0.705581155469501, "learning_rate": 8.095987981703562e-06, "loss": 0.3311, "step": 9001 }, { "epoch": 0.30892244337680164, "grad_norm": 0.7584747594806325, "learning_rate": 8.095551576737814e-06, "loss": 0.267, "step": 9002 }, { "epoch": 0.30895676046671244, "grad_norm": 0.8358041665886967, "learning_rate": 8.095115133530134e-06, "loss": 0.2965, "step": 9003 }, { "epoch": 0.3089910775566232, "grad_norm": 0.8041730232207442, "learning_rate": 8.094678652085916e-06, "loss": 0.358, "step": 9004 }, { "epoch": 0.30902539464653395, "grad_norm": 0.7120031483205694, "learning_rate": 8.094242132410551e-06, "loss": 0.3524, "step": 9005 }, { "epoch": 0.30905971173644475, "grad_norm": 0.7459457563566195, "learning_rate": 8.093805574509433e-06, "loss": 0.3124, "step": 9006 }, { "epoch": 0.3090940288263555, "grad_norm": 0.8309815620377239, "learning_rate": 8.093368978387952e-06, "loss": 0.3232, "step": 9007 }, { "epoch": 0.3091283459162663, "grad_norm": 0.8403699062779718, "learning_rate": 8.092932344051504e-06, "loss": 0.2838, "step": 9008 }, { "epoch": 0.30916266300617706, "grad_norm": 0.8247284254723842, "learning_rate": 8.092495671505485e-06, "loss": 0.2992, "step": 9009 }, { "epoch": 0.30919698009608787, "grad_norm": 0.695065414118951, "learning_rate": 8.092058960755288e-06, "loss": 0.3404, "step": 9010 }, { "epoch": 0.3092312971859986, "grad_norm": 0.803058537510381, "learning_rate": 8.091622211806305e-06, "loss": 0.3498, "step": 9011 }, { "epoch": 0.30926561427590943, "grad_norm": 0.8468664824361379, "learning_rate": 8.091185424663934e-06, "loss": 0.3019, "step": 9012 }, { "epoch": 0.3092999313658202, "grad_norm": 0.7324527459627828, "learning_rate": 8.090748599333573e-06, "loss": 0.2503, "step": 9013 }, { "epoch": 0.30933424845573093, "grad_norm": 0.8396961994772616, "learning_rate": 8.090311735820616e-06, "loss": 0.331, "step": 9014 }, { "epoch": 0.30936856554564174, "grad_norm": 0.8221754180859796, "learning_rate": 8.08987483413046e-06, "loss": 0.3573, "step": 9015 }, { "epoch": 0.3094028826355525, "grad_norm": 0.7144791549522236, "learning_rate": 8.089437894268502e-06, "loss": 0.2831, "step": 9016 }, { "epoch": 0.3094371997254633, "grad_norm": 0.732344152921696, "learning_rate": 8.089000916240142e-06, "loss": 0.3022, "step": 9017 }, { "epoch": 0.30947151681537405, "grad_norm": 0.8822535043109385, "learning_rate": 8.088563900050777e-06, "loss": 0.3739, "step": 9018 }, { "epoch": 0.30950583390528486, "grad_norm": 0.7494628470421361, "learning_rate": 8.088126845705803e-06, "loss": 0.3285, "step": 9019 }, { "epoch": 0.3095401509951956, "grad_norm": 0.7748333375808465, "learning_rate": 8.087689753210625e-06, "loss": 0.3113, "step": 9020 }, { "epoch": 0.30957446808510636, "grad_norm": 0.8402741764994921, "learning_rate": 8.087252622570641e-06, "loss": 0.3045, "step": 9021 }, { "epoch": 0.30960878517501716, "grad_norm": 0.7410453700699821, "learning_rate": 8.086815453791248e-06, "loss": 0.3149, "step": 9022 }, { "epoch": 0.3096431022649279, "grad_norm": 0.7737801116185254, "learning_rate": 8.086378246877852e-06, "loss": 0.2467, "step": 9023 }, { "epoch": 0.3096774193548387, "grad_norm": 0.748969926856388, "learning_rate": 8.085941001835848e-06, "loss": 0.2989, "step": 9024 }, { "epoch": 0.3097117364447495, "grad_norm": 0.8030745138957694, "learning_rate": 8.085503718670643e-06, "loss": 0.3418, "step": 9025 }, { "epoch": 0.3097460535346603, "grad_norm": 0.7588551344237857, "learning_rate": 8.085066397387635e-06, "loss": 0.2973, "step": 9026 }, { "epoch": 0.30978037062457103, "grad_norm": 0.7939239966671191, "learning_rate": 8.08462903799223e-06, "loss": 0.3631, "step": 9027 }, { "epoch": 0.3098146877144818, "grad_norm": 0.9252255734587829, "learning_rate": 8.08419164048983e-06, "loss": 0.3215, "step": 9028 }, { "epoch": 0.3098490048043926, "grad_norm": 0.7729096269466728, "learning_rate": 8.083754204885835e-06, "loss": 0.3058, "step": 9029 }, { "epoch": 0.30988332189430334, "grad_norm": 0.7744935189836208, "learning_rate": 8.083316731185655e-06, "loss": 0.3497, "step": 9030 }, { "epoch": 0.30991763898421415, "grad_norm": 0.8547152303842425, "learning_rate": 8.08287921939469e-06, "loss": 0.357, "step": 9031 }, { "epoch": 0.3099519560741249, "grad_norm": 0.9789710199268081, "learning_rate": 8.082441669518348e-06, "loss": 0.3186, "step": 9032 }, { "epoch": 0.3099862731640357, "grad_norm": 0.7713820212156016, "learning_rate": 8.082004081562032e-06, "loss": 0.3385, "step": 9033 }, { "epoch": 0.31002059025394646, "grad_norm": 0.7158132544695932, "learning_rate": 8.081566455531149e-06, "loss": 0.3027, "step": 9034 }, { "epoch": 0.31005490734385727, "grad_norm": 0.8004741588031105, "learning_rate": 8.081128791431105e-06, "loss": 0.3494, "step": 9035 }, { "epoch": 0.310089224433768, "grad_norm": 0.7487872569156908, "learning_rate": 8.080691089267307e-06, "loss": 0.311, "step": 9036 }, { "epoch": 0.31012354152367877, "grad_norm": 0.9749416258791179, "learning_rate": 8.080253349045163e-06, "loss": 0.3082, "step": 9037 }, { "epoch": 0.3101578586135896, "grad_norm": 0.7338157642574218, "learning_rate": 8.07981557077008e-06, "loss": 0.3289, "step": 9038 }, { "epoch": 0.31019217570350033, "grad_norm": 0.804261972057605, "learning_rate": 8.079377754447467e-06, "loss": 0.356, "step": 9039 }, { "epoch": 0.31022649279341113, "grad_norm": 0.8020709279080066, "learning_rate": 8.078939900082729e-06, "loss": 0.3107, "step": 9040 }, { "epoch": 0.3102608098833219, "grad_norm": 0.689265348528194, "learning_rate": 8.078502007681282e-06, "loss": 0.2932, "step": 9041 }, { "epoch": 0.3102951269732327, "grad_norm": 0.7666332142394855, "learning_rate": 8.078064077248529e-06, "loss": 0.3068, "step": 9042 }, { "epoch": 0.31032944406314344, "grad_norm": 0.7494312044476305, "learning_rate": 8.077626108789884e-06, "loss": 0.2695, "step": 9043 }, { "epoch": 0.3103637611530542, "grad_norm": 0.8712705491012749, "learning_rate": 8.077188102310757e-06, "loss": 0.3078, "step": 9044 }, { "epoch": 0.310398078242965, "grad_norm": 0.8403259396645458, "learning_rate": 8.076750057816558e-06, "loss": 0.3187, "step": 9045 }, { "epoch": 0.31043239533287575, "grad_norm": 0.7833247759726717, "learning_rate": 8.0763119753127e-06, "loss": 0.3209, "step": 9046 }, { "epoch": 0.31046671242278656, "grad_norm": 0.8132188167279462, "learning_rate": 8.075873854804593e-06, "loss": 0.3345, "step": 9047 }, { "epoch": 0.3105010295126973, "grad_norm": 0.7827241965563063, "learning_rate": 8.075435696297652e-06, "loss": 0.3417, "step": 9048 }, { "epoch": 0.3105353466026081, "grad_norm": 0.7969067729974278, "learning_rate": 8.074997499797288e-06, "loss": 0.265, "step": 9049 }, { "epoch": 0.31056966369251887, "grad_norm": 0.8843926112761741, "learning_rate": 8.074559265308915e-06, "loss": 0.2806, "step": 9050 }, { "epoch": 0.3106039807824296, "grad_norm": 0.8227401767303413, "learning_rate": 8.074120992837945e-06, "loss": 0.3892, "step": 9051 }, { "epoch": 0.31063829787234043, "grad_norm": 0.8008673949385279, "learning_rate": 8.073682682389798e-06, "loss": 0.3052, "step": 9052 }, { "epoch": 0.3106726149622512, "grad_norm": 0.8127403071600019, "learning_rate": 8.073244333969883e-06, "loss": 0.4086, "step": 9053 }, { "epoch": 0.310706932052162, "grad_norm": 0.788038382239105, "learning_rate": 8.072805947583617e-06, "loss": 0.325, "step": 9054 }, { "epoch": 0.31074124914207274, "grad_norm": 0.738579521782931, "learning_rate": 8.072367523236418e-06, "loss": 0.2955, "step": 9055 }, { "epoch": 0.31077556623198355, "grad_norm": 0.7538487715382512, "learning_rate": 8.0719290609337e-06, "loss": 0.3729, "step": 9056 }, { "epoch": 0.3108098833218943, "grad_norm": 0.8438160957526047, "learning_rate": 8.071490560680877e-06, "loss": 0.3352, "step": 9057 }, { "epoch": 0.3108442004118051, "grad_norm": 0.7516957648913889, "learning_rate": 8.07105202248337e-06, "loss": 0.3415, "step": 9058 }, { "epoch": 0.31087851750171586, "grad_norm": 0.7694014127122843, "learning_rate": 8.070613446346598e-06, "loss": 0.33, "step": 9059 }, { "epoch": 0.3109128345916266, "grad_norm": 0.7974924869044532, "learning_rate": 8.070174832275975e-06, "loss": 0.2901, "step": 9060 }, { "epoch": 0.3109471516815374, "grad_norm": 0.7851096355091718, "learning_rate": 8.069736180276921e-06, "loss": 0.3171, "step": 9061 }, { "epoch": 0.31098146877144817, "grad_norm": 0.7666433806996651, "learning_rate": 8.069297490354857e-06, "loss": 0.3315, "step": 9062 }, { "epoch": 0.311015785861359, "grad_norm": 0.7745690932461156, "learning_rate": 8.068858762515199e-06, "loss": 0.33, "step": 9063 }, { "epoch": 0.3110501029512697, "grad_norm": 0.737726812576157, "learning_rate": 8.06841999676337e-06, "loss": 0.3504, "step": 9064 }, { "epoch": 0.31108442004118053, "grad_norm": 0.7359706851580124, "learning_rate": 8.06798119310479e-06, "loss": 0.2912, "step": 9065 }, { "epoch": 0.3111187371310913, "grad_norm": 0.8176051859013485, "learning_rate": 8.067542351544879e-06, "loss": 0.3463, "step": 9066 }, { "epoch": 0.31115305422100203, "grad_norm": 0.7436091624285903, "learning_rate": 8.067103472089059e-06, "loss": 0.3242, "step": 9067 }, { "epoch": 0.31118737131091284, "grad_norm": 0.7798444971256637, "learning_rate": 8.066664554742751e-06, "loss": 0.2849, "step": 9068 }, { "epoch": 0.3112216884008236, "grad_norm": 0.8055296173717116, "learning_rate": 8.066225599511378e-06, "loss": 0.3094, "step": 9069 }, { "epoch": 0.3112560054907344, "grad_norm": 0.7846109207600784, "learning_rate": 8.065786606400364e-06, "loss": 0.3538, "step": 9070 }, { "epoch": 0.31129032258064515, "grad_norm": 0.8125541308300915, "learning_rate": 8.065347575415128e-06, "loss": 0.2951, "step": 9071 }, { "epoch": 0.31132463967055596, "grad_norm": 0.7534004550728522, "learning_rate": 8.064908506561098e-06, "loss": 0.3295, "step": 9072 }, { "epoch": 0.3113589567604667, "grad_norm": 0.7224908273277901, "learning_rate": 8.064469399843696e-06, "loss": 0.2788, "step": 9073 }, { "epoch": 0.31139327385037746, "grad_norm": 0.8468725255494136, "learning_rate": 8.064030255268349e-06, "loss": 0.2643, "step": 9074 }, { "epoch": 0.31142759094028827, "grad_norm": 0.7726152828896599, "learning_rate": 8.06359107284048e-06, "loss": 0.2918, "step": 9075 }, { "epoch": 0.311461908030199, "grad_norm": 0.7299286653992435, "learning_rate": 8.063151852565514e-06, "loss": 0.2705, "step": 9076 }, { "epoch": 0.3114962251201098, "grad_norm": 0.7949056942405764, "learning_rate": 8.062712594448879e-06, "loss": 0.3075, "step": 9077 }, { "epoch": 0.3115305422100206, "grad_norm": 1.018754182367444, "learning_rate": 8.062273298496e-06, "loss": 0.3457, "step": 9078 }, { "epoch": 0.3115648592999314, "grad_norm": 0.7501084041085607, "learning_rate": 8.061833964712305e-06, "loss": 0.2883, "step": 9079 }, { "epoch": 0.31159917638984214, "grad_norm": 0.7619665812001072, "learning_rate": 8.06139459310322e-06, "loss": 0.3193, "step": 9080 }, { "epoch": 0.31163349347975294, "grad_norm": 0.7671836779949933, "learning_rate": 8.060955183674177e-06, "loss": 0.3283, "step": 9081 }, { "epoch": 0.3116678105696637, "grad_norm": 0.7269163488779512, "learning_rate": 8.060515736430598e-06, "loss": 0.3512, "step": 9082 }, { "epoch": 0.31170212765957445, "grad_norm": 0.8052827218773837, "learning_rate": 8.060076251377917e-06, "loss": 0.3403, "step": 9083 }, { "epoch": 0.31173644474948525, "grad_norm": 0.7617371488887105, "learning_rate": 8.059636728521562e-06, "loss": 0.3036, "step": 9084 }, { "epoch": 0.311770761839396, "grad_norm": 0.8608710726106076, "learning_rate": 8.059197167866962e-06, "loss": 0.3622, "step": 9085 }, { "epoch": 0.3118050789293068, "grad_norm": 0.8029325255806684, "learning_rate": 8.058757569419548e-06, "loss": 0.3187, "step": 9086 }, { "epoch": 0.31183939601921756, "grad_norm": 0.7537041320352083, "learning_rate": 8.05831793318475e-06, "loss": 0.3341, "step": 9087 }, { "epoch": 0.31187371310912837, "grad_norm": 0.8438980929068497, "learning_rate": 8.057878259168e-06, "loss": 0.2773, "step": 9088 }, { "epoch": 0.3119080301990391, "grad_norm": 0.6879439273903631, "learning_rate": 8.057438547374728e-06, "loss": 0.2955, "step": 9089 }, { "epoch": 0.3119423472889499, "grad_norm": 0.8307720046320578, "learning_rate": 8.056998797810367e-06, "loss": 0.3323, "step": 9090 }, { "epoch": 0.3119766643788607, "grad_norm": 0.7467801186924682, "learning_rate": 8.056559010480352e-06, "loss": 0.2994, "step": 9091 }, { "epoch": 0.31201098146877143, "grad_norm": 0.8347743699510519, "learning_rate": 8.056119185390114e-06, "loss": 0.2882, "step": 9092 }, { "epoch": 0.31204529855868224, "grad_norm": 0.7684104526048545, "learning_rate": 8.055679322545085e-06, "loss": 0.2833, "step": 9093 }, { "epoch": 0.312079615648593, "grad_norm": 0.9407094896700118, "learning_rate": 8.055239421950703e-06, "loss": 0.3948, "step": 9094 }, { "epoch": 0.3121139327385038, "grad_norm": 0.7036659553453446, "learning_rate": 8.054799483612396e-06, "loss": 0.3077, "step": 9095 }, { "epoch": 0.31214824982841455, "grad_norm": 0.7718282652271448, "learning_rate": 8.054359507535606e-06, "loss": 0.3056, "step": 9096 }, { "epoch": 0.3121825669183253, "grad_norm": 0.8025719469720479, "learning_rate": 8.053919493725765e-06, "loss": 0.2893, "step": 9097 }, { "epoch": 0.3122168840082361, "grad_norm": 0.7876621725736258, "learning_rate": 8.05347944218831e-06, "loss": 0.3228, "step": 9098 }, { "epoch": 0.31225120109814686, "grad_norm": 0.7882404300485478, "learning_rate": 8.053039352928674e-06, "loss": 0.359, "step": 9099 }, { "epoch": 0.31228551818805766, "grad_norm": 0.8180668463816715, "learning_rate": 8.052599225952299e-06, "loss": 0.3691, "step": 9100 }, { "epoch": 0.3123198352779684, "grad_norm": 0.7815888791644346, "learning_rate": 8.052159061264618e-06, "loss": 0.2795, "step": 9101 }, { "epoch": 0.3123541523678792, "grad_norm": 0.7411365337171084, "learning_rate": 8.05171885887107e-06, "loss": 0.3051, "step": 9102 }, { "epoch": 0.31238846945779, "grad_norm": 0.8242918843760074, "learning_rate": 8.051278618777092e-06, "loss": 0.3048, "step": 9103 }, { "epoch": 0.3124227865477008, "grad_norm": 0.7292750888872267, "learning_rate": 8.050838340988127e-06, "loss": 0.3068, "step": 9104 }, { "epoch": 0.31245710363761153, "grad_norm": 0.828296909089647, "learning_rate": 8.05039802550961e-06, "loss": 0.3081, "step": 9105 }, { "epoch": 0.3124914207275223, "grad_norm": 0.7385558340119732, "learning_rate": 8.049957672346983e-06, "loss": 0.3063, "step": 9106 }, { "epoch": 0.3125257378174331, "grad_norm": 0.7400558340131722, "learning_rate": 8.049517281505684e-06, "loss": 0.3273, "step": 9107 }, { "epoch": 0.31256005490734384, "grad_norm": 0.7336842797985093, "learning_rate": 8.049076852991153e-06, "loss": 0.305, "step": 9108 }, { "epoch": 0.31259437199725465, "grad_norm": 0.7353054588016832, "learning_rate": 8.048636386808835e-06, "loss": 0.3061, "step": 9109 }, { "epoch": 0.3126286890871654, "grad_norm": 0.7832461909925039, "learning_rate": 8.048195882964168e-06, "loss": 0.2865, "step": 9110 }, { "epoch": 0.3126630061770762, "grad_norm": 0.7604745569259419, "learning_rate": 8.047755341462593e-06, "loss": 0.3427, "step": 9111 }, { "epoch": 0.31269732326698696, "grad_norm": 0.7751323406709751, "learning_rate": 8.047314762309554e-06, "loss": 0.2513, "step": 9112 }, { "epoch": 0.3127316403568977, "grad_norm": 0.6605891612064129, "learning_rate": 8.046874145510495e-06, "loss": 0.3186, "step": 9113 }, { "epoch": 0.3127659574468085, "grad_norm": 0.8420668222171092, "learning_rate": 8.04643349107086e-06, "loss": 0.2638, "step": 9114 }, { "epoch": 0.31280027453671927, "grad_norm": 0.8470153136862475, "learning_rate": 8.045992798996088e-06, "loss": 0.3867, "step": 9115 }, { "epoch": 0.3128345916266301, "grad_norm": 0.7558775682476657, "learning_rate": 8.045552069291629e-06, "loss": 0.3035, "step": 9116 }, { "epoch": 0.31286890871654083, "grad_norm": 0.7223047057789062, "learning_rate": 8.045111301962923e-06, "loss": 0.2843, "step": 9117 }, { "epoch": 0.31290322580645163, "grad_norm": 0.8645636941573511, "learning_rate": 8.044670497015417e-06, "loss": 0.3817, "step": 9118 }, { "epoch": 0.3129375428963624, "grad_norm": 0.8124702504259818, "learning_rate": 8.044229654454558e-06, "loss": 0.2754, "step": 9119 }, { "epoch": 0.31297185998627314, "grad_norm": 0.7969765645494045, "learning_rate": 8.043788774285792e-06, "loss": 0.3568, "step": 9120 }, { "epoch": 0.31300617707618394, "grad_norm": 0.8287148161284514, "learning_rate": 8.043347856514561e-06, "loss": 0.3816, "step": 9121 }, { "epoch": 0.3130404941660947, "grad_norm": 0.689889222714732, "learning_rate": 8.042906901146317e-06, "loss": 0.2198, "step": 9122 }, { "epoch": 0.3130748112560055, "grad_norm": 0.8261209967983839, "learning_rate": 8.042465908186507e-06, "loss": 0.3295, "step": 9123 }, { "epoch": 0.31310912834591625, "grad_norm": 0.7521912383330654, "learning_rate": 8.042024877640577e-06, "loss": 0.2208, "step": 9124 }, { "epoch": 0.31314344543582706, "grad_norm": 0.8008503772316846, "learning_rate": 8.041583809513975e-06, "loss": 0.3281, "step": 9125 }, { "epoch": 0.3131777625257378, "grad_norm": 0.7716842499241664, "learning_rate": 8.041142703812154e-06, "loss": 0.2879, "step": 9126 }, { "epoch": 0.3132120796156486, "grad_norm": 1.1363356396288578, "learning_rate": 8.040701560540558e-06, "loss": 0.3381, "step": 9127 }, { "epoch": 0.31324639670555937, "grad_norm": 0.7736948086201638, "learning_rate": 8.04026037970464e-06, "loss": 0.2984, "step": 9128 }, { "epoch": 0.3132807137954701, "grad_norm": 0.8281038617849223, "learning_rate": 8.039819161309851e-06, "loss": 0.3044, "step": 9129 }, { "epoch": 0.31331503088538093, "grad_norm": 0.7287278901746901, "learning_rate": 8.039377905361638e-06, "loss": 0.3309, "step": 9130 }, { "epoch": 0.3133493479752917, "grad_norm": 0.682009644581405, "learning_rate": 8.038936611865456e-06, "loss": 0.316, "step": 9131 }, { "epoch": 0.3133836650652025, "grad_norm": 0.7802740126509534, "learning_rate": 8.038495280826756e-06, "loss": 0.2671, "step": 9132 }, { "epoch": 0.31341798215511324, "grad_norm": 0.7393924782694644, "learning_rate": 8.038053912250988e-06, "loss": 0.3164, "step": 9133 }, { "epoch": 0.31345229924502405, "grad_norm": 0.8092136267496531, "learning_rate": 8.037612506143606e-06, "loss": 0.3415, "step": 9134 }, { "epoch": 0.3134866163349348, "grad_norm": 0.8077709662955408, "learning_rate": 8.037171062510063e-06, "loss": 0.3809, "step": 9135 }, { "epoch": 0.31352093342484555, "grad_norm": 0.827899827992704, "learning_rate": 8.036729581355813e-06, "loss": 0.3182, "step": 9136 }, { "epoch": 0.31355525051475636, "grad_norm": 0.719505921783008, "learning_rate": 8.03628806268631e-06, "loss": 0.3548, "step": 9137 }, { "epoch": 0.3135895676046671, "grad_norm": 0.7039822327190711, "learning_rate": 8.035846506507006e-06, "loss": 0.3253, "step": 9138 }, { "epoch": 0.3136238846945779, "grad_norm": 0.8751880429299866, "learning_rate": 8.035404912823362e-06, "loss": 0.2788, "step": 9139 }, { "epoch": 0.31365820178448867, "grad_norm": 0.8178866917936072, "learning_rate": 8.034963281640824e-06, "loss": 0.313, "step": 9140 }, { "epoch": 0.3136925188743995, "grad_norm": 0.8231037197700412, "learning_rate": 8.034521612964857e-06, "loss": 0.3676, "step": 9141 }, { "epoch": 0.3137268359643102, "grad_norm": 0.9595967502536739, "learning_rate": 8.03407990680091e-06, "loss": 0.3376, "step": 9142 }, { "epoch": 0.313761153054221, "grad_norm": 0.7660146179028297, "learning_rate": 8.033638163154447e-06, "loss": 0.316, "step": 9143 }, { "epoch": 0.3137954701441318, "grad_norm": 0.7486696892452177, "learning_rate": 8.033196382030918e-06, "loss": 0.2574, "step": 9144 }, { "epoch": 0.31382978723404253, "grad_norm": 0.8046584969669239, "learning_rate": 8.032754563435786e-06, "loss": 0.2836, "step": 9145 }, { "epoch": 0.31386410432395334, "grad_norm": 0.7574252848789237, "learning_rate": 8.032312707374506e-06, "loss": 0.3545, "step": 9146 }, { "epoch": 0.3138984214138641, "grad_norm": 0.7672450410280094, "learning_rate": 8.031870813852539e-06, "loss": 0.312, "step": 9147 }, { "epoch": 0.3139327385037749, "grad_norm": 0.9519499356577442, "learning_rate": 8.031428882875344e-06, "loss": 0.3203, "step": 9148 }, { "epoch": 0.31396705559368565, "grad_norm": 0.7506849177206952, "learning_rate": 8.030986914448377e-06, "loss": 0.2828, "step": 9149 }, { "epoch": 0.31400137268359646, "grad_norm": 0.717027381800665, "learning_rate": 8.0305449085771e-06, "loss": 0.3858, "step": 9150 }, { "epoch": 0.3140356897735072, "grad_norm": 0.7040389620064293, "learning_rate": 8.030102865266975e-06, "loss": 0.3099, "step": 9151 }, { "epoch": 0.31407000686341796, "grad_norm": 0.8582768791690645, "learning_rate": 8.029660784523463e-06, "loss": 0.3491, "step": 9152 }, { "epoch": 0.31410432395332877, "grad_norm": 0.8324815271584758, "learning_rate": 8.029218666352023e-06, "loss": 0.3113, "step": 9153 }, { "epoch": 0.3141386410432395, "grad_norm": 0.7768491658845943, "learning_rate": 8.028776510758119e-06, "loss": 0.3115, "step": 9154 }, { "epoch": 0.3141729581331503, "grad_norm": 0.8392500518523636, "learning_rate": 8.028334317747212e-06, "loss": 0.3444, "step": 9155 }, { "epoch": 0.3142072752230611, "grad_norm": 0.741155381399484, "learning_rate": 8.027892087324765e-06, "loss": 0.3107, "step": 9156 }, { "epoch": 0.3142415923129719, "grad_norm": 0.8274385464773254, "learning_rate": 8.027449819496241e-06, "loss": 0.3465, "step": 9157 }, { "epoch": 0.31427590940288264, "grad_norm": 0.8203984473678222, "learning_rate": 8.027007514267105e-06, "loss": 0.2694, "step": 9158 }, { "epoch": 0.3143102264927934, "grad_norm": 1.3030794151697411, "learning_rate": 8.026565171642819e-06, "loss": 0.3624, "step": 9159 }, { "epoch": 0.3143445435827042, "grad_norm": 0.8497038695610051, "learning_rate": 8.02612279162885e-06, "loss": 0.3385, "step": 9160 }, { "epoch": 0.31437886067261495, "grad_norm": 0.6768662000197094, "learning_rate": 8.025680374230663e-06, "loss": 0.3455, "step": 9161 }, { "epoch": 0.31441317776252575, "grad_norm": 0.7767968786297377, "learning_rate": 8.02523791945372e-06, "loss": 0.3061, "step": 9162 }, { "epoch": 0.3144474948524365, "grad_norm": 0.7533539225727638, "learning_rate": 8.024795427303491e-06, "loss": 0.3059, "step": 9163 }, { "epoch": 0.3144818119423473, "grad_norm": 0.7788658166861072, "learning_rate": 8.024352897785443e-06, "loss": 0.324, "step": 9164 }, { "epoch": 0.31451612903225806, "grad_norm": 0.8472720719177402, "learning_rate": 8.023910330905037e-06, "loss": 0.3155, "step": 9165 }, { "epoch": 0.3145504461221688, "grad_norm": 0.7353602391877121, "learning_rate": 8.023467726667748e-06, "loss": 0.3137, "step": 9166 }, { "epoch": 0.3145847632120796, "grad_norm": 0.7452015059916568, "learning_rate": 8.02302508507904e-06, "loss": 0.2627, "step": 9167 }, { "epoch": 0.3146190803019904, "grad_norm": 0.8506686741231362, "learning_rate": 8.02258240614438e-06, "loss": 0.3319, "step": 9168 }, { "epoch": 0.3146533973919012, "grad_norm": 0.7749889936251273, "learning_rate": 8.022139689869239e-06, "loss": 0.314, "step": 9169 }, { "epoch": 0.31468771448181193, "grad_norm": 0.7617657401832888, "learning_rate": 8.021696936259085e-06, "loss": 0.2999, "step": 9170 }, { "epoch": 0.31472203157172274, "grad_norm": 0.8892278125885599, "learning_rate": 8.021254145319388e-06, "loss": 0.3329, "step": 9171 }, { "epoch": 0.3147563486616335, "grad_norm": 0.7513123667290882, "learning_rate": 8.02081131705562e-06, "loss": 0.2992, "step": 9172 }, { "epoch": 0.3147906657515443, "grad_norm": 0.764949971235456, "learning_rate": 8.020368451473248e-06, "loss": 0.3053, "step": 9173 }, { "epoch": 0.31482498284145505, "grad_norm": 0.7565298061246074, "learning_rate": 8.019925548577747e-06, "loss": 0.297, "step": 9174 }, { "epoch": 0.3148592999313658, "grad_norm": 0.6818764834866355, "learning_rate": 8.019482608374586e-06, "loss": 0.2863, "step": 9175 }, { "epoch": 0.3148936170212766, "grad_norm": 0.7474350940697634, "learning_rate": 8.019039630869239e-06, "loss": 0.3164, "step": 9176 }, { "epoch": 0.31492793411118736, "grad_norm": 0.7033427052593677, "learning_rate": 8.018596616067175e-06, "loss": 0.2774, "step": 9177 }, { "epoch": 0.31496225120109816, "grad_norm": 0.809158454904068, "learning_rate": 8.018153563973871e-06, "loss": 0.253, "step": 9178 }, { "epoch": 0.3149965682910089, "grad_norm": 0.739398027324104, "learning_rate": 8.017710474594798e-06, "loss": 0.2883, "step": 9179 }, { "epoch": 0.3150308853809197, "grad_norm": 0.8685417937722167, "learning_rate": 8.017267347935429e-06, "loss": 0.3339, "step": 9180 }, { "epoch": 0.3150652024708305, "grad_norm": 0.7400960086601834, "learning_rate": 8.016824184001241e-06, "loss": 0.2435, "step": 9181 }, { "epoch": 0.3150995195607412, "grad_norm": 0.7488331389495011, "learning_rate": 8.016380982797708e-06, "loss": 0.3137, "step": 9182 }, { "epoch": 0.31513383665065203, "grad_norm": 0.6820794922645295, "learning_rate": 8.015937744330304e-06, "loss": 0.3227, "step": 9183 }, { "epoch": 0.3151681537405628, "grad_norm": 0.8286932526882547, "learning_rate": 8.015494468604505e-06, "loss": 0.3549, "step": 9184 }, { "epoch": 0.3152024708304736, "grad_norm": 0.7780416878079078, "learning_rate": 8.015051155625788e-06, "loss": 0.2778, "step": 9185 }, { "epoch": 0.31523678792038434, "grad_norm": 0.8979024486972699, "learning_rate": 8.01460780539963e-06, "loss": 0.3217, "step": 9186 }, { "epoch": 0.31527110501029515, "grad_norm": 0.7564382326029009, "learning_rate": 8.014164417931508e-06, "loss": 0.2888, "step": 9187 }, { "epoch": 0.3153054221002059, "grad_norm": 0.69479687609717, "learning_rate": 8.013720993226896e-06, "loss": 0.2924, "step": 9188 }, { "epoch": 0.31533973919011665, "grad_norm": 0.8015654522378561, "learning_rate": 8.013277531291276e-06, "loss": 0.3253, "step": 9189 }, { "epoch": 0.31537405628002746, "grad_norm": 0.8389486846580307, "learning_rate": 8.012834032130125e-06, "loss": 0.3341, "step": 9190 }, { "epoch": 0.3154083733699382, "grad_norm": 0.7890252311151524, "learning_rate": 8.012390495748923e-06, "loss": 0.3189, "step": 9191 }, { "epoch": 0.315442690459849, "grad_norm": 0.6605315553195105, "learning_rate": 8.011946922153148e-06, "loss": 0.2415, "step": 9192 }, { "epoch": 0.31547700754975977, "grad_norm": 0.8357118518587101, "learning_rate": 8.01150331134828e-06, "loss": 0.3535, "step": 9193 }, { "epoch": 0.3155113246396706, "grad_norm": 0.7591877762769245, "learning_rate": 8.0110596633398e-06, "loss": 0.2877, "step": 9194 }, { "epoch": 0.3155456417295813, "grad_norm": 0.845681870393518, "learning_rate": 8.01061597813319e-06, "loss": 0.3403, "step": 9195 }, { "epoch": 0.31557995881949213, "grad_norm": 0.7126000512918987, "learning_rate": 8.010172255733928e-06, "loss": 0.2895, "step": 9196 }, { "epoch": 0.3156142759094029, "grad_norm": 0.7478542810765776, "learning_rate": 8.009728496147498e-06, "loss": 0.3347, "step": 9197 }, { "epoch": 0.31564859299931364, "grad_norm": 0.7719667186989536, "learning_rate": 8.00928469937938e-06, "loss": 0.3103, "step": 9198 }, { "epoch": 0.31568291008922444, "grad_norm": 0.7913238886318048, "learning_rate": 8.00884086543506e-06, "loss": 0.3257, "step": 9199 }, { "epoch": 0.3157172271791352, "grad_norm": 0.7338425678041612, "learning_rate": 8.008396994320017e-06, "loss": 0.3522, "step": 9200 }, { "epoch": 0.315751544269046, "grad_norm": 0.885984356388868, "learning_rate": 8.007953086039737e-06, "loss": 0.3298, "step": 9201 }, { "epoch": 0.31578586135895675, "grad_norm": 0.8359310541718493, "learning_rate": 8.007509140599704e-06, "loss": 0.3601, "step": 9202 }, { "epoch": 0.31582017844886756, "grad_norm": 0.8268786151567259, "learning_rate": 8.007065158005403e-06, "loss": 0.2711, "step": 9203 }, { "epoch": 0.3158544955387783, "grad_norm": 0.7367206252414616, "learning_rate": 8.006621138262316e-06, "loss": 0.2862, "step": 9204 }, { "epoch": 0.31588881262868906, "grad_norm": 0.7639413967464999, "learning_rate": 8.006177081375931e-06, "loss": 0.3338, "step": 9205 }, { "epoch": 0.31592312971859987, "grad_norm": 0.7411207223818767, "learning_rate": 8.005732987351734e-06, "loss": 0.3102, "step": 9206 }, { "epoch": 0.3159574468085106, "grad_norm": 0.7441819816715953, "learning_rate": 8.00528885619521e-06, "loss": 0.3086, "step": 9207 }, { "epoch": 0.31599176389842143, "grad_norm": 0.7314528722587325, "learning_rate": 8.004844687911844e-06, "loss": 0.2755, "step": 9208 }, { "epoch": 0.3160260809883322, "grad_norm": 0.9235720068723551, "learning_rate": 8.004400482507127e-06, "loss": 0.2977, "step": 9209 }, { "epoch": 0.316060398078243, "grad_norm": 0.695354702404884, "learning_rate": 8.003956239986544e-06, "loss": 0.2859, "step": 9210 }, { "epoch": 0.31609471516815374, "grad_norm": 0.8034382733600642, "learning_rate": 8.003511960355585e-06, "loss": 0.256, "step": 9211 }, { "epoch": 0.3161290322580645, "grad_norm": 0.7440785616934098, "learning_rate": 8.003067643619738e-06, "loss": 0.3304, "step": 9212 }, { "epoch": 0.3161633493479753, "grad_norm": 0.8072969784624349, "learning_rate": 8.002623289784491e-06, "loss": 0.3504, "step": 9213 }, { "epoch": 0.31619766643788605, "grad_norm": 0.7979789050713001, "learning_rate": 8.002178898855334e-06, "loss": 0.303, "step": 9214 }, { "epoch": 0.31623198352779686, "grad_norm": 0.8375747733294394, "learning_rate": 8.001734470837754e-06, "loss": 0.3083, "step": 9215 }, { "epoch": 0.3162663006177076, "grad_norm": 0.8501694372939211, "learning_rate": 8.001290005737247e-06, "loss": 0.312, "step": 9216 }, { "epoch": 0.3163006177076184, "grad_norm": 0.8409565742186011, "learning_rate": 8.000845503559301e-06, "loss": 0.3045, "step": 9217 }, { "epoch": 0.31633493479752917, "grad_norm": 0.7738323660756128, "learning_rate": 8.000400964309408e-06, "loss": 0.284, "step": 9218 }, { "epoch": 0.31636925188744, "grad_norm": 0.8634818954229155, "learning_rate": 7.999956387993058e-06, "loss": 0.3074, "step": 9219 }, { "epoch": 0.3164035689773507, "grad_norm": 0.8072939719434881, "learning_rate": 7.999511774615745e-06, "loss": 0.294, "step": 9220 }, { "epoch": 0.3164378860672615, "grad_norm": 0.7151839418616287, "learning_rate": 7.999067124182959e-06, "loss": 0.2962, "step": 9221 }, { "epoch": 0.3164722031571723, "grad_norm": 0.8298614824186897, "learning_rate": 7.9986224367002e-06, "loss": 0.2867, "step": 9222 }, { "epoch": 0.31650652024708303, "grad_norm": 0.7789544538812696, "learning_rate": 7.998177712172952e-06, "loss": 0.3159, "step": 9223 }, { "epoch": 0.31654083733699384, "grad_norm": 0.8315759250337398, "learning_rate": 7.997732950606715e-06, "loss": 0.328, "step": 9224 }, { "epoch": 0.3165751544269046, "grad_norm": 0.7081869537547265, "learning_rate": 7.997288152006985e-06, "loss": 0.28, "step": 9225 }, { "epoch": 0.3166094715168154, "grad_norm": 0.7085063279107654, "learning_rate": 7.996843316379252e-06, "loss": 0.2862, "step": 9226 }, { "epoch": 0.31664378860672615, "grad_norm": 0.8380544921259288, "learning_rate": 7.996398443729014e-06, "loss": 0.3056, "step": 9227 }, { "epoch": 0.3166781056966369, "grad_norm": 0.804865880651929, "learning_rate": 7.995953534061769e-06, "loss": 0.2683, "step": 9228 }, { "epoch": 0.3167124227865477, "grad_norm": 0.9016028464471273, "learning_rate": 7.995508587383009e-06, "loss": 0.3585, "step": 9229 }, { "epoch": 0.31674673987645846, "grad_norm": 0.8003468428051598, "learning_rate": 7.995063603698231e-06, "loss": 0.3122, "step": 9230 }, { "epoch": 0.31678105696636927, "grad_norm": 0.7375470094524328, "learning_rate": 7.994618583012937e-06, "loss": 0.2864, "step": 9231 }, { "epoch": 0.31681537405628, "grad_norm": 0.7055610343891073, "learning_rate": 7.994173525332622e-06, "loss": 0.3128, "step": 9232 }, { "epoch": 0.3168496911461908, "grad_norm": 0.8017066600941593, "learning_rate": 7.993728430662783e-06, "loss": 0.3373, "step": 9233 }, { "epoch": 0.3168840082361016, "grad_norm": 0.7145242577823954, "learning_rate": 7.993283299008919e-06, "loss": 0.3136, "step": 9234 }, { "epoch": 0.31691832532601233, "grad_norm": 0.7621895983897217, "learning_rate": 7.99283813037653e-06, "loss": 0.2878, "step": 9235 }, { "epoch": 0.31695264241592314, "grad_norm": 0.7599977474740475, "learning_rate": 7.992392924771117e-06, "loss": 0.2811, "step": 9236 }, { "epoch": 0.3169869595058339, "grad_norm": 0.8296302520056879, "learning_rate": 7.991947682198175e-06, "loss": 0.3399, "step": 9237 }, { "epoch": 0.3170212765957447, "grad_norm": 0.7563095284815281, "learning_rate": 7.99150240266321e-06, "loss": 0.3308, "step": 9238 }, { "epoch": 0.31705559368565545, "grad_norm": 0.918486333651132, "learning_rate": 7.99105708617172e-06, "loss": 0.2857, "step": 9239 }, { "epoch": 0.31708991077556625, "grad_norm": 0.8563817742757971, "learning_rate": 7.990611732729206e-06, "loss": 0.3008, "step": 9240 }, { "epoch": 0.317124227865477, "grad_norm": 0.7479865322778929, "learning_rate": 7.990166342341172e-06, "loss": 0.2714, "step": 9241 }, { "epoch": 0.31715854495538776, "grad_norm": 0.7415416511790346, "learning_rate": 7.98972091501312e-06, "loss": 0.2752, "step": 9242 }, { "epoch": 0.31719286204529856, "grad_norm": 0.7016498058943846, "learning_rate": 7.98927545075055e-06, "loss": 0.2872, "step": 9243 }, { "epoch": 0.3172271791352093, "grad_norm": 0.8385316628865503, "learning_rate": 7.988829949558967e-06, "loss": 0.3141, "step": 9244 }, { "epoch": 0.3172614962251201, "grad_norm": 0.7585503934943929, "learning_rate": 7.988384411443875e-06, "loss": 0.3521, "step": 9245 }, { "epoch": 0.3172958133150309, "grad_norm": 0.6691662978086994, "learning_rate": 7.987938836410777e-06, "loss": 0.3443, "step": 9246 }, { "epoch": 0.3173301304049417, "grad_norm": 0.7910749966425371, "learning_rate": 7.98749322446518e-06, "loss": 0.3315, "step": 9247 }, { "epoch": 0.31736444749485243, "grad_norm": 0.8442872573818638, "learning_rate": 7.987047575612585e-06, "loss": 0.2977, "step": 9248 }, { "epoch": 0.31739876458476324, "grad_norm": 0.7352116020940745, "learning_rate": 7.986601889858501e-06, "loss": 0.3034, "step": 9249 }, { "epoch": 0.317433081674674, "grad_norm": 0.7344194527369903, "learning_rate": 7.986156167208432e-06, "loss": 0.2941, "step": 9250 }, { "epoch": 0.31746739876458474, "grad_norm": 0.8192432974307101, "learning_rate": 7.985710407667885e-06, "loss": 0.2996, "step": 9251 }, { "epoch": 0.31750171585449555, "grad_norm": 0.7051310709568622, "learning_rate": 7.98526461124237e-06, "loss": 0.3139, "step": 9252 }, { "epoch": 0.3175360329444063, "grad_norm": 0.7873730256462831, "learning_rate": 7.98481877793739e-06, "loss": 0.3162, "step": 9253 }, { "epoch": 0.3175703500343171, "grad_norm": 0.7386228799160764, "learning_rate": 7.984372907758451e-06, "loss": 0.3523, "step": 9254 }, { "epoch": 0.31760466712422786, "grad_norm": 0.763406161574861, "learning_rate": 7.983927000711066e-06, "loss": 0.3434, "step": 9255 }, { "epoch": 0.31763898421413866, "grad_norm": 0.7893786535877503, "learning_rate": 7.98348105680074e-06, "loss": 0.2812, "step": 9256 }, { "epoch": 0.3176733013040494, "grad_norm": 0.7845828143227297, "learning_rate": 7.983035076032988e-06, "loss": 0.2913, "step": 9257 }, { "epoch": 0.31770761839396017, "grad_norm": 0.7411587819387803, "learning_rate": 7.982589058413312e-06, "loss": 0.2865, "step": 9258 }, { "epoch": 0.317741935483871, "grad_norm": 0.8174205036050339, "learning_rate": 7.982143003947226e-06, "loss": 0.3264, "step": 9259 }, { "epoch": 0.3177762525737817, "grad_norm": 0.7979668988908064, "learning_rate": 7.981696912640243e-06, "loss": 0.3708, "step": 9260 }, { "epoch": 0.31781056966369253, "grad_norm": 0.7722622654139418, "learning_rate": 7.98125078449787e-06, "loss": 0.3097, "step": 9261 }, { "epoch": 0.3178448867536033, "grad_norm": 0.7028063087034075, "learning_rate": 7.980804619525618e-06, "loss": 0.3259, "step": 9262 }, { "epoch": 0.3178792038435141, "grad_norm": 0.7047886182235444, "learning_rate": 7.980358417728998e-06, "loss": 0.2939, "step": 9263 }, { "epoch": 0.31791352093342484, "grad_norm": 0.7339331561755031, "learning_rate": 7.979912179113529e-06, "loss": 0.2965, "step": 9264 }, { "epoch": 0.3179478380233356, "grad_norm": 0.7390440710306446, "learning_rate": 7.979465903684716e-06, "loss": 0.3093, "step": 9265 }, { "epoch": 0.3179821551132464, "grad_norm": 0.7353866668125583, "learning_rate": 7.979019591448077e-06, "loss": 0.3148, "step": 9266 }, { "epoch": 0.31801647220315715, "grad_norm": 0.8135267601045534, "learning_rate": 7.978573242409123e-06, "loss": 0.292, "step": 9267 }, { "epoch": 0.31805078929306796, "grad_norm": 0.9034026788989088, "learning_rate": 7.978126856573371e-06, "loss": 0.3438, "step": 9268 }, { "epoch": 0.3180851063829787, "grad_norm": 0.8550337377161121, "learning_rate": 7.977680433946333e-06, "loss": 0.3164, "step": 9269 }, { "epoch": 0.3181194234728895, "grad_norm": 1.0368218791783264, "learning_rate": 7.977233974533524e-06, "loss": 0.2581, "step": 9270 }, { "epoch": 0.31815374056280027, "grad_norm": 0.723822040270499, "learning_rate": 7.976787478340462e-06, "loss": 0.314, "step": 9271 }, { "epoch": 0.3181880576527111, "grad_norm": 0.7636818251354313, "learning_rate": 7.976340945372659e-06, "loss": 0.303, "step": 9272 }, { "epoch": 0.3182223747426218, "grad_norm": 1.3009248691165747, "learning_rate": 7.975894375635635e-06, "loss": 0.3385, "step": 9273 }, { "epoch": 0.3182566918325326, "grad_norm": 0.7408553647633257, "learning_rate": 7.975447769134907e-06, "loss": 0.3525, "step": 9274 }, { "epoch": 0.3182910089224434, "grad_norm": 0.8765079056397267, "learning_rate": 7.97500112587599e-06, "loss": 0.312, "step": 9275 }, { "epoch": 0.31832532601235414, "grad_norm": 0.8450618902041294, "learning_rate": 7.9745544458644e-06, "loss": 0.3654, "step": 9276 }, { "epoch": 0.31835964310226494, "grad_norm": 0.7930402555136692, "learning_rate": 7.97410772910566e-06, "loss": 0.3346, "step": 9277 }, { "epoch": 0.3183939601921757, "grad_norm": 0.7547390580418087, "learning_rate": 7.973660975605288e-06, "loss": 0.2997, "step": 9278 }, { "epoch": 0.3184282772820865, "grad_norm": 0.7728853409368545, "learning_rate": 7.973214185368798e-06, "loss": 0.3632, "step": 9279 }, { "epoch": 0.31846259437199725, "grad_norm": 0.6972294550337317, "learning_rate": 7.972767358401718e-06, "loss": 0.3041, "step": 9280 }, { "epoch": 0.318496911461908, "grad_norm": 0.8071222082183003, "learning_rate": 7.972320494709559e-06, "loss": 0.4188, "step": 9281 }, { "epoch": 0.3185312285518188, "grad_norm": 0.75498973687775, "learning_rate": 7.971873594297847e-06, "loss": 0.2786, "step": 9282 }, { "epoch": 0.31856554564172956, "grad_norm": 0.8230438193446407, "learning_rate": 7.971426657172102e-06, "loss": 0.3227, "step": 9283 }, { "epoch": 0.31859986273164037, "grad_norm": 0.7535095506430354, "learning_rate": 7.970979683337845e-06, "loss": 0.2736, "step": 9284 }, { "epoch": 0.3186341798215511, "grad_norm": 0.7478244760629416, "learning_rate": 7.970532672800598e-06, "loss": 0.3035, "step": 9285 }, { "epoch": 0.31866849691146193, "grad_norm": 0.7785967563830516, "learning_rate": 7.970085625565882e-06, "loss": 0.3137, "step": 9286 }, { "epoch": 0.3187028140013727, "grad_norm": 0.7510776212793308, "learning_rate": 7.969638541639223e-06, "loss": 0.3308, "step": 9287 }, { "epoch": 0.31873713109128343, "grad_norm": 0.8001002425331392, "learning_rate": 7.96919142102614e-06, "loss": 0.2848, "step": 9288 }, { "epoch": 0.31877144818119424, "grad_norm": 0.7329093530134548, "learning_rate": 7.968744263732159e-06, "loss": 0.2791, "step": 9289 }, { "epoch": 0.318805765271105, "grad_norm": 0.8971427840300621, "learning_rate": 7.968297069762804e-06, "loss": 0.3489, "step": 9290 }, { "epoch": 0.3188400823610158, "grad_norm": 0.9324839325669745, "learning_rate": 7.967849839123598e-06, "loss": 0.3908, "step": 9291 }, { "epoch": 0.31887439945092655, "grad_norm": 0.8878119901039472, "learning_rate": 7.96740257182007e-06, "loss": 0.3676, "step": 9292 }, { "epoch": 0.31890871654083736, "grad_norm": 0.7916446609190771, "learning_rate": 7.966955267857743e-06, "loss": 0.3436, "step": 9293 }, { "epoch": 0.3189430336307481, "grad_norm": 0.8894884108686016, "learning_rate": 7.96650792724214e-06, "loss": 0.2952, "step": 9294 }, { "epoch": 0.3189773507206589, "grad_norm": 0.8788908440859183, "learning_rate": 7.966060549978793e-06, "loss": 0.3281, "step": 9295 }, { "epoch": 0.31901166781056967, "grad_norm": 0.8107042028580151, "learning_rate": 7.965613136073225e-06, "loss": 0.3254, "step": 9296 }, { "epoch": 0.3190459849004804, "grad_norm": 0.8125729841362868, "learning_rate": 7.965165685530963e-06, "loss": 0.2919, "step": 9297 }, { "epoch": 0.3190803019903912, "grad_norm": 0.7300465649224814, "learning_rate": 7.964718198357538e-06, "loss": 0.3418, "step": 9298 }, { "epoch": 0.319114619080302, "grad_norm": 0.7889014529495338, "learning_rate": 7.964270674558474e-06, "loss": 0.3026, "step": 9299 }, { "epoch": 0.3191489361702128, "grad_norm": 0.8378601463686849, "learning_rate": 7.963823114139304e-06, "loss": 0.2789, "step": 9300 }, { "epoch": 0.31918325326012353, "grad_norm": 0.7320201036638729, "learning_rate": 7.963375517105555e-06, "loss": 0.2988, "step": 9301 }, { "epoch": 0.31921757035003434, "grad_norm": 0.7425062025506448, "learning_rate": 7.962927883462754e-06, "loss": 0.3219, "step": 9302 }, { "epoch": 0.3192518874399451, "grad_norm": 0.8312755096965098, "learning_rate": 7.962480213216435e-06, "loss": 0.3469, "step": 9303 }, { "epoch": 0.31928620452985584, "grad_norm": 0.719255096828274, "learning_rate": 7.962032506372127e-06, "loss": 0.2781, "step": 9304 }, { "epoch": 0.31932052161976665, "grad_norm": 0.770731473078003, "learning_rate": 7.961584762935362e-06, "loss": 0.3413, "step": 9305 }, { "epoch": 0.3193548387096774, "grad_norm": 0.7397017084007496, "learning_rate": 7.961136982911668e-06, "loss": 0.2865, "step": 9306 }, { "epoch": 0.3193891557995882, "grad_norm": 0.6759884201730701, "learning_rate": 7.960689166306579e-06, "loss": 0.2944, "step": 9307 }, { "epoch": 0.31942347288949896, "grad_norm": 0.7589625357939895, "learning_rate": 7.960241313125627e-06, "loss": 0.3624, "step": 9308 }, { "epoch": 0.31945778997940977, "grad_norm": 0.7540386355583697, "learning_rate": 7.959793423374347e-06, "loss": 0.3392, "step": 9309 }, { "epoch": 0.3194921070693205, "grad_norm": 0.8915240938020008, "learning_rate": 7.959345497058268e-06, "loss": 0.4267, "step": 9310 }, { "epoch": 0.31952642415923127, "grad_norm": 0.7180320281834749, "learning_rate": 7.958897534182929e-06, "loss": 0.2953, "step": 9311 }, { "epoch": 0.3195607412491421, "grad_norm": 0.7777991376326253, "learning_rate": 7.958449534753855e-06, "loss": 0.2907, "step": 9312 }, { "epoch": 0.31959505833905283, "grad_norm": 0.7697287025018913, "learning_rate": 7.958001498776591e-06, "loss": 0.3133, "step": 9313 }, { "epoch": 0.31962937542896364, "grad_norm": 0.8709738703609406, "learning_rate": 7.957553426256665e-06, "loss": 0.3209, "step": 9314 }, { "epoch": 0.3196636925188744, "grad_norm": 0.6967150717144819, "learning_rate": 7.957105317199616e-06, "loss": 0.284, "step": 9315 }, { "epoch": 0.3196980096087852, "grad_norm": 0.8539166086085106, "learning_rate": 7.956657171610976e-06, "loss": 0.2798, "step": 9316 }, { "epoch": 0.31973232669869595, "grad_norm": 0.7145779371389386, "learning_rate": 7.956208989496286e-06, "loss": 0.3074, "step": 9317 }, { "epoch": 0.31976664378860675, "grad_norm": 0.8226378642015243, "learning_rate": 7.95576077086108e-06, "loss": 0.3047, "step": 9318 }, { "epoch": 0.3198009608785175, "grad_norm": 0.7689759960946637, "learning_rate": 7.955312515710895e-06, "loss": 0.2806, "step": 9319 }, { "epoch": 0.31983527796842826, "grad_norm": 0.6938226514107747, "learning_rate": 7.95486422405127e-06, "loss": 0.2952, "step": 9320 }, { "epoch": 0.31986959505833906, "grad_norm": 0.7575253185330134, "learning_rate": 7.954415895887743e-06, "loss": 0.3386, "step": 9321 }, { "epoch": 0.3199039121482498, "grad_norm": 0.7700592921351628, "learning_rate": 7.953967531225852e-06, "loss": 0.2843, "step": 9322 }, { "epoch": 0.3199382292381606, "grad_norm": 0.7589009248356533, "learning_rate": 7.953519130071135e-06, "loss": 0.3067, "step": 9323 }, { "epoch": 0.31997254632807137, "grad_norm": 0.7820399832115448, "learning_rate": 7.953070692429134e-06, "loss": 0.3601, "step": 9324 }, { "epoch": 0.3200068634179822, "grad_norm": 0.740111196146842, "learning_rate": 7.952622218305387e-06, "loss": 0.3298, "step": 9325 }, { "epoch": 0.32004118050789293, "grad_norm": 0.758545510182546, "learning_rate": 7.952173707705436e-06, "loss": 0.288, "step": 9326 }, { "epoch": 0.3200754975978037, "grad_norm": 0.758574210554172, "learning_rate": 7.951725160634818e-06, "loss": 0.3169, "step": 9327 }, { "epoch": 0.3201098146877145, "grad_norm": 0.7687085146690963, "learning_rate": 7.95127657709908e-06, "loss": 0.3123, "step": 9328 }, { "epoch": 0.32014413177762524, "grad_norm": 0.8078347912989419, "learning_rate": 7.95082795710376e-06, "loss": 0.3504, "step": 9329 }, { "epoch": 0.32017844886753605, "grad_norm": 0.747000728024352, "learning_rate": 7.950379300654399e-06, "loss": 0.3501, "step": 9330 }, { "epoch": 0.3202127659574468, "grad_norm": 0.7568180250902616, "learning_rate": 7.949930607756545e-06, "loss": 0.3185, "step": 9331 }, { "epoch": 0.3202470830473576, "grad_norm": 0.83435673863219, "learning_rate": 7.949481878415733e-06, "loss": 0.3028, "step": 9332 }, { "epoch": 0.32028140013726836, "grad_norm": 0.703019424765654, "learning_rate": 7.949033112637515e-06, "loss": 0.2811, "step": 9333 }, { "epoch": 0.3203157172271791, "grad_norm": 0.9851510654822218, "learning_rate": 7.94858431042743e-06, "loss": 0.2663, "step": 9334 }, { "epoch": 0.3203500343170899, "grad_norm": 0.7758736203436555, "learning_rate": 7.948135471791023e-06, "loss": 0.33, "step": 9335 }, { "epoch": 0.32038435140700067, "grad_norm": 0.8187014143851546, "learning_rate": 7.947686596733839e-06, "loss": 0.3557, "step": 9336 }, { "epoch": 0.3204186684969115, "grad_norm": 0.765136014314944, "learning_rate": 7.947237685261425e-06, "loss": 0.3386, "step": 9337 }, { "epoch": 0.3204529855868222, "grad_norm": 0.8294420079144623, "learning_rate": 7.946788737379325e-06, "loss": 0.3043, "step": 9338 }, { "epoch": 0.32048730267673303, "grad_norm": 0.8064071075990096, "learning_rate": 7.946339753093086e-06, "loss": 0.2944, "step": 9339 }, { "epoch": 0.3205216197666438, "grad_norm": 0.7535350003591467, "learning_rate": 7.945890732408255e-06, "loss": 0.3085, "step": 9340 }, { "epoch": 0.3205559368565546, "grad_norm": 0.8064033284168391, "learning_rate": 7.945441675330376e-06, "loss": 0.3334, "step": 9341 }, { "epoch": 0.32059025394646534, "grad_norm": 0.701797896533142, "learning_rate": 7.944992581865001e-06, "loss": 0.3649, "step": 9342 }, { "epoch": 0.3206245710363761, "grad_norm": 0.7044074138732898, "learning_rate": 7.944543452017676e-06, "loss": 0.2883, "step": 9343 }, { "epoch": 0.3206588881262869, "grad_norm": 0.7353901432993173, "learning_rate": 7.94409428579395e-06, "loss": 0.2919, "step": 9344 }, { "epoch": 0.32069320521619765, "grad_norm": 0.8110492394285516, "learning_rate": 7.943645083199374e-06, "loss": 0.3565, "step": 9345 }, { "epoch": 0.32072752230610846, "grad_norm": 0.8136580657927075, "learning_rate": 7.94319584423949e-06, "loss": 0.3023, "step": 9346 }, { "epoch": 0.3207618393960192, "grad_norm": 0.7310152185716017, "learning_rate": 7.942746568919857e-06, "loss": 0.2804, "step": 9347 }, { "epoch": 0.32079615648593, "grad_norm": 0.7784374165992192, "learning_rate": 7.942297257246019e-06, "loss": 0.3121, "step": 9348 }, { "epoch": 0.32083047357584077, "grad_norm": 0.756945114387023, "learning_rate": 7.941847909223529e-06, "loss": 0.2877, "step": 9349 }, { "epoch": 0.3208647906657515, "grad_norm": 0.8103838230271355, "learning_rate": 7.941398524857938e-06, "loss": 0.3202, "step": 9350 }, { "epoch": 0.3208991077556623, "grad_norm": 0.7462080515376533, "learning_rate": 7.940949104154798e-06, "loss": 0.3064, "step": 9351 }, { "epoch": 0.3209334248455731, "grad_norm": 0.8549553313224616, "learning_rate": 7.94049964711966e-06, "loss": 0.3484, "step": 9352 }, { "epoch": 0.3209677419354839, "grad_norm": 0.8501574101767079, "learning_rate": 7.940050153758076e-06, "loss": 0.3402, "step": 9353 }, { "epoch": 0.32100205902539464, "grad_norm": 0.7479111112201512, "learning_rate": 7.939600624075603e-06, "loss": 0.3153, "step": 9354 }, { "epoch": 0.32103637611530544, "grad_norm": 0.7063233410209007, "learning_rate": 7.93915105807779e-06, "loss": 0.2872, "step": 9355 }, { "epoch": 0.3210706932052162, "grad_norm": 0.8284157684814265, "learning_rate": 7.93870145577019e-06, "loss": 0.308, "step": 9356 }, { "epoch": 0.32110501029512695, "grad_norm": 0.7376729738732907, "learning_rate": 7.938251817158364e-06, "loss": 0.361, "step": 9357 }, { "epoch": 0.32113932738503775, "grad_norm": 0.7640437486674438, "learning_rate": 7.93780214224786e-06, "loss": 0.2797, "step": 9358 }, { "epoch": 0.3211736444749485, "grad_norm": 0.846768029941109, "learning_rate": 7.937352431044237e-06, "loss": 0.3068, "step": 9359 }, { "epoch": 0.3212079615648593, "grad_norm": 0.8183093295999392, "learning_rate": 7.936902683553048e-06, "loss": 0.282, "step": 9360 }, { "epoch": 0.32124227865477006, "grad_norm": 0.7601362847597314, "learning_rate": 7.936452899779852e-06, "loss": 0.2884, "step": 9361 }, { "epoch": 0.32127659574468087, "grad_norm": 0.7808863082193583, "learning_rate": 7.936003079730201e-06, "loss": 0.2967, "step": 9362 }, { "epoch": 0.3213109128345916, "grad_norm": 0.7854619768172535, "learning_rate": 7.935553223409658e-06, "loss": 0.3073, "step": 9363 }, { "epoch": 0.32134522992450243, "grad_norm": 0.7550358235586523, "learning_rate": 7.935103330823777e-06, "loss": 0.4081, "step": 9364 }, { "epoch": 0.3213795470144132, "grad_norm": 0.7620796835159919, "learning_rate": 7.934653401978115e-06, "loss": 0.2868, "step": 9365 }, { "epoch": 0.32141386410432393, "grad_norm": 0.7917680487287992, "learning_rate": 7.934203436878233e-06, "loss": 0.2946, "step": 9366 }, { "epoch": 0.32144818119423474, "grad_norm": 0.7585282705959636, "learning_rate": 7.933753435529688e-06, "loss": 0.25, "step": 9367 }, { "epoch": 0.3214824982841455, "grad_norm": 0.7945033878315914, "learning_rate": 7.93330339793804e-06, "loss": 0.3507, "step": 9368 }, { "epoch": 0.3215168153740563, "grad_norm": 0.7640723702536025, "learning_rate": 7.93285332410885e-06, "loss": 0.3313, "step": 9369 }, { "epoch": 0.32155113246396705, "grad_norm": 0.7029125650745338, "learning_rate": 7.932403214047672e-06, "loss": 0.2933, "step": 9370 }, { "epoch": 0.32158544955387786, "grad_norm": 0.7706513882270868, "learning_rate": 7.931953067760074e-06, "loss": 0.4386, "step": 9371 }, { "epoch": 0.3216197666437886, "grad_norm": 0.8069314643208372, "learning_rate": 7.931502885251613e-06, "loss": 0.3315, "step": 9372 }, { "epoch": 0.32165408373369936, "grad_norm": 0.7764724764983492, "learning_rate": 7.931052666527854e-06, "loss": 0.3358, "step": 9373 }, { "epoch": 0.32168840082361017, "grad_norm": 0.7807813884333455, "learning_rate": 7.930602411594355e-06, "loss": 0.3032, "step": 9374 }, { "epoch": 0.3217227179135209, "grad_norm": 0.7747017006686397, "learning_rate": 7.93015212045668e-06, "loss": 0.2653, "step": 9375 }, { "epoch": 0.3217570350034317, "grad_norm": 0.7885844243258858, "learning_rate": 7.929701793120392e-06, "loss": 0.3265, "step": 9376 }, { "epoch": 0.3217913520933425, "grad_norm": 0.8344816055575507, "learning_rate": 7.929251429591053e-06, "loss": 0.305, "step": 9377 }, { "epoch": 0.3218256691832533, "grad_norm": 0.8736059781575906, "learning_rate": 7.928801029874229e-06, "loss": 0.3175, "step": 9378 }, { "epoch": 0.32185998627316403, "grad_norm": 0.798132778823891, "learning_rate": 7.928350593975485e-06, "loss": 0.337, "step": 9379 }, { "epoch": 0.3218943033630748, "grad_norm": 0.7358735418077982, "learning_rate": 7.92790012190038e-06, "loss": 0.2651, "step": 9380 }, { "epoch": 0.3219286204529856, "grad_norm": 0.7997117640993786, "learning_rate": 7.927449613654483e-06, "loss": 0.3311, "step": 9381 }, { "epoch": 0.32196293754289634, "grad_norm": 0.8982678250723023, "learning_rate": 7.92699906924336e-06, "loss": 0.3389, "step": 9382 }, { "epoch": 0.32199725463280715, "grad_norm": 0.7490420562404142, "learning_rate": 7.926548488672576e-06, "loss": 0.3443, "step": 9383 }, { "epoch": 0.3220315717227179, "grad_norm": 0.818930657179096, "learning_rate": 7.926097871947698e-06, "loss": 0.2884, "step": 9384 }, { "epoch": 0.3220658888126287, "grad_norm": 0.8034231210902315, "learning_rate": 7.925647219074292e-06, "loss": 0.332, "step": 9385 }, { "epoch": 0.32210020590253946, "grad_norm": 0.764585131835506, "learning_rate": 7.925196530057925e-06, "loss": 0.3312, "step": 9386 }, { "epoch": 0.32213452299245027, "grad_norm": 0.7863053946759168, "learning_rate": 7.924745804904166e-06, "loss": 0.3078, "step": 9387 }, { "epoch": 0.322168840082361, "grad_norm": 0.7885454601486325, "learning_rate": 7.924295043618581e-06, "loss": 0.3145, "step": 9388 }, { "epoch": 0.32220315717227177, "grad_norm": 0.837855737754048, "learning_rate": 7.923844246206742e-06, "loss": 0.2981, "step": 9389 }, { "epoch": 0.3222374742621826, "grad_norm": 0.8792360751581928, "learning_rate": 7.923393412674215e-06, "loss": 0.2821, "step": 9390 }, { "epoch": 0.32227179135209333, "grad_norm": 0.7207390258524005, "learning_rate": 7.922942543026571e-06, "loss": 0.2704, "step": 9391 }, { "epoch": 0.32230610844200414, "grad_norm": 0.7738561589181834, "learning_rate": 7.922491637269381e-06, "loss": 0.3098, "step": 9392 }, { "epoch": 0.3223404255319149, "grad_norm": 0.8217538509498622, "learning_rate": 7.922040695408213e-06, "loss": 0.3316, "step": 9393 }, { "epoch": 0.3223747426218257, "grad_norm": 0.8123359498099624, "learning_rate": 7.921589717448638e-06, "loss": 0.3191, "step": 9394 }, { "epoch": 0.32240905971173645, "grad_norm": 0.7291528261605335, "learning_rate": 7.921138703396229e-06, "loss": 0.2913, "step": 9395 }, { "epoch": 0.3224433768016472, "grad_norm": 0.7415327098028618, "learning_rate": 7.920687653256557e-06, "loss": 0.2937, "step": 9396 }, { "epoch": 0.322477693891558, "grad_norm": 0.8222860799676012, "learning_rate": 7.920236567035196e-06, "loss": 0.3596, "step": 9397 }, { "epoch": 0.32251201098146876, "grad_norm": 0.7819245224746955, "learning_rate": 7.919785444737716e-06, "loss": 0.3123, "step": 9398 }, { "epoch": 0.32254632807137956, "grad_norm": 0.7871926927988605, "learning_rate": 7.91933428636969e-06, "loss": 0.2866, "step": 9399 }, { "epoch": 0.3225806451612903, "grad_norm": 0.7421985284094776, "learning_rate": 7.918883091936693e-06, "loss": 0.2741, "step": 9400 }, { "epoch": 0.3226149622512011, "grad_norm": 0.7648478192515167, "learning_rate": 7.918431861444298e-06, "loss": 0.2879, "step": 9401 }, { "epoch": 0.32264927934111187, "grad_norm": 0.7567973164596361, "learning_rate": 7.917980594898081e-06, "loss": 0.3712, "step": 9402 }, { "epoch": 0.3226835964310226, "grad_norm": 0.7732855734638612, "learning_rate": 7.917529292303614e-06, "loss": 0.2644, "step": 9403 }, { "epoch": 0.32271791352093343, "grad_norm": 0.6752849744637678, "learning_rate": 7.917077953666475e-06, "loss": 0.3362, "step": 9404 }, { "epoch": 0.3227522306108442, "grad_norm": 0.8832490863086087, "learning_rate": 7.91662657899224e-06, "loss": 0.2834, "step": 9405 }, { "epoch": 0.322786547700755, "grad_norm": 0.6638742199526666, "learning_rate": 7.916175168286482e-06, "loss": 0.3127, "step": 9406 }, { "epoch": 0.32282086479066574, "grad_norm": 0.7766114640418939, "learning_rate": 7.915723721554781e-06, "loss": 0.2678, "step": 9407 }, { "epoch": 0.32285518188057655, "grad_norm": 0.8202390826370048, "learning_rate": 7.915272238802713e-06, "loss": 0.3333, "step": 9408 }, { "epoch": 0.3228894989704873, "grad_norm": 0.7396613973668806, "learning_rate": 7.914820720035855e-06, "loss": 0.277, "step": 9409 }, { "epoch": 0.3229238160603981, "grad_norm": 0.7652101028423453, "learning_rate": 7.914369165259786e-06, "loss": 0.362, "step": 9410 }, { "epoch": 0.32295813315030886, "grad_norm": 0.7198020486270377, "learning_rate": 7.913917574480083e-06, "loss": 0.283, "step": 9411 }, { "epoch": 0.3229924502402196, "grad_norm": 0.7754540872289573, "learning_rate": 7.913465947702326e-06, "loss": 0.2956, "step": 9412 }, { "epoch": 0.3230267673301304, "grad_norm": 0.6553225987956162, "learning_rate": 7.913014284932092e-06, "loss": 0.2813, "step": 9413 }, { "epoch": 0.32306108442004117, "grad_norm": 0.8185940591015771, "learning_rate": 7.912562586174966e-06, "loss": 0.385, "step": 9414 }, { "epoch": 0.323095401509952, "grad_norm": 0.6978660838664963, "learning_rate": 7.912110851436523e-06, "loss": 0.2854, "step": 9415 }, { "epoch": 0.3231297185998627, "grad_norm": 0.8165267712086433, "learning_rate": 7.911659080722347e-06, "loss": 0.2996, "step": 9416 }, { "epoch": 0.32316403568977353, "grad_norm": 0.7531117535754555, "learning_rate": 7.911207274038017e-06, "loss": 0.2997, "step": 9417 }, { "epoch": 0.3231983527796843, "grad_norm": 0.7933364571885072, "learning_rate": 7.910755431389114e-06, "loss": 0.3161, "step": 9418 }, { "epoch": 0.32323266986959504, "grad_norm": 0.6964375852715123, "learning_rate": 7.910303552781222e-06, "loss": 0.2922, "step": 9419 }, { "epoch": 0.32326698695950584, "grad_norm": 0.8641722147804896, "learning_rate": 7.909851638219923e-06, "loss": 0.294, "step": 9420 }, { "epoch": 0.3233013040494166, "grad_norm": 0.8380948129411607, "learning_rate": 7.909399687710802e-06, "loss": 0.3474, "step": 9421 }, { "epoch": 0.3233356211393274, "grad_norm": 0.7306036411566104, "learning_rate": 7.908947701259438e-06, "loss": 0.3514, "step": 9422 }, { "epoch": 0.32336993822923815, "grad_norm": 0.8869084271686236, "learning_rate": 7.908495678871413e-06, "loss": 0.3311, "step": 9423 }, { "epoch": 0.32340425531914896, "grad_norm": 0.7412534476753114, "learning_rate": 7.908043620552321e-06, "loss": 0.2654, "step": 9424 }, { "epoch": 0.3234385724090597, "grad_norm": 0.7819692629554887, "learning_rate": 7.907591526307736e-06, "loss": 0.316, "step": 9425 }, { "epoch": 0.32347288949897046, "grad_norm": 0.8131244062599315, "learning_rate": 7.90713939614325e-06, "loss": 0.297, "step": 9426 }, { "epoch": 0.32350720658888127, "grad_norm": 0.7185071831894714, "learning_rate": 7.906687230064444e-06, "loss": 0.3114, "step": 9427 }, { "epoch": 0.323541523678792, "grad_norm": 0.7282616686048186, "learning_rate": 7.906235028076909e-06, "loss": 0.3389, "step": 9428 }, { "epoch": 0.3235758407687028, "grad_norm": 0.758924263932178, "learning_rate": 7.905782790186227e-06, "loss": 0.2745, "step": 9429 }, { "epoch": 0.3236101578586136, "grad_norm": 0.6795584383707949, "learning_rate": 7.905330516397986e-06, "loss": 0.2924, "step": 9430 }, { "epoch": 0.3236444749485244, "grad_norm": 0.7111530195783282, "learning_rate": 7.904878206717775e-06, "loss": 0.2714, "step": 9431 }, { "epoch": 0.32367879203843514, "grad_norm": 0.7075406764505616, "learning_rate": 7.904425861151179e-06, "loss": 0.2664, "step": 9432 }, { "epoch": 0.32371310912834594, "grad_norm": 0.6923897576000441, "learning_rate": 7.903973479703788e-06, "loss": 0.2919, "step": 9433 }, { "epoch": 0.3237474262182567, "grad_norm": 0.6698591169792208, "learning_rate": 7.903521062381192e-06, "loss": 0.2739, "step": 9434 }, { "epoch": 0.32378174330816745, "grad_norm": 0.7682170962044211, "learning_rate": 7.903068609188977e-06, "loss": 0.3059, "step": 9435 }, { "epoch": 0.32381606039807825, "grad_norm": 0.7607682672131219, "learning_rate": 7.902616120132733e-06, "loss": 0.289, "step": 9436 }, { "epoch": 0.323850377487989, "grad_norm": 0.8250852813314442, "learning_rate": 7.902163595218054e-06, "loss": 0.3175, "step": 9437 }, { "epoch": 0.3238846945778998, "grad_norm": 0.7723395615591986, "learning_rate": 7.901711034450525e-06, "loss": 0.3039, "step": 9438 }, { "epoch": 0.32391901166781056, "grad_norm": 0.8647308488582216, "learning_rate": 7.901258437835739e-06, "loss": 0.3001, "step": 9439 }, { "epoch": 0.32395332875772137, "grad_norm": 0.8265725105640123, "learning_rate": 7.90080580537929e-06, "loss": 0.3065, "step": 9440 }, { "epoch": 0.3239876458476321, "grad_norm": 0.8338370965020916, "learning_rate": 7.900353137086764e-06, "loss": 0.3228, "step": 9441 }, { "epoch": 0.3240219629375429, "grad_norm": 0.7765708149667202, "learning_rate": 7.899900432963758e-06, "loss": 0.3366, "step": 9442 }, { "epoch": 0.3240562800274537, "grad_norm": 0.7528768987842679, "learning_rate": 7.899447693015865e-06, "loss": 0.3013, "step": 9443 }, { "epoch": 0.32409059711736443, "grad_norm": 0.7426919392539392, "learning_rate": 7.898994917248674e-06, "loss": 0.2939, "step": 9444 }, { "epoch": 0.32412491420727524, "grad_norm": 0.7851631012725746, "learning_rate": 7.898542105667781e-06, "loss": 0.2811, "step": 9445 }, { "epoch": 0.324159231297186, "grad_norm": 0.7402903304148851, "learning_rate": 7.89808925827878e-06, "loss": 0.3701, "step": 9446 }, { "epoch": 0.3241935483870968, "grad_norm": 0.822658743107252, "learning_rate": 7.897636375087265e-06, "loss": 0.3313, "step": 9447 }, { "epoch": 0.32422786547700755, "grad_norm": 0.7021430544397103, "learning_rate": 7.89718345609883e-06, "loss": 0.3234, "step": 9448 }, { "epoch": 0.3242621825669183, "grad_norm": 0.7194290381770935, "learning_rate": 7.896730501319073e-06, "loss": 0.295, "step": 9449 }, { "epoch": 0.3242964996568291, "grad_norm": 0.8115326368771695, "learning_rate": 7.896277510753587e-06, "loss": 0.3332, "step": 9450 }, { "epoch": 0.32433081674673986, "grad_norm": 0.898306959880329, "learning_rate": 7.895824484407968e-06, "loss": 0.3168, "step": 9451 }, { "epoch": 0.32436513383665067, "grad_norm": 0.753920600468075, "learning_rate": 7.895371422287815e-06, "loss": 0.2763, "step": 9452 }, { "epoch": 0.3243994509265614, "grad_norm": 0.8678796138819008, "learning_rate": 7.894918324398723e-06, "loss": 0.2825, "step": 9453 }, { "epoch": 0.3244337680164722, "grad_norm": 0.7753626935049023, "learning_rate": 7.894465190746292e-06, "loss": 0.3501, "step": 9454 }, { "epoch": 0.324468085106383, "grad_norm": 0.7859997154801345, "learning_rate": 7.894012021336117e-06, "loss": 0.2839, "step": 9455 }, { "epoch": 0.3245024021962937, "grad_norm": 0.8780841168946864, "learning_rate": 7.893558816173796e-06, "loss": 0.3566, "step": 9456 }, { "epoch": 0.32453671928620453, "grad_norm": 0.7318861665767226, "learning_rate": 7.893105575264933e-06, "loss": 0.2905, "step": 9457 }, { "epoch": 0.3245710363761153, "grad_norm": 0.7229852525577763, "learning_rate": 7.89265229861512e-06, "loss": 0.2861, "step": 9458 }, { "epoch": 0.3246053534660261, "grad_norm": 0.7981784310497452, "learning_rate": 7.892198986229963e-06, "loss": 0.3617, "step": 9459 }, { "epoch": 0.32463967055593684, "grad_norm": 0.8824536051111833, "learning_rate": 7.891745638115058e-06, "loss": 0.2903, "step": 9460 }, { "epoch": 0.32467398764584765, "grad_norm": 0.7621552631403915, "learning_rate": 7.891292254276006e-06, "loss": 0.3201, "step": 9461 }, { "epoch": 0.3247083047357584, "grad_norm": 0.8386658122391939, "learning_rate": 7.890838834718413e-06, "loss": 0.3416, "step": 9462 }, { "epoch": 0.3247426218256692, "grad_norm": 0.8298216609410447, "learning_rate": 7.890385379447872e-06, "loss": 0.3264, "step": 9463 }, { "epoch": 0.32477693891557996, "grad_norm": 0.8049592084996062, "learning_rate": 7.889931888469992e-06, "loss": 0.3491, "step": 9464 }, { "epoch": 0.3248112560054907, "grad_norm": 0.8362955010735359, "learning_rate": 7.889478361790372e-06, "loss": 0.3216, "step": 9465 }, { "epoch": 0.3248455730954015, "grad_norm": 0.9056534371821104, "learning_rate": 7.889024799414618e-06, "loss": 0.3542, "step": 9466 }, { "epoch": 0.32487989018531227, "grad_norm": 0.7791087077978421, "learning_rate": 7.888571201348327e-06, "loss": 0.3024, "step": 9467 }, { "epoch": 0.3249142072752231, "grad_norm": 0.8703641725811471, "learning_rate": 7.88811756759711e-06, "loss": 0.3646, "step": 9468 }, { "epoch": 0.32494852436513383, "grad_norm": 0.8271894118779616, "learning_rate": 7.887663898166567e-06, "loss": 0.3482, "step": 9469 }, { "epoch": 0.32498284145504464, "grad_norm": 0.8238709581864821, "learning_rate": 7.8872101930623e-06, "loss": 0.3522, "step": 9470 }, { "epoch": 0.3250171585449554, "grad_norm": 0.9301244515363059, "learning_rate": 7.88675645228992e-06, "loss": 0.2746, "step": 9471 }, { "epoch": 0.32505147563486614, "grad_norm": 0.7198176465616356, "learning_rate": 7.88630267585503e-06, "loss": 0.2836, "step": 9472 }, { "epoch": 0.32508579272477695, "grad_norm": 0.8000725439568106, "learning_rate": 7.885848863763235e-06, "loss": 0.2459, "step": 9473 }, { "epoch": 0.3251201098146877, "grad_norm": 0.7245292151483557, "learning_rate": 7.885395016020141e-06, "loss": 0.2885, "step": 9474 }, { "epoch": 0.3251544269045985, "grad_norm": 0.8506735111068443, "learning_rate": 7.884941132631358e-06, "loss": 0.3497, "step": 9475 }, { "epoch": 0.32518874399450926, "grad_norm": 0.8090517593922253, "learning_rate": 7.884487213602488e-06, "loss": 0.3222, "step": 9476 }, { "epoch": 0.32522306108442006, "grad_norm": 0.8118007411544997, "learning_rate": 7.884033258939144e-06, "loss": 0.3508, "step": 9477 }, { "epoch": 0.3252573781743308, "grad_norm": 0.8604070260953925, "learning_rate": 7.883579268646929e-06, "loss": 0.3114, "step": 9478 }, { "epoch": 0.32529169526424156, "grad_norm": 0.7489023730717257, "learning_rate": 7.883125242731456e-06, "loss": 0.31, "step": 9479 }, { "epoch": 0.32532601235415237, "grad_norm": 0.7331115786105205, "learning_rate": 7.882671181198332e-06, "loss": 0.344, "step": 9480 }, { "epoch": 0.3253603294440631, "grad_norm": 0.7762289537144305, "learning_rate": 7.882217084053163e-06, "loss": 0.3479, "step": 9481 }, { "epoch": 0.32539464653397393, "grad_norm": 0.7866070095106341, "learning_rate": 7.881762951301565e-06, "loss": 0.2915, "step": 9482 }, { "epoch": 0.3254289636238847, "grad_norm": 0.7297247359619137, "learning_rate": 7.881308782949147e-06, "loss": 0.2705, "step": 9483 }, { "epoch": 0.3254632807137955, "grad_norm": 0.7946145749194501, "learning_rate": 7.880854579001516e-06, "loss": 0.3001, "step": 9484 }, { "epoch": 0.32549759780370624, "grad_norm": 0.7841373386392215, "learning_rate": 7.880400339464286e-06, "loss": 0.3019, "step": 9485 }, { "epoch": 0.32553191489361705, "grad_norm": 0.7192321467910407, "learning_rate": 7.879946064343069e-06, "loss": 0.2947, "step": 9486 }, { "epoch": 0.3255662319835278, "grad_norm": 0.7576962236874635, "learning_rate": 7.879491753643475e-06, "loss": 0.344, "step": 9487 }, { "epoch": 0.32560054907343855, "grad_norm": 0.7476990484195527, "learning_rate": 7.87903740737112e-06, "loss": 0.307, "step": 9488 }, { "epoch": 0.32563486616334936, "grad_norm": 0.7546580390259359, "learning_rate": 7.87858302553161e-06, "loss": 0.2778, "step": 9489 }, { "epoch": 0.3256691832532601, "grad_norm": 0.7525598377287881, "learning_rate": 7.878128608130567e-06, "loss": 0.2964, "step": 9490 }, { "epoch": 0.3257035003431709, "grad_norm": 0.8026406262196425, "learning_rate": 7.877674155173598e-06, "loss": 0.3202, "step": 9491 }, { "epoch": 0.32573781743308167, "grad_norm": 0.7588540510615764, "learning_rate": 7.87721966666632e-06, "loss": 0.2891, "step": 9492 }, { "epoch": 0.3257721345229925, "grad_norm": 0.7732355573703649, "learning_rate": 7.87676514261435e-06, "loss": 0.2569, "step": 9493 }, { "epoch": 0.3258064516129032, "grad_norm": 0.7824751594370438, "learning_rate": 7.876310583023298e-06, "loss": 0.2805, "step": 9494 }, { "epoch": 0.325840768702814, "grad_norm": 0.8012974660872323, "learning_rate": 7.875855987898782e-06, "loss": 0.2915, "step": 9495 }, { "epoch": 0.3258750857927248, "grad_norm": 0.795614502001086, "learning_rate": 7.875401357246419e-06, "loss": 0.324, "step": 9496 }, { "epoch": 0.32590940288263553, "grad_norm": 0.8001424531197098, "learning_rate": 7.874946691071826e-06, "loss": 0.2866, "step": 9497 }, { "epoch": 0.32594371997254634, "grad_norm": 0.8912201245706909, "learning_rate": 7.874491989380618e-06, "loss": 0.3709, "step": 9498 }, { "epoch": 0.3259780370624571, "grad_norm": 0.706981481822175, "learning_rate": 7.874037252178411e-06, "loss": 0.2846, "step": 9499 }, { "epoch": 0.3260123541523679, "grad_norm": 0.7766080006764684, "learning_rate": 7.873582479470824e-06, "loss": 0.2987, "step": 9500 }, { "epoch": 0.32604667124227865, "grad_norm": 0.7072205630291832, "learning_rate": 7.873127671263478e-06, "loss": 0.3185, "step": 9501 }, { "epoch": 0.3260809883321894, "grad_norm": 0.798315136032223, "learning_rate": 7.87267282756199e-06, "loss": 0.3724, "step": 9502 }, { "epoch": 0.3261153054221002, "grad_norm": 0.7361806500208368, "learning_rate": 7.872217948371976e-06, "loss": 0.3056, "step": 9503 }, { "epoch": 0.32614962251201096, "grad_norm": 0.7800778052410153, "learning_rate": 7.87176303369906e-06, "loss": 0.2968, "step": 9504 }, { "epoch": 0.32618393960192177, "grad_norm": 0.8369979822436724, "learning_rate": 7.871308083548859e-06, "loss": 0.2804, "step": 9505 }, { "epoch": 0.3262182566918325, "grad_norm": 0.8553302301265282, "learning_rate": 7.870853097926994e-06, "loss": 0.3029, "step": 9506 }, { "epoch": 0.3262525737817433, "grad_norm": 0.8981279900886869, "learning_rate": 7.870398076839086e-06, "loss": 0.2699, "step": 9507 }, { "epoch": 0.3262868908716541, "grad_norm": 0.8153546376535545, "learning_rate": 7.869943020290757e-06, "loss": 0.3095, "step": 9508 }, { "epoch": 0.3263212079615649, "grad_norm": 0.7442480847552204, "learning_rate": 7.869487928287626e-06, "loss": 0.2855, "step": 9509 }, { "epoch": 0.32635552505147564, "grad_norm": 0.953849826810777, "learning_rate": 7.869032800835321e-06, "loss": 0.2808, "step": 9510 }, { "epoch": 0.3263898421413864, "grad_norm": 0.7939520474113987, "learning_rate": 7.868577637939459e-06, "loss": 0.3004, "step": 9511 }, { "epoch": 0.3264241592312972, "grad_norm": 0.8310284348475259, "learning_rate": 7.868122439605663e-06, "loss": 0.3066, "step": 9512 }, { "epoch": 0.32645847632120795, "grad_norm": 0.7481890564581171, "learning_rate": 7.86766720583956e-06, "loss": 0.3546, "step": 9513 }, { "epoch": 0.32649279341111875, "grad_norm": 0.8319166670952031, "learning_rate": 7.86721193664677e-06, "loss": 0.3278, "step": 9514 }, { "epoch": 0.3265271105010295, "grad_norm": 0.7726031086631846, "learning_rate": 7.86675663203292e-06, "loss": 0.2735, "step": 9515 }, { "epoch": 0.3265614275909403, "grad_norm": 0.787871214507726, "learning_rate": 7.866301292003635e-06, "loss": 0.3053, "step": 9516 }, { "epoch": 0.32659574468085106, "grad_norm": 0.6804919584843322, "learning_rate": 7.86584591656454e-06, "loss": 0.2866, "step": 9517 }, { "epoch": 0.3266300617707618, "grad_norm": 0.7092093830864045, "learning_rate": 7.86539050572126e-06, "loss": 0.2918, "step": 9518 }, { "epoch": 0.3266643788606726, "grad_norm": 0.8036662249798364, "learning_rate": 7.86493505947942e-06, "loss": 0.3017, "step": 9519 }, { "epoch": 0.3266986959505834, "grad_norm": 0.7979076795973628, "learning_rate": 7.864479577844648e-06, "loss": 0.2975, "step": 9520 }, { "epoch": 0.3267330130404942, "grad_norm": 0.7145446311197245, "learning_rate": 7.86402406082257e-06, "loss": 0.2831, "step": 9521 }, { "epoch": 0.32676733013040493, "grad_norm": 0.8061670707783567, "learning_rate": 7.863568508418814e-06, "loss": 0.3779, "step": 9522 }, { "epoch": 0.32680164722031574, "grad_norm": 0.8412190296933413, "learning_rate": 7.86311292063901e-06, "loss": 0.3504, "step": 9523 }, { "epoch": 0.3268359643102265, "grad_norm": 0.7820368881511387, "learning_rate": 7.86265729748878e-06, "loss": 0.3196, "step": 9524 }, { "epoch": 0.32687028140013724, "grad_norm": 0.8788688810422698, "learning_rate": 7.862201638973758e-06, "loss": 0.3097, "step": 9525 }, { "epoch": 0.32690459849004805, "grad_norm": 0.7817770565540849, "learning_rate": 7.861745945099572e-06, "loss": 0.3128, "step": 9526 }, { "epoch": 0.3269389155799588, "grad_norm": 0.7537985018719557, "learning_rate": 7.86129021587185e-06, "loss": 0.284, "step": 9527 }, { "epoch": 0.3269732326698696, "grad_norm": 0.7559842970528579, "learning_rate": 7.860834451296226e-06, "loss": 0.2608, "step": 9528 }, { "epoch": 0.32700754975978036, "grad_norm": 0.8081529138643295, "learning_rate": 7.860378651378326e-06, "loss": 0.3309, "step": 9529 }, { "epoch": 0.32704186684969117, "grad_norm": 0.7453670741763025, "learning_rate": 7.859922816123784e-06, "loss": 0.2735, "step": 9530 }, { "epoch": 0.3270761839396019, "grad_norm": 0.8985002825333008, "learning_rate": 7.859466945538227e-06, "loss": 0.3136, "step": 9531 }, { "epoch": 0.3271105010295127, "grad_norm": 0.7700132685528018, "learning_rate": 7.859011039627294e-06, "loss": 0.3457, "step": 9532 }, { "epoch": 0.3271448181194235, "grad_norm": 0.7605261018852355, "learning_rate": 7.85855509839661e-06, "loss": 0.306, "step": 9533 }, { "epoch": 0.3271791352093342, "grad_norm": 0.8152622353030221, "learning_rate": 7.858099121851811e-06, "loss": 0.2658, "step": 9534 }, { "epoch": 0.32721345229924503, "grad_norm": 0.7849987221852186, "learning_rate": 7.85764310999853e-06, "loss": 0.277, "step": 9535 }, { "epoch": 0.3272477693891558, "grad_norm": 0.8180637920085997, "learning_rate": 7.8571870628424e-06, "loss": 0.3047, "step": 9536 }, { "epoch": 0.3272820864790666, "grad_norm": 0.7587712819873093, "learning_rate": 7.856730980389055e-06, "loss": 0.2556, "step": 9537 }, { "epoch": 0.32731640356897734, "grad_norm": 0.7532509363165671, "learning_rate": 7.856274862644129e-06, "loss": 0.3372, "step": 9538 }, { "epoch": 0.32735072065888815, "grad_norm": 0.7470152906231885, "learning_rate": 7.855818709613257e-06, "loss": 0.343, "step": 9539 }, { "epoch": 0.3273850377487989, "grad_norm": 0.9009369114421933, "learning_rate": 7.855362521302074e-06, "loss": 0.3052, "step": 9540 }, { "epoch": 0.32741935483870965, "grad_norm": 0.8706658695557615, "learning_rate": 7.854906297716217e-06, "loss": 0.303, "step": 9541 }, { "epoch": 0.32745367192862046, "grad_norm": 0.7852279266325548, "learning_rate": 7.854450038861322e-06, "loss": 0.3445, "step": 9542 }, { "epoch": 0.3274879890185312, "grad_norm": 0.7341621610575042, "learning_rate": 7.853993744743023e-06, "loss": 0.2841, "step": 9543 }, { "epoch": 0.327522306108442, "grad_norm": 0.7487880798980687, "learning_rate": 7.85353741536696e-06, "loss": 0.2323, "step": 9544 }, { "epoch": 0.32755662319835277, "grad_norm": 0.7571976560249191, "learning_rate": 7.853081050738768e-06, "loss": 0.2941, "step": 9545 }, { "epoch": 0.3275909402882636, "grad_norm": 0.7729366068298605, "learning_rate": 7.852624650864087e-06, "loss": 0.3076, "step": 9546 }, { "epoch": 0.32762525737817433, "grad_norm": 0.6961323850904542, "learning_rate": 7.852168215748555e-06, "loss": 0.2713, "step": 9547 }, { "epoch": 0.3276595744680851, "grad_norm": 0.6832939760028245, "learning_rate": 7.851711745397808e-06, "loss": 0.2724, "step": 9548 }, { "epoch": 0.3276938915579959, "grad_norm": 0.7913605128215386, "learning_rate": 7.851255239817488e-06, "loss": 0.3178, "step": 9549 }, { "epoch": 0.32772820864790664, "grad_norm": 0.7206888783148819, "learning_rate": 7.850798699013234e-06, "loss": 0.297, "step": 9550 }, { "epoch": 0.32776252573781745, "grad_norm": 0.771486511361795, "learning_rate": 7.850342122990685e-06, "loss": 0.2969, "step": 9551 }, { "epoch": 0.3277968428277282, "grad_norm": 0.7348309101846642, "learning_rate": 7.849885511755484e-06, "loss": 0.307, "step": 9552 }, { "epoch": 0.327831159917639, "grad_norm": 0.7294304077590196, "learning_rate": 7.849428865313268e-06, "loss": 0.3001, "step": 9553 }, { "epoch": 0.32786547700754975, "grad_norm": 0.7277378182102543, "learning_rate": 7.848972183669681e-06, "loss": 0.3228, "step": 9554 }, { "epoch": 0.32789979409746056, "grad_norm": 0.8269454485587072, "learning_rate": 7.848515466830366e-06, "loss": 0.3382, "step": 9555 }, { "epoch": 0.3279341111873713, "grad_norm": 0.9196278621508146, "learning_rate": 7.84805871480096e-06, "loss": 0.3989, "step": 9556 }, { "epoch": 0.32796842827728206, "grad_norm": 0.803629064283047, "learning_rate": 7.847601927587112e-06, "loss": 0.3231, "step": 9557 }, { "epoch": 0.32800274536719287, "grad_norm": 0.8214644814535921, "learning_rate": 7.84714510519446e-06, "loss": 0.3567, "step": 9558 }, { "epoch": 0.3280370624571036, "grad_norm": 0.7459143449275948, "learning_rate": 7.846688247628653e-06, "loss": 0.3238, "step": 9559 }, { "epoch": 0.32807137954701443, "grad_norm": 0.7930804710887637, "learning_rate": 7.846231354895329e-06, "loss": 0.3142, "step": 9560 }, { "epoch": 0.3281056966369252, "grad_norm": 0.8243989844089508, "learning_rate": 7.845774427000135e-06, "loss": 0.3155, "step": 9561 }, { "epoch": 0.328140013726836, "grad_norm": 0.8196391290168175, "learning_rate": 7.845317463948718e-06, "loss": 0.4011, "step": 9562 }, { "epoch": 0.32817433081674674, "grad_norm": 0.8134109377852462, "learning_rate": 7.844860465746718e-06, "loss": 0.3611, "step": 9563 }, { "epoch": 0.3282086479066575, "grad_norm": 0.7964129876403138, "learning_rate": 7.844403432399785e-06, "loss": 0.2975, "step": 9564 }, { "epoch": 0.3282429649965683, "grad_norm": 0.7141412554448174, "learning_rate": 7.843946363913566e-06, "loss": 0.2762, "step": 9565 }, { "epoch": 0.32827728208647905, "grad_norm": 0.749874067117929, "learning_rate": 7.843489260293703e-06, "loss": 0.3334, "step": 9566 }, { "epoch": 0.32831159917638986, "grad_norm": 0.7382195818197963, "learning_rate": 7.843032121545847e-06, "loss": 0.3121, "step": 9567 }, { "epoch": 0.3283459162663006, "grad_norm": 0.762527034740756, "learning_rate": 7.842574947675641e-06, "loss": 0.287, "step": 9568 }, { "epoch": 0.3283802333562114, "grad_norm": 0.7993765613195383, "learning_rate": 7.842117738688736e-06, "loss": 0.3267, "step": 9569 }, { "epoch": 0.32841455044612217, "grad_norm": 0.786004899154831, "learning_rate": 7.841660494590783e-06, "loss": 0.3211, "step": 9570 }, { "epoch": 0.3284488675360329, "grad_norm": 0.8895960416320394, "learning_rate": 7.841203215387424e-06, "loss": 0.3058, "step": 9571 }, { "epoch": 0.3284831846259437, "grad_norm": 0.7708051109235722, "learning_rate": 7.840745901084314e-06, "loss": 0.3083, "step": 9572 }, { "epoch": 0.3285175017158545, "grad_norm": 0.9785821769254817, "learning_rate": 7.840288551687101e-06, "loss": 0.2953, "step": 9573 }, { "epoch": 0.3285518188057653, "grad_norm": 0.8489076294962156, "learning_rate": 7.839831167201433e-06, "loss": 0.2798, "step": 9574 }, { "epoch": 0.32858613589567603, "grad_norm": 0.8143947268376969, "learning_rate": 7.839373747632963e-06, "loss": 0.3141, "step": 9575 }, { "epoch": 0.32862045298558684, "grad_norm": 0.8084888471477525, "learning_rate": 7.83891629298734e-06, "loss": 0.3199, "step": 9576 }, { "epoch": 0.3286547700754976, "grad_norm": 0.6226636371167932, "learning_rate": 7.838458803270216e-06, "loss": 0.2745, "step": 9577 }, { "epoch": 0.3286890871654084, "grad_norm": 0.8198610568564265, "learning_rate": 7.838001278487243e-06, "loss": 0.2985, "step": 9578 }, { "epoch": 0.32872340425531915, "grad_norm": 0.7607605163467714, "learning_rate": 7.837543718644072e-06, "loss": 0.2987, "step": 9579 }, { "epoch": 0.3287577213452299, "grad_norm": 0.7423914037276625, "learning_rate": 7.837086123746358e-06, "loss": 0.3275, "step": 9580 }, { "epoch": 0.3287920384351407, "grad_norm": 0.8019020396844112, "learning_rate": 7.836628493799753e-06, "loss": 0.293, "step": 9581 }, { "epoch": 0.32882635552505146, "grad_norm": 0.7604597785677454, "learning_rate": 7.836170828809908e-06, "loss": 0.3046, "step": 9582 }, { "epoch": 0.32886067261496227, "grad_norm": 0.7285642053450779, "learning_rate": 7.83571312878248e-06, "loss": 0.2794, "step": 9583 }, { "epoch": 0.328894989704873, "grad_norm": 0.6857685244601266, "learning_rate": 7.835255393723124e-06, "loss": 0.2867, "step": 9584 }, { "epoch": 0.3289293067947838, "grad_norm": 0.6906304386936262, "learning_rate": 7.834797623637493e-06, "loss": 0.3103, "step": 9585 }, { "epoch": 0.3289636238846946, "grad_norm": 0.7816349473387759, "learning_rate": 7.83433981853124e-06, "loss": 0.3207, "step": 9586 }, { "epoch": 0.32899794097460533, "grad_norm": 0.8458784072445772, "learning_rate": 7.833881978410025e-06, "loss": 0.3185, "step": 9587 }, { "epoch": 0.32903225806451614, "grad_norm": 0.8467949813361797, "learning_rate": 7.8334241032795e-06, "loss": 0.3006, "step": 9588 }, { "epoch": 0.3290665751544269, "grad_norm": 0.9160978499769968, "learning_rate": 7.832966193145326e-06, "loss": 0.2838, "step": 9589 }, { "epoch": 0.3291008922443377, "grad_norm": 0.842909874387608, "learning_rate": 7.832508248013158e-06, "loss": 0.3563, "step": 9590 }, { "epoch": 0.32913520933424845, "grad_norm": 0.762032329283418, "learning_rate": 7.832050267888651e-06, "loss": 0.2973, "step": 9591 }, { "epoch": 0.32916952642415925, "grad_norm": 0.7191500136585287, "learning_rate": 7.831592252777467e-06, "loss": 0.3122, "step": 9592 }, { "epoch": 0.32920384351407, "grad_norm": 0.7229053854204396, "learning_rate": 7.83113420268526e-06, "loss": 0.2828, "step": 9593 }, { "epoch": 0.32923816060398076, "grad_norm": 0.9013091767212115, "learning_rate": 7.830676117617691e-06, "loss": 0.3707, "step": 9594 }, { "epoch": 0.32927247769389156, "grad_norm": 0.9484633389373958, "learning_rate": 7.830217997580419e-06, "loss": 0.3048, "step": 9595 }, { "epoch": 0.3293067947838023, "grad_norm": 0.7432581345662819, "learning_rate": 7.829759842579103e-06, "loss": 0.3085, "step": 9596 }, { "epoch": 0.3293411118737131, "grad_norm": 0.784087808829009, "learning_rate": 7.829301652619405e-06, "loss": 0.3644, "step": 9597 }, { "epoch": 0.3293754289636239, "grad_norm": 0.8490512011696945, "learning_rate": 7.828843427706982e-06, "loss": 0.335, "step": 9598 }, { "epoch": 0.3294097460535347, "grad_norm": 0.8546347792927907, "learning_rate": 7.828385167847497e-06, "loss": 0.3152, "step": 9599 }, { "epoch": 0.32944406314344543, "grad_norm": 0.767989617169547, "learning_rate": 7.827926873046611e-06, "loss": 0.2844, "step": 9600 }, { "epoch": 0.32947838023335624, "grad_norm": 0.8198218952138766, "learning_rate": 7.827468543309987e-06, "loss": 0.2906, "step": 9601 }, { "epoch": 0.329512697323267, "grad_norm": 0.70875610014404, "learning_rate": 7.827010178643283e-06, "loss": 0.3078, "step": 9602 }, { "epoch": 0.32954701441317774, "grad_norm": 0.7819901860091032, "learning_rate": 7.826551779052166e-06, "loss": 0.3441, "step": 9603 }, { "epoch": 0.32958133150308855, "grad_norm": 0.8163918273347328, "learning_rate": 7.826093344542297e-06, "loss": 0.3096, "step": 9604 }, { "epoch": 0.3296156485929993, "grad_norm": 0.8341093981800259, "learning_rate": 7.825634875119338e-06, "loss": 0.2891, "step": 9605 }, { "epoch": 0.3296499656829101, "grad_norm": 0.8076097990036695, "learning_rate": 7.825176370788956e-06, "loss": 0.3385, "step": 9606 }, { "epoch": 0.32968428277282086, "grad_norm": 0.8195836275298694, "learning_rate": 7.824717831556815e-06, "loss": 0.331, "step": 9607 }, { "epoch": 0.32971859986273166, "grad_norm": 0.8347081975988415, "learning_rate": 7.824259257428576e-06, "loss": 0.3254, "step": 9608 }, { "epoch": 0.3297529169526424, "grad_norm": 1.098806447597664, "learning_rate": 7.823800648409907e-06, "loss": 0.3085, "step": 9609 }, { "epoch": 0.32978723404255317, "grad_norm": 0.9105473455930037, "learning_rate": 7.823342004506474e-06, "loss": 0.346, "step": 9610 }, { "epoch": 0.329821551132464, "grad_norm": 0.7526276045147906, "learning_rate": 7.822883325723943e-06, "loss": 0.2728, "step": 9611 }, { "epoch": 0.3298558682223747, "grad_norm": 0.7644820497031206, "learning_rate": 7.822424612067978e-06, "loss": 0.2837, "step": 9612 }, { "epoch": 0.32989018531228553, "grad_norm": 1.1292619869261642, "learning_rate": 7.821965863544248e-06, "loss": 0.2886, "step": 9613 }, { "epoch": 0.3299245024021963, "grad_norm": 0.8061818167658743, "learning_rate": 7.82150708015842e-06, "loss": 0.2617, "step": 9614 }, { "epoch": 0.3299588194921071, "grad_norm": 0.7976569120729786, "learning_rate": 7.821048261916162e-06, "loss": 0.2766, "step": 9615 }, { "epoch": 0.32999313658201784, "grad_norm": 0.9434042842283652, "learning_rate": 7.820589408823141e-06, "loss": 0.3637, "step": 9616 }, { "epoch": 0.3300274536719286, "grad_norm": 0.829932343647985, "learning_rate": 7.820130520885027e-06, "loss": 0.3282, "step": 9617 }, { "epoch": 0.3300617707618394, "grad_norm": 0.7501845746173266, "learning_rate": 7.819671598107487e-06, "loss": 0.3227, "step": 9618 }, { "epoch": 0.33009608785175015, "grad_norm": 0.7927642266948053, "learning_rate": 7.819212640496195e-06, "loss": 0.3239, "step": 9619 }, { "epoch": 0.33013040494166096, "grad_norm": 0.822620390198389, "learning_rate": 7.818753648056816e-06, "loss": 0.3291, "step": 9620 }, { "epoch": 0.3301647220315717, "grad_norm": 0.7385697473883289, "learning_rate": 7.81829462079502e-06, "loss": 0.2923, "step": 9621 }, { "epoch": 0.3301990391214825, "grad_norm": 0.8333913654865387, "learning_rate": 7.817835558716482e-06, "loss": 0.2825, "step": 9622 }, { "epoch": 0.33023335621139327, "grad_norm": 0.7465122419273658, "learning_rate": 7.817376461826871e-06, "loss": 0.2648, "step": 9623 }, { "epoch": 0.3302676733013041, "grad_norm": 0.8343624958431871, "learning_rate": 7.816917330131858e-06, "loss": 0.3687, "step": 9624 }, { "epoch": 0.33030199039121483, "grad_norm": 0.7310458548519365, "learning_rate": 7.816458163637115e-06, "loss": 0.3499, "step": 9625 }, { "epoch": 0.3303363074811256, "grad_norm": 0.8272226140635923, "learning_rate": 7.815998962348317e-06, "loss": 0.295, "step": 9626 }, { "epoch": 0.3303706245710364, "grad_norm": 0.7550416120478349, "learning_rate": 7.815539726271133e-06, "loss": 0.2811, "step": 9627 }, { "epoch": 0.33040494166094714, "grad_norm": 0.7261849213053947, "learning_rate": 7.81508045541124e-06, "loss": 0.3419, "step": 9628 }, { "epoch": 0.33043925875085794, "grad_norm": 0.7009830817358366, "learning_rate": 7.81462114977431e-06, "loss": 0.2978, "step": 9629 }, { "epoch": 0.3304735758407687, "grad_norm": 0.7328795001074265, "learning_rate": 7.814161809366014e-06, "loss": 0.3261, "step": 9630 }, { "epoch": 0.3305078929306795, "grad_norm": 0.8380101509749116, "learning_rate": 7.813702434192031e-06, "loss": 0.3747, "step": 9631 }, { "epoch": 0.33054221002059025, "grad_norm": 0.7770469229069292, "learning_rate": 7.813243024258037e-06, "loss": 0.3425, "step": 9632 }, { "epoch": 0.330576527110501, "grad_norm": 0.7704073024466433, "learning_rate": 7.812783579569705e-06, "loss": 0.2829, "step": 9633 }, { "epoch": 0.3306108442004118, "grad_norm": 0.7902922839009341, "learning_rate": 7.81232410013271e-06, "loss": 0.2879, "step": 9634 }, { "epoch": 0.33064516129032256, "grad_norm": 0.7655055062365683, "learning_rate": 7.811864585952731e-06, "loss": 0.2708, "step": 9635 }, { "epoch": 0.33067947838023337, "grad_norm": 0.8307705725194011, "learning_rate": 7.811405037035441e-06, "loss": 0.3142, "step": 9636 }, { "epoch": 0.3307137954701441, "grad_norm": 0.7445838317813214, "learning_rate": 7.810945453386521e-06, "loss": 0.2848, "step": 9637 }, { "epoch": 0.33074811256005493, "grad_norm": 0.7490437770936222, "learning_rate": 7.810485835011647e-06, "loss": 0.308, "step": 9638 }, { "epoch": 0.3307824296499657, "grad_norm": 0.7090049552951264, "learning_rate": 7.810026181916498e-06, "loss": 0.2806, "step": 9639 }, { "epoch": 0.33081674673987643, "grad_norm": 0.8040903081996039, "learning_rate": 7.80956649410675e-06, "loss": 0.3153, "step": 9640 }, { "epoch": 0.33085106382978724, "grad_norm": 0.8095003049985628, "learning_rate": 7.809106771588086e-06, "loss": 0.3201, "step": 9641 }, { "epoch": 0.330885380919698, "grad_norm": 0.7932032713844642, "learning_rate": 7.808647014366182e-06, "loss": 0.3102, "step": 9642 }, { "epoch": 0.3309196980096088, "grad_norm": 0.6947659711534829, "learning_rate": 7.808187222446717e-06, "loss": 0.2541, "step": 9643 }, { "epoch": 0.33095401509951955, "grad_norm": 0.865351480576206, "learning_rate": 7.807727395835375e-06, "loss": 0.3706, "step": 9644 }, { "epoch": 0.33098833218943036, "grad_norm": 0.7425422766781297, "learning_rate": 7.807267534537833e-06, "loss": 0.2968, "step": 9645 }, { "epoch": 0.3310226492793411, "grad_norm": 0.8252930635352652, "learning_rate": 7.806807638559774e-06, "loss": 0.3325, "step": 9646 }, { "epoch": 0.3310569663692519, "grad_norm": 0.7012673671559284, "learning_rate": 7.806347707906881e-06, "loss": 0.3112, "step": 9647 }, { "epoch": 0.33109128345916267, "grad_norm": 0.8343694904731921, "learning_rate": 7.805887742584832e-06, "loss": 0.3153, "step": 9648 }, { "epoch": 0.3311256005490734, "grad_norm": 0.805235073433697, "learning_rate": 7.805427742599311e-06, "loss": 0.2987, "step": 9649 }, { "epoch": 0.3311599176389842, "grad_norm": 0.819379926111349, "learning_rate": 7.804967707956001e-06, "loss": 0.3095, "step": 9650 }, { "epoch": 0.331194234728895, "grad_norm": 0.7619349779990566, "learning_rate": 7.804507638660588e-06, "loss": 0.2975, "step": 9651 }, { "epoch": 0.3312285518188058, "grad_norm": 0.7457410115440708, "learning_rate": 7.80404753471875e-06, "loss": 0.323, "step": 9652 }, { "epoch": 0.33126286890871653, "grad_norm": 0.6959728033924012, "learning_rate": 7.803587396136175e-06, "loss": 0.2608, "step": 9653 }, { "epoch": 0.33129718599862734, "grad_norm": 0.6771200213127956, "learning_rate": 7.803127222918545e-06, "loss": 0.3036, "step": 9654 }, { "epoch": 0.3313315030885381, "grad_norm": 0.7832454798545014, "learning_rate": 7.802667015071547e-06, "loss": 0.267, "step": 9655 }, { "epoch": 0.33136582017844884, "grad_norm": 0.7613348507948824, "learning_rate": 7.802206772600867e-06, "loss": 0.3322, "step": 9656 }, { "epoch": 0.33140013726835965, "grad_norm": 0.8450870773301643, "learning_rate": 7.801746495512188e-06, "loss": 0.3433, "step": 9657 }, { "epoch": 0.3314344543582704, "grad_norm": 0.7329016091893619, "learning_rate": 7.801286183811199e-06, "loss": 0.313, "step": 9658 }, { "epoch": 0.3314687714481812, "grad_norm": 0.7554376191767302, "learning_rate": 7.800825837503583e-06, "loss": 0.26, "step": 9659 }, { "epoch": 0.33150308853809196, "grad_norm": 0.7658621232669643, "learning_rate": 7.80036545659503e-06, "loss": 0.3116, "step": 9660 }, { "epoch": 0.33153740562800277, "grad_norm": 0.8645188591042068, "learning_rate": 7.799905041091227e-06, "loss": 0.2916, "step": 9661 }, { "epoch": 0.3315717227179135, "grad_norm": 0.7115815004747789, "learning_rate": 7.799444590997863e-06, "loss": 0.2875, "step": 9662 }, { "epoch": 0.33160603980782427, "grad_norm": 0.7495076793440193, "learning_rate": 7.798984106320621e-06, "loss": 0.2673, "step": 9663 }, { "epoch": 0.3316403568977351, "grad_norm": 0.7932541178164539, "learning_rate": 7.798523587065198e-06, "loss": 0.279, "step": 9664 }, { "epoch": 0.33167467398764583, "grad_norm": 0.7218779918775589, "learning_rate": 7.798063033237278e-06, "loss": 0.3288, "step": 9665 }, { "epoch": 0.33170899107755664, "grad_norm": 0.7517007164694148, "learning_rate": 7.79760244484255e-06, "loss": 0.3359, "step": 9666 }, { "epoch": 0.3317433081674674, "grad_norm": 0.9357162616642243, "learning_rate": 7.797141821886705e-06, "loss": 0.3062, "step": 9667 }, { "epoch": 0.3317776252573782, "grad_norm": 0.7416329242352333, "learning_rate": 7.796681164375435e-06, "loss": 0.279, "step": 9668 }, { "epoch": 0.33181194234728895, "grad_norm": 0.7774852711130177, "learning_rate": 7.79622047231443e-06, "loss": 0.3444, "step": 9669 }, { "epoch": 0.33184625943719975, "grad_norm": 0.7670785804565552, "learning_rate": 7.79575974570938e-06, "loss": 0.3134, "step": 9670 }, { "epoch": 0.3318805765271105, "grad_norm": 0.7508086851591904, "learning_rate": 7.79529898456598e-06, "loss": 0.2879, "step": 9671 }, { "epoch": 0.33191489361702126, "grad_norm": 0.7840322042769009, "learning_rate": 7.794838188889919e-06, "loss": 0.2834, "step": 9672 }, { "epoch": 0.33194921070693206, "grad_norm": 0.9127025515971284, "learning_rate": 7.79437735868689e-06, "loss": 0.34, "step": 9673 }, { "epoch": 0.3319835277968428, "grad_norm": 0.7666323999215459, "learning_rate": 7.793916493962588e-06, "loss": 0.2697, "step": 9674 }, { "epoch": 0.3320178448867536, "grad_norm": 0.7552151441906464, "learning_rate": 7.793455594722703e-06, "loss": 0.3411, "step": 9675 }, { "epoch": 0.3320521619766644, "grad_norm": 0.7434595826510447, "learning_rate": 7.792994660972933e-06, "loss": 0.3309, "step": 9676 }, { "epoch": 0.3320864790665752, "grad_norm": 0.7816478533611916, "learning_rate": 7.79253369271897e-06, "loss": 0.3126, "step": 9677 }, { "epoch": 0.33212079615648593, "grad_norm": 0.7499539972320514, "learning_rate": 7.792072689966508e-06, "loss": 0.2523, "step": 9678 }, { "epoch": 0.3321551132463967, "grad_norm": 0.7591152235718862, "learning_rate": 7.791611652721243e-06, "loss": 0.2696, "step": 9679 }, { "epoch": 0.3321894303363075, "grad_norm": 0.842347305718225, "learning_rate": 7.791150580988871e-06, "loss": 0.3448, "step": 9680 }, { "epoch": 0.33222374742621824, "grad_norm": 0.8086536995539211, "learning_rate": 7.790689474775089e-06, "loss": 0.3552, "step": 9681 }, { "epoch": 0.33225806451612905, "grad_norm": 0.8227371315109498, "learning_rate": 7.79022833408559e-06, "loss": 0.3304, "step": 9682 }, { "epoch": 0.3322923816060398, "grad_norm": 0.8750097051900285, "learning_rate": 7.789767158926076e-06, "loss": 0.287, "step": 9683 }, { "epoch": 0.3323266986959506, "grad_norm": 0.7763431511149123, "learning_rate": 7.789305949302239e-06, "loss": 0.2943, "step": 9684 }, { "epoch": 0.33236101578586136, "grad_norm": 0.7160423473812444, "learning_rate": 7.788844705219779e-06, "loss": 0.3089, "step": 9685 }, { "epoch": 0.3323953328757721, "grad_norm": 0.8019019042054276, "learning_rate": 7.788383426684394e-06, "loss": 0.3039, "step": 9686 }, { "epoch": 0.3324296499656829, "grad_norm": 0.8012013344860304, "learning_rate": 7.787922113701785e-06, "loss": 0.2575, "step": 9687 }, { "epoch": 0.33246396705559367, "grad_norm": 0.7525067924015894, "learning_rate": 7.787460766277646e-06, "loss": 0.3085, "step": 9688 }, { "epoch": 0.3324982841455045, "grad_norm": 0.7823314123515884, "learning_rate": 7.786999384417681e-06, "loss": 0.2796, "step": 9689 }, { "epoch": 0.3325326012354152, "grad_norm": 0.763251308341057, "learning_rate": 7.786537968127588e-06, "loss": 0.2827, "step": 9690 }, { "epoch": 0.33256691832532603, "grad_norm": 0.8283063120896297, "learning_rate": 7.786076517413067e-06, "loss": 0.3099, "step": 9691 }, { "epoch": 0.3326012354152368, "grad_norm": 0.8232065849698982, "learning_rate": 7.785615032279816e-06, "loss": 0.3605, "step": 9692 }, { "epoch": 0.33263555250514754, "grad_norm": 0.6950026367690944, "learning_rate": 7.785153512733542e-06, "loss": 0.3159, "step": 9693 }, { "epoch": 0.33266986959505834, "grad_norm": 0.7303629838949912, "learning_rate": 7.784691958779941e-06, "loss": 0.2985, "step": 9694 }, { "epoch": 0.3327041866849691, "grad_norm": 0.9184751065197301, "learning_rate": 7.78423037042472e-06, "loss": 0.318, "step": 9695 }, { "epoch": 0.3327385037748799, "grad_norm": 0.7274919982935001, "learning_rate": 7.78376874767358e-06, "loss": 0.2816, "step": 9696 }, { "epoch": 0.33277282086479065, "grad_norm": 0.7559534834281189, "learning_rate": 7.78330709053222e-06, "loss": 0.2831, "step": 9697 }, { "epoch": 0.33280713795470146, "grad_norm": 0.7392880271113472, "learning_rate": 7.782845399006346e-06, "loss": 0.3066, "step": 9698 }, { "epoch": 0.3328414550446122, "grad_norm": 0.8583597472744657, "learning_rate": 7.782383673101664e-06, "loss": 0.3068, "step": 9699 }, { "epoch": 0.332875772134523, "grad_norm": 0.7800709362834408, "learning_rate": 7.781921912823873e-06, "loss": 0.3536, "step": 9700 }, { "epoch": 0.33291008922443377, "grad_norm": 0.6463985011715815, "learning_rate": 7.781460118178681e-06, "loss": 0.3089, "step": 9701 }, { "epoch": 0.3329444063143445, "grad_norm": 0.8072296026036371, "learning_rate": 7.780998289171792e-06, "loss": 0.3221, "step": 9702 }, { "epoch": 0.33297872340425533, "grad_norm": 0.7000323437498054, "learning_rate": 7.780536425808912e-06, "loss": 0.3272, "step": 9703 }, { "epoch": 0.3330130404941661, "grad_norm": 0.6828942612665291, "learning_rate": 7.780074528095746e-06, "loss": 0.293, "step": 9704 }, { "epoch": 0.3330473575840769, "grad_norm": 1.0104028209695262, "learning_rate": 7.779612596038e-06, "loss": 0.3807, "step": 9705 }, { "epoch": 0.33308167467398764, "grad_norm": 0.9556680627812966, "learning_rate": 7.77915062964138e-06, "loss": 0.3289, "step": 9706 }, { "epoch": 0.33311599176389844, "grad_norm": 0.8102564677819887, "learning_rate": 7.778688628911597e-06, "loss": 0.2837, "step": 9707 }, { "epoch": 0.3331503088538092, "grad_norm": 0.7809892131130284, "learning_rate": 7.778226593854354e-06, "loss": 0.2911, "step": 9708 }, { "epoch": 0.33318462594371995, "grad_norm": 0.7349823966359597, "learning_rate": 7.777764524475362e-06, "loss": 0.2831, "step": 9709 }, { "epoch": 0.33321894303363075, "grad_norm": 0.7150663358523711, "learning_rate": 7.777302420780327e-06, "loss": 0.3232, "step": 9710 }, { "epoch": 0.3332532601235415, "grad_norm": 0.8898126143081496, "learning_rate": 7.776840282774957e-06, "loss": 0.2827, "step": 9711 }, { "epoch": 0.3332875772134523, "grad_norm": 0.7261948551597803, "learning_rate": 7.776378110464964e-06, "loss": 0.307, "step": 9712 }, { "epoch": 0.33332189430336306, "grad_norm": 0.7165511762010703, "learning_rate": 7.775915903856057e-06, "loss": 0.3169, "step": 9713 }, { "epoch": 0.33335621139327387, "grad_norm": 0.7061871906266967, "learning_rate": 7.775453662953945e-06, "loss": 0.3491, "step": 9714 }, { "epoch": 0.3333905284831846, "grad_norm": 0.7841094533618098, "learning_rate": 7.774991387764337e-06, "loss": 0.3136, "step": 9715 }, { "epoch": 0.3334248455730954, "grad_norm": 0.7430765893216518, "learning_rate": 7.77452907829295e-06, "loss": 0.3403, "step": 9716 }, { "epoch": 0.3334591626630062, "grad_norm": 0.7023478015080791, "learning_rate": 7.774066734545487e-06, "loss": 0.2604, "step": 9717 }, { "epoch": 0.33349347975291693, "grad_norm": 0.7933784004685189, "learning_rate": 7.773604356527666e-06, "loss": 0.3465, "step": 9718 }, { "epoch": 0.33352779684282774, "grad_norm": 0.7629183425651117, "learning_rate": 7.773141944245196e-06, "loss": 0.288, "step": 9719 }, { "epoch": 0.3335621139327385, "grad_norm": 0.7861446020321067, "learning_rate": 7.77267949770379e-06, "loss": 0.3037, "step": 9720 }, { "epoch": 0.3335964310226493, "grad_norm": 0.8340304244301497, "learning_rate": 7.772217016909161e-06, "loss": 0.2837, "step": 9721 }, { "epoch": 0.33363074811256005, "grad_norm": 0.8307054833420516, "learning_rate": 7.771754501867026e-06, "loss": 0.3189, "step": 9722 }, { "epoch": 0.33366506520247086, "grad_norm": 0.8539358824359401, "learning_rate": 7.771291952583093e-06, "loss": 0.3149, "step": 9723 }, { "epoch": 0.3336993822923816, "grad_norm": 0.7709342089981822, "learning_rate": 7.770829369063078e-06, "loss": 0.3065, "step": 9724 }, { "epoch": 0.33373369938229236, "grad_norm": 0.71428856109023, "learning_rate": 7.7703667513127e-06, "loss": 0.2959, "step": 9725 }, { "epoch": 0.33376801647220317, "grad_norm": 0.7294346199359453, "learning_rate": 7.769904099337668e-06, "loss": 0.2508, "step": 9726 }, { "epoch": 0.3338023335621139, "grad_norm": 0.8205261961365414, "learning_rate": 7.7694414131437e-06, "loss": 0.3826, "step": 9727 }, { "epoch": 0.3338366506520247, "grad_norm": 0.8088645523432957, "learning_rate": 7.768978692736515e-06, "loss": 0.2866, "step": 9728 }, { "epoch": 0.3338709677419355, "grad_norm": 0.7519845482859373, "learning_rate": 7.768515938121823e-06, "loss": 0.271, "step": 9729 }, { "epoch": 0.3339052848318463, "grad_norm": 0.8013267461474219, "learning_rate": 7.768053149305348e-06, "loss": 0.3217, "step": 9730 }, { "epoch": 0.33393960192175703, "grad_norm": 0.8435034535100019, "learning_rate": 7.767590326292801e-06, "loss": 0.2769, "step": 9731 }, { "epoch": 0.3339739190116678, "grad_norm": 0.8506920223407594, "learning_rate": 7.767127469089903e-06, "loss": 0.3873, "step": 9732 }, { "epoch": 0.3340082361015786, "grad_norm": 0.7200897518680833, "learning_rate": 7.76666457770237e-06, "loss": 0.2637, "step": 9733 }, { "epoch": 0.33404255319148934, "grad_norm": 0.8124460861710261, "learning_rate": 7.766201652135924e-06, "loss": 0.2892, "step": 9734 }, { "epoch": 0.33407687028140015, "grad_norm": 0.9303544823649917, "learning_rate": 7.765738692396281e-06, "loss": 0.2835, "step": 9735 }, { "epoch": 0.3341111873713109, "grad_norm": 0.7403135576500762, "learning_rate": 7.76527569848916e-06, "loss": 0.2719, "step": 9736 }, { "epoch": 0.3341455044612217, "grad_norm": 0.800833831606992, "learning_rate": 7.764812670420283e-06, "loss": 0.2737, "step": 9737 }, { "epoch": 0.33417982155113246, "grad_norm": 0.7836169690158918, "learning_rate": 7.764349608195369e-06, "loss": 0.2953, "step": 9738 }, { "epoch": 0.3342141386410432, "grad_norm": 0.6789418041323372, "learning_rate": 7.763886511820136e-06, "loss": 0.2711, "step": 9739 }, { "epoch": 0.334248455730954, "grad_norm": 0.7685230853903849, "learning_rate": 7.76342338130031e-06, "loss": 0.328, "step": 9740 }, { "epoch": 0.33428277282086477, "grad_norm": 0.8009227054886829, "learning_rate": 7.762960216641611e-06, "loss": 0.3044, "step": 9741 }, { "epoch": 0.3343170899107756, "grad_norm": 0.7752998211045584, "learning_rate": 7.762497017849756e-06, "loss": 0.2734, "step": 9742 }, { "epoch": 0.33435140700068633, "grad_norm": 0.7249747548346303, "learning_rate": 7.762033784930473e-06, "loss": 0.3193, "step": 9743 }, { "epoch": 0.33438572409059714, "grad_norm": 0.7799462036552345, "learning_rate": 7.761570517889485e-06, "loss": 0.3295, "step": 9744 }, { "epoch": 0.3344200411805079, "grad_norm": 0.7732502393104228, "learning_rate": 7.76110721673251e-06, "loss": 0.2944, "step": 9745 }, { "epoch": 0.3344543582704187, "grad_norm": 0.8563999172527668, "learning_rate": 7.760643881465274e-06, "loss": 0.3504, "step": 9746 }, { "epoch": 0.33448867536032945, "grad_norm": 0.7535992774826591, "learning_rate": 7.760180512093503e-06, "loss": 0.2528, "step": 9747 }, { "epoch": 0.3345229924502402, "grad_norm": 0.7326340652124719, "learning_rate": 7.759717108622922e-06, "loss": 0.2911, "step": 9748 }, { "epoch": 0.334557309540151, "grad_norm": 0.6806707802047576, "learning_rate": 7.75925367105925e-06, "loss": 0.3015, "step": 9749 }, { "epoch": 0.33459162663006176, "grad_norm": 0.7576850568943264, "learning_rate": 7.758790199408217e-06, "loss": 0.3169, "step": 9750 }, { "epoch": 0.33462594371997256, "grad_norm": 0.8488496995539082, "learning_rate": 7.758326693675548e-06, "loss": 0.3201, "step": 9751 }, { "epoch": 0.3346602608098833, "grad_norm": 0.7952259118175047, "learning_rate": 7.75786315386697e-06, "loss": 0.36, "step": 9752 }, { "epoch": 0.3346945778997941, "grad_norm": 0.8259216395524812, "learning_rate": 7.757399579988204e-06, "loss": 0.2753, "step": 9753 }, { "epoch": 0.3347288949897049, "grad_norm": 0.824110907704828, "learning_rate": 7.756935972044986e-06, "loss": 0.3002, "step": 9754 }, { "epoch": 0.3347632120796156, "grad_norm": 0.7913678307363166, "learning_rate": 7.756472330043035e-06, "loss": 0.3211, "step": 9755 }, { "epoch": 0.33479752916952643, "grad_norm": 0.8181256836864093, "learning_rate": 7.756008653988082e-06, "loss": 0.3384, "step": 9756 }, { "epoch": 0.3348318462594372, "grad_norm": 0.8882650081032344, "learning_rate": 7.755544943885858e-06, "loss": 0.3513, "step": 9757 }, { "epoch": 0.334866163349348, "grad_norm": 0.7446584832696227, "learning_rate": 7.755081199742084e-06, "loss": 0.2963, "step": 9758 }, { "epoch": 0.33490048043925874, "grad_norm": 0.8436904399159811, "learning_rate": 7.754617421562497e-06, "loss": 0.3475, "step": 9759 }, { "epoch": 0.33493479752916955, "grad_norm": 0.8148980404702698, "learning_rate": 7.754153609352826e-06, "loss": 0.3127, "step": 9760 }, { "epoch": 0.3349691146190803, "grad_norm": 0.7864727490798065, "learning_rate": 7.753689763118794e-06, "loss": 0.3847, "step": 9761 }, { "epoch": 0.33500343170899105, "grad_norm": 0.9577195741229829, "learning_rate": 7.753225882866138e-06, "loss": 0.2732, "step": 9762 }, { "epoch": 0.33503774879890186, "grad_norm": 0.7655428187489942, "learning_rate": 7.752761968600584e-06, "loss": 0.2572, "step": 9763 }, { "epoch": 0.3350720658888126, "grad_norm": 0.8095458687308384, "learning_rate": 7.752298020327866e-06, "loss": 0.3168, "step": 9764 }, { "epoch": 0.3351063829787234, "grad_norm": 0.747990720625876, "learning_rate": 7.751834038053716e-06, "loss": 0.2645, "step": 9765 }, { "epoch": 0.33514070006863417, "grad_norm": 0.852456490619344, "learning_rate": 7.751370021783864e-06, "loss": 0.3197, "step": 9766 }, { "epoch": 0.335175017158545, "grad_norm": 0.8639699603544088, "learning_rate": 7.750905971524044e-06, "loss": 0.3355, "step": 9767 }, { "epoch": 0.3352093342484557, "grad_norm": 0.6874651684680417, "learning_rate": 7.750441887279985e-06, "loss": 0.282, "step": 9768 }, { "epoch": 0.33524365133836653, "grad_norm": 0.7741908142315935, "learning_rate": 7.749977769057426e-06, "loss": 0.3136, "step": 9769 }, { "epoch": 0.3352779684282773, "grad_norm": 0.8335868069681356, "learning_rate": 7.7495136168621e-06, "loss": 0.298, "step": 9770 }, { "epoch": 0.33531228551818804, "grad_norm": 0.7097057860301432, "learning_rate": 7.749049430699734e-06, "loss": 0.3407, "step": 9771 }, { "epoch": 0.33534660260809884, "grad_norm": 0.7673100414239281, "learning_rate": 7.74858521057607e-06, "loss": 0.3074, "step": 9772 }, { "epoch": 0.3353809196980096, "grad_norm": 0.780798406815613, "learning_rate": 7.748120956496839e-06, "loss": 0.2978, "step": 9773 }, { "epoch": 0.3354152367879204, "grad_norm": 1.1397738288778938, "learning_rate": 7.747656668467778e-06, "loss": 0.3225, "step": 9774 }, { "epoch": 0.33544955387783115, "grad_norm": 0.7109331924925351, "learning_rate": 7.747192346494623e-06, "loss": 0.2504, "step": 9775 }, { "epoch": 0.33548387096774196, "grad_norm": 0.7690039527934495, "learning_rate": 7.74672799058311e-06, "loss": 0.2611, "step": 9776 }, { "epoch": 0.3355181880576527, "grad_norm": 0.8638272635352109, "learning_rate": 7.746263600738973e-06, "loss": 0.368, "step": 9777 }, { "epoch": 0.33555250514756346, "grad_norm": 0.7935004920581193, "learning_rate": 7.745799176967951e-06, "loss": 0.2868, "step": 9778 }, { "epoch": 0.33558682223747427, "grad_norm": 0.7301369197253381, "learning_rate": 7.745334719275783e-06, "loss": 0.3049, "step": 9779 }, { "epoch": 0.335621139327385, "grad_norm": 0.905198961354262, "learning_rate": 7.744870227668205e-06, "loss": 0.3112, "step": 9780 }, { "epoch": 0.3356554564172958, "grad_norm": 0.7582891405853927, "learning_rate": 7.744405702150955e-06, "loss": 0.26, "step": 9781 }, { "epoch": 0.3356897735072066, "grad_norm": 0.7352274425709276, "learning_rate": 7.743941142729772e-06, "loss": 0.2566, "step": 9782 }, { "epoch": 0.3357240905971174, "grad_norm": 0.7393179855551159, "learning_rate": 7.743476549410397e-06, "loss": 0.3209, "step": 9783 }, { "epoch": 0.33575840768702814, "grad_norm": 0.7696243135152881, "learning_rate": 7.743011922198567e-06, "loss": 0.2947, "step": 9784 }, { "epoch": 0.3357927247769389, "grad_norm": 0.7588096940393445, "learning_rate": 7.742547261100021e-06, "loss": 0.2758, "step": 9785 }, { "epoch": 0.3358270418668497, "grad_norm": 0.731820329361013, "learning_rate": 7.742082566120504e-06, "loss": 0.2637, "step": 9786 }, { "epoch": 0.33586135895676045, "grad_norm": 0.7118523604487111, "learning_rate": 7.741617837265753e-06, "loss": 0.3015, "step": 9787 }, { "epoch": 0.33589567604667125, "grad_norm": 0.7762167331063057, "learning_rate": 7.74115307454151e-06, "loss": 0.3218, "step": 9788 }, { "epoch": 0.335929993136582, "grad_norm": 0.676896936575177, "learning_rate": 7.740688277953515e-06, "loss": 0.271, "step": 9789 }, { "epoch": 0.3359643102264928, "grad_norm": 0.8698082666848771, "learning_rate": 7.740223447507513e-06, "loss": 0.3243, "step": 9790 }, { "epoch": 0.33599862731640356, "grad_norm": 0.77548530938061, "learning_rate": 7.739758583209245e-06, "loss": 0.3406, "step": 9791 }, { "epoch": 0.33603294440631437, "grad_norm": 0.7311036182423336, "learning_rate": 7.739293685064454e-06, "loss": 0.2534, "step": 9792 }, { "epoch": 0.3360672614962251, "grad_norm": 0.7464703693383823, "learning_rate": 7.738828753078882e-06, "loss": 0.3424, "step": 9793 }, { "epoch": 0.3361015785861359, "grad_norm": 0.7684961727808062, "learning_rate": 7.738363787258277e-06, "loss": 0.3043, "step": 9794 }, { "epoch": 0.3361358956760467, "grad_norm": 0.759465331715738, "learning_rate": 7.737898787608375e-06, "loss": 0.2921, "step": 9795 }, { "epoch": 0.33617021276595743, "grad_norm": 0.7197537800154185, "learning_rate": 7.73743375413493e-06, "loss": 0.3058, "step": 9796 }, { "epoch": 0.33620452985586824, "grad_norm": 0.899788479926746, "learning_rate": 7.73696868684368e-06, "loss": 0.2703, "step": 9797 }, { "epoch": 0.336238846945779, "grad_norm": 0.7217031696540196, "learning_rate": 7.736503585740376e-06, "loss": 0.3069, "step": 9798 }, { "epoch": 0.3362731640356898, "grad_norm": 0.8690567225305614, "learning_rate": 7.736038450830758e-06, "loss": 0.3353, "step": 9799 }, { "epoch": 0.33630748112560055, "grad_norm": 0.8062822518975322, "learning_rate": 7.735573282120575e-06, "loss": 0.2862, "step": 9800 }, { "epoch": 0.3363417982155113, "grad_norm": 0.8161839181216721, "learning_rate": 7.735108079615574e-06, "loss": 0.2889, "step": 9801 }, { "epoch": 0.3363761153054221, "grad_norm": 0.8978938609948289, "learning_rate": 7.734642843321502e-06, "loss": 0.2833, "step": 9802 }, { "epoch": 0.33641043239533286, "grad_norm": 0.7612608518453667, "learning_rate": 7.734177573244104e-06, "loss": 0.2895, "step": 9803 }, { "epoch": 0.33644474948524367, "grad_norm": 0.7800464510422465, "learning_rate": 7.733712269389132e-06, "loss": 0.2912, "step": 9804 }, { "epoch": 0.3364790665751544, "grad_norm": 0.6985777945988114, "learning_rate": 7.733246931762332e-06, "loss": 0.31, "step": 9805 }, { "epoch": 0.3365133836650652, "grad_norm": 0.7389389953531199, "learning_rate": 7.732781560369452e-06, "loss": 0.3248, "step": 9806 }, { "epoch": 0.336547700754976, "grad_norm": 0.7841916286007062, "learning_rate": 7.732316155216242e-06, "loss": 0.2945, "step": 9807 }, { "epoch": 0.3365820178448867, "grad_norm": 0.6789945153360477, "learning_rate": 7.731850716308451e-06, "loss": 0.2948, "step": 9808 }, { "epoch": 0.33661633493479753, "grad_norm": 0.7926078353724267, "learning_rate": 7.73138524365183e-06, "loss": 0.2919, "step": 9809 }, { "epoch": 0.3366506520247083, "grad_norm": 0.8084522851168342, "learning_rate": 7.730919737252129e-06, "loss": 0.3118, "step": 9810 }, { "epoch": 0.3366849691146191, "grad_norm": 0.7647663180008148, "learning_rate": 7.730454197115098e-06, "loss": 0.3363, "step": 9811 }, { "epoch": 0.33671928620452984, "grad_norm": 0.7708883203828175, "learning_rate": 7.72998862324649e-06, "loss": 0.2873, "step": 9812 }, { "epoch": 0.33675360329444065, "grad_norm": 0.8393167311013746, "learning_rate": 7.729523015652053e-06, "loss": 0.3327, "step": 9813 }, { "epoch": 0.3367879203843514, "grad_norm": 0.7700503555606132, "learning_rate": 7.729057374337543e-06, "loss": 0.3341, "step": 9814 }, { "epoch": 0.3368222374742622, "grad_norm": 0.7251800930155082, "learning_rate": 7.728591699308712e-06, "loss": 0.2795, "step": 9815 }, { "epoch": 0.33685655456417296, "grad_norm": 0.8384413863823046, "learning_rate": 7.72812599057131e-06, "loss": 0.3405, "step": 9816 }, { "epoch": 0.3368908716540837, "grad_norm": 0.7907157139329725, "learning_rate": 7.727660248131093e-06, "loss": 0.3348, "step": 9817 }, { "epoch": 0.3369251887439945, "grad_norm": 0.7948799476015769, "learning_rate": 7.727194471993814e-06, "loss": 0.3082, "step": 9818 }, { "epoch": 0.33695950583390527, "grad_norm": 0.726897989614905, "learning_rate": 7.726728662165227e-06, "loss": 0.3328, "step": 9819 }, { "epoch": 0.3369938229238161, "grad_norm": 0.80551141055661, "learning_rate": 7.726262818651084e-06, "loss": 0.3418, "step": 9820 }, { "epoch": 0.33702814001372683, "grad_norm": 0.7904865513896316, "learning_rate": 7.725796941457145e-06, "loss": 0.2955, "step": 9821 }, { "epoch": 0.33706245710363764, "grad_norm": 0.7606942040539173, "learning_rate": 7.725331030589162e-06, "loss": 0.3328, "step": 9822 }, { "epoch": 0.3370967741935484, "grad_norm": 0.7217461731428314, "learning_rate": 7.724865086052891e-06, "loss": 0.2805, "step": 9823 }, { "epoch": 0.33713109128345914, "grad_norm": 0.7886071292378898, "learning_rate": 7.72439910785409e-06, "loss": 0.3353, "step": 9824 }, { "epoch": 0.33716540837336995, "grad_norm": 0.7749610566907779, "learning_rate": 7.723933095998516e-06, "loss": 0.3669, "step": 9825 }, { "epoch": 0.3371997254632807, "grad_norm": 0.700963620717398, "learning_rate": 7.72346705049192e-06, "loss": 0.2802, "step": 9826 }, { "epoch": 0.3372340425531915, "grad_norm": 0.8205985020145071, "learning_rate": 7.723000971340066e-06, "loss": 0.2648, "step": 9827 }, { "epoch": 0.33726835964310226, "grad_norm": 0.7652758588483175, "learning_rate": 7.72253485854871e-06, "loss": 0.2487, "step": 9828 }, { "epoch": 0.33730267673301306, "grad_norm": 0.7551362752707961, "learning_rate": 7.722068712123609e-06, "loss": 0.279, "step": 9829 }, { "epoch": 0.3373369938229238, "grad_norm": 0.8260750872561917, "learning_rate": 7.721602532070523e-06, "loss": 0.3336, "step": 9830 }, { "epoch": 0.33737131091283457, "grad_norm": 0.8304729120644527, "learning_rate": 7.721136318395212e-06, "loss": 0.3361, "step": 9831 }, { "epoch": 0.3374056280027454, "grad_norm": 0.8467627807633995, "learning_rate": 7.720670071103434e-06, "loss": 0.3581, "step": 9832 }, { "epoch": 0.3374399450926561, "grad_norm": 0.6972698303906404, "learning_rate": 7.720203790200947e-06, "loss": 0.2847, "step": 9833 }, { "epoch": 0.33747426218256693, "grad_norm": 0.759806906899383, "learning_rate": 7.719737475693516e-06, "loss": 0.3887, "step": 9834 }, { "epoch": 0.3375085792724777, "grad_norm": 0.7112944586483719, "learning_rate": 7.719271127586899e-06, "loss": 0.2689, "step": 9835 }, { "epoch": 0.3375428963623885, "grad_norm": 0.7921317419161344, "learning_rate": 7.718804745886856e-06, "loss": 0.2973, "step": 9836 }, { "epoch": 0.33757721345229924, "grad_norm": 0.8089094220815517, "learning_rate": 7.718338330599154e-06, "loss": 0.2872, "step": 9837 }, { "epoch": 0.33761153054221005, "grad_norm": 0.8275430737615107, "learning_rate": 7.717871881729548e-06, "loss": 0.3019, "step": 9838 }, { "epoch": 0.3376458476321208, "grad_norm": 0.7578758313245652, "learning_rate": 7.717405399283803e-06, "loss": 0.3227, "step": 9839 }, { "epoch": 0.33768016472203155, "grad_norm": 0.7472783084165506, "learning_rate": 7.716938883267685e-06, "loss": 0.3576, "step": 9840 }, { "epoch": 0.33771448181194236, "grad_norm": 0.8065987811082038, "learning_rate": 7.716472333686952e-06, "loss": 0.2982, "step": 9841 }, { "epoch": 0.3377487989018531, "grad_norm": 0.7472067549828317, "learning_rate": 7.716005750547372e-06, "loss": 0.2936, "step": 9842 }, { "epoch": 0.3377831159917639, "grad_norm": 0.9201430473494026, "learning_rate": 7.715539133854708e-06, "loss": 0.329, "step": 9843 }, { "epoch": 0.33781743308167467, "grad_norm": 0.7520262774193737, "learning_rate": 7.715072483614725e-06, "loss": 0.277, "step": 9844 }, { "epoch": 0.3378517501715855, "grad_norm": 0.8494932435931923, "learning_rate": 7.714605799833184e-06, "loss": 0.2746, "step": 9845 }, { "epoch": 0.3378860672614962, "grad_norm": 0.7187728488935026, "learning_rate": 7.714139082515855e-06, "loss": 0.2799, "step": 9846 }, { "epoch": 0.337920384351407, "grad_norm": 0.7808839141505295, "learning_rate": 7.713672331668502e-06, "loss": 0.3785, "step": 9847 }, { "epoch": 0.3379547014413178, "grad_norm": 0.8225667795797933, "learning_rate": 7.713205547296891e-06, "loss": 0.2835, "step": 9848 }, { "epoch": 0.33798901853122854, "grad_norm": 0.7114333608457521, "learning_rate": 7.712738729406788e-06, "loss": 0.2438, "step": 9849 }, { "epoch": 0.33802333562113934, "grad_norm": 0.6704040005365104, "learning_rate": 7.712271878003961e-06, "loss": 0.2979, "step": 9850 }, { "epoch": 0.3380576527110501, "grad_norm": 0.7729367145142498, "learning_rate": 7.71180499309418e-06, "loss": 0.3325, "step": 9851 }, { "epoch": 0.3380919698009609, "grad_norm": 0.7669254503086015, "learning_rate": 7.711338074683206e-06, "loss": 0.2713, "step": 9852 }, { "epoch": 0.33812628689087165, "grad_norm": 0.7653028481138441, "learning_rate": 7.710871122776812e-06, "loss": 0.3158, "step": 9853 }, { "epoch": 0.3381606039807824, "grad_norm": 0.8690888566850561, "learning_rate": 7.710404137380768e-06, "loss": 0.3667, "step": 9854 }, { "epoch": 0.3381949210706932, "grad_norm": 0.7957348673601179, "learning_rate": 7.70993711850084e-06, "loss": 0.3351, "step": 9855 }, { "epoch": 0.33822923816060396, "grad_norm": 0.8749039378103839, "learning_rate": 7.709470066142796e-06, "loss": 0.2891, "step": 9856 }, { "epoch": 0.33826355525051477, "grad_norm": 0.736245089034687, "learning_rate": 7.70900298031241e-06, "loss": 0.2905, "step": 9857 }, { "epoch": 0.3382978723404255, "grad_norm": 0.7636681149849202, "learning_rate": 7.708535861015452e-06, "loss": 0.2881, "step": 9858 }, { "epoch": 0.3383321894303363, "grad_norm": 0.8360442637849813, "learning_rate": 7.708068708257687e-06, "loss": 0.3364, "step": 9859 }, { "epoch": 0.3383665065202471, "grad_norm": 0.6930783882984767, "learning_rate": 7.707601522044896e-06, "loss": 0.3065, "step": 9860 }, { "epoch": 0.3384008236101579, "grad_norm": 0.8329377959685631, "learning_rate": 7.707134302382842e-06, "loss": 0.2984, "step": 9861 }, { "epoch": 0.33843514070006864, "grad_norm": 0.7123248009653409, "learning_rate": 7.706667049277299e-06, "loss": 0.3542, "step": 9862 }, { "epoch": 0.3384694577899794, "grad_norm": 0.7323493442559573, "learning_rate": 7.706199762734043e-06, "loss": 0.2868, "step": 9863 }, { "epoch": 0.3385037748798902, "grad_norm": 0.7604394214168865, "learning_rate": 7.705732442758842e-06, "loss": 0.3004, "step": 9864 }, { "epoch": 0.33853809196980095, "grad_norm": 0.9240879430852155, "learning_rate": 7.705265089357471e-06, "loss": 0.2905, "step": 9865 }, { "epoch": 0.33857240905971175, "grad_norm": 0.8526937547517667, "learning_rate": 7.704797702535706e-06, "loss": 0.2943, "step": 9866 }, { "epoch": 0.3386067261496225, "grad_norm": 0.7156880010641476, "learning_rate": 7.704330282299317e-06, "loss": 0.2749, "step": 9867 }, { "epoch": 0.3386410432395333, "grad_norm": 0.8146989507341902, "learning_rate": 7.70386282865408e-06, "loss": 0.2986, "step": 9868 }, { "epoch": 0.33867536032944406, "grad_norm": 0.6753706635269261, "learning_rate": 7.703395341605771e-06, "loss": 0.2336, "step": 9869 }, { "epoch": 0.3387096774193548, "grad_norm": 0.8122716773393739, "learning_rate": 7.702927821160164e-06, "loss": 0.3369, "step": 9870 }, { "epoch": 0.3387439945092656, "grad_norm": 0.770189546782957, "learning_rate": 7.702460267323036e-06, "loss": 0.3091, "step": 9871 }, { "epoch": 0.3387783115991764, "grad_norm": 0.7444855932406158, "learning_rate": 7.701992680100161e-06, "loss": 0.2728, "step": 9872 }, { "epoch": 0.3388126286890872, "grad_norm": 0.7629054090277979, "learning_rate": 7.70152505949732e-06, "loss": 0.2929, "step": 9873 }, { "epoch": 0.33884694577899793, "grad_norm": 0.7523373195231243, "learning_rate": 7.701057405520282e-06, "loss": 0.3235, "step": 9874 }, { "epoch": 0.33888126286890874, "grad_norm": 0.8359698233635258, "learning_rate": 7.700589718174832e-06, "loss": 0.2906, "step": 9875 }, { "epoch": 0.3389155799588195, "grad_norm": 0.7950419401081215, "learning_rate": 7.700121997466743e-06, "loss": 0.261, "step": 9876 }, { "epoch": 0.33894989704873024, "grad_norm": 0.7917563788690066, "learning_rate": 7.699654243401795e-06, "loss": 0.4041, "step": 9877 }, { "epoch": 0.33898421413864105, "grad_norm": 0.8033989852793932, "learning_rate": 7.699186455985765e-06, "loss": 0.3362, "step": 9878 }, { "epoch": 0.3390185312285518, "grad_norm": 0.7816300409651897, "learning_rate": 7.698718635224436e-06, "loss": 0.2931, "step": 9879 }, { "epoch": 0.3390528483184626, "grad_norm": 0.7506940463949956, "learning_rate": 7.698250781123583e-06, "loss": 0.2722, "step": 9880 }, { "epoch": 0.33908716540837336, "grad_norm": 0.7542638320923356, "learning_rate": 7.697782893688988e-06, "loss": 0.2707, "step": 9881 }, { "epoch": 0.33912148249828417, "grad_norm": 0.9138877430289385, "learning_rate": 7.697314972926431e-06, "loss": 0.3277, "step": 9882 }, { "epoch": 0.3391557995881949, "grad_norm": 0.8060844470727551, "learning_rate": 7.696847018841691e-06, "loss": 0.3017, "step": 9883 }, { "epoch": 0.3391901166781057, "grad_norm": 0.7242208432265774, "learning_rate": 7.69637903144055e-06, "loss": 0.3225, "step": 9884 }, { "epoch": 0.3392244337680165, "grad_norm": 0.7377448138156167, "learning_rate": 7.69591101072879e-06, "loss": 0.2725, "step": 9885 }, { "epoch": 0.3392587508579272, "grad_norm": 0.6802421917272568, "learning_rate": 7.695442956712195e-06, "loss": 0.2641, "step": 9886 }, { "epoch": 0.33929306794783803, "grad_norm": 0.6953589069989289, "learning_rate": 7.694974869396544e-06, "loss": 0.2554, "step": 9887 }, { "epoch": 0.3393273850377488, "grad_norm": 0.8360076745880088, "learning_rate": 7.694506748787619e-06, "loss": 0.3181, "step": 9888 }, { "epoch": 0.3393617021276596, "grad_norm": 0.7202246062364753, "learning_rate": 7.694038594891205e-06, "loss": 0.2888, "step": 9889 }, { "epoch": 0.33939601921757034, "grad_norm": 0.7457065936292991, "learning_rate": 7.693570407713085e-06, "loss": 0.386, "step": 9890 }, { "epoch": 0.33943033630748115, "grad_norm": 0.8314463018399314, "learning_rate": 7.693102187259044e-06, "loss": 0.3109, "step": 9891 }, { "epoch": 0.3394646533973919, "grad_norm": 0.7361254168592951, "learning_rate": 7.692633933534866e-06, "loss": 0.2565, "step": 9892 }, { "epoch": 0.33949897048730265, "grad_norm": 0.7731252124878742, "learning_rate": 7.692165646546332e-06, "loss": 0.3243, "step": 9893 }, { "epoch": 0.33953328757721346, "grad_norm": 0.8556003153848633, "learning_rate": 7.691697326299232e-06, "loss": 0.3641, "step": 9894 }, { "epoch": 0.3395676046671242, "grad_norm": 0.8458702592086818, "learning_rate": 7.69122897279935e-06, "loss": 0.2983, "step": 9895 }, { "epoch": 0.339601921757035, "grad_norm": 0.8039769562798351, "learning_rate": 7.690760586052472e-06, "loss": 0.3276, "step": 9896 }, { "epoch": 0.33963623884694577, "grad_norm": 0.7818951450086659, "learning_rate": 7.690292166064383e-06, "loss": 0.3037, "step": 9897 }, { "epoch": 0.3396705559368566, "grad_norm": 0.7789118439151571, "learning_rate": 7.689823712840873e-06, "loss": 0.3004, "step": 9898 }, { "epoch": 0.33970487302676733, "grad_norm": 0.658504169184195, "learning_rate": 7.689355226387724e-06, "loss": 0.3103, "step": 9899 }, { "epoch": 0.3397391901166781, "grad_norm": 0.8230579993388368, "learning_rate": 7.68888670671073e-06, "loss": 0.339, "step": 9900 }, { "epoch": 0.3397735072065889, "grad_norm": 0.8114595568200457, "learning_rate": 7.688418153815672e-06, "loss": 0.2894, "step": 9901 }, { "epoch": 0.33980782429649964, "grad_norm": 0.7623286244631602, "learning_rate": 7.687949567708344e-06, "loss": 0.2725, "step": 9902 }, { "epoch": 0.33984214138641045, "grad_norm": 0.7875161108118524, "learning_rate": 7.687480948394533e-06, "loss": 0.3037, "step": 9903 }, { "epoch": 0.3398764584763212, "grad_norm": 0.7600983284199461, "learning_rate": 7.687012295880027e-06, "loss": 0.293, "step": 9904 }, { "epoch": 0.339910775566232, "grad_norm": 0.7955951745044344, "learning_rate": 7.68654361017062e-06, "loss": 0.3678, "step": 9905 }, { "epoch": 0.33994509265614276, "grad_norm": 0.8385775103953285, "learning_rate": 7.686074891272097e-06, "loss": 0.2994, "step": 9906 }, { "epoch": 0.3399794097460535, "grad_norm": 0.7077085303247119, "learning_rate": 7.685606139190249e-06, "loss": 0.3093, "step": 9907 }, { "epoch": 0.3400137268359643, "grad_norm": 1.0662603031201336, "learning_rate": 7.685137353930869e-06, "loss": 0.3698, "step": 9908 }, { "epoch": 0.34004804392587507, "grad_norm": 0.7319852978855547, "learning_rate": 7.684668535499749e-06, "loss": 0.3401, "step": 9909 }, { "epoch": 0.3400823610157859, "grad_norm": 0.7324506241025905, "learning_rate": 7.684199683902678e-06, "loss": 0.2476, "step": 9910 }, { "epoch": 0.3401166781056966, "grad_norm": 0.7910459980307311, "learning_rate": 7.68373079914545e-06, "loss": 0.3417, "step": 9911 }, { "epoch": 0.34015099519560743, "grad_norm": 0.8121980682178757, "learning_rate": 7.683261881233857e-06, "loss": 0.2574, "step": 9912 }, { "epoch": 0.3401853122855182, "grad_norm": 0.8208272108783443, "learning_rate": 7.68279293017369e-06, "loss": 0.3278, "step": 9913 }, { "epoch": 0.340219629375429, "grad_norm": 0.712753465471402, "learning_rate": 7.682323945970746e-06, "loss": 0.2682, "step": 9914 }, { "epoch": 0.34025394646533974, "grad_norm": 0.8986031403385277, "learning_rate": 7.681854928630817e-06, "loss": 0.3164, "step": 9915 }, { "epoch": 0.3402882635552505, "grad_norm": 0.8554119625331484, "learning_rate": 7.681385878159697e-06, "loss": 0.2564, "step": 9916 }, { "epoch": 0.3403225806451613, "grad_norm": 0.8132525060718698, "learning_rate": 7.68091679456318e-06, "loss": 0.3325, "step": 9917 }, { "epoch": 0.34035689773507205, "grad_norm": 0.756332079036236, "learning_rate": 7.680447677847063e-06, "loss": 0.3171, "step": 9918 }, { "epoch": 0.34039121482498286, "grad_norm": 0.8157426013381187, "learning_rate": 7.679978528017138e-06, "loss": 0.3029, "step": 9919 }, { "epoch": 0.3404255319148936, "grad_norm": 0.6146870166123283, "learning_rate": 7.679509345079203e-06, "loss": 0.2925, "step": 9920 }, { "epoch": 0.3404598490048044, "grad_norm": 0.7437474522805805, "learning_rate": 7.679040129039056e-06, "loss": 0.2851, "step": 9921 }, { "epoch": 0.34049416609471517, "grad_norm": 0.7048832802372035, "learning_rate": 7.67857087990249e-06, "loss": 0.2726, "step": 9922 }, { "epoch": 0.3405284831846259, "grad_norm": 0.7468444257898412, "learning_rate": 7.678101597675302e-06, "loss": 0.2941, "step": 9923 }, { "epoch": 0.3405628002745367, "grad_norm": 0.7093937060491686, "learning_rate": 7.677632282363295e-06, "loss": 0.3323, "step": 9924 }, { "epoch": 0.3405971173644475, "grad_norm": 0.7181951176572697, "learning_rate": 7.677162933972263e-06, "loss": 0.2946, "step": 9925 }, { "epoch": 0.3406314344543583, "grad_norm": 0.8106770379062235, "learning_rate": 7.676693552508e-06, "loss": 0.3036, "step": 9926 }, { "epoch": 0.34066575154426904, "grad_norm": 0.6814602827886528, "learning_rate": 7.676224137976314e-06, "loss": 0.2728, "step": 9927 }, { "epoch": 0.34070006863417984, "grad_norm": 0.8073675615411433, "learning_rate": 7.675754690382995e-06, "loss": 0.3438, "step": 9928 }, { "epoch": 0.3407343857240906, "grad_norm": 0.8677381020930924, "learning_rate": 7.675285209733847e-06, "loss": 0.2953, "step": 9929 }, { "epoch": 0.34076870281400135, "grad_norm": 0.7878179221785542, "learning_rate": 7.674815696034672e-06, "loss": 0.3342, "step": 9930 }, { "epoch": 0.34080301990391215, "grad_norm": 0.7808176497420087, "learning_rate": 7.674346149291264e-06, "loss": 0.2927, "step": 9931 }, { "epoch": 0.3408373369938229, "grad_norm": 0.7785620340213479, "learning_rate": 7.673876569509428e-06, "loss": 0.3245, "step": 9932 }, { "epoch": 0.3408716540837337, "grad_norm": 0.7901750045598696, "learning_rate": 7.673406956694967e-06, "loss": 0.2982, "step": 9933 }, { "epoch": 0.34090597117364446, "grad_norm": 0.653542478842603, "learning_rate": 7.672937310853679e-06, "loss": 0.3101, "step": 9934 }, { "epoch": 0.34094028826355527, "grad_norm": 0.7277318581996948, "learning_rate": 7.672467631991365e-06, "loss": 0.3085, "step": 9935 }, { "epoch": 0.340974605353466, "grad_norm": 0.7450551015478889, "learning_rate": 7.67199792011383e-06, "loss": 0.2823, "step": 9936 }, { "epoch": 0.3410089224433768, "grad_norm": 0.718852640891822, "learning_rate": 7.671528175226877e-06, "loss": 0.2985, "step": 9937 }, { "epoch": 0.3410432395332876, "grad_norm": 0.7841603041062492, "learning_rate": 7.671058397336307e-06, "loss": 0.2888, "step": 9938 }, { "epoch": 0.34107755662319833, "grad_norm": 0.7828224409128829, "learning_rate": 7.670588586447924e-06, "loss": 0.272, "step": 9939 }, { "epoch": 0.34111187371310914, "grad_norm": 0.7474519770402844, "learning_rate": 7.670118742567536e-06, "loss": 0.297, "step": 9940 }, { "epoch": 0.3411461908030199, "grad_norm": 0.7976801454008137, "learning_rate": 7.669648865700942e-06, "loss": 0.2768, "step": 9941 }, { "epoch": 0.3411805078929307, "grad_norm": 0.6906704270301183, "learning_rate": 7.669178955853947e-06, "loss": 0.28, "step": 9942 }, { "epoch": 0.34121482498284145, "grad_norm": 0.7726580129353435, "learning_rate": 7.66870901303236e-06, "loss": 0.3186, "step": 9943 }, { "epoch": 0.34124914207275225, "grad_norm": 0.7840155024679293, "learning_rate": 7.668239037241984e-06, "loss": 0.2685, "step": 9944 }, { "epoch": 0.341283459162663, "grad_norm": 0.7777279501153368, "learning_rate": 7.667769028488625e-06, "loss": 0.3038, "step": 9945 }, { "epoch": 0.34131777625257376, "grad_norm": 0.8525895752156104, "learning_rate": 7.667298986778091e-06, "loss": 0.2501, "step": 9946 }, { "epoch": 0.34135209334248456, "grad_norm": 0.7172292851148447, "learning_rate": 7.666828912116188e-06, "loss": 0.273, "step": 9947 }, { "epoch": 0.3413864104323953, "grad_norm": 0.8627795396999813, "learning_rate": 7.666358804508722e-06, "loss": 0.2879, "step": 9948 }, { "epoch": 0.3414207275223061, "grad_norm": 0.829943094760607, "learning_rate": 7.665888663961503e-06, "loss": 0.39, "step": 9949 }, { "epoch": 0.3414550446122169, "grad_norm": 0.6806717701808622, "learning_rate": 7.665418490480336e-06, "loss": 0.3067, "step": 9950 }, { "epoch": 0.3414893617021277, "grad_norm": 0.8130830401321357, "learning_rate": 7.664948284071032e-06, "loss": 0.3598, "step": 9951 }, { "epoch": 0.34152367879203843, "grad_norm": 0.8242473108070747, "learning_rate": 7.664478044739398e-06, "loss": 0.3025, "step": 9952 }, { "epoch": 0.3415579958819492, "grad_norm": 0.8274502544916617, "learning_rate": 7.664007772491245e-06, "loss": 0.3553, "step": 9953 }, { "epoch": 0.34159231297186, "grad_norm": 0.761630367319145, "learning_rate": 7.663537467332383e-06, "loss": 0.314, "step": 9954 }, { "epoch": 0.34162663006177074, "grad_norm": 0.7119135165628122, "learning_rate": 7.663067129268618e-06, "loss": 0.3253, "step": 9955 }, { "epoch": 0.34166094715168155, "grad_norm": 0.8729862891691524, "learning_rate": 7.662596758305767e-06, "loss": 0.357, "step": 9956 }, { "epoch": 0.3416952642415923, "grad_norm": 0.7838496690391614, "learning_rate": 7.662126354449634e-06, "loss": 0.2775, "step": 9957 }, { "epoch": 0.3417295813315031, "grad_norm": 0.7982246581227637, "learning_rate": 7.661655917706037e-06, "loss": 0.3184, "step": 9958 }, { "epoch": 0.34176389842141386, "grad_norm": 0.8061702294590833, "learning_rate": 7.661185448080781e-06, "loss": 0.2805, "step": 9959 }, { "epoch": 0.34179821551132467, "grad_norm": 0.7367344180917262, "learning_rate": 7.660714945579684e-06, "loss": 0.3272, "step": 9960 }, { "epoch": 0.3418325326012354, "grad_norm": 0.6781547555320361, "learning_rate": 7.660244410208555e-06, "loss": 0.2462, "step": 9961 }, { "epoch": 0.34186684969114617, "grad_norm": 0.781173023079023, "learning_rate": 7.659773841973208e-06, "loss": 0.3472, "step": 9962 }, { "epoch": 0.341901166781057, "grad_norm": 0.8033047825907783, "learning_rate": 7.659303240879456e-06, "loss": 0.3171, "step": 9963 }, { "epoch": 0.3419354838709677, "grad_norm": 0.8093868857424786, "learning_rate": 7.658832606933113e-06, "loss": 0.3158, "step": 9964 }, { "epoch": 0.34196980096087853, "grad_norm": 0.6560784418550669, "learning_rate": 7.658361940139994e-06, "loss": 0.2764, "step": 9965 }, { "epoch": 0.3420041180507893, "grad_norm": 0.8302614799116109, "learning_rate": 7.657891240505912e-06, "loss": 0.3665, "step": 9966 }, { "epoch": 0.3420384351407001, "grad_norm": 0.6946879734949605, "learning_rate": 7.65742050803668e-06, "loss": 0.3017, "step": 9967 }, { "epoch": 0.34207275223061084, "grad_norm": 0.7532593528376503, "learning_rate": 7.656949742738118e-06, "loss": 0.3009, "step": 9968 }, { "epoch": 0.3421070693205216, "grad_norm": 0.8989959633217783, "learning_rate": 7.65647894461604e-06, "loss": 0.3031, "step": 9969 }, { "epoch": 0.3421413864104324, "grad_norm": 0.7358267393177623, "learning_rate": 7.656008113676261e-06, "loss": 0.2982, "step": 9970 }, { "epoch": 0.34217570350034315, "grad_norm": 0.6903932997796397, "learning_rate": 7.655537249924599e-06, "loss": 0.3123, "step": 9971 }, { "epoch": 0.34221002059025396, "grad_norm": 0.8915380960761697, "learning_rate": 7.655066353366871e-06, "loss": 0.3361, "step": 9972 }, { "epoch": 0.3422443376801647, "grad_norm": 0.7506037479273117, "learning_rate": 7.654595424008892e-06, "loss": 0.2747, "step": 9973 }, { "epoch": 0.3422786547700755, "grad_norm": 0.7836176800428334, "learning_rate": 7.654124461856482e-06, "loss": 0.2919, "step": 9974 }, { "epoch": 0.34231297185998627, "grad_norm": 0.792692360726369, "learning_rate": 7.65365346691546e-06, "loss": 0.3623, "step": 9975 }, { "epoch": 0.342347288949897, "grad_norm": 0.8266564990192672, "learning_rate": 7.653182439191642e-06, "loss": 0.278, "step": 9976 }, { "epoch": 0.34238160603980783, "grad_norm": 0.7688342307705235, "learning_rate": 7.652711378690849e-06, "loss": 0.2779, "step": 9977 }, { "epoch": 0.3424159231297186, "grad_norm": 0.7316915395980736, "learning_rate": 7.652240285418899e-06, "loss": 0.3498, "step": 9978 }, { "epoch": 0.3424502402196294, "grad_norm": 0.7596368727680544, "learning_rate": 7.651769159381613e-06, "loss": 0.3294, "step": 9979 }, { "epoch": 0.34248455730954014, "grad_norm": 0.726073600800601, "learning_rate": 7.65129800058481e-06, "loss": 0.2858, "step": 9980 }, { "epoch": 0.34251887439945095, "grad_norm": 0.7874637323406829, "learning_rate": 7.650826809034311e-06, "loss": 0.3233, "step": 9981 }, { "epoch": 0.3425531914893617, "grad_norm": 0.7253883381862847, "learning_rate": 7.650355584735938e-06, "loss": 0.2923, "step": 9982 }, { "epoch": 0.3425875085792725, "grad_norm": 0.785478236123595, "learning_rate": 7.649884327695511e-06, "loss": 0.3146, "step": 9983 }, { "epoch": 0.34262182566918326, "grad_norm": 0.6992508546400951, "learning_rate": 7.649413037918853e-06, "loss": 0.2722, "step": 9984 }, { "epoch": 0.342656142759094, "grad_norm": 0.795963385851242, "learning_rate": 7.648941715411789e-06, "loss": 0.2831, "step": 9985 }, { "epoch": 0.3426904598490048, "grad_norm": 0.7810086355100673, "learning_rate": 7.648470360180135e-06, "loss": 0.3102, "step": 9986 }, { "epoch": 0.34272477693891557, "grad_norm": 1.2869651963989002, "learning_rate": 7.647998972229717e-06, "loss": 0.3456, "step": 9987 }, { "epoch": 0.34275909402882637, "grad_norm": 0.8476230363944862, "learning_rate": 7.647527551566362e-06, "loss": 0.3202, "step": 9988 }, { "epoch": 0.3427934111187371, "grad_norm": 0.8196729732259438, "learning_rate": 7.647056098195889e-06, "loss": 0.3125, "step": 9989 }, { "epoch": 0.34282772820864793, "grad_norm": 0.7898592879162752, "learning_rate": 7.646584612124121e-06, "loss": 0.3119, "step": 9990 }, { "epoch": 0.3428620452985587, "grad_norm": 0.8022505396144762, "learning_rate": 7.64611309335689e-06, "loss": 0.3193, "step": 9991 }, { "epoch": 0.34289636238846943, "grad_norm": 0.7419158131426672, "learning_rate": 7.645641541900016e-06, "loss": 0.3006, "step": 9992 }, { "epoch": 0.34293067947838024, "grad_norm": 0.7646584981387504, "learning_rate": 7.645169957759325e-06, "loss": 0.3364, "step": 9993 }, { "epoch": 0.342964996568291, "grad_norm": 0.7763186760448724, "learning_rate": 7.644698340940644e-06, "loss": 0.2788, "step": 9994 }, { "epoch": 0.3429993136582018, "grad_norm": 0.7336479488207638, "learning_rate": 7.644226691449796e-06, "loss": 0.2923, "step": 9995 }, { "epoch": 0.34303363074811255, "grad_norm": 0.7699617444081391, "learning_rate": 7.643755009292612e-06, "loss": 0.3039, "step": 9996 }, { "epoch": 0.34306794783802336, "grad_norm": 0.7156209501794383, "learning_rate": 7.643283294474918e-06, "loss": 0.2818, "step": 9997 }, { "epoch": 0.3431022649279341, "grad_norm": 0.8279284583068491, "learning_rate": 7.642811547002538e-06, "loss": 0.3246, "step": 9998 }, { "epoch": 0.34313658201784486, "grad_norm": 0.8315472801438963, "learning_rate": 7.642339766881304e-06, "loss": 0.3397, "step": 9999 }, { "epoch": 0.34317089910775567, "grad_norm": 0.7214930890760606, "learning_rate": 7.641867954117043e-06, "loss": 0.2744, "step": 10000 }, { "epoch": 0.3432052161976664, "grad_norm": 0.7761250269865685, "learning_rate": 7.641396108715585e-06, "loss": 0.2956, "step": 10001 }, { "epoch": 0.3432395332875772, "grad_norm": 0.8456039654804123, "learning_rate": 7.640924230682754e-06, "loss": 0.3555, "step": 10002 }, { "epoch": 0.343273850377488, "grad_norm": 0.7683534248810816, "learning_rate": 7.640452320024385e-06, "loss": 0.2848, "step": 10003 }, { "epoch": 0.3433081674673988, "grad_norm": 0.8586708716637874, "learning_rate": 7.639980376746306e-06, "loss": 0.3298, "step": 10004 }, { "epoch": 0.34334248455730954, "grad_norm": 0.6987963867292484, "learning_rate": 7.639508400854348e-06, "loss": 0.2776, "step": 10005 }, { "epoch": 0.34337680164722034, "grad_norm": 0.7163858253904603, "learning_rate": 7.63903639235434e-06, "loss": 0.3185, "step": 10006 }, { "epoch": 0.3434111187371311, "grad_norm": 0.8690406246919877, "learning_rate": 7.638564351252115e-06, "loss": 0.3696, "step": 10007 }, { "epoch": 0.34344543582704185, "grad_norm": 0.8621401108037464, "learning_rate": 7.638092277553503e-06, "loss": 0.2943, "step": 10008 }, { "epoch": 0.34347975291695265, "grad_norm": 0.791828436399382, "learning_rate": 7.637620171264339e-06, "loss": 0.2898, "step": 10009 }, { "epoch": 0.3435140700068634, "grad_norm": 0.7549849458821674, "learning_rate": 7.637148032390452e-06, "loss": 0.3621, "step": 10010 }, { "epoch": 0.3435483870967742, "grad_norm": 0.7822470232649563, "learning_rate": 7.636675860937675e-06, "loss": 0.2891, "step": 10011 }, { "epoch": 0.34358270418668496, "grad_norm": 0.7394268015515291, "learning_rate": 7.63620365691184e-06, "loss": 0.2997, "step": 10012 }, { "epoch": 0.34361702127659577, "grad_norm": 0.8428220160606165, "learning_rate": 7.635731420318784e-06, "loss": 0.3021, "step": 10013 }, { "epoch": 0.3436513383665065, "grad_norm": 0.9375587575955397, "learning_rate": 7.63525915116434e-06, "loss": 0.2967, "step": 10014 }, { "epoch": 0.34368565545641727, "grad_norm": 0.8174920985192274, "learning_rate": 7.63478684945434e-06, "loss": 0.321, "step": 10015 }, { "epoch": 0.3437199725463281, "grad_norm": 0.7761620876784228, "learning_rate": 7.634314515194621e-06, "loss": 0.2896, "step": 10016 }, { "epoch": 0.34375428963623883, "grad_norm": 0.8198852323138226, "learning_rate": 7.633842148391017e-06, "loss": 0.2672, "step": 10017 }, { "epoch": 0.34378860672614964, "grad_norm": 0.7985990863881968, "learning_rate": 7.633369749049366e-06, "loss": 0.3128, "step": 10018 }, { "epoch": 0.3438229238160604, "grad_norm": 0.8733172597150607, "learning_rate": 7.6328973171755e-06, "loss": 0.3248, "step": 10019 }, { "epoch": 0.3438572409059712, "grad_norm": 0.8353886412159417, "learning_rate": 7.632424852775257e-06, "loss": 0.3263, "step": 10020 }, { "epoch": 0.34389155799588195, "grad_norm": 0.806243812841305, "learning_rate": 7.631952355854475e-06, "loss": 0.2673, "step": 10021 }, { "epoch": 0.3439258750857927, "grad_norm": 0.7530088595459397, "learning_rate": 7.63147982641899e-06, "loss": 0.3269, "step": 10022 }, { "epoch": 0.3439601921757035, "grad_norm": 0.7887423081896459, "learning_rate": 7.631007264474641e-06, "loss": 0.352, "step": 10023 }, { "epoch": 0.34399450926561426, "grad_norm": 0.697217669259269, "learning_rate": 7.630534670027263e-06, "loss": 0.299, "step": 10024 }, { "epoch": 0.34402882635552506, "grad_norm": 0.7334705804278995, "learning_rate": 7.630062043082697e-06, "loss": 0.3352, "step": 10025 }, { "epoch": 0.3440631434454358, "grad_norm": 0.8054181162211168, "learning_rate": 7.62958938364678e-06, "loss": 0.3451, "step": 10026 }, { "epoch": 0.3440974605353466, "grad_norm": 0.6279810359763994, "learning_rate": 7.629116691725353e-06, "loss": 0.2599, "step": 10027 }, { "epoch": 0.3441317776252574, "grad_norm": 0.7254451746556945, "learning_rate": 7.628643967324254e-06, "loss": 0.2918, "step": 10028 }, { "epoch": 0.3441660947151682, "grad_norm": 0.7658399045285036, "learning_rate": 7.628171210449322e-06, "loss": 0.2869, "step": 10029 }, { "epoch": 0.34420041180507893, "grad_norm": 0.7696424804875408, "learning_rate": 7.627698421106402e-06, "loss": 0.3026, "step": 10030 }, { "epoch": 0.3442347288949897, "grad_norm": 0.7692094176731387, "learning_rate": 7.627225599301329e-06, "loss": 0.2588, "step": 10031 }, { "epoch": 0.3442690459849005, "grad_norm": 0.7554454292904871, "learning_rate": 7.626752745039948e-06, "loss": 0.3362, "step": 10032 }, { "epoch": 0.34430336307481124, "grad_norm": 0.7906835155727275, "learning_rate": 7.6262798583281005e-06, "loss": 0.2935, "step": 10033 }, { "epoch": 0.34433768016472205, "grad_norm": 0.7523222856422223, "learning_rate": 7.625806939171624e-06, "loss": 0.2924, "step": 10034 }, { "epoch": 0.3443719972546328, "grad_norm": 0.858223298369344, "learning_rate": 7.625333987576366e-06, "loss": 0.2878, "step": 10035 }, { "epoch": 0.3444063143445436, "grad_norm": 0.8607952134337887, "learning_rate": 7.624861003548167e-06, "loss": 0.3658, "step": 10036 }, { "epoch": 0.34444063143445436, "grad_norm": 0.756820765396187, "learning_rate": 7.624387987092872e-06, "loss": 0.324, "step": 10037 }, { "epoch": 0.3444749485243651, "grad_norm": 0.9286711160886953, "learning_rate": 7.623914938216321e-06, "loss": 0.293, "step": 10038 }, { "epoch": 0.3445092656142759, "grad_norm": 0.8669898026248714, "learning_rate": 7.623441856924363e-06, "loss": 0.2319, "step": 10039 }, { "epoch": 0.34454358270418667, "grad_norm": 0.7417808003108866, "learning_rate": 7.622968743222837e-06, "loss": 0.3106, "step": 10040 }, { "epoch": 0.3445778997940975, "grad_norm": 0.7013365538313963, "learning_rate": 7.622495597117592e-06, "loss": 0.2395, "step": 10041 }, { "epoch": 0.3446122168840082, "grad_norm": 0.7364280173461357, "learning_rate": 7.62202241861447e-06, "loss": 0.3152, "step": 10042 }, { "epoch": 0.34464653397391903, "grad_norm": 0.8028509874049987, "learning_rate": 7.621549207719317e-06, "loss": 0.3316, "step": 10043 }, { "epoch": 0.3446808510638298, "grad_norm": 0.8036174321714958, "learning_rate": 7.621075964437983e-06, "loss": 0.3072, "step": 10044 }, { "epoch": 0.34471516815374054, "grad_norm": 0.6828793374509697, "learning_rate": 7.620602688776308e-06, "loss": 0.277, "step": 10045 }, { "epoch": 0.34474948524365134, "grad_norm": 0.8094711749285187, "learning_rate": 7.6201293807401445e-06, "loss": 0.293, "step": 10046 }, { "epoch": 0.3447838023335621, "grad_norm": 0.8071967578133424, "learning_rate": 7.619656040335337e-06, "loss": 0.3021, "step": 10047 }, { "epoch": 0.3448181194234729, "grad_norm": 0.7705512238396603, "learning_rate": 7.619182667567733e-06, "loss": 0.2831, "step": 10048 }, { "epoch": 0.34485243651338365, "grad_norm": 0.8601687008606996, "learning_rate": 7.61870926244318e-06, "loss": 0.3307, "step": 10049 }, { "epoch": 0.34488675360329446, "grad_norm": 0.8051923896296922, "learning_rate": 7.618235824967528e-06, "loss": 0.2859, "step": 10050 }, { "epoch": 0.3449210706932052, "grad_norm": 0.7463875928762648, "learning_rate": 7.617762355146625e-06, "loss": 0.2719, "step": 10051 }, { "epoch": 0.344955387783116, "grad_norm": 0.8011218426872153, "learning_rate": 7.617288852986321e-06, "loss": 0.266, "step": 10052 }, { "epoch": 0.34498970487302677, "grad_norm": 0.6828708756745755, "learning_rate": 7.616815318492463e-06, "loss": 0.2933, "step": 10053 }, { "epoch": 0.3450240219629375, "grad_norm": 0.7262553509381825, "learning_rate": 7.616341751670904e-06, "loss": 0.2505, "step": 10054 }, { "epoch": 0.34505833905284833, "grad_norm": 0.6815881211857999, "learning_rate": 7.615868152527492e-06, "loss": 0.2654, "step": 10055 }, { "epoch": 0.3450926561427591, "grad_norm": 0.8856084035008464, "learning_rate": 7.615394521068079e-06, "loss": 0.3444, "step": 10056 }, { "epoch": 0.3451269732326699, "grad_norm": 0.825483596735322, "learning_rate": 7.614920857298516e-06, "loss": 0.3217, "step": 10057 }, { "epoch": 0.34516129032258064, "grad_norm": 0.7806785005249897, "learning_rate": 7.614447161224655e-06, "loss": 0.2743, "step": 10058 }, { "epoch": 0.34519560741249145, "grad_norm": 0.7542756903658198, "learning_rate": 7.613973432852348e-06, "loss": 0.2674, "step": 10059 }, { "epoch": 0.3452299245024022, "grad_norm": 0.6272175974362453, "learning_rate": 7.6134996721874455e-06, "loss": 0.2788, "step": 10060 }, { "epoch": 0.34526424159231295, "grad_norm": 0.7662696352558744, "learning_rate": 7.6130258792358014e-06, "loss": 0.3088, "step": 10061 }, { "epoch": 0.34529855868222376, "grad_norm": 0.7203509252270884, "learning_rate": 7.61255205400327e-06, "loss": 0.3078, "step": 10062 }, { "epoch": 0.3453328757721345, "grad_norm": 0.7345149364568831, "learning_rate": 7.6120781964957045e-06, "loss": 0.2866, "step": 10063 }, { "epoch": 0.3453671928620453, "grad_norm": 0.7702275365751593, "learning_rate": 7.611604306718956e-06, "loss": 0.3522, "step": 10064 }, { "epoch": 0.34540150995195606, "grad_norm": 0.8366300245356982, "learning_rate": 7.611130384678883e-06, "loss": 0.2899, "step": 10065 }, { "epoch": 0.34543582704186687, "grad_norm": 0.7251523720514504, "learning_rate": 7.610656430381338e-06, "loss": 0.3085, "step": 10066 }, { "epoch": 0.3454701441317776, "grad_norm": 0.7386757535129593, "learning_rate": 7.6101824438321755e-06, "loss": 0.2939, "step": 10067 }, { "epoch": 0.3455044612216884, "grad_norm": 0.775923250794074, "learning_rate": 7.609708425037255e-06, "loss": 0.3099, "step": 10068 }, { "epoch": 0.3455387783115992, "grad_norm": 0.7551112963375077, "learning_rate": 7.609234374002426e-06, "loss": 0.37, "step": 10069 }, { "epoch": 0.34557309540150993, "grad_norm": 0.7838989885774899, "learning_rate": 7.60876029073355e-06, "loss": 0.3607, "step": 10070 }, { "epoch": 0.34560741249142074, "grad_norm": 0.6958542193925097, "learning_rate": 7.608286175236482e-06, "loss": 0.3462, "step": 10071 }, { "epoch": 0.3456417295813315, "grad_norm": 0.6788178042577724, "learning_rate": 7.607812027517079e-06, "loss": 0.3108, "step": 10072 }, { "epoch": 0.3456760466712423, "grad_norm": 0.8364355264292691, "learning_rate": 7.6073378475812e-06, "loss": 0.3456, "step": 10073 }, { "epoch": 0.34571036376115305, "grad_norm": 0.8685016727944141, "learning_rate": 7.6068636354347e-06, "loss": 0.2921, "step": 10074 }, { "epoch": 0.34574468085106386, "grad_norm": 0.7537506698276116, "learning_rate": 7.606389391083441e-06, "loss": 0.3259, "step": 10075 }, { "epoch": 0.3457789979409746, "grad_norm": 0.6994917209182658, "learning_rate": 7.605915114533277e-06, "loss": 0.3123, "step": 10076 }, { "epoch": 0.34581331503088536, "grad_norm": 0.9613491411075463, "learning_rate": 7.605440805790073e-06, "loss": 0.3239, "step": 10077 }, { "epoch": 0.34584763212079617, "grad_norm": 0.9016425691604619, "learning_rate": 7.604966464859685e-06, "loss": 0.3117, "step": 10078 }, { "epoch": 0.3458819492107069, "grad_norm": 0.8190539924429044, "learning_rate": 7.604492091747972e-06, "loss": 0.3489, "step": 10079 }, { "epoch": 0.3459162663006177, "grad_norm": 0.7274121843176914, "learning_rate": 7.6040176864607965e-06, "loss": 0.2729, "step": 10080 }, { "epoch": 0.3459505833905285, "grad_norm": 1.0199605396872635, "learning_rate": 7.603543249004018e-06, "loss": 0.2924, "step": 10081 }, { "epoch": 0.3459849004804393, "grad_norm": 0.7398031332518159, "learning_rate": 7.603068779383499e-06, "loss": 0.2779, "step": 10082 }, { "epoch": 0.34601921757035004, "grad_norm": 0.8142026367313342, "learning_rate": 7.6025942776051e-06, "loss": 0.3217, "step": 10083 }, { "epoch": 0.3460535346602608, "grad_norm": 0.7893011940093079, "learning_rate": 7.602119743674683e-06, "loss": 0.2926, "step": 10084 }, { "epoch": 0.3460878517501716, "grad_norm": 0.7513433653587953, "learning_rate": 7.6016451775981095e-06, "loss": 0.3031, "step": 10085 }, { "epoch": 0.34612216884008234, "grad_norm": 0.8983518901835589, "learning_rate": 7.601170579381243e-06, "loss": 0.3276, "step": 10086 }, { "epoch": 0.34615648592999315, "grad_norm": 0.7729496817786232, "learning_rate": 7.600695949029948e-06, "loss": 0.3122, "step": 10087 }, { "epoch": 0.3461908030199039, "grad_norm": 0.72502045083209, "learning_rate": 7.600221286550084e-06, "loss": 0.2912, "step": 10088 }, { "epoch": 0.3462251201098147, "grad_norm": 0.7035969899862714, "learning_rate": 7.599746591947521e-06, "loss": 0.2615, "step": 10089 }, { "epoch": 0.34625943719972546, "grad_norm": 0.7196053447445713, "learning_rate": 7.599271865228118e-06, "loss": 0.3038, "step": 10090 }, { "epoch": 0.3462937542896362, "grad_norm": 0.7819595659643745, "learning_rate": 7.5987971063977416e-06, "loss": 0.2554, "step": 10091 }, { "epoch": 0.346328071379547, "grad_norm": 0.7121073205114573, "learning_rate": 7.5983223154622575e-06, "loss": 0.2964, "step": 10092 }, { "epoch": 0.34636238846945777, "grad_norm": 0.7562762763640863, "learning_rate": 7.59784749242753e-06, "loss": 0.2888, "step": 10093 }, { "epoch": 0.3463967055593686, "grad_norm": 0.7372978480058446, "learning_rate": 7.597372637299425e-06, "loss": 0.3039, "step": 10094 }, { "epoch": 0.34643102264927933, "grad_norm": 0.8301592661675479, "learning_rate": 7.59689775008381e-06, "loss": 0.2852, "step": 10095 }, { "epoch": 0.34646533973919014, "grad_norm": 0.814772640477307, "learning_rate": 7.596422830786552e-06, "loss": 0.2942, "step": 10096 }, { "epoch": 0.3464996568291009, "grad_norm": 0.7436064880224498, "learning_rate": 7.5959478794135165e-06, "loss": 0.307, "step": 10097 }, { "epoch": 0.3465339739190117, "grad_norm": 0.8057710890710917, "learning_rate": 7.59547289597057e-06, "loss": 0.374, "step": 10098 }, { "epoch": 0.34656829100892245, "grad_norm": 0.7860165592962074, "learning_rate": 7.594997880463584e-06, "loss": 0.2808, "step": 10099 }, { "epoch": 0.3466026080988332, "grad_norm": 0.8476496998281841, "learning_rate": 7.594522832898423e-06, "loss": 0.3169, "step": 10100 }, { "epoch": 0.346636925188744, "grad_norm": 0.8101332602538216, "learning_rate": 7.594047753280958e-06, "loss": 0.3102, "step": 10101 }, { "epoch": 0.34667124227865476, "grad_norm": 0.8007592620758959, "learning_rate": 7.5935726416170565e-06, "loss": 0.3297, "step": 10102 }, { "epoch": 0.34670555936856556, "grad_norm": 0.7314365510131667, "learning_rate": 7.593097497912591e-06, "loss": 0.2537, "step": 10103 }, { "epoch": 0.3467398764584763, "grad_norm": 0.7715134032309854, "learning_rate": 7.592622322173426e-06, "loss": 0.3146, "step": 10104 }, { "epoch": 0.3467741935483871, "grad_norm": 0.7057967338423716, "learning_rate": 7.592147114405437e-06, "loss": 0.2937, "step": 10105 }, { "epoch": 0.3468085106382979, "grad_norm": 0.6986187405685965, "learning_rate": 7.5916718746144935e-06, "loss": 0.2608, "step": 10106 }, { "epoch": 0.3468428277282086, "grad_norm": 0.8399081563676263, "learning_rate": 7.591196602806464e-06, "loss": 0.3389, "step": 10107 }, { "epoch": 0.34687714481811943, "grad_norm": 0.7318026750418657, "learning_rate": 7.590721298987223e-06, "loss": 0.3781, "step": 10108 }, { "epoch": 0.3469114619080302, "grad_norm": 0.7767672402139598, "learning_rate": 7.590245963162638e-06, "loss": 0.2881, "step": 10109 }, { "epoch": 0.346945778997941, "grad_norm": 0.7161820295373891, "learning_rate": 7.589770595338586e-06, "loss": 0.315, "step": 10110 }, { "epoch": 0.34698009608785174, "grad_norm": 0.7554071440895771, "learning_rate": 7.589295195520938e-06, "loss": 0.2767, "step": 10111 }, { "epoch": 0.34701441317776255, "grad_norm": 0.7456952311273656, "learning_rate": 7.588819763715566e-06, "loss": 0.3094, "step": 10112 }, { "epoch": 0.3470487302676733, "grad_norm": 2.5677864397917607, "learning_rate": 7.588344299928345e-06, "loss": 0.3539, "step": 10113 }, { "epoch": 0.34708304735758405, "grad_norm": 0.6686714082711772, "learning_rate": 7.587868804165146e-06, "loss": 0.2773, "step": 10114 }, { "epoch": 0.34711736444749486, "grad_norm": 1.6693830420678235, "learning_rate": 7.587393276431847e-06, "loss": 0.279, "step": 10115 }, { "epoch": 0.3471516815374056, "grad_norm": 1.4677686718568888, "learning_rate": 7.586917716734319e-06, "loss": 0.2723, "step": 10116 }, { "epoch": 0.3471859986273164, "grad_norm": 2.358559319441851, "learning_rate": 7.58644212507844e-06, "loss": 0.333, "step": 10117 }, { "epoch": 0.34722031571722717, "grad_norm": 0.7694151870738084, "learning_rate": 7.585966501470083e-06, "loss": 0.2887, "step": 10118 }, { "epoch": 0.347254632807138, "grad_norm": 0.9730052621861087, "learning_rate": 7.585490845915127e-06, "loss": 0.2763, "step": 10119 }, { "epoch": 0.3472889498970487, "grad_norm": 0.7365499528189191, "learning_rate": 7.585015158419444e-06, "loss": 0.3129, "step": 10120 }, { "epoch": 0.34732326698695953, "grad_norm": 0.7102800872832975, "learning_rate": 7.584539438988913e-06, "loss": 0.2363, "step": 10121 }, { "epoch": 0.3473575840768703, "grad_norm": 0.7737610709713664, "learning_rate": 7.584063687629411e-06, "loss": 0.3439, "step": 10122 }, { "epoch": 0.34739190116678104, "grad_norm": 0.755545809869848, "learning_rate": 7.583587904346813e-06, "loss": 0.269, "step": 10123 }, { "epoch": 0.34742621825669184, "grad_norm": 0.7346885214699445, "learning_rate": 7.5831120891470004e-06, "loss": 0.3537, "step": 10124 }, { "epoch": 0.3474605353466026, "grad_norm": 0.848621198052269, "learning_rate": 7.582636242035848e-06, "loss": 0.2867, "step": 10125 }, { "epoch": 0.3474948524365134, "grad_norm": 0.7308406641324126, "learning_rate": 7.582160363019237e-06, "loss": 0.258, "step": 10126 }, { "epoch": 0.34752916952642415, "grad_norm": 1.0551721407838646, "learning_rate": 7.581684452103046e-06, "loss": 0.3142, "step": 10127 }, { "epoch": 0.34756348661633496, "grad_norm": 0.6649300730978841, "learning_rate": 7.581208509293152e-06, "loss": 0.299, "step": 10128 }, { "epoch": 0.3475978037062457, "grad_norm": 0.8501568039759115, "learning_rate": 7.5807325345954385e-06, "loss": 0.285, "step": 10129 }, { "epoch": 0.34763212079615646, "grad_norm": 0.7687880782943138, "learning_rate": 7.580256528015781e-06, "loss": 0.2932, "step": 10130 }, { "epoch": 0.34766643788606727, "grad_norm": 0.9230233018232097, "learning_rate": 7.579780489560062e-06, "loss": 0.2833, "step": 10131 }, { "epoch": 0.347700754975978, "grad_norm": 0.7252575951244855, "learning_rate": 7.579304419234166e-06, "loss": 0.2909, "step": 10132 }, { "epoch": 0.34773507206588883, "grad_norm": 0.8137731521004583, "learning_rate": 7.578828317043969e-06, "loss": 0.3088, "step": 10133 }, { "epoch": 0.3477693891557996, "grad_norm": 0.6877558221090413, "learning_rate": 7.578352182995354e-06, "loss": 0.229, "step": 10134 }, { "epoch": 0.3478037062457104, "grad_norm": 0.7748620630776887, "learning_rate": 7.577876017094206e-06, "loss": 0.2542, "step": 10135 }, { "epoch": 0.34783802333562114, "grad_norm": 0.8182862868005671, "learning_rate": 7.577399819346403e-06, "loss": 0.3613, "step": 10136 }, { "epoch": 0.3478723404255319, "grad_norm": 0.760523491668125, "learning_rate": 7.576923589757832e-06, "loss": 0.2821, "step": 10137 }, { "epoch": 0.3479066575154427, "grad_norm": 0.7359374018870564, "learning_rate": 7.576447328334373e-06, "loss": 0.3155, "step": 10138 }, { "epoch": 0.34794097460535345, "grad_norm": 0.7788569771728406, "learning_rate": 7.575971035081912e-06, "loss": 0.3171, "step": 10139 }, { "epoch": 0.34797529169526425, "grad_norm": 0.6935173927388073, "learning_rate": 7.575494710006333e-06, "loss": 0.2676, "step": 10140 }, { "epoch": 0.348009608785175, "grad_norm": 0.7473579856496011, "learning_rate": 7.575018353113517e-06, "loss": 0.3153, "step": 10141 }, { "epoch": 0.3480439258750858, "grad_norm": 0.7841574059324514, "learning_rate": 7.574541964409354e-06, "loss": 0.2611, "step": 10142 }, { "epoch": 0.34807824296499656, "grad_norm": 0.8671990839158739, "learning_rate": 7.574065543899726e-06, "loss": 0.2797, "step": 10143 }, { "epoch": 0.3481125600549073, "grad_norm": 0.8601829958182693, "learning_rate": 7.5735890915905185e-06, "loss": 0.3136, "step": 10144 }, { "epoch": 0.3481468771448181, "grad_norm": 0.7185677860597784, "learning_rate": 7.573112607487618e-06, "loss": 0.27, "step": 10145 }, { "epoch": 0.3481811942347289, "grad_norm": 0.7627965918994875, "learning_rate": 7.5726360915969125e-06, "loss": 0.3575, "step": 10146 }, { "epoch": 0.3482155113246397, "grad_norm": 0.8693047023743666, "learning_rate": 7.572159543924287e-06, "loss": 0.2993, "step": 10147 }, { "epoch": 0.34824982841455043, "grad_norm": 0.7792819473508625, "learning_rate": 7.57168296447563e-06, "loss": 0.3201, "step": 10148 }, { "epoch": 0.34828414550446124, "grad_norm": 0.6965992165900413, "learning_rate": 7.5712063532568256e-06, "loss": 0.2654, "step": 10149 }, { "epoch": 0.348318462594372, "grad_norm": 0.728145024995696, "learning_rate": 7.570729710273768e-06, "loss": 0.2948, "step": 10150 }, { "epoch": 0.3483527796842828, "grad_norm": 0.897771289551815, "learning_rate": 7.57025303553234e-06, "loss": 0.277, "step": 10151 }, { "epoch": 0.34838709677419355, "grad_norm": 0.8097229623898287, "learning_rate": 7.5697763290384315e-06, "loss": 0.2926, "step": 10152 }, { "epoch": 0.3484214138641043, "grad_norm": 0.7154299399223004, "learning_rate": 7.569299590797934e-06, "loss": 0.2566, "step": 10153 }, { "epoch": 0.3484557309540151, "grad_norm": 0.8148790655966487, "learning_rate": 7.568822820816735e-06, "loss": 0.2835, "step": 10154 }, { "epoch": 0.34849004804392586, "grad_norm": 0.740696760662846, "learning_rate": 7.568346019100725e-06, "loss": 0.2842, "step": 10155 }, { "epoch": 0.34852436513383667, "grad_norm": 0.7437758025626363, "learning_rate": 7.567869185655794e-06, "loss": 0.3703, "step": 10156 }, { "epoch": 0.3485586822237474, "grad_norm": 0.8399190145282264, "learning_rate": 7.567392320487834e-06, "loss": 0.3247, "step": 10157 }, { "epoch": 0.3485929993136582, "grad_norm": 0.6846886585282311, "learning_rate": 7.566915423602735e-06, "loss": 0.2824, "step": 10158 }, { "epoch": 0.348627316403569, "grad_norm": 0.7767738802218349, "learning_rate": 7.566438495006389e-06, "loss": 0.2751, "step": 10159 }, { "epoch": 0.34866163349347973, "grad_norm": 0.7557802170427534, "learning_rate": 7.565961534704687e-06, "loss": 0.2929, "step": 10160 }, { "epoch": 0.34869595058339053, "grad_norm": 0.8172208687382825, "learning_rate": 7.565484542703521e-06, "loss": 0.3095, "step": 10161 }, { "epoch": 0.3487302676733013, "grad_norm": 0.8820085972975775, "learning_rate": 7.565007519008785e-06, "loss": 0.3347, "step": 10162 }, { "epoch": 0.3487645847632121, "grad_norm": 0.9214715009393681, "learning_rate": 7.564530463626372e-06, "loss": 0.3061, "step": 10163 }, { "epoch": 0.34879890185312284, "grad_norm": 0.7257305877979112, "learning_rate": 7.564053376562176e-06, "loss": 0.2812, "step": 10164 }, { "epoch": 0.34883321894303365, "grad_norm": 0.7809035852456886, "learning_rate": 7.563576257822089e-06, "loss": 0.3365, "step": 10165 }, { "epoch": 0.3488675360329444, "grad_norm": 0.8382891288724916, "learning_rate": 7.563099107412006e-06, "loss": 0.3166, "step": 10166 }, { "epoch": 0.34890185312285515, "grad_norm": 0.7041238007791492, "learning_rate": 7.5626219253378205e-06, "loss": 0.2878, "step": 10167 }, { "epoch": 0.34893617021276596, "grad_norm": 0.8124919605209365, "learning_rate": 7.562144711605431e-06, "loss": 0.2815, "step": 10168 }, { "epoch": 0.3489704873026767, "grad_norm": 0.7177459809631614, "learning_rate": 7.56166746622073e-06, "loss": 0.3029, "step": 10169 }, { "epoch": 0.3490048043925875, "grad_norm": 0.7160785644684889, "learning_rate": 7.561190189189613e-06, "loss": 0.28, "step": 10170 }, { "epoch": 0.34903912148249827, "grad_norm": 0.7277149408202734, "learning_rate": 7.56071288051798e-06, "loss": 0.2537, "step": 10171 }, { "epoch": 0.3490734385724091, "grad_norm": 1.0392813008608843, "learning_rate": 7.560235540211723e-06, "loss": 0.2918, "step": 10172 }, { "epoch": 0.34910775566231983, "grad_norm": 0.8518316662351963, "learning_rate": 7.559758168276741e-06, "loss": 0.3327, "step": 10173 }, { "epoch": 0.34914207275223064, "grad_norm": 0.7719567835631588, "learning_rate": 7.559280764718933e-06, "loss": 0.3714, "step": 10174 }, { "epoch": 0.3491763898421414, "grad_norm": 1.2372956630797045, "learning_rate": 7.558803329544193e-06, "loss": 0.3752, "step": 10175 }, { "epoch": 0.34921070693205214, "grad_norm": 0.7678111829041822, "learning_rate": 7.55832586275842e-06, "loss": 0.2931, "step": 10176 }, { "epoch": 0.34924502402196295, "grad_norm": 0.7620475040256426, "learning_rate": 7.557848364367516e-06, "loss": 0.2874, "step": 10177 }, { "epoch": 0.3492793411118737, "grad_norm": 0.8422337059008576, "learning_rate": 7.557370834377377e-06, "loss": 0.3032, "step": 10178 }, { "epoch": 0.3493136582017845, "grad_norm": 0.6868656787698378, "learning_rate": 7.556893272793902e-06, "loss": 0.2911, "step": 10179 }, { "epoch": 0.34934797529169526, "grad_norm": 0.7807259452651852, "learning_rate": 7.556415679622993e-06, "loss": 0.3491, "step": 10180 }, { "epoch": 0.34938229238160606, "grad_norm": 0.7837028955531333, "learning_rate": 7.555938054870548e-06, "loss": 0.3025, "step": 10181 }, { "epoch": 0.3494166094715168, "grad_norm": 0.7805998797570967, "learning_rate": 7.555460398542469e-06, "loss": 0.3036, "step": 10182 }, { "epoch": 0.34945092656142757, "grad_norm": 0.6955819751492881, "learning_rate": 7.554982710644655e-06, "loss": 0.2901, "step": 10183 }, { "epoch": 0.3494852436513384, "grad_norm": 0.7872789562608796, "learning_rate": 7.554504991183009e-06, "loss": 0.2946, "step": 10184 }, { "epoch": 0.3495195607412491, "grad_norm": 0.7352474947947164, "learning_rate": 7.554027240163432e-06, "loss": 0.3239, "step": 10185 }, { "epoch": 0.34955387783115993, "grad_norm": 0.7720305458432348, "learning_rate": 7.553549457591825e-06, "loss": 0.3468, "step": 10186 }, { "epoch": 0.3495881949210707, "grad_norm": 0.7813633313466565, "learning_rate": 7.553071643474095e-06, "loss": 0.3256, "step": 10187 }, { "epoch": 0.3496225120109815, "grad_norm": 0.7269441582379441, "learning_rate": 7.552593797816139e-06, "loss": 0.2838, "step": 10188 }, { "epoch": 0.34965682910089224, "grad_norm": 0.8645994114726703, "learning_rate": 7.552115920623861e-06, "loss": 0.3385, "step": 10189 }, { "epoch": 0.349691146190803, "grad_norm": 0.825752345829038, "learning_rate": 7.551638011903168e-06, "loss": 0.3487, "step": 10190 }, { "epoch": 0.3497254632807138, "grad_norm": 0.8885482356777723, "learning_rate": 7.551160071659964e-06, "loss": 0.3196, "step": 10191 }, { "epoch": 0.34975978037062455, "grad_norm": 0.7630803157749679, "learning_rate": 7.5506820999001495e-06, "loss": 0.3145, "step": 10192 }, { "epoch": 0.34979409746053536, "grad_norm": 0.8942998931704035, "learning_rate": 7.550204096629633e-06, "loss": 0.2442, "step": 10193 }, { "epoch": 0.3498284145504461, "grad_norm": 0.759879980976064, "learning_rate": 7.549726061854317e-06, "loss": 0.3682, "step": 10194 }, { "epoch": 0.3498627316403569, "grad_norm": 0.7111561825161721, "learning_rate": 7.549247995580109e-06, "loss": 0.3545, "step": 10195 }, { "epoch": 0.34989704873026767, "grad_norm": 0.7552952357160064, "learning_rate": 7.548769897812913e-06, "loss": 0.2456, "step": 10196 }, { "epoch": 0.3499313658201785, "grad_norm": 0.8085851569379673, "learning_rate": 7.548291768558638e-06, "loss": 0.2979, "step": 10197 }, { "epoch": 0.3499656829100892, "grad_norm": 0.8349045475703352, "learning_rate": 7.547813607823186e-06, "loss": 0.3626, "step": 10198 }, { "epoch": 0.35, "grad_norm": 0.7892506192700459, "learning_rate": 7.547335415612471e-06, "loss": 0.3036, "step": 10199 }, { "epoch": 0.3500343170899108, "grad_norm": 0.7866990054292393, "learning_rate": 7.546857191932395e-06, "loss": 0.3152, "step": 10200 }, { "epoch": 0.35006863417982154, "grad_norm": 0.8452185502109348, "learning_rate": 7.54637893678887e-06, "loss": 0.3129, "step": 10201 }, { "epoch": 0.35010295126973234, "grad_norm": 0.7182506508480965, "learning_rate": 7.545900650187799e-06, "loss": 0.2627, "step": 10202 }, { "epoch": 0.3501372683596431, "grad_norm": 1.14677899847367, "learning_rate": 7.545422332135096e-06, "loss": 0.2568, "step": 10203 }, { "epoch": 0.3501715854495539, "grad_norm": 0.8003165195094809, "learning_rate": 7.5449439826366664e-06, "loss": 0.3586, "step": 10204 }, { "epoch": 0.35020590253946465, "grad_norm": 0.7452451918511871, "learning_rate": 7.5444656016984206e-06, "loss": 0.2915, "step": 10205 }, { "epoch": 0.3502402196293754, "grad_norm": 0.758894240328538, "learning_rate": 7.54398718932627e-06, "loss": 0.3016, "step": 10206 }, { "epoch": 0.3502745367192862, "grad_norm": 0.6701078888713544, "learning_rate": 7.543508745526122e-06, "loss": 0.2831, "step": 10207 }, { "epoch": 0.35030885380919696, "grad_norm": 0.7441826886262394, "learning_rate": 7.54303027030389e-06, "loss": 0.3322, "step": 10208 }, { "epoch": 0.35034317089910777, "grad_norm": 0.8129538534374428, "learning_rate": 7.542551763665487e-06, "loss": 0.279, "step": 10209 }, { "epoch": 0.3503774879890185, "grad_norm": 0.7774753462877133, "learning_rate": 7.542073225616817e-06, "loss": 0.3033, "step": 10210 }, { "epoch": 0.35041180507892933, "grad_norm": 0.837618163872609, "learning_rate": 7.5415946561637976e-06, "loss": 0.2532, "step": 10211 }, { "epoch": 0.3504461221688401, "grad_norm": 0.7680585411594685, "learning_rate": 7.541116055312338e-06, "loss": 0.2871, "step": 10212 }, { "epoch": 0.35048043925875083, "grad_norm": 0.7408828622230902, "learning_rate": 7.540637423068354e-06, "loss": 0.2844, "step": 10213 }, { "epoch": 0.35051475634866164, "grad_norm": 0.745446123307246, "learning_rate": 7.540158759437757e-06, "loss": 0.248, "step": 10214 }, { "epoch": 0.3505490734385724, "grad_norm": 0.7751772560451834, "learning_rate": 7.5396800644264604e-06, "loss": 0.2773, "step": 10215 }, { "epoch": 0.3505833905284832, "grad_norm": 0.7633484502842326, "learning_rate": 7.5392013380403765e-06, "loss": 0.2753, "step": 10216 }, { "epoch": 0.35061770761839395, "grad_norm": 0.7565333582633205, "learning_rate": 7.538722580285419e-06, "loss": 0.3245, "step": 10217 }, { "epoch": 0.35065202470830475, "grad_norm": 0.8604689560135423, "learning_rate": 7.538243791167505e-06, "loss": 0.3099, "step": 10218 }, { "epoch": 0.3506863417982155, "grad_norm": 0.7983383388348505, "learning_rate": 7.537764970692549e-06, "loss": 0.346, "step": 10219 }, { "epoch": 0.3507206588881263, "grad_norm": 0.8680212699641544, "learning_rate": 7.537286118866464e-06, "loss": 0.3166, "step": 10220 }, { "epoch": 0.35075497597803706, "grad_norm": 0.8198322137539203, "learning_rate": 7.53680723569517e-06, "loss": 0.2822, "step": 10221 }, { "epoch": 0.3507892930679478, "grad_norm": 0.8587525217071685, "learning_rate": 7.5363283211845785e-06, "loss": 0.3376, "step": 10222 }, { "epoch": 0.3508236101578586, "grad_norm": 0.8093411480932899, "learning_rate": 7.535849375340607e-06, "loss": 0.3343, "step": 10223 }, { "epoch": 0.3508579272477694, "grad_norm": 0.9464930833370975, "learning_rate": 7.535370398169173e-06, "loss": 0.2781, "step": 10224 }, { "epoch": 0.3508922443376802, "grad_norm": 0.7736661054068756, "learning_rate": 7.534891389676195e-06, "loss": 0.2592, "step": 10225 }, { "epoch": 0.35092656142759093, "grad_norm": 0.7790687762126902, "learning_rate": 7.534412349867589e-06, "loss": 0.2761, "step": 10226 }, { "epoch": 0.35096087851750174, "grad_norm": 0.8462389776484721, "learning_rate": 7.5339332787492724e-06, "loss": 0.318, "step": 10227 }, { "epoch": 0.3509951956074125, "grad_norm": 0.7963787436623804, "learning_rate": 7.533454176327165e-06, "loss": 0.3598, "step": 10228 }, { "epoch": 0.35102951269732324, "grad_norm": 0.7655696070015584, "learning_rate": 7.532975042607185e-06, "loss": 0.3027, "step": 10229 }, { "epoch": 0.35106382978723405, "grad_norm": 0.7128288012106793, "learning_rate": 7.532495877595252e-06, "loss": 0.287, "step": 10230 }, { "epoch": 0.3510981468771448, "grad_norm": 0.7635434861996911, "learning_rate": 7.532016681297285e-06, "loss": 0.3011, "step": 10231 }, { "epoch": 0.3511324639670556, "grad_norm": 0.7800339494497359, "learning_rate": 7.531537453719202e-06, "loss": 0.2819, "step": 10232 }, { "epoch": 0.35116678105696636, "grad_norm": 0.7801129851880129, "learning_rate": 7.531058194866927e-06, "loss": 0.2918, "step": 10233 }, { "epoch": 0.35120109814687717, "grad_norm": 0.7265143905800001, "learning_rate": 7.530578904746378e-06, "loss": 0.292, "step": 10234 }, { "epoch": 0.3512354152367879, "grad_norm": 0.7569832874457131, "learning_rate": 7.530099583363478e-06, "loss": 0.3588, "step": 10235 }, { "epoch": 0.35126973232669867, "grad_norm": 0.8186492839793267, "learning_rate": 7.529620230724146e-06, "loss": 0.312, "step": 10236 }, { "epoch": 0.3513040494166095, "grad_norm": 0.791075949993237, "learning_rate": 7.529140846834306e-06, "loss": 0.2903, "step": 10237 }, { "epoch": 0.3513383665065202, "grad_norm": 0.925422744668912, "learning_rate": 7.528661431699882e-06, "loss": 0.3158, "step": 10238 }, { "epoch": 0.35137268359643103, "grad_norm": 0.7606142601387093, "learning_rate": 7.528181985326792e-06, "loss": 0.308, "step": 10239 }, { "epoch": 0.3514070006863418, "grad_norm": 0.6874112308975421, "learning_rate": 7.5277025077209595e-06, "loss": 0.2759, "step": 10240 }, { "epoch": 0.3514413177762526, "grad_norm": 0.8023533015521045, "learning_rate": 7.5272229988883126e-06, "loss": 0.2893, "step": 10241 }, { "epoch": 0.35147563486616334, "grad_norm": 0.7999469123511491, "learning_rate": 7.52674345883477e-06, "loss": 0.339, "step": 10242 }, { "epoch": 0.35150995195607415, "grad_norm": 0.841536668374402, "learning_rate": 7.526263887566259e-06, "loss": 0.3202, "step": 10243 }, { "epoch": 0.3515442690459849, "grad_norm": 0.8551062118997966, "learning_rate": 7.525784285088702e-06, "loss": 0.2301, "step": 10244 }, { "epoch": 0.35157858613589565, "grad_norm": 0.81462204076564, "learning_rate": 7.525304651408025e-06, "loss": 0.2869, "step": 10245 }, { "epoch": 0.35161290322580646, "grad_norm": 0.9672846258510445, "learning_rate": 7.524824986530155e-06, "loss": 0.3365, "step": 10246 }, { "epoch": 0.3516472203157172, "grad_norm": 0.797093163374296, "learning_rate": 7.5243452904610145e-06, "loss": 0.2728, "step": 10247 }, { "epoch": 0.351681537405628, "grad_norm": 0.8311978024919012, "learning_rate": 7.523865563206532e-06, "loss": 0.3095, "step": 10248 }, { "epoch": 0.35171585449553877, "grad_norm": 0.6878044806405668, "learning_rate": 7.523385804772632e-06, "loss": 0.3019, "step": 10249 }, { "epoch": 0.3517501715854496, "grad_norm": 0.7988539849966192, "learning_rate": 7.522906015165241e-06, "loss": 0.2174, "step": 10250 }, { "epoch": 0.35178448867536033, "grad_norm": 0.7048392793438328, "learning_rate": 7.522426194390289e-06, "loss": 0.3596, "step": 10251 }, { "epoch": 0.3518188057652711, "grad_norm": 0.8081268231914611, "learning_rate": 7.521946342453702e-06, "loss": 0.2828, "step": 10252 }, { "epoch": 0.3518531228551819, "grad_norm": 0.8806367620321951, "learning_rate": 7.521466459361408e-06, "loss": 0.3141, "step": 10253 }, { "epoch": 0.35188743994509264, "grad_norm": 0.8897999547482103, "learning_rate": 7.520986545119338e-06, "loss": 0.2648, "step": 10254 }, { "epoch": 0.35192175703500345, "grad_norm": 0.7379626108848967, "learning_rate": 7.5205065997334155e-06, "loss": 0.2861, "step": 10255 }, { "epoch": 0.3519560741249142, "grad_norm": 0.721829760918547, "learning_rate": 7.520026623209574e-06, "loss": 0.3027, "step": 10256 }, { "epoch": 0.351990391214825, "grad_norm": 0.7798728879635959, "learning_rate": 7.519546615553741e-06, "loss": 0.3545, "step": 10257 }, { "epoch": 0.35202470830473576, "grad_norm": 0.853372004031979, "learning_rate": 7.519066576771847e-06, "loss": 0.2989, "step": 10258 }, { "epoch": 0.3520590253946465, "grad_norm": 0.749135829975459, "learning_rate": 7.518586506869823e-06, "loss": 0.3215, "step": 10259 }, { "epoch": 0.3520933424845573, "grad_norm": 0.8567210098604332, "learning_rate": 7.5181064058536e-06, "loss": 0.3168, "step": 10260 }, { "epoch": 0.35212765957446807, "grad_norm": 0.6586304506225891, "learning_rate": 7.517626273729106e-06, "loss": 0.291, "step": 10261 }, { "epoch": 0.3521619766643789, "grad_norm": 0.7837579958229183, "learning_rate": 7.5171461105022755e-06, "loss": 0.2838, "step": 10262 }, { "epoch": 0.3521962937542896, "grad_norm": 0.8528216196863552, "learning_rate": 7.5166659161790424e-06, "loss": 0.357, "step": 10263 }, { "epoch": 0.35223061084420043, "grad_norm": 0.8441932078532516, "learning_rate": 7.516185690765332e-06, "loss": 0.3311, "step": 10264 }, { "epoch": 0.3522649279341112, "grad_norm": 0.7231926856949634, "learning_rate": 7.515705434267082e-06, "loss": 0.3075, "step": 10265 }, { "epoch": 0.352299245024022, "grad_norm": 0.6595947687576462, "learning_rate": 7.5152251466902234e-06, "loss": 0.2891, "step": 10266 }, { "epoch": 0.35233356211393274, "grad_norm": 0.8733680621847185, "learning_rate": 7.514744828040695e-06, "loss": 0.303, "step": 10267 }, { "epoch": 0.3523678792038435, "grad_norm": 0.7957798479104489, "learning_rate": 7.514264478324422e-06, "loss": 0.2832, "step": 10268 }, { "epoch": 0.3524021962937543, "grad_norm": 0.8514238432756595, "learning_rate": 7.513784097547343e-06, "loss": 0.2992, "step": 10269 }, { "epoch": 0.35243651338366505, "grad_norm": 0.8169750922221467, "learning_rate": 7.513303685715394e-06, "loss": 0.3122, "step": 10270 }, { "epoch": 0.35247083047357586, "grad_norm": 0.7522158840178484, "learning_rate": 7.512823242834507e-06, "loss": 0.288, "step": 10271 }, { "epoch": 0.3525051475634866, "grad_norm": 0.7298234471004991, "learning_rate": 7.512342768910619e-06, "loss": 0.2914, "step": 10272 }, { "epoch": 0.3525394646533974, "grad_norm": 0.8255330372336565, "learning_rate": 7.5118622639496654e-06, "loss": 0.3123, "step": 10273 }, { "epoch": 0.35257378174330817, "grad_norm": 0.7835243773613684, "learning_rate": 7.511381727957581e-06, "loss": 0.2712, "step": 10274 }, { "epoch": 0.3526080988332189, "grad_norm": 0.7465716900653883, "learning_rate": 7.5109011609403035e-06, "loss": 0.2847, "step": 10275 }, { "epoch": 0.3526424159231297, "grad_norm": 0.7576629339015771, "learning_rate": 7.5104205629037706e-06, "loss": 0.2901, "step": 10276 }, { "epoch": 0.3526767330130405, "grad_norm": 0.7962422842947728, "learning_rate": 7.509939933853918e-06, "loss": 0.292, "step": 10277 }, { "epoch": 0.3527110501029513, "grad_norm": 0.759098745142999, "learning_rate": 7.509459273796684e-06, "loss": 0.283, "step": 10278 }, { "epoch": 0.35274536719286204, "grad_norm": 0.8058284424829036, "learning_rate": 7.508978582738007e-06, "loss": 0.3163, "step": 10279 }, { "epoch": 0.35277968428277284, "grad_norm": 0.858747741547209, "learning_rate": 7.508497860683823e-06, "loss": 0.3207, "step": 10280 }, { "epoch": 0.3528140013726836, "grad_norm": 0.7685276125404555, "learning_rate": 7.5080171076400734e-06, "loss": 0.3236, "step": 10281 }, { "epoch": 0.35284831846259435, "grad_norm": 0.8712595736590116, "learning_rate": 7.5075363236126965e-06, "loss": 0.3701, "step": 10282 }, { "epoch": 0.35288263555250515, "grad_norm": 0.7257811208107956, "learning_rate": 7.507055508607632e-06, "loss": 0.276, "step": 10283 }, { "epoch": 0.3529169526424159, "grad_norm": 0.7268498970014876, "learning_rate": 7.506574662630821e-06, "loss": 0.2964, "step": 10284 }, { "epoch": 0.3529512697323267, "grad_norm": 0.7936071019602174, "learning_rate": 7.5060937856882e-06, "loss": 0.3202, "step": 10285 }, { "epoch": 0.35298558682223746, "grad_norm": 0.8068942227124482, "learning_rate": 7.505612877785715e-06, "loss": 0.2723, "step": 10286 }, { "epoch": 0.35301990391214827, "grad_norm": 0.7408468467796304, "learning_rate": 7.5051319389293e-06, "loss": 0.3118, "step": 10287 }, { "epoch": 0.353054221002059, "grad_norm": 0.9023211118602118, "learning_rate": 7.5046509691249025e-06, "loss": 0.3103, "step": 10288 }, { "epoch": 0.35308853809196983, "grad_norm": 0.7536830443562605, "learning_rate": 7.504169968378465e-06, "loss": 0.2926, "step": 10289 }, { "epoch": 0.3531228551818806, "grad_norm": 0.969871530330663, "learning_rate": 7.503688936695924e-06, "loss": 0.3099, "step": 10290 }, { "epoch": 0.35315717227179133, "grad_norm": 0.8219858702017749, "learning_rate": 7.503207874083227e-06, "loss": 0.3512, "step": 10291 }, { "epoch": 0.35319148936170214, "grad_norm": 0.846469378836867, "learning_rate": 7.5027267805463146e-06, "loss": 0.2948, "step": 10292 }, { "epoch": 0.3532258064516129, "grad_norm": 0.8442893374613045, "learning_rate": 7.502245656091129e-06, "loss": 0.3547, "step": 10293 }, { "epoch": 0.3532601235415237, "grad_norm": 0.8315095217391585, "learning_rate": 7.501764500723618e-06, "loss": 0.2925, "step": 10294 }, { "epoch": 0.35329444063143445, "grad_norm": 0.7939933598709555, "learning_rate": 7.501283314449722e-06, "loss": 0.2823, "step": 10295 }, { "epoch": 0.35332875772134525, "grad_norm": 0.8396317555221188, "learning_rate": 7.500802097275388e-06, "loss": 0.2818, "step": 10296 }, { "epoch": 0.353363074811256, "grad_norm": 0.7881651713652588, "learning_rate": 7.50032084920656e-06, "loss": 0.3088, "step": 10297 }, { "epoch": 0.35339739190116676, "grad_norm": 0.7131111506787219, "learning_rate": 7.499839570249181e-06, "loss": 0.2999, "step": 10298 }, { "epoch": 0.35343170899107756, "grad_norm": 0.7332592830120617, "learning_rate": 7.4993582604091994e-06, "loss": 0.3012, "step": 10299 }, { "epoch": 0.3534660260809883, "grad_norm": 0.7833402941687092, "learning_rate": 7.498876919692561e-06, "loss": 0.3002, "step": 10300 }, { "epoch": 0.3535003431708991, "grad_norm": 0.8310154824157974, "learning_rate": 7.498395548105212e-06, "loss": 0.3487, "step": 10301 }, { "epoch": 0.3535346602608099, "grad_norm": 0.7276161715557082, "learning_rate": 7.497914145653098e-06, "loss": 0.3211, "step": 10302 }, { "epoch": 0.3535689773507207, "grad_norm": 0.7562705456193187, "learning_rate": 7.497432712342166e-06, "loss": 0.2854, "step": 10303 }, { "epoch": 0.35360329444063143, "grad_norm": 0.7064038791872131, "learning_rate": 7.496951248178367e-06, "loss": 0.3132, "step": 10304 }, { "epoch": 0.3536376115305422, "grad_norm": 0.8374723919157766, "learning_rate": 7.496469753167646e-06, "loss": 0.299, "step": 10305 }, { "epoch": 0.353671928620453, "grad_norm": 0.7572278206767049, "learning_rate": 7.495988227315951e-06, "loss": 0.2531, "step": 10306 }, { "epoch": 0.35370624571036374, "grad_norm": 0.7713782794241864, "learning_rate": 7.4955066706292315e-06, "loss": 0.2807, "step": 10307 }, { "epoch": 0.35374056280027455, "grad_norm": 0.73528352009429, "learning_rate": 7.495025083113437e-06, "loss": 0.2887, "step": 10308 }, { "epoch": 0.3537748798901853, "grad_norm": 0.753179425844824, "learning_rate": 7.494543464774515e-06, "loss": 0.2806, "step": 10309 }, { "epoch": 0.3538091969800961, "grad_norm": 0.7492638104527037, "learning_rate": 7.494061815618419e-06, "loss": 0.285, "step": 10310 }, { "epoch": 0.35384351407000686, "grad_norm": 0.7271041110164914, "learning_rate": 7.493580135651095e-06, "loss": 0.2352, "step": 10311 }, { "epoch": 0.35387783115991767, "grad_norm": 0.7212151072947605, "learning_rate": 7.493098424878497e-06, "loss": 0.2581, "step": 10312 }, { "epoch": 0.3539121482498284, "grad_norm": 0.8289152007895383, "learning_rate": 7.492616683306575e-06, "loss": 0.3015, "step": 10313 }, { "epoch": 0.35394646533973917, "grad_norm": 0.727955289102245, "learning_rate": 7.49213491094128e-06, "loss": 0.2965, "step": 10314 }, { "epoch": 0.35398078242965, "grad_norm": 0.761441077980943, "learning_rate": 7.491653107788563e-06, "loss": 0.3159, "step": 10315 }, { "epoch": 0.3540150995195607, "grad_norm": 0.7680960239920885, "learning_rate": 7.491171273854377e-06, "loss": 0.3342, "step": 10316 }, { "epoch": 0.35404941660947153, "grad_norm": 0.8130916381878569, "learning_rate": 7.490689409144673e-06, "loss": 0.3407, "step": 10317 }, { "epoch": 0.3540837336993823, "grad_norm": 0.7362021702775134, "learning_rate": 7.490207513665408e-06, "loss": 0.3152, "step": 10318 }, { "epoch": 0.3541180507892931, "grad_norm": 0.787149175462493, "learning_rate": 7.489725587422531e-06, "loss": 0.332, "step": 10319 }, { "epoch": 0.35415236787920384, "grad_norm": 0.7282517993656601, "learning_rate": 7.489243630421996e-06, "loss": 0.3099, "step": 10320 }, { "epoch": 0.3541866849691146, "grad_norm": 0.7740573281708175, "learning_rate": 7.4887616426697606e-06, "loss": 0.3376, "step": 10321 }, { "epoch": 0.3542210020590254, "grad_norm": 0.7451812258419371, "learning_rate": 7.488279624171775e-06, "loss": 0.2718, "step": 10322 }, { "epoch": 0.35425531914893615, "grad_norm": 0.8038619651430439, "learning_rate": 7.487797574933996e-06, "loss": 0.3274, "step": 10323 }, { "epoch": 0.35428963623884696, "grad_norm": 0.9385759245255505, "learning_rate": 7.4873154949623785e-06, "loss": 0.3255, "step": 10324 }, { "epoch": 0.3543239533287577, "grad_norm": 0.7104808959986546, "learning_rate": 7.486833384262878e-06, "loss": 0.294, "step": 10325 }, { "epoch": 0.3543582704186685, "grad_norm": 0.74094743229374, "learning_rate": 7.48635124284145e-06, "loss": 0.2776, "step": 10326 }, { "epoch": 0.35439258750857927, "grad_norm": 0.7698809032446419, "learning_rate": 7.485869070704052e-06, "loss": 0.3115, "step": 10327 }, { "epoch": 0.35442690459849, "grad_norm": 0.8095067866085598, "learning_rate": 7.48538686785664e-06, "loss": 0.3009, "step": 10328 }, { "epoch": 0.35446122168840083, "grad_norm": 0.8022833149640586, "learning_rate": 7.4849046343051705e-06, "loss": 0.2978, "step": 10329 }, { "epoch": 0.3544955387783116, "grad_norm": 0.9069487084223051, "learning_rate": 7.4844223700556e-06, "loss": 0.3586, "step": 10330 }, { "epoch": 0.3545298558682224, "grad_norm": 0.9081025826445916, "learning_rate": 7.483940075113889e-06, "loss": 0.2927, "step": 10331 }, { "epoch": 0.35456417295813314, "grad_norm": 0.8120736476469486, "learning_rate": 7.483457749485994e-06, "loss": 0.3379, "step": 10332 }, { "epoch": 0.35459849004804395, "grad_norm": 0.7995009228649096, "learning_rate": 7.4829753931778746e-06, "loss": 0.3411, "step": 10333 }, { "epoch": 0.3546328071379547, "grad_norm": 0.9575609636006929, "learning_rate": 7.4824930061954895e-06, "loss": 0.2879, "step": 10334 }, { "epoch": 0.3546671242278655, "grad_norm": 0.8212527036221354, "learning_rate": 7.482010588544797e-06, "loss": 0.3747, "step": 10335 }, { "epoch": 0.35470144131777626, "grad_norm": 0.7910812705165186, "learning_rate": 7.481528140231758e-06, "loss": 0.2974, "step": 10336 }, { "epoch": 0.354735758407687, "grad_norm": 0.6946681284152496, "learning_rate": 7.481045661262331e-06, "loss": 0.276, "step": 10337 }, { "epoch": 0.3547700754975978, "grad_norm": 0.7103940893103369, "learning_rate": 7.480563151642477e-06, "loss": 0.2892, "step": 10338 }, { "epoch": 0.35480439258750857, "grad_norm": 0.855283863852624, "learning_rate": 7.480080611378158e-06, "loss": 0.3133, "step": 10339 }, { "epoch": 0.3548387096774194, "grad_norm": 0.7909401624194424, "learning_rate": 7.4795980404753354e-06, "loss": 0.2995, "step": 10340 }, { "epoch": 0.3548730267673301, "grad_norm": 0.7681053625192091, "learning_rate": 7.479115438939968e-06, "loss": 0.3717, "step": 10341 }, { "epoch": 0.35490734385724093, "grad_norm": 0.7934254551374327, "learning_rate": 7.478632806778021e-06, "loss": 0.318, "step": 10342 }, { "epoch": 0.3549416609471517, "grad_norm": 0.6812009055047388, "learning_rate": 7.478150143995455e-06, "loss": 0.2587, "step": 10343 }, { "epoch": 0.35497597803706243, "grad_norm": 0.8317792361001277, "learning_rate": 7.477667450598235e-06, "loss": 0.3406, "step": 10344 }, { "epoch": 0.35501029512697324, "grad_norm": 0.8105777422610312, "learning_rate": 7.47718472659232e-06, "loss": 0.3248, "step": 10345 }, { "epoch": 0.355044612216884, "grad_norm": 0.7654078451489402, "learning_rate": 7.476701971983677e-06, "loss": 0.2851, "step": 10346 }, { "epoch": 0.3550789293067948, "grad_norm": 0.7818263780930453, "learning_rate": 7.476219186778269e-06, "loss": 0.3535, "step": 10347 }, { "epoch": 0.35511324639670555, "grad_norm": 0.8319000595391235, "learning_rate": 7.475736370982058e-06, "loss": 0.3699, "step": 10348 }, { "epoch": 0.35514756348661636, "grad_norm": 0.7385994383506044, "learning_rate": 7.475253524601012e-06, "loss": 0.3201, "step": 10349 }, { "epoch": 0.3551818805765271, "grad_norm": 0.7467288697188413, "learning_rate": 7.4747706476410954e-06, "loss": 0.3769, "step": 10350 }, { "epoch": 0.35521619766643786, "grad_norm": 0.6751951850237513, "learning_rate": 7.474287740108272e-06, "loss": 0.2678, "step": 10351 }, { "epoch": 0.35525051475634867, "grad_norm": 3.2420151704082905, "learning_rate": 7.473804802008507e-06, "loss": 0.2914, "step": 10352 }, { "epoch": 0.3552848318462594, "grad_norm": 0.8408808908725904, "learning_rate": 7.473321833347769e-06, "loss": 0.3165, "step": 10353 }, { "epoch": 0.3553191489361702, "grad_norm": 0.7512457072330347, "learning_rate": 7.4728388341320235e-06, "loss": 0.29, "step": 10354 }, { "epoch": 0.355353466026081, "grad_norm": 0.7493464713086428, "learning_rate": 7.472355804367237e-06, "loss": 0.2821, "step": 10355 }, { "epoch": 0.3553877831159918, "grad_norm": 0.7311983562446381, "learning_rate": 7.471872744059378e-06, "loss": 0.315, "step": 10356 }, { "epoch": 0.35542210020590254, "grad_norm": 0.6649339213401376, "learning_rate": 7.471389653214412e-06, "loss": 0.3293, "step": 10357 }, { "epoch": 0.35545641729581334, "grad_norm": 0.8208475866089379, "learning_rate": 7.470906531838308e-06, "loss": 0.3397, "step": 10358 }, { "epoch": 0.3554907343857241, "grad_norm": 0.7477804670090619, "learning_rate": 7.470423379937035e-06, "loss": 0.2614, "step": 10359 }, { "epoch": 0.35552505147563485, "grad_norm": 0.6541814319850259, "learning_rate": 7.469940197516562e-06, "loss": 0.2829, "step": 10360 }, { "epoch": 0.35555936856554565, "grad_norm": 0.6570028742293729, "learning_rate": 7.469456984582857e-06, "loss": 0.2928, "step": 10361 }, { "epoch": 0.3555936856554564, "grad_norm": 0.7285218576280125, "learning_rate": 7.468973741141889e-06, "loss": 0.3239, "step": 10362 }, { "epoch": 0.3556280027453672, "grad_norm": 0.7428148046389911, "learning_rate": 7.46849046719963e-06, "loss": 0.278, "step": 10363 }, { "epoch": 0.35566231983527796, "grad_norm": 0.7726988002014303, "learning_rate": 7.46800716276205e-06, "loss": 0.2803, "step": 10364 }, { "epoch": 0.35569663692518877, "grad_norm": 0.7404382837810094, "learning_rate": 7.467523827835118e-06, "loss": 0.372, "step": 10365 }, { "epoch": 0.3557309540150995, "grad_norm": 0.8354070832104854, "learning_rate": 7.467040462424807e-06, "loss": 0.3216, "step": 10366 }, { "epoch": 0.3557652711050103, "grad_norm": 0.7042229381169831, "learning_rate": 7.4665570665370855e-06, "loss": 0.2647, "step": 10367 }, { "epoch": 0.3557995881949211, "grad_norm": 0.8454295290924086, "learning_rate": 7.466073640177929e-06, "loss": 0.3047, "step": 10368 }, { "epoch": 0.35583390528483183, "grad_norm": 0.9576562402518983, "learning_rate": 7.4655901833533066e-06, "loss": 0.344, "step": 10369 }, { "epoch": 0.35586822237474264, "grad_norm": 0.8064639778915796, "learning_rate": 7.465106696069193e-06, "loss": 0.3674, "step": 10370 }, { "epoch": 0.3559025394646534, "grad_norm": 0.7619866725124704, "learning_rate": 7.464623178331559e-06, "loss": 0.2953, "step": 10371 }, { "epoch": 0.3559368565545642, "grad_norm": 0.6423351532257688, "learning_rate": 7.464139630146381e-06, "loss": 0.281, "step": 10372 }, { "epoch": 0.35597117364447495, "grad_norm": 0.8744335401186344, "learning_rate": 7.463656051519629e-06, "loss": 0.2985, "step": 10373 }, { "epoch": 0.3560054907343857, "grad_norm": 0.8578970836242229, "learning_rate": 7.463172442457279e-06, "loss": 0.309, "step": 10374 }, { "epoch": 0.3560398078242965, "grad_norm": 0.6964548097580371, "learning_rate": 7.462688802965305e-06, "loss": 0.3011, "step": 10375 }, { "epoch": 0.35607412491420726, "grad_norm": 0.8048834624709572, "learning_rate": 7.462205133049684e-06, "loss": 0.2712, "step": 10376 }, { "epoch": 0.35610844200411806, "grad_norm": 0.8776308790600179, "learning_rate": 7.4617214327163865e-06, "loss": 0.2795, "step": 10377 }, { "epoch": 0.3561427590940288, "grad_norm": 0.8758114470542387, "learning_rate": 7.4612377019713936e-06, "loss": 0.2612, "step": 10378 }, { "epoch": 0.3561770761839396, "grad_norm": 0.7043911938840006, "learning_rate": 7.4607539408206775e-06, "loss": 0.2661, "step": 10379 }, { "epoch": 0.3562113932738504, "grad_norm": 0.6617553338074804, "learning_rate": 7.460270149270215e-06, "loss": 0.2415, "step": 10380 }, { "epoch": 0.3562457103637611, "grad_norm": 0.7586316348405788, "learning_rate": 7.459786327325983e-06, "loss": 0.2928, "step": 10381 }, { "epoch": 0.35628002745367193, "grad_norm": 0.7400888284089956, "learning_rate": 7.4593024749939595e-06, "loss": 0.3456, "step": 10382 }, { "epoch": 0.3563143445435827, "grad_norm": 0.7971804830489068, "learning_rate": 7.458818592280121e-06, "loss": 0.3652, "step": 10383 }, { "epoch": 0.3563486616334935, "grad_norm": 0.7382670660364145, "learning_rate": 7.458334679190443e-06, "loss": 0.3001, "step": 10384 }, { "epoch": 0.35638297872340424, "grad_norm": 0.6892035614864235, "learning_rate": 7.45785073573091e-06, "loss": 0.2623, "step": 10385 }, { "epoch": 0.35641729581331505, "grad_norm": 0.7203574498041518, "learning_rate": 7.457366761907494e-06, "loss": 0.3115, "step": 10386 }, { "epoch": 0.3564516129032258, "grad_norm": 0.9120710169157873, "learning_rate": 7.456882757726178e-06, "loss": 0.3038, "step": 10387 }, { "epoch": 0.3564859299931366, "grad_norm": 0.7771024283068589, "learning_rate": 7.4563987231929406e-06, "loss": 0.329, "step": 10388 }, { "epoch": 0.35652024708304736, "grad_norm": 0.8464934701211431, "learning_rate": 7.45591465831376e-06, "loss": 0.3362, "step": 10389 }, { "epoch": 0.3565545641729581, "grad_norm": 1.0395725162863878, "learning_rate": 7.455430563094617e-06, "loss": 0.2941, "step": 10390 }, { "epoch": 0.3565888812628689, "grad_norm": 0.7560601685522643, "learning_rate": 7.454946437541491e-06, "loss": 0.2596, "step": 10391 }, { "epoch": 0.35662319835277967, "grad_norm": 0.7492517586276393, "learning_rate": 7.454462281660367e-06, "loss": 0.2667, "step": 10392 }, { "epoch": 0.3566575154426905, "grad_norm": 0.7436886378097511, "learning_rate": 7.453978095457219e-06, "loss": 0.2753, "step": 10393 }, { "epoch": 0.3566918325326012, "grad_norm": 0.8714385303088713, "learning_rate": 7.453493878938036e-06, "loss": 0.3627, "step": 10394 }, { "epoch": 0.35672614962251203, "grad_norm": 0.8222712135286797, "learning_rate": 7.453009632108796e-06, "loss": 0.3562, "step": 10395 }, { "epoch": 0.3567604667124228, "grad_norm": 0.7619189835312518, "learning_rate": 7.452525354975482e-06, "loss": 0.3005, "step": 10396 }, { "epoch": 0.35679478380233354, "grad_norm": 0.847803716950941, "learning_rate": 7.452041047544077e-06, "loss": 0.3347, "step": 10397 }, { "epoch": 0.35682910089224434, "grad_norm": 0.738200014257267, "learning_rate": 7.451556709820561e-06, "loss": 0.2881, "step": 10398 }, { "epoch": 0.3568634179821551, "grad_norm": 0.9879097411141377, "learning_rate": 7.451072341810922e-06, "loss": 0.3045, "step": 10399 }, { "epoch": 0.3568977350720659, "grad_norm": 0.7744566204828494, "learning_rate": 7.4505879435211415e-06, "loss": 0.3216, "step": 10400 }, { "epoch": 0.35693205216197665, "grad_norm": 0.756060728622476, "learning_rate": 7.4501035149572055e-06, "loss": 0.2779, "step": 10401 }, { "epoch": 0.35696636925188746, "grad_norm": 0.7641807821517838, "learning_rate": 7.449619056125096e-06, "loss": 0.2738, "step": 10402 }, { "epoch": 0.3570006863417982, "grad_norm": 0.778912004335151, "learning_rate": 7.449134567030798e-06, "loss": 0.3825, "step": 10403 }, { "epoch": 0.35703500343170896, "grad_norm": 0.754051964232653, "learning_rate": 7.4486500476803005e-06, "loss": 0.3027, "step": 10404 }, { "epoch": 0.35706932052161977, "grad_norm": 0.9381292855870925, "learning_rate": 7.448165498079585e-06, "loss": 0.289, "step": 10405 }, { "epoch": 0.3571036376115305, "grad_norm": 0.6926818841753103, "learning_rate": 7.447680918234638e-06, "loss": 0.3041, "step": 10406 }, { "epoch": 0.35713795470144133, "grad_norm": 0.8037667731261147, "learning_rate": 7.447196308151447e-06, "loss": 0.3, "step": 10407 }, { "epoch": 0.3571722717913521, "grad_norm": 0.7722453017234037, "learning_rate": 7.446711667836001e-06, "loss": 0.3417, "step": 10408 }, { "epoch": 0.3572065888812629, "grad_norm": 0.6934208314513196, "learning_rate": 7.446226997294283e-06, "loss": 0.3062, "step": 10409 }, { "epoch": 0.35724090597117364, "grad_norm": 0.7806192459235484, "learning_rate": 7.445742296532284e-06, "loss": 0.314, "step": 10410 }, { "epoch": 0.35727522306108445, "grad_norm": 0.7945155661546405, "learning_rate": 7.445257565555989e-06, "loss": 0.3692, "step": 10411 }, { "epoch": 0.3573095401509952, "grad_norm": 0.7087893551210097, "learning_rate": 7.444772804371389e-06, "loss": 0.2721, "step": 10412 }, { "epoch": 0.35734385724090595, "grad_norm": 0.8618594347317039, "learning_rate": 7.444288012984469e-06, "loss": 0.2796, "step": 10413 }, { "epoch": 0.35737817433081676, "grad_norm": 0.7618339664505419, "learning_rate": 7.443803191401222e-06, "loss": 0.2986, "step": 10414 }, { "epoch": 0.3574124914207275, "grad_norm": 0.7377517917021782, "learning_rate": 7.443318339627635e-06, "loss": 0.3051, "step": 10415 }, { "epoch": 0.3574468085106383, "grad_norm": 0.77618411627877, "learning_rate": 7.442833457669699e-06, "loss": 0.2841, "step": 10416 }, { "epoch": 0.35748112560054907, "grad_norm": 0.8622129574875264, "learning_rate": 7.4423485455334056e-06, "loss": 0.3265, "step": 10417 }, { "epoch": 0.3575154426904599, "grad_norm": 0.8098768973108647, "learning_rate": 7.441863603224742e-06, "loss": 0.2869, "step": 10418 }, { "epoch": 0.3575497597803706, "grad_norm": 0.832047869174605, "learning_rate": 7.4413786307497e-06, "loss": 0.3481, "step": 10419 }, { "epoch": 0.3575840768702814, "grad_norm": 0.7330152669479558, "learning_rate": 7.440893628114271e-06, "loss": 0.2612, "step": 10420 }, { "epoch": 0.3576183939601922, "grad_norm": 0.8685804956250828, "learning_rate": 7.4404085953244465e-06, "loss": 0.3176, "step": 10421 }, { "epoch": 0.35765271105010293, "grad_norm": 0.7955100999820712, "learning_rate": 7.439923532386221e-06, "loss": 0.3389, "step": 10422 }, { "epoch": 0.35768702814001374, "grad_norm": 0.7947524782930319, "learning_rate": 7.439438439305584e-06, "loss": 0.3075, "step": 10423 }, { "epoch": 0.3577213452299245, "grad_norm": 0.7763671641710874, "learning_rate": 7.438953316088529e-06, "loss": 0.2986, "step": 10424 }, { "epoch": 0.3577556623198353, "grad_norm": 0.7033844909417697, "learning_rate": 7.438468162741051e-06, "loss": 0.2832, "step": 10425 }, { "epoch": 0.35778997940974605, "grad_norm": 0.8370639635234572, "learning_rate": 7.43798297926914e-06, "loss": 0.3666, "step": 10426 }, { "epoch": 0.3578242964996568, "grad_norm": 0.9148554539512354, "learning_rate": 7.437497765678792e-06, "loss": 0.3651, "step": 10427 }, { "epoch": 0.3578586135895676, "grad_norm": 0.8892330730974501, "learning_rate": 7.437012521976001e-06, "loss": 0.2675, "step": 10428 }, { "epoch": 0.35789293067947836, "grad_norm": 0.8386648530203968, "learning_rate": 7.436527248166761e-06, "loss": 0.2703, "step": 10429 }, { "epoch": 0.35792724776938917, "grad_norm": 0.7462389428536486, "learning_rate": 7.436041944257069e-06, "loss": 0.3408, "step": 10430 }, { "epoch": 0.3579615648592999, "grad_norm": 0.8138252578676427, "learning_rate": 7.435556610252918e-06, "loss": 0.2915, "step": 10431 }, { "epoch": 0.3579958819492107, "grad_norm": 0.804897868570736, "learning_rate": 7.435071246160304e-06, "loss": 0.3087, "step": 10432 }, { "epoch": 0.3580301990391215, "grad_norm": 0.8730717247015143, "learning_rate": 7.434585851985225e-06, "loss": 0.3173, "step": 10433 }, { "epoch": 0.3580645161290323, "grad_norm": 0.7503568174244639, "learning_rate": 7.434100427733676e-06, "loss": 0.3317, "step": 10434 }, { "epoch": 0.35809883321894304, "grad_norm": 0.7817520844251393, "learning_rate": 7.433614973411653e-06, "loss": 0.3741, "step": 10435 }, { "epoch": 0.3581331503088538, "grad_norm": 0.8204512644553342, "learning_rate": 7.433129489025156e-06, "loss": 0.3675, "step": 10436 }, { "epoch": 0.3581674673987646, "grad_norm": 0.7703095839796958, "learning_rate": 7.4326439745801806e-06, "loss": 0.3038, "step": 10437 }, { "epoch": 0.35820178448867535, "grad_norm": 0.7348952356491963, "learning_rate": 7.4321584300827246e-06, "loss": 0.3425, "step": 10438 }, { "epoch": 0.35823610157858615, "grad_norm": 0.7945025603428807, "learning_rate": 7.4316728555387865e-06, "loss": 0.3023, "step": 10439 }, { "epoch": 0.3582704186684969, "grad_norm": 0.7090768793030575, "learning_rate": 7.431187250954366e-06, "loss": 0.3139, "step": 10440 }, { "epoch": 0.3583047357584077, "grad_norm": 0.7072105672125983, "learning_rate": 7.43070161633546e-06, "loss": 0.286, "step": 10441 }, { "epoch": 0.35833905284831846, "grad_norm": 0.7939272837308183, "learning_rate": 7.4302159516880715e-06, "loss": 0.2997, "step": 10442 }, { "epoch": 0.3583733699382292, "grad_norm": 0.8487560153215605, "learning_rate": 7.429730257018197e-06, "loss": 0.2631, "step": 10443 }, { "epoch": 0.35840768702814, "grad_norm": 0.9079648293834401, "learning_rate": 7.429244532331839e-06, "loss": 0.3284, "step": 10444 }, { "epoch": 0.35844200411805077, "grad_norm": 0.8523733798732157, "learning_rate": 7.428758777634996e-06, "loss": 0.3273, "step": 10445 }, { "epoch": 0.3584763212079616, "grad_norm": 0.8077827268954644, "learning_rate": 7.428272992933672e-06, "loss": 0.3085, "step": 10446 }, { "epoch": 0.35851063829787233, "grad_norm": 0.7102008457930075, "learning_rate": 7.4277871782338625e-06, "loss": 0.3137, "step": 10447 }, { "epoch": 0.35854495538778314, "grad_norm": 0.7429336012436312, "learning_rate": 7.427301333541576e-06, "loss": 0.3403, "step": 10448 }, { "epoch": 0.3585792724776939, "grad_norm": 0.866388042754984, "learning_rate": 7.426815458862812e-06, "loss": 0.2907, "step": 10449 }, { "epoch": 0.35861358956760464, "grad_norm": 0.8527069330657867, "learning_rate": 7.426329554203569e-06, "loss": 0.3038, "step": 10450 }, { "epoch": 0.35864790665751545, "grad_norm": 0.8500344178404676, "learning_rate": 7.425843619569855e-06, "loss": 0.3169, "step": 10451 }, { "epoch": 0.3586822237474262, "grad_norm": 0.7852712832272661, "learning_rate": 7.425357654967671e-06, "loss": 0.2678, "step": 10452 }, { "epoch": 0.358716540837337, "grad_norm": 0.7292757611819977, "learning_rate": 7.4248716604030225e-06, "loss": 0.314, "step": 10453 }, { "epoch": 0.35875085792724776, "grad_norm": 0.7066108471610127, "learning_rate": 7.42438563588191e-06, "loss": 0.2739, "step": 10454 }, { "epoch": 0.35878517501715856, "grad_norm": 0.7512796998202885, "learning_rate": 7.423899581410341e-06, "loss": 0.3432, "step": 10455 }, { "epoch": 0.3588194921070693, "grad_norm": 0.7370540337329141, "learning_rate": 7.423413496994318e-06, "loss": 0.3325, "step": 10456 }, { "epoch": 0.3588538091969801, "grad_norm": 0.8526885937082033, "learning_rate": 7.422927382639846e-06, "loss": 0.3484, "step": 10457 }, { "epoch": 0.3588881262868909, "grad_norm": 0.827270433533031, "learning_rate": 7.422441238352932e-06, "loss": 0.3165, "step": 10458 }, { "epoch": 0.3589224433768016, "grad_norm": 0.8854010676147052, "learning_rate": 7.42195506413958e-06, "loss": 0.2755, "step": 10459 }, { "epoch": 0.35895676046671243, "grad_norm": 0.7365147153534816, "learning_rate": 7.421468860005797e-06, "loss": 0.2747, "step": 10460 }, { "epoch": 0.3589910775566232, "grad_norm": 0.9060180075004063, "learning_rate": 7.420982625957589e-06, "loss": 0.3074, "step": 10461 }, { "epoch": 0.359025394646534, "grad_norm": 0.7925069958891987, "learning_rate": 7.420496362000965e-06, "loss": 0.3103, "step": 10462 }, { "epoch": 0.35905971173644474, "grad_norm": 0.7267140206225648, "learning_rate": 7.4200100681419295e-06, "loss": 0.2935, "step": 10463 }, { "epoch": 0.35909402882635555, "grad_norm": 0.7693186008425876, "learning_rate": 7.4195237443864906e-06, "loss": 0.2701, "step": 10464 }, { "epoch": 0.3591283459162663, "grad_norm": 0.747157843894199, "learning_rate": 7.4190373907406565e-06, "loss": 0.2661, "step": 10465 }, { "epoch": 0.35916266300617705, "grad_norm": 0.7248176260258615, "learning_rate": 7.418551007210436e-06, "loss": 0.2992, "step": 10466 }, { "epoch": 0.35919698009608786, "grad_norm": 0.7614019735007518, "learning_rate": 7.418064593801838e-06, "loss": 0.2746, "step": 10467 }, { "epoch": 0.3592312971859986, "grad_norm": 0.7620467498250774, "learning_rate": 7.417578150520871e-06, "loss": 0.3337, "step": 10468 }, { "epoch": 0.3592656142759094, "grad_norm": 0.8293865955684749, "learning_rate": 7.417091677373546e-06, "loss": 0.3089, "step": 10469 }, { "epoch": 0.35929993136582017, "grad_norm": 0.7637901878125224, "learning_rate": 7.41660517436587e-06, "loss": 0.3012, "step": 10470 }, { "epoch": 0.359334248455731, "grad_norm": 0.7370879021464081, "learning_rate": 7.4161186415038535e-06, "loss": 0.3083, "step": 10471 }, { "epoch": 0.3593685655456417, "grad_norm": 0.7891217002092458, "learning_rate": 7.41563207879351e-06, "loss": 0.3254, "step": 10472 }, { "epoch": 0.3594028826355525, "grad_norm": 0.7649261775705275, "learning_rate": 7.415145486240847e-06, "loss": 0.2806, "step": 10473 }, { "epoch": 0.3594371997254633, "grad_norm": 0.7741445315727665, "learning_rate": 7.414658863851877e-06, "loss": 0.3142, "step": 10474 }, { "epoch": 0.35947151681537404, "grad_norm": 0.6958927592605669, "learning_rate": 7.4141722116326135e-06, "loss": 0.302, "step": 10475 }, { "epoch": 0.35950583390528484, "grad_norm": 0.8976509481365162, "learning_rate": 7.413685529589067e-06, "loss": 0.2723, "step": 10476 }, { "epoch": 0.3595401509951956, "grad_norm": 0.7235782852589681, "learning_rate": 7.413198817727249e-06, "loss": 0.3138, "step": 10477 }, { "epoch": 0.3595744680851064, "grad_norm": 0.7319733684460925, "learning_rate": 7.412712076053174e-06, "loss": 0.2957, "step": 10478 }, { "epoch": 0.35960878517501715, "grad_norm": 0.7304250304420988, "learning_rate": 7.412225304572853e-06, "loss": 0.3392, "step": 10479 }, { "epoch": 0.35964310226492796, "grad_norm": 0.775509110385496, "learning_rate": 7.411738503292302e-06, "loss": 0.3035, "step": 10480 }, { "epoch": 0.3596774193548387, "grad_norm": 0.7507662535012902, "learning_rate": 7.411251672217532e-06, "loss": 0.2944, "step": 10481 }, { "epoch": 0.35971173644474946, "grad_norm": 0.793454454845565, "learning_rate": 7.410764811354559e-06, "loss": 0.3085, "step": 10482 }, { "epoch": 0.35974605353466027, "grad_norm": 0.671618531533649, "learning_rate": 7.410277920709399e-06, "loss": 0.2497, "step": 10483 }, { "epoch": 0.359780370624571, "grad_norm": 0.7840713882008303, "learning_rate": 7.409791000288065e-06, "loss": 0.3124, "step": 10484 }, { "epoch": 0.35981468771448183, "grad_norm": 0.7723290619661061, "learning_rate": 7.409304050096573e-06, "loss": 0.3201, "step": 10485 }, { "epoch": 0.3598490048043926, "grad_norm": 0.7041506010550116, "learning_rate": 7.4088170701409365e-06, "loss": 0.2839, "step": 10486 }, { "epoch": 0.3598833218943034, "grad_norm": 0.7091356388691096, "learning_rate": 7.408330060427176e-06, "loss": 0.2882, "step": 10487 }, { "epoch": 0.35991763898421414, "grad_norm": 0.7823573017809805, "learning_rate": 7.407843020961303e-06, "loss": 0.2863, "step": 10488 }, { "epoch": 0.3599519560741249, "grad_norm": 0.7481270159272273, "learning_rate": 7.4073559517493374e-06, "loss": 0.2907, "step": 10489 }, { "epoch": 0.3599862731640357, "grad_norm": 0.740181978517329, "learning_rate": 7.406868852797298e-06, "loss": 0.2978, "step": 10490 }, { "epoch": 0.36002059025394645, "grad_norm": 0.7533946312489996, "learning_rate": 7.406381724111198e-06, "loss": 0.2774, "step": 10491 }, { "epoch": 0.36005490734385726, "grad_norm": 0.8264002779205538, "learning_rate": 7.405894565697058e-06, "loss": 0.3025, "step": 10492 }, { "epoch": 0.360089224433768, "grad_norm": 0.7376233721745876, "learning_rate": 7.4054073775608945e-06, "loss": 0.3131, "step": 10493 }, { "epoch": 0.3601235415236788, "grad_norm": 0.6755427447882366, "learning_rate": 7.404920159708729e-06, "loss": 0.313, "step": 10494 }, { "epoch": 0.36015785861358957, "grad_norm": 0.7063668935150678, "learning_rate": 7.404432912146577e-06, "loss": 0.3288, "step": 10495 }, { "epoch": 0.3601921757035003, "grad_norm": 0.7790004074715117, "learning_rate": 7.403945634880461e-06, "loss": 0.3321, "step": 10496 }, { "epoch": 0.3602264927934111, "grad_norm": 0.7562598521899464, "learning_rate": 7.4034583279163995e-06, "loss": 0.3448, "step": 10497 }, { "epoch": 0.3602608098833219, "grad_norm": 0.7566898922374918, "learning_rate": 7.402970991260412e-06, "loss": 0.299, "step": 10498 }, { "epoch": 0.3602951269732327, "grad_norm": 0.758859330165031, "learning_rate": 7.40248362491852e-06, "loss": 0.279, "step": 10499 }, { "epoch": 0.36032944406314343, "grad_norm": 0.771576889179722, "learning_rate": 7.401996228896743e-06, "loss": 0.2582, "step": 10500 }, { "epoch": 0.36036376115305424, "grad_norm": 0.8564239102834115, "learning_rate": 7.401508803201104e-06, "loss": 0.3071, "step": 10501 }, { "epoch": 0.360398078242965, "grad_norm": 0.8117236511994346, "learning_rate": 7.401021347837623e-06, "loss": 0.2589, "step": 10502 }, { "epoch": 0.3604323953328758, "grad_norm": 0.8106908304264547, "learning_rate": 7.4005338628123215e-06, "loss": 0.2939, "step": 10503 }, { "epoch": 0.36046671242278655, "grad_norm": 0.7028503581500516, "learning_rate": 7.400046348131224e-06, "loss": 0.2711, "step": 10504 }, { "epoch": 0.3605010295126973, "grad_norm": 0.8429741962158931, "learning_rate": 7.399558803800353e-06, "loss": 0.3162, "step": 10505 }, { "epoch": 0.3605353466026081, "grad_norm": 0.7188623397173928, "learning_rate": 7.39907122982573e-06, "loss": 0.256, "step": 10506 }, { "epoch": 0.36056966369251886, "grad_norm": 0.7214174072279654, "learning_rate": 7.398583626213378e-06, "loss": 0.3257, "step": 10507 }, { "epoch": 0.36060398078242967, "grad_norm": 0.7570574757634114, "learning_rate": 7.398095992969323e-06, "loss": 0.304, "step": 10508 }, { "epoch": 0.3606382978723404, "grad_norm": 0.6885855739192501, "learning_rate": 7.397608330099587e-06, "loss": 0.2896, "step": 10509 }, { "epoch": 0.3606726149622512, "grad_norm": 0.6900959311141934, "learning_rate": 7.397120637610196e-06, "loss": 0.2841, "step": 10510 }, { "epoch": 0.360706932052162, "grad_norm": 0.7258339771350363, "learning_rate": 7.396632915507173e-06, "loss": 0.2989, "step": 10511 }, { "epoch": 0.36074124914207273, "grad_norm": 0.7162390545865794, "learning_rate": 7.396145163796545e-06, "loss": 0.2805, "step": 10512 }, { "epoch": 0.36077556623198354, "grad_norm": 0.7318413543714934, "learning_rate": 7.395657382484339e-06, "loss": 0.2726, "step": 10513 }, { "epoch": 0.3608098833218943, "grad_norm": 0.7334597961230157, "learning_rate": 7.3951695715765785e-06, "loss": 0.3014, "step": 10514 }, { "epoch": 0.3608442004118051, "grad_norm": 0.7334841697136313, "learning_rate": 7.3946817310792895e-06, "loss": 0.3093, "step": 10515 }, { "epoch": 0.36087851750171585, "grad_norm": 0.69135145308669, "learning_rate": 7.3941938609985e-06, "loss": 0.2559, "step": 10516 }, { "epoch": 0.36091283459162665, "grad_norm": 0.69057278830458, "learning_rate": 7.393705961340236e-06, "loss": 0.2741, "step": 10517 }, { "epoch": 0.3609471516815374, "grad_norm": 0.787184936855348, "learning_rate": 7.393218032110526e-06, "loss": 0.284, "step": 10518 }, { "epoch": 0.36098146877144816, "grad_norm": 1.4368329770675488, "learning_rate": 7.3927300733153975e-06, "loss": 0.3214, "step": 10519 }, { "epoch": 0.36101578586135896, "grad_norm": 0.6442647995408559, "learning_rate": 7.392242084960879e-06, "loss": 0.3025, "step": 10520 }, { "epoch": 0.3610501029512697, "grad_norm": 0.7268481009515533, "learning_rate": 7.3917540670529985e-06, "loss": 0.3272, "step": 10521 }, { "epoch": 0.3610844200411805, "grad_norm": 0.7147230042642052, "learning_rate": 7.3912660195977846e-06, "loss": 0.3192, "step": 10522 }, { "epoch": 0.36111873713109127, "grad_norm": 0.7166977336776751, "learning_rate": 7.390777942601268e-06, "loss": 0.2984, "step": 10523 }, { "epoch": 0.3611530542210021, "grad_norm": 0.7840632944648361, "learning_rate": 7.390289836069477e-06, "loss": 0.2935, "step": 10524 }, { "epoch": 0.36118737131091283, "grad_norm": 0.80188806413937, "learning_rate": 7.38980170000844e-06, "loss": 0.3912, "step": 10525 }, { "epoch": 0.36122168840082364, "grad_norm": 0.762866913126133, "learning_rate": 7.3893135344241914e-06, "loss": 0.3326, "step": 10526 }, { "epoch": 0.3612560054907344, "grad_norm": 0.7296741295858563, "learning_rate": 7.388825339322759e-06, "loss": 0.282, "step": 10527 }, { "epoch": 0.36129032258064514, "grad_norm": 0.8096539436157327, "learning_rate": 7.388337114710174e-06, "loss": 0.294, "step": 10528 }, { "epoch": 0.36132463967055595, "grad_norm": 0.7594596164526146, "learning_rate": 7.38784886059247e-06, "loss": 0.2577, "step": 10529 }, { "epoch": 0.3613589567604667, "grad_norm": 0.8281198785354819, "learning_rate": 7.387360576975678e-06, "loss": 0.2902, "step": 10530 }, { "epoch": 0.3613932738503775, "grad_norm": 0.6670589698221717, "learning_rate": 7.386872263865827e-06, "loss": 0.2796, "step": 10531 }, { "epoch": 0.36142759094028826, "grad_norm": 0.7653706421627754, "learning_rate": 7.386383921268952e-06, "loss": 0.3356, "step": 10532 }, { "epoch": 0.36146190803019906, "grad_norm": 0.7810465250742431, "learning_rate": 7.385895549191088e-06, "loss": 0.3513, "step": 10533 }, { "epoch": 0.3614962251201098, "grad_norm": 0.8041428315961107, "learning_rate": 7.385407147638264e-06, "loss": 0.3187, "step": 10534 }, { "epoch": 0.36153054221002057, "grad_norm": 0.7034557418646925, "learning_rate": 7.384918716616517e-06, "loss": 0.2774, "step": 10535 }, { "epoch": 0.3615648592999314, "grad_norm": 0.8988701643407242, "learning_rate": 7.38443025613188e-06, "loss": 0.3403, "step": 10536 }, { "epoch": 0.3615991763898421, "grad_norm": 0.8623447423254443, "learning_rate": 7.3839417661903866e-06, "loss": 0.2645, "step": 10537 }, { "epoch": 0.36163349347975293, "grad_norm": 0.8293251842363973, "learning_rate": 7.3834532467980725e-06, "loss": 0.3171, "step": 10538 }, { "epoch": 0.3616678105696637, "grad_norm": 0.8015275338408803, "learning_rate": 7.382964697960971e-06, "loss": 0.3374, "step": 10539 }, { "epoch": 0.3617021276595745, "grad_norm": 0.8088016389665561, "learning_rate": 7.38247611968512e-06, "loss": 0.3242, "step": 10540 }, { "epoch": 0.36173644474948524, "grad_norm": 1.00909109632255, "learning_rate": 7.381987511976555e-06, "loss": 0.2832, "step": 10541 }, { "epoch": 0.361770761839396, "grad_norm": 0.7293602046979872, "learning_rate": 7.381498874841311e-06, "loss": 0.2704, "step": 10542 }, { "epoch": 0.3618050789293068, "grad_norm": 0.707224539206648, "learning_rate": 7.3810102082854254e-06, "loss": 0.2872, "step": 10543 }, { "epoch": 0.36183939601921755, "grad_norm": 0.8661654362699183, "learning_rate": 7.380521512314933e-06, "loss": 0.2897, "step": 10544 }, { "epoch": 0.36187371310912836, "grad_norm": 0.7052479976803049, "learning_rate": 7.380032786935875e-06, "loss": 0.2626, "step": 10545 }, { "epoch": 0.3619080301990391, "grad_norm": 0.8018147881374373, "learning_rate": 7.379544032154285e-06, "loss": 0.3159, "step": 10546 }, { "epoch": 0.3619423472889499, "grad_norm": 0.7758371549282969, "learning_rate": 7.379055247976203e-06, "loss": 0.3487, "step": 10547 }, { "epoch": 0.36197666437886067, "grad_norm": 0.8412350992992975, "learning_rate": 7.378566434407666e-06, "loss": 0.3124, "step": 10548 }, { "epoch": 0.3620109814687715, "grad_norm": 0.7568454095299345, "learning_rate": 7.378077591454716e-06, "loss": 0.3175, "step": 10549 }, { "epoch": 0.3620452985586822, "grad_norm": 0.724610728124938, "learning_rate": 7.3775887191233896e-06, "loss": 0.2817, "step": 10550 }, { "epoch": 0.362079615648593, "grad_norm": 0.75783467496917, "learning_rate": 7.377099817419726e-06, "loss": 0.271, "step": 10551 }, { "epoch": 0.3621139327385038, "grad_norm": 0.77523579254249, "learning_rate": 7.376610886349767e-06, "loss": 0.2565, "step": 10552 }, { "epoch": 0.36214824982841454, "grad_norm": 0.8101921136554292, "learning_rate": 7.3761219259195485e-06, "loss": 0.2985, "step": 10553 }, { "epoch": 0.36218256691832534, "grad_norm": 0.7374333531241964, "learning_rate": 7.3756329361351155e-06, "loss": 0.2957, "step": 10554 }, { "epoch": 0.3622168840082361, "grad_norm": 0.9926887537830219, "learning_rate": 7.375143917002507e-06, "loss": 0.267, "step": 10555 }, { "epoch": 0.3622512010981469, "grad_norm": 0.8342006555907767, "learning_rate": 7.374654868527764e-06, "loss": 0.2767, "step": 10556 }, { "epoch": 0.36228551818805765, "grad_norm": 0.7497916589186889, "learning_rate": 7.37416579071693e-06, "loss": 0.2917, "step": 10557 }, { "epoch": 0.3623198352779684, "grad_norm": 0.755315694815207, "learning_rate": 7.3736766835760455e-06, "loss": 0.2938, "step": 10558 }, { "epoch": 0.3623541523678792, "grad_norm": 0.7456001457145467, "learning_rate": 7.373187547111153e-06, "loss": 0.3244, "step": 10559 }, { "epoch": 0.36238846945778996, "grad_norm": 0.751875390331615, "learning_rate": 7.372698381328295e-06, "loss": 0.2877, "step": 10560 }, { "epoch": 0.36242278654770077, "grad_norm": 0.8191804920068122, "learning_rate": 7.372209186233515e-06, "loss": 0.3103, "step": 10561 }, { "epoch": 0.3624571036376115, "grad_norm": 0.7199514289226514, "learning_rate": 7.371719961832855e-06, "loss": 0.309, "step": 10562 }, { "epoch": 0.36249142072752233, "grad_norm": 0.7944416312600037, "learning_rate": 7.371230708132361e-06, "loss": 0.3636, "step": 10563 }, { "epoch": 0.3625257378174331, "grad_norm": 0.8537597186246266, "learning_rate": 7.370741425138075e-06, "loss": 0.3113, "step": 10564 }, { "epoch": 0.36256005490734383, "grad_norm": 0.8043382490861727, "learning_rate": 7.370252112856045e-06, "loss": 0.3973, "step": 10565 }, { "epoch": 0.36259437199725464, "grad_norm": 0.7169213291891033, "learning_rate": 7.369762771292313e-06, "loss": 0.2477, "step": 10566 }, { "epoch": 0.3626286890871654, "grad_norm": 0.746157826156153, "learning_rate": 7.369273400452923e-06, "loss": 0.2814, "step": 10567 }, { "epoch": 0.3626630061770762, "grad_norm": 0.8026966269757372, "learning_rate": 7.368784000343925e-06, "loss": 0.2738, "step": 10568 }, { "epoch": 0.36269732326698695, "grad_norm": 0.7118819334309998, "learning_rate": 7.36829457097136e-06, "loss": 0.2958, "step": 10569 }, { "epoch": 0.36273164035689776, "grad_norm": 0.7126071272483598, "learning_rate": 7.367805112341277e-06, "loss": 0.27, "step": 10570 }, { "epoch": 0.3627659574468085, "grad_norm": 0.8419412555899495, "learning_rate": 7.367315624459724e-06, "loss": 0.3367, "step": 10571 }, { "epoch": 0.3628002745367193, "grad_norm": 0.8145666241865253, "learning_rate": 7.366826107332746e-06, "loss": 0.357, "step": 10572 }, { "epoch": 0.36283459162663007, "grad_norm": 0.7431463860973221, "learning_rate": 7.366336560966391e-06, "loss": 0.3004, "step": 10573 }, { "epoch": 0.3628689087165408, "grad_norm": 0.7478858054970801, "learning_rate": 7.365846985366707e-06, "loss": 0.2881, "step": 10574 }, { "epoch": 0.3629032258064516, "grad_norm": 0.7426908135662357, "learning_rate": 7.36535738053974e-06, "loss": 0.3393, "step": 10575 }, { "epoch": 0.3629375428963624, "grad_norm": 0.7629599919437094, "learning_rate": 7.364867746491542e-06, "loss": 0.3193, "step": 10576 }, { "epoch": 0.3629718599862732, "grad_norm": 0.9190689051957792, "learning_rate": 7.364378083228159e-06, "loss": 0.2776, "step": 10577 }, { "epoch": 0.36300617707618393, "grad_norm": 0.8647241347998564, "learning_rate": 7.363888390755643e-06, "loss": 0.3062, "step": 10578 }, { "epoch": 0.36304049416609474, "grad_norm": 0.7314742341049519, "learning_rate": 7.363398669080039e-06, "loss": 0.2794, "step": 10579 }, { "epoch": 0.3630748112560055, "grad_norm": 0.8845264514508633, "learning_rate": 7.362908918207402e-06, "loss": 0.3784, "step": 10580 }, { "epoch": 0.36310912834591624, "grad_norm": 0.7771797976216556, "learning_rate": 7.362419138143782e-06, "loss": 0.3506, "step": 10581 }, { "epoch": 0.36314344543582705, "grad_norm": 0.7635054139033516, "learning_rate": 7.361929328895225e-06, "loss": 0.3064, "step": 10582 }, { "epoch": 0.3631777625257378, "grad_norm": 0.7353400434449151, "learning_rate": 7.361439490467785e-06, "loss": 0.2877, "step": 10583 }, { "epoch": 0.3632120796156486, "grad_norm": 0.7791430827908356, "learning_rate": 7.360949622867515e-06, "loss": 0.354, "step": 10584 }, { "epoch": 0.36324639670555936, "grad_norm": 0.8826025567603336, "learning_rate": 7.360459726100463e-06, "loss": 0.3272, "step": 10585 }, { "epoch": 0.36328071379547017, "grad_norm": 0.6830051664506946, "learning_rate": 7.359969800172684e-06, "loss": 0.2753, "step": 10586 }, { "epoch": 0.3633150308853809, "grad_norm": 0.8030347557328767, "learning_rate": 7.35947984509023e-06, "loss": 0.3409, "step": 10587 }, { "epoch": 0.36334934797529167, "grad_norm": 0.7821458614277021, "learning_rate": 7.358989860859153e-06, "loss": 0.2661, "step": 10588 }, { "epoch": 0.3633836650652025, "grad_norm": 0.8007769634069347, "learning_rate": 7.358499847485507e-06, "loss": 0.3465, "step": 10589 }, { "epoch": 0.36341798215511323, "grad_norm": 0.7100429129164999, "learning_rate": 7.358009804975346e-06, "loss": 0.2956, "step": 10590 }, { "epoch": 0.36345229924502404, "grad_norm": 0.7945610062403642, "learning_rate": 7.357519733334722e-06, "loss": 0.3555, "step": 10591 }, { "epoch": 0.3634866163349348, "grad_norm": 0.8325112799662636, "learning_rate": 7.357029632569688e-06, "loss": 0.2881, "step": 10592 }, { "epoch": 0.3635209334248456, "grad_norm": 0.7637871740115498, "learning_rate": 7.356539502686304e-06, "loss": 0.2993, "step": 10593 }, { "epoch": 0.36355525051475635, "grad_norm": 0.8231842043898271, "learning_rate": 7.356049343690621e-06, "loss": 0.3158, "step": 10594 }, { "epoch": 0.3635895676046671, "grad_norm": 0.647152123024163, "learning_rate": 7.355559155588695e-06, "loss": 0.2562, "step": 10595 }, { "epoch": 0.3636238846945779, "grad_norm": 0.7379609311163895, "learning_rate": 7.355068938386582e-06, "loss": 0.2678, "step": 10596 }, { "epoch": 0.36365820178448865, "grad_norm": 0.8325588202279443, "learning_rate": 7.3545786920903395e-06, "loss": 0.2877, "step": 10597 }, { "epoch": 0.36369251887439946, "grad_norm": 0.8210405081528394, "learning_rate": 7.35408841670602e-06, "loss": 0.2896, "step": 10598 }, { "epoch": 0.3637268359643102, "grad_norm": 0.7939753594409299, "learning_rate": 7.353598112239684e-06, "loss": 0.2984, "step": 10599 }, { "epoch": 0.363761153054221, "grad_norm": 0.8605923080563698, "learning_rate": 7.353107778697387e-06, "loss": 0.3208, "step": 10600 }, { "epoch": 0.36379547014413177, "grad_norm": 0.7644300182660778, "learning_rate": 7.352617416085187e-06, "loss": 0.3034, "step": 10601 }, { "epoch": 0.3638297872340426, "grad_norm": 0.7261161912636632, "learning_rate": 7.3521270244091416e-06, "loss": 0.2886, "step": 10602 }, { "epoch": 0.36386410432395333, "grad_norm": 0.7280083031029256, "learning_rate": 7.35163660367531e-06, "loss": 0.2818, "step": 10603 }, { "epoch": 0.3638984214138641, "grad_norm": 0.751179995907257, "learning_rate": 7.351146153889749e-06, "loss": 0.2652, "step": 10604 }, { "epoch": 0.3639327385037749, "grad_norm": 2.8917777369189137, "learning_rate": 7.350655675058518e-06, "loss": 0.2763, "step": 10605 }, { "epoch": 0.36396705559368564, "grad_norm": 0.8258603223805479, "learning_rate": 7.350165167187677e-06, "loss": 0.317, "step": 10606 }, { "epoch": 0.36400137268359645, "grad_norm": 0.7634986042438893, "learning_rate": 7.349674630283286e-06, "loss": 0.2948, "step": 10607 }, { "epoch": 0.3640356897735072, "grad_norm": 0.7426864434230227, "learning_rate": 7.349184064351405e-06, "loss": 0.3022, "step": 10608 }, { "epoch": 0.364070006863418, "grad_norm": 0.7571088105285086, "learning_rate": 7.3486934693980915e-06, "loss": 0.3055, "step": 10609 }, { "epoch": 0.36410432395332876, "grad_norm": 0.792263976136283, "learning_rate": 7.34820284542941e-06, "loss": 0.2684, "step": 10610 }, { "epoch": 0.3641386410432395, "grad_norm": 0.723759966812628, "learning_rate": 7.3477121924514204e-06, "loss": 0.2921, "step": 10611 }, { "epoch": 0.3641729581331503, "grad_norm": 0.8071199150112784, "learning_rate": 7.347221510470184e-06, "loss": 0.3264, "step": 10612 }, { "epoch": 0.36420727522306107, "grad_norm": 0.748586622586037, "learning_rate": 7.346730799491763e-06, "loss": 0.3446, "step": 10613 }, { "epoch": 0.3642415923129719, "grad_norm": 0.9363642993572444, "learning_rate": 7.346240059522216e-06, "loss": 0.2647, "step": 10614 }, { "epoch": 0.3642759094028826, "grad_norm": 0.8522636701155708, "learning_rate": 7.34574929056761e-06, "loss": 0.3372, "step": 10615 }, { "epoch": 0.36431022649279343, "grad_norm": 0.8068850608973107, "learning_rate": 7.345258492634008e-06, "loss": 0.2921, "step": 10616 }, { "epoch": 0.3643445435827042, "grad_norm": 0.6397792585003774, "learning_rate": 7.34476766572747e-06, "loss": 0.262, "step": 10617 }, { "epoch": 0.36437886067261493, "grad_norm": 0.9053316694332788, "learning_rate": 7.344276809854061e-06, "loss": 0.3622, "step": 10618 }, { "epoch": 0.36441317776252574, "grad_norm": 0.7068313160939287, "learning_rate": 7.343785925019846e-06, "loss": 0.3161, "step": 10619 }, { "epoch": 0.3644474948524365, "grad_norm": 0.8653612389629688, "learning_rate": 7.343295011230889e-06, "loss": 0.3125, "step": 10620 }, { "epoch": 0.3644818119423473, "grad_norm": 0.7938934227500596, "learning_rate": 7.342804068493254e-06, "loss": 0.3204, "step": 10621 }, { "epoch": 0.36451612903225805, "grad_norm": 0.952250279768953, "learning_rate": 7.342313096813005e-06, "loss": 0.3369, "step": 10622 }, { "epoch": 0.36455044612216886, "grad_norm": 0.725039948033955, "learning_rate": 7.341822096196209e-06, "loss": 0.3197, "step": 10623 }, { "epoch": 0.3645847632120796, "grad_norm": 0.77101968929766, "learning_rate": 7.341331066648932e-06, "loss": 0.2782, "step": 10624 }, { "epoch": 0.3646190803019904, "grad_norm": 0.7951984593117365, "learning_rate": 7.340840008177237e-06, "loss": 0.2864, "step": 10625 }, { "epoch": 0.36465339739190117, "grad_norm": 0.8431004797477528, "learning_rate": 7.340348920787196e-06, "loss": 0.2608, "step": 10626 }, { "epoch": 0.3646877144818119, "grad_norm": 0.7495238289320898, "learning_rate": 7.339857804484871e-06, "loss": 0.3139, "step": 10627 }, { "epoch": 0.3647220315717227, "grad_norm": 0.8156185064096252, "learning_rate": 7.339366659276332e-06, "loss": 0.2692, "step": 10628 }, { "epoch": 0.3647563486616335, "grad_norm": 0.8138156837520342, "learning_rate": 7.338875485167644e-06, "loss": 0.2747, "step": 10629 }, { "epoch": 0.3647906657515443, "grad_norm": 0.8274273062787113, "learning_rate": 7.338384282164877e-06, "loss": 0.3347, "step": 10630 }, { "epoch": 0.36482498284145504, "grad_norm": 0.7602615035188488, "learning_rate": 7.337893050274098e-06, "loss": 0.2958, "step": 10631 }, { "epoch": 0.36485929993136584, "grad_norm": 0.8248185470859701, "learning_rate": 7.337401789501377e-06, "loss": 0.2883, "step": 10632 }, { "epoch": 0.3648936170212766, "grad_norm": 0.9132457589287074, "learning_rate": 7.336910499852781e-06, "loss": 0.2657, "step": 10633 }, { "epoch": 0.36492793411118735, "grad_norm": 0.8592434523469306, "learning_rate": 7.33641918133438e-06, "loss": 0.2863, "step": 10634 }, { "epoch": 0.36496225120109815, "grad_norm": 0.8111531402450846, "learning_rate": 7.3359278339522454e-06, "loss": 0.2956, "step": 10635 }, { "epoch": 0.3649965682910089, "grad_norm": 0.8163938802612448, "learning_rate": 7.335436457712445e-06, "loss": 0.2553, "step": 10636 }, { "epoch": 0.3650308853809197, "grad_norm": 0.7953037655361694, "learning_rate": 7.33494505262105e-06, "loss": 0.3037, "step": 10637 }, { "epoch": 0.36506520247083046, "grad_norm": 0.8166010394490615, "learning_rate": 7.334453618684132e-06, "loss": 0.2838, "step": 10638 }, { "epoch": 0.36509951956074127, "grad_norm": 0.8816450018760389, "learning_rate": 7.33396215590776e-06, "loss": 0.2568, "step": 10639 }, { "epoch": 0.365133836650652, "grad_norm": 0.8083259312468687, "learning_rate": 7.333470664298008e-06, "loss": 0.2634, "step": 10640 }, { "epoch": 0.3651681537405628, "grad_norm": 0.9237105352583024, "learning_rate": 7.332979143860946e-06, "loss": 0.3868, "step": 10641 }, { "epoch": 0.3652024708304736, "grad_norm": 0.7669128480798759, "learning_rate": 7.332487594602646e-06, "loss": 0.2988, "step": 10642 }, { "epoch": 0.36523678792038433, "grad_norm": 0.73433433066773, "learning_rate": 7.3319960165291815e-06, "loss": 0.2868, "step": 10643 }, { "epoch": 0.36527110501029514, "grad_norm": 0.8137657835758493, "learning_rate": 7.331504409646626e-06, "loss": 0.3566, "step": 10644 }, { "epoch": 0.3653054221002059, "grad_norm": 0.8446476757794584, "learning_rate": 7.331012773961051e-06, "loss": 0.296, "step": 10645 }, { "epoch": 0.3653397391901167, "grad_norm": 0.7077840692290934, "learning_rate": 7.33052110947853e-06, "loss": 0.2684, "step": 10646 }, { "epoch": 0.36537405628002745, "grad_norm": 0.7255169810586859, "learning_rate": 7.330029416205139e-06, "loss": 0.2695, "step": 10647 }, { "epoch": 0.36540837336993826, "grad_norm": 0.8850860091478177, "learning_rate": 7.329537694146951e-06, "loss": 0.344, "step": 10648 }, { "epoch": 0.365442690459849, "grad_norm": 0.8726977723361983, "learning_rate": 7.329045943310041e-06, "loss": 0.3659, "step": 10649 }, { "epoch": 0.36547700754975976, "grad_norm": 0.8196733418155971, "learning_rate": 7.328554163700483e-06, "loss": 0.2859, "step": 10650 }, { "epoch": 0.36551132463967057, "grad_norm": 0.7325357850152571, "learning_rate": 7.328062355324354e-06, "loss": 0.3095, "step": 10651 }, { "epoch": 0.3655456417295813, "grad_norm": 0.7322470477683369, "learning_rate": 7.327570518187728e-06, "loss": 0.2898, "step": 10652 }, { "epoch": 0.3655799588194921, "grad_norm": 0.7741149778438671, "learning_rate": 7.3270786522966815e-06, "loss": 0.2758, "step": 10653 }, { "epoch": 0.3656142759094029, "grad_norm": 0.8863817409748836, "learning_rate": 7.326586757657294e-06, "loss": 0.2852, "step": 10654 }, { "epoch": 0.3656485929993137, "grad_norm": 0.8049269183064305, "learning_rate": 7.326094834275637e-06, "loss": 0.379, "step": 10655 }, { "epoch": 0.36568291008922443, "grad_norm": 0.7025819040984245, "learning_rate": 7.325602882157791e-06, "loss": 0.3036, "step": 10656 }, { "epoch": 0.3657172271791352, "grad_norm": 0.7164213723578108, "learning_rate": 7.325110901309833e-06, "loss": 0.3098, "step": 10657 }, { "epoch": 0.365751544269046, "grad_norm": 0.8224435264773703, "learning_rate": 7.32461889173784e-06, "loss": 0.2883, "step": 10658 }, { "epoch": 0.36578586135895674, "grad_norm": 0.7832830391603428, "learning_rate": 7.324126853447891e-06, "loss": 0.3305, "step": 10659 }, { "epoch": 0.36582017844886755, "grad_norm": 0.6525821923256862, "learning_rate": 7.323634786446066e-06, "loss": 0.2812, "step": 10660 }, { "epoch": 0.3658544955387783, "grad_norm": 0.829785944974378, "learning_rate": 7.323142690738441e-06, "loss": 0.2742, "step": 10661 }, { "epoch": 0.3658888126286891, "grad_norm": 0.8020051789572041, "learning_rate": 7.3226505663310965e-06, "loss": 0.3293, "step": 10662 }, { "epoch": 0.36592312971859986, "grad_norm": 0.7777901852690323, "learning_rate": 7.3221584132301115e-06, "loss": 0.355, "step": 10663 }, { "epoch": 0.3659574468085106, "grad_norm": 0.7699211799662506, "learning_rate": 7.321666231441569e-06, "loss": 0.266, "step": 10664 }, { "epoch": 0.3659917638984214, "grad_norm": 0.6139803466742972, "learning_rate": 7.321174020971546e-06, "loss": 0.2615, "step": 10665 }, { "epoch": 0.36602608098833217, "grad_norm": 0.7736195279645072, "learning_rate": 7.320681781826122e-06, "loss": 0.2989, "step": 10666 }, { "epoch": 0.366060398078243, "grad_norm": 0.7449311649900873, "learning_rate": 7.320189514011382e-06, "loss": 0.357, "step": 10667 }, { "epoch": 0.36609471516815373, "grad_norm": 0.744772171556788, "learning_rate": 7.319697217533405e-06, "loss": 0.2776, "step": 10668 }, { "epoch": 0.36612903225806454, "grad_norm": 0.7759035743862144, "learning_rate": 7.3192048923982754e-06, "loss": 0.3335, "step": 10669 }, { "epoch": 0.3661633493479753, "grad_norm": 0.8008556699923661, "learning_rate": 7.318712538612071e-06, "loss": 0.28, "step": 10670 }, { "epoch": 0.3661976664378861, "grad_norm": 0.7360945033348112, "learning_rate": 7.3182201561808775e-06, "loss": 0.3014, "step": 10671 }, { "epoch": 0.36623198352779684, "grad_norm": 0.7435911579053069, "learning_rate": 7.317727745110776e-06, "loss": 0.3319, "step": 10672 }, { "epoch": 0.3662663006177076, "grad_norm": 0.7443575770247134, "learning_rate": 7.317235305407853e-06, "loss": 0.2548, "step": 10673 }, { "epoch": 0.3663006177076184, "grad_norm": 0.840817130669947, "learning_rate": 7.316742837078185e-06, "loss": 0.3413, "step": 10674 }, { "epoch": 0.36633493479752915, "grad_norm": 0.7913874001163945, "learning_rate": 7.316250340127864e-06, "loss": 0.3315, "step": 10675 }, { "epoch": 0.36636925188743996, "grad_norm": 0.7809761328235006, "learning_rate": 7.315757814562969e-06, "loss": 0.2902, "step": 10676 }, { "epoch": 0.3664035689773507, "grad_norm": 0.688945640996381, "learning_rate": 7.3152652603895876e-06, "loss": 0.2995, "step": 10677 }, { "epoch": 0.3664378860672615, "grad_norm": 0.7852074844644017, "learning_rate": 7.314772677613802e-06, "loss": 0.2934, "step": 10678 }, { "epoch": 0.36647220315717227, "grad_norm": 0.8488083333162677, "learning_rate": 7.314280066241699e-06, "loss": 0.3213, "step": 10679 }, { "epoch": 0.366506520247083, "grad_norm": 0.7814199763766309, "learning_rate": 7.313787426279364e-06, "loss": 0.293, "step": 10680 }, { "epoch": 0.36654083733699383, "grad_norm": 0.8168151804778107, "learning_rate": 7.313294757732882e-06, "loss": 0.3703, "step": 10681 }, { "epoch": 0.3665751544269046, "grad_norm": 0.8438677517508043, "learning_rate": 7.312802060608341e-06, "loss": 0.3089, "step": 10682 }, { "epoch": 0.3666094715168154, "grad_norm": 0.9498844241709394, "learning_rate": 7.312309334911828e-06, "loss": 0.3236, "step": 10683 }, { "epoch": 0.36664378860672614, "grad_norm": 0.7633472285774391, "learning_rate": 7.311816580649428e-06, "loss": 0.268, "step": 10684 }, { "epoch": 0.36667810569663695, "grad_norm": 0.7178028252926737, "learning_rate": 7.311323797827229e-06, "loss": 0.3228, "step": 10685 }, { "epoch": 0.3667124227865477, "grad_norm": 0.7929984656857278, "learning_rate": 7.31083098645132e-06, "loss": 0.314, "step": 10686 }, { "epoch": 0.36674673987645845, "grad_norm": 0.7302613384113698, "learning_rate": 7.310338146527789e-06, "loss": 0.3074, "step": 10687 }, { "epoch": 0.36678105696636926, "grad_norm": 0.9043647449056192, "learning_rate": 7.309845278062723e-06, "loss": 0.2947, "step": 10688 }, { "epoch": 0.36681537405628, "grad_norm": 0.6964821965658199, "learning_rate": 7.309352381062209e-06, "loss": 0.3206, "step": 10689 }, { "epoch": 0.3668496911461908, "grad_norm": 0.7760570046560241, "learning_rate": 7.308859455532344e-06, "loss": 0.3366, "step": 10690 }, { "epoch": 0.36688400823610157, "grad_norm": 0.6689578191694862, "learning_rate": 7.308366501479209e-06, "loss": 0.3039, "step": 10691 }, { "epoch": 0.3669183253260124, "grad_norm": 0.7512697515923358, "learning_rate": 7.307873518908897e-06, "loss": 0.277, "step": 10692 }, { "epoch": 0.3669526424159231, "grad_norm": 0.7246096445763653, "learning_rate": 7.3073805078275e-06, "loss": 0.2923, "step": 10693 }, { "epoch": 0.36698695950583393, "grad_norm": 0.7219165877776722, "learning_rate": 7.306887468241106e-06, "loss": 0.3072, "step": 10694 }, { "epoch": 0.3670212765957447, "grad_norm": 0.8386872481217315, "learning_rate": 7.3063944001558075e-06, "loss": 0.3282, "step": 10695 }, { "epoch": 0.36705559368565543, "grad_norm": 0.7398583732412141, "learning_rate": 7.305901303577693e-06, "loss": 0.2827, "step": 10696 }, { "epoch": 0.36708991077556624, "grad_norm": 0.7426299281819321, "learning_rate": 7.305408178512858e-06, "loss": 0.2565, "step": 10697 }, { "epoch": 0.367124227865477, "grad_norm": 0.778917119068099, "learning_rate": 7.304915024967394e-06, "loss": 0.3574, "step": 10698 }, { "epoch": 0.3671585449553878, "grad_norm": 0.7516391457667906, "learning_rate": 7.304421842947391e-06, "loss": 0.3283, "step": 10699 }, { "epoch": 0.36719286204529855, "grad_norm": 0.7818278766007333, "learning_rate": 7.303928632458943e-06, "loss": 0.2962, "step": 10700 }, { "epoch": 0.36722717913520936, "grad_norm": 0.7347033611624171, "learning_rate": 7.303435393508143e-06, "loss": 0.2906, "step": 10701 }, { "epoch": 0.3672614962251201, "grad_norm": 0.7778359975786111, "learning_rate": 7.302942126101085e-06, "loss": 0.3113, "step": 10702 }, { "epoch": 0.36729581331503086, "grad_norm": 0.7839084958408575, "learning_rate": 7.302448830243859e-06, "loss": 0.3037, "step": 10703 }, { "epoch": 0.36733013040494167, "grad_norm": 0.754617710466532, "learning_rate": 7.301955505942564e-06, "loss": 0.3497, "step": 10704 }, { "epoch": 0.3673644474948524, "grad_norm": 0.80122500565963, "learning_rate": 7.301462153203293e-06, "loss": 0.3031, "step": 10705 }, { "epoch": 0.3673987645847632, "grad_norm": 0.858120509893985, "learning_rate": 7.30096877203214e-06, "loss": 0.3383, "step": 10706 }, { "epoch": 0.367433081674674, "grad_norm": 0.8574489917919831, "learning_rate": 7.3004753624352e-06, "loss": 0.3322, "step": 10707 }, { "epoch": 0.3674673987645848, "grad_norm": 0.639065029610627, "learning_rate": 7.29998192441857e-06, "loss": 0.2805, "step": 10708 }, { "epoch": 0.36750171585449554, "grad_norm": 0.8168977257524855, "learning_rate": 7.299488457988344e-06, "loss": 0.3329, "step": 10709 }, { "epoch": 0.3675360329444063, "grad_norm": 0.7276931700625626, "learning_rate": 7.29899496315062e-06, "loss": 0.302, "step": 10710 }, { "epoch": 0.3675703500343171, "grad_norm": 0.8154021459251845, "learning_rate": 7.298501439911492e-06, "loss": 0.317, "step": 10711 }, { "epoch": 0.36760466712422785, "grad_norm": 0.7733503240744541, "learning_rate": 7.29800788827706e-06, "loss": 0.2948, "step": 10712 }, { "epoch": 0.36763898421413865, "grad_norm": 0.819796708461867, "learning_rate": 7.297514308253419e-06, "loss": 0.2914, "step": 10713 }, { "epoch": 0.3676733013040494, "grad_norm": 0.847345904174936, "learning_rate": 7.2970206998466674e-06, "loss": 0.2861, "step": 10714 }, { "epoch": 0.3677076183939602, "grad_norm": 0.8713933084230567, "learning_rate": 7.296527063062904e-06, "loss": 0.3617, "step": 10715 }, { "epoch": 0.36774193548387096, "grad_norm": 0.8487881328200018, "learning_rate": 7.296033397908225e-06, "loss": 0.3354, "step": 10716 }, { "epoch": 0.36777625257378177, "grad_norm": 0.7434509260697046, "learning_rate": 7.295539704388731e-06, "loss": 0.2925, "step": 10717 }, { "epoch": 0.3678105696636925, "grad_norm": 0.7867365661028041, "learning_rate": 7.29504598251052e-06, "loss": 0.2901, "step": 10718 }, { "epoch": 0.3678448867536033, "grad_norm": 0.8035145715644322, "learning_rate": 7.294552232279691e-06, "loss": 0.263, "step": 10719 }, { "epoch": 0.3678792038435141, "grad_norm": 0.8008860290501495, "learning_rate": 7.2940584537023455e-06, "loss": 0.3271, "step": 10720 }, { "epoch": 0.36791352093342483, "grad_norm": 0.7087297435033391, "learning_rate": 7.293564646784581e-06, "loss": 0.2569, "step": 10721 }, { "epoch": 0.36794783802333564, "grad_norm": 0.8331215766018023, "learning_rate": 7.2930708115325014e-06, "loss": 0.2951, "step": 10722 }, { "epoch": 0.3679821551132464, "grad_norm": 0.7417695723870178, "learning_rate": 7.292576947952204e-06, "loss": 0.2763, "step": 10723 }, { "epoch": 0.3680164722031572, "grad_norm": 0.6955277289063305, "learning_rate": 7.2920830560497915e-06, "loss": 0.2684, "step": 10724 }, { "epoch": 0.36805078929306795, "grad_norm": 0.6912286202035675, "learning_rate": 7.291589135831365e-06, "loss": 0.334, "step": 10725 }, { "epoch": 0.3680851063829787, "grad_norm": 0.7573583875820599, "learning_rate": 7.291095187303027e-06, "loss": 0.291, "step": 10726 }, { "epoch": 0.3681194234728895, "grad_norm": 0.7355005633764192, "learning_rate": 7.2906012104708775e-06, "loss": 0.2553, "step": 10727 }, { "epoch": 0.36815374056280026, "grad_norm": 0.8399082872009775, "learning_rate": 7.290107205341024e-06, "loss": 0.3628, "step": 10728 }, { "epoch": 0.36818805765271106, "grad_norm": 0.6926146875494416, "learning_rate": 7.289613171919563e-06, "loss": 0.338, "step": 10729 }, { "epoch": 0.3682223747426218, "grad_norm": 0.9545930239876663, "learning_rate": 7.289119110212601e-06, "loss": 0.2734, "step": 10730 }, { "epoch": 0.3682566918325326, "grad_norm": 0.7331728477329177, "learning_rate": 7.288625020226243e-06, "loss": 0.3067, "step": 10731 }, { "epoch": 0.3682910089224434, "grad_norm": 0.7683501379934647, "learning_rate": 7.28813090196659e-06, "loss": 0.2931, "step": 10732 }, { "epoch": 0.3683253260123541, "grad_norm": 0.7629994641596585, "learning_rate": 7.287636755439746e-06, "loss": 0.2816, "step": 10733 }, { "epoch": 0.36835964310226493, "grad_norm": 0.7679695079892196, "learning_rate": 7.287142580651819e-06, "loss": 0.2894, "step": 10734 }, { "epoch": 0.3683939601921757, "grad_norm": 0.9670275893756526, "learning_rate": 7.286648377608911e-06, "loss": 0.2962, "step": 10735 }, { "epoch": 0.3684282772820865, "grad_norm": 0.7675437196603344, "learning_rate": 7.286154146317129e-06, "loss": 0.341, "step": 10736 }, { "epoch": 0.36846259437199724, "grad_norm": 0.7816884786204893, "learning_rate": 7.285659886782578e-06, "loss": 0.2788, "step": 10737 }, { "epoch": 0.36849691146190805, "grad_norm": 0.7613551609074598, "learning_rate": 7.285165599011363e-06, "loss": 0.2966, "step": 10738 }, { "epoch": 0.3685312285518188, "grad_norm": 0.9253768443689362, "learning_rate": 7.284671283009593e-06, "loss": 0.2597, "step": 10739 }, { "epoch": 0.3685655456417296, "grad_norm": 0.8207808139393591, "learning_rate": 7.284176938783371e-06, "loss": 0.3347, "step": 10740 }, { "epoch": 0.36859986273164036, "grad_norm": 0.763141409394297, "learning_rate": 7.283682566338806e-06, "loss": 0.3186, "step": 10741 }, { "epoch": 0.3686341798215511, "grad_norm": 0.7811694178937724, "learning_rate": 7.2831881656820065e-06, "loss": 0.2669, "step": 10742 }, { "epoch": 0.3686684969114619, "grad_norm": 0.6981613820649012, "learning_rate": 7.282693736819079e-06, "loss": 0.2574, "step": 10743 }, { "epoch": 0.36870281400137267, "grad_norm": 0.7296844603960043, "learning_rate": 7.282199279756133e-06, "loss": 0.3105, "step": 10744 }, { "epoch": 0.3687371310912835, "grad_norm": 0.7222452192643507, "learning_rate": 7.281704794499274e-06, "loss": 0.2874, "step": 10745 }, { "epoch": 0.36877144818119423, "grad_norm": 0.6990462753705999, "learning_rate": 7.281210281054612e-06, "loss": 0.296, "step": 10746 }, { "epoch": 0.36880576527110503, "grad_norm": 0.8071460576567007, "learning_rate": 7.2807157394282565e-06, "loss": 0.3099, "step": 10747 }, { "epoch": 0.3688400823610158, "grad_norm": 0.7045165546655905, "learning_rate": 7.280221169626318e-06, "loss": 0.2581, "step": 10748 }, { "epoch": 0.36887439945092654, "grad_norm": 0.7380877905429022, "learning_rate": 7.279726571654906e-06, "loss": 0.3316, "step": 10749 }, { "epoch": 0.36890871654083734, "grad_norm": 0.7141583950129062, "learning_rate": 7.279231945520129e-06, "loss": 0.2997, "step": 10750 }, { "epoch": 0.3689430336307481, "grad_norm": 0.8262267675354286, "learning_rate": 7.278737291228099e-06, "loss": 0.3377, "step": 10751 }, { "epoch": 0.3689773507206589, "grad_norm": 0.9519306114717864, "learning_rate": 7.278242608784926e-06, "loss": 0.2691, "step": 10752 }, { "epoch": 0.36901166781056965, "grad_norm": 0.7711226140826424, "learning_rate": 7.277747898196723e-06, "loss": 0.3201, "step": 10753 }, { "epoch": 0.36904598490048046, "grad_norm": 0.7426235644746293, "learning_rate": 7.277253159469601e-06, "loss": 0.2668, "step": 10754 }, { "epoch": 0.3690803019903912, "grad_norm": 0.769080978740276, "learning_rate": 7.27675839260967e-06, "loss": 0.283, "step": 10755 }, { "epoch": 0.36911461908030196, "grad_norm": 0.8547954860045932, "learning_rate": 7.276263597623041e-06, "loss": 0.3168, "step": 10756 }, { "epoch": 0.36914893617021277, "grad_norm": 0.7419815081780826, "learning_rate": 7.275768774515833e-06, "loss": 0.3149, "step": 10757 }, { "epoch": 0.3691832532601235, "grad_norm": 0.7370812849590735, "learning_rate": 7.275273923294154e-06, "loss": 0.319, "step": 10758 }, { "epoch": 0.36921757035003433, "grad_norm": 0.7249698937060335, "learning_rate": 7.274779043964117e-06, "loss": 0.3608, "step": 10759 }, { "epoch": 0.3692518874399451, "grad_norm": 0.8103959748251939, "learning_rate": 7.274284136531839e-06, "loss": 0.3023, "step": 10760 }, { "epoch": 0.3692862045298559, "grad_norm": 0.8660713285651294, "learning_rate": 7.273789201003432e-06, "loss": 0.3625, "step": 10761 }, { "epoch": 0.36932052161976664, "grad_norm": 0.9732200805865034, "learning_rate": 7.273294237385009e-06, "loss": 0.3101, "step": 10762 }, { "epoch": 0.36935483870967745, "grad_norm": 0.839253439185656, "learning_rate": 7.2727992456826854e-06, "loss": 0.3257, "step": 10763 }, { "epoch": 0.3693891557995882, "grad_norm": 0.7135158494844942, "learning_rate": 7.272304225902579e-06, "loss": 0.2585, "step": 10764 }, { "epoch": 0.36942347288949895, "grad_norm": 0.6858743967203528, "learning_rate": 7.271809178050801e-06, "loss": 0.2698, "step": 10765 }, { "epoch": 0.36945778997940976, "grad_norm": 0.7978008214277336, "learning_rate": 7.271314102133471e-06, "loss": 0.3589, "step": 10766 }, { "epoch": 0.3694921070693205, "grad_norm": 0.776299455992802, "learning_rate": 7.270818998156703e-06, "loss": 0.3341, "step": 10767 }, { "epoch": 0.3695264241592313, "grad_norm": 0.769087558679462, "learning_rate": 7.270323866126614e-06, "loss": 0.2999, "step": 10768 }, { "epoch": 0.36956074124914207, "grad_norm": 0.7035774303103352, "learning_rate": 7.26982870604932e-06, "loss": 0.3165, "step": 10769 }, { "epoch": 0.3695950583390529, "grad_norm": 0.7416134940143528, "learning_rate": 7.269333517930939e-06, "loss": 0.2966, "step": 10770 }, { "epoch": 0.3696293754289636, "grad_norm": 0.7604772499482767, "learning_rate": 7.268838301777588e-06, "loss": 0.3031, "step": 10771 }, { "epoch": 0.3696636925188744, "grad_norm": 0.7544499840757248, "learning_rate": 7.268343057595385e-06, "loss": 0.3033, "step": 10772 }, { "epoch": 0.3696980096087852, "grad_norm": 0.6962778841444306, "learning_rate": 7.267847785390449e-06, "loss": 0.2656, "step": 10773 }, { "epoch": 0.36973232669869593, "grad_norm": 0.7858957534122486, "learning_rate": 7.267352485168897e-06, "loss": 0.3356, "step": 10774 }, { "epoch": 0.36976664378860674, "grad_norm": 0.693618646307382, "learning_rate": 7.266857156936849e-06, "loss": 0.2865, "step": 10775 }, { "epoch": 0.3698009608785175, "grad_norm": 0.7775641882552018, "learning_rate": 7.266361800700423e-06, "loss": 0.294, "step": 10776 }, { "epoch": 0.3698352779684283, "grad_norm": 0.7325338941299497, "learning_rate": 7.26586641646574e-06, "loss": 0.3271, "step": 10777 }, { "epoch": 0.36986959505833905, "grad_norm": 0.7261928857858536, "learning_rate": 7.265371004238917e-06, "loss": 0.2734, "step": 10778 }, { "epoch": 0.3699039121482498, "grad_norm": 0.798262367298814, "learning_rate": 7.26487556402608e-06, "loss": 0.3162, "step": 10779 }, { "epoch": 0.3699382292381606, "grad_norm": 0.8337763546110492, "learning_rate": 7.264380095833343e-06, "loss": 0.3331, "step": 10780 }, { "epoch": 0.36997254632807136, "grad_norm": 0.7568816846061361, "learning_rate": 7.263884599666831e-06, "loss": 0.3141, "step": 10781 }, { "epoch": 0.37000686341798217, "grad_norm": 0.7855410021505819, "learning_rate": 7.263389075532664e-06, "loss": 0.3497, "step": 10782 }, { "epoch": 0.3700411805078929, "grad_norm": 0.7485582662355907, "learning_rate": 7.262893523436965e-06, "loss": 0.309, "step": 10783 }, { "epoch": 0.3700754975978037, "grad_norm": 0.757740759450337, "learning_rate": 7.262397943385854e-06, "loss": 0.3181, "step": 10784 }, { "epoch": 0.3701098146877145, "grad_norm": 0.8736310700647403, "learning_rate": 7.261902335385454e-06, "loss": 0.351, "step": 10785 }, { "epoch": 0.3701441317776253, "grad_norm": 0.7665439077586002, "learning_rate": 7.261406699441886e-06, "loss": 0.2988, "step": 10786 }, { "epoch": 0.37017844886753604, "grad_norm": 0.6611342100604327, "learning_rate": 7.260911035561277e-06, "loss": 0.2898, "step": 10787 }, { "epoch": 0.3702127659574468, "grad_norm": 0.8599514924648576, "learning_rate": 7.260415343749747e-06, "loss": 0.2949, "step": 10788 }, { "epoch": 0.3702470830473576, "grad_norm": 0.7294395921811965, "learning_rate": 7.259919624013423e-06, "loss": 0.2669, "step": 10789 }, { "epoch": 0.37028140013726835, "grad_norm": 0.7575998880765488, "learning_rate": 7.259423876358425e-06, "loss": 0.2976, "step": 10790 }, { "epoch": 0.37031571722717915, "grad_norm": 0.7548804915507273, "learning_rate": 7.258928100790879e-06, "loss": 0.3121, "step": 10791 }, { "epoch": 0.3703500343170899, "grad_norm": 0.7667997986120176, "learning_rate": 7.258432297316911e-06, "loss": 0.2791, "step": 10792 }, { "epoch": 0.3703843514070007, "grad_norm": 0.720405836280679, "learning_rate": 7.2579364659426435e-06, "loss": 0.3214, "step": 10793 }, { "epoch": 0.37041866849691146, "grad_norm": 0.7407969127903933, "learning_rate": 7.257440606674205e-06, "loss": 0.2483, "step": 10794 }, { "epoch": 0.3704529855868222, "grad_norm": 0.8530667901743268, "learning_rate": 7.25694471951772e-06, "loss": 0.3304, "step": 10795 }, { "epoch": 0.370487302676733, "grad_norm": 0.9144702882497157, "learning_rate": 7.256448804479312e-06, "loss": 0.3069, "step": 10796 }, { "epoch": 0.3705216197666438, "grad_norm": 0.7371055398440652, "learning_rate": 7.255952861565112e-06, "loss": 0.2799, "step": 10797 }, { "epoch": 0.3705559368565546, "grad_norm": 0.7101212578970445, "learning_rate": 7.255456890781244e-06, "loss": 0.3052, "step": 10798 }, { "epoch": 0.37059025394646533, "grad_norm": 0.6892043065627348, "learning_rate": 7.254960892133836e-06, "loss": 0.2721, "step": 10799 }, { "epoch": 0.37062457103637614, "grad_norm": 0.8507898496211546, "learning_rate": 7.254464865629014e-06, "loss": 0.3154, "step": 10800 }, { "epoch": 0.3706588881262869, "grad_norm": 0.974611446534148, "learning_rate": 7.253968811272908e-06, "loss": 0.2916, "step": 10801 }, { "epoch": 0.37069320521619764, "grad_norm": 0.9264596543191781, "learning_rate": 7.253472729071645e-06, "loss": 0.2924, "step": 10802 }, { "epoch": 0.37072752230610845, "grad_norm": 0.8500882792529736, "learning_rate": 7.2529766190313545e-06, "loss": 0.319, "step": 10803 }, { "epoch": 0.3707618393960192, "grad_norm": 0.7415940390525237, "learning_rate": 7.252480481158164e-06, "loss": 0.3155, "step": 10804 }, { "epoch": 0.37079615648593, "grad_norm": 0.7926711399967348, "learning_rate": 7.2519843154582035e-06, "loss": 0.2889, "step": 10805 }, { "epoch": 0.37083047357584076, "grad_norm": 0.7797049507925238, "learning_rate": 7.251488121937602e-06, "loss": 0.2568, "step": 10806 }, { "epoch": 0.37086479066575156, "grad_norm": 0.77425342053547, "learning_rate": 7.25099190060249e-06, "loss": 0.2833, "step": 10807 }, { "epoch": 0.3708991077556623, "grad_norm": 0.7532622503579386, "learning_rate": 7.250495651458996e-06, "loss": 0.2843, "step": 10808 }, { "epoch": 0.3709334248455731, "grad_norm": 0.7819363775750425, "learning_rate": 7.249999374513253e-06, "loss": 0.3244, "step": 10809 }, { "epoch": 0.3709677419354839, "grad_norm": 0.8126109863102061, "learning_rate": 7.249503069771392e-06, "loss": 0.3172, "step": 10810 }, { "epoch": 0.3710020590253946, "grad_norm": 0.7693688422074307, "learning_rate": 7.249006737239543e-06, "loss": 0.2945, "step": 10811 }, { "epoch": 0.37103637611530543, "grad_norm": 0.7974154906796258, "learning_rate": 7.248510376923837e-06, "loss": 0.2622, "step": 10812 }, { "epoch": 0.3710706932052162, "grad_norm": 0.7082491273156457, "learning_rate": 7.248013988830407e-06, "loss": 0.2978, "step": 10813 }, { "epoch": 0.371105010295127, "grad_norm": 0.7551558188776578, "learning_rate": 7.247517572965386e-06, "loss": 0.3018, "step": 10814 }, { "epoch": 0.37113932738503774, "grad_norm": 0.7702031051977314, "learning_rate": 7.247021129334903e-06, "loss": 0.3384, "step": 10815 }, { "epoch": 0.37117364447494855, "grad_norm": 0.7841663259300882, "learning_rate": 7.2465246579450965e-06, "loss": 0.3436, "step": 10816 }, { "epoch": 0.3712079615648593, "grad_norm": 0.7625246693252583, "learning_rate": 7.246028158802097e-06, "loss": 0.3203, "step": 10817 }, { "epoch": 0.37124227865477005, "grad_norm": 0.7574819532996208, "learning_rate": 7.245531631912037e-06, "loss": 0.2852, "step": 10818 }, { "epoch": 0.37127659574468086, "grad_norm": 0.8477822893486273, "learning_rate": 7.245035077281053e-06, "loss": 0.2481, "step": 10819 }, { "epoch": 0.3713109128345916, "grad_norm": 0.8237682094427075, "learning_rate": 7.244538494915277e-06, "loss": 0.3278, "step": 10820 }, { "epoch": 0.3713452299245024, "grad_norm": 0.7780778479204975, "learning_rate": 7.244041884820845e-06, "loss": 0.306, "step": 10821 }, { "epoch": 0.37137954701441317, "grad_norm": 0.810274993057306, "learning_rate": 7.243545247003892e-06, "loss": 0.2873, "step": 10822 }, { "epoch": 0.371413864104324, "grad_norm": 0.8557679228450147, "learning_rate": 7.243048581470552e-06, "loss": 0.3478, "step": 10823 }, { "epoch": 0.37144818119423473, "grad_norm": 0.8089140427090514, "learning_rate": 7.242551888226964e-06, "loss": 0.365, "step": 10824 }, { "epoch": 0.3714824982841455, "grad_norm": 0.7997694291453438, "learning_rate": 7.24205516727926e-06, "loss": 0.2809, "step": 10825 }, { "epoch": 0.3715168153740563, "grad_norm": 0.6838062419203008, "learning_rate": 7.24155841863358e-06, "loss": 0.3242, "step": 10826 }, { "epoch": 0.37155113246396704, "grad_norm": 0.7535102966078392, "learning_rate": 7.241061642296059e-06, "loss": 0.3201, "step": 10827 }, { "epoch": 0.37158544955387784, "grad_norm": 0.743270591986534, "learning_rate": 7.240564838272833e-06, "loss": 0.3142, "step": 10828 }, { "epoch": 0.3716197666437886, "grad_norm": 0.8395834427681522, "learning_rate": 7.240068006570041e-06, "loss": 0.3018, "step": 10829 }, { "epoch": 0.3716540837336994, "grad_norm": 0.8263696123712181, "learning_rate": 7.239571147193821e-06, "loss": 0.3085, "step": 10830 }, { "epoch": 0.37168840082361015, "grad_norm": 0.7263231633859105, "learning_rate": 7.23907426015031e-06, "loss": 0.3118, "step": 10831 }, { "epoch": 0.3717227179135209, "grad_norm": 0.754282414560372, "learning_rate": 7.238577345445648e-06, "loss": 0.3275, "step": 10832 }, { "epoch": 0.3717570350034317, "grad_norm": 0.7305115910834933, "learning_rate": 7.238080403085972e-06, "loss": 0.2855, "step": 10833 }, { "epoch": 0.37179135209334246, "grad_norm": 0.7975897210557883, "learning_rate": 7.237583433077423e-06, "loss": 0.2935, "step": 10834 }, { "epoch": 0.37182566918325327, "grad_norm": 0.6924792647551866, "learning_rate": 7.237086435426139e-06, "loss": 0.2613, "step": 10835 }, { "epoch": 0.371859986273164, "grad_norm": 0.8027845251309145, "learning_rate": 7.236589410138259e-06, "loss": 0.3302, "step": 10836 }, { "epoch": 0.37189430336307483, "grad_norm": 0.8156270199572598, "learning_rate": 7.236092357219925e-06, "loss": 0.3312, "step": 10837 }, { "epoch": 0.3719286204529856, "grad_norm": 0.7868785246509427, "learning_rate": 7.235595276677277e-06, "loss": 0.3133, "step": 10838 }, { "epoch": 0.3719629375428964, "grad_norm": 0.7944780815048659, "learning_rate": 7.235098168516457e-06, "loss": 0.286, "step": 10839 }, { "epoch": 0.37199725463280714, "grad_norm": 0.7207816962151249, "learning_rate": 7.234601032743605e-06, "loss": 0.306, "step": 10840 }, { "epoch": 0.3720315717227179, "grad_norm": 0.7774412316587682, "learning_rate": 7.234103869364861e-06, "loss": 0.267, "step": 10841 }, { "epoch": 0.3720658888126287, "grad_norm": 0.7561844117055773, "learning_rate": 7.233606678386369e-06, "loss": 0.3017, "step": 10842 }, { "epoch": 0.37210020590253945, "grad_norm": 0.7904725226410931, "learning_rate": 7.233109459814271e-06, "loss": 0.3168, "step": 10843 }, { "epoch": 0.37213452299245026, "grad_norm": 0.7594462703673659, "learning_rate": 7.232612213654708e-06, "loss": 0.2897, "step": 10844 }, { "epoch": 0.372168840082361, "grad_norm": 0.7773258306296692, "learning_rate": 7.232114939913825e-06, "loss": 0.3074, "step": 10845 }, { "epoch": 0.3722031571722718, "grad_norm": 0.7022955462721756, "learning_rate": 7.231617638597763e-06, "loss": 0.3125, "step": 10846 }, { "epoch": 0.37223747426218257, "grad_norm": 0.8716952126911428, "learning_rate": 7.231120309712669e-06, "loss": 0.3073, "step": 10847 }, { "epoch": 0.3722717913520933, "grad_norm": 0.7930737441724285, "learning_rate": 7.230622953264685e-06, "loss": 0.2843, "step": 10848 }, { "epoch": 0.3723061084420041, "grad_norm": 0.8355599569211652, "learning_rate": 7.230125569259954e-06, "loss": 0.3018, "step": 10849 }, { "epoch": 0.3723404255319149, "grad_norm": 0.7334310380504071, "learning_rate": 7.229628157704623e-06, "loss": 0.2585, "step": 10850 }, { "epoch": 0.3723747426218257, "grad_norm": 0.7528907790372347, "learning_rate": 7.229130718604833e-06, "loss": 0.3246, "step": 10851 }, { "epoch": 0.37240905971173643, "grad_norm": 0.8577801909879239, "learning_rate": 7.228633251966733e-06, "loss": 0.3347, "step": 10852 }, { "epoch": 0.37244337680164724, "grad_norm": 0.7362419017034856, "learning_rate": 7.228135757796467e-06, "loss": 0.2372, "step": 10853 }, { "epoch": 0.372477693891558, "grad_norm": 0.6871622166704267, "learning_rate": 7.227638236100183e-06, "loss": 0.2894, "step": 10854 }, { "epoch": 0.37251201098146874, "grad_norm": 0.8130858318151862, "learning_rate": 7.227140686884025e-06, "loss": 0.2867, "step": 10855 }, { "epoch": 0.37254632807137955, "grad_norm": 0.7357432196959216, "learning_rate": 7.22664311015414e-06, "loss": 0.2986, "step": 10856 }, { "epoch": 0.3725806451612903, "grad_norm": 0.8074885477955039, "learning_rate": 7.226145505916676e-06, "loss": 0.3504, "step": 10857 }, { "epoch": 0.3726149622512011, "grad_norm": 0.7060931098655735, "learning_rate": 7.225647874177779e-06, "loss": 0.2828, "step": 10858 }, { "epoch": 0.37264927934111186, "grad_norm": 0.8118896486433677, "learning_rate": 7.225150214943598e-06, "loss": 0.3363, "step": 10859 }, { "epoch": 0.37268359643102267, "grad_norm": 0.6953493741782151, "learning_rate": 7.22465252822028e-06, "loss": 0.3031, "step": 10860 }, { "epoch": 0.3727179135209334, "grad_norm": 0.7933607936163604, "learning_rate": 7.2241548140139735e-06, "loss": 0.3274, "step": 10861 }, { "epoch": 0.3727522306108442, "grad_norm": 0.7443725999981526, "learning_rate": 7.2236570723308275e-06, "loss": 0.3034, "step": 10862 }, { "epoch": 0.372786547700755, "grad_norm": 0.9840541241369277, "learning_rate": 7.223159303176992e-06, "loss": 0.3156, "step": 10863 }, { "epoch": 0.37282086479066573, "grad_norm": 0.8370971906891633, "learning_rate": 7.222661506558615e-06, "loss": 0.2825, "step": 10864 }, { "epoch": 0.37285518188057654, "grad_norm": 0.8289786431777816, "learning_rate": 7.222163682481845e-06, "loss": 0.3453, "step": 10865 }, { "epoch": 0.3728894989704873, "grad_norm": 0.9480718787450645, "learning_rate": 7.221665830952836e-06, "loss": 0.3192, "step": 10866 }, { "epoch": 0.3729238160603981, "grad_norm": 0.8618859118470036, "learning_rate": 7.2211679519777326e-06, "loss": 0.2982, "step": 10867 }, { "epoch": 0.37295813315030885, "grad_norm": 0.7273701326556595, "learning_rate": 7.22067004556269e-06, "loss": 0.2655, "step": 10868 }, { "epoch": 0.37299245024021965, "grad_norm": 0.8235256055292314, "learning_rate": 7.220172111713859e-06, "loss": 0.2574, "step": 10869 }, { "epoch": 0.3730267673301304, "grad_norm": 0.7722529579926238, "learning_rate": 7.21967415043739e-06, "loss": 0.298, "step": 10870 }, { "epoch": 0.37306108442004116, "grad_norm": 0.820502631573975, "learning_rate": 7.219176161739434e-06, "loss": 0.2642, "step": 10871 }, { "epoch": 0.37309540150995196, "grad_norm": 0.7317949207373767, "learning_rate": 7.218678145626144e-06, "loss": 0.302, "step": 10872 }, { "epoch": 0.3731297185998627, "grad_norm": 0.7871916751796659, "learning_rate": 7.218180102103673e-06, "loss": 0.3012, "step": 10873 }, { "epoch": 0.3731640356897735, "grad_norm": 0.8132259683525975, "learning_rate": 7.217682031178173e-06, "loss": 0.2678, "step": 10874 }, { "epoch": 0.3731983527796843, "grad_norm": 0.7965992263449732, "learning_rate": 7.217183932855795e-06, "loss": 0.2678, "step": 10875 }, { "epoch": 0.3732326698695951, "grad_norm": 0.7978512384854664, "learning_rate": 7.216685807142696e-06, "loss": 0.3322, "step": 10876 }, { "epoch": 0.37326698695950583, "grad_norm": 0.894674114412842, "learning_rate": 7.216187654045028e-06, "loss": 0.2775, "step": 10877 }, { "epoch": 0.3733013040494166, "grad_norm": 0.7237509703956695, "learning_rate": 7.215689473568946e-06, "loss": 0.2816, "step": 10878 }, { "epoch": 0.3733356211393274, "grad_norm": 0.7738480437813248, "learning_rate": 7.215191265720604e-06, "loss": 0.2961, "step": 10879 }, { "epoch": 0.37336993822923814, "grad_norm": 0.7770739283153278, "learning_rate": 7.214693030506155e-06, "loss": 0.2827, "step": 10880 }, { "epoch": 0.37340425531914895, "grad_norm": 0.7051650305201088, "learning_rate": 7.214194767931757e-06, "loss": 0.2674, "step": 10881 }, { "epoch": 0.3734385724090597, "grad_norm": 0.7740955073851058, "learning_rate": 7.213696478003562e-06, "loss": 0.3141, "step": 10882 }, { "epoch": 0.3734728894989705, "grad_norm": 0.7697667981133957, "learning_rate": 7.21319816072773e-06, "loss": 0.3368, "step": 10883 }, { "epoch": 0.37350720658888126, "grad_norm": 0.670670611113149, "learning_rate": 7.212699816110415e-06, "loss": 0.2754, "step": 10884 }, { "epoch": 0.37354152367879206, "grad_norm": 0.8031518958108862, "learning_rate": 7.212201444157773e-06, "loss": 0.2868, "step": 10885 }, { "epoch": 0.3735758407687028, "grad_norm": 0.7990988103302229, "learning_rate": 7.211703044875962e-06, "loss": 0.2881, "step": 10886 }, { "epoch": 0.37361015785861357, "grad_norm": 0.7825272311610819, "learning_rate": 7.211204618271138e-06, "loss": 0.2918, "step": 10887 }, { "epoch": 0.3736444749485244, "grad_norm": 0.7708708940886738, "learning_rate": 7.2107061643494606e-06, "loss": 0.3094, "step": 10888 }, { "epoch": 0.3736787920384351, "grad_norm": 0.8036098856585019, "learning_rate": 7.210207683117083e-06, "loss": 0.3326, "step": 10889 }, { "epoch": 0.37371310912834593, "grad_norm": 0.8107211495099289, "learning_rate": 7.209709174580167e-06, "loss": 0.2921, "step": 10890 }, { "epoch": 0.3737474262182567, "grad_norm": 0.7425442334172574, "learning_rate": 7.2092106387448725e-06, "loss": 0.3343, "step": 10891 }, { "epoch": 0.3737817433081675, "grad_norm": 0.7534342484925636, "learning_rate": 7.208712075617355e-06, "loss": 0.329, "step": 10892 }, { "epoch": 0.37381606039807824, "grad_norm": 0.7197004329557619, "learning_rate": 7.2082134852037745e-06, "loss": 0.2573, "step": 10893 }, { "epoch": 0.373850377487989, "grad_norm": 0.8847228675344507, "learning_rate": 7.20771486751029e-06, "loss": 0.3106, "step": 10894 }, { "epoch": 0.3738846945778998, "grad_norm": 0.7382946443691255, "learning_rate": 7.207216222543064e-06, "loss": 0.2922, "step": 10895 }, { "epoch": 0.37391901166781055, "grad_norm": 0.7920023193696625, "learning_rate": 7.206717550308255e-06, "loss": 0.306, "step": 10896 }, { "epoch": 0.37395332875772136, "grad_norm": 0.8149347519106855, "learning_rate": 7.206218850812021e-06, "loss": 0.333, "step": 10897 }, { "epoch": 0.3739876458476321, "grad_norm": 0.7843304877259735, "learning_rate": 7.205720124060529e-06, "loss": 0.3009, "step": 10898 }, { "epoch": 0.3740219629375429, "grad_norm": 0.7683167620348266, "learning_rate": 7.205221370059934e-06, "loss": 0.2633, "step": 10899 }, { "epoch": 0.37405628002745367, "grad_norm": 0.8742706953129951, "learning_rate": 7.2047225888164e-06, "loss": 0.2927, "step": 10900 }, { "epoch": 0.3740905971173644, "grad_norm": 0.823284366182659, "learning_rate": 7.204223780336091e-06, "loss": 0.3211, "step": 10901 }, { "epoch": 0.3741249142072752, "grad_norm": 0.8165193190491369, "learning_rate": 7.203724944625165e-06, "loss": 0.2812, "step": 10902 }, { "epoch": 0.374159231297186, "grad_norm": 1.1626687316626008, "learning_rate": 7.203226081689787e-06, "loss": 0.2948, "step": 10903 }, { "epoch": 0.3741935483870968, "grad_norm": 0.8505032698114421, "learning_rate": 7.20272719153612e-06, "loss": 0.3262, "step": 10904 }, { "epoch": 0.37422786547700754, "grad_norm": 0.7002300734738116, "learning_rate": 7.202228274170326e-06, "loss": 0.2889, "step": 10905 }, { "epoch": 0.37426218256691834, "grad_norm": 0.7160363516260324, "learning_rate": 7.201729329598569e-06, "loss": 0.2786, "step": 10906 }, { "epoch": 0.3742964996568291, "grad_norm": 0.8015203762214321, "learning_rate": 7.201230357827014e-06, "loss": 0.3755, "step": 10907 }, { "epoch": 0.3743308167467399, "grad_norm": 0.7240221866760448, "learning_rate": 7.200731358861825e-06, "loss": 0.2945, "step": 10908 }, { "epoch": 0.37436513383665065, "grad_norm": 0.8249550233472811, "learning_rate": 7.200232332709164e-06, "loss": 0.3027, "step": 10909 }, { "epoch": 0.3743994509265614, "grad_norm": 0.7083874939137041, "learning_rate": 7.199733279375198e-06, "loss": 0.3197, "step": 10910 }, { "epoch": 0.3744337680164722, "grad_norm": 0.747021314993768, "learning_rate": 7.199234198866093e-06, "loss": 0.268, "step": 10911 }, { "epoch": 0.37446808510638296, "grad_norm": 0.7667342992244259, "learning_rate": 7.198735091188013e-06, "loss": 0.2845, "step": 10912 }, { "epoch": 0.37450240219629377, "grad_norm": 0.7561572159159909, "learning_rate": 7.198235956347124e-06, "loss": 0.3033, "step": 10913 }, { "epoch": 0.3745367192862045, "grad_norm": 0.7876852816094881, "learning_rate": 7.197736794349593e-06, "loss": 0.2621, "step": 10914 }, { "epoch": 0.37457103637611533, "grad_norm": 0.8091592520369497, "learning_rate": 7.1972376052015865e-06, "loss": 0.3541, "step": 10915 }, { "epoch": 0.3746053534660261, "grad_norm": 0.745338836307266, "learning_rate": 7.196738388909271e-06, "loss": 0.3074, "step": 10916 }, { "epoch": 0.37463967055593683, "grad_norm": 0.6754360554319654, "learning_rate": 7.196239145478815e-06, "loss": 0.2493, "step": 10917 }, { "epoch": 0.37467398764584764, "grad_norm": 0.80649264098422, "learning_rate": 7.195739874916383e-06, "loss": 0.3172, "step": 10918 }, { "epoch": 0.3747083047357584, "grad_norm": 0.8412928285154335, "learning_rate": 7.195240577228145e-06, "loss": 0.3595, "step": 10919 }, { "epoch": 0.3747426218256692, "grad_norm": 0.7972314273630003, "learning_rate": 7.194741252420271e-06, "loss": 0.325, "step": 10920 }, { "epoch": 0.37477693891557995, "grad_norm": 0.8119045314121872, "learning_rate": 7.194241900498926e-06, "loss": 0.2891, "step": 10921 }, { "epoch": 0.37481125600549076, "grad_norm": 0.8046917163183064, "learning_rate": 7.19374252147028e-06, "loss": 0.263, "step": 10922 }, { "epoch": 0.3748455730954015, "grad_norm": 0.8363226347346793, "learning_rate": 7.193243115340504e-06, "loss": 0.3075, "step": 10923 }, { "epoch": 0.37487989018531226, "grad_norm": 0.7383071501837198, "learning_rate": 7.192743682115767e-06, "loss": 0.2862, "step": 10924 }, { "epoch": 0.37491420727522307, "grad_norm": 0.7879951703250556, "learning_rate": 7.192244221802237e-06, "loss": 0.3762, "step": 10925 }, { "epoch": 0.3749485243651338, "grad_norm": 0.7885075992379006, "learning_rate": 7.191744734406085e-06, "loss": 0.2676, "step": 10926 }, { "epoch": 0.3749828414550446, "grad_norm": 0.7552180107045282, "learning_rate": 7.191245219933483e-06, "loss": 0.2653, "step": 10927 }, { "epoch": 0.3750171585449554, "grad_norm": 0.7889016376945839, "learning_rate": 7.190745678390602e-06, "loss": 0.2556, "step": 10928 }, { "epoch": 0.3750514756348662, "grad_norm": 0.7495782364758136, "learning_rate": 7.190246109783611e-06, "loss": 0.3416, "step": 10929 }, { "epoch": 0.37508579272477693, "grad_norm": 0.709791747696718, "learning_rate": 7.189746514118684e-06, "loss": 0.2998, "step": 10930 }, { "epoch": 0.37512010981468774, "grad_norm": 0.7428731216039183, "learning_rate": 7.1892468914019905e-06, "loss": 0.3131, "step": 10931 }, { "epoch": 0.3751544269045985, "grad_norm": 0.709878311389249, "learning_rate": 7.188747241639706e-06, "loss": 0.2884, "step": 10932 }, { "epoch": 0.37518874399450924, "grad_norm": 0.8136155174111159, "learning_rate": 7.188247564837998e-06, "loss": 0.3226, "step": 10933 }, { "epoch": 0.37522306108442005, "grad_norm": 0.8190968348073017, "learning_rate": 7.187747861003045e-06, "loss": 0.2649, "step": 10934 }, { "epoch": 0.3752573781743308, "grad_norm": 0.7766303362160627, "learning_rate": 7.187248130141017e-06, "loss": 0.3182, "step": 10935 }, { "epoch": 0.3752916952642416, "grad_norm": 0.7698008920300429, "learning_rate": 7.18674837225809e-06, "loss": 0.2423, "step": 10936 }, { "epoch": 0.37532601235415236, "grad_norm": 0.6709123218647853, "learning_rate": 7.186248587360434e-06, "loss": 0.297, "step": 10937 }, { "epoch": 0.37536032944406317, "grad_norm": 0.8136397465701581, "learning_rate": 7.185748775454227e-06, "loss": 0.2785, "step": 10938 }, { "epoch": 0.3753946465339739, "grad_norm": 0.8865075331795613, "learning_rate": 7.185248936545642e-06, "loss": 0.3272, "step": 10939 }, { "epoch": 0.37542896362388467, "grad_norm": 0.8325438876467578, "learning_rate": 7.184749070640855e-06, "loss": 0.3477, "step": 10940 }, { "epoch": 0.3754632807137955, "grad_norm": 0.8155238267856109, "learning_rate": 7.184249177746038e-06, "loss": 0.3387, "step": 10941 }, { "epoch": 0.37549759780370623, "grad_norm": 0.7587372358787143, "learning_rate": 7.183749257867369e-06, "loss": 0.2377, "step": 10942 }, { "epoch": 0.37553191489361704, "grad_norm": 0.8561450649559047, "learning_rate": 7.183249311011027e-06, "loss": 0.2891, "step": 10943 }, { "epoch": 0.3755662319835278, "grad_norm": 0.7268237083693517, "learning_rate": 7.182749337183183e-06, "loss": 0.2822, "step": 10944 }, { "epoch": 0.3756005490734386, "grad_norm": 0.9338839153313532, "learning_rate": 7.182249336390017e-06, "loss": 0.3408, "step": 10945 }, { "epoch": 0.37563486616334935, "grad_norm": 0.8504074800943799, "learning_rate": 7.181749308637705e-06, "loss": 0.3398, "step": 10946 }, { "epoch": 0.3756691832532601, "grad_norm": 0.7800924365405686, "learning_rate": 7.181249253932423e-06, "loss": 0.3056, "step": 10947 }, { "epoch": 0.3757035003431709, "grad_norm": 0.7226061464569868, "learning_rate": 7.180749172280349e-06, "loss": 0.2693, "step": 10948 }, { "epoch": 0.37573781743308166, "grad_norm": 0.7578028287765596, "learning_rate": 7.180249063687662e-06, "loss": 0.2697, "step": 10949 }, { "epoch": 0.37577213452299246, "grad_norm": 0.8334614056853423, "learning_rate": 7.1797489281605395e-06, "loss": 0.3172, "step": 10950 }, { "epoch": 0.3758064516129032, "grad_norm": 0.8123168886116924, "learning_rate": 7.17924876570516e-06, "loss": 0.3378, "step": 10951 }, { "epoch": 0.375840768702814, "grad_norm": 0.6945309661484462, "learning_rate": 7.178748576327704e-06, "loss": 0.2687, "step": 10952 }, { "epoch": 0.3758750857927248, "grad_norm": 0.7899260758103798, "learning_rate": 7.178248360034349e-06, "loss": 0.3385, "step": 10953 }, { "epoch": 0.3759094028826356, "grad_norm": 0.7248726561744145, "learning_rate": 7.177748116831274e-06, "loss": 0.2754, "step": 10954 }, { "epoch": 0.37594371997254633, "grad_norm": 0.737027223551256, "learning_rate": 7.177247846724662e-06, "loss": 0.2999, "step": 10955 }, { "epoch": 0.3759780370624571, "grad_norm": 0.7532263747499359, "learning_rate": 7.176747549720689e-06, "loss": 0.3877, "step": 10956 }, { "epoch": 0.3760123541523679, "grad_norm": 0.9857516810541782, "learning_rate": 7.17624722582554e-06, "loss": 0.2719, "step": 10957 }, { "epoch": 0.37604667124227864, "grad_norm": 0.8347967203022393, "learning_rate": 7.175746875045393e-06, "loss": 0.3208, "step": 10958 }, { "epoch": 0.37608098833218945, "grad_norm": 0.8291201807091204, "learning_rate": 7.17524649738643e-06, "loss": 0.2825, "step": 10959 }, { "epoch": 0.3761153054221002, "grad_norm": 0.7694462860005171, "learning_rate": 7.174746092854833e-06, "loss": 0.3083, "step": 10960 }, { "epoch": 0.376149622512011, "grad_norm": 0.8524856944848194, "learning_rate": 7.174245661456782e-06, "loss": 0.3579, "step": 10961 }, { "epoch": 0.37618393960192176, "grad_norm": 0.793464893180186, "learning_rate": 7.173745203198463e-06, "loss": 0.3097, "step": 10962 }, { "epoch": 0.3762182566918325, "grad_norm": 0.8547269633904503, "learning_rate": 7.173244718086055e-06, "loss": 0.273, "step": 10963 }, { "epoch": 0.3762525737817433, "grad_norm": 0.7511000698873647, "learning_rate": 7.1727442061257425e-06, "loss": 0.2838, "step": 10964 }, { "epoch": 0.37628689087165407, "grad_norm": 0.8204028318826687, "learning_rate": 7.172243667323709e-06, "loss": 0.3221, "step": 10965 }, { "epoch": 0.3763212079615649, "grad_norm": 0.8575548856836713, "learning_rate": 7.171743101686137e-06, "loss": 0.2836, "step": 10966 }, { "epoch": 0.3763555250514756, "grad_norm": 0.789635473012646, "learning_rate": 7.17124250921921e-06, "loss": 0.2507, "step": 10967 }, { "epoch": 0.37638984214138643, "grad_norm": 0.7357123424436945, "learning_rate": 7.170741889929116e-06, "loss": 0.3803, "step": 10968 }, { "epoch": 0.3764241592312972, "grad_norm": 0.7164543031419359, "learning_rate": 7.170241243822035e-06, "loss": 0.2809, "step": 10969 }, { "epoch": 0.37645847632120794, "grad_norm": 0.7916874926697969, "learning_rate": 7.169740570904153e-06, "loss": 0.327, "step": 10970 }, { "epoch": 0.37649279341111874, "grad_norm": 0.7872509031262985, "learning_rate": 7.169239871181657e-06, "loss": 0.2659, "step": 10971 }, { "epoch": 0.3765271105010295, "grad_norm": 0.8050812281904339, "learning_rate": 7.168739144660732e-06, "loss": 0.3184, "step": 10972 }, { "epoch": 0.3765614275909403, "grad_norm": 0.8375189378535922, "learning_rate": 7.168238391347562e-06, "loss": 0.2756, "step": 10973 }, { "epoch": 0.37659574468085105, "grad_norm": 0.7403758480899197, "learning_rate": 7.167737611248334e-06, "loss": 0.2712, "step": 10974 }, { "epoch": 0.37663006177076186, "grad_norm": 0.7348067520033312, "learning_rate": 7.167236804369238e-06, "loss": 0.2675, "step": 10975 }, { "epoch": 0.3766643788606726, "grad_norm": 0.8171852629035427, "learning_rate": 7.166735970716456e-06, "loss": 0.3884, "step": 10976 }, { "epoch": 0.3766986959505834, "grad_norm": 0.8552815303656489, "learning_rate": 7.166235110296177e-06, "loss": 0.2687, "step": 10977 }, { "epoch": 0.37673301304049417, "grad_norm": 0.7235220307904301, "learning_rate": 7.165734223114589e-06, "loss": 0.3111, "step": 10978 }, { "epoch": 0.3767673301304049, "grad_norm": 0.7392148572324622, "learning_rate": 7.16523330917788e-06, "loss": 0.284, "step": 10979 }, { "epoch": 0.3768016472203157, "grad_norm": 0.837429619840057, "learning_rate": 7.164732368492238e-06, "loss": 0.3633, "step": 10980 }, { "epoch": 0.3768359643102265, "grad_norm": 0.782100765038098, "learning_rate": 7.1642314010638526e-06, "loss": 0.2903, "step": 10981 }, { "epoch": 0.3768702814001373, "grad_norm": 0.7149382029437235, "learning_rate": 7.1637304068989096e-06, "loss": 0.2513, "step": 10982 }, { "epoch": 0.37690459849004804, "grad_norm": 0.7721706632653756, "learning_rate": 7.163229386003601e-06, "loss": 0.2956, "step": 10983 }, { "epoch": 0.37693891557995884, "grad_norm": 0.854724025938995, "learning_rate": 7.162728338384115e-06, "loss": 0.3022, "step": 10984 }, { "epoch": 0.3769732326698696, "grad_norm": 0.8422614122341816, "learning_rate": 7.162227264046643e-06, "loss": 0.3206, "step": 10985 }, { "epoch": 0.37700754975978035, "grad_norm": 0.8140514534562323, "learning_rate": 7.161726162997371e-06, "loss": 0.2445, "step": 10986 }, { "epoch": 0.37704186684969115, "grad_norm": 0.8491107904863351, "learning_rate": 7.161225035242495e-06, "loss": 0.3104, "step": 10987 }, { "epoch": 0.3770761839396019, "grad_norm": 0.8061892372690422, "learning_rate": 7.160723880788204e-06, "loss": 0.3127, "step": 10988 }, { "epoch": 0.3771105010295127, "grad_norm": 0.7852118001491475, "learning_rate": 7.160222699640688e-06, "loss": 0.3322, "step": 10989 }, { "epoch": 0.37714481811942346, "grad_norm": 0.8951399438899503, "learning_rate": 7.159721491806138e-06, "loss": 0.3184, "step": 10990 }, { "epoch": 0.37717913520933427, "grad_norm": 0.6764778740850609, "learning_rate": 7.159220257290748e-06, "loss": 0.2646, "step": 10991 }, { "epoch": 0.377213452299245, "grad_norm": 0.9498536312827816, "learning_rate": 7.15871899610071e-06, "loss": 0.3432, "step": 10992 }, { "epoch": 0.3772477693891558, "grad_norm": 0.8026624505231205, "learning_rate": 7.158217708242215e-06, "loss": 0.3083, "step": 10993 }, { "epoch": 0.3772820864790666, "grad_norm": 0.7249566205344967, "learning_rate": 7.157716393721454e-06, "loss": 0.2836, "step": 10994 }, { "epoch": 0.37731640356897733, "grad_norm": 0.8450624969018258, "learning_rate": 7.157215052544625e-06, "loss": 0.3062, "step": 10995 }, { "epoch": 0.37735072065888814, "grad_norm": 0.7361087537340063, "learning_rate": 7.1567136847179186e-06, "loss": 0.3264, "step": 10996 }, { "epoch": 0.3773850377487989, "grad_norm": 0.7570186381205711, "learning_rate": 7.156212290247531e-06, "loss": 0.299, "step": 10997 }, { "epoch": 0.3774193548387097, "grad_norm": 0.9404751136337033, "learning_rate": 7.1557108691396515e-06, "loss": 0.3196, "step": 10998 }, { "epoch": 0.37745367192862045, "grad_norm": 0.7366208082432821, "learning_rate": 7.155209421400479e-06, "loss": 0.312, "step": 10999 }, { "epoch": 0.37748798901853126, "grad_norm": 0.7507138853040072, "learning_rate": 7.154707947036206e-06, "loss": 0.2442, "step": 11000 }, { "epoch": 0.377522306108442, "grad_norm": 0.7975504851072845, "learning_rate": 7.154206446053029e-06, "loss": 0.2985, "step": 11001 }, { "epoch": 0.37755662319835276, "grad_norm": 0.7360819281455667, "learning_rate": 7.153704918457143e-06, "loss": 0.3027, "step": 11002 }, { "epoch": 0.37759094028826357, "grad_norm": 0.7680222740992492, "learning_rate": 7.153203364254744e-06, "loss": 0.298, "step": 11003 }, { "epoch": 0.3776252573781743, "grad_norm": 0.7395597377149165, "learning_rate": 7.152701783452027e-06, "loss": 0.2884, "step": 11004 }, { "epoch": 0.3776595744680851, "grad_norm": 0.8231823882389006, "learning_rate": 7.152200176055191e-06, "loss": 0.3057, "step": 11005 }, { "epoch": 0.3776938915579959, "grad_norm": 0.820514483031375, "learning_rate": 7.151698542070429e-06, "loss": 0.3478, "step": 11006 }, { "epoch": 0.3777282086479067, "grad_norm": 0.7803147818126293, "learning_rate": 7.151196881503941e-06, "loss": 0.2621, "step": 11007 }, { "epoch": 0.37776252573781743, "grad_norm": 1.0127250185042718, "learning_rate": 7.150695194361923e-06, "loss": 0.2947, "step": 11008 }, { "epoch": 0.3777968428277282, "grad_norm": 0.7900418001605627, "learning_rate": 7.150193480650574e-06, "loss": 0.3088, "step": 11009 }, { "epoch": 0.377831159917639, "grad_norm": 0.7851032977993985, "learning_rate": 7.1496917403760915e-06, "loss": 0.3105, "step": 11010 }, { "epoch": 0.37786547700754974, "grad_norm": 0.8031606716615883, "learning_rate": 7.1491899735446736e-06, "loss": 0.3127, "step": 11011 }, { "epoch": 0.37789979409746055, "grad_norm": 0.8179987202329919, "learning_rate": 7.14868818016252e-06, "loss": 0.2965, "step": 11012 }, { "epoch": 0.3779341111873713, "grad_norm": 0.9014534716263028, "learning_rate": 7.1481863602358285e-06, "loss": 0.2632, "step": 11013 }, { "epoch": 0.3779684282772821, "grad_norm": 0.775987907932815, "learning_rate": 7.1476845137707985e-06, "loss": 0.3484, "step": 11014 }, { "epoch": 0.37800274536719286, "grad_norm": 0.7325528490276795, "learning_rate": 7.147182640773631e-06, "loss": 0.3497, "step": 11015 }, { "epoch": 0.3780370624571036, "grad_norm": 0.8061747684026145, "learning_rate": 7.146680741250524e-06, "loss": 0.2889, "step": 11016 }, { "epoch": 0.3780713795470144, "grad_norm": 0.7552322616118351, "learning_rate": 7.146178815207681e-06, "loss": 0.274, "step": 11017 }, { "epoch": 0.37810569663692517, "grad_norm": 0.7761341029332147, "learning_rate": 7.1456768626513e-06, "loss": 0.405, "step": 11018 }, { "epoch": 0.378140013726836, "grad_norm": 0.846822012848885, "learning_rate": 7.145174883587582e-06, "loss": 0.3711, "step": 11019 }, { "epoch": 0.37817433081674673, "grad_norm": 0.740252377935661, "learning_rate": 7.1446728780227314e-06, "loss": 0.2978, "step": 11020 }, { "epoch": 0.37820864790665754, "grad_norm": 0.727099786806399, "learning_rate": 7.144170845962948e-06, "loss": 0.31, "step": 11021 }, { "epoch": 0.3782429649965683, "grad_norm": 0.8058602551711843, "learning_rate": 7.1436687874144315e-06, "loss": 0.3713, "step": 11022 }, { "epoch": 0.3782772820864791, "grad_norm": 0.8140051552766172, "learning_rate": 7.1431667023833885e-06, "loss": 0.3047, "step": 11023 }, { "epoch": 0.37831159917638985, "grad_norm": 0.727044618992144, "learning_rate": 7.142664590876018e-06, "loss": 0.2724, "step": 11024 }, { "epoch": 0.3783459162663006, "grad_norm": 0.828421937006471, "learning_rate": 7.142162452898525e-06, "loss": 0.2936, "step": 11025 }, { "epoch": 0.3783802333562114, "grad_norm": 0.6727920928742308, "learning_rate": 7.141660288457113e-06, "loss": 0.2856, "step": 11026 }, { "epoch": 0.37841455044612216, "grad_norm": 0.7945093853964371, "learning_rate": 7.141158097557985e-06, "loss": 0.3619, "step": 11027 }, { "epoch": 0.37844886753603296, "grad_norm": 0.7771362332966915, "learning_rate": 7.1406558802073445e-06, "loss": 0.3398, "step": 11028 }, { "epoch": 0.3784831846259437, "grad_norm": 0.6903670395903311, "learning_rate": 7.140153636411398e-06, "loss": 0.292, "step": 11029 }, { "epoch": 0.3785175017158545, "grad_norm": 0.6734819022631134, "learning_rate": 7.139651366176346e-06, "loss": 0.2695, "step": 11030 }, { "epoch": 0.37855181880576527, "grad_norm": 0.7856883152794655, "learning_rate": 7.139149069508397e-06, "loss": 0.2901, "step": 11031 }, { "epoch": 0.378586135895676, "grad_norm": 0.7905635677566912, "learning_rate": 7.138646746413756e-06, "loss": 0.3136, "step": 11032 }, { "epoch": 0.37862045298558683, "grad_norm": 0.7841127219721027, "learning_rate": 7.1381443968986266e-06, "loss": 0.308, "step": 11033 }, { "epoch": 0.3786547700754976, "grad_norm": 0.6795672912291958, "learning_rate": 7.137642020969216e-06, "loss": 0.3, "step": 11034 }, { "epoch": 0.3786890871654084, "grad_norm": 0.8112430799815746, "learning_rate": 7.137139618631731e-06, "loss": 0.2499, "step": 11035 }, { "epoch": 0.37872340425531914, "grad_norm": 0.7298643363171226, "learning_rate": 7.136637189892379e-06, "loss": 0.3309, "step": 11036 }, { "epoch": 0.37875772134522995, "grad_norm": 0.7450186971502937, "learning_rate": 7.136134734757364e-06, "loss": 0.2505, "step": 11037 }, { "epoch": 0.3787920384351407, "grad_norm": 0.7450121659597495, "learning_rate": 7.135632253232895e-06, "loss": 0.277, "step": 11038 }, { "epoch": 0.37882635552505145, "grad_norm": 0.8441832379773699, "learning_rate": 7.13512974532518e-06, "loss": 0.2945, "step": 11039 }, { "epoch": 0.37886067261496226, "grad_norm": 0.8694962895248379, "learning_rate": 7.1346272110404255e-06, "loss": 0.2646, "step": 11040 }, { "epoch": 0.378894989704873, "grad_norm": 0.7889359972024513, "learning_rate": 7.13412465038484e-06, "loss": 0.2926, "step": 11041 }, { "epoch": 0.3789293067947838, "grad_norm": 0.7932626233802661, "learning_rate": 7.133622063364635e-06, "loss": 0.289, "step": 11042 }, { "epoch": 0.37896362388469457, "grad_norm": 0.7418073591154606, "learning_rate": 7.133119449986016e-06, "loss": 0.2564, "step": 11043 }, { "epoch": 0.3789979409746054, "grad_norm": 0.9790395593211034, "learning_rate": 7.1326168102551914e-06, "loss": 0.2639, "step": 11044 }, { "epoch": 0.3790322580645161, "grad_norm": 0.7923965630452209, "learning_rate": 7.1321141441783745e-06, "loss": 0.3016, "step": 11045 }, { "epoch": 0.3790665751544269, "grad_norm": 0.7908240633810497, "learning_rate": 7.131611451761772e-06, "loss": 0.2784, "step": 11046 }, { "epoch": 0.3791008922443377, "grad_norm": 0.7562559462515694, "learning_rate": 7.1311087330115955e-06, "loss": 0.3188, "step": 11047 }, { "epoch": 0.37913520933424844, "grad_norm": 0.9051271951285031, "learning_rate": 7.130605987934055e-06, "loss": 0.2798, "step": 11048 }, { "epoch": 0.37916952642415924, "grad_norm": 0.8728340042931815, "learning_rate": 7.130103216535363e-06, "loss": 0.3246, "step": 11049 }, { "epoch": 0.37920384351407, "grad_norm": 0.7623371962833518, "learning_rate": 7.129600418821729e-06, "loss": 0.3291, "step": 11050 }, { "epoch": 0.3792381606039808, "grad_norm": 0.7538834624138945, "learning_rate": 7.129097594799363e-06, "loss": 0.3093, "step": 11051 }, { "epoch": 0.37927247769389155, "grad_norm": 0.7621548583107648, "learning_rate": 7.128594744474479e-06, "loss": 0.3235, "step": 11052 }, { "epoch": 0.37930679478380236, "grad_norm": 0.8250490283179601, "learning_rate": 7.128091867853288e-06, "loss": 0.2818, "step": 11053 }, { "epoch": 0.3793411118737131, "grad_norm": 0.8520065390878065, "learning_rate": 7.127588964942005e-06, "loss": 0.2884, "step": 11054 }, { "epoch": 0.37937542896362386, "grad_norm": 0.778359155636318, "learning_rate": 7.127086035746841e-06, "loss": 0.2856, "step": 11055 }, { "epoch": 0.37940974605353467, "grad_norm": 0.7914709844327689, "learning_rate": 7.126583080274008e-06, "loss": 0.3186, "step": 11056 }, { "epoch": 0.3794440631434454, "grad_norm": 0.7316724642578374, "learning_rate": 7.12608009852972e-06, "loss": 0.261, "step": 11057 }, { "epoch": 0.3794783802333562, "grad_norm": 0.7477503271676815, "learning_rate": 7.125577090520192e-06, "loss": 0.2988, "step": 11058 }, { "epoch": 0.379512697323267, "grad_norm": 0.8369145211501012, "learning_rate": 7.125074056251637e-06, "loss": 0.2967, "step": 11059 }, { "epoch": 0.3795470144131778, "grad_norm": 0.6905420470920375, "learning_rate": 7.124570995730269e-06, "loss": 0.3021, "step": 11060 }, { "epoch": 0.37958133150308854, "grad_norm": 0.7170471445789093, "learning_rate": 7.124067908962304e-06, "loss": 0.3057, "step": 11061 }, { "epoch": 0.3796156485929993, "grad_norm": 0.7535551581736663, "learning_rate": 7.123564795953956e-06, "loss": 0.2712, "step": 11062 }, { "epoch": 0.3796499656829101, "grad_norm": 0.9641671636941891, "learning_rate": 7.123061656711441e-06, "loss": 0.3427, "step": 11063 }, { "epoch": 0.37968428277282085, "grad_norm": 0.6346823364958046, "learning_rate": 7.122558491240974e-06, "loss": 0.2901, "step": 11064 }, { "epoch": 0.37971859986273165, "grad_norm": 0.8288117134133354, "learning_rate": 7.122055299548772e-06, "loss": 0.3044, "step": 11065 }, { "epoch": 0.3797529169526424, "grad_norm": 0.7354564514685535, "learning_rate": 7.121552081641051e-06, "loss": 0.3063, "step": 11066 }, { "epoch": 0.3797872340425532, "grad_norm": 0.7566702314126453, "learning_rate": 7.121048837524027e-06, "loss": 0.3123, "step": 11067 }, { "epoch": 0.37982155113246396, "grad_norm": 0.6746135477072616, "learning_rate": 7.120545567203918e-06, "loss": 0.304, "step": 11068 }, { "epoch": 0.3798558682223747, "grad_norm": 0.7708737537779224, "learning_rate": 7.12004227068694e-06, "loss": 0.2954, "step": 11069 }, { "epoch": 0.3798901853122855, "grad_norm": 0.7931879055658843, "learning_rate": 7.119538947979312e-06, "loss": 0.2799, "step": 11070 }, { "epoch": 0.3799245024021963, "grad_norm": 0.6783789919011035, "learning_rate": 7.119035599087253e-06, "loss": 0.2848, "step": 11071 }, { "epoch": 0.3799588194921071, "grad_norm": 0.7330035348554231, "learning_rate": 7.118532224016978e-06, "loss": 0.3126, "step": 11072 }, { "epoch": 0.37999313658201783, "grad_norm": 0.8177797282302443, "learning_rate": 7.118028822774707e-06, "loss": 0.2782, "step": 11073 }, { "epoch": 0.38002745367192864, "grad_norm": 0.7791570785566215, "learning_rate": 7.117525395366661e-06, "loss": 0.3055, "step": 11074 }, { "epoch": 0.3800617707618394, "grad_norm": 0.7316803465383973, "learning_rate": 7.117021941799055e-06, "loss": 0.2622, "step": 11075 }, { "epoch": 0.3800960878517502, "grad_norm": 0.7857265218890915, "learning_rate": 7.116518462078112e-06, "loss": 0.3443, "step": 11076 }, { "epoch": 0.38013040494166095, "grad_norm": 0.9300693909532749, "learning_rate": 7.116014956210053e-06, "loss": 0.3106, "step": 11077 }, { "epoch": 0.3801647220315717, "grad_norm": 0.8875025308809602, "learning_rate": 7.115511424201095e-06, "loss": 0.3001, "step": 11078 }, { "epoch": 0.3801990391214825, "grad_norm": 0.7494298362744051, "learning_rate": 7.115007866057459e-06, "loss": 0.2555, "step": 11079 }, { "epoch": 0.38023335621139326, "grad_norm": 0.7570560191599164, "learning_rate": 7.114504281785367e-06, "loss": 0.2653, "step": 11080 }, { "epoch": 0.38026767330130407, "grad_norm": 0.8525499980369047, "learning_rate": 7.114000671391042e-06, "loss": 0.2929, "step": 11081 }, { "epoch": 0.3803019903912148, "grad_norm": 0.7161907263555652, "learning_rate": 7.113497034880701e-06, "loss": 0.262, "step": 11082 }, { "epoch": 0.3803363074811256, "grad_norm": 0.840847611432703, "learning_rate": 7.112993372260567e-06, "loss": 0.3161, "step": 11083 }, { "epoch": 0.3803706245710364, "grad_norm": 0.784926032632318, "learning_rate": 7.1124896835368665e-06, "loss": 0.2597, "step": 11084 }, { "epoch": 0.3804049416609471, "grad_norm": 0.739536389202277, "learning_rate": 7.111985968715818e-06, "loss": 0.3031, "step": 11085 }, { "epoch": 0.38043925875085793, "grad_norm": 0.8199751689234396, "learning_rate": 7.111482227803643e-06, "loss": 0.3362, "step": 11086 }, { "epoch": 0.3804735758407687, "grad_norm": 0.7399916912939288, "learning_rate": 7.110978460806571e-06, "loss": 0.2715, "step": 11087 }, { "epoch": 0.3805078929306795, "grad_norm": 0.7285550621625716, "learning_rate": 7.110474667730818e-06, "loss": 0.2954, "step": 11088 }, { "epoch": 0.38054221002059024, "grad_norm": 0.6739372595321437, "learning_rate": 7.109970848582613e-06, "loss": 0.2677, "step": 11089 }, { "epoch": 0.38057652711050105, "grad_norm": 0.7172778687957978, "learning_rate": 7.109467003368176e-06, "loss": 0.2786, "step": 11090 }, { "epoch": 0.3806108442004118, "grad_norm": 0.6905147189344502, "learning_rate": 7.108963132093733e-06, "loss": 0.3047, "step": 11091 }, { "epoch": 0.38064516129032255, "grad_norm": 0.7359855447228785, "learning_rate": 7.108459234765513e-06, "loss": 0.3175, "step": 11092 }, { "epoch": 0.38067947838023336, "grad_norm": 0.8084605246603238, "learning_rate": 7.107955311389735e-06, "loss": 0.3015, "step": 11093 }, { "epoch": 0.3807137954701441, "grad_norm": 0.7414334993129539, "learning_rate": 7.107451361972627e-06, "loss": 0.2697, "step": 11094 }, { "epoch": 0.3807481125600549, "grad_norm": 0.8311459187469998, "learning_rate": 7.106947386520414e-06, "loss": 0.3246, "step": 11095 }, { "epoch": 0.38078242964996567, "grad_norm": 0.7817251384387391, "learning_rate": 7.106443385039323e-06, "loss": 0.2985, "step": 11096 }, { "epoch": 0.3808167467398765, "grad_norm": 0.7217630724237102, "learning_rate": 7.105939357535581e-06, "loss": 0.2456, "step": 11097 }, { "epoch": 0.38085106382978723, "grad_norm": 0.8827120047765004, "learning_rate": 7.105435304015411e-06, "loss": 0.2807, "step": 11098 }, { "epoch": 0.38088538091969804, "grad_norm": 0.7510644840501731, "learning_rate": 7.104931224485044e-06, "loss": 0.2672, "step": 11099 }, { "epoch": 0.3809196980096088, "grad_norm": 0.7778713308917424, "learning_rate": 7.104427118950708e-06, "loss": 0.3156, "step": 11100 }, { "epoch": 0.38095401509951954, "grad_norm": 0.7413900004748891, "learning_rate": 7.103922987418625e-06, "loss": 0.2756, "step": 11101 }, { "epoch": 0.38098833218943035, "grad_norm": 0.7620682711441195, "learning_rate": 7.103418829895028e-06, "loss": 0.2597, "step": 11102 }, { "epoch": 0.3810226492793411, "grad_norm": 0.8513427656866719, "learning_rate": 7.102914646386144e-06, "loss": 0.2697, "step": 11103 }, { "epoch": 0.3810569663692519, "grad_norm": 0.7136747209379811, "learning_rate": 7.102410436898201e-06, "loss": 0.3013, "step": 11104 }, { "epoch": 0.38109128345916266, "grad_norm": 0.7770870120512432, "learning_rate": 7.101906201437426e-06, "loss": 0.2733, "step": 11105 }, { "epoch": 0.38112560054907346, "grad_norm": 0.7386830531192333, "learning_rate": 7.101401940010053e-06, "loss": 0.2938, "step": 11106 }, { "epoch": 0.3811599176389842, "grad_norm": 0.7592974241312009, "learning_rate": 7.10089765262231e-06, "loss": 0.2637, "step": 11107 }, { "epoch": 0.38119423472889497, "grad_norm": 0.8607594288990759, "learning_rate": 7.1003933392804224e-06, "loss": 0.3261, "step": 11108 }, { "epoch": 0.38122855181880577, "grad_norm": 0.9074543263816496, "learning_rate": 7.099888999990627e-06, "loss": 0.3252, "step": 11109 }, { "epoch": 0.3812628689087165, "grad_norm": 0.758028099634556, "learning_rate": 7.09938463475915e-06, "loss": 0.3027, "step": 11110 }, { "epoch": 0.38129718599862733, "grad_norm": 0.7876243793350142, "learning_rate": 7.098880243592223e-06, "loss": 0.2731, "step": 11111 }, { "epoch": 0.3813315030885381, "grad_norm": 0.7406987156840278, "learning_rate": 7.098375826496079e-06, "loss": 0.2857, "step": 11112 }, { "epoch": 0.3813658201784489, "grad_norm": 0.8553672231651758, "learning_rate": 7.097871383476947e-06, "loss": 0.3541, "step": 11113 }, { "epoch": 0.38140013726835964, "grad_norm": 0.7350822530966804, "learning_rate": 7.097366914541059e-06, "loss": 0.2605, "step": 11114 }, { "epoch": 0.3814344543582704, "grad_norm": 0.8114137985432739, "learning_rate": 7.09686241969465e-06, "loss": 0.312, "step": 11115 }, { "epoch": 0.3814687714481812, "grad_norm": 0.8128834733037857, "learning_rate": 7.096357898943951e-06, "loss": 0.3129, "step": 11116 }, { "epoch": 0.38150308853809195, "grad_norm": 0.7651077009503365, "learning_rate": 7.095853352295193e-06, "loss": 0.286, "step": 11117 }, { "epoch": 0.38153740562800276, "grad_norm": 0.8143321993671601, "learning_rate": 7.09534877975461e-06, "loss": 0.2688, "step": 11118 }, { "epoch": 0.3815717227179135, "grad_norm": 0.8160220039960833, "learning_rate": 7.0948441813284365e-06, "loss": 0.3032, "step": 11119 }, { "epoch": 0.3816060398078243, "grad_norm": 0.6784951979372991, "learning_rate": 7.094339557022905e-06, "loss": 0.2681, "step": 11120 }, { "epoch": 0.38164035689773507, "grad_norm": 0.7944947513800533, "learning_rate": 7.09383490684425e-06, "loss": 0.3203, "step": 11121 }, { "epoch": 0.3816746739876459, "grad_norm": 0.6953766327137244, "learning_rate": 7.093330230798707e-06, "loss": 0.2434, "step": 11122 }, { "epoch": 0.3817089910775566, "grad_norm": 0.8177781590283415, "learning_rate": 7.092825528892509e-06, "loss": 0.3418, "step": 11123 }, { "epoch": 0.3817433081674674, "grad_norm": 0.8402625482015283, "learning_rate": 7.092320801131891e-06, "loss": 0.3258, "step": 11124 }, { "epoch": 0.3817776252573782, "grad_norm": 0.8061656350716835, "learning_rate": 7.09181604752309e-06, "loss": 0.3069, "step": 11125 }, { "epoch": 0.38181194234728894, "grad_norm": 0.8542295597238643, "learning_rate": 7.091311268072339e-06, "loss": 0.2747, "step": 11126 }, { "epoch": 0.38184625943719974, "grad_norm": 0.7660599162255823, "learning_rate": 7.090806462785878e-06, "loss": 0.2713, "step": 11127 }, { "epoch": 0.3818805765271105, "grad_norm": 0.8059409611517143, "learning_rate": 7.090301631669937e-06, "loss": 0.3023, "step": 11128 }, { "epoch": 0.3819148936170213, "grad_norm": 0.741897429298014, "learning_rate": 7.08979677473076e-06, "loss": 0.3262, "step": 11129 }, { "epoch": 0.38194921070693205, "grad_norm": 0.7321184601952548, "learning_rate": 7.089291891974578e-06, "loss": 0.3159, "step": 11130 }, { "epoch": 0.3819835277968428, "grad_norm": 0.7764505088588556, "learning_rate": 7.088786983407632e-06, "loss": 0.3073, "step": 11131 }, { "epoch": 0.3820178448867536, "grad_norm": 0.7154513947210459, "learning_rate": 7.088282049036158e-06, "loss": 0.2734, "step": 11132 }, { "epoch": 0.38205216197666436, "grad_norm": 0.6767501280153886, "learning_rate": 7.087777088866394e-06, "loss": 0.2615, "step": 11133 }, { "epoch": 0.38208647906657517, "grad_norm": 0.6945922699055325, "learning_rate": 7.087272102904577e-06, "loss": 0.255, "step": 11134 }, { "epoch": 0.3821207961564859, "grad_norm": 0.7497555042295839, "learning_rate": 7.086767091156947e-06, "loss": 0.2529, "step": 11135 }, { "epoch": 0.3821551132463967, "grad_norm": 0.7307667237941898, "learning_rate": 7.0862620536297424e-06, "loss": 0.3182, "step": 11136 }, { "epoch": 0.3821894303363075, "grad_norm": 0.8542145708848083, "learning_rate": 7.085756990329204e-06, "loss": 0.2836, "step": 11137 }, { "epoch": 0.38222374742621823, "grad_norm": 0.7680067168762096, "learning_rate": 7.0852519012615685e-06, "loss": 0.3404, "step": 11138 }, { "epoch": 0.38225806451612904, "grad_norm": 0.7360670178948051, "learning_rate": 7.084746786433076e-06, "loss": 0.3247, "step": 11139 }, { "epoch": 0.3822923816060398, "grad_norm": 0.7347174518046793, "learning_rate": 7.08424164584997e-06, "loss": 0.2912, "step": 11140 }, { "epoch": 0.3823266986959506, "grad_norm": 0.7659019345714391, "learning_rate": 7.083736479518486e-06, "loss": 0.3041, "step": 11141 }, { "epoch": 0.38236101578586135, "grad_norm": 0.7438250011927934, "learning_rate": 7.083231287444868e-06, "loss": 0.3272, "step": 11142 }, { "epoch": 0.38239533287577215, "grad_norm": 0.8295400585106605, "learning_rate": 7.082726069635357e-06, "loss": 0.3117, "step": 11143 }, { "epoch": 0.3824296499656829, "grad_norm": 0.7641228301928013, "learning_rate": 7.082220826096193e-06, "loss": 0.2949, "step": 11144 }, { "epoch": 0.3824639670555937, "grad_norm": 0.6831756435848145, "learning_rate": 7.08171555683362e-06, "loss": 0.2727, "step": 11145 }, { "epoch": 0.38249828414550446, "grad_norm": 0.7696697998021017, "learning_rate": 7.081210261853877e-06, "loss": 0.3717, "step": 11146 }, { "epoch": 0.3825326012354152, "grad_norm": 0.745784583184615, "learning_rate": 7.080704941163207e-06, "loss": 0.3119, "step": 11147 }, { "epoch": 0.382566918325326, "grad_norm": 0.7488658627525, "learning_rate": 7.0801995947678544e-06, "loss": 0.2579, "step": 11148 }, { "epoch": 0.3826012354152368, "grad_norm": 0.8167442824409172, "learning_rate": 7.079694222674061e-06, "loss": 0.3846, "step": 11149 }, { "epoch": 0.3826355525051476, "grad_norm": 0.9384155036457971, "learning_rate": 7.079188824888068e-06, "loss": 0.2616, "step": 11150 }, { "epoch": 0.38266986959505833, "grad_norm": 0.7883287483159805, "learning_rate": 7.078683401416124e-06, "loss": 0.2493, "step": 11151 }, { "epoch": 0.38270418668496914, "grad_norm": 0.6987797120345268, "learning_rate": 7.078177952264469e-06, "loss": 0.2908, "step": 11152 }, { "epoch": 0.3827385037748799, "grad_norm": 0.9397099262857712, "learning_rate": 7.0776724774393475e-06, "loss": 0.3342, "step": 11153 }, { "epoch": 0.38277282086479064, "grad_norm": 0.7685761262184796, "learning_rate": 7.077166976947007e-06, "loss": 0.2796, "step": 11154 }, { "epoch": 0.38280713795470145, "grad_norm": 0.6965951260429072, "learning_rate": 7.076661450793689e-06, "loss": 0.2457, "step": 11155 }, { "epoch": 0.3828414550446122, "grad_norm": 0.8177406320202212, "learning_rate": 7.07615589898564e-06, "loss": 0.2958, "step": 11156 }, { "epoch": 0.382875772134523, "grad_norm": 0.8454568186911935, "learning_rate": 7.075650321529104e-06, "loss": 0.3245, "step": 11157 }, { "epoch": 0.38291008922443376, "grad_norm": 0.782779722522072, "learning_rate": 7.075144718430329e-06, "loss": 0.2798, "step": 11158 }, { "epoch": 0.38294440631434457, "grad_norm": 0.7651347638202545, "learning_rate": 7.07463908969556e-06, "loss": 0.3257, "step": 11159 }, { "epoch": 0.3829787234042553, "grad_norm": 0.820656011833615, "learning_rate": 7.074133435331045e-06, "loss": 0.3066, "step": 11160 }, { "epoch": 0.38301304049416607, "grad_norm": 0.8169075480761644, "learning_rate": 7.073627755343031e-06, "loss": 0.3196, "step": 11161 }, { "epoch": 0.3830473575840769, "grad_norm": 0.7215348727899611, "learning_rate": 7.073122049737761e-06, "loss": 0.2978, "step": 11162 }, { "epoch": 0.3830816746739876, "grad_norm": 0.8192617060489705, "learning_rate": 7.072616318521487e-06, "loss": 0.2995, "step": 11163 }, { "epoch": 0.38311599176389843, "grad_norm": 0.748626775753727, "learning_rate": 7.072110561700454e-06, "loss": 0.2727, "step": 11164 }, { "epoch": 0.3831503088538092, "grad_norm": 0.7835723445712538, "learning_rate": 7.071604779280911e-06, "loss": 0.3981, "step": 11165 }, { "epoch": 0.38318462594372, "grad_norm": 0.6887250124419673, "learning_rate": 7.0710989712691055e-06, "loss": 0.2746, "step": 11166 }, { "epoch": 0.38321894303363074, "grad_norm": 0.8265846075023257, "learning_rate": 7.070593137671288e-06, "loss": 0.3372, "step": 11167 }, { "epoch": 0.38325326012354155, "grad_norm": 0.7430123848982324, "learning_rate": 7.070087278493706e-06, "loss": 0.3273, "step": 11168 }, { "epoch": 0.3832875772134523, "grad_norm": 0.7177938330915429, "learning_rate": 7.069581393742608e-06, "loss": 0.2685, "step": 11169 }, { "epoch": 0.38332189430336305, "grad_norm": 0.8173883347224065, "learning_rate": 7.069075483424247e-06, "loss": 0.3275, "step": 11170 }, { "epoch": 0.38335621139327386, "grad_norm": 0.8504739409152194, "learning_rate": 7.068569547544869e-06, "loss": 0.3053, "step": 11171 }, { "epoch": 0.3833905284831846, "grad_norm": 0.7361554375215116, "learning_rate": 7.068063586110725e-06, "loss": 0.3156, "step": 11172 }, { "epoch": 0.3834248455730954, "grad_norm": 0.7092160876429731, "learning_rate": 7.06755759912807e-06, "loss": 0.3044, "step": 11173 }, { "epoch": 0.38345916266300617, "grad_norm": 0.7227084367345487, "learning_rate": 7.0670515866031484e-06, "loss": 0.2693, "step": 11174 }, { "epoch": 0.383493479752917, "grad_norm": 0.7471787165493312, "learning_rate": 7.066545548542215e-06, "loss": 0.3518, "step": 11175 }, { "epoch": 0.38352779684282773, "grad_norm": 0.8249867871325707, "learning_rate": 7.0660394849515215e-06, "loss": 0.3023, "step": 11176 }, { "epoch": 0.3835621139327385, "grad_norm": 0.8075645727155427, "learning_rate": 7.065533395837319e-06, "loss": 0.3061, "step": 11177 }, { "epoch": 0.3835964310226493, "grad_norm": 0.9619776746849514, "learning_rate": 7.065027281205858e-06, "loss": 0.3395, "step": 11178 }, { "epoch": 0.38363074811256004, "grad_norm": 0.8707257964688286, "learning_rate": 7.064521141063394e-06, "loss": 0.3331, "step": 11179 }, { "epoch": 0.38366506520247085, "grad_norm": 0.7390727207036232, "learning_rate": 7.064014975416178e-06, "loss": 0.2695, "step": 11180 }, { "epoch": 0.3836993822923816, "grad_norm": 0.8653822298593967, "learning_rate": 7.0635087842704634e-06, "loss": 0.3021, "step": 11181 }, { "epoch": 0.3837336993822924, "grad_norm": 0.7381087968979378, "learning_rate": 7.063002567632503e-06, "loss": 0.2815, "step": 11182 }, { "epoch": 0.38376801647220316, "grad_norm": 0.7565745286433003, "learning_rate": 7.0624963255085525e-06, "loss": 0.2974, "step": 11183 }, { "epoch": 0.3838023335621139, "grad_norm": 0.7386481690921768, "learning_rate": 7.0619900579048626e-06, "loss": 0.3007, "step": 11184 }, { "epoch": 0.3838366506520247, "grad_norm": 0.9239570661161473, "learning_rate": 7.061483764827691e-06, "loss": 0.367, "step": 11185 }, { "epoch": 0.38387096774193546, "grad_norm": 0.8088607809205073, "learning_rate": 7.0609774462832905e-06, "loss": 0.2922, "step": 11186 }, { "epoch": 0.38390528483184627, "grad_norm": 0.7472272592038769, "learning_rate": 7.060471102277916e-06, "loss": 0.3201, "step": 11187 }, { "epoch": 0.383939601921757, "grad_norm": 0.7408549328452034, "learning_rate": 7.0599647328178236e-06, "loss": 0.2987, "step": 11188 }, { "epoch": 0.38397391901166783, "grad_norm": 0.7714165136366924, "learning_rate": 7.059458337909269e-06, "loss": 0.3148, "step": 11189 }, { "epoch": 0.3840082361015786, "grad_norm": 0.7379213096151701, "learning_rate": 7.058951917558509e-06, "loss": 0.3428, "step": 11190 }, { "epoch": 0.3840425531914894, "grad_norm": 0.7542691728046029, "learning_rate": 7.0584454717717964e-06, "loss": 0.3208, "step": 11191 }, { "epoch": 0.38407687028140014, "grad_norm": 0.8089217095918434, "learning_rate": 7.057939000555391e-06, "loss": 0.2609, "step": 11192 }, { "epoch": 0.3841111873713109, "grad_norm": 0.7885169384107461, "learning_rate": 7.057432503915549e-06, "loss": 0.3022, "step": 11193 }, { "epoch": 0.3841455044612217, "grad_norm": 0.7458884937611506, "learning_rate": 7.056925981858525e-06, "loss": 0.236, "step": 11194 }, { "epoch": 0.38417982155113245, "grad_norm": 0.76941315513767, "learning_rate": 7.056419434390579e-06, "loss": 0.3213, "step": 11195 }, { "epoch": 0.38421413864104326, "grad_norm": 0.779851613347821, "learning_rate": 7.05591286151797e-06, "loss": 0.2798, "step": 11196 }, { "epoch": 0.384248455730954, "grad_norm": 0.8649878150596121, "learning_rate": 7.055406263246955e-06, "loss": 0.3075, "step": 11197 }, { "epoch": 0.3842827728208648, "grad_norm": 0.8420800602621706, "learning_rate": 7.0548996395837896e-06, "loss": 0.3328, "step": 11198 }, { "epoch": 0.38431708991077557, "grad_norm": 0.7753265975039283, "learning_rate": 7.054392990534738e-06, "loss": 0.2839, "step": 11199 }, { "epoch": 0.3843514070006863, "grad_norm": 0.7814612494063453, "learning_rate": 7.053886316106055e-06, "loss": 0.3174, "step": 11200 }, { "epoch": 0.3843857240905971, "grad_norm": 0.8293269113492681, "learning_rate": 7.053379616303999e-06, "loss": 0.321, "step": 11201 }, { "epoch": 0.3844200411805079, "grad_norm": 0.7289034457789475, "learning_rate": 7.052872891134834e-06, "loss": 0.2938, "step": 11202 }, { "epoch": 0.3844543582704187, "grad_norm": 0.8312960245967599, "learning_rate": 7.052366140604817e-06, "loss": 0.2825, "step": 11203 }, { "epoch": 0.38448867536032943, "grad_norm": 0.792323986210724, "learning_rate": 7.051859364720209e-06, "loss": 0.307, "step": 11204 }, { "epoch": 0.38452299245024024, "grad_norm": 0.8937470354694751, "learning_rate": 7.051352563487272e-06, "loss": 0.2605, "step": 11205 }, { "epoch": 0.384557309540151, "grad_norm": 0.8076019324971165, "learning_rate": 7.0508457369122666e-06, "loss": 0.2942, "step": 11206 }, { "epoch": 0.38459162663006174, "grad_norm": 0.6669251503132286, "learning_rate": 7.0503388850014505e-06, "loss": 0.2841, "step": 11207 }, { "epoch": 0.38462594371997255, "grad_norm": 0.7357022348555987, "learning_rate": 7.049832007761088e-06, "loss": 0.2518, "step": 11208 }, { "epoch": 0.3846602608098833, "grad_norm": 0.8203805697619823, "learning_rate": 7.049325105197442e-06, "loss": 0.3219, "step": 11209 }, { "epoch": 0.3846945778997941, "grad_norm": 0.8490891081674189, "learning_rate": 7.048818177316774e-06, "loss": 0.2644, "step": 11210 }, { "epoch": 0.38472889498970486, "grad_norm": 0.7170539670743258, "learning_rate": 7.048311224125346e-06, "loss": 0.2541, "step": 11211 }, { "epoch": 0.38476321207961567, "grad_norm": 0.8732426220154544, "learning_rate": 7.0478042456294214e-06, "loss": 0.341, "step": 11212 }, { "epoch": 0.3847975291695264, "grad_norm": 0.9414204742774901, "learning_rate": 7.047297241835262e-06, "loss": 0.3246, "step": 11213 }, { "epoch": 0.3848318462594372, "grad_norm": 0.7054724520368163, "learning_rate": 7.046790212749132e-06, "loss": 0.2527, "step": 11214 }, { "epoch": 0.384866163349348, "grad_norm": 0.7447795724061341, "learning_rate": 7.046283158377296e-06, "loss": 0.3053, "step": 11215 }, { "epoch": 0.38490048043925873, "grad_norm": 0.8419812199624999, "learning_rate": 7.045776078726015e-06, "loss": 0.3171, "step": 11216 }, { "epoch": 0.38493479752916954, "grad_norm": 0.7068575114313019, "learning_rate": 7.0452689738015576e-06, "loss": 0.2861, "step": 11217 }, { "epoch": 0.3849691146190803, "grad_norm": 0.7255619838355808, "learning_rate": 7.044761843610187e-06, "loss": 0.3109, "step": 11218 }, { "epoch": 0.3850034317089911, "grad_norm": 0.7900151212205354, "learning_rate": 7.044254688158168e-06, "loss": 0.3107, "step": 11219 }, { "epoch": 0.38503774879890185, "grad_norm": 0.8137000690889776, "learning_rate": 7.043747507451764e-06, "loss": 0.3078, "step": 11220 }, { "epoch": 0.38507206588881265, "grad_norm": 0.7626363930848755, "learning_rate": 7.043240301497243e-06, "loss": 0.3527, "step": 11221 }, { "epoch": 0.3851063829787234, "grad_norm": 0.7506832908129069, "learning_rate": 7.0427330703008714e-06, "loss": 0.253, "step": 11222 }, { "epoch": 0.38514070006863416, "grad_norm": 0.7115591795463734, "learning_rate": 7.042225813868912e-06, "loss": 0.2919, "step": 11223 }, { "epoch": 0.38517501715854496, "grad_norm": 0.7029872055478752, "learning_rate": 7.0417185322076355e-06, "loss": 0.3136, "step": 11224 }, { "epoch": 0.3852093342484557, "grad_norm": 0.7755475415483088, "learning_rate": 7.0412112253233065e-06, "loss": 0.3309, "step": 11225 }, { "epoch": 0.3852436513383665, "grad_norm": 0.7470595919616663, "learning_rate": 7.040703893222193e-06, "loss": 0.2922, "step": 11226 }, { "epoch": 0.3852779684282773, "grad_norm": 0.8252097526936233, "learning_rate": 7.040196535910562e-06, "loss": 0.3328, "step": 11227 }, { "epoch": 0.3853122855181881, "grad_norm": 0.7655737135073095, "learning_rate": 7.0396891533946806e-06, "loss": 0.3052, "step": 11228 }, { "epoch": 0.38534660260809883, "grad_norm": 0.7035847321921702, "learning_rate": 7.0391817456808184e-06, "loss": 0.2878, "step": 11229 }, { "epoch": 0.3853809196980096, "grad_norm": 0.7657835552615224, "learning_rate": 7.0386743127752436e-06, "loss": 0.348, "step": 11230 }, { "epoch": 0.3854152367879204, "grad_norm": 0.7288911031627268, "learning_rate": 7.038166854684223e-06, "loss": 0.2926, "step": 11231 }, { "epoch": 0.38544955387783114, "grad_norm": 0.7492808433582449, "learning_rate": 7.0376593714140285e-06, "loss": 0.2763, "step": 11232 }, { "epoch": 0.38548387096774195, "grad_norm": 0.7226551417766782, "learning_rate": 7.037151862970928e-06, "loss": 0.2836, "step": 11233 }, { "epoch": 0.3855181880576527, "grad_norm": 0.8798527695577583, "learning_rate": 7.036644329361192e-06, "loss": 0.261, "step": 11234 }, { "epoch": 0.3855525051475635, "grad_norm": 0.7104556294382769, "learning_rate": 7.036136770591089e-06, "loss": 0.2655, "step": 11235 }, { "epoch": 0.38558682223747426, "grad_norm": 0.7867870065586723, "learning_rate": 7.03562918666689e-06, "loss": 0.3384, "step": 11236 }, { "epoch": 0.38562113932738507, "grad_norm": 0.7502504970816662, "learning_rate": 7.035121577594866e-06, "loss": 0.3277, "step": 11237 }, { "epoch": 0.3856554564172958, "grad_norm": 0.7303423571089228, "learning_rate": 7.0346139433812875e-06, "loss": 0.2782, "step": 11238 }, { "epoch": 0.38568977350720657, "grad_norm": 0.7188094905735529, "learning_rate": 7.034106284032425e-06, "loss": 0.3258, "step": 11239 }, { "epoch": 0.3857240905971174, "grad_norm": 0.7146190709618195, "learning_rate": 7.033598599554552e-06, "loss": 0.2562, "step": 11240 }, { "epoch": 0.3857584076870281, "grad_norm": 0.7517479248314646, "learning_rate": 7.0330908899539394e-06, "loss": 0.2968, "step": 11241 }, { "epoch": 0.38579272477693893, "grad_norm": 0.8437951055701072, "learning_rate": 7.032583155236859e-06, "loss": 0.2802, "step": 11242 }, { "epoch": 0.3858270418668497, "grad_norm": 0.7206972335916565, "learning_rate": 7.032075395409583e-06, "loss": 0.3109, "step": 11243 }, { "epoch": 0.3858613589567605, "grad_norm": 0.6999325488878537, "learning_rate": 7.0315676104783845e-06, "loss": 0.2674, "step": 11244 }, { "epoch": 0.38589567604667124, "grad_norm": 0.7128797544391478, "learning_rate": 7.0310598004495375e-06, "loss": 0.2633, "step": 11245 }, { "epoch": 0.385929993136582, "grad_norm": 0.8297020166642315, "learning_rate": 7.030551965329314e-06, "loss": 0.2931, "step": 11246 }, { "epoch": 0.3859643102264928, "grad_norm": 0.8228070101515297, "learning_rate": 7.0300441051239885e-06, "loss": 0.3343, "step": 11247 }, { "epoch": 0.38599862731640355, "grad_norm": 0.8317351015587349, "learning_rate": 7.029536219839835e-06, "loss": 0.2875, "step": 11248 }, { "epoch": 0.38603294440631436, "grad_norm": 0.7763035442978823, "learning_rate": 7.029028309483128e-06, "loss": 0.3093, "step": 11249 }, { "epoch": 0.3860672614962251, "grad_norm": 0.8126950665902104, "learning_rate": 7.028520374060143e-06, "loss": 0.3398, "step": 11250 }, { "epoch": 0.3861015785861359, "grad_norm": 0.7338248909632766, "learning_rate": 7.0280124135771525e-06, "loss": 0.2613, "step": 11251 }, { "epoch": 0.38613589567604667, "grad_norm": 0.7937281544812651, "learning_rate": 7.027504428040433e-06, "loss": 0.2834, "step": 11252 }, { "epoch": 0.3861702127659574, "grad_norm": 0.702664521493463, "learning_rate": 7.026996417456259e-06, "loss": 0.2779, "step": 11253 }, { "epoch": 0.38620452985586823, "grad_norm": 0.7247292543119394, "learning_rate": 7.0264883818309085e-06, "loss": 0.2664, "step": 11254 }, { "epoch": 0.386238846945779, "grad_norm": 0.8033971766817608, "learning_rate": 7.025980321170657e-06, "loss": 0.326, "step": 11255 }, { "epoch": 0.3862731640356898, "grad_norm": 0.8862449166362625, "learning_rate": 7.0254722354817806e-06, "loss": 0.2985, "step": 11256 }, { "epoch": 0.38630748112560054, "grad_norm": 0.8538149211685186, "learning_rate": 7.024964124770557e-06, "loss": 0.3261, "step": 11257 }, { "epoch": 0.38634179821551135, "grad_norm": 0.8417906275689956, "learning_rate": 7.02445598904326e-06, "loss": 0.3093, "step": 11258 }, { "epoch": 0.3863761153054221, "grad_norm": 0.8745720221203543, "learning_rate": 7.023947828306172e-06, "loss": 0.3519, "step": 11259 }, { "epoch": 0.3864104323953329, "grad_norm": 0.730734557094756, "learning_rate": 7.023439642565568e-06, "loss": 0.27, "step": 11260 }, { "epoch": 0.38644474948524365, "grad_norm": 0.8083272599181567, "learning_rate": 7.022931431827725e-06, "loss": 0.3117, "step": 11261 }, { "epoch": 0.3864790665751544, "grad_norm": 0.7668581395903435, "learning_rate": 7.022423196098923e-06, "loss": 0.3148, "step": 11262 }, { "epoch": 0.3865133836650652, "grad_norm": 0.8626276732123671, "learning_rate": 7.0219149353854425e-06, "loss": 0.3522, "step": 11263 }, { "epoch": 0.38654770075497596, "grad_norm": 1.1754725741651932, "learning_rate": 7.021406649693558e-06, "loss": 0.3067, "step": 11264 }, { "epoch": 0.38658201784488677, "grad_norm": 0.7314899354471017, "learning_rate": 7.020898339029551e-06, "loss": 0.2759, "step": 11265 }, { "epoch": 0.3866163349347975, "grad_norm": 0.6970736470157937, "learning_rate": 7.020390003399703e-06, "loss": 0.2864, "step": 11266 }, { "epoch": 0.38665065202470833, "grad_norm": 0.8188822722263743, "learning_rate": 7.019881642810291e-06, "loss": 0.3006, "step": 11267 }, { "epoch": 0.3866849691146191, "grad_norm": 0.7359284063993325, "learning_rate": 7.0193732572675954e-06, "loss": 0.3058, "step": 11268 }, { "epoch": 0.38671928620452983, "grad_norm": 0.8266149576813825, "learning_rate": 7.018864846777898e-06, "loss": 0.3013, "step": 11269 }, { "epoch": 0.38675360329444064, "grad_norm": 0.7258820661031367, "learning_rate": 7.01835641134748e-06, "loss": 0.2982, "step": 11270 }, { "epoch": 0.3867879203843514, "grad_norm": 0.7339508887352595, "learning_rate": 7.017847950982621e-06, "loss": 0.2767, "step": 11271 }, { "epoch": 0.3868222374742622, "grad_norm": 0.8380433161416421, "learning_rate": 7.0173394656896034e-06, "loss": 0.2906, "step": 11272 }, { "epoch": 0.38685655456417295, "grad_norm": 0.7532994615153877, "learning_rate": 7.0168309554747095e-06, "loss": 0.287, "step": 11273 }, { "epoch": 0.38689087165408376, "grad_norm": 0.7923857356744249, "learning_rate": 7.0163224203442194e-06, "loss": 0.3362, "step": 11274 }, { "epoch": 0.3869251887439945, "grad_norm": 0.7364156049382068, "learning_rate": 7.0158138603044166e-06, "loss": 0.3551, "step": 11275 }, { "epoch": 0.38695950583390526, "grad_norm": 0.8352652941091793, "learning_rate": 7.015305275361583e-06, "loss": 0.32, "step": 11276 }, { "epoch": 0.38699382292381607, "grad_norm": 0.8611682948026702, "learning_rate": 7.014796665522004e-06, "loss": 0.292, "step": 11277 }, { "epoch": 0.3870281400137268, "grad_norm": 0.8157115122627573, "learning_rate": 7.0142880307919605e-06, "loss": 0.3138, "step": 11278 }, { "epoch": 0.3870624571036376, "grad_norm": 0.7554149278801021, "learning_rate": 7.013779371177738e-06, "loss": 0.299, "step": 11279 }, { "epoch": 0.3870967741935484, "grad_norm": 0.7147493755225142, "learning_rate": 7.013270686685617e-06, "loss": 0.2867, "step": 11280 }, { "epoch": 0.3871310912834592, "grad_norm": 0.8449353266764622, "learning_rate": 7.012761977321885e-06, "loss": 0.3592, "step": 11281 }, { "epoch": 0.38716540837336993, "grad_norm": 0.7896500309467945, "learning_rate": 7.012253243092827e-06, "loss": 0.3206, "step": 11282 }, { "epoch": 0.3871997254632807, "grad_norm": 0.6944034768549209, "learning_rate": 7.011744484004723e-06, "loss": 0.2661, "step": 11283 }, { "epoch": 0.3872340425531915, "grad_norm": 0.782045096493831, "learning_rate": 7.011235700063863e-06, "loss": 0.2914, "step": 11284 }, { "epoch": 0.38726835964310224, "grad_norm": 0.8230747097196476, "learning_rate": 7.010726891276532e-06, "loss": 0.2756, "step": 11285 }, { "epoch": 0.38730267673301305, "grad_norm": 0.8139745220313453, "learning_rate": 7.010218057649013e-06, "loss": 0.3402, "step": 11286 }, { "epoch": 0.3873369938229238, "grad_norm": 0.7363006445894898, "learning_rate": 7.009709199187593e-06, "loss": 0.2774, "step": 11287 }, { "epoch": 0.3873713109128346, "grad_norm": 0.8388479068514562, "learning_rate": 7.0092003158985615e-06, "loss": 0.3475, "step": 11288 }, { "epoch": 0.38740562800274536, "grad_norm": 0.6996802837612168, "learning_rate": 7.008691407788201e-06, "loss": 0.2955, "step": 11289 }, { "epoch": 0.38743994509265617, "grad_norm": 0.7367216104201333, "learning_rate": 7.0081824748628e-06, "loss": 0.2985, "step": 11290 }, { "epoch": 0.3874742621825669, "grad_norm": 0.7458164553460969, "learning_rate": 7.007673517128645e-06, "loss": 0.3019, "step": 11291 }, { "epoch": 0.38750857927247767, "grad_norm": 0.7185111065542558, "learning_rate": 7.007164534592026e-06, "loss": 0.3349, "step": 11292 }, { "epoch": 0.3875428963623885, "grad_norm": 0.8394952744645409, "learning_rate": 7.006655527259229e-06, "loss": 0.3195, "step": 11293 }, { "epoch": 0.38757721345229923, "grad_norm": 0.7585409045825058, "learning_rate": 7.006146495136541e-06, "loss": 0.3155, "step": 11294 }, { "epoch": 0.38761153054221004, "grad_norm": 0.7652352143617512, "learning_rate": 7.005637438230255e-06, "loss": 0.282, "step": 11295 }, { "epoch": 0.3876458476321208, "grad_norm": 0.7726592642104264, "learning_rate": 7.005128356546655e-06, "loss": 0.2892, "step": 11296 }, { "epoch": 0.3876801647220316, "grad_norm": 0.8296494671718226, "learning_rate": 7.004619250092031e-06, "loss": 0.3272, "step": 11297 }, { "epoch": 0.38771448181194235, "grad_norm": 0.878891373316242, "learning_rate": 7.0041101188726735e-06, "loss": 0.3039, "step": 11298 }, { "epoch": 0.3877487989018531, "grad_norm": 0.7666720481427532, "learning_rate": 7.003600962894873e-06, "loss": 0.2913, "step": 11299 }, { "epoch": 0.3877831159917639, "grad_norm": 0.8199129028450935, "learning_rate": 7.003091782164918e-06, "loss": 0.3397, "step": 11300 }, { "epoch": 0.38781743308167466, "grad_norm": 0.9426778080801529, "learning_rate": 7.002582576689099e-06, "loss": 0.334, "step": 11301 }, { "epoch": 0.38785175017158546, "grad_norm": 0.7970435263686159, "learning_rate": 7.002073346473709e-06, "loss": 0.3334, "step": 11302 }, { "epoch": 0.3878860672614962, "grad_norm": 0.8669398263141893, "learning_rate": 7.001564091525036e-06, "loss": 0.2747, "step": 11303 }, { "epoch": 0.387920384351407, "grad_norm": 0.7073077666580204, "learning_rate": 7.001054811849372e-06, "loss": 0.3457, "step": 11304 }, { "epoch": 0.3879547014413178, "grad_norm": 0.7506309435562687, "learning_rate": 7.000545507453009e-06, "loss": 0.2921, "step": 11305 }, { "epoch": 0.3879890185312285, "grad_norm": 0.8707595639397286, "learning_rate": 7.000036178342237e-06, "loss": 0.3712, "step": 11306 }, { "epoch": 0.38802333562113933, "grad_norm": 0.8399672212856593, "learning_rate": 6.9995268245233516e-06, "loss": 0.2776, "step": 11307 }, { "epoch": 0.3880576527110501, "grad_norm": 0.7765569804872364, "learning_rate": 6.999017446002644e-06, "loss": 0.2886, "step": 11308 }, { "epoch": 0.3880919698009609, "grad_norm": 0.7557459964442675, "learning_rate": 6.998508042786405e-06, "loss": 0.2796, "step": 11309 }, { "epoch": 0.38812628689087164, "grad_norm": 0.7630368242485647, "learning_rate": 6.9979986148809295e-06, "loss": 0.3338, "step": 11310 }, { "epoch": 0.38816060398078245, "grad_norm": 0.7376665983905996, "learning_rate": 6.9974891622925124e-06, "loss": 0.2786, "step": 11311 }, { "epoch": 0.3881949210706932, "grad_norm": 0.7553310824194115, "learning_rate": 6.996979685027444e-06, "loss": 0.3072, "step": 11312 }, { "epoch": 0.388229238160604, "grad_norm": 0.7300582273362237, "learning_rate": 6.996470183092019e-06, "loss": 0.2757, "step": 11313 }, { "epoch": 0.38826355525051476, "grad_norm": 0.8144527378453488, "learning_rate": 6.9959606564925344e-06, "loss": 0.2885, "step": 11314 }, { "epoch": 0.3882978723404255, "grad_norm": 0.7234299582054846, "learning_rate": 6.995451105235281e-06, "loss": 0.2863, "step": 11315 }, { "epoch": 0.3883321894303363, "grad_norm": 0.7741280926431487, "learning_rate": 6.9949415293265565e-06, "loss": 0.3271, "step": 11316 }, { "epoch": 0.38836650652024707, "grad_norm": 0.7145906087914679, "learning_rate": 6.994431928772656e-06, "loss": 0.3205, "step": 11317 }, { "epoch": 0.3884008236101579, "grad_norm": 0.7720187111462975, "learning_rate": 6.993922303579874e-06, "loss": 0.3134, "step": 11318 }, { "epoch": 0.3884351407000686, "grad_norm": 0.8448405071245524, "learning_rate": 6.993412653754506e-06, "loss": 0.2724, "step": 11319 }, { "epoch": 0.38846945778997943, "grad_norm": 0.7061392833008764, "learning_rate": 6.992902979302849e-06, "loss": 0.287, "step": 11320 }, { "epoch": 0.3885037748798902, "grad_norm": 0.815999235907204, "learning_rate": 6.992393280231199e-06, "loss": 0.3503, "step": 11321 }, { "epoch": 0.38853809196980094, "grad_norm": 0.720529816721315, "learning_rate": 6.9918835565458534e-06, "loss": 0.3254, "step": 11322 }, { "epoch": 0.38857240905971174, "grad_norm": 0.765557789834968, "learning_rate": 6.991373808253108e-06, "loss": 0.3459, "step": 11323 }, { "epoch": 0.3886067261496225, "grad_norm": 1.136457339234002, "learning_rate": 6.990864035359263e-06, "loss": 0.3601, "step": 11324 }, { "epoch": 0.3886410432395333, "grad_norm": 0.8977687708356389, "learning_rate": 6.990354237870613e-06, "loss": 0.2739, "step": 11325 }, { "epoch": 0.38867536032944405, "grad_norm": 0.7468251840406916, "learning_rate": 6.989844415793455e-06, "loss": 0.3258, "step": 11326 }, { "epoch": 0.38870967741935486, "grad_norm": 0.7515446466178849, "learning_rate": 6.989334569134091e-06, "loss": 0.2699, "step": 11327 }, { "epoch": 0.3887439945092656, "grad_norm": 0.750156996605814, "learning_rate": 6.988824697898817e-06, "loss": 0.3246, "step": 11328 }, { "epoch": 0.38877831159917636, "grad_norm": 0.745199345062544, "learning_rate": 6.988314802093933e-06, "loss": 0.3302, "step": 11329 }, { "epoch": 0.38881262868908717, "grad_norm": 0.6768071376563433, "learning_rate": 6.987804881725738e-06, "loss": 0.3037, "step": 11330 }, { "epoch": 0.3888469457789979, "grad_norm": 0.7643710576442452, "learning_rate": 6.98729493680053e-06, "loss": 0.3345, "step": 11331 }, { "epoch": 0.38888126286890873, "grad_norm": 0.8710026260828387, "learning_rate": 6.986784967324611e-06, "loss": 0.3203, "step": 11332 }, { "epoch": 0.3889155799588195, "grad_norm": 0.7999516476843317, "learning_rate": 6.9862749733042814e-06, "loss": 0.3024, "step": 11333 }, { "epoch": 0.3889498970487303, "grad_norm": 0.7689908414253978, "learning_rate": 6.9857649547458394e-06, "loss": 0.3474, "step": 11334 }, { "epoch": 0.38898421413864104, "grad_norm": 0.7276575231344667, "learning_rate": 6.985254911655586e-06, "loss": 0.2717, "step": 11335 }, { "epoch": 0.38901853122855184, "grad_norm": 0.8008595333733337, "learning_rate": 6.984744844039821e-06, "loss": 0.3112, "step": 11336 }, { "epoch": 0.3890528483184626, "grad_norm": 0.7948540003043839, "learning_rate": 6.98423475190485e-06, "loss": 0.352, "step": 11337 }, { "epoch": 0.38908716540837335, "grad_norm": 0.7937629306755417, "learning_rate": 6.983724635256972e-06, "loss": 0.3095, "step": 11338 }, { "epoch": 0.38912148249828415, "grad_norm": 0.8241013400750445, "learning_rate": 6.983214494102487e-06, "loss": 0.3141, "step": 11339 }, { "epoch": 0.3891557995881949, "grad_norm": 0.7756091403699484, "learning_rate": 6.9827043284477015e-06, "loss": 0.324, "step": 11340 }, { "epoch": 0.3891901166781057, "grad_norm": 0.7029895178219637, "learning_rate": 6.982194138298914e-06, "loss": 0.2468, "step": 11341 }, { "epoch": 0.38922443376801646, "grad_norm": 0.7513335842527719, "learning_rate": 6.98168392366243e-06, "loss": 0.2895, "step": 11342 }, { "epoch": 0.38925875085792727, "grad_norm": 0.7971803771122091, "learning_rate": 6.981173684544551e-06, "loss": 0.3034, "step": 11343 }, { "epoch": 0.389293067947838, "grad_norm": 0.7338191157540442, "learning_rate": 6.98066342095158e-06, "loss": 0.2869, "step": 11344 }, { "epoch": 0.3893273850377488, "grad_norm": 0.7784116055362025, "learning_rate": 6.980153132889823e-06, "loss": 0.273, "step": 11345 }, { "epoch": 0.3893617021276596, "grad_norm": 0.7367342679008191, "learning_rate": 6.979642820365582e-06, "loss": 0.2772, "step": 11346 }, { "epoch": 0.38939601921757033, "grad_norm": 0.7782469193409343, "learning_rate": 6.979132483385163e-06, "loss": 0.2832, "step": 11347 }, { "epoch": 0.38943033630748114, "grad_norm": 0.7998624022858714, "learning_rate": 6.978622121954869e-06, "loss": 0.2573, "step": 11348 }, { "epoch": 0.3894646533973919, "grad_norm": 0.7664483758833537, "learning_rate": 6.978111736081005e-06, "loss": 0.3399, "step": 11349 }, { "epoch": 0.3894989704873027, "grad_norm": 0.7796730501645103, "learning_rate": 6.977601325769877e-06, "loss": 0.3127, "step": 11350 }, { "epoch": 0.38953328757721345, "grad_norm": 0.8051093496995423, "learning_rate": 6.97709089102779e-06, "loss": 0.2753, "step": 11351 }, { "epoch": 0.3895676046671242, "grad_norm": 1.0738525734343634, "learning_rate": 6.976580431861051e-06, "loss": 0.2577, "step": 11352 }, { "epoch": 0.389601921757035, "grad_norm": 0.7455298406976845, "learning_rate": 6.976069948275965e-06, "loss": 0.287, "step": 11353 }, { "epoch": 0.38963623884694576, "grad_norm": 0.758678493684854, "learning_rate": 6.975559440278838e-06, "loss": 0.2863, "step": 11354 }, { "epoch": 0.38967055593685657, "grad_norm": 0.8139081914225469, "learning_rate": 6.975048907875978e-06, "loss": 0.2989, "step": 11355 }, { "epoch": 0.3897048730267673, "grad_norm": 0.7871483667633863, "learning_rate": 6.974538351073692e-06, "loss": 0.2985, "step": 11356 }, { "epoch": 0.3897391901166781, "grad_norm": 0.7659139445313471, "learning_rate": 6.974027769878287e-06, "loss": 0.3447, "step": 11357 }, { "epoch": 0.3897735072065889, "grad_norm": 0.8731920236514534, "learning_rate": 6.973517164296068e-06, "loss": 0.2926, "step": 11358 }, { "epoch": 0.3898078242964997, "grad_norm": 0.7714996421041177, "learning_rate": 6.973006534333347e-06, "loss": 0.3033, "step": 11359 }, { "epoch": 0.38984214138641043, "grad_norm": 0.8383659895321612, "learning_rate": 6.972495879996431e-06, "loss": 0.3121, "step": 11360 }, { "epoch": 0.3898764584763212, "grad_norm": 0.7202113240146611, "learning_rate": 6.971985201291627e-06, "loss": 0.2834, "step": 11361 }, { "epoch": 0.389910775566232, "grad_norm": 0.8717637228547795, "learning_rate": 6.971474498225244e-06, "loss": 0.3398, "step": 11362 }, { "epoch": 0.38994509265614274, "grad_norm": 0.8432159425396443, "learning_rate": 6.970963770803595e-06, "loss": 0.3449, "step": 11363 }, { "epoch": 0.38997940974605355, "grad_norm": 0.7151993282169954, "learning_rate": 6.970453019032985e-06, "loss": 0.3428, "step": 11364 }, { "epoch": 0.3900137268359643, "grad_norm": 0.7484090101265816, "learning_rate": 6.969942242919725e-06, "loss": 0.2784, "step": 11365 }, { "epoch": 0.3900480439258751, "grad_norm": 0.7496274226431314, "learning_rate": 6.9694314424701245e-06, "loss": 0.3163, "step": 11366 }, { "epoch": 0.39008236101578586, "grad_norm": 0.7275414210405963, "learning_rate": 6.968920617690497e-06, "loss": 0.3153, "step": 11367 }, { "epoch": 0.3901166781056966, "grad_norm": 0.8084391740303136, "learning_rate": 6.968409768587149e-06, "loss": 0.3353, "step": 11368 }, { "epoch": 0.3901509951956074, "grad_norm": 0.6983145816930865, "learning_rate": 6.967898895166394e-06, "loss": 0.2957, "step": 11369 }, { "epoch": 0.39018531228551817, "grad_norm": 0.8751199302353231, "learning_rate": 6.967387997434542e-06, "loss": 0.3128, "step": 11370 }, { "epoch": 0.390219629375429, "grad_norm": 0.8432735829890323, "learning_rate": 6.9668770753979065e-06, "loss": 0.3277, "step": 11371 }, { "epoch": 0.39025394646533973, "grad_norm": 0.7886668016498011, "learning_rate": 6.966366129062796e-06, "loss": 0.3037, "step": 11372 }, { "epoch": 0.39028826355525054, "grad_norm": 0.7789273740885617, "learning_rate": 6.965855158435525e-06, "loss": 0.3125, "step": 11373 }, { "epoch": 0.3903225806451613, "grad_norm": 0.7845327364346526, "learning_rate": 6.965344163522405e-06, "loss": 0.2891, "step": 11374 }, { "epoch": 0.39035689773507204, "grad_norm": 0.829766316045183, "learning_rate": 6.964833144329751e-06, "loss": 0.3484, "step": 11375 }, { "epoch": 0.39039121482498285, "grad_norm": 0.8318791429854757, "learning_rate": 6.964322100863874e-06, "loss": 0.2837, "step": 11376 }, { "epoch": 0.3904255319148936, "grad_norm": 0.7038393768101765, "learning_rate": 6.963811033131086e-06, "loss": 0.2759, "step": 11377 }, { "epoch": 0.3904598490048044, "grad_norm": 0.8561233888161268, "learning_rate": 6.963299941137703e-06, "loss": 0.2732, "step": 11378 }, { "epoch": 0.39049416609471516, "grad_norm": 0.7211955187236125, "learning_rate": 6.96278882489004e-06, "loss": 0.3679, "step": 11379 }, { "epoch": 0.39052848318462596, "grad_norm": 0.7587419740708098, "learning_rate": 6.962277684394406e-06, "loss": 0.3157, "step": 11380 }, { "epoch": 0.3905628002745367, "grad_norm": 0.724086188909536, "learning_rate": 6.961766519657121e-06, "loss": 0.2709, "step": 11381 }, { "epoch": 0.3905971173644475, "grad_norm": 0.744713901782738, "learning_rate": 6.9612553306844985e-06, "loss": 0.3503, "step": 11382 }, { "epoch": 0.3906314344543583, "grad_norm": 0.7113553686606403, "learning_rate": 6.960744117482852e-06, "loss": 0.2956, "step": 11383 }, { "epoch": 0.390665751544269, "grad_norm": 0.7582863737997615, "learning_rate": 6.9602328800584975e-06, "loss": 0.3046, "step": 11384 }, { "epoch": 0.39070006863417983, "grad_norm": 0.7713252867888948, "learning_rate": 6.959721618417753e-06, "loss": 0.3335, "step": 11385 }, { "epoch": 0.3907343857240906, "grad_norm": 0.7119264466642443, "learning_rate": 6.959210332566931e-06, "loss": 0.2361, "step": 11386 }, { "epoch": 0.3907687028140014, "grad_norm": 0.7351961207974763, "learning_rate": 6.95869902251235e-06, "loss": 0.2809, "step": 11387 }, { "epoch": 0.39080301990391214, "grad_norm": 0.8695688442792795, "learning_rate": 6.958187688260326e-06, "loss": 0.3423, "step": 11388 }, { "epoch": 0.39083733699382295, "grad_norm": 0.7989566271119838, "learning_rate": 6.957676329817177e-06, "loss": 0.3037, "step": 11389 }, { "epoch": 0.3908716540837337, "grad_norm": 0.6635105782751762, "learning_rate": 6.957164947189219e-06, "loss": 0.2518, "step": 11390 }, { "epoch": 0.39090597117364445, "grad_norm": 0.8753006280598054, "learning_rate": 6.956653540382769e-06, "loss": 0.3705, "step": 11391 }, { "epoch": 0.39094028826355526, "grad_norm": 0.7011599108046275, "learning_rate": 6.956142109404147e-06, "loss": 0.276, "step": 11392 }, { "epoch": 0.390974605353466, "grad_norm": 0.6883729592611644, "learning_rate": 6.9556306542596685e-06, "loss": 0.3266, "step": 11393 }, { "epoch": 0.3910089224433768, "grad_norm": 0.767358551891913, "learning_rate": 6.955119174955655e-06, "loss": 0.3186, "step": 11394 }, { "epoch": 0.39104323953328757, "grad_norm": 0.7721816922813483, "learning_rate": 6.954607671498422e-06, "loss": 0.3169, "step": 11395 }, { "epoch": 0.3910775566231984, "grad_norm": 0.608338928450311, "learning_rate": 6.954096143894291e-06, "loss": 0.2679, "step": 11396 }, { "epoch": 0.3911118737131091, "grad_norm": 0.7582395672235185, "learning_rate": 6.953584592149579e-06, "loss": 0.2521, "step": 11397 }, { "epoch": 0.3911461908030199, "grad_norm": 0.7640848111436366, "learning_rate": 6.953073016270609e-06, "loss": 0.3007, "step": 11398 }, { "epoch": 0.3911805078929307, "grad_norm": 0.7488482743282574, "learning_rate": 6.952561416263699e-06, "loss": 0.266, "step": 11399 }, { "epoch": 0.39121482498284144, "grad_norm": 0.76624595325163, "learning_rate": 6.952049792135167e-06, "loss": 0.2602, "step": 11400 }, { "epoch": 0.39124914207275224, "grad_norm": 0.7777901550306018, "learning_rate": 6.951538143891339e-06, "loss": 0.2835, "step": 11401 }, { "epoch": 0.391283459162663, "grad_norm": 0.7224174738779919, "learning_rate": 6.951026471538529e-06, "loss": 0.2877, "step": 11402 }, { "epoch": 0.3913177762525738, "grad_norm": 0.7528060929362859, "learning_rate": 6.950514775083062e-06, "loss": 0.3902, "step": 11403 }, { "epoch": 0.39135209334248455, "grad_norm": 0.7475503727084717, "learning_rate": 6.950003054531261e-06, "loss": 0.3386, "step": 11404 }, { "epoch": 0.39138641043239536, "grad_norm": 0.8273134477055042, "learning_rate": 6.949491309889445e-06, "loss": 0.2889, "step": 11405 }, { "epoch": 0.3914207275223061, "grad_norm": 0.760245745352056, "learning_rate": 6.948979541163936e-06, "loss": 0.2801, "step": 11406 }, { "epoch": 0.39145504461221686, "grad_norm": 0.7885698141554626, "learning_rate": 6.948467748361059e-06, "loss": 0.2897, "step": 11407 }, { "epoch": 0.39148936170212767, "grad_norm": 0.7373869336881468, "learning_rate": 6.9479559314871334e-06, "loss": 0.2932, "step": 11408 }, { "epoch": 0.3915236787920384, "grad_norm": 0.8032520483923709, "learning_rate": 6.947444090548483e-06, "loss": 0.3133, "step": 11409 }, { "epoch": 0.39155799588194923, "grad_norm": 0.7524334626028532, "learning_rate": 6.946932225551432e-06, "loss": 0.277, "step": 11410 }, { "epoch": 0.39159231297186, "grad_norm": 0.8003142474971637, "learning_rate": 6.946420336502302e-06, "loss": 0.334, "step": 11411 }, { "epoch": 0.3916266300617708, "grad_norm": 0.9386934621130456, "learning_rate": 6.945908423407419e-06, "loss": 0.2768, "step": 11412 }, { "epoch": 0.39166094715168154, "grad_norm": 0.7143718287829532, "learning_rate": 6.945396486273106e-06, "loss": 0.2943, "step": 11413 }, { "epoch": 0.3916952642415923, "grad_norm": 0.7296495574974787, "learning_rate": 6.944884525105688e-06, "loss": 0.3057, "step": 11414 }, { "epoch": 0.3917295813315031, "grad_norm": 0.7184276733870971, "learning_rate": 6.944372539911489e-06, "loss": 0.3082, "step": 11415 }, { "epoch": 0.39176389842141385, "grad_norm": 0.8015972216117255, "learning_rate": 6.943860530696834e-06, "loss": 0.3067, "step": 11416 }, { "epoch": 0.39179821551132465, "grad_norm": 0.9221960244544628, "learning_rate": 6.943348497468048e-06, "loss": 0.2802, "step": 11417 }, { "epoch": 0.3918325326012354, "grad_norm": 0.6394016812015144, "learning_rate": 6.9428364402314576e-06, "loss": 0.2986, "step": 11418 }, { "epoch": 0.3918668496911462, "grad_norm": 0.8048545390481462, "learning_rate": 6.942324358993388e-06, "loss": 0.3815, "step": 11419 }, { "epoch": 0.39190116678105696, "grad_norm": 0.7464409601537654, "learning_rate": 6.941812253760166e-06, "loss": 0.2876, "step": 11420 }, { "epoch": 0.3919354838709677, "grad_norm": 0.7622948604161116, "learning_rate": 6.9413001245381174e-06, "loss": 0.2523, "step": 11421 }, { "epoch": 0.3919698009608785, "grad_norm": 0.7935330214435855, "learning_rate": 6.940787971333568e-06, "loss": 0.3367, "step": 11422 }, { "epoch": 0.3920041180507893, "grad_norm": 0.9517152396774167, "learning_rate": 6.9402757941528474e-06, "loss": 0.2994, "step": 11423 }, { "epoch": 0.3920384351407001, "grad_norm": 0.6962224155170851, "learning_rate": 6.93976359300228e-06, "loss": 0.2598, "step": 11424 }, { "epoch": 0.39207275223061083, "grad_norm": 0.6787694921378226, "learning_rate": 6.9392513678881954e-06, "loss": 0.2852, "step": 11425 }, { "epoch": 0.39210706932052164, "grad_norm": 0.8012518162702466, "learning_rate": 6.938739118816922e-06, "loss": 0.2927, "step": 11426 }, { "epoch": 0.3921413864104324, "grad_norm": 0.7915033600462207, "learning_rate": 6.938226845794787e-06, "loss": 0.2934, "step": 11427 }, { "epoch": 0.3921757035003432, "grad_norm": 0.8117298599668172, "learning_rate": 6.937714548828118e-06, "loss": 0.3467, "step": 11428 }, { "epoch": 0.39221002059025395, "grad_norm": 0.7821248689346272, "learning_rate": 6.9372022279232456e-06, "loss": 0.341, "step": 11429 }, { "epoch": 0.3922443376801647, "grad_norm": 0.8309427852566996, "learning_rate": 6.936689883086499e-06, "loss": 0.3135, "step": 11430 }, { "epoch": 0.3922786547700755, "grad_norm": 0.7860082282729884, "learning_rate": 6.936177514324206e-06, "loss": 0.2761, "step": 11431 }, { "epoch": 0.39231297185998626, "grad_norm": 0.8973323122583808, "learning_rate": 6.935665121642698e-06, "loss": 0.2852, "step": 11432 }, { "epoch": 0.39234728894989707, "grad_norm": 0.6201430336689879, "learning_rate": 6.935152705048303e-06, "loss": 0.2549, "step": 11433 }, { "epoch": 0.3923816060398078, "grad_norm": 0.8117191275838318, "learning_rate": 6.934640264547352e-06, "loss": 0.3467, "step": 11434 }, { "epoch": 0.3924159231297186, "grad_norm": 0.7568681469913843, "learning_rate": 6.934127800146178e-06, "loss": 0.2842, "step": 11435 }, { "epoch": 0.3924502402196294, "grad_norm": 0.8242201693547129, "learning_rate": 6.93361531185111e-06, "loss": 0.342, "step": 11436 }, { "epoch": 0.3924845573095401, "grad_norm": 0.7281313692128566, "learning_rate": 6.933102799668476e-06, "loss": 0.2913, "step": 11437 }, { "epoch": 0.39251887439945093, "grad_norm": 0.7377442485834725, "learning_rate": 6.932590263604614e-06, "loss": 0.2882, "step": 11438 }, { "epoch": 0.3925531914893617, "grad_norm": 0.8353751855313228, "learning_rate": 6.932077703665851e-06, "loss": 0.2933, "step": 11439 }, { "epoch": 0.3925875085792725, "grad_norm": 0.9157110348376605, "learning_rate": 6.931565119858521e-06, "loss": 0.3202, "step": 11440 }, { "epoch": 0.39262182566918324, "grad_norm": 0.671669352242211, "learning_rate": 6.931052512188956e-06, "loss": 0.2579, "step": 11441 }, { "epoch": 0.39265614275909405, "grad_norm": 0.7052585245468783, "learning_rate": 6.9305398806634875e-06, "loss": 0.2737, "step": 11442 }, { "epoch": 0.3926904598490048, "grad_norm": 0.848002609334353, "learning_rate": 6.930027225288452e-06, "loss": 0.3099, "step": 11443 }, { "epoch": 0.39272477693891555, "grad_norm": 0.7680500433085092, "learning_rate": 6.929514546070179e-06, "loss": 0.3784, "step": 11444 }, { "epoch": 0.39275909402882636, "grad_norm": 0.73684007563677, "learning_rate": 6.929001843015002e-06, "loss": 0.2977, "step": 11445 }, { "epoch": 0.3927934111187371, "grad_norm": 0.7380974413318211, "learning_rate": 6.928489116129259e-06, "loss": 0.3365, "step": 11446 }, { "epoch": 0.3928277282086479, "grad_norm": 0.8407656229928291, "learning_rate": 6.927976365419278e-06, "loss": 0.3044, "step": 11447 }, { "epoch": 0.39286204529855867, "grad_norm": 0.8715209895886433, "learning_rate": 6.927463590891399e-06, "loss": 0.3448, "step": 11448 }, { "epoch": 0.3928963623884695, "grad_norm": 0.7882961622817065, "learning_rate": 6.926950792551954e-06, "loss": 0.2916, "step": 11449 }, { "epoch": 0.39293067947838023, "grad_norm": 0.8063039318070913, "learning_rate": 6.9264379704072795e-06, "loss": 0.3391, "step": 11450 }, { "epoch": 0.39296499656829104, "grad_norm": 0.7905036926842763, "learning_rate": 6.925925124463708e-06, "loss": 0.3099, "step": 11451 }, { "epoch": 0.3929993136582018, "grad_norm": 0.8259175096878206, "learning_rate": 6.92541225472758e-06, "loss": 0.3159, "step": 11452 }, { "epoch": 0.39303363074811254, "grad_norm": 0.81780018155515, "learning_rate": 6.924899361205226e-06, "loss": 0.3566, "step": 11453 }, { "epoch": 0.39306794783802335, "grad_norm": 0.7355173934335755, "learning_rate": 6.924386443902985e-06, "loss": 0.3145, "step": 11454 }, { "epoch": 0.3931022649279341, "grad_norm": 0.7078255799637014, "learning_rate": 6.9238735028271934e-06, "loss": 0.3326, "step": 11455 }, { "epoch": 0.3931365820178449, "grad_norm": 0.8458130687353936, "learning_rate": 6.923360537984187e-06, "loss": 0.2665, "step": 11456 }, { "epoch": 0.39317089910775566, "grad_norm": 0.7013757022051469, "learning_rate": 6.922847549380304e-06, "loss": 0.2939, "step": 11457 }, { "epoch": 0.39320521619766646, "grad_norm": 0.7945661100249486, "learning_rate": 6.922334537021882e-06, "loss": 0.3275, "step": 11458 }, { "epoch": 0.3932395332875772, "grad_norm": 0.6972361324646017, "learning_rate": 6.921821500915258e-06, "loss": 0.2794, "step": 11459 }, { "epoch": 0.39327385037748797, "grad_norm": 0.7353167280554922, "learning_rate": 6.92130844106677e-06, "loss": 0.297, "step": 11460 }, { "epoch": 0.3933081674673988, "grad_norm": 0.7909175256114612, "learning_rate": 6.9207953574827544e-06, "loss": 0.3111, "step": 11461 }, { "epoch": 0.3933424845573095, "grad_norm": 0.74831910750541, "learning_rate": 6.920282250169553e-06, "loss": 0.2301, "step": 11462 }, { "epoch": 0.39337680164722033, "grad_norm": 0.7145302479768497, "learning_rate": 6.919769119133502e-06, "loss": 0.2498, "step": 11463 }, { "epoch": 0.3934111187371311, "grad_norm": 0.7754627027465354, "learning_rate": 6.919255964380943e-06, "loss": 0.311, "step": 11464 }, { "epoch": 0.3934454358270419, "grad_norm": 0.6908456085909007, "learning_rate": 6.918742785918215e-06, "loss": 0.3138, "step": 11465 }, { "epoch": 0.39347975291695264, "grad_norm": 0.8522643650887405, "learning_rate": 6.9182295837516566e-06, "loss": 0.3789, "step": 11466 }, { "epoch": 0.3935140700068634, "grad_norm": 0.9430562729476137, "learning_rate": 6.917716357887607e-06, "loss": 0.2624, "step": 11467 }, { "epoch": 0.3935483870967742, "grad_norm": 0.8297725015523659, "learning_rate": 6.917203108332409e-06, "loss": 0.3584, "step": 11468 }, { "epoch": 0.39358270418668495, "grad_norm": 0.7273149169024287, "learning_rate": 6.9166898350924e-06, "loss": 0.3159, "step": 11469 }, { "epoch": 0.39361702127659576, "grad_norm": 0.7759614547774242, "learning_rate": 6.916176538173923e-06, "loss": 0.3041, "step": 11470 }, { "epoch": 0.3936513383665065, "grad_norm": 0.8043959568135671, "learning_rate": 6.9156632175833215e-06, "loss": 0.2875, "step": 11471 }, { "epoch": 0.3936856554564173, "grad_norm": 0.7861350737531212, "learning_rate": 6.915149873326932e-06, "loss": 0.2971, "step": 11472 }, { "epoch": 0.39371997254632807, "grad_norm": 0.7326854787221799, "learning_rate": 6.9146365054110986e-06, "loss": 0.2484, "step": 11473 }, { "epoch": 0.3937542896362389, "grad_norm": 0.8199786496814779, "learning_rate": 6.914123113842164e-06, "loss": 0.2766, "step": 11474 }, { "epoch": 0.3937886067261496, "grad_norm": 0.789907852462924, "learning_rate": 6.913609698626471e-06, "loss": 0.2725, "step": 11475 }, { "epoch": 0.3938229238160604, "grad_norm": 0.7600157484930626, "learning_rate": 6.9130962597703595e-06, "loss": 0.3553, "step": 11476 }, { "epoch": 0.3938572409059712, "grad_norm": 0.9434595075598852, "learning_rate": 6.912582797280174e-06, "loss": 0.322, "step": 11477 }, { "epoch": 0.39389155799588194, "grad_norm": 0.8181460504400914, "learning_rate": 6.91206931116226e-06, "loss": 0.2909, "step": 11478 }, { "epoch": 0.39392587508579274, "grad_norm": 0.7941800449276893, "learning_rate": 6.9115558014229575e-06, "loss": 0.2949, "step": 11479 }, { "epoch": 0.3939601921757035, "grad_norm": 0.8033068049440514, "learning_rate": 6.911042268068611e-06, "loss": 0.3103, "step": 11480 }, { "epoch": 0.3939945092656143, "grad_norm": 0.7808661711185916, "learning_rate": 6.910528711105569e-06, "loss": 0.3803, "step": 11481 }, { "epoch": 0.39402882635552505, "grad_norm": 0.76659688685163, "learning_rate": 6.9100151305401685e-06, "loss": 0.2926, "step": 11482 }, { "epoch": 0.3940631434454358, "grad_norm": 0.7107431984457232, "learning_rate": 6.909501526378759e-06, "loss": 0.2656, "step": 11483 }, { "epoch": 0.3940974605353466, "grad_norm": 0.731479560942957, "learning_rate": 6.908987898627684e-06, "loss": 0.324, "step": 11484 }, { "epoch": 0.39413177762525736, "grad_norm": 0.7145358267372925, "learning_rate": 6.908474247293289e-06, "loss": 0.2947, "step": 11485 }, { "epoch": 0.39416609471516817, "grad_norm": 0.7418540088047743, "learning_rate": 6.90796057238192e-06, "loss": 0.3247, "step": 11486 }, { "epoch": 0.3942004118050789, "grad_norm": 0.7339228092137726, "learning_rate": 6.907446873899924e-06, "loss": 0.2825, "step": 11487 }, { "epoch": 0.3942347288949897, "grad_norm": 0.7736895125414236, "learning_rate": 6.9069331518536444e-06, "loss": 0.3696, "step": 11488 }, { "epoch": 0.3942690459849005, "grad_norm": 0.7368907896841512, "learning_rate": 6.906419406249429e-06, "loss": 0.2792, "step": 11489 }, { "epoch": 0.39430336307481123, "grad_norm": 0.7309300623290911, "learning_rate": 6.905905637093624e-06, "loss": 0.3142, "step": 11490 }, { "epoch": 0.39433768016472204, "grad_norm": 0.7312394536086281, "learning_rate": 6.905391844392578e-06, "loss": 0.3274, "step": 11491 }, { "epoch": 0.3943719972546328, "grad_norm": 0.7323864556980936, "learning_rate": 6.904878028152637e-06, "loss": 0.2855, "step": 11492 }, { "epoch": 0.3944063143445436, "grad_norm": 0.7132257424069173, "learning_rate": 6.904364188380148e-06, "loss": 0.2673, "step": 11493 }, { "epoch": 0.39444063143445435, "grad_norm": 0.7438490101065385, "learning_rate": 6.903850325081461e-06, "loss": 0.3183, "step": 11494 }, { "epoch": 0.39447494852436515, "grad_norm": 0.7871070889165426, "learning_rate": 6.903336438262922e-06, "loss": 0.3313, "step": 11495 }, { "epoch": 0.3945092656142759, "grad_norm": 0.8419352697893917, "learning_rate": 6.902822527930881e-06, "loss": 0.2866, "step": 11496 }, { "epoch": 0.39454358270418666, "grad_norm": 0.87841085591134, "learning_rate": 6.902308594091687e-06, "loss": 0.3261, "step": 11497 }, { "epoch": 0.39457789979409746, "grad_norm": 0.8007987533562017, "learning_rate": 6.901794636751686e-06, "loss": 0.2988, "step": 11498 }, { "epoch": 0.3946122168840082, "grad_norm": 0.7451535687466319, "learning_rate": 6.901280655917231e-06, "loss": 0.2941, "step": 11499 }, { "epoch": 0.394646533973919, "grad_norm": 0.7315486852274243, "learning_rate": 6.900766651594669e-06, "loss": 0.3313, "step": 11500 }, { "epoch": 0.3946808510638298, "grad_norm": 0.7777813140496774, "learning_rate": 6.9002526237903525e-06, "loss": 0.31, "step": 11501 }, { "epoch": 0.3947151681537406, "grad_norm": 0.7996963112960241, "learning_rate": 6.8997385725106295e-06, "loss": 0.2945, "step": 11502 }, { "epoch": 0.39474948524365133, "grad_norm": 0.746706681930455, "learning_rate": 6.899224497761851e-06, "loss": 0.3027, "step": 11503 }, { "epoch": 0.39478380233356214, "grad_norm": 0.8386879316592518, "learning_rate": 6.8987103995503705e-06, "loss": 0.3087, "step": 11504 }, { "epoch": 0.3948181194234729, "grad_norm": 0.7025295895433058, "learning_rate": 6.898196277882534e-06, "loss": 0.3066, "step": 11505 }, { "epoch": 0.39485243651338364, "grad_norm": 0.6960538068222293, "learning_rate": 6.897682132764696e-06, "loss": 0.2849, "step": 11506 }, { "epoch": 0.39488675360329445, "grad_norm": 0.6893707959031564, "learning_rate": 6.8971679642032075e-06, "loss": 0.2625, "step": 11507 }, { "epoch": 0.3949210706932052, "grad_norm": 0.7391979455440508, "learning_rate": 6.896653772204421e-06, "loss": 0.3523, "step": 11508 }, { "epoch": 0.394955387783116, "grad_norm": 1.1636298805665424, "learning_rate": 6.896139556774689e-06, "loss": 0.3199, "step": 11509 }, { "epoch": 0.39498970487302676, "grad_norm": 1.011250632222691, "learning_rate": 6.895625317920364e-06, "loss": 0.2683, "step": 11510 }, { "epoch": 0.39502402196293757, "grad_norm": 0.7553425190587454, "learning_rate": 6.8951110556477975e-06, "loss": 0.262, "step": 11511 }, { "epoch": 0.3950583390528483, "grad_norm": 0.729996753183315, "learning_rate": 6.894596769963342e-06, "loss": 0.3503, "step": 11512 }, { "epoch": 0.39509265614275907, "grad_norm": 0.8256415929696705, "learning_rate": 6.894082460873355e-06, "loss": 0.2494, "step": 11513 }, { "epoch": 0.3951269732326699, "grad_norm": 0.6837292972986726, "learning_rate": 6.893568128384185e-06, "loss": 0.2866, "step": 11514 }, { "epoch": 0.3951612903225806, "grad_norm": 0.7967978633480375, "learning_rate": 6.8930537725021895e-06, "loss": 0.2763, "step": 11515 }, { "epoch": 0.39519560741249143, "grad_norm": 0.7542034937727475, "learning_rate": 6.892539393233722e-06, "loss": 0.2772, "step": 11516 }, { "epoch": 0.3952299245024022, "grad_norm": 0.7224763139262599, "learning_rate": 6.892024990585138e-06, "loss": 0.3322, "step": 11517 }, { "epoch": 0.395264241592313, "grad_norm": 0.795852009884724, "learning_rate": 6.891510564562789e-06, "loss": 0.2497, "step": 11518 }, { "epoch": 0.39529855868222374, "grad_norm": 1.0243046313039075, "learning_rate": 6.890996115173032e-06, "loss": 0.3093, "step": 11519 }, { "epoch": 0.3953328757721345, "grad_norm": 0.8013109870626905, "learning_rate": 6.890481642422224e-06, "loss": 0.2957, "step": 11520 }, { "epoch": 0.3953671928620453, "grad_norm": 0.7898856089776932, "learning_rate": 6.889967146316719e-06, "loss": 0.326, "step": 11521 }, { "epoch": 0.39540150995195605, "grad_norm": 0.7003226056740139, "learning_rate": 6.889452626862874e-06, "loss": 0.2397, "step": 11522 }, { "epoch": 0.39543582704186686, "grad_norm": 0.6956755445273223, "learning_rate": 6.888938084067044e-06, "loss": 0.294, "step": 11523 }, { "epoch": 0.3954701441317776, "grad_norm": 0.7101981223000302, "learning_rate": 6.8884235179355865e-06, "loss": 0.2717, "step": 11524 }, { "epoch": 0.3955044612216884, "grad_norm": 0.7337398523207115, "learning_rate": 6.887908928474858e-06, "loss": 0.3425, "step": 11525 }, { "epoch": 0.39553877831159917, "grad_norm": 0.783132996590439, "learning_rate": 6.887394315691216e-06, "loss": 0.2865, "step": 11526 }, { "epoch": 0.39557309540151, "grad_norm": 0.7844627384141519, "learning_rate": 6.886879679591018e-06, "loss": 0.2947, "step": 11527 }, { "epoch": 0.39560741249142073, "grad_norm": 0.7760190989644312, "learning_rate": 6.886365020180621e-06, "loss": 0.2602, "step": 11528 }, { "epoch": 0.3956417295813315, "grad_norm": 0.755978144836182, "learning_rate": 6.8858503374663824e-06, "loss": 0.3071, "step": 11529 }, { "epoch": 0.3956760466712423, "grad_norm": 0.7989216257279735, "learning_rate": 6.885335631454663e-06, "loss": 0.3121, "step": 11530 }, { "epoch": 0.39571036376115304, "grad_norm": 0.7890534261438654, "learning_rate": 6.884820902151818e-06, "loss": 0.3, "step": 11531 }, { "epoch": 0.39574468085106385, "grad_norm": 0.7444807253044056, "learning_rate": 6.884306149564212e-06, "loss": 0.2574, "step": 11532 }, { "epoch": 0.3957789979409746, "grad_norm": 0.7038698671270627, "learning_rate": 6.883791373698197e-06, "loss": 0.3017, "step": 11533 }, { "epoch": 0.3958133150308854, "grad_norm": 0.7719387984069094, "learning_rate": 6.883276574560137e-06, "loss": 0.3144, "step": 11534 }, { "epoch": 0.39584763212079616, "grad_norm": 0.701579560290662, "learning_rate": 6.882761752156391e-06, "loss": 0.3157, "step": 11535 }, { "epoch": 0.3958819492107069, "grad_norm": 0.8247242779168737, "learning_rate": 6.882246906493318e-06, "loss": 0.3212, "step": 11536 }, { "epoch": 0.3959162663006177, "grad_norm": 0.7711909853346283, "learning_rate": 6.881732037577279e-06, "loss": 0.2682, "step": 11537 }, { "epoch": 0.39595058339052847, "grad_norm": 0.6741777596355019, "learning_rate": 6.881217145414635e-06, "loss": 0.2448, "step": 11538 }, { "epoch": 0.3959849004804393, "grad_norm": 0.9250799933239858, "learning_rate": 6.8807022300117465e-06, "loss": 0.3098, "step": 11539 }, { "epoch": 0.39601921757035, "grad_norm": 0.7608270271992333, "learning_rate": 6.880187291374974e-06, "loss": 0.2955, "step": 11540 }, { "epoch": 0.39605353466026083, "grad_norm": 0.7100658183837594, "learning_rate": 6.879672329510681e-06, "loss": 0.246, "step": 11541 }, { "epoch": 0.3960878517501716, "grad_norm": 0.7546941763347839, "learning_rate": 6.879157344425227e-06, "loss": 0.3155, "step": 11542 }, { "epoch": 0.39612216884008233, "grad_norm": 0.7174843872017812, "learning_rate": 6.8786423361249746e-06, "loss": 0.2735, "step": 11543 }, { "epoch": 0.39615648592999314, "grad_norm": 0.861047998786061, "learning_rate": 6.878127304616285e-06, "loss": 0.3039, "step": 11544 }, { "epoch": 0.3961908030199039, "grad_norm": 0.8202141177351097, "learning_rate": 6.8776122499055255e-06, "loss": 0.3187, "step": 11545 }, { "epoch": 0.3962251201098147, "grad_norm": 0.7535388068190678, "learning_rate": 6.877097171999054e-06, "loss": 0.2895, "step": 11546 }, { "epoch": 0.39625943719972545, "grad_norm": 0.7467195417845172, "learning_rate": 6.876582070903235e-06, "loss": 0.2871, "step": 11547 }, { "epoch": 0.39629375428963626, "grad_norm": 0.7642029536259444, "learning_rate": 6.876066946624434e-06, "loss": 0.3198, "step": 11548 }, { "epoch": 0.396328071379547, "grad_norm": 0.7383503207803843, "learning_rate": 6.875551799169011e-06, "loss": 0.2587, "step": 11549 }, { "epoch": 0.3963623884694578, "grad_norm": 0.7211669946958036, "learning_rate": 6.875036628543333e-06, "loss": 0.264, "step": 11550 }, { "epoch": 0.39639670555936857, "grad_norm": 0.8483965832116749, "learning_rate": 6.874521434753765e-06, "loss": 0.3138, "step": 11551 }, { "epoch": 0.3964310226492793, "grad_norm": 0.730624048629582, "learning_rate": 6.874006217806668e-06, "loss": 0.2658, "step": 11552 }, { "epoch": 0.3964653397391901, "grad_norm": 0.8459394357069075, "learning_rate": 6.87349097770841e-06, "loss": 0.2892, "step": 11553 }, { "epoch": 0.3964996568291009, "grad_norm": 0.7789991312838644, "learning_rate": 6.8729757144653555e-06, "loss": 0.2862, "step": 11554 }, { "epoch": 0.3965339739190117, "grad_norm": 0.7685661726523886, "learning_rate": 6.872460428083869e-06, "loss": 0.288, "step": 11555 }, { "epoch": 0.39656829100892244, "grad_norm": 0.7417523587841776, "learning_rate": 6.871945118570318e-06, "loss": 0.2647, "step": 11556 }, { "epoch": 0.39660260809883324, "grad_norm": 0.8041386682225938, "learning_rate": 6.871429785931068e-06, "loss": 0.28, "step": 11557 }, { "epoch": 0.396636925188744, "grad_norm": 0.7457061534318262, "learning_rate": 6.870914430172483e-06, "loss": 0.2945, "step": 11558 }, { "epoch": 0.39667124227865475, "grad_norm": 0.7019355982178679, "learning_rate": 6.870399051300931e-06, "loss": 0.2861, "step": 11559 }, { "epoch": 0.39670555936856555, "grad_norm": 0.7500752812448366, "learning_rate": 6.869883649322782e-06, "loss": 0.2578, "step": 11560 }, { "epoch": 0.3967398764584763, "grad_norm": 0.6665479986576306, "learning_rate": 6.869368224244399e-06, "loss": 0.271, "step": 11561 }, { "epoch": 0.3967741935483871, "grad_norm": 0.7326637422862983, "learning_rate": 6.868852776072151e-06, "loss": 0.3302, "step": 11562 }, { "epoch": 0.39680851063829786, "grad_norm": 0.848492678016596, "learning_rate": 6.868337304812407e-06, "loss": 0.3205, "step": 11563 }, { "epoch": 0.39684282772820867, "grad_norm": 0.7625607665886649, "learning_rate": 6.867821810471533e-06, "loss": 0.3479, "step": 11564 }, { "epoch": 0.3968771448181194, "grad_norm": 0.8094200452928123, "learning_rate": 6.867306293055898e-06, "loss": 0.3179, "step": 11565 }, { "epoch": 0.39691146190803017, "grad_norm": 0.7375400880898559, "learning_rate": 6.8667907525718704e-06, "loss": 0.2828, "step": 11566 }, { "epoch": 0.396945778997941, "grad_norm": 0.8315368575476773, "learning_rate": 6.8662751890258204e-06, "loss": 0.3713, "step": 11567 }, { "epoch": 0.39698009608785173, "grad_norm": 0.8293578786969554, "learning_rate": 6.865759602424117e-06, "loss": 0.3069, "step": 11568 }, { "epoch": 0.39701441317776254, "grad_norm": 0.7557576400413234, "learning_rate": 6.865243992773127e-06, "loss": 0.3271, "step": 11569 }, { "epoch": 0.3970487302676733, "grad_norm": 0.7697530636274221, "learning_rate": 6.864728360079224e-06, "loss": 0.3287, "step": 11570 }, { "epoch": 0.3970830473575841, "grad_norm": 0.8654814523659041, "learning_rate": 6.864212704348776e-06, "loss": 0.3005, "step": 11571 }, { "epoch": 0.39711736444749485, "grad_norm": 0.8464226539344918, "learning_rate": 6.863697025588153e-06, "loss": 0.3204, "step": 11572 }, { "epoch": 0.39715168153740565, "grad_norm": 1.0319931111815142, "learning_rate": 6.863181323803726e-06, "loss": 0.2769, "step": 11573 }, { "epoch": 0.3971859986273164, "grad_norm": 0.8123222211324291, "learning_rate": 6.862665599001865e-06, "loss": 0.2511, "step": 11574 }, { "epoch": 0.39722031571722716, "grad_norm": 0.8461593591923869, "learning_rate": 6.862149851188944e-06, "loss": 0.3559, "step": 11575 }, { "epoch": 0.39725463280713796, "grad_norm": 0.8366632449237418, "learning_rate": 6.861634080371331e-06, "loss": 0.2763, "step": 11576 }, { "epoch": 0.3972889498970487, "grad_norm": 0.8249035462903976, "learning_rate": 6.861118286555402e-06, "loss": 0.2919, "step": 11577 }, { "epoch": 0.3973232669869595, "grad_norm": 0.896260867447192, "learning_rate": 6.860602469747524e-06, "loss": 0.2997, "step": 11578 }, { "epoch": 0.3973575840768703, "grad_norm": 0.7374467073285237, "learning_rate": 6.860086629954073e-06, "loss": 0.3149, "step": 11579 }, { "epoch": 0.3973919011667811, "grad_norm": 0.7296699166225689, "learning_rate": 6.859570767181418e-06, "loss": 0.2856, "step": 11580 }, { "epoch": 0.39742621825669183, "grad_norm": 0.7232604092507442, "learning_rate": 6.859054881435936e-06, "loss": 0.2895, "step": 11581 }, { "epoch": 0.3974605353466026, "grad_norm": 0.7901417057243578, "learning_rate": 6.858538972723998e-06, "loss": 0.293, "step": 11582 }, { "epoch": 0.3974948524365134, "grad_norm": 0.8805381734344568, "learning_rate": 6.8580230410519785e-06, "loss": 0.3472, "step": 11583 }, { "epoch": 0.39752916952642414, "grad_norm": 0.7432176817982975, "learning_rate": 6.857507086426252e-06, "loss": 0.3348, "step": 11584 }, { "epoch": 0.39756348661633495, "grad_norm": 0.8143428496426339, "learning_rate": 6.8569911088531884e-06, "loss": 0.3274, "step": 11585 }, { "epoch": 0.3975978037062457, "grad_norm": 0.671762608367201, "learning_rate": 6.856475108339166e-06, "loss": 0.2668, "step": 11586 }, { "epoch": 0.3976321207961565, "grad_norm": 0.7945004539589269, "learning_rate": 6.855959084890558e-06, "loss": 0.3225, "step": 11587 }, { "epoch": 0.39766643788606726, "grad_norm": 0.7246427977926683, "learning_rate": 6.855443038513738e-06, "loss": 0.3031, "step": 11588 }, { "epoch": 0.397700754975978, "grad_norm": 0.8294606967128727, "learning_rate": 6.854926969215083e-06, "loss": 0.3203, "step": 11589 }, { "epoch": 0.3977350720658888, "grad_norm": 0.777793287936668, "learning_rate": 6.85441087700097e-06, "loss": 0.2582, "step": 11590 }, { "epoch": 0.39776938915579957, "grad_norm": 0.7001963812775257, "learning_rate": 6.853894761877771e-06, "loss": 0.2848, "step": 11591 }, { "epoch": 0.3978037062457104, "grad_norm": 0.7879061393020064, "learning_rate": 6.853378623851863e-06, "loss": 0.3347, "step": 11592 }, { "epoch": 0.3978380233356211, "grad_norm": 0.7751299036841082, "learning_rate": 6.852862462929625e-06, "loss": 0.3064, "step": 11593 }, { "epoch": 0.39787234042553193, "grad_norm": 0.8487908158780885, "learning_rate": 6.85234627911743e-06, "loss": 0.2969, "step": 11594 }, { "epoch": 0.3979066575154427, "grad_norm": 0.8444541462554264, "learning_rate": 6.8518300724216556e-06, "loss": 0.2771, "step": 11595 }, { "epoch": 0.3979409746053535, "grad_norm": 0.8320282478627014, "learning_rate": 6.851313842848681e-06, "loss": 0.2836, "step": 11596 }, { "epoch": 0.39797529169526424, "grad_norm": 0.8025255932960825, "learning_rate": 6.850797590404881e-06, "loss": 0.2906, "step": 11597 }, { "epoch": 0.398009608785175, "grad_norm": 0.7681322158976897, "learning_rate": 6.850281315096637e-06, "loss": 0.2727, "step": 11598 }, { "epoch": 0.3980439258750858, "grad_norm": 0.8184824089549244, "learning_rate": 6.849765016930322e-06, "loss": 0.2891, "step": 11599 }, { "epoch": 0.39807824296499655, "grad_norm": 0.715691739726972, "learning_rate": 6.8492486959123186e-06, "loss": 0.286, "step": 11600 }, { "epoch": 0.39811256005490736, "grad_norm": 0.7063705707849421, "learning_rate": 6.848732352049002e-06, "loss": 0.2523, "step": 11601 }, { "epoch": 0.3981468771448181, "grad_norm": 0.7779775949010279, "learning_rate": 6.848215985346753e-06, "loss": 0.2661, "step": 11602 }, { "epoch": 0.3981811942347289, "grad_norm": 0.7491538610611103, "learning_rate": 6.847699595811952e-06, "loss": 0.2571, "step": 11603 }, { "epoch": 0.39821551132463967, "grad_norm": 0.8035696885996642, "learning_rate": 6.847183183450975e-06, "loss": 0.3325, "step": 11604 }, { "epoch": 0.3982498284145504, "grad_norm": 0.7275893338416645, "learning_rate": 6.846666748270203e-06, "loss": 0.2442, "step": 11605 }, { "epoch": 0.39828414550446123, "grad_norm": 0.8337494366313674, "learning_rate": 6.846150290276019e-06, "loss": 0.328, "step": 11606 }, { "epoch": 0.398318462594372, "grad_norm": 0.8483786864519193, "learning_rate": 6.845633809474799e-06, "loss": 0.2698, "step": 11607 }, { "epoch": 0.3983527796842828, "grad_norm": 0.7586815003366153, "learning_rate": 6.845117305872924e-06, "loss": 0.3018, "step": 11608 }, { "epoch": 0.39838709677419354, "grad_norm": 0.8041570960958012, "learning_rate": 6.844600779476778e-06, "loss": 0.3056, "step": 11609 }, { "epoch": 0.39842141386410435, "grad_norm": 0.8527133279293001, "learning_rate": 6.844084230292738e-06, "loss": 0.259, "step": 11610 }, { "epoch": 0.3984557309540151, "grad_norm": 0.7659594447264803, "learning_rate": 6.843567658327187e-06, "loss": 0.3527, "step": 11611 }, { "epoch": 0.39849004804392585, "grad_norm": 0.7828522222120783, "learning_rate": 6.843051063586509e-06, "loss": 0.2935, "step": 11612 }, { "epoch": 0.39852436513383666, "grad_norm": 0.7990599877905828, "learning_rate": 6.8425344460770825e-06, "loss": 0.2395, "step": 11613 }, { "epoch": 0.3985586822237474, "grad_norm": 0.8202893946078034, "learning_rate": 6.842017805805292e-06, "loss": 0.2928, "step": 11614 }, { "epoch": 0.3985929993136582, "grad_norm": 0.7073855233487119, "learning_rate": 6.841501142777518e-06, "loss": 0.2679, "step": 11615 }, { "epoch": 0.39862731640356897, "grad_norm": 0.7805558989520871, "learning_rate": 6.840984457000146e-06, "loss": 0.3511, "step": 11616 }, { "epoch": 0.3986616334934798, "grad_norm": 0.8083998167687702, "learning_rate": 6.840467748479556e-06, "loss": 0.2877, "step": 11617 }, { "epoch": 0.3986959505833905, "grad_norm": 0.8351621257373643, "learning_rate": 6.839951017222133e-06, "loss": 0.2595, "step": 11618 }, { "epoch": 0.39873026767330133, "grad_norm": 0.7359297604830617, "learning_rate": 6.83943426323426e-06, "loss": 0.2849, "step": 11619 }, { "epoch": 0.3987645847632121, "grad_norm": 0.7386138147538686, "learning_rate": 6.838917486522322e-06, "loss": 0.2919, "step": 11620 }, { "epoch": 0.39879890185312283, "grad_norm": 0.6844229332931391, "learning_rate": 6.838400687092702e-06, "loss": 0.2587, "step": 11621 }, { "epoch": 0.39883321894303364, "grad_norm": 0.7009025349600742, "learning_rate": 6.8378838649517865e-06, "loss": 0.2822, "step": 11622 }, { "epoch": 0.3988675360329444, "grad_norm": 0.755253434614146, "learning_rate": 6.837367020105956e-06, "loss": 0.2907, "step": 11623 }, { "epoch": 0.3989018531228552, "grad_norm": 0.7555522128743459, "learning_rate": 6.836850152561599e-06, "loss": 0.3084, "step": 11624 }, { "epoch": 0.39893617021276595, "grad_norm": 0.7536617517506806, "learning_rate": 6.836333262325101e-06, "loss": 0.2953, "step": 11625 }, { "epoch": 0.39897048730267676, "grad_norm": 0.6775040275668349, "learning_rate": 6.835816349402845e-06, "loss": 0.2937, "step": 11626 }, { "epoch": 0.3990048043925875, "grad_norm": 0.8029160926305596, "learning_rate": 6.835299413801219e-06, "loss": 0.2885, "step": 11627 }, { "epoch": 0.39903912148249826, "grad_norm": 0.82625681629614, "learning_rate": 6.8347824555266095e-06, "loss": 0.3609, "step": 11628 }, { "epoch": 0.39907343857240907, "grad_norm": 0.7933313122124742, "learning_rate": 6.834265474585401e-06, "loss": 0.3057, "step": 11629 }, { "epoch": 0.3991077556623198, "grad_norm": 1.0828269194910347, "learning_rate": 6.8337484709839806e-06, "loss": 0.3054, "step": 11630 }, { "epoch": 0.3991420727522306, "grad_norm": 0.8557423607826805, "learning_rate": 6.8332314447287365e-06, "loss": 0.3162, "step": 11631 }, { "epoch": 0.3991763898421414, "grad_norm": 0.6795905472389502, "learning_rate": 6.832714395826056e-06, "loss": 0.2983, "step": 11632 }, { "epoch": 0.3992107069320522, "grad_norm": 1.0558071677056384, "learning_rate": 6.832197324282323e-06, "loss": 0.2646, "step": 11633 }, { "epoch": 0.39924502402196294, "grad_norm": 0.711293238225824, "learning_rate": 6.83168023010393e-06, "loss": 0.3128, "step": 11634 }, { "epoch": 0.3992793411118737, "grad_norm": 0.7748516704058415, "learning_rate": 6.831163113297265e-06, "loss": 0.312, "step": 11635 }, { "epoch": 0.3993136582017845, "grad_norm": 0.7108296960811279, "learning_rate": 6.830645973868713e-06, "loss": 0.3309, "step": 11636 }, { "epoch": 0.39934797529169525, "grad_norm": 0.7772416906047767, "learning_rate": 6.830128811824663e-06, "loss": 0.3286, "step": 11637 }, { "epoch": 0.39938229238160605, "grad_norm": 0.7734717588017382, "learning_rate": 6.829611627171508e-06, "loss": 0.3078, "step": 11638 }, { "epoch": 0.3994166094715168, "grad_norm": 0.736276562309313, "learning_rate": 6.829094419915633e-06, "loss": 0.296, "step": 11639 }, { "epoch": 0.3994509265614276, "grad_norm": 0.7612407172212425, "learning_rate": 6.828577190063428e-06, "loss": 0.3255, "step": 11640 }, { "epoch": 0.39948524365133836, "grad_norm": 0.7549778777245578, "learning_rate": 6.828059937621286e-06, "loss": 0.2801, "step": 11641 }, { "epoch": 0.39951956074124917, "grad_norm": 0.7156428507700633, "learning_rate": 6.827542662595592e-06, "loss": 0.2646, "step": 11642 }, { "epoch": 0.3995538778311599, "grad_norm": 0.7265334567651699, "learning_rate": 6.8270253649927405e-06, "loss": 0.2637, "step": 11643 }, { "epoch": 0.39958819492107067, "grad_norm": 0.7069478483888584, "learning_rate": 6.826508044819121e-06, "loss": 0.2695, "step": 11644 }, { "epoch": 0.3996225120109815, "grad_norm": 0.7984315729856273, "learning_rate": 6.8259907020811245e-06, "loss": 0.2676, "step": 11645 }, { "epoch": 0.39965682910089223, "grad_norm": 0.7689218288227293, "learning_rate": 6.825473336785142e-06, "loss": 0.2991, "step": 11646 }, { "epoch": 0.39969114619080304, "grad_norm": 0.7212910736227317, "learning_rate": 6.8249559489375636e-06, "loss": 0.28, "step": 11647 }, { "epoch": 0.3997254632807138, "grad_norm": 0.7889251353608132, "learning_rate": 6.824438538544782e-06, "loss": 0.3179, "step": 11648 }, { "epoch": 0.3997597803706246, "grad_norm": 0.8311963901964318, "learning_rate": 6.823921105613189e-06, "loss": 0.3461, "step": 11649 }, { "epoch": 0.39979409746053535, "grad_norm": 0.8168237325844138, "learning_rate": 6.823403650149179e-06, "loss": 0.3231, "step": 11650 }, { "epoch": 0.3998284145504461, "grad_norm": 0.7244945925336294, "learning_rate": 6.822886172159142e-06, "loss": 0.2604, "step": 11651 }, { "epoch": 0.3998627316403569, "grad_norm": 0.6929413836843964, "learning_rate": 6.822368671649472e-06, "loss": 0.2882, "step": 11652 }, { "epoch": 0.39989704873026766, "grad_norm": 0.6884124119438664, "learning_rate": 6.821851148626561e-06, "loss": 0.2913, "step": 11653 }, { "epoch": 0.39993136582017846, "grad_norm": 0.8831898631877444, "learning_rate": 6.821333603096805e-06, "loss": 0.2749, "step": 11654 }, { "epoch": 0.3999656829100892, "grad_norm": 0.8364761838115272, "learning_rate": 6.820816035066592e-06, "loss": 0.3401, "step": 11655 }, { "epoch": 0.4, "grad_norm": 0.7262307195618436, "learning_rate": 6.820298444542322e-06, "loss": 0.2674, "step": 11656 }, { "epoch": 0.4000343170899108, "grad_norm": 0.7458754370314832, "learning_rate": 6.819780831530387e-06, "loss": 0.3171, "step": 11657 }, { "epoch": 0.4000686341798215, "grad_norm": 0.7630875075602257, "learning_rate": 6.819263196037182e-06, "loss": 0.3222, "step": 11658 }, { "epoch": 0.40010295126973233, "grad_norm": 0.6937117994916716, "learning_rate": 6.8187455380691e-06, "loss": 0.2861, "step": 11659 }, { "epoch": 0.4001372683596431, "grad_norm": 0.7515029657416272, "learning_rate": 6.818227857632538e-06, "loss": 0.2602, "step": 11660 }, { "epoch": 0.4001715854495539, "grad_norm": 0.7389899223231522, "learning_rate": 6.817710154733892e-06, "loss": 0.3553, "step": 11661 }, { "epoch": 0.40020590253946464, "grad_norm": 0.8384326577081599, "learning_rate": 6.817192429379555e-06, "loss": 0.29, "step": 11662 }, { "epoch": 0.40024021962937545, "grad_norm": 0.7382373585284154, "learning_rate": 6.8166746815759225e-06, "loss": 0.336, "step": 11663 }, { "epoch": 0.4002745367192862, "grad_norm": 0.7161473485801823, "learning_rate": 6.816156911329394e-06, "loss": 0.3393, "step": 11664 }, { "epoch": 0.400308853809197, "grad_norm": 0.8141550047898347, "learning_rate": 6.815639118646364e-06, "loss": 0.2739, "step": 11665 }, { "epoch": 0.40034317089910776, "grad_norm": 0.768202820206247, "learning_rate": 6.81512130353323e-06, "loss": 0.3189, "step": 11666 }, { "epoch": 0.4003774879890185, "grad_norm": 0.8596409884310546, "learning_rate": 6.81460346599639e-06, "loss": 0.2741, "step": 11667 }, { "epoch": 0.4004118050789293, "grad_norm": 0.8428005452351145, "learning_rate": 6.8140856060422375e-06, "loss": 0.2841, "step": 11668 }, { "epoch": 0.40044612216884007, "grad_norm": 0.8425654497806466, "learning_rate": 6.8135677236771726e-06, "loss": 0.3128, "step": 11669 }, { "epoch": 0.4004804392587509, "grad_norm": 0.668592156350219, "learning_rate": 6.813049818907593e-06, "loss": 0.2685, "step": 11670 }, { "epoch": 0.4005147563486616, "grad_norm": 0.7141579418939028, "learning_rate": 6.812531891739898e-06, "loss": 0.2687, "step": 11671 }, { "epoch": 0.40054907343857243, "grad_norm": 0.7649649652287039, "learning_rate": 6.812013942180482e-06, "loss": 0.3298, "step": 11672 }, { "epoch": 0.4005833905284832, "grad_norm": 0.8566621535052067, "learning_rate": 6.811495970235748e-06, "loss": 0.3745, "step": 11673 }, { "epoch": 0.40061770761839394, "grad_norm": 0.7212314514403344, "learning_rate": 6.810977975912094e-06, "loss": 0.279, "step": 11674 }, { "epoch": 0.40065202470830474, "grad_norm": 0.8107124452212782, "learning_rate": 6.810459959215918e-06, "loss": 0.3736, "step": 11675 }, { "epoch": 0.4006863417982155, "grad_norm": 0.7624756678427551, "learning_rate": 6.80994192015362e-06, "loss": 0.2819, "step": 11676 }, { "epoch": 0.4007206588881263, "grad_norm": 0.7808970866003194, "learning_rate": 6.809423858731599e-06, "loss": 0.3145, "step": 11677 }, { "epoch": 0.40075497597803705, "grad_norm": 0.6973025491065767, "learning_rate": 6.808905774956256e-06, "loss": 0.307, "step": 11678 }, { "epoch": 0.40078929306794786, "grad_norm": 0.8364239796330405, "learning_rate": 6.808387668833991e-06, "loss": 0.3115, "step": 11679 }, { "epoch": 0.4008236101578586, "grad_norm": 0.768088321922159, "learning_rate": 6.807869540371206e-06, "loss": 0.3027, "step": 11680 }, { "epoch": 0.40085792724776936, "grad_norm": 0.6939053574347683, "learning_rate": 6.807351389574299e-06, "loss": 0.3167, "step": 11681 }, { "epoch": 0.40089224433768017, "grad_norm": 0.7574838668371399, "learning_rate": 6.8068332164496745e-06, "loss": 0.2843, "step": 11682 }, { "epoch": 0.4009265614275909, "grad_norm": 1.005653522327633, "learning_rate": 6.806315021003732e-06, "loss": 0.273, "step": 11683 }, { "epoch": 0.40096087851750173, "grad_norm": 0.787144014296766, "learning_rate": 6.805796803242872e-06, "loss": 0.3111, "step": 11684 }, { "epoch": 0.4009951956074125, "grad_norm": 0.7408046759269282, "learning_rate": 6.805278563173497e-06, "loss": 0.3056, "step": 11685 }, { "epoch": 0.4010295126973233, "grad_norm": 0.8453455909097329, "learning_rate": 6.804760300802012e-06, "loss": 0.2689, "step": 11686 }, { "epoch": 0.40106382978723404, "grad_norm": 0.7774587321601412, "learning_rate": 6.804242016134818e-06, "loss": 0.2673, "step": 11687 }, { "epoch": 0.40109814687714485, "grad_norm": 0.7778599562148496, "learning_rate": 6.803723709178316e-06, "loss": 0.2962, "step": 11688 }, { "epoch": 0.4011324639670556, "grad_norm": 0.8111042350023426, "learning_rate": 6.803205379938912e-06, "loss": 0.3104, "step": 11689 }, { "epoch": 0.40116678105696635, "grad_norm": 0.690985659725615, "learning_rate": 6.8026870284230075e-06, "loss": 0.3778, "step": 11690 }, { "epoch": 0.40120109814687716, "grad_norm": 0.7258377502372372, "learning_rate": 6.802168654637005e-06, "loss": 0.2732, "step": 11691 }, { "epoch": 0.4012354152367879, "grad_norm": 0.7983630279729542, "learning_rate": 6.801650258587311e-06, "loss": 0.2753, "step": 11692 }, { "epoch": 0.4012697323266987, "grad_norm": 0.8487682154528384, "learning_rate": 6.801131840280328e-06, "loss": 0.3061, "step": 11693 }, { "epoch": 0.40130404941660947, "grad_norm": 0.7478992665681128, "learning_rate": 6.800613399722461e-06, "loss": 0.304, "step": 11694 }, { "epoch": 0.40133836650652027, "grad_norm": 0.771454569716082, "learning_rate": 6.8000949369201156e-06, "loss": 0.2955, "step": 11695 }, { "epoch": 0.401372683596431, "grad_norm": 0.8155916503238766, "learning_rate": 6.7995764518796965e-06, "loss": 0.292, "step": 11696 }, { "epoch": 0.4014070006863418, "grad_norm": 0.7604383739224194, "learning_rate": 6.799057944607608e-06, "loss": 0.2741, "step": 11697 }, { "epoch": 0.4014413177762526, "grad_norm": 0.8011037589057681, "learning_rate": 6.798539415110255e-06, "loss": 0.3202, "step": 11698 }, { "epoch": 0.40147563486616333, "grad_norm": 0.7498291744450897, "learning_rate": 6.798020863394045e-06, "loss": 0.3176, "step": 11699 }, { "epoch": 0.40150995195607414, "grad_norm": 0.8058620973767071, "learning_rate": 6.797502289465383e-06, "loss": 0.262, "step": 11700 }, { "epoch": 0.4015442690459849, "grad_norm": 0.7707822243672294, "learning_rate": 6.796983693330677e-06, "loss": 0.2865, "step": 11701 }, { "epoch": 0.4015785861358957, "grad_norm": 0.740164266063526, "learning_rate": 6.796465074996332e-06, "loss": 0.2881, "step": 11702 }, { "epoch": 0.40161290322580645, "grad_norm": 0.733579686146432, "learning_rate": 6.795946434468756e-06, "loss": 0.2897, "step": 11703 }, { "epoch": 0.4016472203157172, "grad_norm": 0.7605523048189028, "learning_rate": 6.795427771754355e-06, "loss": 0.3226, "step": 11704 }, { "epoch": 0.401681537405628, "grad_norm": 0.9161988430011202, "learning_rate": 6.794909086859538e-06, "loss": 0.2994, "step": 11705 }, { "epoch": 0.40171585449553876, "grad_norm": 0.782546687021853, "learning_rate": 6.794390379790711e-06, "loss": 0.2869, "step": 11706 }, { "epoch": 0.40175017158544957, "grad_norm": 0.7431557757230488, "learning_rate": 6.793871650554282e-06, "loss": 0.2585, "step": 11707 }, { "epoch": 0.4017844886753603, "grad_norm": 0.719304434099828, "learning_rate": 6.79335289915666e-06, "loss": 0.3043, "step": 11708 }, { "epoch": 0.4018188057652711, "grad_norm": 0.6774080477591001, "learning_rate": 6.792834125604256e-06, "loss": 0.3263, "step": 11709 }, { "epoch": 0.4018531228551819, "grad_norm": 0.7617349414453806, "learning_rate": 6.792315329903476e-06, "loss": 0.2645, "step": 11710 }, { "epoch": 0.4018874399450927, "grad_norm": 0.8407981520871054, "learning_rate": 6.791796512060729e-06, "loss": 0.3794, "step": 11711 }, { "epoch": 0.40192175703500344, "grad_norm": 0.7681015544245797, "learning_rate": 6.791277672082427e-06, "loss": 0.3157, "step": 11712 }, { "epoch": 0.4019560741249142, "grad_norm": 0.7769432423212994, "learning_rate": 6.790758809974976e-06, "loss": 0.3215, "step": 11713 }, { "epoch": 0.401990391214825, "grad_norm": 0.7964760361322791, "learning_rate": 6.790239925744788e-06, "loss": 0.2708, "step": 11714 }, { "epoch": 0.40202470830473575, "grad_norm": 0.7491665175014812, "learning_rate": 6.789721019398272e-06, "loss": 0.3001, "step": 11715 }, { "epoch": 0.40205902539464655, "grad_norm": 0.8347003742502562, "learning_rate": 6.789202090941841e-06, "loss": 0.3061, "step": 11716 }, { "epoch": 0.4020933424845573, "grad_norm": 0.8113650510778863, "learning_rate": 6.788683140381903e-06, "loss": 0.3199, "step": 11717 }, { "epoch": 0.4021276595744681, "grad_norm": 0.8315154985838933, "learning_rate": 6.788164167724872e-06, "loss": 0.3027, "step": 11718 }, { "epoch": 0.40216197666437886, "grad_norm": 0.6572245359307698, "learning_rate": 6.787645172977157e-06, "loss": 0.2817, "step": 11719 }, { "epoch": 0.4021962937542896, "grad_norm": 0.7865002971102495, "learning_rate": 6.787126156145168e-06, "loss": 0.2907, "step": 11720 }, { "epoch": 0.4022306108442004, "grad_norm": 0.8001455516325733, "learning_rate": 6.78660711723532e-06, "loss": 0.336, "step": 11721 }, { "epoch": 0.40226492793411117, "grad_norm": 0.833097261934985, "learning_rate": 6.786088056254024e-06, "loss": 0.3589, "step": 11722 }, { "epoch": 0.402299245024022, "grad_norm": 0.8109963256878833, "learning_rate": 6.785568973207693e-06, "loss": 0.2678, "step": 11723 }, { "epoch": 0.40233356211393273, "grad_norm": 0.7022482887403155, "learning_rate": 6.785049868102739e-06, "loss": 0.2957, "step": 11724 }, { "epoch": 0.40236787920384354, "grad_norm": 0.8633179695412434, "learning_rate": 6.784530740945576e-06, "loss": 0.2906, "step": 11725 }, { "epoch": 0.4024021962937543, "grad_norm": 0.689033667538719, "learning_rate": 6.784011591742615e-06, "loss": 0.2599, "step": 11726 }, { "epoch": 0.40243651338366504, "grad_norm": 0.740026962296084, "learning_rate": 6.783492420500271e-06, "loss": 0.2596, "step": 11727 }, { "epoch": 0.40247083047357585, "grad_norm": 0.8148098305416644, "learning_rate": 6.782973227224958e-06, "loss": 0.3137, "step": 11728 }, { "epoch": 0.4025051475634866, "grad_norm": 0.7507209468196053, "learning_rate": 6.782454011923089e-06, "loss": 0.2636, "step": 11729 }, { "epoch": 0.4025394646533974, "grad_norm": 0.7889105551774068, "learning_rate": 6.781934774601078e-06, "loss": 0.2796, "step": 11730 }, { "epoch": 0.40257378174330816, "grad_norm": 0.9082882614488602, "learning_rate": 6.781415515265341e-06, "loss": 0.2789, "step": 11731 }, { "epoch": 0.40260809883321896, "grad_norm": 0.819230039079172, "learning_rate": 6.780896233922293e-06, "loss": 0.3926, "step": 11732 }, { "epoch": 0.4026424159231297, "grad_norm": 0.7842432448600148, "learning_rate": 6.780376930578348e-06, "loss": 0.3777, "step": 11733 }, { "epoch": 0.40267673301304047, "grad_norm": 0.9056568859281058, "learning_rate": 6.779857605239922e-06, "loss": 0.3127, "step": 11734 }, { "epoch": 0.4027110501029513, "grad_norm": 0.8851097530630592, "learning_rate": 6.779338257913429e-06, "loss": 0.295, "step": 11735 }, { "epoch": 0.402745367192862, "grad_norm": 0.7459454362025741, "learning_rate": 6.778818888605287e-06, "loss": 0.2985, "step": 11736 }, { "epoch": 0.40277968428277283, "grad_norm": 0.7275063853445227, "learning_rate": 6.778299497321912e-06, "loss": 0.2745, "step": 11737 }, { "epoch": 0.4028140013726836, "grad_norm": 0.7379475636313213, "learning_rate": 6.777780084069721e-06, "loss": 0.2989, "step": 11738 }, { "epoch": 0.4028483184625944, "grad_norm": 0.8296306759331314, "learning_rate": 6.777260648855127e-06, "loss": 0.3284, "step": 11739 }, { "epoch": 0.40288263555250514, "grad_norm": 0.7855483211192376, "learning_rate": 6.7767411916845516e-06, "loss": 0.3255, "step": 11740 }, { "epoch": 0.40291695264241595, "grad_norm": 0.7512684192367755, "learning_rate": 6.776221712564411e-06, "loss": 0.2686, "step": 11741 }, { "epoch": 0.4029512697323267, "grad_norm": 0.756587185201965, "learning_rate": 6.775702211501121e-06, "loss": 0.2744, "step": 11742 }, { "epoch": 0.40298558682223745, "grad_norm": 0.6843353031088933, "learning_rate": 6.7751826885010994e-06, "loss": 0.31, "step": 11743 }, { "epoch": 0.40301990391214826, "grad_norm": 0.7919261641461369, "learning_rate": 6.774663143570767e-06, "loss": 0.2377, "step": 11744 }, { "epoch": 0.403054221002059, "grad_norm": 0.7606085299292641, "learning_rate": 6.774143576716539e-06, "loss": 0.2835, "step": 11745 }, { "epoch": 0.4030885380919698, "grad_norm": 0.8821730593846027, "learning_rate": 6.773623987944836e-06, "loss": 0.278, "step": 11746 }, { "epoch": 0.40312285518188057, "grad_norm": 0.7554260437007126, "learning_rate": 6.7731043772620784e-06, "loss": 0.298, "step": 11747 }, { "epoch": 0.4031571722717914, "grad_norm": 0.7636557441201205, "learning_rate": 6.772584744674682e-06, "loss": 0.3115, "step": 11748 }, { "epoch": 0.4031914893617021, "grad_norm": 0.7442156670257539, "learning_rate": 6.772065090189067e-06, "loss": 0.2478, "step": 11749 }, { "epoch": 0.4032258064516129, "grad_norm": 0.7395364040634252, "learning_rate": 6.7715454138116555e-06, "loss": 0.2555, "step": 11750 }, { "epoch": 0.4032601235415237, "grad_norm": 0.7976980215687789, "learning_rate": 6.771025715548865e-06, "loss": 0.3339, "step": 11751 }, { "epoch": 0.40329444063143444, "grad_norm": 0.8462156958820788, "learning_rate": 6.770505995407115e-06, "loss": 0.2908, "step": 11752 }, { "epoch": 0.40332875772134524, "grad_norm": 0.7799245415958288, "learning_rate": 6.7699862533928306e-06, "loss": 0.3395, "step": 11753 }, { "epoch": 0.403363074811256, "grad_norm": 0.7975916347462741, "learning_rate": 6.769466489512427e-06, "loss": 0.2986, "step": 11754 }, { "epoch": 0.4033973919011668, "grad_norm": 0.7829106092694451, "learning_rate": 6.76894670377233e-06, "loss": 0.3, "step": 11755 }, { "epoch": 0.40343170899107755, "grad_norm": 0.7956510973039469, "learning_rate": 6.7684268961789565e-06, "loss": 0.31, "step": 11756 }, { "epoch": 0.4034660260809883, "grad_norm": 0.772187759508922, "learning_rate": 6.767907066738734e-06, "loss": 0.3191, "step": 11757 }, { "epoch": 0.4035003431708991, "grad_norm": 0.7562520061181655, "learning_rate": 6.767387215458078e-06, "loss": 0.2639, "step": 11758 }, { "epoch": 0.40353466026080986, "grad_norm": 0.7955892181008591, "learning_rate": 6.766867342343414e-06, "loss": 0.395, "step": 11759 }, { "epoch": 0.40356897735072067, "grad_norm": 0.7830596974585194, "learning_rate": 6.766347447401165e-06, "loss": 0.3065, "step": 11760 }, { "epoch": 0.4036032944406314, "grad_norm": 0.8504006436350702, "learning_rate": 6.765827530637751e-06, "loss": 0.305, "step": 11761 }, { "epoch": 0.40363761153054223, "grad_norm": 0.7389481998430225, "learning_rate": 6.765307592059598e-06, "loss": 0.3068, "step": 11762 }, { "epoch": 0.403671928620453, "grad_norm": 0.7363591319635109, "learning_rate": 6.7647876316731275e-06, "loss": 0.2693, "step": 11763 }, { "epoch": 0.4037062457103638, "grad_norm": 0.7769692621583704, "learning_rate": 6.764267649484763e-06, "loss": 0.2943, "step": 11764 }, { "epoch": 0.40374056280027454, "grad_norm": 0.7404274870632575, "learning_rate": 6.763747645500929e-06, "loss": 0.2998, "step": 11765 }, { "epoch": 0.4037748798901853, "grad_norm": 0.8057395901891775, "learning_rate": 6.763227619728049e-06, "loss": 0.2851, "step": 11766 }, { "epoch": 0.4038091969800961, "grad_norm": 0.8151667689803659, "learning_rate": 6.762707572172547e-06, "loss": 0.3107, "step": 11767 }, { "epoch": 0.40384351407000685, "grad_norm": 0.7886941927499136, "learning_rate": 6.762187502840849e-06, "loss": 0.3181, "step": 11768 }, { "epoch": 0.40387783115991766, "grad_norm": 0.7497849485112023, "learning_rate": 6.761667411739379e-06, "loss": 0.2975, "step": 11769 }, { "epoch": 0.4039121482498284, "grad_norm": 0.6770268507306845, "learning_rate": 6.761147298874562e-06, "loss": 0.2335, "step": 11770 }, { "epoch": 0.4039464653397392, "grad_norm": 0.8442927534042793, "learning_rate": 6.760627164252822e-06, "loss": 0.2713, "step": 11771 }, { "epoch": 0.40398078242964996, "grad_norm": 0.7471657640129733, "learning_rate": 6.760107007880588e-06, "loss": 0.2822, "step": 11772 }, { "epoch": 0.4040150995195607, "grad_norm": 0.7784961152268474, "learning_rate": 6.759586829764283e-06, "loss": 0.2883, "step": 11773 }, { "epoch": 0.4040494166094715, "grad_norm": 0.7456975664478745, "learning_rate": 6.759066629910333e-06, "loss": 0.3017, "step": 11774 }, { "epoch": 0.4040837336993823, "grad_norm": 0.7544097596329399, "learning_rate": 6.758546408325166e-06, "loss": 0.3228, "step": 11775 }, { "epoch": 0.4041180507892931, "grad_norm": 0.7958077316631782, "learning_rate": 6.7580261650152105e-06, "loss": 0.3149, "step": 11776 }, { "epoch": 0.40415236787920383, "grad_norm": 1.3976075910069392, "learning_rate": 6.75750589998689e-06, "loss": 0.2835, "step": 11777 }, { "epoch": 0.40418668496911464, "grad_norm": 0.6958313240560859, "learning_rate": 6.756985613246634e-06, "loss": 0.2901, "step": 11778 }, { "epoch": 0.4042210020590254, "grad_norm": 0.6792236150047612, "learning_rate": 6.756465304800869e-06, "loss": 0.291, "step": 11779 }, { "epoch": 0.40425531914893614, "grad_norm": 0.8160006625948648, "learning_rate": 6.755944974656023e-06, "loss": 0.2809, "step": 11780 }, { "epoch": 0.40428963623884695, "grad_norm": 0.9144899090234881, "learning_rate": 6.755424622818523e-06, "loss": 0.3252, "step": 11781 }, { "epoch": 0.4043239533287577, "grad_norm": 0.8212958920593365, "learning_rate": 6.754904249294798e-06, "loss": 0.2788, "step": 11782 }, { "epoch": 0.4043582704186685, "grad_norm": 0.8950582581032838, "learning_rate": 6.754383854091277e-06, "loss": 0.3175, "step": 11783 }, { "epoch": 0.40439258750857926, "grad_norm": 0.755615709413172, "learning_rate": 6.753863437214391e-06, "loss": 0.2998, "step": 11784 }, { "epoch": 0.40442690459849007, "grad_norm": 0.7198488122080067, "learning_rate": 6.753342998670567e-06, "loss": 0.284, "step": 11785 }, { "epoch": 0.4044612216884008, "grad_norm": 0.7754779568013228, "learning_rate": 6.7528225384662325e-06, "loss": 0.2746, "step": 11786 }, { "epoch": 0.4044955387783116, "grad_norm": 0.8587797073015021, "learning_rate": 6.75230205660782e-06, "loss": 0.3102, "step": 11787 }, { "epoch": 0.4045298558682224, "grad_norm": 0.7538711199446888, "learning_rate": 6.75178155310176e-06, "loss": 0.2991, "step": 11788 }, { "epoch": 0.40456417295813313, "grad_norm": 0.7353666219043393, "learning_rate": 6.7512610279544785e-06, "loss": 0.2701, "step": 11789 }, { "epoch": 0.40459849004804394, "grad_norm": 0.7510119913485511, "learning_rate": 6.7507404811724095e-06, "loss": 0.2661, "step": 11790 }, { "epoch": 0.4046328071379547, "grad_norm": 0.8419999251133927, "learning_rate": 6.750219912761984e-06, "loss": 0.34, "step": 11791 }, { "epoch": 0.4046671242278655, "grad_norm": 0.7344434861943301, "learning_rate": 6.749699322729632e-06, "loss": 0.2774, "step": 11792 }, { "epoch": 0.40470144131777624, "grad_norm": 0.6783521288789824, "learning_rate": 6.749178711081785e-06, "loss": 0.2805, "step": 11793 }, { "epoch": 0.40473575840768705, "grad_norm": 0.7739213243024143, "learning_rate": 6.748658077824874e-06, "loss": 0.2921, "step": 11794 }, { "epoch": 0.4047700754975978, "grad_norm": 0.8470031860091608, "learning_rate": 6.7481374229653315e-06, "loss": 0.3443, "step": 11795 }, { "epoch": 0.40480439258750855, "grad_norm": 0.8030175680362949, "learning_rate": 6.747616746509588e-06, "loss": 0.286, "step": 11796 }, { "epoch": 0.40483870967741936, "grad_norm": 0.7089995896524253, "learning_rate": 6.747096048464078e-06, "loss": 0.3069, "step": 11797 }, { "epoch": 0.4048730267673301, "grad_norm": 0.7356792621970029, "learning_rate": 6.746575328835234e-06, "loss": 0.2582, "step": 11798 }, { "epoch": 0.4049073438572409, "grad_norm": 0.827597483403281, "learning_rate": 6.746054587629487e-06, "loss": 0.2843, "step": 11799 }, { "epoch": 0.40494166094715167, "grad_norm": 0.6965157702043813, "learning_rate": 6.745533824853272e-06, "loss": 0.2546, "step": 11800 }, { "epoch": 0.4049759780370625, "grad_norm": 0.8464981637806176, "learning_rate": 6.745013040513021e-06, "loss": 0.3107, "step": 11801 }, { "epoch": 0.40501029512697323, "grad_norm": 0.8776207551932983, "learning_rate": 6.7444922346151695e-06, "loss": 0.3566, "step": 11802 }, { "epoch": 0.405044612216884, "grad_norm": 0.7401104654416575, "learning_rate": 6.743971407166149e-06, "loss": 0.2901, "step": 11803 }, { "epoch": 0.4050789293067948, "grad_norm": 0.6989856721470832, "learning_rate": 6.743450558172395e-06, "loss": 0.2811, "step": 11804 }, { "epoch": 0.40511324639670554, "grad_norm": 0.7439618193802217, "learning_rate": 6.742929687640343e-06, "loss": 0.3391, "step": 11805 }, { "epoch": 0.40514756348661635, "grad_norm": 0.820100659290789, "learning_rate": 6.742408795576426e-06, "loss": 0.3296, "step": 11806 }, { "epoch": 0.4051818805765271, "grad_norm": 0.9142876757063282, "learning_rate": 6.741887881987081e-06, "loss": 0.2506, "step": 11807 }, { "epoch": 0.4052161976664379, "grad_norm": 0.8104808381288251, "learning_rate": 6.741366946878741e-06, "loss": 0.3572, "step": 11808 }, { "epoch": 0.40525051475634866, "grad_norm": 0.8443951892633013, "learning_rate": 6.740845990257843e-06, "loss": 0.3228, "step": 11809 }, { "epoch": 0.40528483184625946, "grad_norm": 0.80335166578361, "learning_rate": 6.740325012130822e-06, "loss": 0.2775, "step": 11810 }, { "epoch": 0.4053191489361702, "grad_norm": 0.814651279644962, "learning_rate": 6.739804012504114e-06, "loss": 0.3548, "step": 11811 }, { "epoch": 0.40535346602608097, "grad_norm": 0.7870279306601358, "learning_rate": 6.739282991384157e-06, "loss": 0.3198, "step": 11812 }, { "epoch": 0.4053877831159918, "grad_norm": 0.6917727434302537, "learning_rate": 6.738761948777386e-06, "loss": 0.2865, "step": 11813 }, { "epoch": 0.4054221002059025, "grad_norm": 0.8319740784414518, "learning_rate": 6.738240884690239e-06, "loss": 0.3047, "step": 11814 }, { "epoch": 0.40545641729581333, "grad_norm": 0.7568620903442624, "learning_rate": 6.737719799129151e-06, "loss": 0.3021, "step": 11815 }, { "epoch": 0.4054907343857241, "grad_norm": 0.8108965994709242, "learning_rate": 6.737198692100562e-06, "loss": 0.3482, "step": 11816 }, { "epoch": 0.4055250514756349, "grad_norm": 0.7876015753828618, "learning_rate": 6.7366775636109075e-06, "loss": 0.2634, "step": 11817 }, { "epoch": 0.40555936856554564, "grad_norm": 0.8834142840529725, "learning_rate": 6.736156413666628e-06, "loss": 0.2897, "step": 11818 }, { "epoch": 0.4055936856554564, "grad_norm": 0.6894034136338398, "learning_rate": 6.735635242274159e-06, "loss": 0.2664, "step": 11819 }, { "epoch": 0.4056280027453672, "grad_norm": 0.7310274351638089, "learning_rate": 6.735114049439939e-06, "loss": 0.2992, "step": 11820 }, { "epoch": 0.40566231983527795, "grad_norm": 0.7171126486743192, "learning_rate": 6.7345928351704106e-06, "loss": 0.3723, "step": 11821 }, { "epoch": 0.40569663692518876, "grad_norm": 0.7991834580429863, "learning_rate": 6.7340715994720096e-06, "loss": 0.269, "step": 11822 }, { "epoch": 0.4057309540150995, "grad_norm": 0.7324861029614321, "learning_rate": 6.733550342351174e-06, "loss": 0.2771, "step": 11823 }, { "epoch": 0.4057652711050103, "grad_norm": 0.8218308921612882, "learning_rate": 6.733029063814347e-06, "loss": 0.3048, "step": 11824 }, { "epoch": 0.40579958819492107, "grad_norm": 0.7735022681930898, "learning_rate": 6.732507763867965e-06, "loss": 0.2767, "step": 11825 }, { "epoch": 0.4058339052848318, "grad_norm": 0.759701017382547, "learning_rate": 6.73198644251847e-06, "loss": 0.2921, "step": 11826 }, { "epoch": 0.4058682223747426, "grad_norm": 0.719192577347732, "learning_rate": 6.7314650997723005e-06, "loss": 0.3027, "step": 11827 }, { "epoch": 0.4059025394646534, "grad_norm": 0.7461150323182015, "learning_rate": 6.7309437356359e-06, "loss": 0.2855, "step": 11828 }, { "epoch": 0.4059368565545642, "grad_norm": 0.9010173672824311, "learning_rate": 6.730422350115707e-06, "loss": 0.3563, "step": 11829 }, { "epoch": 0.40597117364447494, "grad_norm": 0.7169718102246895, "learning_rate": 6.729900943218165e-06, "loss": 0.2783, "step": 11830 }, { "epoch": 0.40600549073438574, "grad_norm": 0.6985896026694179, "learning_rate": 6.729379514949712e-06, "loss": 0.253, "step": 11831 }, { "epoch": 0.4060398078242965, "grad_norm": 0.7708635244303813, "learning_rate": 6.728858065316791e-06, "loss": 0.2904, "step": 11832 }, { "epoch": 0.4060741249142073, "grad_norm": 0.859817850556467, "learning_rate": 6.728336594325845e-06, "loss": 0.2875, "step": 11833 }, { "epoch": 0.40610844200411805, "grad_norm": 0.8507038044491139, "learning_rate": 6.727815101983315e-06, "loss": 0.2996, "step": 11834 }, { "epoch": 0.4061427590940288, "grad_norm": 0.7410755543101579, "learning_rate": 6.727293588295645e-06, "loss": 0.2706, "step": 11835 }, { "epoch": 0.4061770761839396, "grad_norm": 0.8174588128671777, "learning_rate": 6.726772053269275e-06, "loss": 0.2794, "step": 11836 }, { "epoch": 0.40621139327385036, "grad_norm": 0.7534291169854717, "learning_rate": 6.726250496910651e-06, "loss": 0.2565, "step": 11837 }, { "epoch": 0.40624571036376117, "grad_norm": 0.673204393082546, "learning_rate": 6.725728919226214e-06, "loss": 0.292, "step": 11838 }, { "epoch": 0.4062800274536719, "grad_norm": 0.7605944194939, "learning_rate": 6.725207320222409e-06, "loss": 0.2582, "step": 11839 }, { "epoch": 0.40631434454358273, "grad_norm": 0.7821337552917024, "learning_rate": 6.724685699905677e-06, "loss": 0.2945, "step": 11840 }, { "epoch": 0.4063486616334935, "grad_norm": 0.7409713535773946, "learning_rate": 6.724164058282465e-06, "loss": 0.2504, "step": 11841 }, { "epoch": 0.40638297872340423, "grad_norm": 0.7479988212738848, "learning_rate": 6.7236423953592165e-06, "loss": 0.2987, "step": 11842 }, { "epoch": 0.40641729581331504, "grad_norm": 0.8007676713212354, "learning_rate": 6.723120711142377e-06, "loss": 0.2926, "step": 11843 }, { "epoch": 0.4064516129032258, "grad_norm": 0.7590350167171851, "learning_rate": 6.722599005638388e-06, "loss": 0.2689, "step": 11844 }, { "epoch": 0.4064859299931366, "grad_norm": 0.8168003224247998, "learning_rate": 6.722077278853697e-06, "loss": 0.2816, "step": 11845 }, { "epoch": 0.40652024708304735, "grad_norm": 0.7694559901673577, "learning_rate": 6.72155553079475e-06, "loss": 0.2899, "step": 11846 }, { "epoch": 0.40655456417295815, "grad_norm": 0.7260226087184934, "learning_rate": 6.721033761467991e-06, "loss": 0.2582, "step": 11847 }, { "epoch": 0.4065888812628689, "grad_norm": 0.7886284693353482, "learning_rate": 6.720511970879866e-06, "loss": 0.301, "step": 11848 }, { "epoch": 0.40662319835277966, "grad_norm": 0.813916577708948, "learning_rate": 6.719990159036821e-06, "loss": 0.3199, "step": 11849 }, { "epoch": 0.40665751544269046, "grad_norm": 0.7288783675664195, "learning_rate": 6.719468325945304e-06, "loss": 0.2564, "step": 11850 }, { "epoch": 0.4066918325326012, "grad_norm": 0.7952892090062229, "learning_rate": 6.71894647161176e-06, "loss": 0.2538, "step": 11851 }, { "epoch": 0.406726149622512, "grad_norm": 0.7125414964454476, "learning_rate": 6.718424596042636e-06, "loss": 0.3121, "step": 11852 }, { "epoch": 0.4067604667124228, "grad_norm": 0.6959905538457903, "learning_rate": 6.717902699244382e-06, "loss": 0.2834, "step": 11853 }, { "epoch": 0.4067947838023336, "grad_norm": 0.7148133060538698, "learning_rate": 6.717380781223441e-06, "loss": 0.3041, "step": 11854 }, { "epoch": 0.40682910089224433, "grad_norm": 0.7682649912578102, "learning_rate": 6.716858841986263e-06, "loss": 0.3135, "step": 11855 }, { "epoch": 0.40686341798215514, "grad_norm": 0.7649123787678953, "learning_rate": 6.716336881539296e-06, "loss": 0.2311, "step": 11856 }, { "epoch": 0.4068977350720659, "grad_norm": 0.7416470630221919, "learning_rate": 6.715814899888987e-06, "loss": 0.3053, "step": 11857 }, { "epoch": 0.40693205216197664, "grad_norm": 0.7487965073516513, "learning_rate": 6.7152928970417865e-06, "loss": 0.2478, "step": 11858 }, { "epoch": 0.40696636925188745, "grad_norm": 0.8428693849184086, "learning_rate": 6.714770873004141e-06, "loss": 0.2564, "step": 11859 }, { "epoch": 0.4070006863417982, "grad_norm": 0.7688168718795525, "learning_rate": 6.714248827782501e-06, "loss": 0.2658, "step": 11860 }, { "epoch": 0.407035003431709, "grad_norm": 0.7289474073601969, "learning_rate": 6.713726761383317e-06, "loss": 0.2943, "step": 11861 }, { "epoch": 0.40706932052161976, "grad_norm": 0.7143263815098139, "learning_rate": 6.7132046738130354e-06, "loss": 0.2776, "step": 11862 }, { "epoch": 0.40710363761153057, "grad_norm": 0.7613410406130939, "learning_rate": 6.712682565078106e-06, "loss": 0.3159, "step": 11863 }, { "epoch": 0.4071379547014413, "grad_norm": 0.8821872123777474, "learning_rate": 6.712160435184982e-06, "loss": 0.2965, "step": 11864 }, { "epoch": 0.40717227179135207, "grad_norm": 0.7848741687233214, "learning_rate": 6.711638284140112e-06, "loss": 0.2787, "step": 11865 }, { "epoch": 0.4072065888812629, "grad_norm": 0.6484100115775361, "learning_rate": 6.711116111949947e-06, "loss": 0.2696, "step": 11866 }, { "epoch": 0.40724090597117363, "grad_norm": 0.821927483675575, "learning_rate": 6.710593918620936e-06, "loss": 0.3315, "step": 11867 }, { "epoch": 0.40727522306108443, "grad_norm": 0.6845794498474038, "learning_rate": 6.710071704159532e-06, "loss": 0.3009, "step": 11868 }, { "epoch": 0.4073095401509952, "grad_norm": 0.8045142307706982, "learning_rate": 6.709549468572187e-06, "loss": 0.3634, "step": 11869 }, { "epoch": 0.407343857240906, "grad_norm": 0.7813866582133182, "learning_rate": 6.709027211865351e-06, "loss": 0.2405, "step": 11870 }, { "epoch": 0.40737817433081674, "grad_norm": 0.8343632104249041, "learning_rate": 6.708504934045474e-06, "loss": 0.3888, "step": 11871 }, { "epoch": 0.4074124914207275, "grad_norm": 0.8332603445271652, "learning_rate": 6.707982635119014e-06, "loss": 0.2622, "step": 11872 }, { "epoch": 0.4074468085106383, "grad_norm": 0.7889349283732867, "learning_rate": 6.707460315092418e-06, "loss": 0.2981, "step": 11873 }, { "epoch": 0.40748112560054905, "grad_norm": 0.8287100524575703, "learning_rate": 6.706937973972139e-06, "loss": 0.2569, "step": 11874 }, { "epoch": 0.40751544269045986, "grad_norm": 0.8715319711995929, "learning_rate": 6.706415611764635e-06, "loss": 0.3135, "step": 11875 }, { "epoch": 0.4075497597803706, "grad_norm": 0.7564508878976519, "learning_rate": 6.7058932284763536e-06, "loss": 0.2697, "step": 11876 }, { "epoch": 0.4075840768702814, "grad_norm": 0.6697841220206339, "learning_rate": 6.705370824113751e-06, "loss": 0.2573, "step": 11877 }, { "epoch": 0.40761839396019217, "grad_norm": 0.7938015917263295, "learning_rate": 6.704848398683278e-06, "loss": 0.3272, "step": 11878 }, { "epoch": 0.407652711050103, "grad_norm": 0.6947822406213081, "learning_rate": 6.704325952191393e-06, "loss": 0.2664, "step": 11879 }, { "epoch": 0.40768702814001373, "grad_norm": 0.8525627779627563, "learning_rate": 6.703803484644547e-06, "loss": 0.3792, "step": 11880 }, { "epoch": 0.4077213452299245, "grad_norm": 0.7633697258240995, "learning_rate": 6.703280996049195e-06, "loss": 0.3676, "step": 11881 }, { "epoch": 0.4077556623198353, "grad_norm": 0.7362257447589108, "learning_rate": 6.7027584864117935e-06, "loss": 0.3066, "step": 11882 }, { "epoch": 0.40778997940974604, "grad_norm": 0.9082019683440938, "learning_rate": 6.702235955738795e-06, "loss": 0.2984, "step": 11883 }, { "epoch": 0.40782429649965685, "grad_norm": 0.8883965407895377, "learning_rate": 6.701713404036656e-06, "loss": 0.3546, "step": 11884 }, { "epoch": 0.4078586135895676, "grad_norm": 0.8886297134470184, "learning_rate": 6.7011908313118316e-06, "loss": 0.2962, "step": 11885 }, { "epoch": 0.4078929306794784, "grad_norm": 0.8235379075434686, "learning_rate": 6.700668237570777e-06, "loss": 0.2657, "step": 11886 }, { "epoch": 0.40792724776938916, "grad_norm": 0.7994251848746013, "learning_rate": 6.700145622819951e-06, "loss": 0.3255, "step": 11887 }, { "epoch": 0.4079615648592999, "grad_norm": 0.7740613715722587, "learning_rate": 6.699622987065808e-06, "loss": 0.3112, "step": 11888 }, { "epoch": 0.4079958819492107, "grad_norm": 0.7806294714804373, "learning_rate": 6.699100330314805e-06, "loss": 0.2892, "step": 11889 }, { "epoch": 0.40803019903912147, "grad_norm": 0.7637483729881318, "learning_rate": 6.698577652573397e-06, "loss": 0.2934, "step": 11890 }, { "epoch": 0.4080645161290323, "grad_norm": 0.7874447960731403, "learning_rate": 6.698054953848043e-06, "loss": 0.3705, "step": 11891 }, { "epoch": 0.408098833218943, "grad_norm": 0.965084333968342, "learning_rate": 6.6975322341452e-06, "loss": 0.3196, "step": 11892 }, { "epoch": 0.40813315030885383, "grad_norm": 0.8816335849552087, "learning_rate": 6.697009493471325e-06, "loss": 0.3624, "step": 11893 }, { "epoch": 0.4081674673987646, "grad_norm": 0.8013345420236077, "learning_rate": 6.696486731832875e-06, "loss": 0.283, "step": 11894 }, { "epoch": 0.40820178448867533, "grad_norm": 0.8263831447660411, "learning_rate": 6.695963949236311e-06, "loss": 0.3349, "step": 11895 }, { "epoch": 0.40823610157858614, "grad_norm": 0.8188486250110061, "learning_rate": 6.695441145688089e-06, "loss": 0.3372, "step": 11896 }, { "epoch": 0.4082704186684969, "grad_norm": 0.7210217348830285, "learning_rate": 6.6949183211946676e-06, "loss": 0.3103, "step": 11897 }, { "epoch": 0.4083047357584077, "grad_norm": 0.7729965851306135, "learning_rate": 6.69439547576251e-06, "loss": 0.2479, "step": 11898 }, { "epoch": 0.40833905284831845, "grad_norm": 0.861740017760643, "learning_rate": 6.693872609398069e-06, "loss": 0.2792, "step": 11899 }, { "epoch": 0.40837336993822926, "grad_norm": 0.7643199452236153, "learning_rate": 6.693349722107806e-06, "loss": 0.2877, "step": 11900 }, { "epoch": 0.40840768702814, "grad_norm": 0.8011901852873433, "learning_rate": 6.692826813898182e-06, "loss": 0.2822, "step": 11901 }, { "epoch": 0.4084420041180508, "grad_norm": 0.73454928379533, "learning_rate": 6.692303884775657e-06, "loss": 0.2716, "step": 11902 }, { "epoch": 0.40847632120796157, "grad_norm": 0.7936918542462238, "learning_rate": 6.69178093474669e-06, "loss": 0.3222, "step": 11903 }, { "epoch": 0.4085106382978723, "grad_norm": 0.7713197735917097, "learning_rate": 6.691257963817743e-06, "loss": 0.3214, "step": 11904 }, { "epoch": 0.4085449553877831, "grad_norm": 0.7131266618722775, "learning_rate": 6.690734971995274e-06, "loss": 0.2728, "step": 11905 }, { "epoch": 0.4085792724776939, "grad_norm": 0.8422172222847941, "learning_rate": 6.690211959285747e-06, "loss": 0.2963, "step": 11906 }, { "epoch": 0.4086135895676047, "grad_norm": 0.8412017091392328, "learning_rate": 6.689688925695621e-06, "loss": 0.3005, "step": 11907 }, { "epoch": 0.40864790665751544, "grad_norm": 0.7539651006307843, "learning_rate": 6.689165871231359e-06, "loss": 0.3135, "step": 11908 }, { "epoch": 0.40868222374742624, "grad_norm": 0.7572055352970647, "learning_rate": 6.688642795899421e-06, "loss": 0.3283, "step": 11909 }, { "epoch": 0.408716540837337, "grad_norm": 0.9743303447369385, "learning_rate": 6.688119699706272e-06, "loss": 0.2853, "step": 11910 }, { "epoch": 0.40875085792724775, "grad_norm": 0.7439424908697981, "learning_rate": 6.687596582658371e-06, "loss": 0.3005, "step": 11911 }, { "epoch": 0.40878517501715855, "grad_norm": 0.7303865905185004, "learning_rate": 6.687073444762182e-06, "loss": 0.284, "step": 11912 }, { "epoch": 0.4088194921070693, "grad_norm": 0.767075347129254, "learning_rate": 6.6865502860241675e-06, "loss": 0.3045, "step": 11913 }, { "epoch": 0.4088538091969801, "grad_norm": 0.835105098325649, "learning_rate": 6.686027106450791e-06, "loss": 0.2724, "step": 11914 }, { "epoch": 0.40888812628689086, "grad_norm": 0.7695327685633403, "learning_rate": 6.685503906048514e-06, "loss": 0.2906, "step": 11915 }, { "epoch": 0.40892244337680167, "grad_norm": 0.7388556136363492, "learning_rate": 6.684980684823801e-06, "loss": 0.2938, "step": 11916 }, { "epoch": 0.4089567604667124, "grad_norm": 0.7018792480821887, "learning_rate": 6.684457442783118e-06, "loss": 0.2665, "step": 11917 }, { "epoch": 0.4089910775566232, "grad_norm": 0.8184835230405199, "learning_rate": 6.683934179932926e-06, "loss": 0.3311, "step": 11918 }, { "epoch": 0.409025394646534, "grad_norm": 0.7051467462441642, "learning_rate": 6.68341089627969e-06, "loss": 0.2343, "step": 11919 }, { "epoch": 0.40905971173644473, "grad_norm": 0.7461128984113834, "learning_rate": 6.6828875918298776e-06, "loss": 0.2474, "step": 11920 }, { "epoch": 0.40909402882635554, "grad_norm": 0.916694335053609, "learning_rate": 6.6823642665899495e-06, "loss": 0.2833, "step": 11921 }, { "epoch": 0.4091283459162663, "grad_norm": 0.8342010615321986, "learning_rate": 6.6818409205663715e-06, "loss": 0.2988, "step": 11922 }, { "epoch": 0.4091626630061771, "grad_norm": 0.8636348469428149, "learning_rate": 6.6813175537656106e-06, "loss": 0.301, "step": 11923 }, { "epoch": 0.40919698009608785, "grad_norm": 0.844480780485385, "learning_rate": 6.680794166194131e-06, "loss": 0.3372, "step": 11924 }, { "epoch": 0.40923129718599865, "grad_norm": 0.8331119333964226, "learning_rate": 6.680270757858399e-06, "loss": 0.3439, "step": 11925 }, { "epoch": 0.4092656142759094, "grad_norm": 0.8026903359160096, "learning_rate": 6.679747328764883e-06, "loss": 0.2905, "step": 11926 }, { "epoch": 0.40929993136582016, "grad_norm": 0.8178541771577549, "learning_rate": 6.679223878920045e-06, "loss": 0.2796, "step": 11927 }, { "epoch": 0.40933424845573096, "grad_norm": 0.8041049756893667, "learning_rate": 6.678700408330354e-06, "loss": 0.3184, "step": 11928 }, { "epoch": 0.4093685655456417, "grad_norm": 0.8306956241537199, "learning_rate": 6.678176917002277e-06, "loss": 0.3194, "step": 11929 }, { "epoch": 0.4094028826355525, "grad_norm": 0.8245864318857954, "learning_rate": 6.6776534049422805e-06, "loss": 0.289, "step": 11930 }, { "epoch": 0.4094371997254633, "grad_norm": 0.73482974960285, "learning_rate": 6.6771298721568315e-06, "loss": 0.2963, "step": 11931 }, { "epoch": 0.4094715168153741, "grad_norm": 0.8031146160312455, "learning_rate": 6.676606318652399e-06, "loss": 0.256, "step": 11932 }, { "epoch": 0.40950583390528483, "grad_norm": 0.7787231035977839, "learning_rate": 6.676082744435451e-06, "loss": 0.3027, "step": 11933 }, { "epoch": 0.4095401509951956, "grad_norm": 0.8914924246844587, "learning_rate": 6.675559149512454e-06, "loss": 0.2676, "step": 11934 }, { "epoch": 0.4095744680851064, "grad_norm": 0.7997835144416681, "learning_rate": 6.675035533889876e-06, "loss": 0.3091, "step": 11935 }, { "epoch": 0.40960878517501714, "grad_norm": 0.7986089537153568, "learning_rate": 6.67451189757419e-06, "loss": 0.2558, "step": 11936 }, { "epoch": 0.40964310226492795, "grad_norm": 0.7415347864770357, "learning_rate": 6.6739882405718596e-06, "loss": 0.3083, "step": 11937 }, { "epoch": 0.4096774193548387, "grad_norm": 0.7095361547777442, "learning_rate": 6.673464562889355e-06, "loss": 0.2449, "step": 11938 }, { "epoch": 0.4097117364447495, "grad_norm": 0.8964290560574076, "learning_rate": 6.67294086453315e-06, "loss": 0.331, "step": 11939 }, { "epoch": 0.40974605353466026, "grad_norm": 0.704021784961895, "learning_rate": 6.6724171455097085e-06, "loss": 0.3472, "step": 11940 }, { "epoch": 0.409780370624571, "grad_norm": 0.7774757692265322, "learning_rate": 6.671893405825503e-06, "loss": 0.3372, "step": 11941 }, { "epoch": 0.4098146877144818, "grad_norm": 0.9791491727271723, "learning_rate": 6.671369645487006e-06, "loss": 0.2624, "step": 11942 }, { "epoch": 0.40984900480439257, "grad_norm": 0.7474223093041465, "learning_rate": 6.670845864500684e-06, "loss": 0.2989, "step": 11943 }, { "epoch": 0.4098833218943034, "grad_norm": 0.7478882452245068, "learning_rate": 6.670322062873009e-06, "loss": 0.2652, "step": 11944 }, { "epoch": 0.4099176389842141, "grad_norm": 0.777929992496166, "learning_rate": 6.669798240610454e-06, "loss": 0.3126, "step": 11945 }, { "epoch": 0.40995195607412493, "grad_norm": 0.7873330191302653, "learning_rate": 6.669274397719486e-06, "loss": 0.3026, "step": 11946 }, { "epoch": 0.4099862731640357, "grad_norm": 0.8226389187679082, "learning_rate": 6.66875053420658e-06, "loss": 0.3529, "step": 11947 }, { "epoch": 0.4100205902539465, "grad_norm": 0.7960244756997865, "learning_rate": 6.668226650078206e-06, "loss": 0.263, "step": 11948 }, { "epoch": 0.41005490734385724, "grad_norm": 0.6787699334475602, "learning_rate": 6.667702745340841e-06, "loss": 0.2666, "step": 11949 }, { "epoch": 0.410089224433768, "grad_norm": 0.7873205903918427, "learning_rate": 6.667178820000949e-06, "loss": 0.3397, "step": 11950 }, { "epoch": 0.4101235415236788, "grad_norm": 0.7872170349796871, "learning_rate": 6.666654874065006e-06, "loss": 0.2644, "step": 11951 }, { "epoch": 0.41015785861358955, "grad_norm": 0.698543923865466, "learning_rate": 6.666130907539487e-06, "loss": 0.2408, "step": 11952 }, { "epoch": 0.41019217570350036, "grad_norm": 0.8450611898607829, "learning_rate": 6.665606920430863e-06, "loss": 0.2983, "step": 11953 }, { "epoch": 0.4102264927934111, "grad_norm": 0.9434310534319967, "learning_rate": 6.6650829127456065e-06, "loss": 0.2793, "step": 11954 }, { "epoch": 0.4102608098833219, "grad_norm": 0.6909343556278343, "learning_rate": 6.664558884490193e-06, "loss": 0.2527, "step": 11955 }, { "epoch": 0.41029512697323267, "grad_norm": 0.7174482285686595, "learning_rate": 6.6640348356710935e-06, "loss": 0.3041, "step": 11956 }, { "epoch": 0.4103294440631434, "grad_norm": 0.8019350681724975, "learning_rate": 6.663510766294785e-06, "loss": 0.2964, "step": 11957 }, { "epoch": 0.41036376115305423, "grad_norm": 0.7000622639799208, "learning_rate": 6.66298667636774e-06, "loss": 0.3083, "step": 11958 }, { "epoch": 0.410398078242965, "grad_norm": 0.7510792144194736, "learning_rate": 6.662462565896435e-06, "loss": 0.276, "step": 11959 }, { "epoch": 0.4104323953328758, "grad_norm": 0.8404294421850595, "learning_rate": 6.661938434887341e-06, "loss": 0.3444, "step": 11960 }, { "epoch": 0.41046671242278654, "grad_norm": 0.800884712900166, "learning_rate": 6.661414283346935e-06, "loss": 0.2846, "step": 11961 }, { "epoch": 0.41050102951269735, "grad_norm": 0.7371080516397235, "learning_rate": 6.660890111281695e-06, "loss": 0.3134, "step": 11962 }, { "epoch": 0.4105353466026081, "grad_norm": 0.7465085988989709, "learning_rate": 6.6603659186980916e-06, "loss": 0.235, "step": 11963 }, { "epoch": 0.41056966369251885, "grad_norm": 0.7508209287395039, "learning_rate": 6.659841705602604e-06, "loss": 0.3201, "step": 11964 }, { "epoch": 0.41060398078242966, "grad_norm": 0.9047602517029355, "learning_rate": 6.65931747200171e-06, "loss": 0.2606, "step": 11965 }, { "epoch": 0.4106382978723404, "grad_norm": 0.7567285042784722, "learning_rate": 6.65879321790188e-06, "loss": 0.2929, "step": 11966 }, { "epoch": 0.4106726149622512, "grad_norm": 0.7542556500845381, "learning_rate": 6.658268943309594e-06, "loss": 0.2998, "step": 11967 }, { "epoch": 0.41070693205216197, "grad_norm": 0.7228829540392043, "learning_rate": 6.657744648231329e-06, "loss": 0.2555, "step": 11968 }, { "epoch": 0.4107412491420728, "grad_norm": 0.7309600962800955, "learning_rate": 6.6572203326735615e-06, "loss": 0.2644, "step": 11969 }, { "epoch": 0.4107755662319835, "grad_norm": 0.6862004916660018, "learning_rate": 6.656695996642768e-06, "loss": 0.3148, "step": 11970 }, { "epoch": 0.4108098833218943, "grad_norm": 0.7226680881118733, "learning_rate": 6.65617164014543e-06, "loss": 0.3134, "step": 11971 }, { "epoch": 0.4108442004118051, "grad_norm": 0.7392371747666888, "learning_rate": 6.65564726318802e-06, "loss": 0.3134, "step": 11972 }, { "epoch": 0.41087851750171583, "grad_norm": 0.8491443928949266, "learning_rate": 6.655122865777018e-06, "loss": 0.3261, "step": 11973 }, { "epoch": 0.41091283459162664, "grad_norm": 0.721448480568655, "learning_rate": 6.654598447918903e-06, "loss": 0.3266, "step": 11974 }, { "epoch": 0.4109471516815374, "grad_norm": 0.8214764067635554, "learning_rate": 6.654074009620154e-06, "loss": 0.3144, "step": 11975 }, { "epoch": 0.4109814687714482, "grad_norm": 0.7627119201719107, "learning_rate": 6.653549550887249e-06, "loss": 0.3651, "step": 11976 }, { "epoch": 0.41101578586135895, "grad_norm": 0.7926611645784215, "learning_rate": 6.653025071726667e-06, "loss": 0.2721, "step": 11977 }, { "epoch": 0.41105010295126976, "grad_norm": 0.6769156291475901, "learning_rate": 6.652500572144888e-06, "loss": 0.2914, "step": 11978 }, { "epoch": 0.4110844200411805, "grad_norm": 0.711640730170452, "learning_rate": 6.65197605214839e-06, "loss": 0.2844, "step": 11979 }, { "epoch": 0.41111873713109126, "grad_norm": 0.8036483605186393, "learning_rate": 6.651451511743654e-06, "loss": 0.3013, "step": 11980 }, { "epoch": 0.41115305422100207, "grad_norm": 0.8014631763851168, "learning_rate": 6.650926950937161e-06, "loss": 0.2857, "step": 11981 }, { "epoch": 0.4111873713109128, "grad_norm": 0.7411772397911298, "learning_rate": 6.650402369735388e-06, "loss": 0.2934, "step": 11982 }, { "epoch": 0.4112216884008236, "grad_norm": 0.7722587324491335, "learning_rate": 6.64987776814482e-06, "loss": 0.335, "step": 11983 }, { "epoch": 0.4112560054907344, "grad_norm": 0.8239284411229049, "learning_rate": 6.649353146171936e-06, "loss": 0.2795, "step": 11984 }, { "epoch": 0.4112903225806452, "grad_norm": 0.734659128182187, "learning_rate": 6.648828503823216e-06, "loss": 0.3265, "step": 11985 }, { "epoch": 0.41132463967055594, "grad_norm": 0.8222356395791345, "learning_rate": 6.6483038411051416e-06, "loss": 0.3132, "step": 11986 }, { "epoch": 0.4113589567604667, "grad_norm": 0.7367935945828267, "learning_rate": 6.6477791580241965e-06, "loss": 0.3119, "step": 11987 }, { "epoch": 0.4113932738503775, "grad_norm": 0.7546852054072054, "learning_rate": 6.64725445458686e-06, "loss": 0.2995, "step": 11988 }, { "epoch": 0.41142759094028825, "grad_norm": 0.9024854700028739, "learning_rate": 6.6467297307996155e-06, "loss": 0.3195, "step": 11989 }, { "epoch": 0.41146190803019905, "grad_norm": 0.8464836262766259, "learning_rate": 6.646204986668945e-06, "loss": 0.381, "step": 11990 }, { "epoch": 0.4114962251201098, "grad_norm": 0.7682657036315779, "learning_rate": 6.6456802222013315e-06, "loss": 0.3159, "step": 11991 }, { "epoch": 0.4115305422100206, "grad_norm": 0.7024416713348494, "learning_rate": 6.645155437403257e-06, "loss": 0.3197, "step": 11992 }, { "epoch": 0.41156485929993136, "grad_norm": 0.776660242137309, "learning_rate": 6.644630632281205e-06, "loss": 0.2605, "step": 11993 }, { "epoch": 0.4115991763898421, "grad_norm": 0.702758748990344, "learning_rate": 6.64410580684166e-06, "loss": 0.259, "step": 11994 }, { "epoch": 0.4116334934797529, "grad_norm": 0.7357262437799852, "learning_rate": 6.6435809610911036e-06, "loss": 0.2868, "step": 11995 }, { "epoch": 0.4116678105696637, "grad_norm": 0.7450499043463845, "learning_rate": 6.643056095036021e-06, "loss": 0.2834, "step": 11996 }, { "epoch": 0.4117021276595745, "grad_norm": 0.9436695787007661, "learning_rate": 6.642531208682895e-06, "loss": 0.3481, "step": 11997 }, { "epoch": 0.41173644474948523, "grad_norm": 0.8238635168751363, "learning_rate": 6.642006302038212e-06, "loss": 0.2677, "step": 11998 }, { "epoch": 0.41177076183939604, "grad_norm": 0.7465430540174882, "learning_rate": 6.641481375108456e-06, "loss": 0.3051, "step": 11999 }, { "epoch": 0.4118050789293068, "grad_norm": 0.7831680974339352, "learning_rate": 6.6409564279001115e-06, "loss": 0.2616, "step": 12000 }, { "epoch": 0.4118393960192176, "grad_norm": 0.8133513818582679, "learning_rate": 6.640431460419663e-06, "loss": 0.3202, "step": 12001 }, { "epoch": 0.41187371310912835, "grad_norm": 0.6893328015542127, "learning_rate": 6.639906472673595e-06, "loss": 0.2594, "step": 12002 }, { "epoch": 0.4119080301990391, "grad_norm": 0.7425531239790502, "learning_rate": 6.639381464668397e-06, "loss": 0.2684, "step": 12003 }, { "epoch": 0.4119423472889499, "grad_norm": 0.7668832406624817, "learning_rate": 6.63885643641055e-06, "loss": 0.3096, "step": 12004 }, { "epoch": 0.41197666437886066, "grad_norm": 0.7243215484709962, "learning_rate": 6.638331387906542e-06, "loss": 0.3134, "step": 12005 }, { "epoch": 0.41201098146877146, "grad_norm": 0.7638063423266176, "learning_rate": 6.6378063191628615e-06, "loss": 0.3429, "step": 12006 }, { "epoch": 0.4120452985586822, "grad_norm": 0.9619054511768004, "learning_rate": 6.637281230185995e-06, "loss": 0.2906, "step": 12007 }, { "epoch": 0.412079615648593, "grad_norm": 0.6218042015670601, "learning_rate": 6.636756120982425e-06, "loss": 0.2353, "step": 12008 }, { "epoch": 0.4121139327385038, "grad_norm": 0.8075812651129772, "learning_rate": 6.636230991558642e-06, "loss": 0.293, "step": 12009 }, { "epoch": 0.4121482498284145, "grad_norm": 0.7653994681991241, "learning_rate": 6.635705841921134e-06, "loss": 0.3065, "step": 12010 }, { "epoch": 0.41218256691832533, "grad_norm": 0.7499205364960239, "learning_rate": 6.635180672076387e-06, "loss": 0.2398, "step": 12011 }, { "epoch": 0.4122168840082361, "grad_norm": 0.7735958682886107, "learning_rate": 6.634655482030888e-06, "loss": 0.3539, "step": 12012 }, { "epoch": 0.4122512010981469, "grad_norm": 0.7899189505464199, "learning_rate": 6.634130271791127e-06, "loss": 0.3628, "step": 12013 }, { "epoch": 0.41228551818805764, "grad_norm": 0.8168399213004, "learning_rate": 6.633605041363591e-06, "loss": 0.263, "step": 12014 }, { "epoch": 0.41231983527796845, "grad_norm": 0.7477664322621762, "learning_rate": 6.6330797907547705e-06, "loss": 0.3012, "step": 12015 }, { "epoch": 0.4123541523678792, "grad_norm": 0.8174491816942488, "learning_rate": 6.6325545199711535e-06, "loss": 0.2536, "step": 12016 }, { "epoch": 0.41238846945778995, "grad_norm": 0.8474715404500605, "learning_rate": 6.632029229019228e-06, "loss": 0.3335, "step": 12017 }, { "epoch": 0.41242278654770076, "grad_norm": 0.787669002556522, "learning_rate": 6.631503917905484e-06, "loss": 0.2983, "step": 12018 }, { "epoch": 0.4124571036376115, "grad_norm": 0.7190595169852378, "learning_rate": 6.63097858663641e-06, "loss": 0.2586, "step": 12019 }, { "epoch": 0.4124914207275223, "grad_norm": 0.7040353105535767, "learning_rate": 6.630453235218499e-06, "loss": 0.3111, "step": 12020 }, { "epoch": 0.41252573781743307, "grad_norm": 0.9281375433984977, "learning_rate": 6.629927863658238e-06, "loss": 0.283, "step": 12021 }, { "epoch": 0.4125600549073439, "grad_norm": 0.7190598061664522, "learning_rate": 6.62940247196212e-06, "loss": 0.2533, "step": 12022 }, { "epoch": 0.4125943719972546, "grad_norm": 0.6553676764837366, "learning_rate": 6.628877060136634e-06, "loss": 0.2325, "step": 12023 }, { "epoch": 0.41262868908716543, "grad_norm": 0.8239987161237935, "learning_rate": 6.62835162818827e-06, "loss": 0.3015, "step": 12024 }, { "epoch": 0.4126630061770762, "grad_norm": 0.6883911991727194, "learning_rate": 6.627826176123521e-06, "loss": 0.2639, "step": 12025 }, { "epoch": 0.41269732326698694, "grad_norm": 0.7291248398181105, "learning_rate": 6.627300703948878e-06, "loss": 0.3311, "step": 12026 }, { "epoch": 0.41273164035689774, "grad_norm": 0.7771851424849754, "learning_rate": 6.6267752116708295e-06, "loss": 0.2932, "step": 12027 }, { "epoch": 0.4127659574468085, "grad_norm": 0.7240943647724019, "learning_rate": 6.626249699295871e-06, "loss": 0.2668, "step": 12028 }, { "epoch": 0.4128002745367193, "grad_norm": 0.8126596660622125, "learning_rate": 6.625724166830495e-06, "loss": 0.3308, "step": 12029 }, { "epoch": 0.41283459162663005, "grad_norm": 0.8487855386895059, "learning_rate": 6.625198614281191e-06, "loss": 0.2912, "step": 12030 }, { "epoch": 0.41286890871654086, "grad_norm": 0.7658271725636842, "learning_rate": 6.624673041654453e-06, "loss": 0.3502, "step": 12031 }, { "epoch": 0.4129032258064516, "grad_norm": 0.7460879467149306, "learning_rate": 6.6241474489567745e-06, "loss": 0.2896, "step": 12032 }, { "epoch": 0.41293754289636236, "grad_norm": 0.665152260612456, "learning_rate": 6.623621836194647e-06, "loss": 0.2332, "step": 12033 }, { "epoch": 0.41297185998627317, "grad_norm": 1.7488537498858843, "learning_rate": 6.623096203374565e-06, "loss": 0.3164, "step": 12034 }, { "epoch": 0.4130061770761839, "grad_norm": 0.726007855515128, "learning_rate": 6.6225705505030216e-06, "loss": 0.3342, "step": 12035 }, { "epoch": 0.41304049416609473, "grad_norm": 0.7366852482759726, "learning_rate": 6.6220448775865105e-06, "loss": 0.2913, "step": 12036 }, { "epoch": 0.4130748112560055, "grad_norm": 0.7191278485140505, "learning_rate": 6.621519184631526e-06, "loss": 0.2792, "step": 12037 }, { "epoch": 0.4131091283459163, "grad_norm": 0.7595852125796453, "learning_rate": 6.620993471644564e-06, "loss": 0.3007, "step": 12038 }, { "epoch": 0.41314344543582704, "grad_norm": 0.8272586453709893, "learning_rate": 6.620467738632117e-06, "loss": 0.2869, "step": 12039 }, { "epoch": 0.4131777625257378, "grad_norm": 0.7457262842400839, "learning_rate": 6.619941985600679e-06, "loss": 0.2894, "step": 12040 }, { "epoch": 0.4132120796156486, "grad_norm": 0.6559175364063573, "learning_rate": 6.619416212556748e-06, "loss": 0.3073, "step": 12041 }, { "epoch": 0.41324639670555935, "grad_norm": 0.7516598173395571, "learning_rate": 6.618890419506816e-06, "loss": 0.2867, "step": 12042 }, { "epoch": 0.41328071379547016, "grad_norm": 0.7380082157729144, "learning_rate": 6.6183646064573814e-06, "loss": 0.27, "step": 12043 }, { "epoch": 0.4133150308853809, "grad_norm": 0.7680093278095749, "learning_rate": 6.617838773414939e-06, "loss": 0.3253, "step": 12044 }, { "epoch": 0.4133493479752917, "grad_norm": 0.681856673621097, "learning_rate": 6.617312920385986e-06, "loss": 0.3081, "step": 12045 }, { "epoch": 0.41338366506520247, "grad_norm": 0.8074169996808145, "learning_rate": 6.616787047377016e-06, "loss": 0.2668, "step": 12046 }, { "epoch": 0.4134179821551133, "grad_norm": 0.7272849967911678, "learning_rate": 6.616261154394525e-06, "loss": 0.3154, "step": 12047 }, { "epoch": 0.413452299245024, "grad_norm": 0.7521146919491861, "learning_rate": 6.615735241445016e-06, "loss": 0.344, "step": 12048 }, { "epoch": 0.4134866163349348, "grad_norm": 0.6968556083046309, "learning_rate": 6.615209308534978e-06, "loss": 0.2797, "step": 12049 }, { "epoch": 0.4135209334248456, "grad_norm": 0.7703023520004408, "learning_rate": 6.614683355670915e-06, "loss": 0.2409, "step": 12050 }, { "epoch": 0.41355525051475633, "grad_norm": 0.7954746146542679, "learning_rate": 6.614157382859321e-06, "loss": 0.2936, "step": 12051 }, { "epoch": 0.41358956760466714, "grad_norm": 0.7518706428937518, "learning_rate": 6.613631390106694e-06, "loss": 0.3335, "step": 12052 }, { "epoch": 0.4136238846945779, "grad_norm": 0.7478239938988099, "learning_rate": 6.613105377419532e-06, "loss": 0.2878, "step": 12053 }, { "epoch": 0.4136582017844887, "grad_norm": 0.7515345430554713, "learning_rate": 6.612579344804334e-06, "loss": 0.3092, "step": 12054 }, { "epoch": 0.41369251887439945, "grad_norm": 0.7646638636014643, "learning_rate": 6.6120532922676e-06, "loss": 0.3268, "step": 12055 }, { "epoch": 0.4137268359643102, "grad_norm": 0.8024343455207518, "learning_rate": 6.611527219815826e-06, "loss": 0.286, "step": 12056 }, { "epoch": 0.413761153054221, "grad_norm": 0.8531453458013043, "learning_rate": 6.61100112745551e-06, "loss": 0.3327, "step": 12057 }, { "epoch": 0.41379547014413176, "grad_norm": 0.787440906060195, "learning_rate": 6.610475015193155e-06, "loss": 0.3473, "step": 12058 }, { "epoch": 0.41382978723404257, "grad_norm": 0.7340126094831211, "learning_rate": 6.609948883035259e-06, "loss": 0.2846, "step": 12059 }, { "epoch": 0.4138641043239533, "grad_norm": 0.7353361218134989, "learning_rate": 6.6094227309883204e-06, "loss": 0.2847, "step": 12060 }, { "epoch": 0.4138984214138641, "grad_norm": 0.7268641114725963, "learning_rate": 6.608896559058843e-06, "loss": 0.3038, "step": 12061 }, { "epoch": 0.4139327385037749, "grad_norm": 0.795221076735452, "learning_rate": 6.608370367253323e-06, "loss": 0.287, "step": 12062 }, { "epoch": 0.41396705559368563, "grad_norm": 0.8620722831839832, "learning_rate": 6.607844155578262e-06, "loss": 0.3107, "step": 12063 }, { "epoch": 0.41400137268359644, "grad_norm": 0.8461700206639637, "learning_rate": 6.60731792404016e-06, "loss": 0.2987, "step": 12064 }, { "epoch": 0.4140356897735072, "grad_norm": 0.8041470382328806, "learning_rate": 6.606791672645521e-06, "loss": 0.3329, "step": 12065 }, { "epoch": 0.414070006863418, "grad_norm": 0.8377358918231788, "learning_rate": 6.606265401400842e-06, "loss": 0.2829, "step": 12066 }, { "epoch": 0.41410432395332875, "grad_norm": 0.8235649011152544, "learning_rate": 6.605739110312629e-06, "loss": 0.3257, "step": 12067 }, { "epoch": 0.41413864104323955, "grad_norm": 0.7993631802538682, "learning_rate": 6.605212799387381e-06, "loss": 0.3024, "step": 12068 }, { "epoch": 0.4141729581331503, "grad_norm": 0.7083021541616531, "learning_rate": 6.6046864686315995e-06, "loss": 0.2667, "step": 12069 }, { "epoch": 0.4142072752230611, "grad_norm": 0.9800285519553423, "learning_rate": 6.6041601180517865e-06, "loss": 0.2874, "step": 12070 }, { "epoch": 0.41424159231297186, "grad_norm": 0.8504356943473157, "learning_rate": 6.603633747654448e-06, "loss": 0.3148, "step": 12071 }, { "epoch": 0.4142759094028826, "grad_norm": 0.8037632104936631, "learning_rate": 6.603107357446082e-06, "loss": 0.3217, "step": 12072 }, { "epoch": 0.4143102264927934, "grad_norm": 0.8947305827965385, "learning_rate": 6.602580947433194e-06, "loss": 0.3328, "step": 12073 }, { "epoch": 0.4143445435827042, "grad_norm": 0.7243477209952237, "learning_rate": 6.60205451762229e-06, "loss": 0.2515, "step": 12074 }, { "epoch": 0.414378860672615, "grad_norm": 0.8323825744631276, "learning_rate": 6.601528068019867e-06, "loss": 0.2923, "step": 12075 }, { "epoch": 0.41441317776252573, "grad_norm": 0.7748726818162205, "learning_rate": 6.601001598632431e-06, "loss": 0.2476, "step": 12076 }, { "epoch": 0.41444749485243654, "grad_norm": 0.7948240161260892, "learning_rate": 6.600475109466491e-06, "loss": 0.3032, "step": 12077 }, { "epoch": 0.4144818119423473, "grad_norm": 0.7134843706638221, "learning_rate": 6.599948600528544e-06, "loss": 0.2521, "step": 12078 }, { "epoch": 0.41451612903225804, "grad_norm": 0.8009180717039409, "learning_rate": 6.5994220718250965e-06, "loss": 0.3565, "step": 12079 }, { "epoch": 0.41455044612216885, "grad_norm": 0.8277155515031244, "learning_rate": 6.598895523362656e-06, "loss": 0.2833, "step": 12080 }, { "epoch": 0.4145847632120796, "grad_norm": 0.7830849031240993, "learning_rate": 6.598368955147725e-06, "loss": 0.3107, "step": 12081 }, { "epoch": 0.4146190803019904, "grad_norm": 0.7927803338466036, "learning_rate": 6.597842367186809e-06, "loss": 0.2768, "step": 12082 }, { "epoch": 0.41465339739190116, "grad_norm": 0.743314901589871, "learning_rate": 6.597315759486413e-06, "loss": 0.3202, "step": 12083 }, { "epoch": 0.41468771448181196, "grad_norm": 0.7473692083723822, "learning_rate": 6.596789132053044e-06, "loss": 0.2915, "step": 12084 }, { "epoch": 0.4147220315717227, "grad_norm": 0.735682133485939, "learning_rate": 6.596262484893207e-06, "loss": 0.2446, "step": 12085 }, { "epoch": 0.41475634866163347, "grad_norm": 0.7441467525351362, "learning_rate": 6.595735818013407e-06, "loss": 0.2733, "step": 12086 }, { "epoch": 0.4147906657515443, "grad_norm": 0.7478517642444598, "learning_rate": 6.595209131420151e-06, "loss": 0.2759, "step": 12087 }, { "epoch": 0.414824982841455, "grad_norm": 0.8672407849943774, "learning_rate": 6.594682425119947e-06, "loss": 0.2768, "step": 12088 }, { "epoch": 0.41485929993136583, "grad_norm": 0.7555947099170011, "learning_rate": 6.594155699119301e-06, "loss": 0.2922, "step": 12089 }, { "epoch": 0.4148936170212766, "grad_norm": 0.7649012798310948, "learning_rate": 6.59362895342472e-06, "loss": 0.3075, "step": 12090 }, { "epoch": 0.4149279341111874, "grad_norm": 0.7874158389820819, "learning_rate": 6.593102188042711e-06, "loss": 0.3064, "step": 12091 }, { "epoch": 0.41496225120109814, "grad_norm": 0.758384805126472, "learning_rate": 6.592575402979781e-06, "loss": 0.3377, "step": 12092 }, { "epoch": 0.41499656829100895, "grad_norm": 0.6942846435150593, "learning_rate": 6.5920485982424395e-06, "loss": 0.2497, "step": 12093 }, { "epoch": 0.4150308853809197, "grad_norm": 0.7590141759966827, "learning_rate": 6.591521773837195e-06, "loss": 0.2728, "step": 12094 }, { "epoch": 0.41506520247083045, "grad_norm": 0.785565913817209, "learning_rate": 6.5909949297705524e-06, "loss": 0.298, "step": 12095 }, { "epoch": 0.41509951956074126, "grad_norm": 0.7667415725010571, "learning_rate": 6.5904680660490236e-06, "loss": 0.2492, "step": 12096 }, { "epoch": 0.415133836650652, "grad_norm": 0.7453815596170792, "learning_rate": 6.589941182679116e-06, "loss": 0.3254, "step": 12097 }, { "epoch": 0.4151681537405628, "grad_norm": 0.8094489790442078, "learning_rate": 6.589414279667339e-06, "loss": 0.2684, "step": 12098 }, { "epoch": 0.41520247083047357, "grad_norm": 0.6875377332846699, "learning_rate": 6.5888873570202e-06, "loss": 0.2417, "step": 12099 }, { "epoch": 0.4152367879203844, "grad_norm": 0.6435049473771587, "learning_rate": 6.5883604147442125e-06, "loss": 0.2279, "step": 12100 }, { "epoch": 0.4152711050102951, "grad_norm": 0.7464018172807049, "learning_rate": 6.587833452845883e-06, "loss": 0.3307, "step": 12101 }, { "epoch": 0.4153054221002059, "grad_norm": 0.7131707578731972, "learning_rate": 6.58730647133172e-06, "loss": 0.278, "step": 12102 }, { "epoch": 0.4153397391901167, "grad_norm": 0.6464361187196986, "learning_rate": 6.58677947020824e-06, "loss": 0.2677, "step": 12103 }, { "epoch": 0.41537405628002744, "grad_norm": 0.8507373079602641, "learning_rate": 6.586252449481948e-06, "loss": 0.3655, "step": 12104 }, { "epoch": 0.41540837336993824, "grad_norm": 0.8450690940398735, "learning_rate": 6.585725409159356e-06, "loss": 0.3097, "step": 12105 }, { "epoch": 0.415442690459849, "grad_norm": 0.7899072148969223, "learning_rate": 6.585198349246977e-06, "loss": 0.331, "step": 12106 }, { "epoch": 0.4154770075497598, "grad_norm": 0.7258946480117263, "learning_rate": 6.58467126975132e-06, "loss": 0.2557, "step": 12107 }, { "epoch": 0.41551132463967055, "grad_norm": 0.8036002176236664, "learning_rate": 6.5841441706788945e-06, "loss": 0.3427, "step": 12108 }, { "epoch": 0.4155456417295813, "grad_norm": 0.6859841601426968, "learning_rate": 6.583617052036218e-06, "loss": 0.2456, "step": 12109 }, { "epoch": 0.4155799588194921, "grad_norm": 0.7696792010000703, "learning_rate": 6.5830899138297965e-06, "loss": 0.2919, "step": 12110 }, { "epoch": 0.41561427590940286, "grad_norm": 0.7986461643700714, "learning_rate": 6.582562756066145e-06, "loss": 0.3028, "step": 12111 }, { "epoch": 0.41564859299931367, "grad_norm": 0.8032982248539371, "learning_rate": 6.582035578751777e-06, "loss": 0.3249, "step": 12112 }, { "epoch": 0.4156829100892244, "grad_norm": 0.6837474886842272, "learning_rate": 6.581508381893205e-06, "loss": 0.2909, "step": 12113 }, { "epoch": 0.41571722717913523, "grad_norm": 0.7990496119366224, "learning_rate": 6.580981165496939e-06, "loss": 0.2942, "step": 12114 }, { "epoch": 0.415751544269046, "grad_norm": 0.7661933263342265, "learning_rate": 6.580453929569495e-06, "loss": 0.2711, "step": 12115 }, { "epoch": 0.4157858613589568, "grad_norm": 0.7985038108732047, "learning_rate": 6.579926674117384e-06, "loss": 0.3124, "step": 12116 }, { "epoch": 0.41582017844886754, "grad_norm": 0.796766302027772, "learning_rate": 6.579399399147121e-06, "loss": 0.2696, "step": 12117 }, { "epoch": 0.4158544955387783, "grad_norm": 0.8430993989897336, "learning_rate": 6.57887210466522e-06, "loss": 0.2658, "step": 12118 }, { "epoch": 0.4158888126286891, "grad_norm": 0.8926142549971968, "learning_rate": 6.5783447906781975e-06, "loss": 0.2908, "step": 12119 }, { "epoch": 0.41592312971859985, "grad_norm": 0.7058285183475309, "learning_rate": 6.577817457192562e-06, "loss": 0.2507, "step": 12120 }, { "epoch": 0.41595744680851066, "grad_norm": 1.000691304010017, "learning_rate": 6.577290104214833e-06, "loss": 0.2903, "step": 12121 }, { "epoch": 0.4159917638984214, "grad_norm": 0.8756715676058349, "learning_rate": 6.576762731751524e-06, "loss": 0.2769, "step": 12122 }, { "epoch": 0.4160260809883322, "grad_norm": 0.6571603052766046, "learning_rate": 6.576235339809148e-06, "loss": 0.2425, "step": 12123 }, { "epoch": 0.41606039807824297, "grad_norm": 0.869364511021605, "learning_rate": 6.575707928394223e-06, "loss": 0.2905, "step": 12124 }, { "epoch": 0.4160947151681537, "grad_norm": 0.7095155287967061, "learning_rate": 6.575180497513264e-06, "loss": 0.2965, "step": 12125 }, { "epoch": 0.4161290322580645, "grad_norm": 0.7053829601944205, "learning_rate": 6.574653047172787e-06, "loss": 0.2945, "step": 12126 }, { "epoch": 0.4161633493479753, "grad_norm": 0.6394691615638216, "learning_rate": 6.574125577379306e-06, "loss": 0.2818, "step": 12127 }, { "epoch": 0.4161976664378861, "grad_norm": 0.6574286944802126, "learning_rate": 6.57359808813934e-06, "loss": 0.2645, "step": 12128 }, { "epoch": 0.41623198352779683, "grad_norm": 0.7281353895588638, "learning_rate": 6.573070579459404e-06, "loss": 0.326, "step": 12129 }, { "epoch": 0.41626630061770764, "grad_norm": 0.7909375549983539, "learning_rate": 6.572543051346014e-06, "loss": 0.2878, "step": 12130 }, { "epoch": 0.4163006177076184, "grad_norm": 0.6582025022767423, "learning_rate": 6.572015503805688e-06, "loss": 0.2578, "step": 12131 }, { "epoch": 0.41633493479752914, "grad_norm": 0.7453002590554393, "learning_rate": 6.571487936844945e-06, "loss": 0.311, "step": 12132 }, { "epoch": 0.41636925188743995, "grad_norm": 0.6862620290091581, "learning_rate": 6.570960350470299e-06, "loss": 0.3223, "step": 12133 }, { "epoch": 0.4164035689773507, "grad_norm": 0.8492523940671718, "learning_rate": 6.57043274468827e-06, "loss": 0.2934, "step": 12134 }, { "epoch": 0.4164378860672615, "grad_norm": 1.0838689131014119, "learning_rate": 6.5699051195053755e-06, "loss": 0.3536, "step": 12135 }, { "epoch": 0.41647220315717226, "grad_norm": 0.7263965334829797, "learning_rate": 6.569377474928133e-06, "loss": 0.261, "step": 12136 }, { "epoch": 0.41650652024708307, "grad_norm": 0.7959329360141979, "learning_rate": 6.568849810963063e-06, "loss": 0.3483, "step": 12137 }, { "epoch": 0.4165408373369938, "grad_norm": 0.7970528173131379, "learning_rate": 6.568322127616682e-06, "loss": 0.3278, "step": 12138 }, { "epoch": 0.4165751544269046, "grad_norm": 0.8561989113840164, "learning_rate": 6.567794424895508e-06, "loss": 0.3579, "step": 12139 }, { "epoch": 0.4166094715168154, "grad_norm": 0.7706962277727621, "learning_rate": 6.567266702806063e-06, "loss": 0.3182, "step": 12140 }, { "epoch": 0.41664378860672613, "grad_norm": 0.8818769962578197, "learning_rate": 6.566738961354866e-06, "loss": 0.2741, "step": 12141 }, { "epoch": 0.41667810569663694, "grad_norm": 0.7018149021119463, "learning_rate": 6.566211200548436e-06, "loss": 0.2828, "step": 12142 }, { "epoch": 0.4167124227865477, "grad_norm": 0.8016511329030704, "learning_rate": 6.565683420393291e-06, "loss": 0.288, "step": 12143 }, { "epoch": 0.4167467398764585, "grad_norm": 0.7081723365088946, "learning_rate": 6.565155620895954e-06, "loss": 0.2882, "step": 12144 }, { "epoch": 0.41678105696636925, "grad_norm": 0.7262248583921332, "learning_rate": 6.5646278020629435e-06, "loss": 0.2784, "step": 12145 }, { "epoch": 0.41681537405628005, "grad_norm": 0.7562683566964875, "learning_rate": 6.564099963900779e-06, "loss": 0.3097, "step": 12146 }, { "epoch": 0.4168496911461908, "grad_norm": 0.7330619767422012, "learning_rate": 6.563572106415985e-06, "loss": 0.2767, "step": 12147 }, { "epoch": 0.41688400823610156, "grad_norm": 0.7263347272459644, "learning_rate": 6.5630442296150805e-06, "loss": 0.3608, "step": 12148 }, { "epoch": 0.41691832532601236, "grad_norm": 0.7992901264557335, "learning_rate": 6.562516333504587e-06, "loss": 0.2822, "step": 12149 }, { "epoch": 0.4169526424159231, "grad_norm": 0.8131817651642562, "learning_rate": 6.561988418091026e-06, "loss": 0.3656, "step": 12150 }, { "epoch": 0.4169869595058339, "grad_norm": 0.7279025938728234, "learning_rate": 6.561460483380919e-06, "loss": 0.3254, "step": 12151 }, { "epoch": 0.41702127659574467, "grad_norm": 0.7998747034135799, "learning_rate": 6.560932529380788e-06, "loss": 0.3318, "step": 12152 }, { "epoch": 0.4170555936856555, "grad_norm": 0.7880007986096975, "learning_rate": 6.5604045560971554e-06, "loss": 0.2751, "step": 12153 }, { "epoch": 0.41708991077556623, "grad_norm": 0.7539023128841021, "learning_rate": 6.559876563536543e-06, "loss": 0.3547, "step": 12154 }, { "epoch": 0.417124227865477, "grad_norm": 0.7897414871220231, "learning_rate": 6.559348551705476e-06, "loss": 0.3211, "step": 12155 }, { "epoch": 0.4171585449553878, "grad_norm": 0.6818137328106073, "learning_rate": 6.558820520610474e-06, "loss": 0.2542, "step": 12156 }, { "epoch": 0.41719286204529854, "grad_norm": 0.7715615963604608, "learning_rate": 6.558292470258063e-06, "loss": 0.2738, "step": 12157 }, { "epoch": 0.41722717913520935, "grad_norm": 0.693423050930936, "learning_rate": 6.5577644006547655e-06, "loss": 0.2634, "step": 12158 }, { "epoch": 0.4172614962251201, "grad_norm": 0.7902960546106123, "learning_rate": 6.5572363118071044e-06, "loss": 0.3723, "step": 12159 }, { "epoch": 0.4172958133150309, "grad_norm": 0.741249223452146, "learning_rate": 6.556708203721605e-06, "loss": 0.3415, "step": 12160 }, { "epoch": 0.41733013040494166, "grad_norm": 0.7343407077609583, "learning_rate": 6.556180076404789e-06, "loss": 0.3073, "step": 12161 }, { "epoch": 0.41736444749485246, "grad_norm": 0.8838805265681645, "learning_rate": 6.555651929863183e-06, "loss": 0.3387, "step": 12162 }, { "epoch": 0.4173987645847632, "grad_norm": 0.8967218300773249, "learning_rate": 6.555123764103311e-06, "loss": 0.3063, "step": 12163 }, { "epoch": 0.41743308167467397, "grad_norm": 0.920662322369855, "learning_rate": 6.554595579131699e-06, "loss": 0.2692, "step": 12164 }, { "epoch": 0.4174673987645848, "grad_norm": 0.7271950992227577, "learning_rate": 6.554067374954871e-06, "loss": 0.2903, "step": 12165 }, { "epoch": 0.4175017158544955, "grad_norm": 0.7106214120698091, "learning_rate": 6.553539151579352e-06, "loss": 0.2624, "step": 12166 }, { "epoch": 0.41753603294440633, "grad_norm": 0.8068414758604684, "learning_rate": 6.55301090901167e-06, "loss": 0.2673, "step": 12167 }, { "epoch": 0.4175703500343171, "grad_norm": 0.731530074185554, "learning_rate": 6.552482647258346e-06, "loss": 0.2959, "step": 12168 }, { "epoch": 0.4176046671242279, "grad_norm": 0.777820137342224, "learning_rate": 6.551954366325909e-06, "loss": 0.2865, "step": 12169 }, { "epoch": 0.41763898421413864, "grad_norm": 0.7778251121810121, "learning_rate": 6.5514260662208875e-06, "loss": 0.3165, "step": 12170 }, { "epoch": 0.4176733013040494, "grad_norm": 0.7682257401220866, "learning_rate": 6.550897746949804e-06, "loss": 0.246, "step": 12171 }, { "epoch": 0.4177076183939602, "grad_norm": 0.7739679936647598, "learning_rate": 6.550369408519187e-06, "loss": 0.3052, "step": 12172 }, { "epoch": 0.41774193548387095, "grad_norm": 0.8451377686746726, "learning_rate": 6.549841050935565e-06, "loss": 0.3342, "step": 12173 }, { "epoch": 0.41777625257378176, "grad_norm": 0.8506359889876282, "learning_rate": 6.549312674205462e-06, "loss": 0.3234, "step": 12174 }, { "epoch": 0.4178105696636925, "grad_norm": 0.7004235187556987, "learning_rate": 6.548784278335408e-06, "loss": 0.2819, "step": 12175 }, { "epoch": 0.4178448867536033, "grad_norm": 0.8440784541295924, "learning_rate": 6.5482558633319295e-06, "loss": 0.2898, "step": 12176 }, { "epoch": 0.41787920384351407, "grad_norm": 0.8535127389807823, "learning_rate": 6.547727429201556e-06, "loss": 0.2805, "step": 12177 }, { "epoch": 0.4179135209334248, "grad_norm": 0.8251172751142659, "learning_rate": 6.547198975950814e-06, "loss": 0.268, "step": 12178 }, { "epoch": 0.4179478380233356, "grad_norm": 0.7600969149864375, "learning_rate": 6.546670503586231e-06, "loss": 0.2896, "step": 12179 }, { "epoch": 0.4179821551132464, "grad_norm": 0.7832270900465339, "learning_rate": 6.54614201211434e-06, "loss": 0.2853, "step": 12180 }, { "epoch": 0.4180164722031572, "grad_norm": 0.8143498691918746, "learning_rate": 6.545613501541666e-06, "loss": 0.2996, "step": 12181 }, { "epoch": 0.41805078929306794, "grad_norm": 0.7514293566177619, "learning_rate": 6.545084971874738e-06, "loss": 0.3085, "step": 12182 }, { "epoch": 0.41808510638297874, "grad_norm": 0.7632376926082202, "learning_rate": 6.544556423120087e-06, "loss": 0.3, "step": 12183 }, { "epoch": 0.4181194234728895, "grad_norm": 0.7987365141951962, "learning_rate": 6.5440278552842425e-06, "loss": 0.3504, "step": 12184 }, { "epoch": 0.41815374056280025, "grad_norm": 0.7040962521387785, "learning_rate": 6.543499268373734e-06, "loss": 0.2568, "step": 12185 }, { "epoch": 0.41818805765271105, "grad_norm": 0.7497627421157818, "learning_rate": 6.542970662395092e-06, "loss": 0.311, "step": 12186 }, { "epoch": 0.4182223747426218, "grad_norm": 0.5994593156105252, "learning_rate": 6.542442037354846e-06, "loss": 0.2556, "step": 12187 }, { "epoch": 0.4182566918325326, "grad_norm": 0.7972745138105813, "learning_rate": 6.541913393259527e-06, "loss": 0.28, "step": 12188 }, { "epoch": 0.41829100892244336, "grad_norm": 0.7586280616035453, "learning_rate": 6.541384730115667e-06, "loss": 0.2608, "step": 12189 }, { "epoch": 0.41832532601235417, "grad_norm": 0.8314016337084031, "learning_rate": 6.540856047929793e-06, "loss": 0.2866, "step": 12190 }, { "epoch": 0.4183596431022649, "grad_norm": 0.8006936026686888, "learning_rate": 6.5403273467084395e-06, "loss": 0.334, "step": 12191 }, { "epoch": 0.41839396019217573, "grad_norm": 0.8365759759761675, "learning_rate": 6.53979862645814e-06, "loss": 0.325, "step": 12192 }, { "epoch": 0.4184282772820865, "grad_norm": 0.8062045308783659, "learning_rate": 6.539269887185422e-06, "loss": 0.2585, "step": 12193 }, { "epoch": 0.41846259437199723, "grad_norm": 0.8179625804735182, "learning_rate": 6.53874112889682e-06, "loss": 0.263, "step": 12194 }, { "epoch": 0.41849691146190804, "grad_norm": 0.7423965323288154, "learning_rate": 6.538212351598862e-06, "loss": 0.2978, "step": 12195 }, { "epoch": 0.4185312285518188, "grad_norm": 0.8672983440194989, "learning_rate": 6.5376835552980885e-06, "loss": 0.3181, "step": 12196 }, { "epoch": 0.4185655456417296, "grad_norm": 0.8714194331268021, "learning_rate": 6.5371547400010245e-06, "loss": 0.3384, "step": 12197 }, { "epoch": 0.41859986273164035, "grad_norm": 0.8409768386650504, "learning_rate": 6.536625905714207e-06, "loss": 0.2981, "step": 12198 }, { "epoch": 0.41863417982155116, "grad_norm": 0.9407982237128177, "learning_rate": 6.536097052444168e-06, "loss": 0.2964, "step": 12199 }, { "epoch": 0.4186684969114619, "grad_norm": 0.7812518094672872, "learning_rate": 6.53556818019744e-06, "loss": 0.2469, "step": 12200 }, { "epoch": 0.41870281400137266, "grad_norm": 0.681333508112236, "learning_rate": 6.535039288980558e-06, "loss": 0.2509, "step": 12201 }, { "epoch": 0.41873713109128347, "grad_norm": 0.7494972198492892, "learning_rate": 6.534510378800055e-06, "loss": 0.2586, "step": 12202 }, { "epoch": 0.4187714481811942, "grad_norm": 0.8026958374840126, "learning_rate": 6.533981449662466e-06, "loss": 0.3064, "step": 12203 }, { "epoch": 0.418805765271105, "grad_norm": 0.7401963019016826, "learning_rate": 6.5334525015743245e-06, "loss": 0.3395, "step": 12204 }, { "epoch": 0.4188400823610158, "grad_norm": 0.7243495166025652, "learning_rate": 6.5329235345421635e-06, "loss": 0.2519, "step": 12205 }, { "epoch": 0.4188743994509266, "grad_norm": 0.8100108368749832, "learning_rate": 6.53239454857252e-06, "loss": 0.3643, "step": 12206 }, { "epoch": 0.41890871654083733, "grad_norm": 0.7081034030454114, "learning_rate": 6.53186554367193e-06, "loss": 0.273, "step": 12207 }, { "epoch": 0.4189430336307481, "grad_norm": 0.7621988073351638, "learning_rate": 6.531336519846927e-06, "loss": 0.2626, "step": 12208 }, { "epoch": 0.4189773507206589, "grad_norm": 0.7509978422928748, "learning_rate": 6.530807477104046e-06, "loss": 0.2949, "step": 12209 }, { "epoch": 0.41901166781056964, "grad_norm": 0.7895338363345558, "learning_rate": 6.530278415449824e-06, "loss": 0.3305, "step": 12210 }, { "epoch": 0.41904598490048045, "grad_norm": 0.7273301517006859, "learning_rate": 6.529749334890795e-06, "loss": 0.2958, "step": 12211 }, { "epoch": 0.4190803019903912, "grad_norm": 0.8723823957468236, "learning_rate": 6.529220235433498e-06, "loss": 0.254, "step": 12212 }, { "epoch": 0.419114619080302, "grad_norm": 0.7469329564631421, "learning_rate": 6.528691117084466e-06, "loss": 0.3181, "step": 12213 }, { "epoch": 0.41914893617021276, "grad_norm": 0.7232357821523728, "learning_rate": 6.528161979850238e-06, "loss": 0.2883, "step": 12214 }, { "epoch": 0.41918325326012357, "grad_norm": 0.8007352432864208, "learning_rate": 6.5276328237373525e-06, "loss": 0.3004, "step": 12215 }, { "epoch": 0.4192175703500343, "grad_norm": 0.7070046359219646, "learning_rate": 6.527103648752343e-06, "loss": 0.2442, "step": 12216 }, { "epoch": 0.41925188743994507, "grad_norm": 0.7435537640757911, "learning_rate": 6.526574454901748e-06, "loss": 0.2448, "step": 12217 }, { "epoch": 0.4192862045298559, "grad_norm": 0.7671195794268498, "learning_rate": 6.526045242192107e-06, "loss": 0.2913, "step": 12218 }, { "epoch": 0.41932052161976663, "grad_norm": 0.7903862083998464, "learning_rate": 6.525516010629954e-06, "loss": 0.3138, "step": 12219 }, { "epoch": 0.41935483870967744, "grad_norm": 0.7401089964758769, "learning_rate": 6.524986760221831e-06, "loss": 0.2787, "step": 12220 }, { "epoch": 0.4193891557995882, "grad_norm": 0.6876337975344337, "learning_rate": 6.524457490974273e-06, "loss": 0.2982, "step": 12221 }, { "epoch": 0.419423472889499, "grad_norm": 0.6845969209642111, "learning_rate": 6.523928202893821e-06, "loss": 0.2603, "step": 12222 }, { "epoch": 0.41945778997940975, "grad_norm": 0.7773230814701074, "learning_rate": 6.523398895987012e-06, "loss": 0.2847, "step": 12223 }, { "epoch": 0.4194921070693205, "grad_norm": 0.8184937691744788, "learning_rate": 6.522869570260388e-06, "loss": 0.3074, "step": 12224 }, { "epoch": 0.4195264241592313, "grad_norm": 0.6985311429396667, "learning_rate": 6.5223402257204844e-06, "loss": 0.259, "step": 12225 }, { "epoch": 0.41956074124914206, "grad_norm": 0.7769707575121987, "learning_rate": 6.521810862373842e-06, "loss": 0.3293, "step": 12226 }, { "epoch": 0.41959505833905286, "grad_norm": 0.7661577578197633, "learning_rate": 6.521281480227001e-06, "loss": 0.2829, "step": 12227 }, { "epoch": 0.4196293754289636, "grad_norm": 0.7758850573826793, "learning_rate": 6.520752079286499e-06, "loss": 0.3052, "step": 12228 }, { "epoch": 0.4196636925188744, "grad_norm": 0.7621622657976227, "learning_rate": 6.520222659558882e-06, "loss": 0.2704, "step": 12229 }, { "epoch": 0.41969800960878517, "grad_norm": 0.7817351159856842, "learning_rate": 6.5196932210506825e-06, "loss": 0.2947, "step": 12230 }, { "epoch": 0.4197323266986959, "grad_norm": 0.7367126320425491, "learning_rate": 6.519163763768448e-06, "loss": 0.2644, "step": 12231 }, { "epoch": 0.41976664378860673, "grad_norm": 0.7611389834473024, "learning_rate": 6.518634287718714e-06, "loss": 0.2692, "step": 12232 }, { "epoch": 0.4198009608785175, "grad_norm": 0.7898416016329728, "learning_rate": 6.518104792908027e-06, "loss": 0.3373, "step": 12233 }, { "epoch": 0.4198352779684283, "grad_norm": 0.7459081482684904, "learning_rate": 6.517575279342924e-06, "loss": 0.2275, "step": 12234 }, { "epoch": 0.41986959505833904, "grad_norm": 0.964294938448033, "learning_rate": 6.517045747029946e-06, "loss": 0.3084, "step": 12235 }, { "epoch": 0.41990391214824985, "grad_norm": 0.7612505487721374, "learning_rate": 6.516516195975638e-06, "loss": 0.3545, "step": 12236 }, { "epoch": 0.4199382292381606, "grad_norm": 0.7444185599623807, "learning_rate": 6.51598662618654e-06, "loss": 0.2784, "step": 12237 }, { "epoch": 0.4199725463280714, "grad_norm": 0.6950797416727894, "learning_rate": 6.515457037669195e-06, "loss": 0.2938, "step": 12238 }, { "epoch": 0.42000686341798216, "grad_norm": 0.7472933143820778, "learning_rate": 6.514927430430144e-06, "loss": 0.246, "step": 12239 }, { "epoch": 0.4200411805078929, "grad_norm": 0.7810114229499192, "learning_rate": 6.5143978044759325e-06, "loss": 0.2965, "step": 12240 }, { "epoch": 0.4200754975978037, "grad_norm": 0.7734385148901283, "learning_rate": 6.5138681598131015e-06, "loss": 0.2675, "step": 12241 }, { "epoch": 0.42010981468771447, "grad_norm": 0.6988310754116874, "learning_rate": 6.513338496448194e-06, "loss": 0.33, "step": 12242 }, { "epoch": 0.4201441317776253, "grad_norm": 0.8460891006835577, "learning_rate": 6.512808814387752e-06, "loss": 0.2547, "step": 12243 }, { "epoch": 0.420178448867536, "grad_norm": 0.9176148897710142, "learning_rate": 6.5122791136383234e-06, "loss": 0.3297, "step": 12244 }, { "epoch": 0.42021276595744683, "grad_norm": 0.7806959916464182, "learning_rate": 6.511749394206448e-06, "loss": 0.2936, "step": 12245 }, { "epoch": 0.4202470830473576, "grad_norm": 0.7480907896449114, "learning_rate": 6.511219656098673e-06, "loss": 0.2864, "step": 12246 }, { "epoch": 0.42028140013726834, "grad_norm": 0.7635098692850814, "learning_rate": 6.5106898993215406e-06, "loss": 0.3074, "step": 12247 }, { "epoch": 0.42031571722717914, "grad_norm": 0.8266177053852914, "learning_rate": 6.510160123881595e-06, "loss": 0.2782, "step": 12248 }, { "epoch": 0.4203500343170899, "grad_norm": 0.8220010972896178, "learning_rate": 6.509630329785381e-06, "loss": 0.2912, "step": 12249 }, { "epoch": 0.4203843514070007, "grad_norm": 0.8436292495729538, "learning_rate": 6.5091005170394465e-06, "loss": 0.294, "step": 12250 }, { "epoch": 0.42041866849691145, "grad_norm": 0.7252539633855667, "learning_rate": 6.5085706856503325e-06, "loss": 0.2625, "step": 12251 }, { "epoch": 0.42045298558682226, "grad_norm": 0.7364695087570948, "learning_rate": 6.508040835624587e-06, "loss": 0.3112, "step": 12252 }, { "epoch": 0.420487302676733, "grad_norm": 0.8728872147149993, "learning_rate": 6.507510966968757e-06, "loss": 0.2871, "step": 12253 }, { "epoch": 0.42052161976664376, "grad_norm": 0.7833886821987927, "learning_rate": 6.506981079689385e-06, "loss": 0.3089, "step": 12254 }, { "epoch": 0.42055593685655457, "grad_norm": 0.666871356115419, "learning_rate": 6.506451173793018e-06, "loss": 0.3206, "step": 12255 }, { "epoch": 0.4205902539464653, "grad_norm": 0.8560792437269562, "learning_rate": 6.505921249286204e-06, "loss": 0.3122, "step": 12256 }, { "epoch": 0.4206245710363761, "grad_norm": 0.7217907861982571, "learning_rate": 6.505391306175488e-06, "loss": 0.3144, "step": 12257 }, { "epoch": 0.4206588881262869, "grad_norm": 0.7933119689947256, "learning_rate": 6.504861344467419e-06, "loss": 0.315, "step": 12258 }, { "epoch": 0.4206932052161977, "grad_norm": 1.1592583020298168, "learning_rate": 6.504331364168541e-06, "loss": 0.3233, "step": 12259 }, { "epoch": 0.42072752230610844, "grad_norm": 0.7492978563279835, "learning_rate": 6.5038013652854044e-06, "loss": 0.2667, "step": 12260 }, { "epoch": 0.42076183939601924, "grad_norm": 0.8480885839147839, "learning_rate": 6.503271347824555e-06, "loss": 0.3419, "step": 12261 }, { "epoch": 0.42079615648593, "grad_norm": 0.809719327694343, "learning_rate": 6.50274131179254e-06, "loss": 0.2921, "step": 12262 }, { "epoch": 0.42083047357584075, "grad_norm": 0.6927051508090964, "learning_rate": 6.502211257195909e-06, "loss": 0.2836, "step": 12263 }, { "epoch": 0.42086479066575155, "grad_norm": 0.7640837272554428, "learning_rate": 6.5016811840412084e-06, "loss": 0.2312, "step": 12264 }, { "epoch": 0.4208991077556623, "grad_norm": 0.7570144389904665, "learning_rate": 6.501151092334988e-06, "loss": 0.2551, "step": 12265 }, { "epoch": 0.4209334248455731, "grad_norm": 0.7885842636528911, "learning_rate": 6.5006209820837954e-06, "loss": 0.3329, "step": 12266 }, { "epoch": 0.42096774193548386, "grad_norm": 0.8069788157514792, "learning_rate": 6.500090853294181e-06, "loss": 0.2521, "step": 12267 }, { "epoch": 0.42100205902539467, "grad_norm": 0.7284025579924107, "learning_rate": 6.499560705972692e-06, "loss": 0.2854, "step": 12268 }, { "epoch": 0.4210363761153054, "grad_norm": 0.780145385900463, "learning_rate": 6.49903054012588e-06, "loss": 0.2862, "step": 12269 }, { "epoch": 0.4210706932052162, "grad_norm": 0.7822884118601388, "learning_rate": 6.498500355760294e-06, "loss": 0.3166, "step": 12270 }, { "epoch": 0.421105010295127, "grad_norm": 0.799119872829842, "learning_rate": 6.49797015288248e-06, "loss": 0.3019, "step": 12271 }, { "epoch": 0.42113932738503773, "grad_norm": 0.7932868099531223, "learning_rate": 6.497439931498993e-06, "loss": 0.2931, "step": 12272 }, { "epoch": 0.42117364447494854, "grad_norm": 0.731115230692432, "learning_rate": 6.496909691616382e-06, "loss": 0.3486, "step": 12273 }, { "epoch": 0.4212079615648593, "grad_norm": 0.7709055193098566, "learning_rate": 6.4963794332411964e-06, "loss": 0.3095, "step": 12274 }, { "epoch": 0.4212422786547701, "grad_norm": 0.8352027776142146, "learning_rate": 6.495849156379987e-06, "loss": 0.2914, "step": 12275 }, { "epoch": 0.42127659574468085, "grad_norm": 0.7246001112924285, "learning_rate": 6.495318861039308e-06, "loss": 0.2775, "step": 12276 }, { "epoch": 0.4213109128345916, "grad_norm": 0.7734778257481, "learning_rate": 6.494788547225706e-06, "loss": 0.3058, "step": 12277 }, { "epoch": 0.4213452299245024, "grad_norm": 0.7805956220940543, "learning_rate": 6.4942582149457335e-06, "loss": 0.2645, "step": 12278 }, { "epoch": 0.42137954701441316, "grad_norm": 0.8632107309447283, "learning_rate": 6.493727864205943e-06, "loss": 0.2879, "step": 12279 }, { "epoch": 0.42141386410432397, "grad_norm": 0.960826179769724, "learning_rate": 6.493197495012887e-06, "loss": 0.2971, "step": 12280 }, { "epoch": 0.4214481811942347, "grad_norm": 0.8041498115768021, "learning_rate": 6.492667107373116e-06, "loss": 0.2825, "step": 12281 }, { "epoch": 0.4214824982841455, "grad_norm": 0.7703744796245097, "learning_rate": 6.492136701293185e-06, "loss": 0.3279, "step": 12282 }, { "epoch": 0.4215168153740563, "grad_norm": 0.6972468116809478, "learning_rate": 6.491606276779643e-06, "loss": 0.2747, "step": 12283 }, { "epoch": 0.4215511324639671, "grad_norm": 0.7777627617212699, "learning_rate": 6.491075833839046e-06, "loss": 0.3185, "step": 12284 }, { "epoch": 0.42158544955387783, "grad_norm": 0.8232153961506703, "learning_rate": 6.490545372477944e-06, "loss": 0.2907, "step": 12285 }, { "epoch": 0.4216197666437886, "grad_norm": 0.7502120893591544, "learning_rate": 6.4900148927028924e-06, "loss": 0.3621, "step": 12286 }, { "epoch": 0.4216540837336994, "grad_norm": 0.7484255017170912, "learning_rate": 6.489484394520444e-06, "loss": 0.3197, "step": 12287 }, { "epoch": 0.42168840082361014, "grad_norm": 0.790121993574678, "learning_rate": 6.488953877937151e-06, "loss": 0.3284, "step": 12288 }, { "epoch": 0.42172271791352095, "grad_norm": 0.816083897831989, "learning_rate": 6.488423342959571e-06, "loss": 0.2892, "step": 12289 }, { "epoch": 0.4217570350034317, "grad_norm": 0.791302034896828, "learning_rate": 6.487892789594255e-06, "loss": 0.3233, "step": 12290 }, { "epoch": 0.4217913520933425, "grad_norm": 0.7427129136173727, "learning_rate": 6.487362217847758e-06, "loss": 0.359, "step": 12291 }, { "epoch": 0.42182566918325326, "grad_norm": 0.8410447092240236, "learning_rate": 6.4868316277266365e-06, "loss": 0.2871, "step": 12292 }, { "epoch": 0.421859986273164, "grad_norm": 0.9191028226838881, "learning_rate": 6.4863010192374424e-06, "loss": 0.2819, "step": 12293 }, { "epoch": 0.4218943033630748, "grad_norm": 0.7346337979713943, "learning_rate": 6.485770392386732e-06, "loss": 0.2814, "step": 12294 }, { "epoch": 0.42192862045298557, "grad_norm": 0.8015818537037264, "learning_rate": 6.4852397471810605e-06, "loss": 0.3109, "step": 12295 }, { "epoch": 0.4219629375428964, "grad_norm": 0.7366308320790643, "learning_rate": 6.484709083626983e-06, "loss": 0.3066, "step": 12296 }, { "epoch": 0.42199725463280713, "grad_norm": 0.8790451830975431, "learning_rate": 6.484178401731056e-06, "loss": 0.2949, "step": 12297 }, { "epoch": 0.42203157172271794, "grad_norm": 0.753458274593411, "learning_rate": 6.483647701499838e-06, "loss": 0.3415, "step": 12298 }, { "epoch": 0.4220658888126287, "grad_norm": 1.0325154957366907, "learning_rate": 6.4831169829398786e-06, "loss": 0.2965, "step": 12299 }, { "epoch": 0.42210020590253944, "grad_norm": 0.7349608318631392, "learning_rate": 6.48258624605774e-06, "loss": 0.2762, "step": 12300 }, { "epoch": 0.42213452299245025, "grad_norm": 0.7492906274847385, "learning_rate": 6.482055490859977e-06, "loss": 0.2771, "step": 12301 }, { "epoch": 0.422168840082361, "grad_norm": 0.7892682382268401, "learning_rate": 6.4815247173531444e-06, "loss": 0.2789, "step": 12302 }, { "epoch": 0.4222031571722718, "grad_norm": 0.7213180252665139, "learning_rate": 6.480993925543804e-06, "loss": 0.2866, "step": 12303 }, { "epoch": 0.42223747426218255, "grad_norm": 0.8510690651603718, "learning_rate": 6.4804631154385074e-06, "loss": 0.2752, "step": 12304 }, { "epoch": 0.42227179135209336, "grad_norm": 0.7777991888346412, "learning_rate": 6.479932287043817e-06, "loss": 0.2827, "step": 12305 }, { "epoch": 0.4223061084420041, "grad_norm": 0.7511798990574914, "learning_rate": 6.479401440366289e-06, "loss": 0.3067, "step": 12306 }, { "epoch": 0.4223404255319149, "grad_norm": 0.8476583457715976, "learning_rate": 6.47887057541248e-06, "loss": 0.3176, "step": 12307 }, { "epoch": 0.42237474262182567, "grad_norm": 0.7741776431737601, "learning_rate": 6.478339692188951e-06, "loss": 0.3185, "step": 12308 }, { "epoch": 0.4224090597117364, "grad_norm": 0.7517718417425228, "learning_rate": 6.477808790702257e-06, "loss": 0.2937, "step": 12309 }, { "epoch": 0.42244337680164723, "grad_norm": 0.7830004238450947, "learning_rate": 6.477277870958957e-06, "loss": 0.2678, "step": 12310 }, { "epoch": 0.422477693891558, "grad_norm": 0.6907650699521812, "learning_rate": 6.476746932965614e-06, "loss": 0.3013, "step": 12311 }, { "epoch": 0.4225120109814688, "grad_norm": 0.7519137618523648, "learning_rate": 6.476215976728782e-06, "loss": 0.2975, "step": 12312 }, { "epoch": 0.42254632807137954, "grad_norm": 0.7371860922774576, "learning_rate": 6.475685002255025e-06, "loss": 0.3005, "step": 12313 }, { "epoch": 0.42258064516129035, "grad_norm": 0.6274358944252278, "learning_rate": 6.475154009550899e-06, "loss": 0.2929, "step": 12314 }, { "epoch": 0.4226149622512011, "grad_norm": 0.8216917861991462, "learning_rate": 6.474622998622966e-06, "loss": 0.3176, "step": 12315 }, { "epoch": 0.42264927934111185, "grad_norm": 0.8198277763093165, "learning_rate": 6.474091969477784e-06, "loss": 0.3313, "step": 12316 }, { "epoch": 0.42268359643102266, "grad_norm": 0.8226316161185243, "learning_rate": 6.473560922121914e-06, "loss": 0.2695, "step": 12317 }, { "epoch": 0.4227179135209334, "grad_norm": 0.7285317683062511, "learning_rate": 6.473029856561916e-06, "loss": 0.3366, "step": 12318 }, { "epoch": 0.4227522306108442, "grad_norm": 0.7430428668723515, "learning_rate": 6.472498772804354e-06, "loss": 0.2922, "step": 12319 }, { "epoch": 0.42278654770075497, "grad_norm": 0.8414727132683659, "learning_rate": 6.471967670855785e-06, "loss": 0.3021, "step": 12320 }, { "epoch": 0.4228208647906658, "grad_norm": 0.8839852536956372, "learning_rate": 6.471436550722773e-06, "loss": 0.2812, "step": 12321 }, { "epoch": 0.4228551818805765, "grad_norm": 0.7349970122242885, "learning_rate": 6.4709054124118764e-06, "loss": 0.279, "step": 12322 }, { "epoch": 0.4228894989704873, "grad_norm": 0.7907393614165029, "learning_rate": 6.470374255929659e-06, "loss": 0.2959, "step": 12323 }, { "epoch": 0.4229238160603981, "grad_norm": 0.814509928253293, "learning_rate": 6.469843081282682e-06, "loss": 0.327, "step": 12324 }, { "epoch": 0.42295813315030883, "grad_norm": 0.7817704517521888, "learning_rate": 6.469311888477506e-06, "loss": 0.2637, "step": 12325 }, { "epoch": 0.42299245024021964, "grad_norm": 0.7857363567679616, "learning_rate": 6.4687806775206965e-06, "loss": 0.2614, "step": 12326 }, { "epoch": 0.4230267673301304, "grad_norm": 0.7613315840948707, "learning_rate": 6.468249448418814e-06, "loss": 0.3048, "step": 12327 }, { "epoch": 0.4230610844200412, "grad_norm": 0.78450842106472, "learning_rate": 6.467718201178421e-06, "loss": 0.3154, "step": 12328 }, { "epoch": 0.42309540150995195, "grad_norm": 0.839958839537845, "learning_rate": 6.46718693580608e-06, "loss": 0.2819, "step": 12329 }, { "epoch": 0.42312971859986276, "grad_norm": 0.8067309227350876, "learning_rate": 6.466655652308357e-06, "loss": 0.2912, "step": 12330 }, { "epoch": 0.4231640356897735, "grad_norm": 0.7573102216704733, "learning_rate": 6.466124350691811e-06, "loss": 0.2995, "step": 12331 }, { "epoch": 0.42319835277968426, "grad_norm": 0.8075423835953135, "learning_rate": 6.465593030963009e-06, "loss": 0.298, "step": 12332 }, { "epoch": 0.42323266986959507, "grad_norm": 0.772963220444466, "learning_rate": 6.465061693128515e-06, "loss": 0.2943, "step": 12333 }, { "epoch": 0.4232669869595058, "grad_norm": 0.8182312588317752, "learning_rate": 6.4645303371948895e-06, "loss": 0.2617, "step": 12334 }, { "epoch": 0.4233013040494166, "grad_norm": 0.7564221307291576, "learning_rate": 6.4639989631687e-06, "loss": 0.2571, "step": 12335 }, { "epoch": 0.4233356211393274, "grad_norm": 0.7634722498706351, "learning_rate": 6.463467571056511e-06, "loss": 0.2726, "step": 12336 }, { "epoch": 0.4233699382292382, "grad_norm": 0.7341296717091146, "learning_rate": 6.462936160864888e-06, "loss": 0.2495, "step": 12337 }, { "epoch": 0.42340425531914894, "grad_norm": 0.8863395439459887, "learning_rate": 6.462404732600391e-06, "loss": 0.2958, "step": 12338 }, { "epoch": 0.4234385724090597, "grad_norm": 0.7865666763201987, "learning_rate": 6.46187328626959e-06, "loss": 0.2407, "step": 12339 }, { "epoch": 0.4234728894989705, "grad_norm": 0.7029623358952145, "learning_rate": 6.461341821879048e-06, "loss": 0.2585, "step": 12340 }, { "epoch": 0.42350720658888125, "grad_norm": 0.6830276509614619, "learning_rate": 6.460810339435333e-06, "loss": 0.2753, "step": 12341 }, { "epoch": 0.42354152367879205, "grad_norm": 0.7586807200507583, "learning_rate": 6.460278838945009e-06, "loss": 0.2935, "step": 12342 }, { "epoch": 0.4235758407687028, "grad_norm": 0.702044197797262, "learning_rate": 6.459747320414642e-06, "loss": 0.2424, "step": 12343 }, { "epoch": 0.4236101578586136, "grad_norm": 0.7281589583982886, "learning_rate": 6.459215783850798e-06, "loss": 0.2762, "step": 12344 }, { "epoch": 0.42364447494852436, "grad_norm": 0.7109936361327954, "learning_rate": 6.458684229260046e-06, "loss": 0.2599, "step": 12345 }, { "epoch": 0.4236787920384351, "grad_norm": 0.8390757150362654, "learning_rate": 6.4581526566489505e-06, "loss": 0.3009, "step": 12346 }, { "epoch": 0.4237131091283459, "grad_norm": 0.7541945826824863, "learning_rate": 6.4576210660240775e-06, "loss": 0.3451, "step": 12347 }, { "epoch": 0.4237474262182567, "grad_norm": 0.7549065055144447, "learning_rate": 6.457089457391997e-06, "loss": 0.2499, "step": 12348 }, { "epoch": 0.4237817433081675, "grad_norm": 0.7626617756924757, "learning_rate": 6.456557830759276e-06, "loss": 0.2958, "step": 12349 }, { "epoch": 0.42381606039807823, "grad_norm": 0.802594027585899, "learning_rate": 6.456026186132481e-06, "loss": 0.274, "step": 12350 }, { "epoch": 0.42385037748798904, "grad_norm": 0.8643354175677097, "learning_rate": 6.4554945235181775e-06, "loss": 0.3462, "step": 12351 }, { "epoch": 0.4238846945778998, "grad_norm": 0.7006907378735953, "learning_rate": 6.454962842922939e-06, "loss": 0.377, "step": 12352 }, { "epoch": 0.4239190116678106, "grad_norm": 0.829863888107565, "learning_rate": 6.4544311443533295e-06, "loss": 0.3026, "step": 12353 }, { "epoch": 0.42395332875772135, "grad_norm": 0.806347168256092, "learning_rate": 6.453899427815917e-06, "loss": 0.2955, "step": 12354 }, { "epoch": 0.4239876458476321, "grad_norm": 0.7236313598431995, "learning_rate": 6.453367693317275e-06, "loss": 0.2913, "step": 12355 }, { "epoch": 0.4240219629375429, "grad_norm": 0.8327229612965844, "learning_rate": 6.452835940863969e-06, "loss": 0.3603, "step": 12356 }, { "epoch": 0.42405628002745366, "grad_norm": 0.7425155514344312, "learning_rate": 6.452304170462569e-06, "loss": 0.2574, "step": 12357 }, { "epoch": 0.42409059711736447, "grad_norm": 0.817494682763622, "learning_rate": 6.451772382119643e-06, "loss": 0.3102, "step": 12358 }, { "epoch": 0.4241249142072752, "grad_norm": 0.8102683431162009, "learning_rate": 6.451240575841764e-06, "loss": 0.2807, "step": 12359 }, { "epoch": 0.424159231297186, "grad_norm": 0.8268118163204232, "learning_rate": 6.450708751635498e-06, "loss": 0.2818, "step": 12360 }, { "epoch": 0.4241935483870968, "grad_norm": 0.8068176339301316, "learning_rate": 6.450176909507417e-06, "loss": 0.2815, "step": 12361 }, { "epoch": 0.4242278654770075, "grad_norm": 0.7790464731583296, "learning_rate": 6.4496450494640905e-06, "loss": 0.3365, "step": 12362 }, { "epoch": 0.42426218256691833, "grad_norm": 0.8010766116767062, "learning_rate": 6.44911317151209e-06, "loss": 0.2704, "step": 12363 }, { "epoch": 0.4242964996568291, "grad_norm": 0.7940207770459164, "learning_rate": 6.448581275657986e-06, "loss": 0.2849, "step": 12364 }, { "epoch": 0.4243308167467399, "grad_norm": 0.7371501650099573, "learning_rate": 6.448049361908349e-06, "loss": 0.3536, "step": 12365 }, { "epoch": 0.42436513383665064, "grad_norm": 0.8954436497700369, "learning_rate": 6.447517430269752e-06, "loss": 0.334, "step": 12366 }, { "epoch": 0.42439945092656145, "grad_norm": 0.7226451719705281, "learning_rate": 6.446985480748762e-06, "loss": 0.2615, "step": 12367 }, { "epoch": 0.4244337680164722, "grad_norm": 0.810067909739951, "learning_rate": 6.446453513351954e-06, "loss": 0.322, "step": 12368 }, { "epoch": 0.42446808510638295, "grad_norm": 0.7074721106911553, "learning_rate": 6.445921528085901e-06, "loss": 0.2954, "step": 12369 }, { "epoch": 0.42450240219629376, "grad_norm": 0.6883643974785011, "learning_rate": 6.445389524957172e-06, "loss": 0.2938, "step": 12370 }, { "epoch": 0.4245367192862045, "grad_norm": 0.7749531022608912, "learning_rate": 6.44485750397234e-06, "loss": 0.3289, "step": 12371 }, { "epoch": 0.4245710363761153, "grad_norm": 0.7256240786531012, "learning_rate": 6.44432546513798e-06, "loss": 0.2633, "step": 12372 }, { "epoch": 0.42460535346602607, "grad_norm": 0.7937682073131508, "learning_rate": 6.443793408460661e-06, "loss": 0.3178, "step": 12373 }, { "epoch": 0.4246396705559369, "grad_norm": 0.7179579655241392, "learning_rate": 6.443261333946958e-06, "loss": 0.2598, "step": 12374 }, { "epoch": 0.42467398764584763, "grad_norm": 0.9112658644826658, "learning_rate": 6.442729241603446e-06, "loss": 0.2944, "step": 12375 }, { "epoch": 0.42470830473575844, "grad_norm": 0.7963555128594489, "learning_rate": 6.442197131436693e-06, "loss": 0.2444, "step": 12376 }, { "epoch": 0.4247426218256692, "grad_norm": 0.8275321609294655, "learning_rate": 6.441665003453277e-06, "loss": 0.3455, "step": 12377 }, { "epoch": 0.42477693891557994, "grad_norm": 0.6609491442929365, "learning_rate": 6.441132857659772e-06, "loss": 0.2611, "step": 12378 }, { "epoch": 0.42481125600549074, "grad_norm": 0.6961091818248032, "learning_rate": 6.44060069406275e-06, "loss": 0.2775, "step": 12379 }, { "epoch": 0.4248455730954015, "grad_norm": 0.8055286953964013, "learning_rate": 6.4400685126687855e-06, "loss": 0.3513, "step": 12380 }, { "epoch": 0.4248798901853123, "grad_norm": 0.7943255926093765, "learning_rate": 6.439536313484456e-06, "loss": 0.2759, "step": 12381 }, { "epoch": 0.42491420727522305, "grad_norm": 0.7620462205117309, "learning_rate": 6.43900409651633e-06, "loss": 0.2829, "step": 12382 }, { "epoch": 0.42494852436513386, "grad_norm": 0.7752452115017536, "learning_rate": 6.438471861770988e-06, "loss": 0.254, "step": 12383 }, { "epoch": 0.4249828414550446, "grad_norm": 0.7464961467801866, "learning_rate": 6.4379396092550025e-06, "loss": 0.2623, "step": 12384 }, { "epoch": 0.42501715854495536, "grad_norm": 0.7014008669057558, "learning_rate": 6.43740733897495e-06, "loss": 0.2689, "step": 12385 }, { "epoch": 0.42505147563486617, "grad_norm": 0.7969987732053214, "learning_rate": 6.4368750509374055e-06, "loss": 0.2868, "step": 12386 }, { "epoch": 0.4250857927247769, "grad_norm": 1.0166714976799354, "learning_rate": 6.436342745148945e-06, "loss": 0.278, "step": 12387 }, { "epoch": 0.42512010981468773, "grad_norm": 0.8034618113597134, "learning_rate": 6.435810421616145e-06, "loss": 0.2448, "step": 12388 }, { "epoch": 0.4251544269045985, "grad_norm": 0.7532186515856512, "learning_rate": 6.435278080345582e-06, "loss": 0.2387, "step": 12389 }, { "epoch": 0.4251887439945093, "grad_norm": 0.7357061792418964, "learning_rate": 6.434745721343829e-06, "loss": 0.2849, "step": 12390 }, { "epoch": 0.42522306108442004, "grad_norm": 0.7361741282494749, "learning_rate": 6.434213344617468e-06, "loss": 0.3161, "step": 12391 }, { "epoch": 0.4252573781743308, "grad_norm": 1.005753123455234, "learning_rate": 6.433680950173071e-06, "loss": 0.321, "step": 12392 }, { "epoch": 0.4252916952642416, "grad_norm": 0.8152936058911542, "learning_rate": 6.4331485380172185e-06, "loss": 0.273, "step": 12393 }, { "epoch": 0.42532601235415235, "grad_norm": 0.781403496494025, "learning_rate": 6.432616108156489e-06, "loss": 0.2621, "step": 12394 }, { "epoch": 0.42536032944406316, "grad_norm": 0.8093739337123296, "learning_rate": 6.432083660597455e-06, "loss": 0.2977, "step": 12395 }, { "epoch": 0.4253946465339739, "grad_norm": 0.7647312480106196, "learning_rate": 6.431551195346697e-06, "loss": 0.3167, "step": 12396 }, { "epoch": 0.4254289636238847, "grad_norm": 0.7985804290188407, "learning_rate": 6.431018712410792e-06, "loss": 0.3204, "step": 12397 }, { "epoch": 0.42546328071379547, "grad_norm": 0.799299863030216, "learning_rate": 6.4304862117963225e-06, "loss": 0.3228, "step": 12398 }, { "epoch": 0.4254975978037063, "grad_norm": 0.7458600557549755, "learning_rate": 6.42995369350986e-06, "loss": 0.2914, "step": 12399 }, { "epoch": 0.425531914893617, "grad_norm": 0.8496428955812072, "learning_rate": 6.429421157557989e-06, "loss": 0.3787, "step": 12400 }, { "epoch": 0.4255662319835278, "grad_norm": 0.8753037184050766, "learning_rate": 6.428888603947288e-06, "loss": 0.3343, "step": 12401 }, { "epoch": 0.4256005490734386, "grad_norm": 0.8238650762192281, "learning_rate": 6.4283560326843305e-06, "loss": 0.3303, "step": 12402 }, { "epoch": 0.42563486616334933, "grad_norm": 0.8352534398519622, "learning_rate": 6.427823443775701e-06, "loss": 0.3677, "step": 12403 }, { "epoch": 0.42566918325326014, "grad_norm": 0.8530874589747558, "learning_rate": 6.42729083722798e-06, "loss": 0.2469, "step": 12404 }, { "epoch": 0.4257035003431709, "grad_norm": 0.8697561839065332, "learning_rate": 6.426758213047741e-06, "loss": 0.3091, "step": 12405 }, { "epoch": 0.4257378174330817, "grad_norm": 0.7136952600767682, "learning_rate": 6.426225571241569e-06, "loss": 0.2461, "step": 12406 }, { "epoch": 0.42577213452299245, "grad_norm": 0.7791587493018957, "learning_rate": 6.425692911816043e-06, "loss": 0.2558, "step": 12407 }, { "epoch": 0.4258064516129032, "grad_norm": 0.7419616755122118, "learning_rate": 6.425160234777743e-06, "loss": 0.3093, "step": 12408 }, { "epoch": 0.425840768702814, "grad_norm": 0.8051581961446159, "learning_rate": 6.42462754013325e-06, "loss": 0.3208, "step": 12409 }, { "epoch": 0.42587508579272476, "grad_norm": 0.7824968322547111, "learning_rate": 6.424094827889146e-06, "loss": 0.2442, "step": 12410 }, { "epoch": 0.42590940288263557, "grad_norm": 0.8571597024978331, "learning_rate": 6.42356209805201e-06, "loss": 0.3411, "step": 12411 }, { "epoch": 0.4259437199725463, "grad_norm": 0.80631494688041, "learning_rate": 6.423029350628422e-06, "loss": 0.2704, "step": 12412 }, { "epoch": 0.4259780370624571, "grad_norm": 0.7520110206268662, "learning_rate": 6.422496585624968e-06, "loss": 0.2878, "step": 12413 }, { "epoch": 0.4260123541523679, "grad_norm": 0.8006701297873837, "learning_rate": 6.421963803048226e-06, "loss": 0.3582, "step": 12414 }, { "epoch": 0.42604667124227863, "grad_norm": 0.8367644210198872, "learning_rate": 6.421431002904779e-06, "loss": 0.2921, "step": 12415 }, { "epoch": 0.42608098833218944, "grad_norm": 0.7699828474077952, "learning_rate": 6.420898185201209e-06, "loss": 0.3183, "step": 12416 }, { "epoch": 0.4261153054221002, "grad_norm": 0.7816835362105128, "learning_rate": 6.4203653499440995e-06, "loss": 0.2973, "step": 12417 }, { "epoch": 0.426149622512011, "grad_norm": 0.6749324593382875, "learning_rate": 6.419832497140032e-06, "loss": 0.2644, "step": 12418 }, { "epoch": 0.42618393960192175, "grad_norm": 0.7853196510197519, "learning_rate": 6.419299626795587e-06, "loss": 0.3423, "step": 12419 }, { "epoch": 0.42621825669183255, "grad_norm": 0.7150306799532296, "learning_rate": 6.418766738917353e-06, "loss": 0.3092, "step": 12420 }, { "epoch": 0.4262525737817433, "grad_norm": 0.7684999441221539, "learning_rate": 6.418233833511909e-06, "loss": 0.335, "step": 12421 }, { "epoch": 0.42628689087165406, "grad_norm": 0.8086566905989602, "learning_rate": 6.417700910585838e-06, "loss": 0.3703, "step": 12422 }, { "epoch": 0.42632120796156486, "grad_norm": 0.8140807904971391, "learning_rate": 6.417167970145728e-06, "loss": 0.3672, "step": 12423 }, { "epoch": 0.4263555250514756, "grad_norm": 0.7917795618522825, "learning_rate": 6.416635012198158e-06, "loss": 0.3157, "step": 12424 }, { "epoch": 0.4263898421413864, "grad_norm": 0.7813601654856441, "learning_rate": 6.416102036749714e-06, "loss": 0.3072, "step": 12425 }, { "epoch": 0.4264241592312972, "grad_norm": 0.7579942599624137, "learning_rate": 6.4155690438069815e-06, "loss": 0.3235, "step": 12426 }, { "epoch": 0.426458476321208, "grad_norm": 0.6934995789841788, "learning_rate": 6.4150360333765436e-06, "loss": 0.2583, "step": 12427 }, { "epoch": 0.42649279341111873, "grad_norm": 0.7599725841213127, "learning_rate": 6.414503005464985e-06, "loss": 0.3512, "step": 12428 }, { "epoch": 0.42652711050102954, "grad_norm": 0.7093840407009452, "learning_rate": 6.4139699600788895e-06, "loss": 0.2919, "step": 12429 }, { "epoch": 0.4265614275909403, "grad_norm": 0.7974152510872774, "learning_rate": 6.413436897224845e-06, "loss": 0.3461, "step": 12430 }, { "epoch": 0.42659574468085104, "grad_norm": 0.7812891880592572, "learning_rate": 6.412903816909436e-06, "loss": 0.2484, "step": 12431 }, { "epoch": 0.42663006177076185, "grad_norm": 0.9195010641109912, "learning_rate": 6.412370719139247e-06, "loss": 0.3128, "step": 12432 }, { "epoch": 0.4266643788606726, "grad_norm": 0.9079524765511354, "learning_rate": 6.411837603920865e-06, "loss": 0.2768, "step": 12433 }, { "epoch": 0.4266986959505834, "grad_norm": 0.7243474088956717, "learning_rate": 6.411304471260875e-06, "loss": 0.3084, "step": 12434 }, { "epoch": 0.42673301304049416, "grad_norm": 0.8210232697864255, "learning_rate": 6.410771321165863e-06, "loss": 0.37, "step": 12435 }, { "epoch": 0.42676733013040496, "grad_norm": 0.8055188109871878, "learning_rate": 6.410238153642418e-06, "loss": 0.2904, "step": 12436 }, { "epoch": 0.4268016472203157, "grad_norm": 0.7908245615738645, "learning_rate": 6.4097049686971226e-06, "loss": 0.2776, "step": 12437 }, { "epoch": 0.42683596431022647, "grad_norm": 0.7360760268898987, "learning_rate": 6.409171766336566e-06, "loss": 0.2626, "step": 12438 }, { "epoch": 0.4268702814001373, "grad_norm": 0.8445288281393567, "learning_rate": 6.408638546567337e-06, "loss": 0.3223, "step": 12439 }, { "epoch": 0.426904598490048, "grad_norm": 0.7476549679546796, "learning_rate": 6.40810530939602e-06, "loss": 0.2533, "step": 12440 }, { "epoch": 0.42693891557995883, "grad_norm": 0.7594139874835563, "learning_rate": 6.4075720548292045e-06, "loss": 0.2833, "step": 12441 }, { "epoch": 0.4269732326698696, "grad_norm": 0.6700541844573421, "learning_rate": 6.407038782873477e-06, "loss": 0.2676, "step": 12442 }, { "epoch": 0.4270075497597804, "grad_norm": 0.7181983745536378, "learning_rate": 6.406505493535425e-06, "loss": 0.2564, "step": 12443 }, { "epoch": 0.42704186684969114, "grad_norm": 0.7610525947419834, "learning_rate": 6.405972186821636e-06, "loss": 0.315, "step": 12444 }, { "epoch": 0.4270761839396019, "grad_norm": 0.8266805700792085, "learning_rate": 6.405438862738702e-06, "loss": 0.2857, "step": 12445 }, { "epoch": 0.4271105010295127, "grad_norm": 0.7916824431692776, "learning_rate": 6.40490552129321e-06, "loss": 0.3302, "step": 12446 }, { "epoch": 0.42714481811942345, "grad_norm": 0.7173943655554365, "learning_rate": 6.404372162491746e-06, "loss": 0.3027, "step": 12447 }, { "epoch": 0.42717913520933426, "grad_norm": 0.8066240096820559, "learning_rate": 6.403838786340903e-06, "loss": 0.2839, "step": 12448 }, { "epoch": 0.427213452299245, "grad_norm": 0.7556599238554254, "learning_rate": 6.403305392847269e-06, "loss": 0.283, "step": 12449 }, { "epoch": 0.4272477693891558, "grad_norm": 0.725640460338348, "learning_rate": 6.402771982017432e-06, "loss": 0.3346, "step": 12450 }, { "epoch": 0.42728208647906657, "grad_norm": 0.7709491715001779, "learning_rate": 6.402238553857982e-06, "loss": 0.3347, "step": 12451 }, { "epoch": 0.4273164035689774, "grad_norm": 0.8157274795221237, "learning_rate": 6.401705108375511e-06, "loss": 0.3099, "step": 12452 }, { "epoch": 0.42735072065888813, "grad_norm": 0.7877441710238614, "learning_rate": 6.401171645576606e-06, "loss": 0.3337, "step": 12453 }, { "epoch": 0.4273850377487989, "grad_norm": 0.8039549823026124, "learning_rate": 6.40063816546786e-06, "loss": 0.2839, "step": 12454 }, { "epoch": 0.4274193548387097, "grad_norm": 0.7346444773620899, "learning_rate": 6.400104668055863e-06, "loss": 0.2958, "step": 12455 }, { "epoch": 0.42745367192862044, "grad_norm": 0.7829418725483291, "learning_rate": 6.3995711533472034e-06, "loss": 0.2955, "step": 12456 }, { "epoch": 0.42748798901853124, "grad_norm": 0.6916635127619343, "learning_rate": 6.399037621348475e-06, "loss": 0.2975, "step": 12457 }, { "epoch": 0.427522306108442, "grad_norm": 0.7841626003350461, "learning_rate": 6.398504072066267e-06, "loss": 0.3043, "step": 12458 }, { "epoch": 0.4275566231983528, "grad_norm": 0.7561929391170724, "learning_rate": 6.397970505507174e-06, "loss": 0.2834, "step": 12459 }, { "epoch": 0.42759094028826355, "grad_norm": 0.6248810088136768, "learning_rate": 6.397436921677783e-06, "loss": 0.2363, "step": 12460 }, { "epoch": 0.4276252573781743, "grad_norm": 0.8688019496067821, "learning_rate": 6.396903320584689e-06, "loss": 0.2998, "step": 12461 }, { "epoch": 0.4276595744680851, "grad_norm": 0.8143549537342563, "learning_rate": 6.396369702234484e-06, "loss": 0.2868, "step": 12462 }, { "epoch": 0.42769389155799586, "grad_norm": 0.6556720564152274, "learning_rate": 6.395836066633757e-06, "loss": 0.2727, "step": 12463 }, { "epoch": 0.42772820864790667, "grad_norm": 0.7220373894241126, "learning_rate": 6.395302413789104e-06, "loss": 0.2663, "step": 12464 }, { "epoch": 0.4277625257378174, "grad_norm": 0.7621311262723891, "learning_rate": 6.3947687437071175e-06, "loss": 0.3179, "step": 12465 }, { "epoch": 0.42779684282772823, "grad_norm": 0.6802598791820257, "learning_rate": 6.394235056394388e-06, "loss": 0.2461, "step": 12466 }, { "epoch": 0.427831159917639, "grad_norm": 0.6963767901382635, "learning_rate": 6.39370135185751e-06, "loss": 0.2473, "step": 12467 }, { "epoch": 0.42786547700754973, "grad_norm": 0.739819128596655, "learning_rate": 6.393167630103078e-06, "loss": 0.3194, "step": 12468 }, { "epoch": 0.42789979409746054, "grad_norm": 0.7358570361632303, "learning_rate": 6.3926338911376825e-06, "loss": 0.2724, "step": 12469 }, { "epoch": 0.4279341111873713, "grad_norm": 0.7998696339096867, "learning_rate": 6.392100134967919e-06, "loss": 0.2692, "step": 12470 }, { "epoch": 0.4279684282772821, "grad_norm": 0.8182357711830935, "learning_rate": 6.391566361600383e-06, "loss": 0.3064, "step": 12471 }, { "epoch": 0.42800274536719285, "grad_norm": 0.8240702734981121, "learning_rate": 6.391032571041666e-06, "loss": 0.3127, "step": 12472 }, { "epoch": 0.42803706245710366, "grad_norm": 0.9693219878942771, "learning_rate": 6.390498763298363e-06, "loss": 0.324, "step": 12473 }, { "epoch": 0.4280713795470144, "grad_norm": 0.7663533508810109, "learning_rate": 6.389964938377069e-06, "loss": 0.275, "step": 12474 }, { "epoch": 0.4281056966369252, "grad_norm": 0.8940954805866073, "learning_rate": 6.389431096284379e-06, "loss": 0.2867, "step": 12475 }, { "epoch": 0.42814001372683597, "grad_norm": 0.7579176439717767, "learning_rate": 6.388897237026887e-06, "loss": 0.2936, "step": 12476 }, { "epoch": 0.4281743308167467, "grad_norm": 0.7168861634190672, "learning_rate": 6.388363360611189e-06, "loss": 0.308, "step": 12477 }, { "epoch": 0.4282086479066575, "grad_norm": 0.7315255458700352, "learning_rate": 6.3878294670438826e-06, "loss": 0.3198, "step": 12478 }, { "epoch": 0.4282429649965683, "grad_norm": 0.7582759843400715, "learning_rate": 6.387295556331559e-06, "loss": 0.3182, "step": 12479 }, { "epoch": 0.4282772820864791, "grad_norm": 0.8299637075163201, "learning_rate": 6.386761628480818e-06, "loss": 0.3571, "step": 12480 }, { "epoch": 0.42831159917638983, "grad_norm": 0.7515119443488517, "learning_rate": 6.3862276834982516e-06, "loss": 0.2558, "step": 12481 }, { "epoch": 0.42834591626630064, "grad_norm": 0.7261839081057935, "learning_rate": 6.38569372139046e-06, "loss": 0.2944, "step": 12482 }, { "epoch": 0.4283802333562114, "grad_norm": 0.8085125138241409, "learning_rate": 6.3851597421640366e-06, "loss": 0.2395, "step": 12483 }, { "epoch": 0.42841455044612214, "grad_norm": 0.7944622554221132, "learning_rate": 6.384625745825581e-06, "loss": 0.2751, "step": 12484 }, { "epoch": 0.42844886753603295, "grad_norm": 0.7447663640745876, "learning_rate": 6.384091732381689e-06, "loss": 0.2748, "step": 12485 }, { "epoch": 0.4284831846259437, "grad_norm": 0.749069250706906, "learning_rate": 6.3835577018389564e-06, "loss": 0.2803, "step": 12486 }, { "epoch": 0.4285175017158545, "grad_norm": 0.8465676944620318, "learning_rate": 6.38302365420398e-06, "loss": 0.2937, "step": 12487 }, { "epoch": 0.42855181880576526, "grad_norm": 0.813861080580515, "learning_rate": 6.382489589483361e-06, "loss": 0.2609, "step": 12488 }, { "epoch": 0.42858613589567607, "grad_norm": 0.8140769615590563, "learning_rate": 6.381955507683693e-06, "loss": 0.292, "step": 12489 }, { "epoch": 0.4286204529855868, "grad_norm": 0.7334532088933774, "learning_rate": 6.381421408811579e-06, "loss": 0.3376, "step": 12490 }, { "epoch": 0.42865477007549757, "grad_norm": 0.6807507782492274, "learning_rate": 6.380887292873612e-06, "loss": 0.2861, "step": 12491 }, { "epoch": 0.4286890871654084, "grad_norm": 0.7375500380728669, "learning_rate": 6.380353159876393e-06, "loss": 0.2744, "step": 12492 }, { "epoch": 0.42872340425531913, "grad_norm": 0.8662011738927398, "learning_rate": 6.379819009826519e-06, "loss": 0.3329, "step": 12493 }, { "epoch": 0.42875772134522994, "grad_norm": 0.7992198694479388, "learning_rate": 6.379284842730591e-06, "loss": 0.3237, "step": 12494 }, { "epoch": 0.4287920384351407, "grad_norm": 0.7280828702107758, "learning_rate": 6.378750658595206e-06, "loss": 0.2813, "step": 12495 }, { "epoch": 0.4288263555250515, "grad_norm": 0.8013422807299873, "learning_rate": 6.378216457426963e-06, "loss": 0.3169, "step": 12496 }, { "epoch": 0.42886067261496225, "grad_norm": 0.7393163935506838, "learning_rate": 6.377682239232465e-06, "loss": 0.2689, "step": 12497 }, { "epoch": 0.42889498970487305, "grad_norm": 0.8168681075925293, "learning_rate": 6.377148004018309e-06, "loss": 0.2873, "step": 12498 }, { "epoch": 0.4289293067947838, "grad_norm": 0.820678771127731, "learning_rate": 6.376613751791093e-06, "loss": 0.3027, "step": 12499 }, { "epoch": 0.42896362388469456, "grad_norm": 0.6446678180773301, "learning_rate": 6.376079482557421e-06, "loss": 0.235, "step": 12500 }, { "epoch": 0.42899794097460536, "grad_norm": 0.693072598051957, "learning_rate": 6.375545196323891e-06, "loss": 0.3015, "step": 12501 }, { "epoch": 0.4290322580645161, "grad_norm": 0.7393588571571148, "learning_rate": 6.375010893097103e-06, "loss": 0.2642, "step": 12502 }, { "epoch": 0.4290665751544269, "grad_norm": 0.7756746757095188, "learning_rate": 6.3744765728836585e-06, "loss": 0.2777, "step": 12503 }, { "epoch": 0.4291008922443377, "grad_norm": 0.786065420219711, "learning_rate": 6.373942235690159e-06, "loss": 0.2758, "step": 12504 }, { "epoch": 0.4291352093342485, "grad_norm": 0.8250594507826274, "learning_rate": 6.3734078815232046e-06, "loss": 0.2966, "step": 12505 }, { "epoch": 0.42916952642415923, "grad_norm": 0.7217248461392303, "learning_rate": 6.372873510389399e-06, "loss": 0.3166, "step": 12506 }, { "epoch": 0.42920384351407, "grad_norm": 0.7569067323883983, "learning_rate": 6.372339122295339e-06, "loss": 0.2479, "step": 12507 }, { "epoch": 0.4292381606039808, "grad_norm": 0.6612699433892849, "learning_rate": 6.371804717247631e-06, "loss": 0.2273, "step": 12508 }, { "epoch": 0.42927247769389154, "grad_norm": 0.7496096760712209, "learning_rate": 6.371270295252874e-06, "loss": 0.285, "step": 12509 }, { "epoch": 0.42930679478380235, "grad_norm": 0.7287710897518519, "learning_rate": 6.3707358563176715e-06, "loss": 0.2577, "step": 12510 }, { "epoch": 0.4293411118737131, "grad_norm": 0.7674139306097288, "learning_rate": 6.3702014004486255e-06, "loss": 0.3579, "step": 12511 }, { "epoch": 0.4293754289636239, "grad_norm": 0.7501343438466099, "learning_rate": 6.369666927652338e-06, "loss": 0.292, "step": 12512 }, { "epoch": 0.42940974605353466, "grad_norm": 0.9393113596518119, "learning_rate": 6.3691324379354144e-06, "loss": 0.3001, "step": 12513 }, { "epoch": 0.4294440631434454, "grad_norm": 0.8178900216517914, "learning_rate": 6.368597931304455e-06, "loss": 0.3684, "step": 12514 }, { "epoch": 0.4294783802333562, "grad_norm": 0.8025576135364235, "learning_rate": 6.368063407766063e-06, "loss": 0.298, "step": 12515 }, { "epoch": 0.42951269732326697, "grad_norm": 0.77547774125364, "learning_rate": 6.367528867326844e-06, "loss": 0.3341, "step": 12516 }, { "epoch": 0.4295470144131778, "grad_norm": 0.7887394877198665, "learning_rate": 6.366994309993399e-06, "loss": 0.3055, "step": 12517 }, { "epoch": 0.4295813315030885, "grad_norm": 0.806353463682394, "learning_rate": 6.366459735772332e-06, "loss": 0.2912, "step": 12518 }, { "epoch": 0.42961564859299933, "grad_norm": 0.8073696487224662, "learning_rate": 6.3659251446702504e-06, "loss": 0.3411, "step": 12519 }, { "epoch": 0.4296499656829101, "grad_norm": 0.7849805515336513, "learning_rate": 6.365390536693756e-06, "loss": 0.276, "step": 12520 }, { "epoch": 0.4296842827728209, "grad_norm": 0.7857777945437601, "learning_rate": 6.364855911849453e-06, "loss": 0.2693, "step": 12521 }, { "epoch": 0.42971859986273164, "grad_norm": 0.6926611471735139, "learning_rate": 6.364321270143947e-06, "loss": 0.2845, "step": 12522 }, { "epoch": 0.4297529169526424, "grad_norm": 0.7919805881833845, "learning_rate": 6.363786611583842e-06, "loss": 0.3294, "step": 12523 }, { "epoch": 0.4297872340425532, "grad_norm": 0.7765910959826239, "learning_rate": 6.363251936175743e-06, "loss": 0.2942, "step": 12524 }, { "epoch": 0.42982155113246395, "grad_norm": 0.7919322871605358, "learning_rate": 6.362717243926255e-06, "loss": 0.2715, "step": 12525 }, { "epoch": 0.42985586822237476, "grad_norm": 0.7281156075890032, "learning_rate": 6.362182534841985e-06, "loss": 0.3126, "step": 12526 }, { "epoch": 0.4298901853122855, "grad_norm": 0.7861976905889008, "learning_rate": 6.361647808929537e-06, "loss": 0.3045, "step": 12527 }, { "epoch": 0.4299245024021963, "grad_norm": 0.786573511426567, "learning_rate": 6.361113066195519e-06, "loss": 0.322, "step": 12528 }, { "epoch": 0.42995881949210707, "grad_norm": 0.8117259024791089, "learning_rate": 6.3605783066465364e-06, "loss": 0.3037, "step": 12529 }, { "epoch": 0.4299931365820178, "grad_norm": 1.4087051822506798, "learning_rate": 6.360043530289195e-06, "loss": 0.2904, "step": 12530 }, { "epoch": 0.43002745367192863, "grad_norm": 0.8804707568174915, "learning_rate": 6.3595087371301e-06, "loss": 0.3162, "step": 12531 }, { "epoch": 0.4300617707618394, "grad_norm": 0.7250926231332329, "learning_rate": 6.35897392717586e-06, "loss": 0.2762, "step": 12532 }, { "epoch": 0.4300960878517502, "grad_norm": 0.7709118973372442, "learning_rate": 6.358439100433083e-06, "loss": 0.3057, "step": 12533 }, { "epoch": 0.43013040494166094, "grad_norm": 0.7846957784049415, "learning_rate": 6.3579042569083724e-06, "loss": 0.3656, "step": 12534 }, { "epoch": 0.43016472203157174, "grad_norm": 0.7324889787590072, "learning_rate": 6.357369396608341e-06, "loss": 0.2955, "step": 12535 }, { "epoch": 0.4301990391214825, "grad_norm": 0.8248036223179451, "learning_rate": 6.35683451953959e-06, "loss": 0.3243, "step": 12536 }, { "epoch": 0.43023335621139325, "grad_norm": 0.7828256986549835, "learning_rate": 6.356299625708731e-06, "loss": 0.2948, "step": 12537 }, { "epoch": 0.43026767330130405, "grad_norm": 0.795555899076841, "learning_rate": 6.3557647151223745e-06, "loss": 0.2915, "step": 12538 }, { "epoch": 0.4303019903912148, "grad_norm": 0.7752520971355829, "learning_rate": 6.355229787787123e-06, "loss": 0.2628, "step": 12539 }, { "epoch": 0.4303363074811256, "grad_norm": 0.7638947535560299, "learning_rate": 6.354694843709587e-06, "loss": 0.2902, "step": 12540 }, { "epoch": 0.43037062457103636, "grad_norm": 0.8802819680163657, "learning_rate": 6.354159882896376e-06, "loss": 0.2707, "step": 12541 }, { "epoch": 0.43040494166094717, "grad_norm": 0.9022895332069081, "learning_rate": 6.3536249053541e-06, "loss": 0.2998, "step": 12542 }, { "epoch": 0.4304392587508579, "grad_norm": 0.7334995461291474, "learning_rate": 6.353089911089365e-06, "loss": 0.277, "step": 12543 }, { "epoch": 0.43047357584076873, "grad_norm": 0.7785065254045248, "learning_rate": 6.352554900108781e-06, "loss": 0.3173, "step": 12544 }, { "epoch": 0.4305078929306795, "grad_norm": 0.7295035066877625, "learning_rate": 6.352019872418961e-06, "loss": 0.2776, "step": 12545 }, { "epoch": 0.43054221002059023, "grad_norm": 0.7317873417851455, "learning_rate": 6.351484828026509e-06, "loss": 0.285, "step": 12546 }, { "epoch": 0.43057652711050104, "grad_norm": 0.7709626726907441, "learning_rate": 6.350949766938039e-06, "loss": 0.2779, "step": 12547 }, { "epoch": 0.4306108442004118, "grad_norm": 0.7592464483261719, "learning_rate": 6.350414689160159e-06, "loss": 0.2724, "step": 12548 }, { "epoch": 0.4306451612903226, "grad_norm": 0.7708277382524368, "learning_rate": 6.349879594699481e-06, "loss": 0.2797, "step": 12549 }, { "epoch": 0.43067947838023335, "grad_norm": 0.7456933411844744, "learning_rate": 6.349344483562613e-06, "loss": 0.2784, "step": 12550 }, { "epoch": 0.43071379547014416, "grad_norm": 0.7729108588021345, "learning_rate": 6.3488093557561694e-06, "loss": 0.3216, "step": 12551 }, { "epoch": 0.4307481125600549, "grad_norm": 0.7652404294291619, "learning_rate": 6.348274211286756e-06, "loss": 0.3225, "step": 12552 }, { "epoch": 0.43078242964996566, "grad_norm": 0.7835017533934256, "learning_rate": 6.347739050160988e-06, "loss": 0.2803, "step": 12553 }, { "epoch": 0.43081674673987647, "grad_norm": 0.7031931119256475, "learning_rate": 6.347203872385474e-06, "loss": 0.2771, "step": 12554 }, { "epoch": 0.4308510638297872, "grad_norm": 0.7889683052143862, "learning_rate": 6.346668677966828e-06, "loss": 0.2913, "step": 12555 }, { "epoch": 0.430885380919698, "grad_norm": 0.7449752948944576, "learning_rate": 6.346133466911662e-06, "loss": 0.3263, "step": 12556 }, { "epoch": 0.4309196980096088, "grad_norm": 0.7478014274305661, "learning_rate": 6.345598239226584e-06, "loss": 0.3053, "step": 12557 }, { "epoch": 0.4309540150995196, "grad_norm": 0.8341771468071537, "learning_rate": 6.345062994918211e-06, "loss": 0.2876, "step": 12558 }, { "epoch": 0.43098833218943033, "grad_norm": 0.8326288670670491, "learning_rate": 6.344527733993151e-06, "loss": 0.3204, "step": 12559 }, { "epoch": 0.4310226492793411, "grad_norm": 0.8554434497103098, "learning_rate": 6.343992456458018e-06, "loss": 0.3351, "step": 12560 }, { "epoch": 0.4310569663692519, "grad_norm": 0.8155907447950571, "learning_rate": 6.343457162319428e-06, "loss": 0.3188, "step": 12561 }, { "epoch": 0.43109128345916264, "grad_norm": 0.7261346393986021, "learning_rate": 6.342921851583987e-06, "loss": 0.27, "step": 12562 }, { "epoch": 0.43112560054907345, "grad_norm": 0.8672847929555666, "learning_rate": 6.342386524258315e-06, "loss": 0.3239, "step": 12563 }, { "epoch": 0.4311599176389842, "grad_norm": 0.7583791869646683, "learning_rate": 6.341851180349022e-06, "loss": 0.2846, "step": 12564 }, { "epoch": 0.431194234728895, "grad_norm": 0.8782876153786793, "learning_rate": 6.341315819862721e-06, "loss": 0.3174, "step": 12565 }, { "epoch": 0.43122855181880576, "grad_norm": 0.7966116851137431, "learning_rate": 6.340780442806028e-06, "loss": 0.2377, "step": 12566 }, { "epoch": 0.43126286890871657, "grad_norm": 0.8406030334574579, "learning_rate": 6.340245049185557e-06, "loss": 0.3172, "step": 12567 }, { "epoch": 0.4312971859986273, "grad_norm": 0.8260437965441048, "learning_rate": 6.339709639007919e-06, "loss": 0.3031, "step": 12568 }, { "epoch": 0.43133150308853807, "grad_norm": 0.7425429926900875, "learning_rate": 6.339174212279732e-06, "loss": 0.3065, "step": 12569 }, { "epoch": 0.4313658201784489, "grad_norm": 0.8170806379429704, "learning_rate": 6.338638769007608e-06, "loss": 0.2733, "step": 12570 }, { "epoch": 0.43140013726835963, "grad_norm": 0.7165208999287489, "learning_rate": 6.338103309198161e-06, "loss": 0.2888, "step": 12571 }, { "epoch": 0.43143445435827044, "grad_norm": 0.7125893288476445, "learning_rate": 6.337567832858009e-06, "loss": 0.3065, "step": 12572 }, { "epoch": 0.4314687714481812, "grad_norm": 0.7988547685448283, "learning_rate": 6.337032339993767e-06, "loss": 0.2915, "step": 12573 }, { "epoch": 0.431503088538092, "grad_norm": 0.8393111407165378, "learning_rate": 6.336496830612049e-06, "loss": 0.2777, "step": 12574 }, { "epoch": 0.43153740562800275, "grad_norm": 0.7197164552907274, "learning_rate": 6.335961304719471e-06, "loss": 0.2615, "step": 12575 }, { "epoch": 0.4315717227179135, "grad_norm": 0.7386706512998519, "learning_rate": 6.335425762322648e-06, "loss": 0.2615, "step": 12576 }, { "epoch": 0.4316060398078243, "grad_norm": 0.7887567105049305, "learning_rate": 6.334890203428196e-06, "loss": 0.2822, "step": 12577 }, { "epoch": 0.43164035689773506, "grad_norm": 0.7486821421262103, "learning_rate": 6.334354628042733e-06, "loss": 0.2568, "step": 12578 }, { "epoch": 0.43167467398764586, "grad_norm": 0.9650351518875969, "learning_rate": 6.333819036172872e-06, "loss": 0.3755, "step": 12579 }, { "epoch": 0.4317089910775566, "grad_norm": 0.926747043841534, "learning_rate": 6.3332834278252355e-06, "loss": 0.271, "step": 12580 }, { "epoch": 0.4317433081674674, "grad_norm": 0.7809965959765075, "learning_rate": 6.332747803006435e-06, "loss": 0.3177, "step": 12581 }, { "epoch": 0.4317776252573782, "grad_norm": 0.7717960847913969, "learning_rate": 6.332212161723089e-06, "loss": 0.3236, "step": 12582 }, { "epoch": 0.4318119423472889, "grad_norm": 0.7140057412985267, "learning_rate": 6.331676503981816e-06, "loss": 0.2942, "step": 12583 }, { "epoch": 0.43184625943719973, "grad_norm": 0.7206102021032461, "learning_rate": 6.331140829789231e-06, "loss": 0.2867, "step": 12584 }, { "epoch": 0.4318805765271105, "grad_norm": 0.7754259975401002, "learning_rate": 6.330605139151953e-06, "loss": 0.2756, "step": 12585 }, { "epoch": 0.4319148936170213, "grad_norm": 0.6989118172417044, "learning_rate": 6.3300694320765996e-06, "loss": 0.3078, "step": 12586 }, { "epoch": 0.43194921070693204, "grad_norm": 0.7848480853201923, "learning_rate": 6.329533708569791e-06, "loss": 0.3115, "step": 12587 }, { "epoch": 0.43198352779684285, "grad_norm": 0.7611471838532702, "learning_rate": 6.328997968638142e-06, "loss": 0.2658, "step": 12588 }, { "epoch": 0.4320178448867536, "grad_norm": 0.7560126163193664, "learning_rate": 6.3284622122882735e-06, "loss": 0.3457, "step": 12589 }, { "epoch": 0.4320521619766644, "grad_norm": 0.7045176421712953, "learning_rate": 6.327926439526803e-06, "loss": 0.2569, "step": 12590 }, { "epoch": 0.43208647906657516, "grad_norm": 0.7262973199964625, "learning_rate": 6.3273906503603485e-06, "loss": 0.2756, "step": 12591 }, { "epoch": 0.4321207961564859, "grad_norm": 0.7617360261841394, "learning_rate": 6.326854844795531e-06, "loss": 0.3533, "step": 12592 }, { "epoch": 0.4321551132463967, "grad_norm": 0.8050857111988611, "learning_rate": 6.326319022838968e-06, "loss": 0.3348, "step": 12593 }, { "epoch": 0.43218943033630747, "grad_norm": 0.7130738020325893, "learning_rate": 6.325783184497281e-06, "loss": 0.2951, "step": 12594 }, { "epoch": 0.4322237474262183, "grad_norm": 0.7123029116003295, "learning_rate": 6.325247329777087e-06, "loss": 0.2766, "step": 12595 }, { "epoch": 0.432258064516129, "grad_norm": 0.7054327459965136, "learning_rate": 6.324711458685009e-06, "loss": 0.2791, "step": 12596 }, { "epoch": 0.43229238160603983, "grad_norm": 0.7598495357280334, "learning_rate": 6.324175571227665e-06, "loss": 0.3349, "step": 12597 }, { "epoch": 0.4323266986959506, "grad_norm": 0.7537781237678829, "learning_rate": 6.323639667411675e-06, "loss": 0.2726, "step": 12598 }, { "epoch": 0.43236101578586134, "grad_norm": 0.740574649801275, "learning_rate": 6.32310374724366e-06, "loss": 0.2954, "step": 12599 }, { "epoch": 0.43239533287577214, "grad_norm": 0.7115739433753157, "learning_rate": 6.322567810730242e-06, "loss": 0.2597, "step": 12600 }, { "epoch": 0.4324296499656829, "grad_norm": 0.7582755402241068, "learning_rate": 6.322031857878039e-06, "loss": 0.2505, "step": 12601 }, { "epoch": 0.4324639670555937, "grad_norm": 0.7565734350261734, "learning_rate": 6.321495888693674e-06, "loss": 0.2945, "step": 12602 }, { "epoch": 0.43249828414550445, "grad_norm": 0.7387878099539997, "learning_rate": 6.32095990318377e-06, "loss": 0.2765, "step": 12603 }, { "epoch": 0.43253260123541526, "grad_norm": 0.6786463076298915, "learning_rate": 6.320423901354944e-06, "loss": 0.2145, "step": 12604 }, { "epoch": 0.432566918325326, "grad_norm": 0.7404888583288898, "learning_rate": 6.319887883213822e-06, "loss": 0.3337, "step": 12605 }, { "epoch": 0.43260123541523676, "grad_norm": 0.7593209610751227, "learning_rate": 6.319351848767024e-06, "loss": 0.3251, "step": 12606 }, { "epoch": 0.43263555250514757, "grad_norm": 0.7802406900769076, "learning_rate": 6.318815798021169e-06, "loss": 0.2609, "step": 12607 }, { "epoch": 0.4326698695950583, "grad_norm": 0.7389016975290702, "learning_rate": 6.318279730982886e-06, "loss": 0.2903, "step": 12608 }, { "epoch": 0.4327041866849691, "grad_norm": 0.7466454211603282, "learning_rate": 6.317743647658793e-06, "loss": 0.3825, "step": 12609 }, { "epoch": 0.4327385037748799, "grad_norm": 0.7292846364662876, "learning_rate": 6.317207548055513e-06, "loss": 0.3626, "step": 12610 }, { "epoch": 0.4327728208647907, "grad_norm": 0.7330842860116731, "learning_rate": 6.31667143217967e-06, "loss": 0.276, "step": 12611 }, { "epoch": 0.43280713795470144, "grad_norm": 0.8194533820267702, "learning_rate": 6.316135300037887e-06, "loss": 0.3146, "step": 12612 }, { "epoch": 0.43284145504461224, "grad_norm": 0.7825574997500516, "learning_rate": 6.315599151636787e-06, "loss": 0.3132, "step": 12613 }, { "epoch": 0.432875772134523, "grad_norm": 0.7998038473046748, "learning_rate": 6.315062986982992e-06, "loss": 0.2837, "step": 12614 }, { "epoch": 0.43291008922443375, "grad_norm": 0.8613049923240795, "learning_rate": 6.3145268060831264e-06, "loss": 0.3039, "step": 12615 }, { "epoch": 0.43294440631434455, "grad_norm": 0.7825133632223548, "learning_rate": 6.313990608943816e-06, "loss": 0.2728, "step": 12616 }, { "epoch": 0.4329787234042553, "grad_norm": 0.8052218563904966, "learning_rate": 6.313454395571683e-06, "loss": 0.3115, "step": 12617 }, { "epoch": 0.4330130404941661, "grad_norm": 0.7911553044859754, "learning_rate": 6.312918165973352e-06, "loss": 0.2873, "step": 12618 }, { "epoch": 0.43304735758407686, "grad_norm": 0.7629859313470699, "learning_rate": 6.3123819201554496e-06, "loss": 0.3353, "step": 12619 }, { "epoch": 0.43308167467398767, "grad_norm": 0.8143438567687836, "learning_rate": 6.3118456581245965e-06, "loss": 0.3217, "step": 12620 }, { "epoch": 0.4331159917638984, "grad_norm": 0.7941926667255179, "learning_rate": 6.3113093798874215e-06, "loss": 0.2895, "step": 12621 }, { "epoch": 0.4331503088538092, "grad_norm": 0.7767729257637634, "learning_rate": 6.3107730854505464e-06, "loss": 0.3444, "step": 12622 }, { "epoch": 0.43318462594372, "grad_norm": 0.9526264791476721, "learning_rate": 6.3102367748205995e-06, "loss": 0.3211, "step": 12623 }, { "epoch": 0.43321894303363073, "grad_norm": 0.8630001970913332, "learning_rate": 6.309700448004204e-06, "loss": 0.3231, "step": 12624 }, { "epoch": 0.43325326012354154, "grad_norm": 0.7029618223247495, "learning_rate": 6.309164105007987e-06, "loss": 0.2855, "step": 12625 }, { "epoch": 0.4332875772134523, "grad_norm": 0.7297830163905389, "learning_rate": 6.308627745838574e-06, "loss": 0.3204, "step": 12626 }, { "epoch": 0.4333218943033631, "grad_norm": 0.6985260357542421, "learning_rate": 6.308091370502589e-06, "loss": 0.2946, "step": 12627 }, { "epoch": 0.43335621139327385, "grad_norm": 0.7213720068767611, "learning_rate": 6.3075549790066626e-06, "loss": 0.2788, "step": 12628 }, { "epoch": 0.4333905284831846, "grad_norm": 0.7328778148002015, "learning_rate": 6.307018571357416e-06, "loss": 0.3193, "step": 12629 }, { "epoch": 0.4334248455730954, "grad_norm": 0.6615282467121604, "learning_rate": 6.3064821475614814e-06, "loss": 0.251, "step": 12630 }, { "epoch": 0.43345916266300616, "grad_norm": 0.7374032767647072, "learning_rate": 6.305945707625483e-06, "loss": 0.2942, "step": 12631 }, { "epoch": 0.43349347975291697, "grad_norm": 0.7852857639506036, "learning_rate": 6.305409251556047e-06, "loss": 0.2832, "step": 12632 }, { "epoch": 0.4335277968428277, "grad_norm": 0.7600794051738113, "learning_rate": 6.304872779359801e-06, "loss": 0.311, "step": 12633 }, { "epoch": 0.4335621139327385, "grad_norm": 0.7516355913125615, "learning_rate": 6.3043362910433736e-06, "loss": 0.2742, "step": 12634 }, { "epoch": 0.4335964310226493, "grad_norm": 0.7256666318525639, "learning_rate": 6.303799786613394e-06, "loss": 0.2564, "step": 12635 }, { "epoch": 0.43363074811256, "grad_norm": 0.8692735648742197, "learning_rate": 6.303263266076487e-06, "loss": 0.3311, "step": 12636 }, { "epoch": 0.43366506520247083, "grad_norm": 0.6920481438236734, "learning_rate": 6.3027267294392794e-06, "loss": 0.2483, "step": 12637 }, { "epoch": 0.4336993822923816, "grad_norm": 0.7757291300231663, "learning_rate": 6.3021901767084045e-06, "loss": 0.3489, "step": 12638 }, { "epoch": 0.4337336993822924, "grad_norm": 0.8552318545059918, "learning_rate": 6.3016536078904875e-06, "loss": 0.3183, "step": 12639 }, { "epoch": 0.43376801647220314, "grad_norm": 0.7505119279490402, "learning_rate": 6.301117022992158e-06, "loss": 0.302, "step": 12640 }, { "epoch": 0.43380233356211395, "grad_norm": 0.7449568275489451, "learning_rate": 6.300580422020045e-06, "loss": 0.3053, "step": 12641 }, { "epoch": 0.4338366506520247, "grad_norm": 0.7107471363735453, "learning_rate": 6.300043804980777e-06, "loss": 0.325, "step": 12642 }, { "epoch": 0.4338709677419355, "grad_norm": 0.6887955185330991, "learning_rate": 6.299507171880983e-06, "loss": 0.3007, "step": 12643 }, { "epoch": 0.43390528483184626, "grad_norm": 0.7307150295363384, "learning_rate": 6.298970522727293e-06, "loss": 0.3287, "step": 12644 }, { "epoch": 0.433939601921757, "grad_norm": 0.7896247206216597, "learning_rate": 6.298433857526336e-06, "loss": 0.272, "step": 12645 }, { "epoch": 0.4339739190116678, "grad_norm": 0.7536773973143314, "learning_rate": 6.297897176284744e-06, "loss": 0.3061, "step": 12646 }, { "epoch": 0.43400823610157857, "grad_norm": 0.7825200882332858, "learning_rate": 6.297360479009145e-06, "loss": 0.2689, "step": 12647 }, { "epoch": 0.4340425531914894, "grad_norm": 0.7062186178034384, "learning_rate": 6.296823765706171e-06, "loss": 0.2458, "step": 12648 }, { "epoch": 0.43407687028140013, "grad_norm": 0.9316236772866199, "learning_rate": 6.296287036382449e-06, "loss": 0.2866, "step": 12649 }, { "epoch": 0.43411118737131094, "grad_norm": 0.7787811195885835, "learning_rate": 6.2957502910446125e-06, "loss": 0.3026, "step": 12650 }, { "epoch": 0.4341455044612217, "grad_norm": 0.7806357870948634, "learning_rate": 6.295213529699292e-06, "loss": 0.3088, "step": 12651 }, { "epoch": 0.43417982155113244, "grad_norm": 0.7126893997476234, "learning_rate": 6.294676752353119e-06, "loss": 0.263, "step": 12652 }, { "epoch": 0.43421413864104325, "grad_norm": 0.7975578726078537, "learning_rate": 6.2941399590127235e-06, "loss": 0.2705, "step": 12653 }, { "epoch": 0.434248455730954, "grad_norm": 0.7735911532167175, "learning_rate": 6.293603149684739e-06, "loss": 0.3087, "step": 12654 }, { "epoch": 0.4342827728208648, "grad_norm": 0.7774317737859625, "learning_rate": 6.293066324375795e-06, "loss": 0.2951, "step": 12655 }, { "epoch": 0.43431708991077556, "grad_norm": 0.9036803568277747, "learning_rate": 6.292529483092525e-06, "loss": 0.289, "step": 12656 }, { "epoch": 0.43435140700068636, "grad_norm": 0.7745576402321083, "learning_rate": 6.2919926258415585e-06, "loss": 0.2876, "step": 12657 }, { "epoch": 0.4343857240905971, "grad_norm": 0.767959286685552, "learning_rate": 6.29145575262953e-06, "loss": 0.3224, "step": 12658 }, { "epoch": 0.43442004118050787, "grad_norm": 0.7249427519044431, "learning_rate": 6.290918863463072e-06, "loss": 0.2767, "step": 12659 }, { "epoch": 0.4344543582704187, "grad_norm": 0.7454271737563034, "learning_rate": 6.290381958348816e-06, "loss": 0.2508, "step": 12660 }, { "epoch": 0.4344886753603294, "grad_norm": 0.7118823335495922, "learning_rate": 6.289845037293394e-06, "loss": 0.2633, "step": 12661 }, { "epoch": 0.43452299245024023, "grad_norm": 0.7906030357845276, "learning_rate": 6.289308100303442e-06, "loss": 0.2755, "step": 12662 }, { "epoch": 0.434557309540151, "grad_norm": 0.8153643880409307, "learning_rate": 6.2887711473855926e-06, "loss": 0.328, "step": 12663 }, { "epoch": 0.4345916266300618, "grad_norm": 0.7977054288741652, "learning_rate": 6.288234178546476e-06, "loss": 0.2746, "step": 12664 }, { "epoch": 0.43462594371997254, "grad_norm": 0.8826010976777482, "learning_rate": 6.28769719379273e-06, "loss": 0.2463, "step": 12665 }, { "epoch": 0.43466026080988335, "grad_norm": 0.7551289897946918, "learning_rate": 6.287160193130985e-06, "loss": 0.3573, "step": 12666 }, { "epoch": 0.4346945778997941, "grad_norm": 0.7744605074974185, "learning_rate": 6.286623176567877e-06, "loss": 0.3566, "step": 12667 }, { "epoch": 0.43472889498970485, "grad_norm": 0.7770027945778656, "learning_rate": 6.286086144110039e-06, "loss": 0.2759, "step": 12668 }, { "epoch": 0.43476321207961566, "grad_norm": 0.7708234853235417, "learning_rate": 6.285549095764106e-06, "loss": 0.2975, "step": 12669 }, { "epoch": 0.4347975291695264, "grad_norm": 0.8421663207805375, "learning_rate": 6.285012031536715e-06, "loss": 0.2997, "step": 12670 }, { "epoch": 0.4348318462594372, "grad_norm": 0.9181333554753565, "learning_rate": 6.284474951434497e-06, "loss": 0.3263, "step": 12671 }, { "epoch": 0.43486616334934797, "grad_norm": 0.7593529948836246, "learning_rate": 6.283937855464088e-06, "loss": 0.2955, "step": 12672 }, { "epoch": 0.4349004804392588, "grad_norm": 0.6893357503975422, "learning_rate": 6.283400743632124e-06, "loss": 0.2479, "step": 12673 }, { "epoch": 0.4349347975291695, "grad_norm": 0.7796995348595963, "learning_rate": 6.282863615945241e-06, "loss": 0.2542, "step": 12674 }, { "epoch": 0.4349691146190803, "grad_norm": 0.7672282858102191, "learning_rate": 6.282326472410073e-06, "loss": 0.2716, "step": 12675 }, { "epoch": 0.4350034317089911, "grad_norm": 0.753555732813169, "learning_rate": 6.281789313033257e-06, "loss": 0.2931, "step": 12676 }, { "epoch": 0.43503774879890184, "grad_norm": 0.7778004806735874, "learning_rate": 6.281252137821429e-06, "loss": 0.2873, "step": 12677 }, { "epoch": 0.43507206588881264, "grad_norm": 0.729443338750445, "learning_rate": 6.280714946781223e-06, "loss": 0.2784, "step": 12678 }, { "epoch": 0.4351063829787234, "grad_norm": 0.7096750051740632, "learning_rate": 6.280177739919278e-06, "loss": 0.3066, "step": 12679 }, { "epoch": 0.4351407000686342, "grad_norm": 0.68327745018418, "learning_rate": 6.279640517242229e-06, "loss": 0.2665, "step": 12680 }, { "epoch": 0.43517501715854495, "grad_norm": 0.7482557698430786, "learning_rate": 6.279103278756714e-06, "loss": 0.2761, "step": 12681 }, { "epoch": 0.4352093342484557, "grad_norm": 0.828803462528451, "learning_rate": 6.278566024469368e-06, "loss": 0.2961, "step": 12682 }, { "epoch": 0.4352436513383665, "grad_norm": 0.812741199044618, "learning_rate": 6.278028754386831e-06, "loss": 0.3067, "step": 12683 }, { "epoch": 0.43527796842827726, "grad_norm": 0.7725024984459424, "learning_rate": 6.277491468515738e-06, "loss": 0.2861, "step": 12684 }, { "epoch": 0.43531228551818807, "grad_norm": 0.7315467422379649, "learning_rate": 6.276954166862728e-06, "loss": 0.3294, "step": 12685 }, { "epoch": 0.4353466026080988, "grad_norm": 0.8681650643707035, "learning_rate": 6.276416849434437e-06, "loss": 0.2694, "step": 12686 }, { "epoch": 0.4353809196980096, "grad_norm": 0.872680565250439, "learning_rate": 6.275879516237504e-06, "loss": 0.3064, "step": 12687 }, { "epoch": 0.4354152367879204, "grad_norm": 0.781298234757485, "learning_rate": 6.275342167278569e-06, "loss": 0.352, "step": 12688 }, { "epoch": 0.4354495538778312, "grad_norm": 0.783834081563556, "learning_rate": 6.274804802564266e-06, "loss": 0.2959, "step": 12689 }, { "epoch": 0.43548387096774194, "grad_norm": 0.7896462187749126, "learning_rate": 6.2742674221012365e-06, "loss": 0.3028, "step": 12690 }, { "epoch": 0.4355181880576527, "grad_norm": 0.7353834766532966, "learning_rate": 6.273730025896119e-06, "loss": 0.3118, "step": 12691 }, { "epoch": 0.4355525051475635, "grad_norm": 0.7496463411896291, "learning_rate": 6.273192613955553e-06, "loss": 0.2964, "step": 12692 }, { "epoch": 0.43558682223747425, "grad_norm": 0.9144299904127682, "learning_rate": 6.272655186286176e-06, "loss": 0.2704, "step": 12693 }, { "epoch": 0.43562113932738505, "grad_norm": 0.7343515229119498, "learning_rate": 6.2721177428946276e-06, "loss": 0.2372, "step": 12694 }, { "epoch": 0.4356554564172958, "grad_norm": 0.9358340859425012, "learning_rate": 6.2715802837875475e-06, "loss": 0.3108, "step": 12695 }, { "epoch": 0.4356897735072066, "grad_norm": 0.859745585162722, "learning_rate": 6.271042808971576e-06, "loss": 0.3543, "step": 12696 }, { "epoch": 0.43572409059711736, "grad_norm": 0.7414063785518377, "learning_rate": 6.270505318453352e-06, "loss": 0.2819, "step": 12697 }, { "epoch": 0.4357584076870281, "grad_norm": 0.7919355822998339, "learning_rate": 6.269967812239516e-06, "loss": 0.3201, "step": 12698 }, { "epoch": 0.4357927247769389, "grad_norm": 0.7926679976583465, "learning_rate": 6.269430290336709e-06, "loss": 0.2902, "step": 12699 }, { "epoch": 0.4358270418668497, "grad_norm": 0.7092634910050478, "learning_rate": 6.268892752751571e-06, "loss": 0.2797, "step": 12700 }, { "epoch": 0.4358613589567605, "grad_norm": 0.7037591313940275, "learning_rate": 6.268355199490741e-06, "loss": 0.2879, "step": 12701 }, { "epoch": 0.43589567604667123, "grad_norm": 0.8715171707632882, "learning_rate": 6.267817630560863e-06, "loss": 0.3093, "step": 12702 }, { "epoch": 0.43592999313658204, "grad_norm": 0.7108362153162745, "learning_rate": 6.267280045968576e-06, "loss": 0.3126, "step": 12703 }, { "epoch": 0.4359643102264928, "grad_norm": 0.75240617962246, "learning_rate": 6.2667424457205195e-06, "loss": 0.2965, "step": 12704 }, { "epoch": 0.43599862731640354, "grad_norm": 0.7882950388583853, "learning_rate": 6.266204829823339e-06, "loss": 0.2849, "step": 12705 }, { "epoch": 0.43603294440631435, "grad_norm": 0.7972646008974249, "learning_rate": 6.265667198283674e-06, "loss": 0.2967, "step": 12706 }, { "epoch": 0.4360672614962251, "grad_norm": 0.7601911773837515, "learning_rate": 6.265129551108164e-06, "loss": 0.3075, "step": 12707 }, { "epoch": 0.4361015785861359, "grad_norm": 0.7066859531855241, "learning_rate": 6.2645918883034575e-06, "loss": 0.295, "step": 12708 }, { "epoch": 0.43613589567604666, "grad_norm": 0.7120068499681066, "learning_rate": 6.264054209876189e-06, "loss": 0.2306, "step": 12709 }, { "epoch": 0.43617021276595747, "grad_norm": 0.7322765380169819, "learning_rate": 6.263516515833006e-06, "loss": 0.3121, "step": 12710 }, { "epoch": 0.4362045298558682, "grad_norm": 0.7410917821746589, "learning_rate": 6.26297880618055e-06, "loss": 0.2821, "step": 12711 }, { "epoch": 0.436238846945779, "grad_norm": 0.7557107828417602, "learning_rate": 6.262441080925462e-06, "loss": 0.3085, "step": 12712 }, { "epoch": 0.4362731640356898, "grad_norm": 0.9070210530994145, "learning_rate": 6.261903340074386e-06, "loss": 0.2872, "step": 12713 }, { "epoch": 0.4363074811256005, "grad_norm": 0.7936359539059068, "learning_rate": 6.261365583633967e-06, "loss": 0.2452, "step": 12714 }, { "epoch": 0.43634179821551133, "grad_norm": 0.8054409737343382, "learning_rate": 6.260827811610848e-06, "loss": 0.3225, "step": 12715 }, { "epoch": 0.4363761153054221, "grad_norm": 0.8563228371620236, "learning_rate": 6.260290024011669e-06, "loss": 0.3128, "step": 12716 }, { "epoch": 0.4364104323953329, "grad_norm": 0.7442142110208337, "learning_rate": 6.259752220843077e-06, "loss": 0.2636, "step": 12717 }, { "epoch": 0.43644474948524364, "grad_norm": 0.9209755850312065, "learning_rate": 6.259214402111715e-06, "loss": 0.2845, "step": 12718 }, { "epoch": 0.43647906657515445, "grad_norm": 0.844114854904679, "learning_rate": 6.258676567824227e-06, "loss": 0.3325, "step": 12719 }, { "epoch": 0.4365133836650652, "grad_norm": 0.7174841780623549, "learning_rate": 6.258138717987257e-06, "loss": 0.2704, "step": 12720 }, { "epoch": 0.43654770075497595, "grad_norm": 0.689602056487654, "learning_rate": 6.2576008526074515e-06, "loss": 0.2903, "step": 12721 }, { "epoch": 0.43658201784488676, "grad_norm": 0.822369352824237, "learning_rate": 6.257062971691452e-06, "loss": 0.3004, "step": 12722 }, { "epoch": 0.4366163349347975, "grad_norm": 0.7144718634210205, "learning_rate": 6.256525075245906e-06, "loss": 0.2717, "step": 12723 }, { "epoch": 0.4366506520247083, "grad_norm": 0.8602151590795418, "learning_rate": 6.255987163277458e-06, "loss": 0.2883, "step": 12724 }, { "epoch": 0.43668496911461907, "grad_norm": 0.6744882944139293, "learning_rate": 6.255449235792752e-06, "loss": 0.2845, "step": 12725 }, { "epoch": 0.4367192862045299, "grad_norm": 0.9490296974558444, "learning_rate": 6.2549112927984335e-06, "loss": 0.263, "step": 12726 }, { "epoch": 0.43675360329444063, "grad_norm": 0.6892579152608055, "learning_rate": 6.2543733343011505e-06, "loss": 0.3285, "step": 12727 }, { "epoch": 0.4367879203843514, "grad_norm": 0.7170846574111367, "learning_rate": 6.253835360307549e-06, "loss": 0.2961, "step": 12728 }, { "epoch": 0.4368222374742622, "grad_norm": 0.7059852009540868, "learning_rate": 6.253297370824271e-06, "loss": 0.31, "step": 12729 }, { "epoch": 0.43685655456417294, "grad_norm": 0.7075853669226044, "learning_rate": 6.252759365857965e-06, "loss": 0.2875, "step": 12730 }, { "epoch": 0.43689087165408375, "grad_norm": 0.8607600583426013, "learning_rate": 6.252221345415281e-06, "loss": 0.3022, "step": 12731 }, { "epoch": 0.4369251887439945, "grad_norm": 0.7993774302855571, "learning_rate": 6.251683309502858e-06, "loss": 0.3309, "step": 12732 }, { "epoch": 0.4369595058339053, "grad_norm": 0.7282461704925838, "learning_rate": 6.251145258127349e-06, "loss": 0.2926, "step": 12733 }, { "epoch": 0.43699382292381606, "grad_norm": 0.8771223270787174, "learning_rate": 6.250607191295399e-06, "loss": 0.3218, "step": 12734 }, { "epoch": 0.43702814001372686, "grad_norm": 0.7473393505606891, "learning_rate": 6.250069109013654e-06, "loss": 0.2841, "step": 12735 }, { "epoch": 0.4370624571036376, "grad_norm": 0.7578093049014307, "learning_rate": 6.2495310112887645e-06, "loss": 0.2886, "step": 12736 }, { "epoch": 0.43709677419354837, "grad_norm": 0.8340418028736686, "learning_rate": 6.248992898127375e-06, "loss": 0.2723, "step": 12737 }, { "epoch": 0.43713109128345917, "grad_norm": 0.7033361696196108, "learning_rate": 6.2484547695361356e-06, "loss": 0.2262, "step": 12738 }, { "epoch": 0.4371654083733699, "grad_norm": 0.8487120633691347, "learning_rate": 6.247916625521691e-06, "loss": 0.3117, "step": 12739 }, { "epoch": 0.43719972546328073, "grad_norm": 0.7913247510681637, "learning_rate": 6.247378466090694e-06, "loss": 0.2575, "step": 12740 }, { "epoch": 0.4372340425531915, "grad_norm": 1.014242823834442, "learning_rate": 6.246840291249787e-06, "loss": 0.3151, "step": 12741 }, { "epoch": 0.4372683596431023, "grad_norm": 0.7646438627954002, "learning_rate": 6.246302101005625e-06, "loss": 0.3901, "step": 12742 }, { "epoch": 0.43730267673301304, "grad_norm": 1.4043419028751043, "learning_rate": 6.245763895364851e-06, "loss": 0.375, "step": 12743 }, { "epoch": 0.4373369938229238, "grad_norm": 0.7638995109781205, "learning_rate": 6.245225674334119e-06, "loss": 0.3238, "step": 12744 }, { "epoch": 0.4373713109128346, "grad_norm": 0.7373039600229959, "learning_rate": 6.244687437920075e-06, "loss": 0.3223, "step": 12745 }, { "epoch": 0.43740562800274535, "grad_norm": 0.7785663599301115, "learning_rate": 6.244149186129368e-06, "loss": 0.3358, "step": 12746 }, { "epoch": 0.43743994509265616, "grad_norm": 0.8444822366795223, "learning_rate": 6.24361091896865e-06, "loss": 0.309, "step": 12747 }, { "epoch": 0.4374742621825669, "grad_norm": 0.6853230060155648, "learning_rate": 6.243072636444566e-06, "loss": 0.3267, "step": 12748 }, { "epoch": 0.4375085792724777, "grad_norm": 0.7580805393570352, "learning_rate": 6.2425343385637705e-06, "loss": 0.2914, "step": 12749 }, { "epoch": 0.43754289636238847, "grad_norm": 0.8970432906397512, "learning_rate": 6.241996025332913e-06, "loss": 0.2535, "step": 12750 }, { "epoch": 0.4375772134522992, "grad_norm": 0.6954761895362364, "learning_rate": 6.241457696758642e-06, "loss": 0.336, "step": 12751 }, { "epoch": 0.43761153054221, "grad_norm": 0.812827923424157, "learning_rate": 6.240919352847608e-06, "loss": 0.3107, "step": 12752 }, { "epoch": 0.4376458476321208, "grad_norm": 0.8270665530670956, "learning_rate": 6.240380993606463e-06, "loss": 0.2552, "step": 12753 }, { "epoch": 0.4376801647220316, "grad_norm": 0.9531737261265656, "learning_rate": 6.239842619041856e-06, "loss": 0.2942, "step": 12754 }, { "epoch": 0.43771448181194234, "grad_norm": 0.7069010096061518, "learning_rate": 6.239304229160439e-06, "loss": 0.2449, "step": 12755 }, { "epoch": 0.43774879890185314, "grad_norm": 0.8024889467839503, "learning_rate": 6.238765823968863e-06, "loss": 0.2885, "step": 12756 }, { "epoch": 0.4377831159917639, "grad_norm": 0.7106171953193804, "learning_rate": 6.23822740347378e-06, "loss": 0.3125, "step": 12757 }, { "epoch": 0.4378174330816747, "grad_norm": 0.7105552465919633, "learning_rate": 6.237688967681841e-06, "loss": 0.3113, "step": 12758 }, { "epoch": 0.43785175017158545, "grad_norm": 0.9511343680490435, "learning_rate": 6.2371505165996975e-06, "loss": 0.2564, "step": 12759 }, { "epoch": 0.4378860672614962, "grad_norm": 0.9049278377106771, "learning_rate": 6.236612050234002e-06, "loss": 0.3096, "step": 12760 }, { "epoch": 0.437920384351407, "grad_norm": 0.7796642635742183, "learning_rate": 6.236073568591406e-06, "loss": 0.285, "step": 12761 }, { "epoch": 0.43795470144131776, "grad_norm": 0.7396220058002713, "learning_rate": 6.235535071678564e-06, "loss": 0.3182, "step": 12762 }, { "epoch": 0.43798901853122857, "grad_norm": 0.8551785266860782, "learning_rate": 6.234996559502125e-06, "loss": 0.2602, "step": 12763 }, { "epoch": 0.4380233356211393, "grad_norm": 0.7761839385247405, "learning_rate": 6.234458032068742e-06, "loss": 0.2676, "step": 12764 }, { "epoch": 0.4380576527110501, "grad_norm": 0.8652940103904713, "learning_rate": 6.233919489385071e-06, "loss": 0.3161, "step": 12765 }, { "epoch": 0.4380919698009609, "grad_norm": 0.9762137219962675, "learning_rate": 6.233380931457764e-06, "loss": 0.2622, "step": 12766 }, { "epoch": 0.43812628689087163, "grad_norm": 0.7881397804529322, "learning_rate": 6.232842358293473e-06, "loss": 0.2563, "step": 12767 }, { "epoch": 0.43816060398078244, "grad_norm": 0.790266467237306, "learning_rate": 6.232303769898851e-06, "loss": 0.278, "step": 12768 }, { "epoch": 0.4381949210706932, "grad_norm": 0.9016465608805602, "learning_rate": 6.2317651662805545e-06, "loss": 0.3494, "step": 12769 }, { "epoch": 0.438229238160604, "grad_norm": 0.8183631162185041, "learning_rate": 6.231226547445235e-06, "loss": 0.3077, "step": 12770 }, { "epoch": 0.43826355525051475, "grad_norm": 0.8387715725762829, "learning_rate": 6.230687913399545e-06, "loss": 0.2955, "step": 12771 }, { "epoch": 0.43829787234042555, "grad_norm": 0.7064664374468368, "learning_rate": 6.230149264150142e-06, "loss": 0.287, "step": 12772 }, { "epoch": 0.4383321894303363, "grad_norm": 0.809770609209181, "learning_rate": 6.229610599703679e-06, "loss": 0.3003, "step": 12773 }, { "epoch": 0.43836650652024706, "grad_norm": 0.7410759638993545, "learning_rate": 6.229071920066811e-06, "loss": 0.2653, "step": 12774 }, { "epoch": 0.43840082361015786, "grad_norm": 0.8101749515882752, "learning_rate": 6.228533225246191e-06, "loss": 0.3041, "step": 12775 }, { "epoch": 0.4384351407000686, "grad_norm": 0.7758552128324079, "learning_rate": 6.227994515248479e-06, "loss": 0.3182, "step": 12776 }, { "epoch": 0.4384694577899794, "grad_norm": 0.7366980217959465, "learning_rate": 6.227455790080323e-06, "loss": 0.2937, "step": 12777 }, { "epoch": 0.4385037748798902, "grad_norm": 0.8145459738640372, "learning_rate": 6.226917049748382e-06, "loss": 0.3634, "step": 12778 }, { "epoch": 0.438538091969801, "grad_norm": 0.7915418588519034, "learning_rate": 6.226378294259312e-06, "loss": 0.3152, "step": 12779 }, { "epoch": 0.43857240905971173, "grad_norm": 0.6899590044259822, "learning_rate": 6.225839523619769e-06, "loss": 0.2954, "step": 12780 }, { "epoch": 0.43860672614962254, "grad_norm": 0.8669486060571843, "learning_rate": 6.225300737836406e-06, "loss": 0.3294, "step": 12781 }, { "epoch": 0.4386410432395333, "grad_norm": 0.847655707337745, "learning_rate": 6.224761936915883e-06, "loss": 0.3118, "step": 12782 }, { "epoch": 0.43867536032944404, "grad_norm": 0.7545079527826302, "learning_rate": 6.224223120864853e-06, "loss": 0.3061, "step": 12783 }, { "epoch": 0.43870967741935485, "grad_norm": 0.7247981125232441, "learning_rate": 6.223684289689973e-06, "loss": 0.2758, "step": 12784 }, { "epoch": 0.4387439945092656, "grad_norm": 0.8010561812418269, "learning_rate": 6.223145443397901e-06, "loss": 0.2747, "step": 12785 }, { "epoch": 0.4387783115991764, "grad_norm": 0.7923097811440143, "learning_rate": 6.222606581995293e-06, "loss": 0.3471, "step": 12786 }, { "epoch": 0.43881262868908716, "grad_norm": 0.8209953706119854, "learning_rate": 6.222067705488806e-06, "loss": 0.2936, "step": 12787 }, { "epoch": 0.43884694577899797, "grad_norm": 0.841838558813992, "learning_rate": 6.2215288138850985e-06, "loss": 0.3575, "step": 12788 }, { "epoch": 0.4388812628689087, "grad_norm": 0.9076616159335984, "learning_rate": 6.220989907190826e-06, "loss": 0.2886, "step": 12789 }, { "epoch": 0.43891557995881947, "grad_norm": 0.7602401104669678, "learning_rate": 6.220450985412645e-06, "loss": 0.2682, "step": 12790 }, { "epoch": 0.4389498970487303, "grad_norm": 0.7187097713886218, "learning_rate": 6.219912048557216e-06, "loss": 0.3493, "step": 12791 }, { "epoch": 0.438984214138641, "grad_norm": 0.6921889566187528, "learning_rate": 6.219373096631197e-06, "loss": 0.2521, "step": 12792 }, { "epoch": 0.43901853122855183, "grad_norm": 0.7608753721009138, "learning_rate": 6.218834129641244e-06, "loss": 0.2668, "step": 12793 }, { "epoch": 0.4390528483184626, "grad_norm": 0.7722919237003909, "learning_rate": 6.2182951475940154e-06, "loss": 0.3366, "step": 12794 }, { "epoch": 0.4390871654083734, "grad_norm": 0.7533303157179873, "learning_rate": 6.217756150496174e-06, "loss": 0.2917, "step": 12795 }, { "epoch": 0.43912148249828414, "grad_norm": 0.7986104949907321, "learning_rate": 6.217217138354372e-06, "loss": 0.2828, "step": 12796 }, { "epoch": 0.4391557995881949, "grad_norm": 0.7821459339570553, "learning_rate": 6.216678111175273e-06, "loss": 0.2945, "step": 12797 }, { "epoch": 0.4391901166781057, "grad_norm": 0.7894661788572155, "learning_rate": 6.216139068965534e-06, "loss": 0.2298, "step": 12798 }, { "epoch": 0.43922443376801645, "grad_norm": 0.7536043775947653, "learning_rate": 6.215600011731815e-06, "loss": 0.2646, "step": 12799 }, { "epoch": 0.43925875085792726, "grad_norm": 0.7899601490252325, "learning_rate": 6.215060939480774e-06, "loss": 0.2665, "step": 12800 }, { "epoch": 0.439293067947838, "grad_norm": 0.7995863638617079, "learning_rate": 6.214521852219072e-06, "loss": 0.2548, "step": 12801 }, { "epoch": 0.4393273850377488, "grad_norm": 0.8288341202493846, "learning_rate": 6.213982749953369e-06, "loss": 0.3213, "step": 12802 }, { "epoch": 0.43936170212765957, "grad_norm": 0.7206134246600292, "learning_rate": 6.213443632690324e-06, "loss": 0.2781, "step": 12803 }, { "epoch": 0.4393960192175704, "grad_norm": 0.8162959093539522, "learning_rate": 6.2129045004366e-06, "loss": 0.3314, "step": 12804 }, { "epoch": 0.43943033630748113, "grad_norm": 0.757895232992164, "learning_rate": 6.212365353198853e-06, "loss": 0.2411, "step": 12805 }, { "epoch": 0.4394646533973919, "grad_norm": 0.7480840562974612, "learning_rate": 6.211826190983744e-06, "loss": 0.2634, "step": 12806 }, { "epoch": 0.4394989704873027, "grad_norm": 0.7097759158937834, "learning_rate": 6.211287013797938e-06, "loss": 0.2949, "step": 12807 }, { "epoch": 0.43953328757721344, "grad_norm": 0.8284562072307657, "learning_rate": 6.2107478216480906e-06, "loss": 0.3719, "step": 12808 }, { "epoch": 0.43956760466712425, "grad_norm": 0.79199639807029, "learning_rate": 6.2102086145408665e-06, "loss": 0.3096, "step": 12809 }, { "epoch": 0.439601921757035, "grad_norm": 0.7846599494687293, "learning_rate": 6.2096693924829265e-06, "loss": 0.2921, "step": 12810 }, { "epoch": 0.4396362388469458, "grad_norm": 0.7655133235414704, "learning_rate": 6.209130155480932e-06, "loss": 0.2685, "step": 12811 }, { "epoch": 0.43967055593685656, "grad_norm": 0.7834950039380466, "learning_rate": 6.208590903541544e-06, "loss": 0.2807, "step": 12812 }, { "epoch": 0.4397048730267673, "grad_norm": 0.7979832043986582, "learning_rate": 6.208051636671423e-06, "loss": 0.23, "step": 12813 }, { "epoch": 0.4397391901166781, "grad_norm": 0.8047045119485163, "learning_rate": 6.207512354877234e-06, "loss": 0.3154, "step": 12814 }, { "epoch": 0.43977350720658887, "grad_norm": 0.7706549273299462, "learning_rate": 6.206973058165636e-06, "loss": 0.2932, "step": 12815 }, { "epoch": 0.43980782429649967, "grad_norm": 0.8397539683629992, "learning_rate": 6.206433746543294e-06, "loss": 0.2758, "step": 12816 }, { "epoch": 0.4398421413864104, "grad_norm": 0.8847761529113326, "learning_rate": 6.205894420016871e-06, "loss": 0.3288, "step": 12817 }, { "epoch": 0.43987645847632123, "grad_norm": 0.812844098418063, "learning_rate": 6.205355078593027e-06, "loss": 0.2833, "step": 12818 }, { "epoch": 0.439910775566232, "grad_norm": 0.7574714469574226, "learning_rate": 6.204815722278425e-06, "loss": 0.3018, "step": 12819 }, { "epoch": 0.43994509265614273, "grad_norm": 0.7648595878673191, "learning_rate": 6.204276351079732e-06, "loss": 0.2855, "step": 12820 }, { "epoch": 0.43997940974605354, "grad_norm": 0.8315285981879466, "learning_rate": 6.203736965003608e-06, "loss": 0.2639, "step": 12821 }, { "epoch": 0.4400137268359643, "grad_norm": 0.8005228383580647, "learning_rate": 6.203197564056717e-06, "loss": 0.2906, "step": 12822 }, { "epoch": 0.4400480439258751, "grad_norm": 0.7656328730423643, "learning_rate": 6.202658148245722e-06, "loss": 0.2518, "step": 12823 }, { "epoch": 0.44008236101578585, "grad_norm": 0.645828118232008, "learning_rate": 6.202118717577289e-06, "loss": 0.2508, "step": 12824 }, { "epoch": 0.44011667810569666, "grad_norm": 0.7743946281413574, "learning_rate": 6.20157927205808e-06, "loss": 0.2576, "step": 12825 }, { "epoch": 0.4401509951956074, "grad_norm": 0.7124716421062837, "learning_rate": 6.20103981169476e-06, "loss": 0.2749, "step": 12826 }, { "epoch": 0.4401853122855182, "grad_norm": 0.8372705508564302, "learning_rate": 6.200500336493994e-06, "loss": 0.3318, "step": 12827 }, { "epoch": 0.44021962937542897, "grad_norm": 0.7474941585315957, "learning_rate": 6.199960846462445e-06, "loss": 0.3059, "step": 12828 }, { "epoch": 0.4402539464653397, "grad_norm": 0.738653483404351, "learning_rate": 6.199421341606779e-06, "loss": 0.2602, "step": 12829 }, { "epoch": 0.4402882635552505, "grad_norm": 1.0593823178928505, "learning_rate": 6.1988818219336625e-06, "loss": 0.3188, "step": 12830 }, { "epoch": 0.4403225806451613, "grad_norm": 0.7482603609129039, "learning_rate": 6.198342287449757e-06, "loss": 0.2881, "step": 12831 }, { "epoch": 0.4403568977350721, "grad_norm": 0.787381408175612, "learning_rate": 6.197802738161729e-06, "loss": 0.333, "step": 12832 }, { "epoch": 0.44039121482498284, "grad_norm": 0.7343242441759011, "learning_rate": 6.197263174076247e-06, "loss": 0.3129, "step": 12833 }, { "epoch": 0.44042553191489364, "grad_norm": 0.7612912676946907, "learning_rate": 6.196723595199973e-06, "loss": 0.2849, "step": 12834 }, { "epoch": 0.4404598490048044, "grad_norm": 0.7921277551974639, "learning_rate": 6.1961840015395735e-06, "loss": 0.3188, "step": 12835 }, { "epoch": 0.44049416609471514, "grad_norm": 0.9344977631766191, "learning_rate": 6.195644393101718e-06, "loss": 0.2916, "step": 12836 }, { "epoch": 0.44052848318462595, "grad_norm": 0.7572402607197168, "learning_rate": 6.195104769893067e-06, "loss": 0.2605, "step": 12837 }, { "epoch": 0.4405628002745367, "grad_norm": 0.7234657259008731, "learning_rate": 6.19456513192029e-06, "loss": 0.2887, "step": 12838 }, { "epoch": 0.4405971173644475, "grad_norm": 0.675480033842551, "learning_rate": 6.194025479190055e-06, "loss": 0.352, "step": 12839 }, { "epoch": 0.44063143445435826, "grad_norm": 0.8376150034915614, "learning_rate": 6.193485811709028e-06, "loss": 0.2587, "step": 12840 }, { "epoch": 0.44066575154426907, "grad_norm": 0.748215662017208, "learning_rate": 6.192946129483873e-06, "loss": 0.2699, "step": 12841 }, { "epoch": 0.4407000686341798, "grad_norm": 0.7477756992167839, "learning_rate": 6.1924064325212605e-06, "loss": 0.2976, "step": 12842 }, { "epoch": 0.44073438572409057, "grad_norm": 0.7711803278669984, "learning_rate": 6.191866720827858e-06, "loss": 0.2882, "step": 12843 }, { "epoch": 0.4407687028140014, "grad_norm": 0.7390287950491212, "learning_rate": 6.1913269944103305e-06, "loss": 0.2747, "step": 12844 }, { "epoch": 0.44080301990391213, "grad_norm": 0.6818091460619348, "learning_rate": 6.190787253275346e-06, "loss": 0.2897, "step": 12845 }, { "epoch": 0.44083733699382294, "grad_norm": 0.7634321939477414, "learning_rate": 6.190247497429576e-06, "loss": 0.277, "step": 12846 }, { "epoch": 0.4408716540837337, "grad_norm": 0.7618011809473041, "learning_rate": 6.189707726879683e-06, "loss": 0.3391, "step": 12847 }, { "epoch": 0.4409059711736445, "grad_norm": 0.7773626381753074, "learning_rate": 6.189167941632339e-06, "loss": 0.2858, "step": 12848 }, { "epoch": 0.44094028826355525, "grad_norm": 0.8124832671541, "learning_rate": 6.188628141694213e-06, "loss": 0.2588, "step": 12849 }, { "epoch": 0.44097460535346605, "grad_norm": 0.7105012559140078, "learning_rate": 6.1880883270719705e-06, "loss": 0.2983, "step": 12850 }, { "epoch": 0.4410089224433768, "grad_norm": 0.9266571375624009, "learning_rate": 6.187548497772282e-06, "loss": 0.326, "step": 12851 }, { "epoch": 0.44104323953328756, "grad_norm": 0.7500761759768707, "learning_rate": 6.187008653801817e-06, "loss": 0.2959, "step": 12852 }, { "epoch": 0.44107755662319836, "grad_norm": 0.7481665571802464, "learning_rate": 6.186468795167243e-06, "loss": 0.2793, "step": 12853 }, { "epoch": 0.4411118737131091, "grad_norm": 1.1042567792436824, "learning_rate": 6.18592892187523e-06, "loss": 0.2701, "step": 12854 }, { "epoch": 0.4411461908030199, "grad_norm": 0.7063312800818875, "learning_rate": 6.18538903393245e-06, "loss": 0.2988, "step": 12855 }, { "epoch": 0.4411805078929307, "grad_norm": 0.756877776437833, "learning_rate": 6.184849131345569e-06, "loss": 0.2716, "step": 12856 }, { "epoch": 0.4412148249828415, "grad_norm": 0.7547572769663232, "learning_rate": 6.184309214121259e-06, "loss": 0.2988, "step": 12857 }, { "epoch": 0.44124914207275223, "grad_norm": 0.855957994442733, "learning_rate": 6.183769282266189e-06, "loss": 0.3008, "step": 12858 }, { "epoch": 0.441283459162663, "grad_norm": 0.7628902600583746, "learning_rate": 6.18322933578703e-06, "loss": 0.2804, "step": 12859 }, { "epoch": 0.4413177762525738, "grad_norm": 0.8334944544139917, "learning_rate": 6.182689374690451e-06, "loss": 0.3786, "step": 12860 }, { "epoch": 0.44135209334248454, "grad_norm": 0.7343079799250032, "learning_rate": 6.182149398983125e-06, "loss": 0.2672, "step": 12861 }, { "epoch": 0.44138641043239535, "grad_norm": 0.8834047712205334, "learning_rate": 6.181609408671721e-06, "loss": 0.3646, "step": 12862 }, { "epoch": 0.4414207275223061, "grad_norm": 0.7695398592924819, "learning_rate": 6.181069403762911e-06, "loss": 0.2898, "step": 12863 }, { "epoch": 0.4414550446122169, "grad_norm": 0.73196633454462, "learning_rate": 6.180529384263364e-06, "loss": 0.2875, "step": 12864 }, { "epoch": 0.44148936170212766, "grad_norm": 0.8509313529650865, "learning_rate": 6.179989350179755e-06, "loss": 0.2941, "step": 12865 }, { "epoch": 0.4415236787920384, "grad_norm": 0.7623244586617561, "learning_rate": 6.1794493015187514e-06, "loss": 0.3151, "step": 12866 }, { "epoch": 0.4415579958819492, "grad_norm": 0.8087886845383444, "learning_rate": 6.178909238287027e-06, "loss": 0.3567, "step": 12867 }, { "epoch": 0.44159231297185997, "grad_norm": 0.7230958428240606, "learning_rate": 6.178369160491254e-06, "loss": 0.2714, "step": 12868 }, { "epoch": 0.4416266300617708, "grad_norm": 0.6975645840836139, "learning_rate": 6.177829068138104e-06, "loss": 0.2996, "step": 12869 }, { "epoch": 0.4416609471516815, "grad_norm": 0.8389762219799615, "learning_rate": 6.177288961234248e-06, "loss": 0.3397, "step": 12870 }, { "epoch": 0.44169526424159233, "grad_norm": 0.7198638559653651, "learning_rate": 6.1767488397863595e-06, "loss": 0.2745, "step": 12871 }, { "epoch": 0.4417295813315031, "grad_norm": 0.7288427294853533, "learning_rate": 6.176208703801113e-06, "loss": 0.3213, "step": 12872 }, { "epoch": 0.44176389842141384, "grad_norm": 0.8973372994087185, "learning_rate": 6.175668553285178e-06, "loss": 0.2698, "step": 12873 }, { "epoch": 0.44179821551132464, "grad_norm": 0.7317891280650085, "learning_rate": 6.175128388245228e-06, "loss": 0.2859, "step": 12874 }, { "epoch": 0.4418325326012354, "grad_norm": 0.8483579924324804, "learning_rate": 6.174588208687939e-06, "loss": 0.2878, "step": 12875 }, { "epoch": 0.4418668496911462, "grad_norm": 0.7743957738517326, "learning_rate": 6.17404801461998e-06, "loss": 0.3042, "step": 12876 }, { "epoch": 0.44190116678105695, "grad_norm": 0.7687184801361153, "learning_rate": 6.173507806048027e-06, "loss": 0.2875, "step": 12877 }, { "epoch": 0.44193548387096776, "grad_norm": 0.8105006818772087, "learning_rate": 6.172967582978754e-06, "loss": 0.2775, "step": 12878 }, { "epoch": 0.4419698009608785, "grad_norm": 0.7703058653301974, "learning_rate": 6.172427345418833e-06, "loss": 0.305, "step": 12879 }, { "epoch": 0.4420041180507893, "grad_norm": 0.8625821077339788, "learning_rate": 6.17188709337494e-06, "loss": 0.3191, "step": 12880 }, { "epoch": 0.44203843514070007, "grad_norm": 0.7895051034728559, "learning_rate": 6.171346826853749e-06, "loss": 0.2935, "step": 12881 }, { "epoch": 0.4420727522306108, "grad_norm": 0.8224488521003125, "learning_rate": 6.170806545861931e-06, "loss": 0.3247, "step": 12882 }, { "epoch": 0.44210706932052163, "grad_norm": 0.964331057662592, "learning_rate": 6.170266250406165e-06, "loss": 0.2582, "step": 12883 }, { "epoch": 0.4421413864104324, "grad_norm": 0.7507310390027908, "learning_rate": 6.169725940493123e-06, "loss": 0.2917, "step": 12884 }, { "epoch": 0.4421757035003432, "grad_norm": 0.7449064061716258, "learning_rate": 6.169185616129481e-06, "loss": 0.2306, "step": 12885 }, { "epoch": 0.44221002059025394, "grad_norm": 0.7455034926807889, "learning_rate": 6.168645277321915e-06, "loss": 0.2517, "step": 12886 }, { "epoch": 0.44224433768016475, "grad_norm": 0.7627138157556127, "learning_rate": 6.1681049240770986e-06, "loss": 0.2821, "step": 12887 }, { "epoch": 0.4422786547700755, "grad_norm": 0.8425492950547577, "learning_rate": 6.1675645564017086e-06, "loss": 0.2923, "step": 12888 }, { "epoch": 0.44231297185998625, "grad_norm": 0.6518180751515033, "learning_rate": 6.167024174302417e-06, "loss": 0.2873, "step": 12889 }, { "epoch": 0.44234728894989706, "grad_norm": 0.763872255710711, "learning_rate": 6.1664837777859035e-06, "loss": 0.3088, "step": 12890 }, { "epoch": 0.4423816060398078, "grad_norm": 0.8061990073486537, "learning_rate": 6.1659433668588445e-06, "loss": 0.2432, "step": 12891 }, { "epoch": 0.4424159231297186, "grad_norm": 0.7535946482265975, "learning_rate": 6.165402941527912e-06, "loss": 0.3173, "step": 12892 }, { "epoch": 0.44245024021962936, "grad_norm": 0.8334699090270127, "learning_rate": 6.164862501799787e-06, "loss": 0.2917, "step": 12893 }, { "epoch": 0.44248455730954017, "grad_norm": 0.7286060543592138, "learning_rate": 6.164322047681144e-06, "loss": 0.2278, "step": 12894 }, { "epoch": 0.4425188743994509, "grad_norm": 0.9460504308281674, "learning_rate": 6.163781579178658e-06, "loss": 0.29, "step": 12895 }, { "epoch": 0.4425531914893617, "grad_norm": 0.8474133080986089, "learning_rate": 6.163241096299008e-06, "loss": 0.2693, "step": 12896 }, { "epoch": 0.4425875085792725, "grad_norm": 0.9280535961242943, "learning_rate": 6.162700599048871e-06, "loss": 0.3961, "step": 12897 }, { "epoch": 0.44262182566918323, "grad_norm": 0.75016811088376, "learning_rate": 6.162160087434923e-06, "loss": 0.2576, "step": 12898 }, { "epoch": 0.44265614275909404, "grad_norm": 0.9026568547329011, "learning_rate": 6.161619561463842e-06, "loss": 0.2961, "step": 12899 }, { "epoch": 0.4426904598490048, "grad_norm": 1.2088813140834163, "learning_rate": 6.161079021142305e-06, "loss": 0.2821, "step": 12900 }, { "epoch": 0.4427247769389156, "grad_norm": 0.6680796231607683, "learning_rate": 6.160538466476993e-06, "loss": 0.2655, "step": 12901 }, { "epoch": 0.44275909402882635, "grad_norm": 0.7238904089569108, "learning_rate": 6.15999789747458e-06, "loss": 0.2663, "step": 12902 }, { "epoch": 0.44279341111873716, "grad_norm": 0.709191595837562, "learning_rate": 6.159457314141745e-06, "loss": 0.2876, "step": 12903 }, { "epoch": 0.4428277282086479, "grad_norm": 0.9108989631556987, "learning_rate": 6.158916716485166e-06, "loss": 0.3016, "step": 12904 }, { "epoch": 0.44286204529855866, "grad_norm": 0.6915185953914059, "learning_rate": 6.158376104511523e-06, "loss": 0.267, "step": 12905 }, { "epoch": 0.44289636238846947, "grad_norm": 0.8174671238674891, "learning_rate": 6.157835478227493e-06, "loss": 0.2921, "step": 12906 }, { "epoch": 0.4429306794783802, "grad_norm": 0.8355114030028598, "learning_rate": 6.157294837639758e-06, "loss": 0.3055, "step": 12907 }, { "epoch": 0.442964996568291, "grad_norm": 0.7497614212585566, "learning_rate": 6.156754182754991e-06, "loss": 0.2942, "step": 12908 }, { "epoch": 0.4429993136582018, "grad_norm": 0.7218207793736455, "learning_rate": 6.156213513579877e-06, "loss": 0.2719, "step": 12909 }, { "epoch": 0.4430336307481126, "grad_norm": 0.7198256835784352, "learning_rate": 6.155672830121094e-06, "loss": 0.2713, "step": 12910 }, { "epoch": 0.44306794783802333, "grad_norm": 0.841350854934088, "learning_rate": 6.1551321323853185e-06, "loss": 0.2806, "step": 12911 }, { "epoch": 0.4431022649279341, "grad_norm": 0.8353429677459638, "learning_rate": 6.154591420379232e-06, "loss": 0.3252, "step": 12912 }, { "epoch": 0.4431365820178449, "grad_norm": 0.8152083908066639, "learning_rate": 6.1540506941095164e-06, "loss": 0.2916, "step": 12913 }, { "epoch": 0.44317089910775564, "grad_norm": 0.7720735299212967, "learning_rate": 6.153509953582849e-06, "loss": 0.2933, "step": 12914 }, { "epoch": 0.44320521619766645, "grad_norm": 0.9855209543771366, "learning_rate": 6.15296919880591e-06, "loss": 0.3461, "step": 12915 }, { "epoch": 0.4432395332875772, "grad_norm": 0.9078535436520626, "learning_rate": 6.152428429785382e-06, "loss": 0.2877, "step": 12916 }, { "epoch": 0.443273850377488, "grad_norm": 0.7538615510669486, "learning_rate": 6.151887646527945e-06, "loss": 0.2949, "step": 12917 }, { "epoch": 0.44330816746739876, "grad_norm": 0.7101377271306798, "learning_rate": 6.151346849040278e-06, "loss": 0.2438, "step": 12918 }, { "epoch": 0.4433424845573095, "grad_norm": 0.8384595394286929, "learning_rate": 6.150806037329063e-06, "loss": 0.2884, "step": 12919 }, { "epoch": 0.4433768016472203, "grad_norm": 0.7403080170895849, "learning_rate": 6.150265211400982e-06, "loss": 0.2945, "step": 12920 }, { "epoch": 0.44341111873713107, "grad_norm": 0.7844089697794346, "learning_rate": 6.149724371262714e-06, "loss": 0.2869, "step": 12921 }, { "epoch": 0.4434454358270419, "grad_norm": 0.8016381303287263, "learning_rate": 6.149183516920943e-06, "loss": 0.2351, "step": 12922 }, { "epoch": 0.44347975291695263, "grad_norm": 0.8532980514722913, "learning_rate": 6.14864264838235e-06, "loss": 0.2994, "step": 12923 }, { "epoch": 0.44351407000686344, "grad_norm": 0.8186070188124643, "learning_rate": 6.1481017656536155e-06, "loss": 0.2827, "step": 12924 }, { "epoch": 0.4435483870967742, "grad_norm": 0.8615572046870217, "learning_rate": 6.147560868741421e-06, "loss": 0.3042, "step": 12925 }, { "epoch": 0.443582704186685, "grad_norm": 0.7906305694618713, "learning_rate": 6.147019957652452e-06, "loss": 0.2672, "step": 12926 }, { "epoch": 0.44361702127659575, "grad_norm": 0.8070900830650262, "learning_rate": 6.146479032393386e-06, "loss": 0.3239, "step": 12927 }, { "epoch": 0.4436513383665065, "grad_norm": 0.8185202083569649, "learning_rate": 6.145938092970911e-06, "loss": 0.2697, "step": 12928 }, { "epoch": 0.4436856554564173, "grad_norm": 0.8137243498310104, "learning_rate": 6.1453971393917065e-06, "loss": 0.2753, "step": 12929 }, { "epoch": 0.44371997254632806, "grad_norm": 0.7186353609169895, "learning_rate": 6.144856171662454e-06, "loss": 0.2881, "step": 12930 }, { "epoch": 0.44375428963623886, "grad_norm": 0.8198713598300135, "learning_rate": 6.14431518978984e-06, "loss": 0.3088, "step": 12931 }, { "epoch": 0.4437886067261496, "grad_norm": 0.7276321454932031, "learning_rate": 6.143774193780545e-06, "loss": 0.316, "step": 12932 }, { "epoch": 0.4438229238160604, "grad_norm": 0.8541101399739887, "learning_rate": 6.143233183641255e-06, "loss": 0.3012, "step": 12933 }, { "epoch": 0.4438572409059712, "grad_norm": 0.7124882694441736, "learning_rate": 6.14269215937865e-06, "loss": 0.2892, "step": 12934 }, { "epoch": 0.4438915579958819, "grad_norm": 0.7500510154856939, "learning_rate": 6.142151120999416e-06, "loss": 0.2765, "step": 12935 }, { "epoch": 0.44392587508579273, "grad_norm": 0.7899094755354759, "learning_rate": 6.141610068510237e-06, "loss": 0.3021, "step": 12936 }, { "epoch": 0.4439601921757035, "grad_norm": 0.77167528016618, "learning_rate": 6.141069001917797e-06, "loss": 0.3191, "step": 12937 }, { "epoch": 0.4439945092656143, "grad_norm": 0.7523792691628478, "learning_rate": 6.140527921228778e-06, "loss": 0.3039, "step": 12938 }, { "epoch": 0.44402882635552504, "grad_norm": 0.736586519683295, "learning_rate": 6.1399868264498685e-06, "loss": 0.3087, "step": 12939 }, { "epoch": 0.44406314344543585, "grad_norm": 0.7333698076727018, "learning_rate": 6.13944571758775e-06, "loss": 0.3611, "step": 12940 }, { "epoch": 0.4440974605353466, "grad_norm": 0.7882411445478009, "learning_rate": 6.138904594649107e-06, "loss": 0.2957, "step": 12941 }, { "epoch": 0.44413177762525735, "grad_norm": 0.780421092320764, "learning_rate": 6.138363457640626e-06, "loss": 0.2865, "step": 12942 }, { "epoch": 0.44416609471516816, "grad_norm": 0.7134347651889158, "learning_rate": 6.137822306568992e-06, "loss": 0.3121, "step": 12943 }, { "epoch": 0.4442004118050789, "grad_norm": 0.7875966708368132, "learning_rate": 6.1372811414408906e-06, "loss": 0.335, "step": 12944 }, { "epoch": 0.4442347288949897, "grad_norm": 0.7306281551393959, "learning_rate": 6.136739962263006e-06, "loss": 0.3033, "step": 12945 }, { "epoch": 0.44426904598490047, "grad_norm": 0.7270425648292786, "learning_rate": 6.1361987690420254e-06, "loss": 0.2677, "step": 12946 }, { "epoch": 0.4443033630748113, "grad_norm": 0.7259217892652956, "learning_rate": 6.135657561784633e-06, "loss": 0.2894, "step": 12947 }, { "epoch": 0.444337680164722, "grad_norm": 0.7654332119241068, "learning_rate": 6.1351163404975144e-06, "loss": 0.3006, "step": 12948 }, { "epoch": 0.44437199725463283, "grad_norm": 0.7877880184626269, "learning_rate": 6.134575105187358e-06, "loss": 0.2636, "step": 12949 }, { "epoch": 0.4444063143445436, "grad_norm": 1.032129111114548, "learning_rate": 6.1340338558608495e-06, "loss": 0.2955, "step": 12950 }, { "epoch": 0.44444063143445434, "grad_norm": 0.7389401593821942, "learning_rate": 6.133492592524673e-06, "loss": 0.31, "step": 12951 }, { "epoch": 0.44447494852436514, "grad_norm": 0.8715897223633051, "learning_rate": 6.132951315185519e-06, "loss": 0.2987, "step": 12952 }, { "epoch": 0.4445092656142759, "grad_norm": 0.7643219711409666, "learning_rate": 6.132410023850072e-06, "loss": 0.2708, "step": 12953 }, { "epoch": 0.4445435827041867, "grad_norm": 0.7661074811046309, "learning_rate": 6.131868718525018e-06, "loss": 0.3225, "step": 12954 }, { "epoch": 0.44457789979409745, "grad_norm": 0.838270200374358, "learning_rate": 6.131327399217049e-06, "loss": 0.281, "step": 12955 }, { "epoch": 0.44461221688400826, "grad_norm": 0.8261710758178483, "learning_rate": 6.130786065932845e-06, "loss": 0.3475, "step": 12956 }, { "epoch": 0.444646533973919, "grad_norm": 0.7617673901442855, "learning_rate": 6.1302447186790985e-06, "loss": 0.305, "step": 12957 }, { "epoch": 0.44468085106382976, "grad_norm": 0.6846072508337324, "learning_rate": 6.129703357462498e-06, "loss": 0.3105, "step": 12958 }, { "epoch": 0.44471516815374057, "grad_norm": 0.750511412805532, "learning_rate": 6.1291619822897285e-06, "loss": 0.2807, "step": 12959 }, { "epoch": 0.4447494852436513, "grad_norm": 0.847520699724229, "learning_rate": 6.1286205931674784e-06, "loss": 0.334, "step": 12960 }, { "epoch": 0.44478380233356213, "grad_norm": 0.6485338765107626, "learning_rate": 6.128079190102438e-06, "loss": 0.28, "step": 12961 }, { "epoch": 0.4448181194234729, "grad_norm": 0.7404080511969567, "learning_rate": 6.127537773101294e-06, "loss": 0.2578, "step": 12962 }, { "epoch": 0.4448524365133837, "grad_norm": 0.749546349889489, "learning_rate": 6.126996342170736e-06, "loss": 0.2953, "step": 12963 }, { "epoch": 0.44488675360329444, "grad_norm": 0.8220769872167983, "learning_rate": 6.126454897317451e-06, "loss": 0.2785, "step": 12964 }, { "epoch": 0.4449210706932052, "grad_norm": 0.720119760683347, "learning_rate": 6.125913438548129e-06, "loss": 0.2969, "step": 12965 }, { "epoch": 0.444955387783116, "grad_norm": 0.746206703801331, "learning_rate": 6.1253719658694595e-06, "loss": 0.2742, "step": 12966 }, { "epoch": 0.44498970487302675, "grad_norm": 0.837806308597172, "learning_rate": 6.1248304792881305e-06, "loss": 0.3072, "step": 12967 }, { "epoch": 0.44502402196293755, "grad_norm": 0.7426166342767827, "learning_rate": 6.124288978810835e-06, "loss": 0.2807, "step": 12968 }, { "epoch": 0.4450583390528483, "grad_norm": 0.8516587390826116, "learning_rate": 6.123747464444258e-06, "loss": 0.2732, "step": 12969 }, { "epoch": 0.4450926561427591, "grad_norm": 0.7817112765351698, "learning_rate": 6.123205936195092e-06, "loss": 0.318, "step": 12970 }, { "epoch": 0.44512697323266986, "grad_norm": 0.7444651273235754, "learning_rate": 6.122664394070026e-06, "loss": 0.2628, "step": 12971 }, { "epoch": 0.44516129032258067, "grad_norm": 0.7409064356047472, "learning_rate": 6.122122838075749e-06, "loss": 0.3088, "step": 12972 }, { "epoch": 0.4451956074124914, "grad_norm": 1.1360765332726817, "learning_rate": 6.121581268218955e-06, "loss": 0.3112, "step": 12973 }, { "epoch": 0.4452299245024022, "grad_norm": 0.8306281758532602, "learning_rate": 6.121039684506332e-06, "loss": 0.2739, "step": 12974 }, { "epoch": 0.445264241592313, "grad_norm": 0.7289007703701986, "learning_rate": 6.120498086944568e-06, "loss": 0.2726, "step": 12975 }, { "epoch": 0.44529855868222373, "grad_norm": 0.6867259295057169, "learning_rate": 6.119956475540358e-06, "loss": 0.2659, "step": 12976 }, { "epoch": 0.44533287577213454, "grad_norm": 0.6703349748239248, "learning_rate": 6.119414850300392e-06, "loss": 0.2743, "step": 12977 }, { "epoch": 0.4453671928620453, "grad_norm": 0.7583338291638725, "learning_rate": 6.11887321123136e-06, "loss": 0.2769, "step": 12978 }, { "epoch": 0.4454015099519561, "grad_norm": 0.850413533798222, "learning_rate": 6.118331558339953e-06, "loss": 0.3372, "step": 12979 }, { "epoch": 0.44543582704186685, "grad_norm": 0.7971990782987881, "learning_rate": 6.117789891632865e-06, "loss": 0.2974, "step": 12980 }, { "epoch": 0.4454701441317776, "grad_norm": 0.8598888354623939, "learning_rate": 6.1172482111167865e-06, "loss": 0.2882, "step": 12981 }, { "epoch": 0.4455044612216884, "grad_norm": 0.764445346738174, "learning_rate": 6.116706516798407e-06, "loss": 0.3429, "step": 12982 }, { "epoch": 0.44553877831159916, "grad_norm": 1.416513527073039, "learning_rate": 6.1161648086844215e-06, "loss": 0.266, "step": 12983 }, { "epoch": 0.44557309540150997, "grad_norm": 0.7390953578348609, "learning_rate": 6.115623086781522e-06, "loss": 0.2882, "step": 12984 }, { "epoch": 0.4456074124914207, "grad_norm": 0.775519329504378, "learning_rate": 6.115081351096399e-06, "loss": 0.2826, "step": 12985 }, { "epoch": 0.4456417295813315, "grad_norm": 0.8503882999639448, "learning_rate": 6.114539601635746e-06, "loss": 0.3339, "step": 12986 }, { "epoch": 0.4456760466712423, "grad_norm": 0.7623494948070217, "learning_rate": 6.113997838406256e-06, "loss": 0.2778, "step": 12987 }, { "epoch": 0.44571036376115303, "grad_norm": 0.8698036270172336, "learning_rate": 6.113456061414621e-06, "loss": 0.2689, "step": 12988 }, { "epoch": 0.44574468085106383, "grad_norm": 0.8557989348618754, "learning_rate": 6.112914270667534e-06, "loss": 0.306, "step": 12989 }, { "epoch": 0.4457789979409746, "grad_norm": 0.8753422649971577, "learning_rate": 6.112372466171691e-06, "loss": 0.241, "step": 12990 }, { "epoch": 0.4458133150308854, "grad_norm": 0.8220572485729283, "learning_rate": 6.1118306479337805e-06, "loss": 0.2783, "step": 12991 }, { "epoch": 0.44584763212079614, "grad_norm": 0.8735275174275748, "learning_rate": 6.1112888159605e-06, "loss": 0.3325, "step": 12992 }, { "epoch": 0.44588194921070695, "grad_norm": 0.7582475525019703, "learning_rate": 6.110746970258541e-06, "loss": 0.2873, "step": 12993 }, { "epoch": 0.4459162663006177, "grad_norm": 0.6969623790139565, "learning_rate": 6.110205110834598e-06, "loss": 0.3066, "step": 12994 }, { "epoch": 0.4459505833905285, "grad_norm": 0.7617602522611976, "learning_rate": 6.109663237695367e-06, "loss": 0.3287, "step": 12995 }, { "epoch": 0.44598490048043926, "grad_norm": 0.78000955031624, "learning_rate": 6.109121350847539e-06, "loss": 0.2519, "step": 12996 }, { "epoch": 0.44601921757035, "grad_norm": 0.788041907527416, "learning_rate": 6.10857945029781e-06, "loss": 0.2617, "step": 12997 }, { "epoch": 0.4460535346602608, "grad_norm": 0.79294230692681, "learning_rate": 6.108037536052876e-06, "loss": 0.2915, "step": 12998 }, { "epoch": 0.44608785175017157, "grad_norm": 0.8637849629816773, "learning_rate": 6.107495608119427e-06, "loss": 0.3407, "step": 12999 }, { "epoch": 0.4461221688400824, "grad_norm": 0.7495107202367775, "learning_rate": 6.106953666504164e-06, "loss": 0.2725, "step": 13000 }, { "epoch": 0.44615648592999313, "grad_norm": 0.6733933971271717, "learning_rate": 6.106411711213776e-06, "loss": 0.2577, "step": 13001 }, { "epoch": 0.44619080301990394, "grad_norm": 0.7842698677193456, "learning_rate": 6.105869742254963e-06, "loss": 0.3002, "step": 13002 }, { "epoch": 0.4462251201098147, "grad_norm": 0.7709532730729232, "learning_rate": 6.1053277596344194e-06, "loss": 0.2901, "step": 13003 }, { "epoch": 0.44625943719972544, "grad_norm": 0.7974124592926968, "learning_rate": 6.104785763358839e-06, "loss": 0.2472, "step": 13004 }, { "epoch": 0.44629375428963625, "grad_norm": 0.9572034124413079, "learning_rate": 6.104243753434919e-06, "loss": 0.3116, "step": 13005 }, { "epoch": 0.446328071379547, "grad_norm": 0.7961017563497523, "learning_rate": 6.103701729869354e-06, "loss": 0.3028, "step": 13006 }, { "epoch": 0.4463623884694578, "grad_norm": 0.7667219385458358, "learning_rate": 6.103159692668842e-06, "loss": 0.2099, "step": 13007 }, { "epoch": 0.44639670555936856, "grad_norm": 0.7976746933089846, "learning_rate": 6.102617641840078e-06, "loss": 0.3144, "step": 13008 }, { "epoch": 0.44643102264927936, "grad_norm": 0.7320278370273743, "learning_rate": 6.1020755773897565e-06, "loss": 0.2817, "step": 13009 }, { "epoch": 0.4464653397391901, "grad_norm": 0.7803559378082843, "learning_rate": 6.101533499324579e-06, "loss": 0.2549, "step": 13010 }, { "epoch": 0.44649965682910087, "grad_norm": 0.7725792772643386, "learning_rate": 6.1009914076512374e-06, "loss": 0.2871, "step": 13011 }, { "epoch": 0.4465339739190117, "grad_norm": 0.7080982670645495, "learning_rate": 6.100449302376431e-06, "loss": 0.2449, "step": 13012 }, { "epoch": 0.4465682910089224, "grad_norm": 0.7449075928645613, "learning_rate": 6.099907183506858e-06, "loss": 0.2851, "step": 13013 }, { "epoch": 0.44660260809883323, "grad_norm": 0.7118035488795168, "learning_rate": 6.099365051049213e-06, "loss": 0.2596, "step": 13014 }, { "epoch": 0.446636925188744, "grad_norm": 0.6871004192554486, "learning_rate": 6.098822905010194e-06, "loss": 0.2851, "step": 13015 }, { "epoch": 0.4466712422786548, "grad_norm": 0.8702159682240856, "learning_rate": 6.0982807453965e-06, "loss": 0.2831, "step": 13016 }, { "epoch": 0.44670555936856554, "grad_norm": 0.746182624613421, "learning_rate": 6.097738572214828e-06, "loss": 0.2746, "step": 13017 }, { "epoch": 0.44673987645847635, "grad_norm": 0.7773212713589908, "learning_rate": 6.097196385471874e-06, "loss": 0.2635, "step": 13018 }, { "epoch": 0.4467741935483871, "grad_norm": 0.8103435283023098, "learning_rate": 6.09665418517434e-06, "loss": 0.2849, "step": 13019 }, { "epoch": 0.44680851063829785, "grad_norm": 0.7983970632520258, "learning_rate": 6.096111971328921e-06, "loss": 0.2743, "step": 13020 }, { "epoch": 0.44684282772820866, "grad_norm": 0.7356406874883578, "learning_rate": 6.095569743942317e-06, "loss": 0.2582, "step": 13021 }, { "epoch": 0.4468771448181194, "grad_norm": 0.7765888679270243, "learning_rate": 6.095027503021227e-06, "loss": 0.2846, "step": 13022 }, { "epoch": 0.4469114619080302, "grad_norm": 0.990768239766823, "learning_rate": 6.094485248572346e-06, "loss": 0.3426, "step": 13023 }, { "epoch": 0.44694577899794097, "grad_norm": 0.7633626422702922, "learning_rate": 6.0939429806023775e-06, "loss": 0.299, "step": 13024 }, { "epoch": 0.4469800960878518, "grad_norm": 0.7179269636382417, "learning_rate": 6.093400699118018e-06, "loss": 0.2833, "step": 13025 }, { "epoch": 0.4470144131777625, "grad_norm": 0.7239901974952124, "learning_rate": 6.09285840412597e-06, "loss": 0.2719, "step": 13026 }, { "epoch": 0.4470487302676733, "grad_norm": 0.7259657327286925, "learning_rate": 6.092316095632929e-06, "loss": 0.2911, "step": 13027 }, { "epoch": 0.4470830473575841, "grad_norm": 0.7781011703482629, "learning_rate": 6.091773773645596e-06, "loss": 0.328, "step": 13028 }, { "epoch": 0.44711736444749484, "grad_norm": 0.7814329119263683, "learning_rate": 6.091231438170673e-06, "loss": 0.2924, "step": 13029 }, { "epoch": 0.44715168153740564, "grad_norm": 0.756819168670744, "learning_rate": 6.0906890892148555e-06, "loss": 0.3098, "step": 13030 }, { "epoch": 0.4471859986273164, "grad_norm": 0.7530420390415293, "learning_rate": 6.090146726784846e-06, "loss": 0.2889, "step": 13031 }, { "epoch": 0.4472203157172272, "grad_norm": 0.7984921371702576, "learning_rate": 6.089604350887345e-06, "loss": 0.3195, "step": 13032 }, { "epoch": 0.44725463280713795, "grad_norm": 0.7547752299928074, "learning_rate": 6.089061961529053e-06, "loss": 0.2845, "step": 13033 }, { "epoch": 0.4472889498970487, "grad_norm": 0.7449431467161615, "learning_rate": 6.08851955871667e-06, "loss": 0.3136, "step": 13034 }, { "epoch": 0.4473232669869595, "grad_norm": 0.9531238516038878, "learning_rate": 6.0879771424568975e-06, "loss": 0.3214, "step": 13035 }, { "epoch": 0.44735758407687026, "grad_norm": 0.8253993057873974, "learning_rate": 6.087434712756435e-06, "loss": 0.3527, "step": 13036 }, { "epoch": 0.44739190116678107, "grad_norm": 0.7451576361219104, "learning_rate": 6.086892269621985e-06, "loss": 0.3335, "step": 13037 }, { "epoch": 0.4474262182566918, "grad_norm": 0.7566759948917273, "learning_rate": 6.086349813060248e-06, "loss": 0.2407, "step": 13038 }, { "epoch": 0.44746053534660263, "grad_norm": 0.7649844964130732, "learning_rate": 6.085807343077924e-06, "loss": 0.292, "step": 13039 }, { "epoch": 0.4474948524365134, "grad_norm": 0.7657454540869955, "learning_rate": 6.085264859681718e-06, "loss": 0.2651, "step": 13040 }, { "epoch": 0.4475291695264242, "grad_norm": 0.673202472158308, "learning_rate": 6.084722362878329e-06, "loss": 0.2546, "step": 13041 }, { "epoch": 0.44756348661633494, "grad_norm": 0.8092536110821271, "learning_rate": 6.084179852674462e-06, "loss": 0.2606, "step": 13042 }, { "epoch": 0.4475978037062457, "grad_norm": 0.7577452295285937, "learning_rate": 6.083637329076816e-06, "loss": 0.2512, "step": 13043 }, { "epoch": 0.4476321207961565, "grad_norm": 0.7554104110946674, "learning_rate": 6.083094792092093e-06, "loss": 0.3154, "step": 13044 }, { "epoch": 0.44766643788606725, "grad_norm": 0.8174217488276116, "learning_rate": 6.082552241726996e-06, "loss": 0.296, "step": 13045 }, { "epoch": 0.44770075497597805, "grad_norm": 0.7142576361243562, "learning_rate": 6.08200967798823e-06, "loss": 0.2892, "step": 13046 }, { "epoch": 0.4477350720658888, "grad_norm": 0.8537031871127417, "learning_rate": 6.0814671008824955e-06, "loss": 0.3361, "step": 13047 }, { "epoch": 0.4477693891557996, "grad_norm": 0.7173324334609809, "learning_rate": 6.080924510416497e-06, "loss": 0.2959, "step": 13048 }, { "epoch": 0.44780370624571036, "grad_norm": 0.6786900263534101, "learning_rate": 6.080381906596935e-06, "loss": 0.3047, "step": 13049 }, { "epoch": 0.4478380233356211, "grad_norm": 0.787189631921536, "learning_rate": 6.079839289430513e-06, "loss": 0.3137, "step": 13050 }, { "epoch": 0.4478723404255319, "grad_norm": 0.7492348007966266, "learning_rate": 6.079296658923939e-06, "loss": 0.2867, "step": 13051 }, { "epoch": 0.4479066575154427, "grad_norm": 0.7270330590880515, "learning_rate": 6.078754015083911e-06, "loss": 0.2502, "step": 13052 }, { "epoch": 0.4479409746053535, "grad_norm": 0.7500520799537078, "learning_rate": 6.078211357917134e-06, "loss": 0.2955, "step": 13053 }, { "epoch": 0.44797529169526423, "grad_norm": 0.9009727295302361, "learning_rate": 6.077668687430314e-06, "loss": 0.3378, "step": 13054 }, { "epoch": 0.44800960878517504, "grad_norm": 0.8887226467495745, "learning_rate": 6.077126003630153e-06, "loss": 0.2791, "step": 13055 }, { "epoch": 0.4480439258750858, "grad_norm": 0.7797753757625522, "learning_rate": 6.076583306523356e-06, "loss": 0.2748, "step": 13056 }, { "epoch": 0.44807824296499654, "grad_norm": 0.9217924030116256, "learning_rate": 6.076040596116628e-06, "loss": 0.2957, "step": 13057 }, { "epoch": 0.44811256005490735, "grad_norm": 0.740625244399145, "learning_rate": 6.075497872416674e-06, "loss": 0.2707, "step": 13058 }, { "epoch": 0.4481468771448181, "grad_norm": 0.7530614986605033, "learning_rate": 6.074955135430196e-06, "loss": 0.3045, "step": 13059 }, { "epoch": 0.4481811942347289, "grad_norm": 0.7214613975290767, "learning_rate": 6.0744123851639e-06, "loss": 0.2631, "step": 13060 }, { "epoch": 0.44821551132463966, "grad_norm": 0.7549694458340213, "learning_rate": 6.073869621624493e-06, "loss": 0.3589, "step": 13061 }, { "epoch": 0.44824982841455047, "grad_norm": 0.6864947654207325, "learning_rate": 6.073326844818679e-06, "loss": 0.2819, "step": 13062 }, { "epoch": 0.4482841455044612, "grad_norm": 0.8577937650299966, "learning_rate": 6.072784054753163e-06, "loss": 0.3105, "step": 13063 }, { "epoch": 0.448318462594372, "grad_norm": 0.8446215918728799, "learning_rate": 6.072241251434651e-06, "loss": 0.2813, "step": 13064 }, { "epoch": 0.4483527796842828, "grad_norm": 0.8986459156943546, "learning_rate": 6.071698434869847e-06, "loss": 0.295, "step": 13065 }, { "epoch": 0.4483870967741935, "grad_norm": 0.8519899335220885, "learning_rate": 6.07115560506546e-06, "loss": 0.2968, "step": 13066 }, { "epoch": 0.44842141386410433, "grad_norm": 0.7932459979966966, "learning_rate": 6.070612762028194e-06, "loss": 0.3376, "step": 13067 }, { "epoch": 0.4484557309540151, "grad_norm": 0.7617792837455276, "learning_rate": 6.070069905764754e-06, "loss": 0.2853, "step": 13068 }, { "epoch": 0.4484900480439259, "grad_norm": 0.7981267880311775, "learning_rate": 6.069527036281849e-06, "loss": 0.3322, "step": 13069 }, { "epoch": 0.44852436513383664, "grad_norm": 0.7397699006153025, "learning_rate": 6.068984153586185e-06, "loss": 0.2678, "step": 13070 }, { "epoch": 0.44855868222374745, "grad_norm": 0.8104537846658655, "learning_rate": 6.068441257684467e-06, "loss": 0.2573, "step": 13071 }, { "epoch": 0.4485929993136582, "grad_norm": 0.8310124236435046, "learning_rate": 6.0678983485834044e-06, "loss": 0.2782, "step": 13072 }, { "epoch": 0.44862731640356895, "grad_norm": 0.865479970764046, "learning_rate": 6.067355426289702e-06, "loss": 0.3219, "step": 13073 }, { "epoch": 0.44866163349347976, "grad_norm": 0.8433795615359306, "learning_rate": 6.066812490810068e-06, "loss": 0.2947, "step": 13074 }, { "epoch": 0.4486959505833905, "grad_norm": 0.7444599476740741, "learning_rate": 6.066269542151209e-06, "loss": 0.267, "step": 13075 }, { "epoch": 0.4487302676733013, "grad_norm": 0.9320386998648499, "learning_rate": 6.065726580319832e-06, "loss": 0.2676, "step": 13076 }, { "epoch": 0.44876458476321207, "grad_norm": 0.7186503219228837, "learning_rate": 6.065183605322646e-06, "loss": 0.2843, "step": 13077 }, { "epoch": 0.4487989018531229, "grad_norm": 0.7431126432231968, "learning_rate": 6.06464061716636e-06, "loss": 0.2637, "step": 13078 }, { "epoch": 0.44883321894303363, "grad_norm": 0.7618737477103915, "learning_rate": 6.064097615857679e-06, "loss": 0.3254, "step": 13079 }, { "epoch": 0.4488675360329444, "grad_norm": 0.7160653834690456, "learning_rate": 6.0635546014033145e-06, "loss": 0.2412, "step": 13080 }, { "epoch": 0.4489018531228552, "grad_norm": 0.7748602704430658, "learning_rate": 6.063011573809972e-06, "loss": 0.2839, "step": 13081 }, { "epoch": 0.44893617021276594, "grad_norm": 0.7097213794629172, "learning_rate": 6.0624685330843595e-06, "loss": 0.2871, "step": 13082 }, { "epoch": 0.44897048730267675, "grad_norm": 0.8089455595501407, "learning_rate": 6.0619254792331875e-06, "loss": 0.2927, "step": 13083 }, { "epoch": 0.4490048043925875, "grad_norm": 0.8199343079219829, "learning_rate": 6.061382412263164e-06, "loss": 0.3263, "step": 13084 }, { "epoch": 0.4490391214824983, "grad_norm": 0.7536352856987686, "learning_rate": 6.060839332180999e-06, "loss": 0.2885, "step": 13085 }, { "epoch": 0.44907343857240906, "grad_norm": 0.7806398225928465, "learning_rate": 6.0602962389934014e-06, "loss": 0.3382, "step": 13086 }, { "epoch": 0.44910775566231986, "grad_norm": 0.8093737615698956, "learning_rate": 6.059753132707079e-06, "loss": 0.3297, "step": 13087 }, { "epoch": 0.4491420727522306, "grad_norm": 0.7726949389741746, "learning_rate": 6.059210013328742e-06, "loss": 0.313, "step": 13088 }, { "epoch": 0.44917638984214137, "grad_norm": 0.727105328855782, "learning_rate": 6.0586668808651e-06, "loss": 0.239, "step": 13089 }, { "epoch": 0.4492107069320522, "grad_norm": 0.7793250009438782, "learning_rate": 6.058123735322864e-06, "loss": 0.2751, "step": 13090 }, { "epoch": 0.4492450240219629, "grad_norm": 0.7358962360584104, "learning_rate": 6.057580576708742e-06, "loss": 0.2661, "step": 13091 }, { "epoch": 0.44927934111187373, "grad_norm": 0.8559996497932079, "learning_rate": 6.057037405029445e-06, "loss": 0.2781, "step": 13092 }, { "epoch": 0.4493136582017845, "grad_norm": 0.7626091061610356, "learning_rate": 6.056494220291685e-06, "loss": 0.3044, "step": 13093 }, { "epoch": 0.4493479752916953, "grad_norm": 1.90395042302204, "learning_rate": 6.05595102250217e-06, "loss": 0.2662, "step": 13094 }, { "epoch": 0.44938229238160604, "grad_norm": 0.8526860004884179, "learning_rate": 6.05540781166761e-06, "loss": 0.2983, "step": 13095 }, { "epoch": 0.4494166094715168, "grad_norm": 0.7161382069635743, "learning_rate": 6.054864587794719e-06, "loss": 0.2892, "step": 13096 }, { "epoch": 0.4494509265614276, "grad_norm": 0.7499692688502779, "learning_rate": 6.054321350890204e-06, "loss": 0.3001, "step": 13097 }, { "epoch": 0.44948524365133835, "grad_norm": 0.7934651022441145, "learning_rate": 6.053778100960778e-06, "loss": 0.2993, "step": 13098 }, { "epoch": 0.44951956074124916, "grad_norm": 0.6885367709016412, "learning_rate": 6.053234838013153e-06, "loss": 0.2869, "step": 13099 }, { "epoch": 0.4495538778311599, "grad_norm": 0.7502352156791695, "learning_rate": 6.052691562054039e-06, "loss": 0.2528, "step": 13100 }, { "epoch": 0.4495881949210707, "grad_norm": 0.8834956652774715, "learning_rate": 6.052148273090148e-06, "loss": 0.2705, "step": 13101 }, { "epoch": 0.44962251201098147, "grad_norm": 1.0091677543503275, "learning_rate": 6.0516049711281935e-06, "loss": 0.291, "step": 13102 }, { "epoch": 0.4496568291008922, "grad_norm": 0.8039813691138249, "learning_rate": 6.051061656174884e-06, "loss": 0.3412, "step": 13103 }, { "epoch": 0.449691146190803, "grad_norm": 0.7694031356567528, "learning_rate": 6.0505183282369315e-06, "loss": 0.2776, "step": 13104 }, { "epoch": 0.4497254632807138, "grad_norm": 0.800699238391612, "learning_rate": 6.049974987321052e-06, "loss": 0.3051, "step": 13105 }, { "epoch": 0.4497597803706246, "grad_norm": 0.7300251806459609, "learning_rate": 6.049431633433955e-06, "loss": 0.285, "step": 13106 }, { "epoch": 0.44979409746053534, "grad_norm": 0.7072873451199688, "learning_rate": 6.048888266582355e-06, "loss": 0.3032, "step": 13107 }, { "epoch": 0.44982841455044614, "grad_norm": 0.7309960025331682, "learning_rate": 6.048344886772961e-06, "loss": 0.2647, "step": 13108 }, { "epoch": 0.4498627316403569, "grad_norm": 0.7590803268116778, "learning_rate": 6.047801494012491e-06, "loss": 0.2918, "step": 13109 }, { "epoch": 0.44989704873026765, "grad_norm": 0.791004341426101, "learning_rate": 6.047258088307653e-06, "loss": 0.2882, "step": 13110 }, { "epoch": 0.44993136582017845, "grad_norm": 0.6724821287047005, "learning_rate": 6.046714669665164e-06, "loss": 0.2611, "step": 13111 }, { "epoch": 0.4499656829100892, "grad_norm": 0.8516444570377943, "learning_rate": 6.0461712380917335e-06, "loss": 0.2787, "step": 13112 }, { "epoch": 0.45, "grad_norm": 0.7323895096245949, "learning_rate": 6.045627793594078e-06, "loss": 0.3322, "step": 13113 }, { "epoch": 0.45003431708991076, "grad_norm": 0.7933210609846845, "learning_rate": 6.04508433617891e-06, "loss": 0.2555, "step": 13114 }, { "epoch": 0.45006863417982157, "grad_norm": 0.6871221012391919, "learning_rate": 6.044540865852945e-06, "loss": 0.2662, "step": 13115 }, { "epoch": 0.4501029512697323, "grad_norm": 0.7863122577334168, "learning_rate": 6.043997382622894e-06, "loss": 0.3324, "step": 13116 }, { "epoch": 0.45013726835964313, "grad_norm": 0.770616614469834, "learning_rate": 6.0434538864954715e-06, "loss": 0.2313, "step": 13117 }, { "epoch": 0.4501715854495539, "grad_norm": 0.8299771244110375, "learning_rate": 6.0429103774773954e-06, "loss": 0.2815, "step": 13118 }, { "epoch": 0.45020590253946463, "grad_norm": 0.7263195651694302, "learning_rate": 6.0423668555753765e-06, "loss": 0.2711, "step": 13119 }, { "epoch": 0.45024021962937544, "grad_norm": 0.8811806915409528, "learning_rate": 6.041823320796128e-06, "loss": 0.2964, "step": 13120 }, { "epoch": 0.4502745367192862, "grad_norm": 0.75867001629073, "learning_rate": 6.041279773146369e-06, "loss": 0.3, "step": 13121 }, { "epoch": 0.450308853809197, "grad_norm": 0.8254326802330582, "learning_rate": 6.040736212632814e-06, "loss": 0.2981, "step": 13122 }, { "epoch": 0.45034317089910775, "grad_norm": 0.686453734683319, "learning_rate": 6.040192639262175e-06, "loss": 0.2441, "step": 13123 }, { "epoch": 0.45037748798901855, "grad_norm": 0.7921098495354335, "learning_rate": 6.039649053041168e-06, "loss": 0.3071, "step": 13124 }, { "epoch": 0.4504118050789293, "grad_norm": 0.793614692533436, "learning_rate": 6.03910545397651e-06, "loss": 0.3085, "step": 13125 }, { "epoch": 0.45044612216884006, "grad_norm": 0.8064882213802939, "learning_rate": 6.038561842074916e-06, "loss": 0.3521, "step": 13126 }, { "epoch": 0.45048043925875086, "grad_norm": 0.8437450139302322, "learning_rate": 6.0380182173431e-06, "loss": 0.3747, "step": 13127 }, { "epoch": 0.4505147563486616, "grad_norm": 0.7528584341634058, "learning_rate": 6.03747457978778e-06, "loss": 0.2871, "step": 13128 }, { "epoch": 0.4505490734385724, "grad_norm": 0.7383029920874656, "learning_rate": 6.03693092941567e-06, "loss": 0.2429, "step": 13129 }, { "epoch": 0.4505833905284832, "grad_norm": 0.7908386311539564, "learning_rate": 6.036387266233488e-06, "loss": 0.2737, "step": 13130 }, { "epoch": 0.450617707618394, "grad_norm": 0.7635486239721399, "learning_rate": 6.035843590247951e-06, "loss": 0.3308, "step": 13131 }, { "epoch": 0.45065202470830473, "grad_norm": 0.8607780756963337, "learning_rate": 6.035299901465772e-06, "loss": 0.3043, "step": 13132 }, { "epoch": 0.4506863417982155, "grad_norm": 0.8215431704210322, "learning_rate": 6.034756199893671e-06, "loss": 0.3133, "step": 13133 }, { "epoch": 0.4507206588881263, "grad_norm": 0.8306249802935706, "learning_rate": 6.034212485538363e-06, "loss": 0.3139, "step": 13134 }, { "epoch": 0.45075497597803704, "grad_norm": 0.7360396937945748, "learning_rate": 6.0336687584065655e-06, "loss": 0.2632, "step": 13135 }, { "epoch": 0.45078929306794785, "grad_norm": 0.8471374249691245, "learning_rate": 6.033125018504996e-06, "loss": 0.2882, "step": 13136 }, { "epoch": 0.4508236101578586, "grad_norm": 0.7726754365643459, "learning_rate": 6.032581265840371e-06, "loss": 0.3052, "step": 13137 }, { "epoch": 0.4508579272477694, "grad_norm": 0.7283059498019152, "learning_rate": 6.03203750041941e-06, "loss": 0.2943, "step": 13138 }, { "epoch": 0.45089224433768016, "grad_norm": 0.7102927559662277, "learning_rate": 6.031493722248827e-06, "loss": 0.301, "step": 13139 }, { "epoch": 0.45092656142759097, "grad_norm": 0.8059063959692655, "learning_rate": 6.030949931335341e-06, "loss": 0.3125, "step": 13140 }, { "epoch": 0.4509608785175017, "grad_norm": 0.7380069947446407, "learning_rate": 6.030406127685673e-06, "loss": 0.2689, "step": 13141 }, { "epoch": 0.45099519560741247, "grad_norm": 0.8144265769585685, "learning_rate": 6.029862311306538e-06, "loss": 0.2658, "step": 13142 }, { "epoch": 0.4510295126973233, "grad_norm": 0.6887714833300986, "learning_rate": 6.029318482204652e-06, "loss": 0.3116, "step": 13143 }, { "epoch": 0.451063829787234, "grad_norm": 0.7314259732404478, "learning_rate": 6.02877464038674e-06, "loss": 0.3576, "step": 13144 }, { "epoch": 0.45109814687714483, "grad_norm": 0.739736255641483, "learning_rate": 6.028230785859514e-06, "loss": 0.2529, "step": 13145 }, { "epoch": 0.4511324639670556, "grad_norm": 0.6880066813213359, "learning_rate": 6.027686918629697e-06, "loss": 0.2726, "step": 13146 }, { "epoch": 0.4511667810569664, "grad_norm": 0.7985160705181222, "learning_rate": 6.027143038704007e-06, "loss": 0.3322, "step": 13147 }, { "epoch": 0.45120109814687714, "grad_norm": 0.7566925919243087, "learning_rate": 6.026599146089161e-06, "loss": 0.2663, "step": 13148 }, { "epoch": 0.4512354152367879, "grad_norm": 0.8129223157354908, "learning_rate": 6.026055240791879e-06, "loss": 0.3182, "step": 13149 }, { "epoch": 0.4512697323266987, "grad_norm": 0.7548097704843144, "learning_rate": 6.025511322818882e-06, "loss": 0.2648, "step": 13150 }, { "epoch": 0.45130404941660945, "grad_norm": 0.7896163961083, "learning_rate": 6.024967392176888e-06, "loss": 0.2763, "step": 13151 }, { "epoch": 0.45133836650652026, "grad_norm": 0.7179576204343547, "learning_rate": 6.024423448872616e-06, "loss": 0.2667, "step": 13152 }, { "epoch": 0.451372683596431, "grad_norm": 0.7415929210985742, "learning_rate": 6.023879492912788e-06, "loss": 0.3007, "step": 13153 }, { "epoch": 0.4514070006863418, "grad_norm": 0.8355002037847183, "learning_rate": 6.023335524304121e-06, "loss": 0.284, "step": 13154 }, { "epoch": 0.45144131777625257, "grad_norm": 0.7793212302461857, "learning_rate": 6.022791543053338e-06, "loss": 0.2381, "step": 13155 }, { "epoch": 0.4514756348661633, "grad_norm": 0.7815577589765623, "learning_rate": 6.022247549167157e-06, "loss": 0.241, "step": 13156 }, { "epoch": 0.45150995195607413, "grad_norm": 0.7779368617680643, "learning_rate": 6.0217035426523e-06, "loss": 0.3326, "step": 13157 }, { "epoch": 0.4515442690459849, "grad_norm": 0.7747709444527303, "learning_rate": 6.021159523515486e-06, "loss": 0.3282, "step": 13158 }, { "epoch": 0.4515785861358957, "grad_norm": 0.886341600571576, "learning_rate": 6.0206154917634375e-06, "loss": 0.3061, "step": 13159 }, { "epoch": 0.45161290322580644, "grad_norm": 0.7358407673550622, "learning_rate": 6.020071447402876e-06, "loss": 0.2834, "step": 13160 }, { "epoch": 0.45164722031571725, "grad_norm": 0.938052692266274, "learning_rate": 6.019527390440518e-06, "loss": 0.3013, "step": 13161 }, { "epoch": 0.451681537405628, "grad_norm": 0.7417142814974939, "learning_rate": 6.018983320883089e-06, "loss": 0.312, "step": 13162 }, { "epoch": 0.4517158544955388, "grad_norm": 0.8047298921943596, "learning_rate": 6.01843923873731e-06, "loss": 0.2989, "step": 13163 }, { "epoch": 0.45175017158544956, "grad_norm": 0.7815336014449362, "learning_rate": 6.0178951440099024e-06, "loss": 0.2803, "step": 13164 }, { "epoch": 0.4517844886753603, "grad_norm": 0.7730797330594951, "learning_rate": 6.017351036707583e-06, "loss": 0.3205, "step": 13165 }, { "epoch": 0.4518188057652711, "grad_norm": 0.7368813628252574, "learning_rate": 6.01680691683708e-06, "loss": 0.3463, "step": 13166 }, { "epoch": 0.45185312285518187, "grad_norm": 0.7494537308810676, "learning_rate": 6.016262784405115e-06, "loss": 0.2472, "step": 13167 }, { "epoch": 0.4518874399450927, "grad_norm": 0.6715712633891626, "learning_rate": 6.015718639418405e-06, "loss": 0.2868, "step": 13168 }, { "epoch": 0.4519217570350034, "grad_norm": 0.8095779753122375, "learning_rate": 6.015174481883677e-06, "loss": 0.2902, "step": 13169 }, { "epoch": 0.45195607412491423, "grad_norm": 0.7470703268815594, "learning_rate": 6.014630311807652e-06, "loss": 0.3093, "step": 13170 }, { "epoch": 0.451990391214825, "grad_norm": 0.7035262426176507, "learning_rate": 6.014086129197052e-06, "loss": 0.2933, "step": 13171 }, { "epoch": 0.45202470830473573, "grad_norm": 0.8153620671677589, "learning_rate": 6.0135419340586e-06, "loss": 0.3644, "step": 13172 }, { "epoch": 0.45205902539464654, "grad_norm": 0.7704034555284697, "learning_rate": 6.012997726399019e-06, "loss": 0.3244, "step": 13173 }, { "epoch": 0.4520933424845573, "grad_norm": 0.7232090504441675, "learning_rate": 6.012453506225033e-06, "loss": 0.2803, "step": 13174 }, { "epoch": 0.4521276595744681, "grad_norm": 0.7639120611694771, "learning_rate": 6.011909273543364e-06, "loss": 0.2775, "step": 13175 }, { "epoch": 0.45216197666437885, "grad_norm": 0.798678668316286, "learning_rate": 6.0113650283607364e-06, "loss": 0.2853, "step": 13176 }, { "epoch": 0.45219629375428966, "grad_norm": 0.7338263584039881, "learning_rate": 6.010820770683871e-06, "loss": 0.3091, "step": 13177 }, { "epoch": 0.4522306108442004, "grad_norm": 0.7107727035842883, "learning_rate": 6.010276500519494e-06, "loss": 0.2913, "step": 13178 }, { "epoch": 0.45226492793411116, "grad_norm": 0.7896788360540815, "learning_rate": 6.00973221787433e-06, "loss": 0.2955, "step": 13179 }, { "epoch": 0.45229924502402197, "grad_norm": 0.8489183157606205, "learning_rate": 6.009187922755101e-06, "loss": 0.3055, "step": 13180 }, { "epoch": 0.4523335621139327, "grad_norm": 0.7845023491602735, "learning_rate": 6.0086436151685315e-06, "loss": 0.3289, "step": 13181 }, { "epoch": 0.4523678792038435, "grad_norm": 0.6996784780706516, "learning_rate": 6.008099295121346e-06, "loss": 0.3392, "step": 13182 }, { "epoch": 0.4524021962937543, "grad_norm": 0.6922596310494354, "learning_rate": 6.00755496262027e-06, "loss": 0.3277, "step": 13183 }, { "epoch": 0.4524365133836651, "grad_norm": 0.778380897279516, "learning_rate": 6.007010617672027e-06, "loss": 0.3733, "step": 13184 }, { "epoch": 0.45247083047357584, "grad_norm": 0.6605476741972846, "learning_rate": 6.006466260283341e-06, "loss": 0.2623, "step": 13185 }, { "epoch": 0.45250514756348664, "grad_norm": 0.8032110250336061, "learning_rate": 6.005921890460938e-06, "loss": 0.3239, "step": 13186 }, { "epoch": 0.4525394646533974, "grad_norm": 0.8135867828527439, "learning_rate": 6.0053775082115425e-06, "loss": 0.3085, "step": 13187 }, { "epoch": 0.45257378174330815, "grad_norm": 0.8252605809918562, "learning_rate": 6.00483311354188e-06, "loss": 0.3158, "step": 13188 }, { "epoch": 0.45260809883321895, "grad_norm": 0.7416932078441382, "learning_rate": 6.004288706458677e-06, "loss": 0.2582, "step": 13189 }, { "epoch": 0.4526424159231297, "grad_norm": 0.7817700591664762, "learning_rate": 6.003744286968658e-06, "loss": 0.3444, "step": 13190 }, { "epoch": 0.4526767330130405, "grad_norm": 0.7658109893854177, "learning_rate": 6.0031998550785475e-06, "loss": 0.2648, "step": 13191 }, { "epoch": 0.45271105010295126, "grad_norm": 0.7450729295525846, "learning_rate": 6.002655410795074e-06, "loss": 0.3392, "step": 13192 }, { "epoch": 0.45274536719286207, "grad_norm": 0.712242000096135, "learning_rate": 6.002110954124961e-06, "loss": 0.323, "step": 13193 }, { "epoch": 0.4527796842827728, "grad_norm": 0.7874134066045195, "learning_rate": 6.001566485074934e-06, "loss": 0.2749, "step": 13194 }, { "epoch": 0.45281400137268357, "grad_norm": 0.818310377231814, "learning_rate": 6.001022003651723e-06, "loss": 0.349, "step": 13195 }, { "epoch": 0.4528483184625944, "grad_norm": 0.7065226265816024, "learning_rate": 6.000477509862051e-06, "loss": 0.2913, "step": 13196 }, { "epoch": 0.45288263555250513, "grad_norm": 0.7762408128883269, "learning_rate": 5.9999330037126456e-06, "loss": 0.2846, "step": 13197 }, { "epoch": 0.45291695264241594, "grad_norm": 0.7453836065755352, "learning_rate": 5.999388485210235e-06, "loss": 0.2806, "step": 13198 }, { "epoch": 0.4529512697323267, "grad_norm": 0.7802479628438092, "learning_rate": 5.998843954361544e-06, "loss": 0.2769, "step": 13199 }, { "epoch": 0.4529855868222375, "grad_norm": 0.6994232454240855, "learning_rate": 5.998299411173301e-06, "loss": 0.2659, "step": 13200 }, { "epoch": 0.45301990391214825, "grad_norm": 0.7931536640292352, "learning_rate": 5.997754855652231e-06, "loss": 0.3235, "step": 13201 }, { "epoch": 0.453054221002059, "grad_norm": 1.2109711774513332, "learning_rate": 5.997210287805064e-06, "loss": 0.2618, "step": 13202 }, { "epoch": 0.4530885380919698, "grad_norm": 0.8222483995869225, "learning_rate": 5.996665707638527e-06, "loss": 0.2935, "step": 13203 }, { "epoch": 0.45312285518188056, "grad_norm": 0.7323129157634789, "learning_rate": 5.996121115159347e-06, "loss": 0.3915, "step": 13204 }, { "epoch": 0.45315717227179136, "grad_norm": 0.7901990801172328, "learning_rate": 5.9955765103742526e-06, "loss": 0.3104, "step": 13205 }, { "epoch": 0.4531914893617021, "grad_norm": 0.8166130079256237, "learning_rate": 5.995031893289971e-06, "loss": 0.3093, "step": 13206 }, { "epoch": 0.4532258064516129, "grad_norm": 0.7959045606590481, "learning_rate": 5.994487263913229e-06, "loss": 0.2927, "step": 13207 }, { "epoch": 0.4532601235415237, "grad_norm": 0.711990627734763, "learning_rate": 5.993942622250759e-06, "loss": 0.3347, "step": 13208 }, { "epoch": 0.4532944406314345, "grad_norm": 0.7591933708454952, "learning_rate": 5.993397968309283e-06, "loss": 0.3305, "step": 13209 }, { "epoch": 0.45332875772134523, "grad_norm": 0.7736278920181338, "learning_rate": 5.992853302095536e-06, "loss": 0.3332, "step": 13210 }, { "epoch": 0.453363074811256, "grad_norm": 0.7402702799806989, "learning_rate": 5.992308623616244e-06, "loss": 0.2852, "step": 13211 }, { "epoch": 0.4533973919011668, "grad_norm": 0.8653479024637897, "learning_rate": 5.991763932878135e-06, "loss": 0.3169, "step": 13212 }, { "epoch": 0.45343170899107754, "grad_norm": 0.7661635129964226, "learning_rate": 5.991219229887939e-06, "loss": 0.3149, "step": 13213 }, { "epoch": 0.45346602608098835, "grad_norm": 0.7679100136128488, "learning_rate": 5.9906745146523845e-06, "loss": 0.2526, "step": 13214 }, { "epoch": 0.4535003431708991, "grad_norm": 0.7149299334384909, "learning_rate": 5.990129787178202e-06, "loss": 0.2896, "step": 13215 }, { "epoch": 0.4535346602608099, "grad_norm": 0.6965577007313366, "learning_rate": 5.989585047472122e-06, "loss": 0.2546, "step": 13216 }, { "epoch": 0.45356897735072066, "grad_norm": 0.7927007286065828, "learning_rate": 5.98904029554087e-06, "loss": 0.3142, "step": 13217 }, { "epoch": 0.4536032944406314, "grad_norm": 0.8554221009826197, "learning_rate": 5.988495531391179e-06, "loss": 0.3115, "step": 13218 }, { "epoch": 0.4536376115305422, "grad_norm": 0.6963806265441123, "learning_rate": 5.987950755029778e-06, "loss": 0.2694, "step": 13219 }, { "epoch": 0.45367192862045297, "grad_norm": 0.7648811610269698, "learning_rate": 5.987405966463398e-06, "loss": 0.2586, "step": 13220 }, { "epoch": 0.4537062457103638, "grad_norm": 0.8486480366992458, "learning_rate": 5.986861165698768e-06, "loss": 0.2979, "step": 13221 }, { "epoch": 0.4537405628002745, "grad_norm": 0.7263637394194676, "learning_rate": 5.986316352742619e-06, "loss": 0.2598, "step": 13222 }, { "epoch": 0.45377487989018533, "grad_norm": 0.8164798116224549, "learning_rate": 5.9857715276016815e-06, "loss": 0.2588, "step": 13223 }, { "epoch": 0.4538091969800961, "grad_norm": 0.7483203500293198, "learning_rate": 5.985226690282686e-06, "loss": 0.2777, "step": 13224 }, { "epoch": 0.45384351407000684, "grad_norm": 0.776701234591559, "learning_rate": 5.984681840792363e-06, "loss": 0.2749, "step": 13225 }, { "epoch": 0.45387783115991764, "grad_norm": 0.784429917130733, "learning_rate": 5.984136979137443e-06, "loss": 0.2862, "step": 13226 }, { "epoch": 0.4539121482498284, "grad_norm": 0.7293225762220987, "learning_rate": 5.98359210532466e-06, "loss": 0.2871, "step": 13227 }, { "epoch": 0.4539464653397392, "grad_norm": 0.780028174883057, "learning_rate": 5.983047219360743e-06, "loss": 0.3255, "step": 13228 }, { "epoch": 0.45398078242964995, "grad_norm": 0.7693121676977075, "learning_rate": 5.982502321252424e-06, "loss": 0.3235, "step": 13229 }, { "epoch": 0.45401509951956076, "grad_norm": 0.7327763157112374, "learning_rate": 5.981957411006433e-06, "loss": 0.3351, "step": 13230 }, { "epoch": 0.4540494166094715, "grad_norm": 0.7945904342618615, "learning_rate": 5.9814124886295035e-06, "loss": 0.304, "step": 13231 }, { "epoch": 0.4540837336993823, "grad_norm": 0.8342406805755771, "learning_rate": 5.980867554128367e-06, "loss": 0.3102, "step": 13232 }, { "epoch": 0.45411805078929307, "grad_norm": 0.6911259854079033, "learning_rate": 5.980322607509754e-06, "loss": 0.2577, "step": 13233 }, { "epoch": 0.4541523678792038, "grad_norm": 0.6957007539760677, "learning_rate": 5.9797776487804e-06, "loss": 0.255, "step": 13234 }, { "epoch": 0.45418668496911463, "grad_norm": 0.8055561915014817, "learning_rate": 5.9792326779470355e-06, "loss": 0.3192, "step": 13235 }, { "epoch": 0.4542210020590254, "grad_norm": 0.901429099316197, "learning_rate": 5.978687695016393e-06, "loss": 0.2914, "step": 13236 }, { "epoch": 0.4542553191489362, "grad_norm": 0.7539971918987494, "learning_rate": 5.978142699995205e-06, "loss": 0.3343, "step": 13237 }, { "epoch": 0.45428963623884694, "grad_norm": 0.7572435041537388, "learning_rate": 5.9775976928902035e-06, "loss": 0.3218, "step": 13238 }, { "epoch": 0.45432395332875775, "grad_norm": 0.7391962822510616, "learning_rate": 5.977052673708123e-06, "loss": 0.2453, "step": 13239 }, { "epoch": 0.4543582704186685, "grad_norm": 0.808081178118106, "learning_rate": 5.976507642455696e-06, "loss": 0.2931, "step": 13240 }, { "epoch": 0.45439258750857925, "grad_norm": 0.8022468718452809, "learning_rate": 5.975962599139654e-06, "loss": 0.315, "step": 13241 }, { "epoch": 0.45442690459849006, "grad_norm": 0.7999263298981885, "learning_rate": 5.975417543766733e-06, "loss": 0.3738, "step": 13242 }, { "epoch": 0.4544612216884008, "grad_norm": 0.7620991681324375, "learning_rate": 5.974872476343666e-06, "loss": 0.4186, "step": 13243 }, { "epoch": 0.4544955387783116, "grad_norm": 0.7774850993185156, "learning_rate": 5.974327396877185e-06, "loss": 0.3407, "step": 13244 }, { "epoch": 0.45452985586822237, "grad_norm": 0.7492762860402741, "learning_rate": 5.973782305374026e-06, "loss": 0.2702, "step": 13245 }, { "epoch": 0.4545641729581332, "grad_norm": 0.7628415721001858, "learning_rate": 5.973237201840921e-06, "loss": 0.336, "step": 13246 }, { "epoch": 0.4545984900480439, "grad_norm": 0.8261677559285323, "learning_rate": 5.972692086284606e-06, "loss": 0.3048, "step": 13247 }, { "epoch": 0.4546328071379547, "grad_norm": 0.6691307253835594, "learning_rate": 5.972146958711813e-06, "loss": 0.2609, "step": 13248 }, { "epoch": 0.4546671242278655, "grad_norm": 0.9519387925158305, "learning_rate": 5.971601819129279e-06, "loss": 0.2989, "step": 13249 }, { "epoch": 0.45470144131777623, "grad_norm": 0.7842169281766654, "learning_rate": 5.9710566675437375e-06, "loss": 0.3149, "step": 13250 }, { "epoch": 0.45473575840768704, "grad_norm": 0.7436472147310186, "learning_rate": 5.9705115039619225e-06, "loss": 0.2998, "step": 13251 }, { "epoch": 0.4547700754975978, "grad_norm": 0.7109830872536023, "learning_rate": 5.96996632839057e-06, "loss": 0.3125, "step": 13252 }, { "epoch": 0.4548043925875086, "grad_norm": 0.8559066535568497, "learning_rate": 5.969421140836414e-06, "loss": 0.3173, "step": 13253 }, { "epoch": 0.45483870967741935, "grad_norm": 0.9052101085276821, "learning_rate": 5.9688759413061884e-06, "loss": 0.2404, "step": 13254 }, { "epoch": 0.45487302676733016, "grad_norm": 0.9295654808358719, "learning_rate": 5.968330729806632e-06, "loss": 0.2722, "step": 13255 }, { "epoch": 0.4549073438572409, "grad_norm": 0.8436183817576064, "learning_rate": 5.9677855063444786e-06, "loss": 0.3007, "step": 13256 }, { "epoch": 0.45494166094715166, "grad_norm": 0.7610869470794771, "learning_rate": 5.967240270926462e-06, "loss": 0.2552, "step": 13257 }, { "epoch": 0.45497597803706247, "grad_norm": 0.7664622948377209, "learning_rate": 5.9666950235593215e-06, "loss": 0.3164, "step": 13258 }, { "epoch": 0.4550102951269732, "grad_norm": 0.759982174574729, "learning_rate": 5.96614976424979e-06, "loss": 0.2522, "step": 13259 }, { "epoch": 0.455044612216884, "grad_norm": 0.7864566629507205, "learning_rate": 5.965604493004604e-06, "loss": 0.3235, "step": 13260 }, { "epoch": 0.4550789293067948, "grad_norm": 0.7512396619283931, "learning_rate": 5.965059209830502e-06, "loss": 0.265, "step": 13261 }, { "epoch": 0.4551132463967056, "grad_norm": 0.779810191725951, "learning_rate": 5.9645139147342155e-06, "loss": 0.2691, "step": 13262 }, { "epoch": 0.45514756348661634, "grad_norm": 0.7751872102967838, "learning_rate": 5.963968607722487e-06, "loss": 0.2512, "step": 13263 }, { "epoch": 0.4551818805765271, "grad_norm": 0.7640011177206413, "learning_rate": 5.96342328880205e-06, "loss": 0.2946, "step": 13264 }, { "epoch": 0.4552161976664379, "grad_norm": 0.748826180333537, "learning_rate": 5.962877957979641e-06, "loss": 0.276, "step": 13265 }, { "epoch": 0.45525051475634865, "grad_norm": 0.7680552138746676, "learning_rate": 5.9623326152619965e-06, "loss": 0.2658, "step": 13266 }, { "epoch": 0.45528483184625945, "grad_norm": 0.79254028857603, "learning_rate": 5.961787260655856e-06, "loss": 0.3303, "step": 13267 }, { "epoch": 0.4553191489361702, "grad_norm": 0.7624932985569863, "learning_rate": 5.961241894167954e-06, "loss": 0.3193, "step": 13268 }, { "epoch": 0.455353466026081, "grad_norm": 0.7097058918610629, "learning_rate": 5.960696515805029e-06, "loss": 0.246, "step": 13269 }, { "epoch": 0.45538778311599176, "grad_norm": 0.8068066895557728, "learning_rate": 5.960151125573819e-06, "loss": 0.2792, "step": 13270 }, { "epoch": 0.4554221002059025, "grad_norm": 0.7864907238092267, "learning_rate": 5.9596057234810615e-06, "loss": 0.3005, "step": 13271 }, { "epoch": 0.4554564172958133, "grad_norm": 0.7642284517589525, "learning_rate": 5.959060309533495e-06, "loss": 0.2868, "step": 13272 }, { "epoch": 0.45549073438572407, "grad_norm": 0.6985511260737379, "learning_rate": 5.958514883737856e-06, "loss": 0.2176, "step": 13273 }, { "epoch": 0.4555250514756349, "grad_norm": 0.8182161734624545, "learning_rate": 5.9579694461008815e-06, "loss": 0.2886, "step": 13274 }, { "epoch": 0.45555936856554563, "grad_norm": 0.8172582384609792, "learning_rate": 5.9574239966293134e-06, "loss": 0.309, "step": 13275 }, { "epoch": 0.45559368565545644, "grad_norm": 0.8547408924087029, "learning_rate": 5.9568785353298866e-06, "loss": 0.2786, "step": 13276 }, { "epoch": 0.4556280027453672, "grad_norm": 1.0363665102327024, "learning_rate": 5.9563330622093416e-06, "loss": 0.311, "step": 13277 }, { "epoch": 0.455662319835278, "grad_norm": 0.8666512890977531, "learning_rate": 5.955787577274417e-06, "loss": 0.2626, "step": 13278 }, { "epoch": 0.45569663692518875, "grad_norm": 0.7540037789810234, "learning_rate": 5.9552420805318515e-06, "loss": 0.2642, "step": 13279 }, { "epoch": 0.4557309540150995, "grad_norm": 0.7289628369415264, "learning_rate": 5.954696571988383e-06, "loss": 0.2651, "step": 13280 }, { "epoch": 0.4557652711050103, "grad_norm": 0.7404586500016136, "learning_rate": 5.954151051650752e-06, "loss": 0.3011, "step": 13281 }, { "epoch": 0.45579958819492106, "grad_norm": 0.7432519251819993, "learning_rate": 5.953605519525697e-06, "loss": 0.3106, "step": 13282 }, { "epoch": 0.45583390528483186, "grad_norm": 0.7630465006436647, "learning_rate": 5.953059975619957e-06, "loss": 0.2848, "step": 13283 }, { "epoch": 0.4558682223747426, "grad_norm": 0.8408083906132309, "learning_rate": 5.952514419940272e-06, "loss": 0.2842, "step": 13284 }, { "epoch": 0.4559025394646534, "grad_norm": 0.706783939377672, "learning_rate": 5.951968852493384e-06, "loss": 0.2319, "step": 13285 }, { "epoch": 0.4559368565545642, "grad_norm": 0.7673436129175549, "learning_rate": 5.951423273286028e-06, "loss": 0.3046, "step": 13286 }, { "epoch": 0.4559711736444749, "grad_norm": 0.782155639043587, "learning_rate": 5.950877682324948e-06, "loss": 0.2988, "step": 13287 }, { "epoch": 0.45600549073438573, "grad_norm": 0.8931322253979653, "learning_rate": 5.950332079616884e-06, "loss": 0.2734, "step": 13288 }, { "epoch": 0.4560398078242965, "grad_norm": 0.8452968535694658, "learning_rate": 5.949786465168574e-06, "loss": 0.3111, "step": 13289 }, { "epoch": 0.4560741249142073, "grad_norm": 0.8288566489971304, "learning_rate": 5.9492408389867585e-06, "loss": 0.2726, "step": 13290 }, { "epoch": 0.45610844200411804, "grad_norm": 0.7384446683047381, "learning_rate": 5.9486952010781805e-06, "loss": 0.31, "step": 13291 }, { "epoch": 0.45614275909402885, "grad_norm": 0.735523727069428, "learning_rate": 5.948149551449579e-06, "loss": 0.2503, "step": 13292 }, { "epoch": 0.4561770761839396, "grad_norm": 0.7410526958889132, "learning_rate": 5.947603890107695e-06, "loss": 0.2798, "step": 13293 }, { "epoch": 0.45621139327385035, "grad_norm": 0.7320073425752878, "learning_rate": 5.947058217059269e-06, "loss": 0.2707, "step": 13294 }, { "epoch": 0.45624571036376116, "grad_norm": 0.8159348294700245, "learning_rate": 5.946512532311045e-06, "loss": 0.258, "step": 13295 }, { "epoch": 0.4562800274536719, "grad_norm": 0.8252383861175531, "learning_rate": 5.945966835869761e-06, "loss": 0.3804, "step": 13296 }, { "epoch": 0.4563143445435827, "grad_norm": 0.6504082766583551, "learning_rate": 5.945421127742159e-06, "loss": 0.3181, "step": 13297 }, { "epoch": 0.45634866163349347, "grad_norm": 0.7580321164489099, "learning_rate": 5.944875407934983e-06, "loss": 0.2695, "step": 13298 }, { "epoch": 0.4563829787234043, "grad_norm": 0.9906681470597002, "learning_rate": 5.944329676454971e-06, "loss": 0.268, "step": 13299 }, { "epoch": 0.456417295813315, "grad_norm": 0.8172783982993221, "learning_rate": 5.943783933308868e-06, "loss": 0.296, "step": 13300 }, { "epoch": 0.45645161290322583, "grad_norm": 0.7875562363834775, "learning_rate": 5.943238178503416e-06, "loss": 0.2597, "step": 13301 }, { "epoch": 0.4564859299931366, "grad_norm": 0.9158498438378733, "learning_rate": 5.942692412045353e-06, "loss": 0.3185, "step": 13302 }, { "epoch": 0.45652024708304734, "grad_norm": 0.6787049838126877, "learning_rate": 5.942146633941426e-06, "loss": 0.2983, "step": 13303 }, { "epoch": 0.45655456417295814, "grad_norm": 0.7571186817897304, "learning_rate": 5.941600844198376e-06, "loss": 0.2743, "step": 13304 }, { "epoch": 0.4565888812628689, "grad_norm": 0.731312253002976, "learning_rate": 5.941055042822945e-06, "loss": 0.2808, "step": 13305 }, { "epoch": 0.4566231983527797, "grad_norm": 0.8347225099111828, "learning_rate": 5.940509229821876e-06, "loss": 0.3315, "step": 13306 }, { "epoch": 0.45665751544269045, "grad_norm": 0.7700559025394873, "learning_rate": 5.9399634052019115e-06, "loss": 0.2254, "step": 13307 }, { "epoch": 0.45669183253260126, "grad_norm": 0.8096350053140916, "learning_rate": 5.9394175689697964e-06, "loss": 0.2635, "step": 13308 }, { "epoch": 0.456726149622512, "grad_norm": 0.7061604496282615, "learning_rate": 5.938871721132271e-06, "loss": 0.2738, "step": 13309 }, { "epoch": 0.45676046671242276, "grad_norm": 0.8323303417468919, "learning_rate": 5.938325861696082e-06, "loss": 0.3331, "step": 13310 }, { "epoch": 0.45679478380233357, "grad_norm": 0.8383517817971365, "learning_rate": 5.93777999066797e-06, "loss": 0.3425, "step": 13311 }, { "epoch": 0.4568291008922443, "grad_norm": 0.6519096067863912, "learning_rate": 5.9372341080546795e-06, "loss": 0.246, "step": 13312 }, { "epoch": 0.45686341798215513, "grad_norm": 0.7461136026182902, "learning_rate": 5.9366882138629535e-06, "loss": 0.255, "step": 13313 }, { "epoch": 0.4568977350720659, "grad_norm": 0.7618893490293243, "learning_rate": 5.936142308099537e-06, "loss": 0.2921, "step": 13314 }, { "epoch": 0.4569320521619767, "grad_norm": 0.7433827339200787, "learning_rate": 5.935596390771175e-06, "loss": 0.3572, "step": 13315 }, { "epoch": 0.45696636925188744, "grad_norm": 0.8353729671595296, "learning_rate": 5.935050461884608e-06, "loss": 0.3099, "step": 13316 }, { "epoch": 0.4570006863417982, "grad_norm": 0.7342120951713962, "learning_rate": 5.934504521446585e-06, "loss": 0.3192, "step": 13317 }, { "epoch": 0.457035003431709, "grad_norm": 0.7839311552249995, "learning_rate": 5.9339585694638465e-06, "loss": 0.2773, "step": 13318 }, { "epoch": 0.45706932052161975, "grad_norm": 0.7128050504385955, "learning_rate": 5.93341260594314e-06, "loss": 0.3373, "step": 13319 }, { "epoch": 0.45710363761153056, "grad_norm": 0.7219220111320891, "learning_rate": 5.932866630891209e-06, "loss": 0.2427, "step": 13320 }, { "epoch": 0.4571379547014413, "grad_norm": 0.6373561975304598, "learning_rate": 5.932320644314797e-06, "loss": 0.2688, "step": 13321 }, { "epoch": 0.4571722717913521, "grad_norm": 0.6894484118122719, "learning_rate": 5.9317746462206515e-06, "loss": 0.2439, "step": 13322 }, { "epoch": 0.45720658888126287, "grad_norm": 0.8239666650923663, "learning_rate": 5.931228636615517e-06, "loss": 0.2962, "step": 13323 }, { "epoch": 0.4572409059711736, "grad_norm": 0.8364726593681889, "learning_rate": 5.930682615506138e-06, "loss": 0.3184, "step": 13324 }, { "epoch": 0.4572752230610844, "grad_norm": 0.8010130688895902, "learning_rate": 5.930136582899261e-06, "loss": 0.2577, "step": 13325 }, { "epoch": 0.4573095401509952, "grad_norm": 0.7688875228977039, "learning_rate": 5.92959053880163e-06, "loss": 0.253, "step": 13326 }, { "epoch": 0.457343857240906, "grad_norm": 0.7637474662273336, "learning_rate": 5.929044483219992e-06, "loss": 0.2968, "step": 13327 }, { "epoch": 0.45737817433081673, "grad_norm": 0.8086246818153316, "learning_rate": 5.9284984161610915e-06, "loss": 0.2428, "step": 13328 }, { "epoch": 0.45741249142072754, "grad_norm": 0.8731636253334644, "learning_rate": 5.927952337631676e-06, "loss": 0.3235, "step": 13329 }, { "epoch": 0.4574468085106383, "grad_norm": 0.8303202761869013, "learning_rate": 5.927406247638493e-06, "loss": 0.2599, "step": 13330 }, { "epoch": 0.4574811256005491, "grad_norm": 0.8020044197886452, "learning_rate": 5.926860146188286e-06, "loss": 0.284, "step": 13331 }, { "epoch": 0.45751544269045985, "grad_norm": 0.8344200458565827, "learning_rate": 5.926314033287802e-06, "loss": 0.2947, "step": 13332 }, { "epoch": 0.4575497597803706, "grad_norm": 0.6938637625928905, "learning_rate": 5.925767908943789e-06, "loss": 0.2651, "step": 13333 }, { "epoch": 0.4575840768702814, "grad_norm": 0.7705420297011921, "learning_rate": 5.9252217731629926e-06, "loss": 0.2748, "step": 13334 }, { "epoch": 0.45761839396019216, "grad_norm": 0.7807690155095524, "learning_rate": 5.924675625952158e-06, "loss": 0.2783, "step": 13335 }, { "epoch": 0.45765271105010297, "grad_norm": 0.8117429988398148, "learning_rate": 5.924129467318037e-06, "loss": 0.2945, "step": 13336 }, { "epoch": 0.4576870281400137, "grad_norm": 0.840336486413433, "learning_rate": 5.923583297267372e-06, "loss": 0.2676, "step": 13337 }, { "epoch": 0.4577213452299245, "grad_norm": 0.6776174693696265, "learning_rate": 5.923037115806913e-06, "loss": 0.2813, "step": 13338 }, { "epoch": 0.4577556623198353, "grad_norm": 0.7894513435920295, "learning_rate": 5.922490922943406e-06, "loss": 0.303, "step": 13339 }, { "epoch": 0.45778997940974603, "grad_norm": 0.7909221326177426, "learning_rate": 5.9219447186836e-06, "loss": 0.3365, "step": 13340 }, { "epoch": 0.45782429649965684, "grad_norm": 0.7600833761716744, "learning_rate": 5.921398503034241e-06, "loss": 0.2849, "step": 13341 }, { "epoch": 0.4578586135895676, "grad_norm": 0.8401480306904371, "learning_rate": 5.920852276002078e-06, "loss": 0.4244, "step": 13342 }, { "epoch": 0.4578929306794784, "grad_norm": 0.7918461525179475, "learning_rate": 5.9203060375938595e-06, "loss": 0.3475, "step": 13343 }, { "epoch": 0.45792724776938915, "grad_norm": 0.8043214583734367, "learning_rate": 5.919759787816332e-06, "loss": 0.2855, "step": 13344 }, { "epoch": 0.45796156485929995, "grad_norm": 0.652750828131393, "learning_rate": 5.919213526676244e-06, "loss": 0.2449, "step": 13345 }, { "epoch": 0.4579958819492107, "grad_norm": 0.7698856862771231, "learning_rate": 5.918667254180347e-06, "loss": 0.2396, "step": 13346 }, { "epoch": 0.45803019903912146, "grad_norm": 0.708680430553582, "learning_rate": 5.918120970335385e-06, "loss": 0.2873, "step": 13347 }, { "epoch": 0.45806451612903226, "grad_norm": 0.6896628166500477, "learning_rate": 5.91757467514811e-06, "loss": 0.287, "step": 13348 }, { "epoch": 0.458098833218943, "grad_norm": 0.7356879446202352, "learning_rate": 5.917028368625269e-06, "loss": 0.3117, "step": 13349 }, { "epoch": 0.4581331503088538, "grad_norm": 0.7743128731793842, "learning_rate": 5.916482050773612e-06, "loss": 0.3131, "step": 13350 }, { "epoch": 0.45816746739876457, "grad_norm": 0.7651409926601702, "learning_rate": 5.9159357215998855e-06, "loss": 0.3446, "step": 13351 }, { "epoch": 0.4582017844886754, "grad_norm": 0.6911576643936962, "learning_rate": 5.915389381110846e-06, "loss": 0.2714, "step": 13352 }, { "epoch": 0.45823610157858613, "grad_norm": 0.9266782171353435, "learning_rate": 5.9148430293132334e-06, "loss": 0.3285, "step": 13353 }, { "epoch": 0.45827041866849694, "grad_norm": 2.5057536070842237, "learning_rate": 5.914296666213804e-06, "loss": 0.3738, "step": 13354 }, { "epoch": 0.4583047357584077, "grad_norm": 0.7392392499388399, "learning_rate": 5.913750291819304e-06, "loss": 0.2699, "step": 13355 }, { "epoch": 0.45833905284831844, "grad_norm": 0.7094739481039817, "learning_rate": 5.913203906136487e-06, "loss": 0.2837, "step": 13356 }, { "epoch": 0.45837336993822925, "grad_norm": 0.6344174875338827, "learning_rate": 5.912657509172097e-06, "loss": 0.234, "step": 13357 }, { "epoch": 0.45840768702814, "grad_norm": 0.7334017652765037, "learning_rate": 5.912111100932889e-06, "loss": 0.2743, "step": 13358 }, { "epoch": 0.4584420041180508, "grad_norm": 0.8705717148651291, "learning_rate": 5.911564681425612e-06, "loss": 0.3414, "step": 13359 }, { "epoch": 0.45847632120796156, "grad_norm": 0.6961158239286956, "learning_rate": 5.911018250657016e-06, "loss": 0.3572, "step": 13360 }, { "epoch": 0.45851063829787236, "grad_norm": 0.8773465356547873, "learning_rate": 5.910471808633851e-06, "loss": 0.2789, "step": 13361 }, { "epoch": 0.4585449553877831, "grad_norm": 0.786645560698439, "learning_rate": 5.90992535536287e-06, "loss": 0.3143, "step": 13362 }, { "epoch": 0.45857927247769387, "grad_norm": 0.7066421954945031, "learning_rate": 5.909378890850821e-06, "loss": 0.2778, "step": 13363 }, { "epoch": 0.4586135895676047, "grad_norm": 0.9316620530812459, "learning_rate": 5.9088324151044566e-06, "loss": 0.3061, "step": 13364 }, { "epoch": 0.4586479066575154, "grad_norm": 0.7733358259942549, "learning_rate": 5.908285928130526e-06, "loss": 0.3079, "step": 13365 }, { "epoch": 0.45868222374742623, "grad_norm": 0.8014718570282954, "learning_rate": 5.9077394299357825e-06, "loss": 0.3154, "step": 13366 }, { "epoch": 0.458716540837337, "grad_norm": 0.8441775264271953, "learning_rate": 5.907192920526976e-06, "loss": 0.3047, "step": 13367 }, { "epoch": 0.4587508579272478, "grad_norm": 0.6828931098993476, "learning_rate": 5.90664639991086e-06, "loss": 0.3048, "step": 13368 }, { "epoch": 0.45878517501715854, "grad_norm": 0.7804596646690223, "learning_rate": 5.906099868094185e-06, "loss": 0.2962, "step": 13369 }, { "epoch": 0.4588194921070693, "grad_norm": 0.6868407600793307, "learning_rate": 5.905553325083702e-06, "loss": 0.273, "step": 13370 }, { "epoch": 0.4588538091969801, "grad_norm": 0.7821635487461341, "learning_rate": 5.905006770886162e-06, "loss": 0.2918, "step": 13371 }, { "epoch": 0.45888812628689085, "grad_norm": 0.6760380848833304, "learning_rate": 5.90446020550832e-06, "loss": 0.2315, "step": 13372 }, { "epoch": 0.45892244337680166, "grad_norm": 0.7722370648088487, "learning_rate": 5.9039136289569255e-06, "loss": 0.2881, "step": 13373 }, { "epoch": 0.4589567604667124, "grad_norm": 0.8306166215804414, "learning_rate": 5.903367041238732e-06, "loss": 0.3003, "step": 13374 }, { "epoch": 0.4589910775566232, "grad_norm": 0.7602806523456169, "learning_rate": 5.902820442360494e-06, "loss": 0.2932, "step": 13375 }, { "epoch": 0.45902539464653397, "grad_norm": 0.7824469873429183, "learning_rate": 5.902273832328961e-06, "loss": 0.289, "step": 13376 }, { "epoch": 0.4590597117364448, "grad_norm": 0.7256745721148747, "learning_rate": 5.901727211150886e-06, "loss": 0.279, "step": 13377 }, { "epoch": 0.4590940288263555, "grad_norm": 0.7110774306422234, "learning_rate": 5.901180578833023e-06, "loss": 0.2873, "step": 13378 }, { "epoch": 0.4591283459162663, "grad_norm": 0.7671948370567072, "learning_rate": 5.900633935382125e-06, "loss": 0.313, "step": 13379 }, { "epoch": 0.4591626630061771, "grad_norm": 2.0095991872742145, "learning_rate": 5.900087280804945e-06, "loss": 0.2787, "step": 13380 }, { "epoch": 0.45919698009608784, "grad_norm": 0.7869077702485133, "learning_rate": 5.899540615108234e-06, "loss": 0.2373, "step": 13381 }, { "epoch": 0.45923129718599864, "grad_norm": 0.7412328690366494, "learning_rate": 5.898993938298749e-06, "loss": 0.3396, "step": 13382 }, { "epoch": 0.4592656142759094, "grad_norm": 0.7623731646858686, "learning_rate": 5.898447250383242e-06, "loss": 0.3026, "step": 13383 }, { "epoch": 0.4592999313658202, "grad_norm": 0.8029762069777956, "learning_rate": 5.897900551368468e-06, "loss": 0.2742, "step": 13384 }, { "epoch": 0.45933424845573095, "grad_norm": 0.8721578293235447, "learning_rate": 5.897353841261178e-06, "loss": 0.316, "step": 13385 }, { "epoch": 0.4593685655456417, "grad_norm": 0.8526326953839888, "learning_rate": 5.896807120068127e-06, "loss": 0.2818, "step": 13386 }, { "epoch": 0.4594028826355525, "grad_norm": 0.7793480510011495, "learning_rate": 5.896260387796071e-06, "loss": 0.2952, "step": 13387 }, { "epoch": 0.45943719972546326, "grad_norm": 0.7543039223871125, "learning_rate": 5.895713644451762e-06, "loss": 0.293, "step": 13388 }, { "epoch": 0.45947151681537407, "grad_norm": 0.8486586693665885, "learning_rate": 5.895166890041954e-06, "loss": 0.3381, "step": 13389 }, { "epoch": 0.4595058339052848, "grad_norm": 0.7529711181912246, "learning_rate": 5.894620124573405e-06, "loss": 0.275, "step": 13390 }, { "epoch": 0.45954015099519563, "grad_norm": 0.7245908726784541, "learning_rate": 5.894073348052867e-06, "loss": 0.3293, "step": 13391 }, { "epoch": 0.4595744680851064, "grad_norm": 0.7631517782825051, "learning_rate": 5.893526560487095e-06, "loss": 0.3333, "step": 13392 }, { "epoch": 0.45960878517501713, "grad_norm": 0.7827943957621492, "learning_rate": 5.892979761882843e-06, "loss": 0.26, "step": 13393 }, { "epoch": 0.45964310226492794, "grad_norm": 0.802762475765438, "learning_rate": 5.892432952246869e-06, "loss": 0.2867, "step": 13394 }, { "epoch": 0.4596774193548387, "grad_norm": 0.744717665973634, "learning_rate": 5.891886131585924e-06, "loss": 0.2739, "step": 13395 }, { "epoch": 0.4597117364447495, "grad_norm": 0.7497453299764042, "learning_rate": 5.891339299906767e-06, "loss": 0.2857, "step": 13396 }, { "epoch": 0.45974605353466025, "grad_norm": 0.8051205423700992, "learning_rate": 5.890792457216152e-06, "loss": 0.3112, "step": 13397 }, { "epoch": 0.45978037062457106, "grad_norm": 0.6973868513332846, "learning_rate": 5.890245603520836e-06, "loss": 0.3045, "step": 13398 }, { "epoch": 0.4598146877144818, "grad_norm": 0.8175596599146149, "learning_rate": 5.8896987388275715e-06, "loss": 0.3112, "step": 13399 }, { "epoch": 0.4598490048043926, "grad_norm": 0.9283069127373802, "learning_rate": 5.889151863143119e-06, "loss": 0.2639, "step": 13400 }, { "epoch": 0.45988332189430337, "grad_norm": 0.7888565232756137, "learning_rate": 5.888604976474229e-06, "loss": 0.3022, "step": 13401 }, { "epoch": 0.4599176389842141, "grad_norm": 0.8661650646655153, "learning_rate": 5.888058078827662e-06, "loss": 0.2972, "step": 13402 }, { "epoch": 0.4599519560741249, "grad_norm": 0.7963807417091957, "learning_rate": 5.887511170210171e-06, "loss": 0.26, "step": 13403 }, { "epoch": 0.4599862731640357, "grad_norm": 0.8115271077662328, "learning_rate": 5.8869642506285165e-06, "loss": 0.309, "step": 13404 }, { "epoch": 0.4600205902539465, "grad_norm": 0.8096728108665702, "learning_rate": 5.886417320089451e-06, "loss": 0.36, "step": 13405 }, { "epoch": 0.46005490734385723, "grad_norm": 0.7646192857320583, "learning_rate": 5.8858703785997355e-06, "loss": 0.2768, "step": 13406 }, { "epoch": 0.46008922443376804, "grad_norm": 0.7262976504900968, "learning_rate": 5.885323426166123e-06, "loss": 0.2973, "step": 13407 }, { "epoch": 0.4601235415236788, "grad_norm": 0.7536713427841703, "learning_rate": 5.884776462795371e-06, "loss": 0.2517, "step": 13408 }, { "epoch": 0.46015785861358954, "grad_norm": 0.7598431993746365, "learning_rate": 5.8842294884942375e-06, "loss": 0.2628, "step": 13409 }, { "epoch": 0.46019217570350035, "grad_norm": 0.8349180634807589, "learning_rate": 5.88368250326948e-06, "loss": 0.2578, "step": 13410 }, { "epoch": 0.4602264927934111, "grad_norm": 0.7630017976022866, "learning_rate": 5.883135507127855e-06, "loss": 0.2763, "step": 13411 }, { "epoch": 0.4602608098833219, "grad_norm": 0.7045359357521691, "learning_rate": 5.882588500076121e-06, "loss": 0.2625, "step": 13412 }, { "epoch": 0.46029512697323266, "grad_norm": 0.8331835705371998, "learning_rate": 5.882041482121036e-06, "loss": 0.2904, "step": 13413 }, { "epoch": 0.46032944406314347, "grad_norm": 0.7956306157676486, "learning_rate": 5.881494453269355e-06, "loss": 0.3008, "step": 13414 }, { "epoch": 0.4603637611530542, "grad_norm": 0.7689429017302953, "learning_rate": 5.8809474135278386e-06, "loss": 0.2827, "step": 13415 }, { "epoch": 0.46039807824296497, "grad_norm": 0.6860796248240382, "learning_rate": 5.8804003629032445e-06, "loss": 0.3088, "step": 13416 }, { "epoch": 0.4604323953328758, "grad_norm": 0.7574088290321312, "learning_rate": 5.879853301402328e-06, "loss": 0.3059, "step": 13417 }, { "epoch": 0.46046671242278653, "grad_norm": 0.7815324017145128, "learning_rate": 5.879306229031853e-06, "loss": 0.3054, "step": 13418 }, { "epoch": 0.46050102951269734, "grad_norm": 0.819241864217177, "learning_rate": 5.878759145798572e-06, "loss": 0.356, "step": 13419 }, { "epoch": 0.4605353466026081, "grad_norm": 0.6694812552696887, "learning_rate": 5.878212051709249e-06, "loss": 0.2594, "step": 13420 }, { "epoch": 0.4605696636925189, "grad_norm": 0.8686731310427932, "learning_rate": 5.877664946770638e-06, "loss": 0.2687, "step": 13421 }, { "epoch": 0.46060398078242965, "grad_norm": 0.6737245359686206, "learning_rate": 5.8771178309895e-06, "loss": 0.2886, "step": 13422 }, { "epoch": 0.46063829787234045, "grad_norm": 0.872095952288879, "learning_rate": 5.876570704372595e-06, "loss": 0.3334, "step": 13423 }, { "epoch": 0.4606726149622512, "grad_norm": 0.7476782258593475, "learning_rate": 5.876023566926681e-06, "loss": 0.271, "step": 13424 }, { "epoch": 0.46070693205216195, "grad_norm": 0.7497327971291916, "learning_rate": 5.875476418658515e-06, "loss": 0.2635, "step": 13425 }, { "epoch": 0.46074124914207276, "grad_norm": 0.8257023281136741, "learning_rate": 5.87492925957486e-06, "loss": 0.2972, "step": 13426 }, { "epoch": 0.4607755662319835, "grad_norm": 0.7744332366662563, "learning_rate": 5.874382089682473e-06, "loss": 0.3172, "step": 13427 }, { "epoch": 0.4608098833218943, "grad_norm": 0.7259175593256759, "learning_rate": 5.873834908988116e-06, "loss": 0.2453, "step": 13428 }, { "epoch": 0.46084420041180507, "grad_norm": 0.7850778553850801, "learning_rate": 5.873287717498548e-06, "loss": 0.3029, "step": 13429 }, { "epoch": 0.4608785175017159, "grad_norm": 0.88109272537274, "learning_rate": 5.872740515220528e-06, "loss": 0.3627, "step": 13430 }, { "epoch": 0.46091283459162663, "grad_norm": 0.8225994933903429, "learning_rate": 5.872193302160816e-06, "loss": 0.2624, "step": 13431 }, { "epoch": 0.4609471516815374, "grad_norm": 0.654751778949846, "learning_rate": 5.8716460783261715e-06, "loss": 0.2637, "step": 13432 }, { "epoch": 0.4609814687714482, "grad_norm": 0.8848434966127758, "learning_rate": 5.871098843723357e-06, "loss": 0.2964, "step": 13433 }, { "epoch": 0.46101578586135894, "grad_norm": 0.7357484787474331, "learning_rate": 5.870551598359132e-06, "loss": 0.2986, "step": 13434 }, { "epoch": 0.46105010295126975, "grad_norm": 0.7578033594700436, "learning_rate": 5.870004342240256e-06, "loss": 0.2944, "step": 13435 }, { "epoch": 0.4610844200411805, "grad_norm": 0.7843522506321176, "learning_rate": 5.869457075373492e-06, "loss": 0.3195, "step": 13436 }, { "epoch": 0.4611187371310913, "grad_norm": 0.7610335123794046, "learning_rate": 5.8689097977656e-06, "loss": 0.2679, "step": 13437 }, { "epoch": 0.46115305422100206, "grad_norm": 0.7459178216298732, "learning_rate": 5.86836250942334e-06, "loss": 0.2538, "step": 13438 }, { "epoch": 0.4611873713109128, "grad_norm": 0.829817130506213, "learning_rate": 5.867815210353473e-06, "loss": 0.3228, "step": 13439 }, { "epoch": 0.4612216884008236, "grad_norm": 0.7650060345179391, "learning_rate": 5.86726790056276e-06, "loss": 0.3068, "step": 13440 }, { "epoch": 0.46125600549073437, "grad_norm": 0.7234126958686156, "learning_rate": 5.866720580057964e-06, "loss": 0.2965, "step": 13441 }, { "epoch": 0.4612903225806452, "grad_norm": 0.8368443565470302, "learning_rate": 5.866173248845848e-06, "loss": 0.3388, "step": 13442 }, { "epoch": 0.4613246396705559, "grad_norm": 0.9780896550329674, "learning_rate": 5.8656259069331685e-06, "loss": 0.2754, "step": 13443 }, { "epoch": 0.46135895676046673, "grad_norm": 0.7370071450308945, "learning_rate": 5.865078554326691e-06, "loss": 0.2562, "step": 13444 }, { "epoch": 0.4613932738503775, "grad_norm": 0.7732435784161339, "learning_rate": 5.864531191033177e-06, "loss": 0.2587, "step": 13445 }, { "epoch": 0.4614275909402883, "grad_norm": 0.7187956687776914, "learning_rate": 5.863983817059387e-06, "loss": 0.308, "step": 13446 }, { "epoch": 0.46146190803019904, "grad_norm": 0.8612256185588407, "learning_rate": 5.863436432412085e-06, "loss": 0.3686, "step": 13447 }, { "epoch": 0.4614962251201098, "grad_norm": 0.7761524632129222, "learning_rate": 5.8628890370980315e-06, "loss": 0.2817, "step": 13448 }, { "epoch": 0.4615305422100206, "grad_norm": 0.7879570230853015, "learning_rate": 5.862341631123993e-06, "loss": 0.2745, "step": 13449 }, { "epoch": 0.46156485929993135, "grad_norm": 0.7373004912142731, "learning_rate": 5.8617942144967254e-06, "loss": 0.2549, "step": 13450 }, { "epoch": 0.46159917638984216, "grad_norm": 0.7207829429110373, "learning_rate": 5.861246787222997e-06, "loss": 0.2962, "step": 13451 }, { "epoch": 0.4616334934797529, "grad_norm": 0.7062543292797179, "learning_rate": 5.860699349309569e-06, "loss": 0.2974, "step": 13452 }, { "epoch": 0.4616678105696637, "grad_norm": 0.8223126602433213, "learning_rate": 5.860151900763204e-06, "loss": 0.2959, "step": 13453 }, { "epoch": 0.46170212765957447, "grad_norm": 0.7524166264431822, "learning_rate": 5.8596044415906624e-06, "loss": 0.3019, "step": 13454 }, { "epoch": 0.4617364447494852, "grad_norm": 0.7959935489074802, "learning_rate": 5.859056971798713e-06, "loss": 0.308, "step": 13455 }, { "epoch": 0.461770761839396, "grad_norm": 0.6769883994727188, "learning_rate": 5.858509491394116e-06, "loss": 0.2744, "step": 13456 }, { "epoch": 0.4618050789293068, "grad_norm": 0.7654155548609084, "learning_rate": 5.857962000383634e-06, "loss": 0.2879, "step": 13457 }, { "epoch": 0.4618393960192176, "grad_norm": 0.837716187936775, "learning_rate": 5.857414498774032e-06, "loss": 0.3047, "step": 13458 }, { "epoch": 0.46187371310912834, "grad_norm": 0.7257510215389923, "learning_rate": 5.856866986572074e-06, "loss": 0.2705, "step": 13459 }, { "epoch": 0.46190803019903914, "grad_norm": 0.8500329991803808, "learning_rate": 5.856319463784523e-06, "loss": 0.2864, "step": 13460 }, { "epoch": 0.4619423472889499, "grad_norm": 0.6820166551936344, "learning_rate": 5.855771930418145e-06, "loss": 0.2741, "step": 13461 }, { "epoch": 0.46197666437886065, "grad_norm": 0.8529647005471533, "learning_rate": 5.8552243864797e-06, "loss": 0.3397, "step": 13462 }, { "epoch": 0.46201098146877145, "grad_norm": 0.8557143711607764, "learning_rate": 5.854676831975955e-06, "loss": 0.2547, "step": 13463 }, { "epoch": 0.4620452985586822, "grad_norm": 0.7992812634066863, "learning_rate": 5.8541292669136754e-06, "loss": 0.3028, "step": 13464 }, { "epoch": 0.462079615648593, "grad_norm": 0.7468731883992971, "learning_rate": 5.853581691299626e-06, "loss": 0.3178, "step": 13465 }, { "epoch": 0.46211393273850376, "grad_norm": 0.6929253766020365, "learning_rate": 5.853034105140567e-06, "loss": 0.2719, "step": 13466 }, { "epoch": 0.46214824982841457, "grad_norm": 0.801377371491967, "learning_rate": 5.852486508443267e-06, "loss": 0.2876, "step": 13467 }, { "epoch": 0.4621825669183253, "grad_norm": 0.7639173332339503, "learning_rate": 5.851938901214493e-06, "loss": 0.3019, "step": 13468 }, { "epoch": 0.46221688400823613, "grad_norm": 0.8005013107811578, "learning_rate": 5.851391283461003e-06, "loss": 0.2842, "step": 13469 }, { "epoch": 0.4622512010981469, "grad_norm": 0.7112940066085919, "learning_rate": 5.850843655189567e-06, "loss": 0.299, "step": 13470 }, { "epoch": 0.46228551818805763, "grad_norm": 0.7480205814548732, "learning_rate": 5.850296016406951e-06, "loss": 0.3005, "step": 13471 }, { "epoch": 0.46231983527796844, "grad_norm": 0.7952449811580655, "learning_rate": 5.849748367119919e-06, "loss": 0.2833, "step": 13472 }, { "epoch": 0.4623541523678792, "grad_norm": 0.742342092881182, "learning_rate": 5.849200707335236e-06, "loss": 0.2689, "step": 13473 }, { "epoch": 0.46238846945779, "grad_norm": 0.7424283684205057, "learning_rate": 5.8486530370596695e-06, "loss": 0.2565, "step": 13474 }, { "epoch": 0.46242278654770075, "grad_norm": 0.7366543581110274, "learning_rate": 5.848105356299984e-06, "loss": 0.3052, "step": 13475 }, { "epoch": 0.46245710363761156, "grad_norm": 1.0197501154242112, "learning_rate": 5.847557665062944e-06, "loss": 0.306, "step": 13476 }, { "epoch": 0.4624914207275223, "grad_norm": 0.9072113671297781, "learning_rate": 5.8470099633553175e-06, "loss": 0.2732, "step": 13477 }, { "epoch": 0.46252573781743306, "grad_norm": 0.7140931090257996, "learning_rate": 5.846462251183871e-06, "loss": 0.3247, "step": 13478 }, { "epoch": 0.46256005490734386, "grad_norm": 0.8002817136327798, "learning_rate": 5.845914528555371e-06, "loss": 0.2842, "step": 13479 }, { "epoch": 0.4625943719972546, "grad_norm": 0.6981988816819683, "learning_rate": 5.845366795476581e-06, "loss": 0.3089, "step": 13480 }, { "epoch": 0.4626286890871654, "grad_norm": 0.7516330854711879, "learning_rate": 5.844819051954273e-06, "loss": 0.2941, "step": 13481 }, { "epoch": 0.4626630061770762, "grad_norm": 0.7081025638185068, "learning_rate": 5.844271297995209e-06, "loss": 0.2762, "step": 13482 }, { "epoch": 0.462697323266987, "grad_norm": 0.803512711544217, "learning_rate": 5.843723533606157e-06, "loss": 0.2779, "step": 13483 }, { "epoch": 0.46273164035689773, "grad_norm": 0.7346765155014162, "learning_rate": 5.843175758793886e-06, "loss": 0.2776, "step": 13484 }, { "epoch": 0.4627659574468085, "grad_norm": 0.7878677969252081, "learning_rate": 5.842627973565159e-06, "loss": 0.3082, "step": 13485 }, { "epoch": 0.4628002745367193, "grad_norm": 0.7007919133521133, "learning_rate": 5.842080177926748e-06, "loss": 0.2326, "step": 13486 }, { "epoch": 0.46283459162663004, "grad_norm": 0.7498783905585783, "learning_rate": 5.841532371885418e-06, "loss": 0.3118, "step": 13487 }, { "epoch": 0.46286890871654085, "grad_norm": 0.8049088744442195, "learning_rate": 5.840984555447935e-06, "loss": 0.2825, "step": 13488 }, { "epoch": 0.4629032258064516, "grad_norm": 0.7105347754373454, "learning_rate": 5.840436728621069e-06, "loss": 0.2829, "step": 13489 }, { "epoch": 0.4629375428963624, "grad_norm": 0.8060275000910366, "learning_rate": 5.839888891411589e-06, "loss": 0.2666, "step": 13490 }, { "epoch": 0.46297185998627316, "grad_norm": 0.8779273605565915, "learning_rate": 5.839341043826259e-06, "loss": 0.26, "step": 13491 }, { "epoch": 0.46300617707618397, "grad_norm": 0.7356334016791332, "learning_rate": 5.8387931858718485e-06, "loss": 0.2742, "step": 13492 }, { "epoch": 0.4630404941660947, "grad_norm": 0.770708033149195, "learning_rate": 5.838245317555127e-06, "loss": 0.2581, "step": 13493 }, { "epoch": 0.46307481125600547, "grad_norm": 0.7720774617251157, "learning_rate": 5.837697438882862e-06, "loss": 0.2916, "step": 13494 }, { "epoch": 0.4631091283459163, "grad_norm": 0.6394409821142041, "learning_rate": 5.837149549861822e-06, "loss": 0.2598, "step": 13495 }, { "epoch": 0.46314344543582703, "grad_norm": 0.7980203834425762, "learning_rate": 5.8366016504987745e-06, "loss": 0.2548, "step": 13496 }, { "epoch": 0.46317776252573784, "grad_norm": 0.8087119744869654, "learning_rate": 5.83605374080049e-06, "loss": 0.2768, "step": 13497 }, { "epoch": 0.4632120796156486, "grad_norm": 0.8800076887208728, "learning_rate": 5.835505820773736e-06, "loss": 0.3577, "step": 13498 }, { "epoch": 0.4632463967055594, "grad_norm": 0.7031622635562693, "learning_rate": 5.834957890425281e-06, "loss": 0.2352, "step": 13499 }, { "epoch": 0.46328071379547014, "grad_norm": 0.8164670748313875, "learning_rate": 5.834409949761896e-06, "loss": 0.3157, "step": 13500 }, { "epoch": 0.4633150308853809, "grad_norm": 0.8339926793037011, "learning_rate": 5.833861998790347e-06, "loss": 0.2975, "step": 13501 }, { "epoch": 0.4633493479752917, "grad_norm": 0.793223946325621, "learning_rate": 5.8333140375174055e-06, "loss": 0.3284, "step": 13502 }, { "epoch": 0.46338366506520245, "grad_norm": 0.8045565414982219, "learning_rate": 5.832766065949843e-06, "loss": 0.3283, "step": 13503 }, { "epoch": 0.46341798215511326, "grad_norm": 0.8089626693963768, "learning_rate": 5.832218084094425e-06, "loss": 0.2777, "step": 13504 }, { "epoch": 0.463452299245024, "grad_norm": 0.6891062541981288, "learning_rate": 5.831670091957923e-06, "loss": 0.2736, "step": 13505 }, { "epoch": 0.4634866163349348, "grad_norm": 0.7183336309874432, "learning_rate": 5.831122089547107e-06, "loss": 0.2767, "step": 13506 }, { "epoch": 0.46352093342484557, "grad_norm": 0.7843779199564801, "learning_rate": 5.830574076868745e-06, "loss": 0.3106, "step": 13507 }, { "epoch": 0.4635552505147563, "grad_norm": 0.7203950893046926, "learning_rate": 5.83002605392961e-06, "loss": 0.2961, "step": 13508 }, { "epoch": 0.46358956760466713, "grad_norm": 0.7963744693546413, "learning_rate": 5.829478020736471e-06, "loss": 0.3016, "step": 13509 }, { "epoch": 0.4636238846945779, "grad_norm": 0.8430207053187437, "learning_rate": 5.828929977296098e-06, "loss": 0.2532, "step": 13510 }, { "epoch": 0.4636582017844887, "grad_norm": 0.706623131240151, "learning_rate": 5.828381923615261e-06, "loss": 0.2522, "step": 13511 }, { "epoch": 0.46369251887439944, "grad_norm": 0.7263549857099295, "learning_rate": 5.827833859700731e-06, "loss": 0.302, "step": 13512 }, { "epoch": 0.46372683596431025, "grad_norm": 0.7863301066201348, "learning_rate": 5.82728578555928e-06, "loss": 0.2454, "step": 13513 }, { "epoch": 0.463761153054221, "grad_norm": 0.7994757924534629, "learning_rate": 5.826737701197675e-06, "loss": 0.3176, "step": 13514 }, { "epoch": 0.4637954701441318, "grad_norm": 0.9432562234970802, "learning_rate": 5.826189606622692e-06, "loss": 0.2739, "step": 13515 }, { "epoch": 0.46382978723404256, "grad_norm": 0.7606362501835029, "learning_rate": 5.825641501841101e-06, "loss": 0.3224, "step": 13516 }, { "epoch": 0.4638641043239533, "grad_norm": 0.7510556014416493, "learning_rate": 5.82509338685967e-06, "loss": 0.2993, "step": 13517 }, { "epoch": 0.4638984214138641, "grad_norm": 0.7966948742589619, "learning_rate": 5.8245452616851705e-06, "loss": 0.2676, "step": 13518 }, { "epoch": 0.46393273850377487, "grad_norm": 0.8583077497873857, "learning_rate": 5.8239971263243785e-06, "loss": 0.3166, "step": 13519 }, { "epoch": 0.4639670555936857, "grad_norm": 0.804394552659888, "learning_rate": 5.823448980784061e-06, "loss": 0.2959, "step": 13520 }, { "epoch": 0.4640013726835964, "grad_norm": 0.7398956152106352, "learning_rate": 5.822900825070992e-06, "loss": 0.2698, "step": 13521 }, { "epoch": 0.46403568977350723, "grad_norm": 0.7153201041790781, "learning_rate": 5.8223526591919434e-06, "loss": 0.3055, "step": 13522 }, { "epoch": 0.464070006863418, "grad_norm": 0.8550149623641996, "learning_rate": 5.821804483153685e-06, "loss": 0.3003, "step": 13523 }, { "epoch": 0.46410432395332873, "grad_norm": 0.7673099657993322, "learning_rate": 5.82125629696299e-06, "loss": 0.3238, "step": 13524 }, { "epoch": 0.46413864104323954, "grad_norm": 0.7207833143157006, "learning_rate": 5.8207081006266335e-06, "loss": 0.2552, "step": 13525 }, { "epoch": 0.4641729581331503, "grad_norm": 0.7446483482632328, "learning_rate": 5.820159894151384e-06, "loss": 0.3404, "step": 13526 }, { "epoch": 0.4642072752230611, "grad_norm": 1.7981772535697216, "learning_rate": 5.819611677544014e-06, "loss": 0.2168, "step": 13527 }, { "epoch": 0.46424159231297185, "grad_norm": 0.7296866099534459, "learning_rate": 5.819063450811298e-06, "loss": 0.2723, "step": 13528 }, { "epoch": 0.46427590940288266, "grad_norm": 1.2655038801209284, "learning_rate": 5.818515213960008e-06, "loss": 0.3109, "step": 13529 }, { "epoch": 0.4643102264927934, "grad_norm": 0.7354504732481794, "learning_rate": 5.817966966996918e-06, "loss": 0.293, "step": 13530 }, { "epoch": 0.46434454358270416, "grad_norm": 0.7167338939734126, "learning_rate": 5.8174187099287974e-06, "loss": 0.2521, "step": 13531 }, { "epoch": 0.46437886067261497, "grad_norm": 0.7392691466997918, "learning_rate": 5.816870442762425e-06, "loss": 0.2426, "step": 13532 }, { "epoch": 0.4644131777625257, "grad_norm": 0.7975005833402584, "learning_rate": 5.816322165504568e-06, "loss": 0.3121, "step": 13533 }, { "epoch": 0.4644474948524365, "grad_norm": 0.9224510741117042, "learning_rate": 5.815773878162003e-06, "loss": 0.2485, "step": 13534 }, { "epoch": 0.4644818119423473, "grad_norm": 0.6761043717532693, "learning_rate": 5.8152255807415035e-06, "loss": 0.2629, "step": 13535 }, { "epoch": 0.4645161290322581, "grad_norm": 0.7512254195205231, "learning_rate": 5.814677273249842e-06, "loss": 0.2483, "step": 13536 }, { "epoch": 0.46455044612216884, "grad_norm": 0.7722837562751157, "learning_rate": 5.81412895569379e-06, "loss": 0.4082, "step": 13537 }, { "epoch": 0.46458476321207964, "grad_norm": 0.8337863323548401, "learning_rate": 5.813580628080129e-06, "loss": 0.2913, "step": 13538 }, { "epoch": 0.4646190803019904, "grad_norm": 0.8166170662850162, "learning_rate": 5.813032290415625e-06, "loss": 0.2893, "step": 13539 }, { "epoch": 0.46465339739190115, "grad_norm": 0.7410696681880217, "learning_rate": 5.812483942707054e-06, "loss": 0.3148, "step": 13540 }, { "epoch": 0.46468771448181195, "grad_norm": 0.7669421334883919, "learning_rate": 5.811935584961193e-06, "loss": 0.4106, "step": 13541 }, { "epoch": 0.4647220315717227, "grad_norm": 0.7423225954962681, "learning_rate": 5.811387217184814e-06, "loss": 0.293, "step": 13542 }, { "epoch": 0.4647563486616335, "grad_norm": 0.7428736170391558, "learning_rate": 5.810838839384693e-06, "loss": 0.2849, "step": 13543 }, { "epoch": 0.46479066575154426, "grad_norm": 0.8740569726137207, "learning_rate": 5.810290451567602e-06, "loss": 0.2539, "step": 13544 }, { "epoch": 0.46482498284145507, "grad_norm": 0.8149444577649073, "learning_rate": 5.809742053740317e-06, "loss": 0.2844, "step": 13545 }, { "epoch": 0.4648592999313658, "grad_norm": 0.6596268012804426, "learning_rate": 5.809193645909613e-06, "loss": 0.2582, "step": 13546 }, { "epoch": 0.4648936170212766, "grad_norm": 0.8085667910052281, "learning_rate": 5.808645228082265e-06, "loss": 0.3407, "step": 13547 }, { "epoch": 0.4649279341111874, "grad_norm": 0.7195440816058721, "learning_rate": 5.80809680026505e-06, "loss": 0.249, "step": 13548 }, { "epoch": 0.46496225120109813, "grad_norm": 0.8390535439688517, "learning_rate": 5.807548362464739e-06, "loss": 0.2935, "step": 13549 }, { "epoch": 0.46499656829100894, "grad_norm": 0.8263760893764347, "learning_rate": 5.806999914688109e-06, "loss": 0.3131, "step": 13550 }, { "epoch": 0.4650308853809197, "grad_norm": 0.6426868655780908, "learning_rate": 5.806451456941937e-06, "loss": 0.2583, "step": 13551 }, { "epoch": 0.4650652024708305, "grad_norm": 0.6936606424356043, "learning_rate": 5.805902989232998e-06, "loss": 0.3184, "step": 13552 }, { "epoch": 0.46509951956074125, "grad_norm": 0.9327174183485926, "learning_rate": 5.805354511568066e-06, "loss": 0.3212, "step": 13553 }, { "epoch": 0.465133836650652, "grad_norm": 0.7413291204613663, "learning_rate": 5.8048060239539194e-06, "loss": 0.2819, "step": 13554 }, { "epoch": 0.4651681537405628, "grad_norm": 0.6911406225473365, "learning_rate": 5.804257526397332e-06, "loss": 0.2191, "step": 13555 }, { "epoch": 0.46520247083047356, "grad_norm": 0.7209581741871904, "learning_rate": 5.8037090189050804e-06, "loss": 0.2366, "step": 13556 }, { "epoch": 0.46523678792038436, "grad_norm": 0.7135859919817936, "learning_rate": 5.803160501483942e-06, "loss": 0.3202, "step": 13557 }, { "epoch": 0.4652711050102951, "grad_norm": 0.7853615441460211, "learning_rate": 5.80261197414069e-06, "loss": 0.3016, "step": 13558 }, { "epoch": 0.4653054221002059, "grad_norm": 0.7930768493043114, "learning_rate": 5.8020634368821025e-06, "loss": 0.2836, "step": 13559 }, { "epoch": 0.4653397391901167, "grad_norm": 0.7761709189201006, "learning_rate": 5.801514889714957e-06, "loss": 0.2706, "step": 13560 }, { "epoch": 0.4653740562800274, "grad_norm": 0.6243575104213507, "learning_rate": 5.800966332646032e-06, "loss": 0.2412, "step": 13561 }, { "epoch": 0.46540837336993823, "grad_norm": 0.722335740768954, "learning_rate": 5.800417765682099e-06, "loss": 0.3086, "step": 13562 }, { "epoch": 0.465442690459849, "grad_norm": 0.7402324792693912, "learning_rate": 5.799869188829939e-06, "loss": 0.2702, "step": 13563 }, { "epoch": 0.4654770075497598, "grad_norm": 0.789767594456331, "learning_rate": 5.799320602096328e-06, "loss": 0.3091, "step": 13564 }, { "epoch": 0.46551132463967054, "grad_norm": 0.8099636517232134, "learning_rate": 5.798772005488043e-06, "loss": 0.2961, "step": 13565 }, { "epoch": 0.46554564172958135, "grad_norm": 0.8267918844915204, "learning_rate": 5.79822339901186e-06, "loss": 0.299, "step": 13566 }, { "epoch": 0.4655799588194921, "grad_norm": 0.8143166744979132, "learning_rate": 5.797674782674557e-06, "loss": 0.3122, "step": 13567 }, { "epoch": 0.4656142759094029, "grad_norm": 0.7265104381776497, "learning_rate": 5.7971261564829125e-06, "loss": 0.2528, "step": 13568 }, { "epoch": 0.46564859299931366, "grad_norm": 0.7390676192788425, "learning_rate": 5.796577520443705e-06, "loss": 0.26, "step": 13569 }, { "epoch": 0.4656829100892244, "grad_norm": 0.8016552296123803, "learning_rate": 5.79602887456371e-06, "loss": 0.3091, "step": 13570 }, { "epoch": 0.4657172271791352, "grad_norm": 0.7716286577235548, "learning_rate": 5.795480218849706e-06, "loss": 0.3585, "step": 13571 }, { "epoch": 0.46575154426904597, "grad_norm": 0.7915170891662074, "learning_rate": 5.794931553308471e-06, "loss": 0.3012, "step": 13572 }, { "epoch": 0.4657858613589568, "grad_norm": 0.7603640973193885, "learning_rate": 5.794382877946785e-06, "loss": 0.2663, "step": 13573 }, { "epoch": 0.46582017844886753, "grad_norm": 0.8361362326330407, "learning_rate": 5.793834192771424e-06, "loss": 0.2938, "step": 13574 }, { "epoch": 0.46585449553877833, "grad_norm": 0.7841782429545168, "learning_rate": 5.7932854977891665e-06, "loss": 0.2541, "step": 13575 }, { "epoch": 0.4658888126286891, "grad_norm": 0.7909324437527406, "learning_rate": 5.792736793006793e-06, "loss": 0.2827, "step": 13576 }, { "epoch": 0.46592312971859984, "grad_norm": 0.767012880184132, "learning_rate": 5.792188078431079e-06, "loss": 0.257, "step": 13577 }, { "epoch": 0.46595744680851064, "grad_norm": 0.7787437832007762, "learning_rate": 5.791639354068805e-06, "loss": 0.3446, "step": 13578 }, { "epoch": 0.4659917638984214, "grad_norm": 0.7297317046948867, "learning_rate": 5.791090619926752e-06, "loss": 0.3194, "step": 13579 }, { "epoch": 0.4660260809883322, "grad_norm": 0.8639378883003777, "learning_rate": 5.790541876011695e-06, "loss": 0.3304, "step": 13580 }, { "epoch": 0.46606039807824295, "grad_norm": 0.6920372852630938, "learning_rate": 5.789993122330414e-06, "loss": 0.2814, "step": 13581 }, { "epoch": 0.46609471516815376, "grad_norm": 0.7785922919868502, "learning_rate": 5.78944435888969e-06, "loss": 0.2914, "step": 13582 }, { "epoch": 0.4661290322580645, "grad_norm": 0.673348566098659, "learning_rate": 5.788895585696301e-06, "loss": 0.2652, "step": 13583 }, { "epoch": 0.46616334934797526, "grad_norm": 0.8401224754404624, "learning_rate": 5.7883468027570275e-06, "loss": 0.3544, "step": 13584 }, { "epoch": 0.46619766643788607, "grad_norm": 0.7763333877466848, "learning_rate": 5.787798010078648e-06, "loss": 0.2632, "step": 13585 }, { "epoch": 0.4662319835277968, "grad_norm": 0.7347073275061051, "learning_rate": 5.787249207667943e-06, "loss": 0.3412, "step": 13586 }, { "epoch": 0.46626630061770763, "grad_norm": 0.7390452863191223, "learning_rate": 5.786700395531692e-06, "loss": 0.294, "step": 13587 }, { "epoch": 0.4663006177076184, "grad_norm": 0.7956641128750772, "learning_rate": 5.786151573676673e-06, "loss": 0.3058, "step": 13588 }, { "epoch": 0.4663349347975292, "grad_norm": 0.8097318469271825, "learning_rate": 5.785602742109668e-06, "loss": 0.2762, "step": 13589 }, { "epoch": 0.46636925188743994, "grad_norm": 0.8211854372067617, "learning_rate": 5.785053900837458e-06, "loss": 0.3358, "step": 13590 }, { "epoch": 0.46640356897735075, "grad_norm": 0.775375655777308, "learning_rate": 5.784505049866823e-06, "loss": 0.2554, "step": 13591 }, { "epoch": 0.4664378860672615, "grad_norm": 0.749860846512934, "learning_rate": 5.783956189204543e-06, "loss": 0.2993, "step": 13592 }, { "epoch": 0.46647220315717225, "grad_norm": 0.8212155440199066, "learning_rate": 5.783407318857398e-06, "loss": 0.3369, "step": 13593 }, { "epoch": 0.46650652024708306, "grad_norm": 0.7493955811470514, "learning_rate": 5.782858438832169e-06, "loss": 0.3067, "step": 13594 }, { "epoch": 0.4665408373369938, "grad_norm": 0.8156539625324715, "learning_rate": 5.782309549135635e-06, "loss": 0.3367, "step": 13595 }, { "epoch": 0.4665751544269046, "grad_norm": 0.7178254304026009, "learning_rate": 5.78176064977458e-06, "loss": 0.261, "step": 13596 }, { "epoch": 0.46660947151681537, "grad_norm": 0.8142566320456371, "learning_rate": 5.7812117407557826e-06, "loss": 0.3248, "step": 13597 }, { "epoch": 0.4666437886067262, "grad_norm": 0.756767548907614, "learning_rate": 5.780662822086025e-06, "loss": 0.2544, "step": 13598 }, { "epoch": 0.4666781056966369, "grad_norm": 0.7774393781756297, "learning_rate": 5.78011389377209e-06, "loss": 0.2673, "step": 13599 }, { "epoch": 0.4667124227865477, "grad_norm": 0.731371500748708, "learning_rate": 5.779564955820757e-06, "loss": 0.2467, "step": 13600 }, { "epoch": 0.4667467398764585, "grad_norm": 0.733355771302534, "learning_rate": 5.779016008238807e-06, "loss": 0.3116, "step": 13601 }, { "epoch": 0.46678105696636923, "grad_norm": 0.7068942035596334, "learning_rate": 5.778467051033023e-06, "loss": 0.2713, "step": 13602 }, { "epoch": 0.46681537405628004, "grad_norm": 0.7276378053178865, "learning_rate": 5.7779180842101854e-06, "loss": 0.2729, "step": 13603 }, { "epoch": 0.4668496911461908, "grad_norm": 0.7611178832464573, "learning_rate": 5.777369107777078e-06, "loss": 0.2659, "step": 13604 }, { "epoch": 0.4668840082361016, "grad_norm": 0.7375495435489539, "learning_rate": 5.7768201217404795e-06, "loss": 0.2987, "step": 13605 }, { "epoch": 0.46691832532601235, "grad_norm": 0.8663133540426762, "learning_rate": 5.7762711261071765e-06, "loss": 0.326, "step": 13606 }, { "epoch": 0.4669526424159231, "grad_norm": 0.800053955313436, "learning_rate": 5.7757221208839486e-06, "loss": 0.2821, "step": 13607 }, { "epoch": 0.4669869595058339, "grad_norm": 0.7324104554969845, "learning_rate": 5.775173106077577e-06, "loss": 0.2544, "step": 13608 }, { "epoch": 0.46702127659574466, "grad_norm": 0.7391489720441702, "learning_rate": 5.774624081694846e-06, "loss": 0.2954, "step": 13609 }, { "epoch": 0.46705559368565547, "grad_norm": 0.7555738424275916, "learning_rate": 5.774075047742538e-06, "loss": 0.2689, "step": 13610 }, { "epoch": 0.4670899107755662, "grad_norm": 0.7867454712701575, "learning_rate": 5.773526004227434e-06, "loss": 0.2635, "step": 13611 }, { "epoch": 0.467124227865477, "grad_norm": 0.7588484447990116, "learning_rate": 5.7729769511563195e-06, "loss": 0.3036, "step": 13612 }, { "epoch": 0.4671585449553878, "grad_norm": 0.8139903503510545, "learning_rate": 5.772427888535975e-06, "loss": 0.3026, "step": 13613 }, { "epoch": 0.4671928620452986, "grad_norm": 0.703549891388885, "learning_rate": 5.771878816373186e-06, "loss": 0.2986, "step": 13614 }, { "epoch": 0.46722717913520934, "grad_norm": 0.7440735371974004, "learning_rate": 5.7713297346747335e-06, "loss": 0.3531, "step": 13615 }, { "epoch": 0.4672614962251201, "grad_norm": 0.8114637681593424, "learning_rate": 5.7707806434474e-06, "loss": 0.3217, "step": 13616 }, { "epoch": 0.4672958133150309, "grad_norm": 0.9074479671879205, "learning_rate": 5.770231542697972e-06, "loss": 0.3264, "step": 13617 }, { "epoch": 0.46733013040494165, "grad_norm": 0.7821374578767811, "learning_rate": 5.76968243243323e-06, "loss": 0.3228, "step": 13618 }, { "epoch": 0.46736444749485245, "grad_norm": 0.6720793698078023, "learning_rate": 5.769133312659959e-06, "loss": 0.2931, "step": 13619 }, { "epoch": 0.4673987645847632, "grad_norm": 0.7682652291180464, "learning_rate": 5.7685841833849435e-06, "loss": 0.3073, "step": 13620 }, { "epoch": 0.467433081674674, "grad_norm": 0.7533687241626313, "learning_rate": 5.7680350446149656e-06, "loss": 0.3478, "step": 13621 }, { "epoch": 0.46746739876458476, "grad_norm": 0.7432290111330334, "learning_rate": 5.767485896356812e-06, "loss": 0.2467, "step": 13622 }, { "epoch": 0.4675017158544955, "grad_norm": 0.8134149061932804, "learning_rate": 5.766936738617262e-06, "loss": 0.3125, "step": 13623 }, { "epoch": 0.4675360329444063, "grad_norm": 0.7941436436251411, "learning_rate": 5.766387571403105e-06, "loss": 0.324, "step": 13624 }, { "epoch": 0.4675703500343171, "grad_norm": 0.7424453652869208, "learning_rate": 5.7658383947211216e-06, "loss": 0.2564, "step": 13625 }, { "epoch": 0.4676046671242279, "grad_norm": 0.7409530618071302, "learning_rate": 5.765289208578099e-06, "loss": 0.2487, "step": 13626 }, { "epoch": 0.46763898421413863, "grad_norm": 0.754661448684301, "learning_rate": 5.764740012980819e-06, "loss": 0.3143, "step": 13627 }, { "epoch": 0.46767330130404944, "grad_norm": 0.7974878867283024, "learning_rate": 5.7641908079360705e-06, "loss": 0.262, "step": 13628 }, { "epoch": 0.4677076183939602, "grad_norm": 0.8028633664102817, "learning_rate": 5.763641593450634e-06, "loss": 0.2825, "step": 13629 }, { "epoch": 0.46774193548387094, "grad_norm": 0.8376466979498275, "learning_rate": 5.763092369531296e-06, "loss": 0.3472, "step": 13630 }, { "epoch": 0.46777625257378175, "grad_norm": 0.9065039220538678, "learning_rate": 5.762543136184841e-06, "loss": 0.2587, "step": 13631 }, { "epoch": 0.4678105696636925, "grad_norm": 0.8346058483752136, "learning_rate": 5.7619938934180555e-06, "loss": 0.2937, "step": 13632 }, { "epoch": 0.4678448867536033, "grad_norm": 0.7892730319467022, "learning_rate": 5.761444641237722e-06, "loss": 0.288, "step": 13633 }, { "epoch": 0.46787920384351406, "grad_norm": 0.8036401561755684, "learning_rate": 5.7608953796506285e-06, "loss": 0.3201, "step": 13634 }, { "epoch": 0.46791352093342486, "grad_norm": 0.8880782479528654, "learning_rate": 5.76034610866356e-06, "loss": 0.3209, "step": 13635 }, { "epoch": 0.4679478380233356, "grad_norm": 0.7109003271875052, "learning_rate": 5.7597968282833014e-06, "loss": 0.2953, "step": 13636 }, { "epoch": 0.4679821551132464, "grad_norm": 0.8515428170254252, "learning_rate": 5.759247538516639e-06, "loss": 0.2585, "step": 13637 }, { "epoch": 0.4680164722031572, "grad_norm": 0.7827123317358635, "learning_rate": 5.758698239370361e-06, "loss": 0.295, "step": 13638 }, { "epoch": 0.4680507892930679, "grad_norm": 0.8263483596556754, "learning_rate": 5.758148930851247e-06, "loss": 0.2316, "step": 13639 }, { "epoch": 0.46808510638297873, "grad_norm": 0.8645562764128486, "learning_rate": 5.75759961296609e-06, "loss": 0.2937, "step": 13640 }, { "epoch": 0.4681194234728895, "grad_norm": 0.8341941147746514, "learning_rate": 5.757050285721671e-06, "loss": 0.3027, "step": 13641 }, { "epoch": 0.4681537405628003, "grad_norm": 0.8258951329271709, "learning_rate": 5.756500949124778e-06, "loss": 0.2662, "step": 13642 }, { "epoch": 0.46818805765271104, "grad_norm": 0.8015087064097115, "learning_rate": 5.755951603182198e-06, "loss": 0.3232, "step": 13643 }, { "epoch": 0.46822237474262185, "grad_norm": 0.7008976473752467, "learning_rate": 5.755402247900719e-06, "loss": 0.3063, "step": 13644 }, { "epoch": 0.4682566918325326, "grad_norm": 0.8179363134028176, "learning_rate": 5.754852883287124e-06, "loss": 0.2481, "step": 13645 }, { "epoch": 0.46829100892244335, "grad_norm": 0.7709337278817731, "learning_rate": 5.7543035093482026e-06, "loss": 0.2783, "step": 13646 }, { "epoch": 0.46832532601235416, "grad_norm": 0.7392524570683618, "learning_rate": 5.753754126090742e-06, "loss": 0.2834, "step": 13647 }, { "epoch": 0.4683596431022649, "grad_norm": 0.731836116368325, "learning_rate": 5.753204733521526e-06, "loss": 0.2969, "step": 13648 }, { "epoch": 0.4683939601921757, "grad_norm": 0.7779731048986752, "learning_rate": 5.752655331647343e-06, "loss": 0.2989, "step": 13649 }, { "epoch": 0.46842827728208647, "grad_norm": 0.7557731952701621, "learning_rate": 5.752105920474984e-06, "loss": 0.2965, "step": 13650 }, { "epoch": 0.4684625943719973, "grad_norm": 0.8323022364691693, "learning_rate": 5.75155650001123e-06, "loss": 0.311, "step": 13651 }, { "epoch": 0.468496911461908, "grad_norm": 0.7243961110474689, "learning_rate": 5.751007070262874e-06, "loss": 0.2959, "step": 13652 }, { "epoch": 0.4685312285518188, "grad_norm": 0.7917335400729474, "learning_rate": 5.7504576312367e-06, "loss": 0.3077, "step": 13653 }, { "epoch": 0.4685655456417296, "grad_norm": 0.7833609855307383, "learning_rate": 5.7499081829394985e-06, "loss": 0.3564, "step": 13654 }, { "epoch": 0.46859986273164034, "grad_norm": 0.7351775949695922, "learning_rate": 5.749358725378053e-06, "loss": 0.322, "step": 13655 }, { "epoch": 0.46863417982155114, "grad_norm": 0.7641604333971834, "learning_rate": 5.748809258559156e-06, "loss": 0.3197, "step": 13656 }, { "epoch": 0.4686684969114619, "grad_norm": 0.7860043551324271, "learning_rate": 5.748259782489593e-06, "loss": 0.2989, "step": 13657 }, { "epoch": 0.4687028140013727, "grad_norm": 0.8020685673826226, "learning_rate": 5.747710297176154e-06, "loss": 0.2673, "step": 13658 }, { "epoch": 0.46873713109128345, "grad_norm": 0.8265619355563499, "learning_rate": 5.747160802625624e-06, "loss": 0.3126, "step": 13659 }, { "epoch": 0.46877144818119426, "grad_norm": 0.8391526751440007, "learning_rate": 5.746611298844795e-06, "loss": 0.3001, "step": 13660 }, { "epoch": 0.468805765271105, "grad_norm": 0.8441369974911849, "learning_rate": 5.746061785840453e-06, "loss": 0.3134, "step": 13661 }, { "epoch": 0.46884008236101576, "grad_norm": 0.6500706561098233, "learning_rate": 5.745512263619388e-06, "loss": 0.2475, "step": 13662 }, { "epoch": 0.46887439945092657, "grad_norm": 0.7588259590385431, "learning_rate": 5.7449627321883886e-06, "loss": 0.2778, "step": 13663 }, { "epoch": 0.4689087165408373, "grad_norm": 0.7779055629116399, "learning_rate": 5.744413191554241e-06, "loss": 0.3017, "step": 13664 }, { "epoch": 0.46894303363074813, "grad_norm": 0.7857806290528634, "learning_rate": 5.743863641723738e-06, "loss": 0.2446, "step": 13665 }, { "epoch": 0.4689773507206589, "grad_norm": 0.7844470551406972, "learning_rate": 5.743314082703668e-06, "loss": 0.2701, "step": 13666 }, { "epoch": 0.4690116678105697, "grad_norm": 0.8192182504244264, "learning_rate": 5.7427645145008185e-06, "loss": 0.2836, "step": 13667 }, { "epoch": 0.46904598490048044, "grad_norm": 0.7555505274314311, "learning_rate": 5.742214937121978e-06, "loss": 0.2324, "step": 13668 }, { "epoch": 0.4690803019903912, "grad_norm": 0.8312202967400442, "learning_rate": 5.741665350573937e-06, "loss": 0.2769, "step": 13669 }, { "epoch": 0.469114619080302, "grad_norm": 0.7695801022230527, "learning_rate": 5.741115754863487e-06, "loss": 0.2816, "step": 13670 }, { "epoch": 0.46914893617021275, "grad_norm": 0.836100014671535, "learning_rate": 5.740566149997415e-06, "loss": 0.3177, "step": 13671 }, { "epoch": 0.46918325326012356, "grad_norm": 0.7157787157104637, "learning_rate": 5.740016535982512e-06, "loss": 0.2782, "step": 13672 }, { "epoch": 0.4692175703500343, "grad_norm": 0.781399100329648, "learning_rate": 5.739466912825568e-06, "loss": 0.3334, "step": 13673 }, { "epoch": 0.4692518874399451, "grad_norm": 0.8621468985812358, "learning_rate": 5.738917280533372e-06, "loss": 0.2951, "step": 13674 }, { "epoch": 0.46928620452985587, "grad_norm": 0.728895183641617, "learning_rate": 5.738367639112714e-06, "loss": 0.3059, "step": 13675 }, { "epoch": 0.4693205216197666, "grad_norm": 0.7602452888191751, "learning_rate": 5.737817988570385e-06, "loss": 0.2683, "step": 13676 }, { "epoch": 0.4693548387096774, "grad_norm": 0.7455625035218214, "learning_rate": 5.737268328913176e-06, "loss": 0.3019, "step": 13677 }, { "epoch": 0.4693891557995882, "grad_norm": 0.7083824615603586, "learning_rate": 5.736718660147874e-06, "loss": 0.2737, "step": 13678 }, { "epoch": 0.469423472889499, "grad_norm": 0.8728765548242681, "learning_rate": 5.736168982281274e-06, "loss": 0.2813, "step": 13679 }, { "epoch": 0.46945778997940973, "grad_norm": 0.7030888041853731, "learning_rate": 5.735619295320163e-06, "loss": 0.28, "step": 13680 }, { "epoch": 0.46949210706932054, "grad_norm": 0.8152614691658925, "learning_rate": 5.735069599271334e-06, "loss": 0.3107, "step": 13681 }, { "epoch": 0.4695264241592313, "grad_norm": 0.9279874908236192, "learning_rate": 5.7345198941415784e-06, "loss": 0.2939, "step": 13682 }, { "epoch": 0.4695607412491421, "grad_norm": 0.7821156572801244, "learning_rate": 5.733970179937685e-06, "loss": 0.295, "step": 13683 }, { "epoch": 0.46959505833905285, "grad_norm": 0.7434342775904181, "learning_rate": 5.733420456666444e-06, "loss": 0.2761, "step": 13684 }, { "epoch": 0.4696293754289636, "grad_norm": 0.8219000974288392, "learning_rate": 5.732870724334649e-06, "loss": 0.33, "step": 13685 }, { "epoch": 0.4696636925188744, "grad_norm": 0.6477710446009132, "learning_rate": 5.732320982949092e-06, "loss": 0.2453, "step": 13686 }, { "epoch": 0.46969800960878516, "grad_norm": 0.8424070539774824, "learning_rate": 5.731771232516562e-06, "loss": 0.2927, "step": 13687 }, { "epoch": 0.46973232669869597, "grad_norm": 0.7481940001579985, "learning_rate": 5.731221473043852e-06, "loss": 0.2836, "step": 13688 }, { "epoch": 0.4697666437886067, "grad_norm": 0.6900712396049662, "learning_rate": 5.730671704537753e-06, "loss": 0.2526, "step": 13689 }, { "epoch": 0.4698009608785175, "grad_norm": 0.8075546881141584, "learning_rate": 5.730121927005058e-06, "loss": 0.2771, "step": 13690 }, { "epoch": 0.4698352779684283, "grad_norm": 0.7013191118710046, "learning_rate": 5.729572140452557e-06, "loss": 0.2834, "step": 13691 }, { "epoch": 0.46986959505833903, "grad_norm": 0.7667125393607666, "learning_rate": 5.729022344887042e-06, "loss": 0.2968, "step": 13692 }, { "epoch": 0.46990391214824984, "grad_norm": 0.7654789770983916, "learning_rate": 5.7284725403153065e-06, "loss": 0.2728, "step": 13693 }, { "epoch": 0.4699382292381606, "grad_norm": 0.817474676439047, "learning_rate": 5.727922726744142e-06, "loss": 0.3207, "step": 13694 }, { "epoch": 0.4699725463280714, "grad_norm": 0.8062971171334555, "learning_rate": 5.727372904180343e-06, "loss": 0.3499, "step": 13695 }, { "epoch": 0.47000686341798215, "grad_norm": 0.7656371144651896, "learning_rate": 5.726823072630697e-06, "loss": 0.2747, "step": 13696 }, { "epoch": 0.47004118050789295, "grad_norm": 0.825293018539658, "learning_rate": 5.726273232102e-06, "loss": 0.331, "step": 13697 }, { "epoch": 0.4700754975978037, "grad_norm": 0.8136706521091823, "learning_rate": 5.725723382601045e-06, "loss": 0.2974, "step": 13698 }, { "epoch": 0.47010981468771446, "grad_norm": 0.7507763892114719, "learning_rate": 5.725173524134623e-06, "loss": 0.299, "step": 13699 }, { "epoch": 0.47014413177762526, "grad_norm": 0.7469450678870069, "learning_rate": 5.724623656709527e-06, "loss": 0.2915, "step": 13700 }, { "epoch": 0.470178448867536, "grad_norm": 0.6893987015910186, "learning_rate": 5.724073780332551e-06, "loss": 0.2204, "step": 13701 }, { "epoch": 0.4702127659574468, "grad_norm": 0.8327896807626628, "learning_rate": 5.723523895010487e-06, "loss": 0.3027, "step": 13702 }, { "epoch": 0.4702470830473576, "grad_norm": 0.8061226488917197, "learning_rate": 5.72297400075013e-06, "loss": 0.3498, "step": 13703 }, { "epoch": 0.4702814001372684, "grad_norm": 0.7167229270588509, "learning_rate": 5.722424097558273e-06, "loss": 0.2755, "step": 13704 }, { "epoch": 0.47031571722717913, "grad_norm": 0.7227694213533079, "learning_rate": 5.721874185441707e-06, "loss": 0.2943, "step": 13705 }, { "epoch": 0.47035003431708994, "grad_norm": 0.8574562713328437, "learning_rate": 5.721324264407226e-06, "loss": 0.2873, "step": 13706 }, { "epoch": 0.4703843514070007, "grad_norm": 0.7584333804051626, "learning_rate": 5.720774334461628e-06, "loss": 0.313, "step": 13707 }, { "epoch": 0.47041866849691144, "grad_norm": 0.7505925897464779, "learning_rate": 5.7202243956117e-06, "loss": 0.2617, "step": 13708 }, { "epoch": 0.47045298558682225, "grad_norm": 0.689063750518011, "learning_rate": 5.719674447864241e-06, "loss": 0.2503, "step": 13709 }, { "epoch": 0.470487302676733, "grad_norm": 0.6878259271775476, "learning_rate": 5.719124491226043e-06, "loss": 0.3115, "step": 13710 }, { "epoch": 0.4705216197666438, "grad_norm": 0.7353715581079111, "learning_rate": 5.718574525703901e-06, "loss": 0.2496, "step": 13711 }, { "epoch": 0.47055593685655456, "grad_norm": 0.9001256957233043, "learning_rate": 5.718024551304607e-06, "loss": 0.3152, "step": 13712 }, { "epoch": 0.47059025394646536, "grad_norm": 0.7870233369850463, "learning_rate": 5.717474568034958e-06, "loss": 0.367, "step": 13713 }, { "epoch": 0.4706245710363761, "grad_norm": 0.7496043611423656, "learning_rate": 5.716924575901746e-06, "loss": 0.2545, "step": 13714 }, { "epoch": 0.47065888812628687, "grad_norm": 0.7730509792853157, "learning_rate": 5.716374574911767e-06, "loss": 0.2847, "step": 13715 }, { "epoch": 0.4706932052161977, "grad_norm": 0.8673933844570798, "learning_rate": 5.715824565071816e-06, "loss": 0.2389, "step": 13716 }, { "epoch": 0.4707275223061084, "grad_norm": 0.8307449968215038, "learning_rate": 5.715274546388687e-06, "loss": 0.3127, "step": 13717 }, { "epoch": 0.47076183939601923, "grad_norm": 0.8183560111585556, "learning_rate": 5.714724518869176e-06, "loss": 0.3142, "step": 13718 }, { "epoch": 0.47079615648593, "grad_norm": 0.719671895422515, "learning_rate": 5.7141744825200755e-06, "loss": 0.3144, "step": 13719 }, { "epoch": 0.4708304735758408, "grad_norm": 0.7951781725285698, "learning_rate": 5.713624437348182e-06, "loss": 0.2965, "step": 13720 }, { "epoch": 0.47086479066575154, "grad_norm": 0.7338150077920588, "learning_rate": 5.713074383360291e-06, "loss": 0.2832, "step": 13721 }, { "epoch": 0.4708991077556623, "grad_norm": 0.8164962806790905, "learning_rate": 5.712524320563197e-06, "loss": 0.3363, "step": 13722 }, { "epoch": 0.4709334248455731, "grad_norm": 0.7738803672528342, "learning_rate": 5.711974248963694e-06, "loss": 0.2824, "step": 13723 }, { "epoch": 0.47096774193548385, "grad_norm": 0.691515611296447, "learning_rate": 5.711424168568582e-06, "loss": 0.341, "step": 13724 }, { "epoch": 0.47100205902539466, "grad_norm": 0.7511850337870546, "learning_rate": 5.710874079384653e-06, "loss": 0.3511, "step": 13725 }, { "epoch": 0.4710363761153054, "grad_norm": 0.7980566218347436, "learning_rate": 5.710323981418704e-06, "loss": 0.3283, "step": 13726 }, { "epoch": 0.4710706932052162, "grad_norm": 0.7627403888531812, "learning_rate": 5.70977387467753e-06, "loss": 0.3192, "step": 13727 }, { "epoch": 0.47110501029512697, "grad_norm": 0.7419638427222167, "learning_rate": 5.709223759167927e-06, "loss": 0.256, "step": 13728 }, { "epoch": 0.4711393273850378, "grad_norm": 0.7189510678445169, "learning_rate": 5.708673634896691e-06, "loss": 0.2636, "step": 13729 }, { "epoch": 0.4711736444749485, "grad_norm": 0.7450757473576086, "learning_rate": 5.7081235018706195e-06, "loss": 0.3102, "step": 13730 }, { "epoch": 0.4712079615648593, "grad_norm": 0.7683740018275078, "learning_rate": 5.707573360096507e-06, "loss": 0.3235, "step": 13731 }, { "epoch": 0.4712422786547701, "grad_norm": 0.6904364257582467, "learning_rate": 5.7070232095811505e-06, "loss": 0.2387, "step": 13732 }, { "epoch": 0.47127659574468084, "grad_norm": 0.7825150718828713, "learning_rate": 5.7064730503313455e-06, "loss": 0.2612, "step": 13733 }, { "epoch": 0.47131091283459164, "grad_norm": 0.7670317635440081, "learning_rate": 5.7059228823538926e-06, "loss": 0.3177, "step": 13734 }, { "epoch": 0.4713452299245024, "grad_norm": 0.8046729518028635, "learning_rate": 5.705372705655583e-06, "loss": 0.3161, "step": 13735 }, { "epoch": 0.4713795470144132, "grad_norm": 0.7455180952395954, "learning_rate": 5.704822520243217e-06, "loss": 0.3657, "step": 13736 }, { "epoch": 0.47141386410432395, "grad_norm": 0.7368064018371685, "learning_rate": 5.70427232612359e-06, "loss": 0.3546, "step": 13737 }, { "epoch": 0.4714481811942347, "grad_norm": 0.7972405777212455, "learning_rate": 5.7037221233035e-06, "loss": 0.3086, "step": 13738 }, { "epoch": 0.4714824982841455, "grad_norm": 0.7552865490415077, "learning_rate": 5.703171911789743e-06, "loss": 0.2894, "step": 13739 }, { "epoch": 0.47151681537405626, "grad_norm": 0.7357497801623474, "learning_rate": 5.7026216915891185e-06, "loss": 0.2805, "step": 13740 }, { "epoch": 0.47155113246396707, "grad_norm": 0.8145468113377983, "learning_rate": 5.70207146270842e-06, "loss": 0.2714, "step": 13741 }, { "epoch": 0.4715854495538778, "grad_norm": 0.703437898240328, "learning_rate": 5.701521225154449e-06, "loss": 0.2579, "step": 13742 }, { "epoch": 0.47161976664378863, "grad_norm": 0.8064004608801101, "learning_rate": 5.700970978934001e-06, "loss": 0.2851, "step": 13743 }, { "epoch": 0.4716540837336994, "grad_norm": 0.8640134748423332, "learning_rate": 5.700420724053872e-06, "loss": 0.283, "step": 13744 }, { "epoch": 0.47168840082361013, "grad_norm": 0.7617159330300023, "learning_rate": 5.699870460520862e-06, "loss": 0.3389, "step": 13745 }, { "epoch": 0.47172271791352094, "grad_norm": 0.7014941154276734, "learning_rate": 5.699320188341768e-06, "loss": 0.2722, "step": 13746 }, { "epoch": 0.4717570350034317, "grad_norm": 0.6832979728171527, "learning_rate": 5.698769907523392e-06, "loss": 0.2548, "step": 13747 }, { "epoch": 0.4717913520933425, "grad_norm": 0.7422595340785002, "learning_rate": 5.698219618072524e-06, "loss": 0.2716, "step": 13748 }, { "epoch": 0.47182566918325325, "grad_norm": 0.7403045419696155, "learning_rate": 5.697669319995968e-06, "loss": 0.2905, "step": 13749 }, { "epoch": 0.47185998627316406, "grad_norm": 0.853050655555204, "learning_rate": 5.697119013300524e-06, "loss": 0.3449, "step": 13750 }, { "epoch": 0.4718943033630748, "grad_norm": 0.7515820552552465, "learning_rate": 5.696568697992984e-06, "loss": 0.2473, "step": 13751 }, { "epoch": 0.4719286204529856, "grad_norm": 0.861473721462254, "learning_rate": 5.6960183740801515e-06, "loss": 0.2973, "step": 13752 }, { "epoch": 0.47196293754289637, "grad_norm": 0.7621485132539201, "learning_rate": 5.695468041568823e-06, "loss": 0.2936, "step": 13753 }, { "epoch": 0.4719972546328071, "grad_norm": 0.8545846925376445, "learning_rate": 5.694917700465797e-06, "loss": 0.3332, "step": 13754 }, { "epoch": 0.4720315717227179, "grad_norm": 0.7760366501655204, "learning_rate": 5.6943673507778714e-06, "loss": 0.2492, "step": 13755 }, { "epoch": 0.4720658888126287, "grad_norm": 0.7488637288484579, "learning_rate": 5.69381699251185e-06, "loss": 0.2736, "step": 13756 }, { "epoch": 0.4721002059025395, "grad_norm": 1.0173942894495696, "learning_rate": 5.693266625674528e-06, "loss": 0.3261, "step": 13757 }, { "epoch": 0.47213452299245023, "grad_norm": 0.6850550453856002, "learning_rate": 5.692716250272704e-06, "loss": 0.3048, "step": 13758 }, { "epoch": 0.47216884008236104, "grad_norm": 0.799257476471589, "learning_rate": 5.6921658663131785e-06, "loss": 0.2707, "step": 13759 }, { "epoch": 0.4722031571722718, "grad_norm": 0.6928492650085457, "learning_rate": 5.6916154738027505e-06, "loss": 0.2937, "step": 13760 }, { "epoch": 0.47223747426218254, "grad_norm": 0.7748837695958732, "learning_rate": 5.691065072748221e-06, "loss": 0.309, "step": 13761 }, { "epoch": 0.47227179135209335, "grad_norm": 0.7147228373892345, "learning_rate": 5.690514663156387e-06, "loss": 0.3217, "step": 13762 }, { "epoch": 0.4723061084420041, "grad_norm": 0.7496976741405806, "learning_rate": 5.689964245034051e-06, "loss": 0.3098, "step": 13763 }, { "epoch": 0.4723404255319149, "grad_norm": 0.9280788265372129, "learning_rate": 5.689413818388009e-06, "loss": 0.3376, "step": 13764 }, { "epoch": 0.47237474262182566, "grad_norm": 0.7064086641810028, "learning_rate": 5.688863383225064e-06, "loss": 0.2552, "step": 13765 }, { "epoch": 0.47240905971173647, "grad_norm": 0.76299486491352, "learning_rate": 5.688312939552016e-06, "loss": 0.2835, "step": 13766 }, { "epoch": 0.4724433768016472, "grad_norm": 0.7408859235451923, "learning_rate": 5.6877624873756625e-06, "loss": 0.2835, "step": 13767 }, { "epoch": 0.47247769389155797, "grad_norm": 0.738834026145159, "learning_rate": 5.687212026702805e-06, "loss": 0.3133, "step": 13768 }, { "epoch": 0.4725120109814688, "grad_norm": 0.78617660318508, "learning_rate": 5.686661557540246e-06, "loss": 0.3156, "step": 13769 }, { "epoch": 0.47254632807137953, "grad_norm": 0.8038876454713539, "learning_rate": 5.6861110798947824e-06, "loss": 0.3012, "step": 13770 }, { "epoch": 0.47258064516129034, "grad_norm": 0.7292617316428919, "learning_rate": 5.685560593773217e-06, "loss": 0.2706, "step": 13771 }, { "epoch": 0.4726149622512011, "grad_norm": 0.855561719126549, "learning_rate": 5.685010099182351e-06, "loss": 0.2464, "step": 13772 }, { "epoch": 0.4726492793411119, "grad_norm": 0.8302374252939048, "learning_rate": 5.684459596128983e-06, "loss": 0.3282, "step": 13773 }, { "epoch": 0.47268359643102265, "grad_norm": 0.8078024033921123, "learning_rate": 5.683909084619914e-06, "loss": 0.2978, "step": 13774 }, { "epoch": 0.4727179135209334, "grad_norm": 0.6892038232623463, "learning_rate": 5.683358564661946e-06, "loss": 0.2989, "step": 13775 }, { "epoch": 0.4727522306108442, "grad_norm": 0.7846599210730281, "learning_rate": 5.68280803626188e-06, "loss": 0.2846, "step": 13776 }, { "epoch": 0.47278654770075496, "grad_norm": 0.8609238150283275, "learning_rate": 5.682257499426516e-06, "loss": 0.2875, "step": 13777 }, { "epoch": 0.47282086479066576, "grad_norm": 0.6925500900287733, "learning_rate": 5.6817069541626555e-06, "loss": 0.3077, "step": 13778 }, { "epoch": 0.4728551818805765, "grad_norm": 0.7979692114506614, "learning_rate": 5.6811564004771025e-06, "loss": 0.2763, "step": 13779 }, { "epoch": 0.4728894989704873, "grad_norm": 0.7690925120863772, "learning_rate": 5.680605838376655e-06, "loss": 0.2543, "step": 13780 }, { "epoch": 0.4729238160603981, "grad_norm": 0.8390622495403165, "learning_rate": 5.680055267868116e-06, "loss": 0.3037, "step": 13781 }, { "epoch": 0.4729581331503089, "grad_norm": 0.7389482817634416, "learning_rate": 5.679504688958287e-06, "loss": 0.2561, "step": 13782 }, { "epoch": 0.47299245024021963, "grad_norm": 0.7991655958196404, "learning_rate": 5.678954101653969e-06, "loss": 0.2741, "step": 13783 }, { "epoch": 0.4730267673301304, "grad_norm": 0.772339989982598, "learning_rate": 5.678403505961966e-06, "loss": 0.3129, "step": 13784 }, { "epoch": 0.4730610844200412, "grad_norm": 0.7650903281591326, "learning_rate": 5.677852901889079e-06, "loss": 0.2947, "step": 13785 }, { "epoch": 0.47309540150995194, "grad_norm": 0.7935108031330389, "learning_rate": 5.67730228944211e-06, "loss": 0.3202, "step": 13786 }, { "epoch": 0.47312971859986275, "grad_norm": 0.8010166637577921, "learning_rate": 5.676751668627859e-06, "loss": 0.2433, "step": 13787 }, { "epoch": 0.4731640356897735, "grad_norm": 0.7262363520672717, "learning_rate": 5.6762010394531305e-06, "loss": 0.2592, "step": 13788 }, { "epoch": 0.4731983527796843, "grad_norm": 0.7100930955976882, "learning_rate": 5.675650401924726e-06, "loss": 0.2865, "step": 13789 }, { "epoch": 0.47323266986959506, "grad_norm": 0.8059348736381582, "learning_rate": 5.675099756049449e-06, "loss": 0.3313, "step": 13790 }, { "epoch": 0.4732669869595058, "grad_norm": 0.8161094908333303, "learning_rate": 5.674549101834104e-06, "loss": 0.2988, "step": 13791 }, { "epoch": 0.4733013040494166, "grad_norm": 0.8604213845534342, "learning_rate": 5.673998439285488e-06, "loss": 0.2533, "step": 13792 }, { "epoch": 0.47333562113932737, "grad_norm": 0.7086383737265631, "learning_rate": 5.67344776841041e-06, "loss": 0.2586, "step": 13793 }, { "epoch": 0.4733699382292382, "grad_norm": 0.7524392189618034, "learning_rate": 5.6728970892156676e-06, "loss": 0.3285, "step": 13794 }, { "epoch": 0.4734042553191489, "grad_norm": 0.7002526404771419, "learning_rate": 5.672346401708068e-06, "loss": 0.2922, "step": 13795 }, { "epoch": 0.47343857240905973, "grad_norm": 0.7246255354359414, "learning_rate": 5.6717957058944105e-06, "loss": 0.302, "step": 13796 }, { "epoch": 0.4734728894989705, "grad_norm": 0.7032602375950815, "learning_rate": 5.6712450017815015e-06, "loss": 0.3207, "step": 13797 }, { "epoch": 0.47350720658888124, "grad_norm": 0.7815613667576091, "learning_rate": 5.670694289376143e-06, "loss": 0.2825, "step": 13798 }, { "epoch": 0.47354152367879204, "grad_norm": 0.923430780408314, "learning_rate": 5.670143568685138e-06, "loss": 0.311, "step": 13799 }, { "epoch": 0.4735758407687028, "grad_norm": 0.7144960586943376, "learning_rate": 5.669592839715291e-06, "loss": 0.2031, "step": 13800 }, { "epoch": 0.4736101578586136, "grad_norm": 0.7269297203633504, "learning_rate": 5.669042102473406e-06, "loss": 0.3207, "step": 13801 }, { "epoch": 0.47364447494852435, "grad_norm": 0.787948262949798, "learning_rate": 5.668491356966285e-06, "loss": 0.2865, "step": 13802 }, { "epoch": 0.47367879203843516, "grad_norm": 0.9228067043814637, "learning_rate": 5.667940603200732e-06, "loss": 0.2872, "step": 13803 }, { "epoch": 0.4737131091283459, "grad_norm": 0.7533514120587406, "learning_rate": 5.667389841183551e-06, "loss": 0.29, "step": 13804 }, { "epoch": 0.4737474262182567, "grad_norm": 0.8450900442270053, "learning_rate": 5.666839070921548e-06, "loss": 0.2682, "step": 13805 }, { "epoch": 0.47378174330816747, "grad_norm": 0.7026136840325666, "learning_rate": 5.666288292421526e-06, "loss": 0.2785, "step": 13806 }, { "epoch": 0.4738160603980782, "grad_norm": 1.180162142086547, "learning_rate": 5.6657375056902885e-06, "loss": 0.2989, "step": 13807 }, { "epoch": 0.473850377487989, "grad_norm": 0.8050157895895147, "learning_rate": 5.66518671073464e-06, "loss": 0.3067, "step": 13808 }, { "epoch": 0.4738846945778998, "grad_norm": 0.7331674001194473, "learning_rate": 5.664635907561385e-06, "loss": 0.2852, "step": 13809 }, { "epoch": 0.4739190116678106, "grad_norm": 0.7546595713334483, "learning_rate": 5.6640850961773296e-06, "loss": 0.2811, "step": 13810 }, { "epoch": 0.47395332875772134, "grad_norm": 0.8487271620025415, "learning_rate": 5.663534276589275e-06, "loss": 0.2754, "step": 13811 }, { "epoch": 0.47398764584763214, "grad_norm": 0.8604671815818717, "learning_rate": 5.662983448804029e-06, "loss": 0.3128, "step": 13812 }, { "epoch": 0.4740219629375429, "grad_norm": 0.9159643279364299, "learning_rate": 5.662432612828397e-06, "loss": 0.2862, "step": 13813 }, { "epoch": 0.47405628002745365, "grad_norm": 0.820483060654234, "learning_rate": 5.661881768669182e-06, "loss": 0.3041, "step": 13814 }, { "epoch": 0.47409059711736445, "grad_norm": 0.8427749582151386, "learning_rate": 5.6613309163331874e-06, "loss": 0.2716, "step": 13815 }, { "epoch": 0.4741249142072752, "grad_norm": 0.8760332573176334, "learning_rate": 5.660780055827222e-06, "loss": 0.3333, "step": 13816 }, { "epoch": 0.474159231297186, "grad_norm": 0.7985421574190911, "learning_rate": 5.660229187158089e-06, "loss": 0.2831, "step": 13817 }, { "epoch": 0.47419354838709676, "grad_norm": 0.7977757901301724, "learning_rate": 5.659678310332594e-06, "loss": 0.2505, "step": 13818 }, { "epoch": 0.47422786547700757, "grad_norm": 0.8087651418606604, "learning_rate": 5.659127425357542e-06, "loss": 0.2936, "step": 13819 }, { "epoch": 0.4742621825669183, "grad_norm": 0.7967374432511303, "learning_rate": 5.65857653223974e-06, "loss": 0.2946, "step": 13820 }, { "epoch": 0.4742964996568291, "grad_norm": 0.7525328010732518, "learning_rate": 5.658025630985993e-06, "loss": 0.3525, "step": 13821 }, { "epoch": 0.4743308167467399, "grad_norm": 0.6521076961225492, "learning_rate": 5.657474721603106e-06, "loss": 0.2412, "step": 13822 }, { "epoch": 0.47436513383665063, "grad_norm": 0.8170906416042271, "learning_rate": 5.656923804097886e-06, "loss": 0.3157, "step": 13823 }, { "epoch": 0.47439945092656144, "grad_norm": 0.7205503099265698, "learning_rate": 5.656372878477137e-06, "loss": 0.2455, "step": 13824 }, { "epoch": 0.4744337680164722, "grad_norm": 0.7733327347106094, "learning_rate": 5.655821944747667e-06, "loss": 0.2355, "step": 13825 }, { "epoch": 0.474468085106383, "grad_norm": 0.7737589553518903, "learning_rate": 5.655271002916281e-06, "loss": 0.2782, "step": 13826 }, { "epoch": 0.47450240219629375, "grad_norm": 0.7982450562054887, "learning_rate": 5.6547200529897865e-06, "loss": 0.3152, "step": 13827 }, { "epoch": 0.47453671928620456, "grad_norm": 0.8088123348452236, "learning_rate": 5.654169094974988e-06, "loss": 0.3639, "step": 13828 }, { "epoch": 0.4745710363761153, "grad_norm": 0.7570098954117762, "learning_rate": 5.653618128878694e-06, "loss": 0.2708, "step": 13829 }, { "epoch": 0.47460535346602606, "grad_norm": 0.7270763057356507, "learning_rate": 5.653067154707711e-06, "loss": 0.3461, "step": 13830 }, { "epoch": 0.47463967055593687, "grad_norm": 0.8079449316174062, "learning_rate": 5.652516172468842e-06, "loss": 0.2922, "step": 13831 }, { "epoch": 0.4746739876458476, "grad_norm": 0.7188879824604956, "learning_rate": 5.6519651821689e-06, "loss": 0.3153, "step": 13832 }, { "epoch": 0.4747083047357584, "grad_norm": 0.8080814370132914, "learning_rate": 5.6514141838146866e-06, "loss": 0.3189, "step": 13833 }, { "epoch": 0.4747426218256692, "grad_norm": 0.7611897465889005, "learning_rate": 5.650863177413008e-06, "loss": 0.2669, "step": 13834 }, { "epoch": 0.47477693891558, "grad_norm": 0.7657784230674504, "learning_rate": 5.650312162970676e-06, "loss": 0.2951, "step": 13835 }, { "epoch": 0.47481125600549073, "grad_norm": 0.7320780763619152, "learning_rate": 5.649761140494497e-06, "loss": 0.2592, "step": 13836 }, { "epoch": 0.4748455730954015, "grad_norm": 0.7376284784171053, "learning_rate": 5.649210109991274e-06, "loss": 0.2773, "step": 13837 }, { "epoch": 0.4748798901853123, "grad_norm": 0.856029587006978, "learning_rate": 5.648659071467818e-06, "loss": 0.2965, "step": 13838 }, { "epoch": 0.47491420727522304, "grad_norm": 0.876387112206712, "learning_rate": 5.648108024930936e-06, "loss": 0.3249, "step": 13839 }, { "epoch": 0.47494852436513385, "grad_norm": 0.920276169012361, "learning_rate": 5.647556970387434e-06, "loss": 0.2712, "step": 13840 }, { "epoch": 0.4749828414550446, "grad_norm": 0.7236832883972499, "learning_rate": 5.647005907844121e-06, "loss": 0.2828, "step": 13841 }, { "epoch": 0.4750171585449554, "grad_norm": 1.5538325104294157, "learning_rate": 5.646454837307803e-06, "loss": 0.3094, "step": 13842 }, { "epoch": 0.47505147563486616, "grad_norm": 0.8291917707504954, "learning_rate": 5.645903758785292e-06, "loss": 0.3197, "step": 13843 }, { "epoch": 0.4750857927247769, "grad_norm": 0.7766697060881623, "learning_rate": 5.645352672283391e-06, "loss": 0.2542, "step": 13844 }, { "epoch": 0.4751201098146877, "grad_norm": 0.780317410808789, "learning_rate": 5.64480157780891e-06, "loss": 0.313, "step": 13845 }, { "epoch": 0.47515442690459847, "grad_norm": 0.7632300058650832, "learning_rate": 5.644250475368659e-06, "loss": 0.2858, "step": 13846 }, { "epoch": 0.4751887439945093, "grad_norm": 0.8551169844827939, "learning_rate": 5.643699364969444e-06, "loss": 0.3238, "step": 13847 }, { "epoch": 0.47522306108442003, "grad_norm": 0.7377207598590309, "learning_rate": 5.643148246618073e-06, "loss": 0.2984, "step": 13848 }, { "epoch": 0.47525737817433084, "grad_norm": 0.6761204284359262, "learning_rate": 5.642597120321355e-06, "loss": 0.2593, "step": 13849 }, { "epoch": 0.4752916952642416, "grad_norm": 0.8466967061200918, "learning_rate": 5.6420459860860985e-06, "loss": 0.2796, "step": 13850 }, { "epoch": 0.4753260123541524, "grad_norm": 0.8024347557458357, "learning_rate": 5.641494843919113e-06, "loss": 0.228, "step": 13851 }, { "epoch": 0.47536032944406315, "grad_norm": 0.6869728225390735, "learning_rate": 5.640943693827208e-06, "loss": 0.2678, "step": 13852 }, { "epoch": 0.4753946465339739, "grad_norm": 0.7793503444429012, "learning_rate": 5.640392535817189e-06, "loss": 0.2841, "step": 13853 }, { "epoch": 0.4754289636238847, "grad_norm": 0.8226116241152306, "learning_rate": 5.639841369895869e-06, "loss": 0.2618, "step": 13854 }, { "epoch": 0.47546328071379546, "grad_norm": 0.7938327508321981, "learning_rate": 5.639290196070054e-06, "loss": 0.2762, "step": 13855 }, { "epoch": 0.47549759780370626, "grad_norm": 0.7913367023535423, "learning_rate": 5.638739014346552e-06, "loss": 0.2666, "step": 13856 }, { "epoch": 0.475531914893617, "grad_norm": 0.7666365147780531, "learning_rate": 5.638187824732176e-06, "loss": 0.3112, "step": 13857 }, { "epoch": 0.4755662319835278, "grad_norm": 0.7539350491684299, "learning_rate": 5.637636627233732e-06, "loss": 0.3162, "step": 13858 }, { "epoch": 0.47560054907343857, "grad_norm": 0.7019459353431569, "learning_rate": 5.637085421858033e-06, "loss": 0.2658, "step": 13859 }, { "epoch": 0.4756348661633493, "grad_norm": 0.8122199515661357, "learning_rate": 5.636534208611885e-06, "loss": 0.2665, "step": 13860 }, { "epoch": 0.47566918325326013, "grad_norm": 0.766172661002879, "learning_rate": 5.6359829875021e-06, "loss": 0.2768, "step": 13861 }, { "epoch": 0.4757035003431709, "grad_norm": 0.8455248776560967, "learning_rate": 5.635431758535487e-06, "loss": 0.2856, "step": 13862 }, { "epoch": 0.4757378174330817, "grad_norm": 0.7012786574531377, "learning_rate": 5.634880521718854e-06, "loss": 0.2658, "step": 13863 }, { "epoch": 0.47577213452299244, "grad_norm": 0.7906252241789463, "learning_rate": 5.634329277059011e-06, "loss": 0.2358, "step": 13864 }, { "epoch": 0.47580645161290325, "grad_norm": 0.7902461552893726, "learning_rate": 5.633778024562773e-06, "loss": 0.342, "step": 13865 }, { "epoch": 0.475840768702814, "grad_norm": 0.7783073244998568, "learning_rate": 5.633226764236943e-06, "loss": 0.2635, "step": 13866 }, { "epoch": 0.47587508579272475, "grad_norm": 0.7039788122889347, "learning_rate": 5.6326754960883366e-06, "loss": 0.2933, "step": 13867 }, { "epoch": 0.47590940288263556, "grad_norm": 0.6996615163055678, "learning_rate": 5.6321242201237625e-06, "loss": 0.2779, "step": 13868 }, { "epoch": 0.4759437199725463, "grad_norm": 0.7949407279748077, "learning_rate": 5.631572936350029e-06, "loss": 0.2785, "step": 13869 }, { "epoch": 0.4759780370624571, "grad_norm": 0.6816165655315588, "learning_rate": 5.631021644773949e-06, "loss": 0.2348, "step": 13870 }, { "epoch": 0.47601235415236787, "grad_norm": 0.844367998645893, "learning_rate": 5.630470345402332e-06, "loss": 0.2842, "step": 13871 }, { "epoch": 0.4760466712422787, "grad_norm": 0.7927332172759836, "learning_rate": 5.62991903824199e-06, "loss": 0.2903, "step": 13872 }, { "epoch": 0.4760809883321894, "grad_norm": 0.8043199662028284, "learning_rate": 5.629367723299731e-06, "loss": 0.325, "step": 13873 }, { "epoch": 0.47611530542210023, "grad_norm": 0.7215307471501888, "learning_rate": 5.628816400582369e-06, "loss": 0.2757, "step": 13874 }, { "epoch": 0.476149622512011, "grad_norm": 0.8640606340035449, "learning_rate": 5.628265070096713e-06, "loss": 0.306, "step": 13875 }, { "epoch": 0.47618393960192174, "grad_norm": 0.8314063106503916, "learning_rate": 5.627713731849575e-06, "loss": 0.3213, "step": 13876 }, { "epoch": 0.47621825669183254, "grad_norm": 0.7345263559494388, "learning_rate": 5.627162385847765e-06, "loss": 0.3172, "step": 13877 }, { "epoch": 0.4762525737817433, "grad_norm": 0.6511355146036983, "learning_rate": 5.626611032098095e-06, "loss": 0.2834, "step": 13878 }, { "epoch": 0.4762868908716541, "grad_norm": 0.8260998426857841, "learning_rate": 5.626059670607377e-06, "loss": 0.2954, "step": 13879 }, { "epoch": 0.47632120796156485, "grad_norm": 0.76097363553358, "learning_rate": 5.625508301382421e-06, "loss": 0.3256, "step": 13880 }, { "epoch": 0.47635552505147566, "grad_norm": 0.7951221642380405, "learning_rate": 5.62495692443004e-06, "loss": 0.263, "step": 13881 }, { "epoch": 0.4763898421413864, "grad_norm": 0.659224491176178, "learning_rate": 5.624405539757043e-06, "loss": 0.2243, "step": 13882 }, { "epoch": 0.47642415923129716, "grad_norm": 0.7110728698684359, "learning_rate": 5.623854147370244e-06, "loss": 0.2707, "step": 13883 }, { "epoch": 0.47645847632120797, "grad_norm": 0.7313672371339002, "learning_rate": 5.623302747276457e-06, "loss": 0.2531, "step": 13884 }, { "epoch": 0.4764927934111187, "grad_norm": 1.0263734436201335, "learning_rate": 5.622751339482488e-06, "loss": 0.3261, "step": 13885 }, { "epoch": 0.4765271105010295, "grad_norm": 0.8296244566234348, "learning_rate": 5.622199923995153e-06, "loss": 0.2909, "step": 13886 }, { "epoch": 0.4765614275909403, "grad_norm": 0.7722072090056621, "learning_rate": 5.621648500821264e-06, "loss": 0.2417, "step": 13887 }, { "epoch": 0.4765957446808511, "grad_norm": 0.7088688157008406, "learning_rate": 5.621097069967634e-06, "loss": 0.2819, "step": 13888 }, { "epoch": 0.47663006177076184, "grad_norm": 0.8259925669976335, "learning_rate": 5.620545631441071e-06, "loss": 0.3243, "step": 13889 }, { "epoch": 0.4766643788606726, "grad_norm": 0.7216067019495673, "learning_rate": 5.6199941852483915e-06, "loss": 0.2573, "step": 13890 }, { "epoch": 0.4766986959505834, "grad_norm": 0.7936556295923607, "learning_rate": 5.619442731396407e-06, "loss": 0.2675, "step": 13891 }, { "epoch": 0.47673301304049415, "grad_norm": 0.8055212224599433, "learning_rate": 5.618891269891929e-06, "loss": 0.2692, "step": 13892 }, { "epoch": 0.47676733013040495, "grad_norm": 0.8674812479700543, "learning_rate": 5.618339800741773e-06, "loss": 0.3354, "step": 13893 }, { "epoch": 0.4768016472203157, "grad_norm": 0.8305123903380055, "learning_rate": 5.617788323952747e-06, "loss": 0.3528, "step": 13894 }, { "epoch": 0.4768359643102265, "grad_norm": 0.8122144891371178, "learning_rate": 5.617236839531667e-06, "loss": 0.298, "step": 13895 }, { "epoch": 0.47687028140013726, "grad_norm": 0.7828023571254284, "learning_rate": 5.616685347485346e-06, "loss": 0.2908, "step": 13896 }, { "epoch": 0.47690459849004807, "grad_norm": 0.7921950947786416, "learning_rate": 5.616133847820597e-06, "loss": 0.2645, "step": 13897 }, { "epoch": 0.4769389155799588, "grad_norm": 0.7125802350359607, "learning_rate": 5.615582340544232e-06, "loss": 0.2819, "step": 13898 }, { "epoch": 0.4769732326698696, "grad_norm": 0.7170004712957222, "learning_rate": 5.615030825663065e-06, "loss": 0.244, "step": 13899 }, { "epoch": 0.4770075497597804, "grad_norm": 0.7217749844645276, "learning_rate": 5.61447930318391e-06, "loss": 0.2625, "step": 13900 }, { "epoch": 0.47704186684969113, "grad_norm": 0.8519926377950853, "learning_rate": 5.613927773113577e-06, "loss": 0.3312, "step": 13901 }, { "epoch": 0.47707618393960194, "grad_norm": 0.7385012927934419, "learning_rate": 5.613376235458885e-06, "loss": 0.2506, "step": 13902 }, { "epoch": 0.4771105010295127, "grad_norm": 0.7320747457487811, "learning_rate": 5.612824690226643e-06, "loss": 0.3053, "step": 13903 }, { "epoch": 0.4771448181194235, "grad_norm": 0.7711161535480364, "learning_rate": 5.612273137423668e-06, "loss": 0.2668, "step": 13904 }, { "epoch": 0.47717913520933425, "grad_norm": 0.6705201145000314, "learning_rate": 5.611721577056772e-06, "loss": 0.2661, "step": 13905 }, { "epoch": 0.477213452299245, "grad_norm": 0.79830664127721, "learning_rate": 5.611170009132768e-06, "loss": 0.321, "step": 13906 }, { "epoch": 0.4772477693891558, "grad_norm": 0.7498012668800468, "learning_rate": 5.610618433658472e-06, "loss": 0.3469, "step": 13907 }, { "epoch": 0.47728208647906656, "grad_norm": 0.7453866010433057, "learning_rate": 5.610066850640697e-06, "loss": 0.2825, "step": 13908 }, { "epoch": 0.47731640356897737, "grad_norm": 0.8778329996936669, "learning_rate": 5.609515260086256e-06, "loss": 0.2568, "step": 13909 }, { "epoch": 0.4773507206588881, "grad_norm": 0.7751589561545535, "learning_rate": 5.608963662001967e-06, "loss": 0.3009, "step": 13910 }, { "epoch": 0.4773850377487989, "grad_norm": 0.7812736129556018, "learning_rate": 5.6084120563946395e-06, "loss": 0.2921, "step": 13911 }, { "epoch": 0.4774193548387097, "grad_norm": 0.7863865838518258, "learning_rate": 5.607860443271091e-06, "loss": 0.2507, "step": 13912 }, { "epoch": 0.4774536719286204, "grad_norm": 0.7795022425288765, "learning_rate": 5.607308822638138e-06, "loss": 0.3293, "step": 13913 }, { "epoch": 0.47748798901853123, "grad_norm": 0.8125199321356931, "learning_rate": 5.60675719450259e-06, "loss": 0.2695, "step": 13914 }, { "epoch": 0.477522306108442, "grad_norm": 0.9050732797742314, "learning_rate": 5.606205558871265e-06, "loss": 0.291, "step": 13915 }, { "epoch": 0.4775566231983528, "grad_norm": 0.7743434818630778, "learning_rate": 5.605653915750976e-06, "loss": 0.3185, "step": 13916 }, { "epoch": 0.47759094028826354, "grad_norm": 0.7721292965552178, "learning_rate": 5.60510226514854e-06, "loss": 0.2686, "step": 13917 }, { "epoch": 0.47762525737817435, "grad_norm": 0.7757727539575198, "learning_rate": 5.604550607070771e-06, "loss": 0.2962, "step": 13918 }, { "epoch": 0.4776595744680851, "grad_norm": 0.7538893276466747, "learning_rate": 5.603998941524483e-06, "loss": 0.3134, "step": 13919 }, { "epoch": 0.4776938915579959, "grad_norm": 0.7943480108910113, "learning_rate": 5.603447268516494e-06, "loss": 0.2986, "step": 13920 }, { "epoch": 0.47772820864790666, "grad_norm": 0.687306299924696, "learning_rate": 5.602895588053617e-06, "loss": 0.2677, "step": 13921 }, { "epoch": 0.4777625257378174, "grad_norm": 0.7534869261692607, "learning_rate": 5.6023439001426684e-06, "loss": 0.2823, "step": 13922 }, { "epoch": 0.4777968428277282, "grad_norm": 0.8117565979282645, "learning_rate": 5.601792204790462e-06, "loss": 0.2918, "step": 13923 }, { "epoch": 0.47783115991763897, "grad_norm": 0.9346112568306122, "learning_rate": 5.601240502003814e-06, "loss": 0.3527, "step": 13924 }, { "epoch": 0.4778654770075498, "grad_norm": 1.0136722800837725, "learning_rate": 5.6006887917895415e-06, "loss": 0.3232, "step": 13925 }, { "epoch": 0.47789979409746053, "grad_norm": 0.807104366959356, "learning_rate": 5.60013707415446e-06, "loss": 0.3164, "step": 13926 }, { "epoch": 0.47793411118737134, "grad_norm": 0.8111186852698582, "learning_rate": 5.599585349105384e-06, "loss": 0.3319, "step": 13927 }, { "epoch": 0.4779684282772821, "grad_norm": 0.8038032349027622, "learning_rate": 5.59903361664913e-06, "loss": 0.2497, "step": 13928 }, { "epoch": 0.47800274536719284, "grad_norm": 0.8336642652663382, "learning_rate": 5.598481876792515e-06, "loss": 0.2766, "step": 13929 }, { "epoch": 0.47803706245710365, "grad_norm": 0.7727588868301565, "learning_rate": 5.597930129542354e-06, "loss": 0.248, "step": 13930 }, { "epoch": 0.4780713795470144, "grad_norm": 0.7789505167929631, "learning_rate": 5.597378374905461e-06, "loss": 0.2698, "step": 13931 }, { "epoch": 0.4781056966369252, "grad_norm": 0.7541566512481456, "learning_rate": 5.596826612888657e-06, "loss": 0.3081, "step": 13932 }, { "epoch": 0.47814001372683596, "grad_norm": 0.7313188531445566, "learning_rate": 5.5962748434987555e-06, "loss": 0.2715, "step": 13933 }, { "epoch": 0.47817433081674676, "grad_norm": 0.9967921891742118, "learning_rate": 5.5957230667425735e-06, "loss": 0.307, "step": 13934 }, { "epoch": 0.4782086479066575, "grad_norm": 0.7431218089934802, "learning_rate": 5.595171282626926e-06, "loss": 0.2963, "step": 13935 }, { "epoch": 0.47824296499656826, "grad_norm": 0.7910624172289148, "learning_rate": 5.5946194911586334e-06, "loss": 0.2937, "step": 13936 }, { "epoch": 0.47827728208647907, "grad_norm": 0.7931445214884252, "learning_rate": 5.59406769234451e-06, "loss": 0.2816, "step": 13937 }, { "epoch": 0.4783115991763898, "grad_norm": 0.7957000881396822, "learning_rate": 5.593515886191372e-06, "loss": 0.2956, "step": 13938 }, { "epoch": 0.47834591626630063, "grad_norm": 0.7318857779928127, "learning_rate": 5.592964072706037e-06, "loss": 0.3077, "step": 13939 }, { "epoch": 0.4783802333562114, "grad_norm": 0.8148972007768311, "learning_rate": 5.592412251895323e-06, "loss": 0.2943, "step": 13940 }, { "epoch": 0.4784145504461222, "grad_norm": 0.6656037019320371, "learning_rate": 5.5918604237660455e-06, "loss": 0.246, "step": 13941 }, { "epoch": 0.47844886753603294, "grad_norm": 0.8302797484500575, "learning_rate": 5.591308588325025e-06, "loss": 0.3277, "step": 13942 }, { "epoch": 0.47848318462594375, "grad_norm": 0.8467311570357066, "learning_rate": 5.590756745579073e-06, "loss": 0.2703, "step": 13943 }, { "epoch": 0.4785175017158545, "grad_norm": 0.6858419649088165, "learning_rate": 5.590204895535012e-06, "loss": 0.3205, "step": 13944 }, { "epoch": 0.47855181880576525, "grad_norm": 0.8177295257988418, "learning_rate": 5.589653038199657e-06, "loss": 0.3274, "step": 13945 }, { "epoch": 0.47858613589567606, "grad_norm": 1.0035006437290213, "learning_rate": 5.5891011735798266e-06, "loss": 0.3165, "step": 13946 }, { "epoch": 0.4786204529855868, "grad_norm": 0.8235755175544605, "learning_rate": 5.5885493016823375e-06, "loss": 0.2617, "step": 13947 }, { "epoch": 0.4786547700754976, "grad_norm": 0.7729971668561609, "learning_rate": 5.587997422514009e-06, "loss": 0.2676, "step": 13948 }, { "epoch": 0.47868908716540837, "grad_norm": 0.7798187449579366, "learning_rate": 5.5874455360816574e-06, "loss": 0.3222, "step": 13949 }, { "epoch": 0.4787234042553192, "grad_norm": 0.9263800815540458, "learning_rate": 5.5868936423921e-06, "loss": 0.2784, "step": 13950 }, { "epoch": 0.4787577213452299, "grad_norm": 0.7712327444825806, "learning_rate": 5.586341741452157e-06, "loss": 0.3896, "step": 13951 }, { "epoch": 0.4787920384351407, "grad_norm": 0.7775745461992686, "learning_rate": 5.585789833268646e-06, "loss": 0.3153, "step": 13952 }, { "epoch": 0.4788263555250515, "grad_norm": 0.8824119924432818, "learning_rate": 5.585237917848382e-06, "loss": 0.2633, "step": 13953 }, { "epoch": 0.47886067261496224, "grad_norm": 0.7301507574426916, "learning_rate": 5.584685995198187e-06, "loss": 0.3214, "step": 13954 }, { "epoch": 0.47889498970487304, "grad_norm": 0.6716927277478064, "learning_rate": 5.58413406532488e-06, "loss": 0.2916, "step": 13955 }, { "epoch": 0.4789293067947838, "grad_norm": 0.9321589895077067, "learning_rate": 5.583582128235277e-06, "loss": 0.2715, "step": 13956 }, { "epoch": 0.4789636238846946, "grad_norm": 0.8436277539756032, "learning_rate": 5.5830301839361955e-06, "loss": 0.2448, "step": 13957 }, { "epoch": 0.47899794097460535, "grad_norm": 0.7977122466032774, "learning_rate": 5.582478232434459e-06, "loss": 0.266, "step": 13958 }, { "epoch": 0.4790322580645161, "grad_norm": 0.7703301471279806, "learning_rate": 5.5819262737368795e-06, "loss": 0.3206, "step": 13959 }, { "epoch": 0.4790665751544269, "grad_norm": 0.8122392573982661, "learning_rate": 5.58137430785028e-06, "loss": 0.2904, "step": 13960 }, { "epoch": 0.47910089224433766, "grad_norm": 0.7500902286095058, "learning_rate": 5.58082233478148e-06, "loss": 0.2741, "step": 13961 }, { "epoch": 0.47913520933424847, "grad_norm": 0.7507359166607643, "learning_rate": 5.580270354537297e-06, "loss": 0.2925, "step": 13962 }, { "epoch": 0.4791695264241592, "grad_norm": 0.7919443189010213, "learning_rate": 5.579718367124549e-06, "loss": 0.2991, "step": 13963 }, { "epoch": 0.47920384351407, "grad_norm": 0.7625073922135541, "learning_rate": 5.579166372550059e-06, "loss": 0.2635, "step": 13964 }, { "epoch": 0.4792381606039808, "grad_norm": 0.781053056897481, "learning_rate": 5.578614370820642e-06, "loss": 0.3189, "step": 13965 }, { "epoch": 0.4792724776938916, "grad_norm": 0.7261253377491389, "learning_rate": 5.57806236194312e-06, "loss": 0.2462, "step": 13966 }, { "epoch": 0.47930679478380234, "grad_norm": 0.8124194149959889, "learning_rate": 5.5775103459243096e-06, "loss": 0.2959, "step": 13967 }, { "epoch": 0.4793411118737131, "grad_norm": 0.7888742214744074, "learning_rate": 5.576958322771033e-06, "loss": 0.2726, "step": 13968 }, { "epoch": 0.4793754289636239, "grad_norm": 0.8763163406046719, "learning_rate": 5.576406292490109e-06, "loss": 0.2996, "step": 13969 }, { "epoch": 0.47940974605353465, "grad_norm": 0.7697202574593796, "learning_rate": 5.575854255088357e-06, "loss": 0.2928, "step": 13970 }, { "epoch": 0.47944406314344545, "grad_norm": 0.7769676902444956, "learning_rate": 5.575302210572599e-06, "loss": 0.2835, "step": 13971 }, { "epoch": 0.4794783802333562, "grad_norm": 0.7644376809358132, "learning_rate": 5.574750158949652e-06, "loss": 0.2992, "step": 13972 }, { "epoch": 0.479512697323267, "grad_norm": 0.913510484447427, "learning_rate": 5.574198100226337e-06, "loss": 0.2752, "step": 13973 }, { "epoch": 0.47954701441317776, "grad_norm": 0.7749020009584927, "learning_rate": 5.573646034409474e-06, "loss": 0.327, "step": 13974 }, { "epoch": 0.4795813315030885, "grad_norm": 0.7312213526408159, "learning_rate": 5.573093961505881e-06, "loss": 0.2771, "step": 13975 }, { "epoch": 0.4796156485929993, "grad_norm": 0.7187565989931377, "learning_rate": 5.572541881522381e-06, "loss": 0.3551, "step": 13976 }, { "epoch": 0.4796499656829101, "grad_norm": 0.8229046308812544, "learning_rate": 5.5719897944657955e-06, "loss": 0.3139, "step": 13977 }, { "epoch": 0.4796842827728209, "grad_norm": 0.7182936722095846, "learning_rate": 5.571437700342942e-06, "loss": 0.2371, "step": 13978 }, { "epoch": 0.47971859986273163, "grad_norm": 0.780392691201994, "learning_rate": 5.570885599160642e-06, "loss": 0.3038, "step": 13979 }, { "epoch": 0.47975291695264244, "grad_norm": 0.7413944826709244, "learning_rate": 5.5703334909257155e-06, "loss": 0.281, "step": 13980 }, { "epoch": 0.4797872340425532, "grad_norm": 0.7761882396975244, "learning_rate": 5.569781375644984e-06, "loss": 0.3279, "step": 13981 }, { "epoch": 0.47982155113246394, "grad_norm": 0.7343740407737234, "learning_rate": 5.5692292533252675e-06, "loss": 0.2793, "step": 13982 }, { "epoch": 0.47985586822237475, "grad_norm": 0.8252673854261058, "learning_rate": 5.568677123973387e-06, "loss": 0.2914, "step": 13983 }, { "epoch": 0.4798901853122855, "grad_norm": 0.8116030470240971, "learning_rate": 5.568124987596164e-06, "loss": 0.2766, "step": 13984 }, { "epoch": 0.4799245024021963, "grad_norm": 0.7934330651113712, "learning_rate": 5.56757284420042e-06, "loss": 0.3012, "step": 13985 }, { "epoch": 0.47995881949210706, "grad_norm": 0.8648586229687458, "learning_rate": 5.567020693792973e-06, "loss": 0.2864, "step": 13986 }, { "epoch": 0.47999313658201787, "grad_norm": 0.8441520985966993, "learning_rate": 5.566468536380649e-06, "loss": 0.282, "step": 13987 }, { "epoch": 0.4800274536719286, "grad_norm": 0.748845082926711, "learning_rate": 5.565916371970266e-06, "loss": 0.3007, "step": 13988 }, { "epoch": 0.4800617707618394, "grad_norm": 0.6961468527229254, "learning_rate": 5.565364200568646e-06, "loss": 0.2598, "step": 13989 }, { "epoch": 0.4800960878517502, "grad_norm": 0.7782681946682052, "learning_rate": 5.5648120221826096e-06, "loss": 0.3045, "step": 13990 }, { "epoch": 0.4801304049416609, "grad_norm": 0.8285765371662942, "learning_rate": 5.564259836818979e-06, "loss": 0.273, "step": 13991 }, { "epoch": 0.48016472203157173, "grad_norm": 0.8757718797006054, "learning_rate": 5.563707644484577e-06, "loss": 0.2936, "step": 13992 }, { "epoch": 0.4801990391214825, "grad_norm": 0.7730440565844146, "learning_rate": 5.563155445186225e-06, "loss": 0.2383, "step": 13993 }, { "epoch": 0.4802333562113933, "grad_norm": 0.73943539320574, "learning_rate": 5.562603238930743e-06, "loss": 0.2537, "step": 13994 }, { "epoch": 0.48026767330130404, "grad_norm": 0.7530899853059588, "learning_rate": 5.562051025724953e-06, "loss": 0.2936, "step": 13995 }, { "epoch": 0.48030199039121485, "grad_norm": 0.8659260845880811, "learning_rate": 5.56149880557568e-06, "loss": 0.2639, "step": 13996 }, { "epoch": 0.4803363074811256, "grad_norm": 0.8262458313002795, "learning_rate": 5.560946578489741e-06, "loss": 0.2924, "step": 13997 }, { "epoch": 0.48037062457103635, "grad_norm": 0.7469644214412344, "learning_rate": 5.560394344473962e-06, "loss": 0.2975, "step": 13998 }, { "epoch": 0.48040494166094716, "grad_norm": 0.7535508379882069, "learning_rate": 5.559842103535166e-06, "loss": 0.3023, "step": 13999 }, { "epoch": 0.4804392587508579, "grad_norm": 0.7795756747063274, "learning_rate": 5.559289855680173e-06, "loss": 0.2761, "step": 14000 }, { "epoch": 0.4804735758407687, "grad_norm": 0.8100330887169634, "learning_rate": 5.558737600915804e-06, "loss": 0.3224, "step": 14001 }, { "epoch": 0.48050789293067947, "grad_norm": 0.8728169320380625, "learning_rate": 5.558185339248885e-06, "loss": 0.2691, "step": 14002 }, { "epoch": 0.4805422100205903, "grad_norm": 0.7353257900505578, "learning_rate": 5.557633070686238e-06, "loss": 0.2655, "step": 14003 }, { "epoch": 0.48057652711050103, "grad_norm": 0.6915336298956848, "learning_rate": 5.557080795234683e-06, "loss": 0.2413, "step": 14004 }, { "epoch": 0.4806108442004118, "grad_norm": 0.7711173980961702, "learning_rate": 5.556528512901044e-06, "loss": 0.2548, "step": 14005 }, { "epoch": 0.4806451612903226, "grad_norm": 0.6635834599354451, "learning_rate": 5.555976223692144e-06, "loss": 0.2799, "step": 14006 }, { "epoch": 0.48067947838023334, "grad_norm": 0.6996093224086943, "learning_rate": 5.555423927614807e-06, "loss": 0.2625, "step": 14007 }, { "epoch": 0.48071379547014415, "grad_norm": 0.7574381149137464, "learning_rate": 5.554871624675854e-06, "loss": 0.2896, "step": 14008 }, { "epoch": 0.4807481125600549, "grad_norm": 0.8015804460793422, "learning_rate": 5.554319314882111e-06, "loss": 0.3068, "step": 14009 }, { "epoch": 0.4807824296499657, "grad_norm": 0.7581134278237015, "learning_rate": 5.5537669982403964e-06, "loss": 0.259, "step": 14010 }, { "epoch": 0.48081674673987645, "grad_norm": 0.8246616320650233, "learning_rate": 5.553214674757538e-06, "loss": 0.2842, "step": 14011 }, { "epoch": 0.4808510638297872, "grad_norm": 0.8088039408573572, "learning_rate": 5.552662344440356e-06, "loss": 0.2916, "step": 14012 }, { "epoch": 0.480885380919698, "grad_norm": 0.7442264891653971, "learning_rate": 5.552110007295676e-06, "loss": 0.3247, "step": 14013 }, { "epoch": 0.48091969800960876, "grad_norm": 0.7387818177396115, "learning_rate": 5.551557663330319e-06, "loss": 0.3168, "step": 14014 }, { "epoch": 0.48095401509951957, "grad_norm": 0.7708231652589491, "learning_rate": 5.551005312551111e-06, "loss": 0.2907, "step": 14015 }, { "epoch": 0.4809883321894303, "grad_norm": 0.8891236641368712, "learning_rate": 5.550452954964876e-06, "loss": 0.2517, "step": 14016 }, { "epoch": 0.48102264927934113, "grad_norm": 0.9236603792794058, "learning_rate": 5.549900590578436e-06, "loss": 0.2994, "step": 14017 }, { "epoch": 0.4810569663692519, "grad_norm": 0.8764400007006918, "learning_rate": 5.549348219398615e-06, "loss": 0.3227, "step": 14018 }, { "epoch": 0.4810912834591627, "grad_norm": 0.8530912981975075, "learning_rate": 5.5487958414322364e-06, "loss": 0.3025, "step": 14019 }, { "epoch": 0.48112560054907344, "grad_norm": 0.7907873114131063, "learning_rate": 5.548243456686125e-06, "loss": 0.3351, "step": 14020 }, { "epoch": 0.4811599176389842, "grad_norm": 0.8035642676505244, "learning_rate": 5.547691065167106e-06, "loss": 0.2977, "step": 14021 }, { "epoch": 0.481194234728895, "grad_norm": 0.8047104542965973, "learning_rate": 5.547138666882004e-06, "loss": 0.3142, "step": 14022 }, { "epoch": 0.48122855181880575, "grad_norm": 0.7690250525617844, "learning_rate": 5.546586261837639e-06, "loss": 0.2449, "step": 14023 }, { "epoch": 0.48126286890871656, "grad_norm": 0.7773025688573546, "learning_rate": 5.5460338500408385e-06, "loss": 0.2872, "step": 14024 }, { "epoch": 0.4812971859986273, "grad_norm": 0.8699847489424957, "learning_rate": 5.545481431498427e-06, "loss": 0.2487, "step": 14025 }, { "epoch": 0.4813315030885381, "grad_norm": 0.792429011498385, "learning_rate": 5.544929006217229e-06, "loss": 0.3217, "step": 14026 }, { "epoch": 0.48136582017844887, "grad_norm": 0.824081947243524, "learning_rate": 5.544376574204067e-06, "loss": 0.3305, "step": 14027 }, { "epoch": 0.4814001372683596, "grad_norm": 0.8350263580568489, "learning_rate": 5.5438241354657675e-06, "loss": 0.2755, "step": 14028 }, { "epoch": 0.4814344543582704, "grad_norm": 0.811034918550683, "learning_rate": 5.543271690009154e-06, "loss": 0.2629, "step": 14029 }, { "epoch": 0.4814687714481812, "grad_norm": 0.7660519982546057, "learning_rate": 5.5427192378410534e-06, "loss": 0.3063, "step": 14030 }, { "epoch": 0.481503088538092, "grad_norm": 0.787989396428682, "learning_rate": 5.542166778968289e-06, "loss": 0.2901, "step": 14031 }, { "epoch": 0.48153740562800273, "grad_norm": 0.7331956721085872, "learning_rate": 5.541614313397687e-06, "loss": 0.3674, "step": 14032 }, { "epoch": 0.48157172271791354, "grad_norm": 0.7595157011300705, "learning_rate": 5.541061841136071e-06, "loss": 0.2593, "step": 14033 }, { "epoch": 0.4816060398078243, "grad_norm": 0.7910801078085468, "learning_rate": 5.540509362190266e-06, "loss": 0.2568, "step": 14034 }, { "epoch": 0.48164035689773504, "grad_norm": 0.7673069409716127, "learning_rate": 5.539956876567099e-06, "loss": 0.314, "step": 14035 }, { "epoch": 0.48167467398764585, "grad_norm": 0.8400031211762614, "learning_rate": 5.539404384273394e-06, "loss": 0.2832, "step": 14036 }, { "epoch": 0.4817089910775566, "grad_norm": 0.8472302002890855, "learning_rate": 5.538851885315976e-06, "loss": 0.3049, "step": 14037 }, { "epoch": 0.4817433081674674, "grad_norm": 0.6942600039959981, "learning_rate": 5.538299379701672e-06, "loss": 0.2724, "step": 14038 }, { "epoch": 0.48177762525737816, "grad_norm": 0.8079881658557134, "learning_rate": 5.537746867437307e-06, "loss": 0.3201, "step": 14039 }, { "epoch": 0.48181194234728897, "grad_norm": 0.7670358156076768, "learning_rate": 5.537194348529706e-06, "loss": 0.2965, "step": 14040 }, { "epoch": 0.4818462594371997, "grad_norm": 0.6507257542942998, "learning_rate": 5.5366418229856956e-06, "loss": 0.2583, "step": 14041 }, { "epoch": 0.4818805765271105, "grad_norm": 0.8024659070180491, "learning_rate": 5.5360892908121e-06, "loss": 0.3064, "step": 14042 }, { "epoch": 0.4819148936170213, "grad_norm": 0.8742393550024267, "learning_rate": 5.5355367520157464e-06, "loss": 0.2935, "step": 14043 }, { "epoch": 0.48194921070693203, "grad_norm": 0.7377016976499162, "learning_rate": 5.534984206603461e-06, "loss": 0.2668, "step": 14044 }, { "epoch": 0.48198352779684284, "grad_norm": 0.7556913916845939, "learning_rate": 5.534431654582071e-06, "loss": 0.2651, "step": 14045 }, { "epoch": 0.4820178448867536, "grad_norm": 0.7725974617696838, "learning_rate": 5.533879095958399e-06, "loss": 0.288, "step": 14046 }, { "epoch": 0.4820521619766644, "grad_norm": 0.7913671528287108, "learning_rate": 5.533326530739274e-06, "loss": 0.281, "step": 14047 }, { "epoch": 0.48208647906657515, "grad_norm": 0.718047420562046, "learning_rate": 5.532773958931522e-06, "loss": 0.2401, "step": 14048 }, { "epoch": 0.48212079615648595, "grad_norm": 0.7265441335382856, "learning_rate": 5.5322213805419675e-06, "loss": 0.251, "step": 14049 }, { "epoch": 0.4821551132463967, "grad_norm": 0.6999939809539294, "learning_rate": 5.531668795577439e-06, "loss": 0.2356, "step": 14050 }, { "epoch": 0.48218943033630746, "grad_norm": 0.7695644041301003, "learning_rate": 5.531116204044762e-06, "loss": 0.277, "step": 14051 }, { "epoch": 0.48222374742621826, "grad_norm": 0.7894336925338138, "learning_rate": 5.530563605950764e-06, "loss": 0.3091, "step": 14052 }, { "epoch": 0.482258064516129, "grad_norm": 0.7655402727063401, "learning_rate": 5.530011001302271e-06, "loss": 0.2932, "step": 14053 }, { "epoch": 0.4822923816060398, "grad_norm": 0.7624460299091306, "learning_rate": 5.529458390106111e-06, "loss": 0.2829, "step": 14054 }, { "epoch": 0.4823266986959506, "grad_norm": 0.7599453012923383, "learning_rate": 5.528905772369109e-06, "loss": 0.2883, "step": 14055 }, { "epoch": 0.4823610157858614, "grad_norm": 0.8306092571165562, "learning_rate": 5.528353148098093e-06, "loss": 0.2798, "step": 14056 }, { "epoch": 0.48239533287577213, "grad_norm": 0.7074929243859949, "learning_rate": 5.527800517299889e-06, "loss": 0.224, "step": 14057 }, { "epoch": 0.4824296499656829, "grad_norm": 0.6842710502799896, "learning_rate": 5.527247879981326e-06, "loss": 0.2759, "step": 14058 }, { "epoch": 0.4824639670555937, "grad_norm": 0.7883117012961968, "learning_rate": 5.5266952361492295e-06, "loss": 0.3237, "step": 14059 }, { "epoch": 0.48249828414550444, "grad_norm": 0.7999961257617247, "learning_rate": 5.526142585810428e-06, "loss": 0.2764, "step": 14060 }, { "epoch": 0.48253260123541525, "grad_norm": 0.7110625334761831, "learning_rate": 5.525589928971748e-06, "loss": 0.2347, "step": 14061 }, { "epoch": 0.482566918325326, "grad_norm": 1.0745612034190577, "learning_rate": 5.525037265640018e-06, "loss": 0.3029, "step": 14062 }, { "epoch": 0.4826012354152368, "grad_norm": 0.7501723524537616, "learning_rate": 5.524484595822064e-06, "loss": 0.2507, "step": 14063 }, { "epoch": 0.48263555250514756, "grad_norm": 0.7593841257465345, "learning_rate": 5.523931919524714e-06, "loss": 0.3061, "step": 14064 }, { "epoch": 0.48266986959505837, "grad_norm": 0.925874510093837, "learning_rate": 5.523379236754795e-06, "loss": 0.3137, "step": 14065 }, { "epoch": 0.4827041866849691, "grad_norm": 0.9064020413098619, "learning_rate": 5.522826547519138e-06, "loss": 0.2776, "step": 14066 }, { "epoch": 0.48273850377487987, "grad_norm": 0.6887084887901797, "learning_rate": 5.5222738518245685e-06, "loss": 0.3032, "step": 14067 }, { "epoch": 0.4827728208647907, "grad_norm": 0.7782895200895469, "learning_rate": 5.521721149677913e-06, "loss": 0.2993, "step": 14068 }, { "epoch": 0.4828071379547014, "grad_norm": 0.8708403318779532, "learning_rate": 5.521168441086001e-06, "loss": 0.2717, "step": 14069 }, { "epoch": 0.48284145504461223, "grad_norm": 0.8481847629635336, "learning_rate": 5.5206157260556626e-06, "loss": 0.3036, "step": 14070 }, { "epoch": 0.482875772134523, "grad_norm": 0.7613114166259884, "learning_rate": 5.520063004593722e-06, "loss": 0.2488, "step": 14071 }, { "epoch": 0.4829100892244338, "grad_norm": 0.9116250714463483, "learning_rate": 5.519510276707009e-06, "loss": 0.2549, "step": 14072 }, { "epoch": 0.48294440631434454, "grad_norm": 0.7658708935680719, "learning_rate": 5.518957542402354e-06, "loss": 0.2978, "step": 14073 }, { "epoch": 0.4829787234042553, "grad_norm": 0.7223670974280553, "learning_rate": 5.518404801686581e-06, "loss": 0.2702, "step": 14074 }, { "epoch": 0.4830130404941661, "grad_norm": 0.8335675908071821, "learning_rate": 5.517852054566523e-06, "loss": 0.3618, "step": 14075 }, { "epoch": 0.48304735758407685, "grad_norm": 0.7599553446260595, "learning_rate": 5.517299301049006e-06, "loss": 0.3078, "step": 14076 }, { "epoch": 0.48308167467398766, "grad_norm": 0.7366358538164895, "learning_rate": 5.51674654114086e-06, "loss": 0.2975, "step": 14077 }, { "epoch": 0.4831159917638984, "grad_norm": 0.7443976463570721, "learning_rate": 5.516193774848913e-06, "loss": 0.2717, "step": 14078 }, { "epoch": 0.4831503088538092, "grad_norm": 0.769804304037867, "learning_rate": 5.5156410021799924e-06, "loss": 0.2583, "step": 14079 }, { "epoch": 0.48318462594371997, "grad_norm": 0.7366437343345433, "learning_rate": 5.515088223140929e-06, "loss": 0.3537, "step": 14080 }, { "epoch": 0.4832189430336307, "grad_norm": 0.7007467175975821, "learning_rate": 5.514535437738551e-06, "loss": 0.2343, "step": 14081 }, { "epoch": 0.48325326012354153, "grad_norm": 0.843373798047533, "learning_rate": 5.513982645979687e-06, "loss": 0.3063, "step": 14082 }, { "epoch": 0.4832875772134523, "grad_norm": 0.7664740605951988, "learning_rate": 5.513429847871169e-06, "loss": 0.3802, "step": 14083 }, { "epoch": 0.4833218943033631, "grad_norm": 0.7281341025807917, "learning_rate": 5.512877043419822e-06, "loss": 0.286, "step": 14084 }, { "epoch": 0.48335621139327384, "grad_norm": 0.8919484295664005, "learning_rate": 5.512324232632477e-06, "loss": 0.2386, "step": 14085 }, { "epoch": 0.48339052848318464, "grad_norm": 0.8068832620271068, "learning_rate": 5.511771415515964e-06, "loss": 0.3063, "step": 14086 }, { "epoch": 0.4834248455730954, "grad_norm": 0.75952246173452, "learning_rate": 5.511218592077111e-06, "loss": 0.2995, "step": 14087 }, { "epoch": 0.4834591626630062, "grad_norm": 0.7714890329907202, "learning_rate": 5.510665762322749e-06, "loss": 0.2706, "step": 14088 }, { "epoch": 0.48349347975291695, "grad_norm": 0.8005682495797978, "learning_rate": 5.5101129262597076e-06, "loss": 0.2786, "step": 14089 }, { "epoch": 0.4835277968428277, "grad_norm": 0.7418440730976997, "learning_rate": 5.509560083894815e-06, "loss": 0.2058, "step": 14090 }, { "epoch": 0.4835621139327385, "grad_norm": 0.7517824165014366, "learning_rate": 5.509007235234901e-06, "loss": 0.3011, "step": 14091 }, { "epoch": 0.48359643102264926, "grad_norm": 0.6843923434802122, "learning_rate": 5.508454380286798e-06, "loss": 0.2719, "step": 14092 }, { "epoch": 0.48363074811256007, "grad_norm": 0.8823524621581975, "learning_rate": 5.507901519057333e-06, "loss": 0.3085, "step": 14093 }, { "epoch": 0.4836650652024708, "grad_norm": 0.6943812929608968, "learning_rate": 5.5073486515533355e-06, "loss": 0.2883, "step": 14094 }, { "epoch": 0.48369938229238163, "grad_norm": 0.7151493774572866, "learning_rate": 5.5067957777816375e-06, "loss": 0.3009, "step": 14095 }, { "epoch": 0.4837336993822924, "grad_norm": 0.7508332494600771, "learning_rate": 5.506242897749069e-06, "loss": 0.2901, "step": 14096 }, { "epoch": 0.48376801647220313, "grad_norm": 0.8414837207689477, "learning_rate": 5.5056900114624605e-06, "loss": 0.2889, "step": 14097 }, { "epoch": 0.48380233356211394, "grad_norm": 0.7897802416056773, "learning_rate": 5.50513711892864e-06, "loss": 0.28, "step": 14098 }, { "epoch": 0.4838366506520247, "grad_norm": 0.7823329744527844, "learning_rate": 5.504584220154441e-06, "loss": 0.3068, "step": 14099 }, { "epoch": 0.4838709677419355, "grad_norm": 0.7538251992695555, "learning_rate": 5.50403131514669e-06, "loss": 0.31, "step": 14100 }, { "epoch": 0.48390528483184625, "grad_norm": 0.7725075861074151, "learning_rate": 5.503478403912221e-06, "loss": 0.3144, "step": 14101 }, { "epoch": 0.48393960192175706, "grad_norm": 0.8839467164719633, "learning_rate": 5.502925486457863e-06, "loss": 0.2935, "step": 14102 }, { "epoch": 0.4839739190116678, "grad_norm": 0.7241704964130579, "learning_rate": 5.502372562790448e-06, "loss": 0.3669, "step": 14103 }, { "epoch": 0.48400823610157856, "grad_norm": 0.804103498098222, "learning_rate": 5.5018196329168035e-06, "loss": 0.2631, "step": 14104 }, { "epoch": 0.48404255319148937, "grad_norm": 0.7767409982138387, "learning_rate": 5.5012666968437656e-06, "loss": 0.3245, "step": 14105 }, { "epoch": 0.4840768702814001, "grad_norm": 0.8419677629731154, "learning_rate": 5.5007137545781595e-06, "loss": 0.2838, "step": 14106 }, { "epoch": 0.4841111873713109, "grad_norm": 0.7683641536017858, "learning_rate": 5.50016080612682e-06, "loss": 0.3286, "step": 14107 }, { "epoch": 0.4841455044612217, "grad_norm": 0.8357066471624215, "learning_rate": 5.499607851496577e-06, "loss": 0.2552, "step": 14108 }, { "epoch": 0.4841798215511325, "grad_norm": 0.7995487145239208, "learning_rate": 5.499054890694261e-06, "loss": 0.3024, "step": 14109 }, { "epoch": 0.48421413864104323, "grad_norm": 0.7724514940104585, "learning_rate": 5.498501923726704e-06, "loss": 0.3186, "step": 14110 }, { "epoch": 0.48424845573095404, "grad_norm": 0.7863567007502037, "learning_rate": 5.4979489506007355e-06, "loss": 0.306, "step": 14111 }, { "epoch": 0.4842827728208648, "grad_norm": 0.7973797242595349, "learning_rate": 5.4973959713231905e-06, "loss": 0.2682, "step": 14112 }, { "epoch": 0.48431708991077554, "grad_norm": 0.7386213526490951, "learning_rate": 5.496842985900897e-06, "loss": 0.2403, "step": 14113 }, { "epoch": 0.48435140700068635, "grad_norm": 0.7199281475905052, "learning_rate": 5.496289994340687e-06, "loss": 0.32, "step": 14114 }, { "epoch": 0.4843857240905971, "grad_norm": 0.8083877263971535, "learning_rate": 5.4957369966493944e-06, "loss": 0.3575, "step": 14115 }, { "epoch": 0.4844200411805079, "grad_norm": 0.791828029233948, "learning_rate": 5.495183992833848e-06, "loss": 0.2764, "step": 14116 }, { "epoch": 0.48445435827041866, "grad_norm": 0.8048164114905041, "learning_rate": 5.4946309829008795e-06, "loss": 0.3003, "step": 14117 }, { "epoch": 0.48448867536032947, "grad_norm": 0.760179458928575, "learning_rate": 5.494077966857324e-06, "loss": 0.3075, "step": 14118 }, { "epoch": 0.4845229924502402, "grad_norm": 0.8167354023127259, "learning_rate": 5.493524944710012e-06, "loss": 0.2711, "step": 14119 }, { "epoch": 0.48455730954015097, "grad_norm": 0.8016380338790311, "learning_rate": 5.492971916465772e-06, "loss": 0.2977, "step": 14120 }, { "epoch": 0.4845916266300618, "grad_norm": 0.6764798709400373, "learning_rate": 5.492418882131441e-06, "loss": 0.2803, "step": 14121 }, { "epoch": 0.48462594371997253, "grad_norm": 0.6837713263957479, "learning_rate": 5.491865841713848e-06, "loss": 0.2403, "step": 14122 }, { "epoch": 0.48466026080988334, "grad_norm": 0.9012374278681035, "learning_rate": 5.4913127952198256e-06, "loss": 0.2929, "step": 14123 }, { "epoch": 0.4846945778997941, "grad_norm": 0.7846424178783744, "learning_rate": 5.4907597426562065e-06, "loss": 0.2853, "step": 14124 }, { "epoch": 0.4847288949897049, "grad_norm": 0.8357666843830924, "learning_rate": 5.490206684029823e-06, "loss": 0.2672, "step": 14125 }, { "epoch": 0.48476321207961565, "grad_norm": 0.8342047657967568, "learning_rate": 5.489653619347507e-06, "loss": 0.367, "step": 14126 }, { "epoch": 0.4847975291695264, "grad_norm": 0.8176589317248182, "learning_rate": 5.489100548616092e-06, "loss": 0.3251, "step": 14127 }, { "epoch": 0.4848318462594372, "grad_norm": 0.7826952538983534, "learning_rate": 5.488547471842411e-06, "loss": 0.2615, "step": 14128 }, { "epoch": 0.48486616334934796, "grad_norm": 0.7215976792296831, "learning_rate": 5.487994389033294e-06, "loss": 0.2886, "step": 14129 }, { "epoch": 0.48490048043925876, "grad_norm": 0.7665331188808061, "learning_rate": 5.487441300195575e-06, "loss": 0.2876, "step": 14130 }, { "epoch": 0.4849347975291695, "grad_norm": 0.843886116464975, "learning_rate": 5.4868882053360885e-06, "loss": 0.2894, "step": 14131 }, { "epoch": 0.4849691146190803, "grad_norm": 0.8099567000915527, "learning_rate": 5.486335104461664e-06, "loss": 0.2382, "step": 14132 }, { "epoch": 0.4850034317089911, "grad_norm": 0.7706763854817351, "learning_rate": 5.485781997579138e-06, "loss": 0.2836, "step": 14133 }, { "epoch": 0.4850377487989019, "grad_norm": 0.7840560728934804, "learning_rate": 5.485228884695342e-06, "loss": 0.2841, "step": 14134 }, { "epoch": 0.48507206588881263, "grad_norm": 0.7637360479033753, "learning_rate": 5.4846757658171075e-06, "loss": 0.266, "step": 14135 }, { "epoch": 0.4851063829787234, "grad_norm": 0.7876445988332191, "learning_rate": 5.48412264095127e-06, "loss": 0.2639, "step": 14136 }, { "epoch": 0.4851407000686342, "grad_norm": 0.8126403279436865, "learning_rate": 5.483569510104663e-06, "loss": 0.2679, "step": 14137 }, { "epoch": 0.48517501715854494, "grad_norm": 0.8192461987884908, "learning_rate": 5.483016373284116e-06, "loss": 0.2962, "step": 14138 }, { "epoch": 0.48520933424845575, "grad_norm": 0.7900027937195909, "learning_rate": 5.482463230496464e-06, "loss": 0.3022, "step": 14139 }, { "epoch": 0.4852436513383665, "grad_norm": 0.7728580315932473, "learning_rate": 5.481910081748543e-06, "loss": 0.3114, "step": 14140 }, { "epoch": 0.4852779684282773, "grad_norm": 0.7682581933509256, "learning_rate": 5.4813569270471855e-06, "loss": 0.2845, "step": 14141 }, { "epoch": 0.48531228551818806, "grad_norm": 0.7647503130635517, "learning_rate": 5.480803766399223e-06, "loss": 0.3383, "step": 14142 }, { "epoch": 0.4853466026080988, "grad_norm": 0.7611051226787326, "learning_rate": 5.480250599811492e-06, "loss": 0.3199, "step": 14143 }, { "epoch": 0.4853809196980096, "grad_norm": 0.7630728815421387, "learning_rate": 5.479697427290824e-06, "loss": 0.2843, "step": 14144 }, { "epoch": 0.48541523678792037, "grad_norm": 0.7589910950171544, "learning_rate": 5.479144248844054e-06, "loss": 0.2846, "step": 14145 }, { "epoch": 0.4854495538778312, "grad_norm": 0.8485664894617408, "learning_rate": 5.478591064478014e-06, "loss": 0.2833, "step": 14146 }, { "epoch": 0.4854838709677419, "grad_norm": 0.8083260645154244, "learning_rate": 5.47803787419954e-06, "loss": 0.3, "step": 14147 }, { "epoch": 0.48551818805765273, "grad_norm": 0.8702646697779427, "learning_rate": 5.477484678015465e-06, "loss": 0.3084, "step": 14148 }, { "epoch": 0.4855525051475635, "grad_norm": 0.7978204899350541, "learning_rate": 5.476931475932626e-06, "loss": 0.2766, "step": 14149 }, { "epoch": 0.48558682223747424, "grad_norm": 0.8054513373033636, "learning_rate": 5.476378267957853e-06, "loss": 0.2822, "step": 14150 }, { "epoch": 0.48562113932738504, "grad_norm": 0.8012083725954723, "learning_rate": 5.47582505409798e-06, "loss": 0.2693, "step": 14151 }, { "epoch": 0.4856554564172958, "grad_norm": 0.8782857321041366, "learning_rate": 5.475271834359845e-06, "loss": 0.2665, "step": 14152 }, { "epoch": 0.4856897735072066, "grad_norm": 0.7468243510030386, "learning_rate": 5.4747186087502815e-06, "loss": 0.2939, "step": 14153 }, { "epoch": 0.48572409059711735, "grad_norm": 0.7467340854141551, "learning_rate": 5.474165377276122e-06, "loss": 0.2725, "step": 14154 }, { "epoch": 0.48575840768702816, "grad_norm": 0.8628114386737556, "learning_rate": 5.4736121399442025e-06, "loss": 0.2447, "step": 14155 }, { "epoch": 0.4857927247769389, "grad_norm": 0.7172810314007696, "learning_rate": 5.4730588967613566e-06, "loss": 0.2387, "step": 14156 }, { "epoch": 0.4858270418668497, "grad_norm": 0.8855316919327142, "learning_rate": 5.4725056477344195e-06, "loss": 0.2926, "step": 14157 }, { "epoch": 0.48586135895676047, "grad_norm": 0.8309052203471808, "learning_rate": 5.471952392870227e-06, "loss": 0.2696, "step": 14158 }, { "epoch": 0.4858956760466712, "grad_norm": 0.8037867050263283, "learning_rate": 5.4713991321756124e-06, "loss": 0.3355, "step": 14159 }, { "epoch": 0.48592999313658203, "grad_norm": 0.7431071647473461, "learning_rate": 5.470845865657411e-06, "loss": 0.3068, "step": 14160 }, { "epoch": 0.4859643102264928, "grad_norm": 1.0607350329838718, "learning_rate": 5.4702925933224575e-06, "loss": 0.2493, "step": 14161 }, { "epoch": 0.4859986273164036, "grad_norm": 0.8756511592734432, "learning_rate": 5.469739315177587e-06, "loss": 0.2951, "step": 14162 }, { "epoch": 0.48603294440631434, "grad_norm": 0.7750118422356181, "learning_rate": 5.4691860312296365e-06, "loss": 0.2748, "step": 14163 }, { "epoch": 0.48606726149622514, "grad_norm": 0.7189203269368892, "learning_rate": 5.468632741485439e-06, "loss": 0.2868, "step": 14164 }, { "epoch": 0.4861015785861359, "grad_norm": 0.6397705885636956, "learning_rate": 5.46807944595183e-06, "loss": 0.248, "step": 14165 }, { "epoch": 0.48613589567604665, "grad_norm": 0.7859354295515806, "learning_rate": 5.4675261446356454e-06, "loss": 0.2941, "step": 14166 }, { "epoch": 0.48617021276595745, "grad_norm": 0.7934422386833696, "learning_rate": 5.4669728375437206e-06, "loss": 0.3218, "step": 14167 }, { "epoch": 0.4862045298558682, "grad_norm": 0.8113876193104755, "learning_rate": 5.46641952468289e-06, "loss": 0.3274, "step": 14168 }, { "epoch": 0.486238846945779, "grad_norm": 0.7658396325634178, "learning_rate": 5.4658662060599895e-06, "loss": 0.28, "step": 14169 }, { "epoch": 0.48627316403568976, "grad_norm": 0.5773335263828883, "learning_rate": 5.465312881681857e-06, "loss": 0.238, "step": 14170 }, { "epoch": 0.48630748112560057, "grad_norm": 0.8011990919174354, "learning_rate": 5.464759551555324e-06, "loss": 0.2586, "step": 14171 }, { "epoch": 0.4863417982155113, "grad_norm": 0.8506596466919312, "learning_rate": 5.464206215687231e-06, "loss": 0.3856, "step": 14172 }, { "epoch": 0.4863761153054221, "grad_norm": 0.6042512999710629, "learning_rate": 5.463652874084411e-06, "loss": 0.2224, "step": 14173 }, { "epoch": 0.4864104323953329, "grad_norm": 0.7609713198640123, "learning_rate": 5.463099526753699e-06, "loss": 0.3084, "step": 14174 }, { "epoch": 0.48644474948524363, "grad_norm": 0.8408446249233664, "learning_rate": 5.462546173701934e-06, "loss": 0.2897, "step": 14175 }, { "epoch": 0.48647906657515444, "grad_norm": 0.775406153033298, "learning_rate": 5.461992814935948e-06, "loss": 0.3113, "step": 14176 }, { "epoch": 0.4865133836650652, "grad_norm": 0.7617852459144889, "learning_rate": 5.461439450462581e-06, "loss": 0.2848, "step": 14177 }, { "epoch": 0.486547700754976, "grad_norm": 0.7194188888632997, "learning_rate": 5.460886080288668e-06, "loss": 0.3494, "step": 14178 }, { "epoch": 0.48658201784488675, "grad_norm": 0.7444202672567467, "learning_rate": 5.460332704421044e-06, "loss": 0.2957, "step": 14179 }, { "epoch": 0.48661633493479756, "grad_norm": 0.7920324003038556, "learning_rate": 5.459779322866546e-06, "loss": 0.2512, "step": 14180 }, { "epoch": 0.4866506520247083, "grad_norm": 0.7000368273931192, "learning_rate": 5.459225935632011e-06, "loss": 0.2791, "step": 14181 }, { "epoch": 0.48668496911461906, "grad_norm": 0.6996162073260684, "learning_rate": 5.458672542724276e-06, "loss": 0.2479, "step": 14182 }, { "epoch": 0.48671928620452987, "grad_norm": 0.7261808940057697, "learning_rate": 5.458119144150173e-06, "loss": 0.2709, "step": 14183 }, { "epoch": 0.4867536032944406, "grad_norm": 0.817484122967299, "learning_rate": 5.457565739916545e-06, "loss": 0.2821, "step": 14184 }, { "epoch": 0.4867879203843514, "grad_norm": 0.8292461929299116, "learning_rate": 5.457012330030226e-06, "loss": 0.3361, "step": 14185 }, { "epoch": 0.4868222374742622, "grad_norm": 0.8562663550954084, "learning_rate": 5.45645891449805e-06, "loss": 0.25, "step": 14186 }, { "epoch": 0.486856554564173, "grad_norm": 0.7896843148440891, "learning_rate": 5.455905493326858e-06, "loss": 0.3223, "step": 14187 }, { "epoch": 0.48689087165408373, "grad_norm": 0.8771263482720497, "learning_rate": 5.455352066523485e-06, "loss": 0.3399, "step": 14188 }, { "epoch": 0.4869251887439945, "grad_norm": 0.7844917980770049, "learning_rate": 5.454798634094769e-06, "loss": 0.2912, "step": 14189 }, { "epoch": 0.4869595058339053, "grad_norm": 0.6776269081146907, "learning_rate": 5.454245196047545e-06, "loss": 0.2551, "step": 14190 }, { "epoch": 0.48699382292381604, "grad_norm": 0.6649529035983619, "learning_rate": 5.453691752388652e-06, "loss": 0.2536, "step": 14191 }, { "epoch": 0.48702814001372685, "grad_norm": 0.7627178570821894, "learning_rate": 5.4531383031249256e-06, "loss": 0.3, "step": 14192 }, { "epoch": 0.4870624571036376, "grad_norm": 0.7420393813746775, "learning_rate": 5.452584848263204e-06, "loss": 0.379, "step": 14193 }, { "epoch": 0.4870967741935484, "grad_norm": 0.6945058972397453, "learning_rate": 5.4520313878103235e-06, "loss": 0.2547, "step": 14194 }, { "epoch": 0.48713109128345916, "grad_norm": 0.7378748693039939, "learning_rate": 5.451477921773124e-06, "loss": 0.3707, "step": 14195 }, { "epoch": 0.4871654083733699, "grad_norm": 0.7517994625036593, "learning_rate": 5.45092445015844e-06, "loss": 0.2777, "step": 14196 }, { "epoch": 0.4871997254632807, "grad_norm": 0.733470876737345, "learning_rate": 5.45037097297311e-06, "loss": 0.3006, "step": 14197 }, { "epoch": 0.48723404255319147, "grad_norm": 0.6542696622975794, "learning_rate": 5.449817490223972e-06, "loss": 0.2585, "step": 14198 }, { "epoch": 0.4872683596431023, "grad_norm": 0.6962817084486964, "learning_rate": 5.449264001917863e-06, "loss": 0.3278, "step": 14199 }, { "epoch": 0.48730267673301303, "grad_norm": 0.7268137951880065, "learning_rate": 5.448710508061621e-06, "loss": 0.2635, "step": 14200 }, { "epoch": 0.48733699382292384, "grad_norm": 0.7913816157890133, "learning_rate": 5.448157008662084e-06, "loss": 0.2952, "step": 14201 }, { "epoch": 0.4873713109128346, "grad_norm": 0.7517366908284966, "learning_rate": 5.447603503726091e-06, "loss": 0.2884, "step": 14202 }, { "epoch": 0.4874056280027454, "grad_norm": 0.8920434222114395, "learning_rate": 5.447049993260477e-06, "loss": 0.275, "step": 14203 }, { "epoch": 0.48743994509265615, "grad_norm": 0.6696532897218699, "learning_rate": 5.4464964772720815e-06, "loss": 0.2839, "step": 14204 }, { "epoch": 0.4874742621825669, "grad_norm": 0.721844590393604, "learning_rate": 5.445942955767744e-06, "loss": 0.244, "step": 14205 }, { "epoch": 0.4875085792724777, "grad_norm": 0.7471623969708175, "learning_rate": 5.4453894287542995e-06, "loss": 0.2532, "step": 14206 }, { "epoch": 0.48754289636238846, "grad_norm": 0.6906785551180021, "learning_rate": 5.4448358962385886e-06, "loss": 0.2934, "step": 14207 }, { "epoch": 0.48757721345229926, "grad_norm": 0.7435188886663604, "learning_rate": 5.44428235822745e-06, "loss": 0.2719, "step": 14208 }, { "epoch": 0.48761153054221, "grad_norm": 0.7778235733309228, "learning_rate": 5.443728814727719e-06, "loss": 0.2956, "step": 14209 }, { "epoch": 0.4876458476321208, "grad_norm": 0.6326608400706021, "learning_rate": 5.443175265746237e-06, "loss": 0.2392, "step": 14210 }, { "epoch": 0.4876801647220316, "grad_norm": 0.753093174281684, "learning_rate": 5.44262171128984e-06, "loss": 0.272, "step": 14211 }, { "epoch": 0.4877144818119423, "grad_norm": 0.7815230313137809, "learning_rate": 5.4420681513653695e-06, "loss": 0.2609, "step": 14212 }, { "epoch": 0.48774879890185313, "grad_norm": 0.9305802207091041, "learning_rate": 5.4415145859796616e-06, "loss": 0.377, "step": 14213 }, { "epoch": 0.4877831159917639, "grad_norm": 0.9246443995774835, "learning_rate": 5.440961015139555e-06, "loss": 0.3192, "step": 14214 }, { "epoch": 0.4878174330816747, "grad_norm": 0.7261959738485424, "learning_rate": 5.440407438851889e-06, "loss": 0.2993, "step": 14215 }, { "epoch": 0.48785175017158544, "grad_norm": 0.7181440841990173, "learning_rate": 5.439853857123504e-06, "loss": 0.252, "step": 14216 }, { "epoch": 0.48788606726149625, "grad_norm": 0.8217784870435393, "learning_rate": 5.4393002699612365e-06, "loss": 0.3163, "step": 14217 }, { "epoch": 0.487920384351407, "grad_norm": 0.7946916884564967, "learning_rate": 5.438746677371928e-06, "loss": 0.2672, "step": 14218 }, { "epoch": 0.48795470144131775, "grad_norm": 0.799291208219562, "learning_rate": 5.438193079362414e-06, "loss": 0.2637, "step": 14219 }, { "epoch": 0.48798901853122856, "grad_norm": 0.7875515648267656, "learning_rate": 5.437639475939535e-06, "loss": 0.2924, "step": 14220 }, { "epoch": 0.4880233356211393, "grad_norm": 0.8047608999782714, "learning_rate": 5.437085867110131e-06, "loss": 0.2785, "step": 14221 }, { "epoch": 0.4880576527110501, "grad_norm": 0.7158488428571679, "learning_rate": 5.436532252881041e-06, "loss": 0.245, "step": 14222 }, { "epoch": 0.48809196980096087, "grad_norm": 0.9147665768890766, "learning_rate": 5.435978633259105e-06, "loss": 0.283, "step": 14223 }, { "epoch": 0.4881262868908717, "grad_norm": 0.740730933593271, "learning_rate": 5.43542500825116e-06, "loss": 0.3058, "step": 14224 }, { "epoch": 0.4881606039807824, "grad_norm": 0.6573267854177358, "learning_rate": 5.434871377864047e-06, "loss": 0.2247, "step": 14225 }, { "epoch": 0.48819492107069323, "grad_norm": 0.7288150836215899, "learning_rate": 5.434317742104606e-06, "loss": 0.3275, "step": 14226 }, { "epoch": 0.488229238160604, "grad_norm": 0.8319433763256923, "learning_rate": 5.433764100979675e-06, "loss": 0.2866, "step": 14227 }, { "epoch": 0.48826355525051474, "grad_norm": 0.7444898772667755, "learning_rate": 5.433210454496093e-06, "loss": 0.257, "step": 14228 }, { "epoch": 0.48829787234042554, "grad_norm": 0.861421524401348, "learning_rate": 5.432656802660702e-06, "loss": 0.2633, "step": 14229 }, { "epoch": 0.4883321894303363, "grad_norm": 0.7276090782152355, "learning_rate": 5.4321031454803404e-06, "loss": 0.2412, "step": 14230 }, { "epoch": 0.4883665065202471, "grad_norm": 0.8065718052568912, "learning_rate": 5.431549482961848e-06, "loss": 0.3114, "step": 14231 }, { "epoch": 0.48840082361015785, "grad_norm": 0.814161535515333, "learning_rate": 5.430995815112066e-06, "loss": 0.2812, "step": 14232 }, { "epoch": 0.48843514070006866, "grad_norm": 0.6742538233107068, "learning_rate": 5.4304421419378315e-06, "loss": 0.2602, "step": 14233 }, { "epoch": 0.4884694577899794, "grad_norm": 0.776545654613299, "learning_rate": 5.42988846344599e-06, "loss": 0.2835, "step": 14234 }, { "epoch": 0.48850377487989016, "grad_norm": 0.7194359499858306, "learning_rate": 5.429334779643374e-06, "loss": 0.3184, "step": 14235 }, { "epoch": 0.48853809196980097, "grad_norm": 0.750027020479622, "learning_rate": 5.428781090536827e-06, "loss": 0.303, "step": 14236 }, { "epoch": 0.4885724090597117, "grad_norm": 0.8551553317760804, "learning_rate": 5.428227396133191e-06, "loss": 0.3002, "step": 14237 }, { "epoch": 0.48860672614962253, "grad_norm": 0.7176290982008021, "learning_rate": 5.427673696439304e-06, "loss": 0.3015, "step": 14238 }, { "epoch": 0.4886410432395333, "grad_norm": 0.7373977607625789, "learning_rate": 5.427119991462008e-06, "loss": 0.3442, "step": 14239 }, { "epoch": 0.4886753603294441, "grad_norm": 0.7838240409655289, "learning_rate": 5.426566281208143e-06, "loss": 0.3194, "step": 14240 }, { "epoch": 0.48870967741935484, "grad_norm": 0.7691321507981221, "learning_rate": 5.4260125656845474e-06, "loss": 0.288, "step": 14241 }, { "epoch": 0.4887439945092656, "grad_norm": 0.8313108133382311, "learning_rate": 5.4254588448980635e-06, "loss": 0.2997, "step": 14242 }, { "epoch": 0.4887783115991764, "grad_norm": 0.8311022652169705, "learning_rate": 5.4249051188555326e-06, "loss": 0.2937, "step": 14243 }, { "epoch": 0.48881262868908715, "grad_norm": 0.8580965020242363, "learning_rate": 5.424351387563793e-06, "loss": 0.2556, "step": 14244 }, { "epoch": 0.48884694577899795, "grad_norm": 0.766694623888267, "learning_rate": 5.423797651029687e-06, "loss": 0.2606, "step": 14245 }, { "epoch": 0.4888812628689087, "grad_norm": 0.7806505152696516, "learning_rate": 5.423243909260058e-06, "loss": 0.2656, "step": 14246 }, { "epoch": 0.4889155799588195, "grad_norm": 0.8502285497531079, "learning_rate": 5.422690162261741e-06, "loss": 0.3194, "step": 14247 }, { "epoch": 0.48894989704873026, "grad_norm": 0.7890513083981289, "learning_rate": 5.42213641004158e-06, "loss": 0.2924, "step": 14248 }, { "epoch": 0.488984214138641, "grad_norm": 0.8235712662257724, "learning_rate": 5.421582652606416e-06, "loss": 0.2984, "step": 14249 }, { "epoch": 0.4890185312285518, "grad_norm": 0.8128960407637179, "learning_rate": 5.42102888996309e-06, "loss": 0.2683, "step": 14250 }, { "epoch": 0.4890528483184626, "grad_norm": 0.8356403877968197, "learning_rate": 5.4204751221184425e-06, "loss": 0.3466, "step": 14251 }, { "epoch": 0.4890871654083734, "grad_norm": 0.7245551343642052, "learning_rate": 5.419921349079316e-06, "loss": 0.2314, "step": 14252 }, { "epoch": 0.48912148249828413, "grad_norm": 0.7142151551053442, "learning_rate": 5.419367570852551e-06, "loss": 0.2891, "step": 14253 }, { "epoch": 0.48915579958819494, "grad_norm": 0.7724468661001141, "learning_rate": 5.418813787444989e-06, "loss": 0.2983, "step": 14254 }, { "epoch": 0.4891901166781057, "grad_norm": 0.8171831156116867, "learning_rate": 5.418259998863469e-06, "loss": 0.338, "step": 14255 }, { "epoch": 0.4892244337680165, "grad_norm": 0.7885922808148738, "learning_rate": 5.417706205114837e-06, "loss": 0.2744, "step": 14256 }, { "epoch": 0.48925875085792725, "grad_norm": 0.7969239152300276, "learning_rate": 5.417152406205929e-06, "loss": 0.2746, "step": 14257 }, { "epoch": 0.489293067947838, "grad_norm": 0.8797312846529665, "learning_rate": 5.416598602143589e-06, "loss": 0.2902, "step": 14258 }, { "epoch": 0.4893273850377488, "grad_norm": 0.7657702196986534, "learning_rate": 5.416044792934661e-06, "loss": 0.2667, "step": 14259 }, { "epoch": 0.48936170212765956, "grad_norm": 0.810057568513729, "learning_rate": 5.415490978585984e-06, "loss": 0.3091, "step": 14260 }, { "epoch": 0.48939601921757037, "grad_norm": 0.7885644095202515, "learning_rate": 5.414937159104401e-06, "loss": 0.2933, "step": 14261 }, { "epoch": 0.4894303363074811, "grad_norm": 0.7415640122120172, "learning_rate": 5.4143833344967544e-06, "loss": 0.2304, "step": 14262 }, { "epoch": 0.4894646533973919, "grad_norm": 0.9027009832699385, "learning_rate": 5.413829504769883e-06, "loss": 0.2995, "step": 14263 }, { "epoch": 0.4894989704873027, "grad_norm": 0.7833991989330924, "learning_rate": 5.41327566993063e-06, "loss": 0.2593, "step": 14264 }, { "epoch": 0.4895332875772134, "grad_norm": 0.738265550083491, "learning_rate": 5.412721829985839e-06, "loss": 0.2728, "step": 14265 }, { "epoch": 0.48956760466712423, "grad_norm": 0.7549073690911564, "learning_rate": 5.41216798494235e-06, "loss": 0.3604, "step": 14266 }, { "epoch": 0.489601921757035, "grad_norm": 0.7726299214132082, "learning_rate": 5.411614134807006e-06, "loss": 0.3012, "step": 14267 }, { "epoch": 0.4896362388469458, "grad_norm": 0.7258790088736324, "learning_rate": 5.411060279586651e-06, "loss": 0.2662, "step": 14268 }, { "epoch": 0.48967055593685654, "grad_norm": 0.885629610394941, "learning_rate": 5.410506419288124e-06, "loss": 0.2652, "step": 14269 }, { "epoch": 0.48970487302676735, "grad_norm": 0.8335437720590186, "learning_rate": 5.409952553918269e-06, "loss": 0.2993, "step": 14270 }, { "epoch": 0.4897391901166781, "grad_norm": 0.8182970748344135, "learning_rate": 5.409398683483928e-06, "loss": 0.2846, "step": 14271 }, { "epoch": 0.48977350720658885, "grad_norm": 0.722664003392343, "learning_rate": 5.408844807991944e-06, "loss": 0.2846, "step": 14272 }, { "epoch": 0.48980782429649966, "grad_norm": 0.7512660098145509, "learning_rate": 5.408290927449159e-06, "loss": 0.2829, "step": 14273 }, { "epoch": 0.4898421413864104, "grad_norm": 0.7969662606247104, "learning_rate": 5.407737041862417e-06, "loss": 0.3155, "step": 14274 }, { "epoch": 0.4898764584763212, "grad_norm": 0.7422836134792004, "learning_rate": 5.4071831512385576e-06, "loss": 0.2967, "step": 14275 }, { "epoch": 0.48991077556623197, "grad_norm": 0.7933347200267056, "learning_rate": 5.406629255584426e-06, "loss": 0.2945, "step": 14276 }, { "epoch": 0.4899450926561428, "grad_norm": 0.6673985391253969, "learning_rate": 5.406075354906862e-06, "loss": 0.2853, "step": 14277 }, { "epoch": 0.48997940974605353, "grad_norm": 0.6890140977477592, "learning_rate": 5.405521449212713e-06, "loss": 0.2538, "step": 14278 }, { "epoch": 0.49001372683596434, "grad_norm": 0.9532388460734654, "learning_rate": 5.404967538508818e-06, "loss": 0.2995, "step": 14279 }, { "epoch": 0.4900480439258751, "grad_norm": 0.7538221067627507, "learning_rate": 5.404413622802021e-06, "loss": 0.2327, "step": 14280 }, { "epoch": 0.49008236101578584, "grad_norm": 0.7657647167133671, "learning_rate": 5.4038597020991645e-06, "loss": 0.2659, "step": 14281 }, { "epoch": 0.49011667810569665, "grad_norm": 0.8858736521673424, "learning_rate": 5.4033057764070936e-06, "loss": 0.3013, "step": 14282 }, { "epoch": 0.4901509951956074, "grad_norm": 0.721567870809527, "learning_rate": 5.40275184573265e-06, "loss": 0.2592, "step": 14283 }, { "epoch": 0.4901853122855182, "grad_norm": 0.7884556542813396, "learning_rate": 5.402197910082676e-06, "loss": 0.3025, "step": 14284 }, { "epoch": 0.49021962937542896, "grad_norm": 0.7897372374047781, "learning_rate": 5.401643969464018e-06, "loss": 0.2849, "step": 14285 }, { "epoch": 0.49025394646533976, "grad_norm": 0.9067192149845472, "learning_rate": 5.401090023883515e-06, "loss": 0.3212, "step": 14286 }, { "epoch": 0.4902882635552505, "grad_norm": 0.6932714206396879, "learning_rate": 5.400536073348013e-06, "loss": 0.2496, "step": 14287 }, { "epoch": 0.49032258064516127, "grad_norm": 0.9447873474875765, "learning_rate": 5.399982117864354e-06, "loss": 0.3051, "step": 14288 }, { "epoch": 0.4903568977350721, "grad_norm": 0.7507762939141317, "learning_rate": 5.399428157439382e-06, "loss": 0.2758, "step": 14289 }, { "epoch": 0.4903912148249828, "grad_norm": 0.8414828372196558, "learning_rate": 5.398874192079942e-06, "loss": 0.2712, "step": 14290 }, { "epoch": 0.49042553191489363, "grad_norm": 0.7304399767773949, "learning_rate": 5.398320221792876e-06, "loss": 0.2843, "step": 14291 }, { "epoch": 0.4904598490048044, "grad_norm": 0.7814532808368062, "learning_rate": 5.397766246585029e-06, "loss": 0.2978, "step": 14292 }, { "epoch": 0.4904941660947152, "grad_norm": 1.0306753070216406, "learning_rate": 5.397212266463242e-06, "loss": 0.3321, "step": 14293 }, { "epoch": 0.49052848318462594, "grad_norm": 0.7470409901634875, "learning_rate": 5.396658281434361e-06, "loss": 0.2615, "step": 14294 }, { "epoch": 0.4905628002745367, "grad_norm": 0.7031738917939492, "learning_rate": 5.396104291505231e-06, "loss": 0.2989, "step": 14295 }, { "epoch": 0.4905971173644475, "grad_norm": 0.7222836024975767, "learning_rate": 5.395550296682692e-06, "loss": 0.2823, "step": 14296 }, { "epoch": 0.49063143445435825, "grad_norm": 0.7446996778302287, "learning_rate": 5.394996296973592e-06, "loss": 0.2887, "step": 14297 }, { "epoch": 0.49066575154426906, "grad_norm": 0.7429753017931324, "learning_rate": 5.394442292384772e-06, "loss": 0.2538, "step": 14298 }, { "epoch": 0.4907000686341798, "grad_norm": 0.7464033217720922, "learning_rate": 5.3938882829230785e-06, "loss": 0.2627, "step": 14299 }, { "epoch": 0.4907343857240906, "grad_norm": 0.7075623264775122, "learning_rate": 5.393334268595354e-06, "loss": 0.3188, "step": 14300 }, { "epoch": 0.49076870281400137, "grad_norm": 0.7371630545102152, "learning_rate": 5.392780249408444e-06, "loss": 0.246, "step": 14301 }, { "epoch": 0.4908030199039122, "grad_norm": 0.9184724819589173, "learning_rate": 5.392226225369191e-06, "loss": 0.3129, "step": 14302 }, { "epoch": 0.4908373369938229, "grad_norm": 0.7862334956662577, "learning_rate": 5.391672196484438e-06, "loss": 0.2562, "step": 14303 }, { "epoch": 0.4908716540837337, "grad_norm": 0.69038784519741, "learning_rate": 5.391118162761036e-06, "loss": 0.2545, "step": 14304 }, { "epoch": 0.4909059711736445, "grad_norm": 0.6925833193010424, "learning_rate": 5.390564124205823e-06, "loss": 0.2706, "step": 14305 }, { "epoch": 0.49094028826355524, "grad_norm": 0.8256976699064752, "learning_rate": 5.390010080825645e-06, "loss": 0.2652, "step": 14306 }, { "epoch": 0.49097460535346604, "grad_norm": 0.801935221552627, "learning_rate": 5.3894560326273494e-06, "loss": 0.2984, "step": 14307 }, { "epoch": 0.4910089224433768, "grad_norm": 0.7429371583997557, "learning_rate": 5.3889019796177766e-06, "loss": 0.2902, "step": 14308 }, { "epoch": 0.4910432395332876, "grad_norm": 0.8010407426667694, "learning_rate": 5.3883479218037735e-06, "loss": 0.2767, "step": 14309 }, { "epoch": 0.49107755662319835, "grad_norm": 0.7589103705686788, "learning_rate": 5.387793859192185e-06, "loss": 0.3085, "step": 14310 }, { "epoch": 0.4911118737131091, "grad_norm": 0.8011853953696377, "learning_rate": 5.387239791789855e-06, "loss": 0.2331, "step": 14311 }, { "epoch": 0.4911461908030199, "grad_norm": 0.6792814454180507, "learning_rate": 5.3866857196036296e-06, "loss": 0.271, "step": 14312 }, { "epoch": 0.49118050789293066, "grad_norm": 0.7233845145593709, "learning_rate": 5.386131642640352e-06, "loss": 0.3513, "step": 14313 }, { "epoch": 0.49121482498284147, "grad_norm": 0.7081104656140831, "learning_rate": 5.3855775609068695e-06, "loss": 0.2521, "step": 14314 }, { "epoch": 0.4912491420727522, "grad_norm": 0.7567016632899093, "learning_rate": 5.385023474410025e-06, "loss": 0.2506, "step": 14315 }, { "epoch": 0.491283459162663, "grad_norm": 0.7465813230396834, "learning_rate": 5.384469383156663e-06, "loss": 0.2325, "step": 14316 }, { "epoch": 0.4913177762525738, "grad_norm": 0.7425337873613947, "learning_rate": 5.383915287153632e-06, "loss": 0.3015, "step": 14317 }, { "epoch": 0.49135209334248453, "grad_norm": 0.7888104261539634, "learning_rate": 5.383361186407774e-06, "loss": 0.2609, "step": 14318 }, { "epoch": 0.49138641043239534, "grad_norm": 0.7745096992414241, "learning_rate": 5.3828070809259356e-06, "loss": 0.3562, "step": 14319 }, { "epoch": 0.4914207275223061, "grad_norm": 1.014236169262831, "learning_rate": 5.382252970714964e-06, "loss": 0.2637, "step": 14320 }, { "epoch": 0.4914550446122169, "grad_norm": 0.7351801756992141, "learning_rate": 5.381698855781701e-06, "loss": 0.27, "step": 14321 }, { "epoch": 0.49148936170212765, "grad_norm": 0.8047659406104256, "learning_rate": 5.381144736132992e-06, "loss": 0.2617, "step": 14322 }, { "epoch": 0.49152367879203845, "grad_norm": 0.8219901160043237, "learning_rate": 5.380590611775688e-06, "loss": 0.2929, "step": 14323 }, { "epoch": 0.4915579958819492, "grad_norm": 0.7144050958103545, "learning_rate": 5.380036482716628e-06, "loss": 0.2836, "step": 14324 }, { "epoch": 0.49159231297186, "grad_norm": 0.8415556264168245, "learning_rate": 5.37948234896266e-06, "loss": 0.2777, "step": 14325 }, { "epoch": 0.49162663006177076, "grad_norm": 0.7708308019811986, "learning_rate": 5.378928210520632e-06, "loss": 0.2862, "step": 14326 }, { "epoch": 0.4916609471516815, "grad_norm": 0.8157337831120599, "learning_rate": 5.378374067397387e-06, "loss": 0.2946, "step": 14327 }, { "epoch": 0.4916952642415923, "grad_norm": 0.7286675640151854, "learning_rate": 5.377819919599771e-06, "loss": 0.245, "step": 14328 }, { "epoch": 0.4917295813315031, "grad_norm": 0.7478780479840471, "learning_rate": 5.377265767134631e-06, "loss": 0.2642, "step": 14329 }, { "epoch": 0.4917638984214139, "grad_norm": 0.7648271648507619, "learning_rate": 5.376711610008813e-06, "loss": 0.2905, "step": 14330 }, { "epoch": 0.49179821551132463, "grad_norm": 0.7334075545600873, "learning_rate": 5.37615744822916e-06, "loss": 0.2653, "step": 14331 }, { "epoch": 0.49183253260123544, "grad_norm": 0.7809055994915785, "learning_rate": 5.3756032818025216e-06, "loss": 0.2877, "step": 14332 }, { "epoch": 0.4918668496911462, "grad_norm": 0.7432521374780428, "learning_rate": 5.375049110735742e-06, "loss": 0.3169, "step": 14333 }, { "epoch": 0.49190116678105694, "grad_norm": 0.8422552718895214, "learning_rate": 5.374494935035668e-06, "loss": 0.327, "step": 14334 }, { "epoch": 0.49193548387096775, "grad_norm": 0.7435473516624276, "learning_rate": 5.373940754709146e-06, "loss": 0.3949, "step": 14335 }, { "epoch": 0.4919698009608785, "grad_norm": 0.811130999897706, "learning_rate": 5.3733865697630225e-06, "loss": 0.3338, "step": 14336 }, { "epoch": 0.4920041180507893, "grad_norm": 0.8277338242003853, "learning_rate": 5.372832380204142e-06, "loss": 0.3072, "step": 14337 }, { "epoch": 0.49203843514070006, "grad_norm": 0.7621428903131203, "learning_rate": 5.372278186039354e-06, "loss": 0.2677, "step": 14338 }, { "epoch": 0.49207275223061087, "grad_norm": 0.780047833978647, "learning_rate": 5.371723987275501e-06, "loss": 0.241, "step": 14339 }, { "epoch": 0.4921070693205216, "grad_norm": 0.833829589910241, "learning_rate": 5.371169783919434e-06, "loss": 0.3354, "step": 14340 }, { "epoch": 0.49214138641043237, "grad_norm": 0.6829059515375547, "learning_rate": 5.3706155759779946e-06, "loss": 0.2856, "step": 14341 }, { "epoch": 0.4921757035003432, "grad_norm": 0.8029298139532098, "learning_rate": 5.370061363458032e-06, "loss": 0.3064, "step": 14342 }, { "epoch": 0.4922100205902539, "grad_norm": 0.8113200842733894, "learning_rate": 5.369507146366394e-06, "loss": 0.2695, "step": 14343 }, { "epoch": 0.49224433768016473, "grad_norm": 0.7963041083446287, "learning_rate": 5.368952924709926e-06, "loss": 0.2662, "step": 14344 }, { "epoch": 0.4922786547700755, "grad_norm": 0.6990857400128053, "learning_rate": 5.368398698495474e-06, "loss": 0.2603, "step": 14345 }, { "epoch": 0.4923129718599863, "grad_norm": 0.807570140316073, "learning_rate": 5.367844467729887e-06, "loss": 0.3352, "step": 14346 }, { "epoch": 0.49234728894989704, "grad_norm": 0.7420044098566365, "learning_rate": 5.367290232420007e-06, "loss": 0.2978, "step": 14347 }, { "epoch": 0.49238160603980785, "grad_norm": 0.7952255296538862, "learning_rate": 5.366735992572687e-06, "loss": 0.2407, "step": 14348 }, { "epoch": 0.4924159231297186, "grad_norm": 0.7347282218303606, "learning_rate": 5.366181748194772e-06, "loss": 0.253, "step": 14349 }, { "epoch": 0.49245024021962935, "grad_norm": 0.740665553224482, "learning_rate": 5.365627499293108e-06, "loss": 0.3315, "step": 14350 }, { "epoch": 0.49248455730954016, "grad_norm": 0.8153141657284233, "learning_rate": 5.365073245874541e-06, "loss": 0.3302, "step": 14351 }, { "epoch": 0.4925188743994509, "grad_norm": 0.7499385733087734, "learning_rate": 5.364518987945921e-06, "loss": 0.2726, "step": 14352 }, { "epoch": 0.4925531914893617, "grad_norm": 0.7721787711648939, "learning_rate": 5.3639647255140944e-06, "loss": 0.3335, "step": 14353 }, { "epoch": 0.49258750857927247, "grad_norm": 0.6914411206679895, "learning_rate": 5.363410458585907e-06, "loss": 0.2749, "step": 14354 }, { "epoch": 0.4926218256691833, "grad_norm": 0.7119085082073313, "learning_rate": 5.3628561871682075e-06, "loss": 0.2699, "step": 14355 }, { "epoch": 0.49265614275909403, "grad_norm": 0.7926435365980413, "learning_rate": 5.362301911267842e-06, "loss": 0.2896, "step": 14356 }, { "epoch": 0.4926904598490048, "grad_norm": 0.8271171353158535, "learning_rate": 5.36174763089166e-06, "loss": 0.2825, "step": 14357 }, { "epoch": 0.4927247769389156, "grad_norm": 0.7964630968825411, "learning_rate": 5.361193346046506e-06, "loss": 0.299, "step": 14358 }, { "epoch": 0.49275909402882634, "grad_norm": 1.4421085805827643, "learning_rate": 5.360639056739232e-06, "loss": 0.2467, "step": 14359 }, { "epoch": 0.49279341111873715, "grad_norm": 0.8891628210499688, "learning_rate": 5.360084762976681e-06, "loss": 0.2959, "step": 14360 }, { "epoch": 0.4928277282086479, "grad_norm": 0.7136499973326967, "learning_rate": 5.359530464765703e-06, "loss": 0.2424, "step": 14361 }, { "epoch": 0.4928620452985587, "grad_norm": 0.8621362818198248, "learning_rate": 5.358976162113145e-06, "loss": 0.3013, "step": 14362 }, { "epoch": 0.49289636238846946, "grad_norm": 0.7488887502774005, "learning_rate": 5.358421855025855e-06, "loss": 0.2682, "step": 14363 }, { "epoch": 0.4929306794783802, "grad_norm": 0.8054795317450695, "learning_rate": 5.357867543510681e-06, "loss": 0.3542, "step": 14364 }, { "epoch": 0.492964996568291, "grad_norm": 0.7196565176451574, "learning_rate": 5.357313227574472e-06, "loss": 0.2417, "step": 14365 }, { "epoch": 0.49299931365820177, "grad_norm": 0.7371788436180584, "learning_rate": 5.3567589072240735e-06, "loss": 0.2847, "step": 14366 }, { "epoch": 0.4930336307481126, "grad_norm": 0.7950155267787367, "learning_rate": 5.356204582466334e-06, "loss": 0.306, "step": 14367 }, { "epoch": 0.4930679478380233, "grad_norm": 0.8752140675537994, "learning_rate": 5.355650253308103e-06, "loss": 0.2863, "step": 14368 }, { "epoch": 0.49310226492793413, "grad_norm": 0.765829462199042, "learning_rate": 5.355095919756227e-06, "loss": 0.2841, "step": 14369 }, { "epoch": 0.4931365820178449, "grad_norm": 0.6609723893627065, "learning_rate": 5.354541581817556e-06, "loss": 0.3164, "step": 14370 }, { "epoch": 0.4931708991077557, "grad_norm": 0.7168605332826534, "learning_rate": 5.353987239498937e-06, "loss": 0.2629, "step": 14371 }, { "epoch": 0.49320521619766644, "grad_norm": 0.8309131619003124, "learning_rate": 5.353432892807218e-06, "loss": 0.3496, "step": 14372 }, { "epoch": 0.4932395332875772, "grad_norm": 0.6574986901236399, "learning_rate": 5.352878541749248e-06, "loss": 0.2514, "step": 14373 }, { "epoch": 0.493273850377488, "grad_norm": 0.7635561050602939, "learning_rate": 5.352324186331874e-06, "loss": 0.3142, "step": 14374 }, { "epoch": 0.49330816746739875, "grad_norm": 0.7847716959826759, "learning_rate": 5.351769826561948e-06, "loss": 0.2804, "step": 14375 }, { "epoch": 0.49334248455730956, "grad_norm": 0.78338263834571, "learning_rate": 5.351215462446313e-06, "loss": 0.231, "step": 14376 }, { "epoch": 0.4933768016472203, "grad_norm": 0.6936443836096918, "learning_rate": 5.350661093991821e-06, "loss": 0.2557, "step": 14377 }, { "epoch": 0.4934111187371311, "grad_norm": 0.8128021087433349, "learning_rate": 5.350106721205322e-06, "loss": 0.285, "step": 14378 }, { "epoch": 0.49344543582704187, "grad_norm": 0.8699775709466572, "learning_rate": 5.34955234409366e-06, "loss": 0.2267, "step": 14379 }, { "epoch": 0.4934797529169526, "grad_norm": 0.8599467219589687, "learning_rate": 5.348997962663687e-06, "loss": 0.3399, "step": 14380 }, { "epoch": 0.4935140700068634, "grad_norm": 0.7534493619465115, "learning_rate": 5.348443576922253e-06, "loss": 0.2862, "step": 14381 }, { "epoch": 0.4935483870967742, "grad_norm": 0.7172248343231766, "learning_rate": 5.347889186876203e-06, "loss": 0.276, "step": 14382 }, { "epoch": 0.493582704186685, "grad_norm": 0.7025987594841149, "learning_rate": 5.347334792532388e-06, "loss": 0.2606, "step": 14383 }, { "epoch": 0.49361702127659574, "grad_norm": 0.7251750529766589, "learning_rate": 5.346780393897657e-06, "loss": 0.235, "step": 14384 }, { "epoch": 0.49365133836650654, "grad_norm": 0.7364218236506165, "learning_rate": 5.346225990978857e-06, "loss": 0.2651, "step": 14385 }, { "epoch": 0.4936856554564173, "grad_norm": 0.7420330602359103, "learning_rate": 5.34567158378284e-06, "loss": 0.2349, "step": 14386 }, { "epoch": 0.49371997254632805, "grad_norm": 0.720260663137302, "learning_rate": 5.345117172316455e-06, "loss": 0.2927, "step": 14387 }, { "epoch": 0.49375428963623885, "grad_norm": 0.7778603260760519, "learning_rate": 5.344562756586548e-06, "loss": 0.2716, "step": 14388 }, { "epoch": 0.4937886067261496, "grad_norm": 0.7223654418064726, "learning_rate": 5.344008336599969e-06, "loss": 0.3408, "step": 14389 }, { "epoch": 0.4938229238160604, "grad_norm": 0.9538033414224173, "learning_rate": 5.34345391236357e-06, "loss": 0.269, "step": 14390 }, { "epoch": 0.49385724090597116, "grad_norm": 0.766448707258235, "learning_rate": 5.3428994838841966e-06, "loss": 0.2865, "step": 14391 }, { "epoch": 0.49389155799588197, "grad_norm": 0.7381937201959403, "learning_rate": 5.3423450511687e-06, "loss": 0.2666, "step": 14392 }, { "epoch": 0.4939258750857927, "grad_norm": 0.7767559797463226, "learning_rate": 5.34179061422393e-06, "loss": 0.253, "step": 14393 }, { "epoch": 0.4939601921757035, "grad_norm": 0.781545118921368, "learning_rate": 5.341236173056736e-06, "loss": 0.2881, "step": 14394 }, { "epoch": 0.4939945092656143, "grad_norm": 0.6872442805371246, "learning_rate": 5.340681727673968e-06, "loss": 0.3248, "step": 14395 }, { "epoch": 0.49402882635552503, "grad_norm": 0.8134542504364685, "learning_rate": 5.340127278082472e-06, "loss": 0.2726, "step": 14396 }, { "epoch": 0.49406314344543584, "grad_norm": 0.8060883526041408, "learning_rate": 5.339572824289102e-06, "loss": 0.2761, "step": 14397 }, { "epoch": 0.4940974605353466, "grad_norm": 1.016028309803388, "learning_rate": 5.339018366300704e-06, "loss": 0.2662, "step": 14398 }, { "epoch": 0.4941317776252574, "grad_norm": 0.8378194244145706, "learning_rate": 5.33846390412413e-06, "loss": 0.275, "step": 14399 }, { "epoch": 0.49416609471516815, "grad_norm": 0.774910414229585, "learning_rate": 5.337909437766229e-06, "loss": 0.2481, "step": 14400 }, { "epoch": 0.49420041180507895, "grad_norm": 0.7356369624687011, "learning_rate": 5.337354967233851e-06, "loss": 0.2346, "step": 14401 }, { "epoch": 0.4942347288949897, "grad_norm": 0.7721883248553691, "learning_rate": 5.336800492533844e-06, "loss": 0.2772, "step": 14402 }, { "epoch": 0.49426904598490046, "grad_norm": 0.7715584656033735, "learning_rate": 5.336246013673063e-06, "loss": 0.222, "step": 14403 }, { "epoch": 0.49430336307481126, "grad_norm": 0.7569484256202753, "learning_rate": 5.335691530658352e-06, "loss": 0.3424, "step": 14404 }, { "epoch": 0.494337680164722, "grad_norm": 0.7436573396515844, "learning_rate": 5.335137043496564e-06, "loss": 0.3847, "step": 14405 }, { "epoch": 0.4943719972546328, "grad_norm": 0.8572758789901783, "learning_rate": 5.334582552194549e-06, "loss": 0.2932, "step": 14406 }, { "epoch": 0.4944063143445436, "grad_norm": 0.7811031408081335, "learning_rate": 5.334028056759156e-06, "loss": 0.2586, "step": 14407 }, { "epoch": 0.4944406314344544, "grad_norm": 0.6796657506233051, "learning_rate": 5.333473557197236e-06, "loss": 0.3137, "step": 14408 }, { "epoch": 0.49447494852436513, "grad_norm": 0.7644407465394204, "learning_rate": 5.3329190535156395e-06, "loss": 0.2381, "step": 14409 }, { "epoch": 0.4945092656142759, "grad_norm": 0.7857526475781098, "learning_rate": 5.332364545721217e-06, "loss": 0.2886, "step": 14410 }, { "epoch": 0.4945435827041867, "grad_norm": 0.7770671246440083, "learning_rate": 5.331810033820817e-06, "loss": 0.353, "step": 14411 }, { "epoch": 0.49457789979409744, "grad_norm": 0.8028491704634841, "learning_rate": 5.33125551782129e-06, "loss": 0.3261, "step": 14412 }, { "epoch": 0.49461221688400825, "grad_norm": 0.8184202175732253, "learning_rate": 5.330700997729489e-06, "loss": 0.3069, "step": 14413 }, { "epoch": 0.494646533973919, "grad_norm": 0.7624031563977444, "learning_rate": 5.330146473552262e-06, "loss": 0.2728, "step": 14414 }, { "epoch": 0.4946808510638298, "grad_norm": 0.7970698330883023, "learning_rate": 5.329591945296459e-06, "loss": 0.2842, "step": 14415 }, { "epoch": 0.49471516815374056, "grad_norm": 0.9591170277054766, "learning_rate": 5.329037412968934e-06, "loss": 0.2801, "step": 14416 }, { "epoch": 0.49474948524365137, "grad_norm": 0.8321555002435898, "learning_rate": 5.328482876576534e-06, "loss": 0.3217, "step": 14417 }, { "epoch": 0.4947838023335621, "grad_norm": 0.7585655446646665, "learning_rate": 5.327928336126111e-06, "loss": 0.2531, "step": 14418 }, { "epoch": 0.49481811942347287, "grad_norm": 0.7815694834989322, "learning_rate": 5.327373791624517e-06, "loss": 0.2887, "step": 14419 }, { "epoch": 0.4948524365133837, "grad_norm": 0.7303335482652403, "learning_rate": 5.3268192430786004e-06, "loss": 0.2871, "step": 14420 }, { "epoch": 0.4948867536032944, "grad_norm": 0.7815830043385701, "learning_rate": 5.326264690495213e-06, "loss": 0.3185, "step": 14421 }, { "epoch": 0.49492107069320523, "grad_norm": 0.6868992812590685, "learning_rate": 5.325710133881205e-06, "loss": 0.2768, "step": 14422 }, { "epoch": 0.494955387783116, "grad_norm": 0.7370012272718315, "learning_rate": 5.325155573243429e-06, "loss": 0.2979, "step": 14423 }, { "epoch": 0.4949897048730268, "grad_norm": 0.7860359276194336, "learning_rate": 5.324601008588735e-06, "loss": 0.2755, "step": 14424 }, { "epoch": 0.49502402196293754, "grad_norm": 0.7626014903820657, "learning_rate": 5.324046439923975e-06, "loss": 0.3264, "step": 14425 }, { "epoch": 0.4950583390528483, "grad_norm": 0.7424018839445647, "learning_rate": 5.323491867255997e-06, "loss": 0.2855, "step": 14426 }, { "epoch": 0.4950926561427591, "grad_norm": 0.8083506855826258, "learning_rate": 5.322937290591657e-06, "loss": 0.2661, "step": 14427 }, { "epoch": 0.49512697323266985, "grad_norm": 0.7770437439259115, "learning_rate": 5.3223827099378e-06, "loss": 0.2933, "step": 14428 }, { "epoch": 0.49516129032258066, "grad_norm": 0.7453328911840407, "learning_rate": 5.321828125301282e-06, "loss": 0.2545, "step": 14429 }, { "epoch": 0.4951956074124914, "grad_norm": 0.7603313288400684, "learning_rate": 5.321273536688952e-06, "loss": 0.2421, "step": 14430 }, { "epoch": 0.4952299245024022, "grad_norm": 0.7173972988388367, "learning_rate": 5.320718944107664e-06, "loss": 0.2612, "step": 14431 }, { "epoch": 0.49526424159231297, "grad_norm": 0.8479599435473472, "learning_rate": 5.320164347564266e-06, "loss": 0.2622, "step": 14432 }, { "epoch": 0.4952985586822237, "grad_norm": 0.77663246591708, "learning_rate": 5.319609747065611e-06, "loss": 0.2567, "step": 14433 }, { "epoch": 0.49533287577213453, "grad_norm": 0.7547000388827451, "learning_rate": 5.319055142618549e-06, "loss": 0.2535, "step": 14434 }, { "epoch": 0.4953671928620453, "grad_norm": 0.7015212906428779, "learning_rate": 5.318500534229934e-06, "loss": 0.2992, "step": 14435 }, { "epoch": 0.4954015099519561, "grad_norm": 0.7038159771288008, "learning_rate": 5.317945921906615e-06, "loss": 0.2693, "step": 14436 }, { "epoch": 0.49543582704186684, "grad_norm": 0.7647652368815895, "learning_rate": 5.317391305655446e-06, "loss": 0.2957, "step": 14437 }, { "epoch": 0.49547014413177765, "grad_norm": 0.7438992791290753, "learning_rate": 5.316836685483278e-06, "loss": 0.3029, "step": 14438 }, { "epoch": 0.4955044612216884, "grad_norm": 1.3335936668014756, "learning_rate": 5.3162820613969614e-06, "loss": 0.2597, "step": 14439 }, { "epoch": 0.4955387783115992, "grad_norm": 1.4192087090650352, "learning_rate": 5.315727433403348e-06, "loss": 0.3033, "step": 14440 }, { "epoch": 0.49557309540150996, "grad_norm": 0.7658679484882779, "learning_rate": 5.315172801509291e-06, "loss": 0.3644, "step": 14441 }, { "epoch": 0.4956074124914207, "grad_norm": 0.7714593117247854, "learning_rate": 5.314618165721642e-06, "loss": 0.3214, "step": 14442 }, { "epoch": 0.4956417295813315, "grad_norm": 0.813781854704434, "learning_rate": 5.3140635260472514e-06, "loss": 0.271, "step": 14443 }, { "epoch": 0.49567604667124227, "grad_norm": 0.6742362846272194, "learning_rate": 5.313508882492971e-06, "loss": 0.2647, "step": 14444 }, { "epoch": 0.49571036376115307, "grad_norm": 0.7662127485846352, "learning_rate": 5.312954235065657e-06, "loss": 0.2849, "step": 14445 }, { "epoch": 0.4957446808510638, "grad_norm": 0.742334040864825, "learning_rate": 5.312399583772156e-06, "loss": 0.2723, "step": 14446 }, { "epoch": 0.49577899794097463, "grad_norm": 0.8544890473997939, "learning_rate": 5.311844928619323e-06, "loss": 0.3312, "step": 14447 }, { "epoch": 0.4958133150308854, "grad_norm": 0.7820428728533872, "learning_rate": 5.311290269614011e-06, "loss": 0.2618, "step": 14448 }, { "epoch": 0.49584763212079613, "grad_norm": 0.7616973363428755, "learning_rate": 5.310735606763069e-06, "loss": 0.299, "step": 14449 }, { "epoch": 0.49588194921070694, "grad_norm": 0.7111473406321764, "learning_rate": 5.310180940073351e-06, "loss": 0.2342, "step": 14450 }, { "epoch": 0.4959162663006177, "grad_norm": 0.7389074659313943, "learning_rate": 5.309626269551708e-06, "loss": 0.2518, "step": 14451 }, { "epoch": 0.4959505833905285, "grad_norm": 0.7237057662473387, "learning_rate": 5.309071595204994e-06, "loss": 0.2959, "step": 14452 }, { "epoch": 0.49598490048043925, "grad_norm": 0.8252907370355945, "learning_rate": 5.308516917040062e-06, "loss": 0.3151, "step": 14453 }, { "epoch": 0.49601921757035006, "grad_norm": 0.7461221158076731, "learning_rate": 5.307962235063763e-06, "loss": 0.2743, "step": 14454 }, { "epoch": 0.4960535346602608, "grad_norm": 0.8328169747309396, "learning_rate": 5.307407549282949e-06, "loss": 0.2455, "step": 14455 }, { "epoch": 0.49608785175017156, "grad_norm": 0.8167759914961967, "learning_rate": 5.306852859704473e-06, "loss": 0.2681, "step": 14456 }, { "epoch": 0.49612216884008237, "grad_norm": 0.8121896870242225, "learning_rate": 5.3062981663351886e-06, "loss": 0.2868, "step": 14457 }, { "epoch": 0.4961564859299931, "grad_norm": 0.8030212507359706, "learning_rate": 5.305743469181947e-06, "loss": 0.2648, "step": 14458 }, { "epoch": 0.4961908030199039, "grad_norm": 0.7975469000088312, "learning_rate": 5.3051887682516005e-06, "loss": 0.2823, "step": 14459 }, { "epoch": 0.4962251201098147, "grad_norm": 0.7548220754873347, "learning_rate": 5.304634063551003e-06, "loss": 0.3317, "step": 14460 }, { "epoch": 0.4962594371997255, "grad_norm": 0.7337338654566375, "learning_rate": 5.304079355087008e-06, "loss": 0.2896, "step": 14461 }, { "epoch": 0.49629375428963624, "grad_norm": 0.833753320007695, "learning_rate": 5.303524642866465e-06, "loss": 0.3218, "step": 14462 }, { "epoch": 0.496328071379547, "grad_norm": 0.8283726065239178, "learning_rate": 5.30296992689623e-06, "loss": 0.2947, "step": 14463 }, { "epoch": 0.4963623884694578, "grad_norm": 0.7398707519462218, "learning_rate": 5.302415207183156e-06, "loss": 0.2837, "step": 14464 }, { "epoch": 0.49639670555936855, "grad_norm": 0.7479350008002362, "learning_rate": 5.301860483734094e-06, "loss": 0.295, "step": 14465 }, { "epoch": 0.49643102264927935, "grad_norm": 0.6776087641399874, "learning_rate": 5.3013057565558955e-06, "loss": 0.2549, "step": 14466 }, { "epoch": 0.4964653397391901, "grad_norm": 0.662209905933325, "learning_rate": 5.300751025655417e-06, "loss": 0.2632, "step": 14467 }, { "epoch": 0.4964996568291009, "grad_norm": 0.7121270092247334, "learning_rate": 5.300196291039511e-06, "loss": 0.2494, "step": 14468 }, { "epoch": 0.49653397391901166, "grad_norm": 0.7036519833640412, "learning_rate": 5.299641552715029e-06, "loss": 0.2316, "step": 14469 }, { "epoch": 0.49656829100892247, "grad_norm": 0.8013724311382823, "learning_rate": 5.299086810688826e-06, "loss": 0.2849, "step": 14470 }, { "epoch": 0.4966026080988332, "grad_norm": 0.6185656611448752, "learning_rate": 5.298532064967754e-06, "loss": 0.2116, "step": 14471 }, { "epoch": 0.49663692518874397, "grad_norm": 0.9190884656687623, "learning_rate": 5.2979773155586656e-06, "loss": 0.2896, "step": 14472 }, { "epoch": 0.4966712422786548, "grad_norm": 0.7520483235558997, "learning_rate": 5.297422562468415e-06, "loss": 0.2469, "step": 14473 }, { "epoch": 0.49670555936856553, "grad_norm": 0.7494156572209385, "learning_rate": 5.296867805703856e-06, "loss": 0.2945, "step": 14474 }, { "epoch": 0.49673987645847634, "grad_norm": 0.7727109506616997, "learning_rate": 5.29631304527184e-06, "loss": 0.2703, "step": 14475 }, { "epoch": 0.4967741935483871, "grad_norm": 0.711252769520683, "learning_rate": 5.295758281179223e-06, "loss": 0.3072, "step": 14476 }, { "epoch": 0.4968085106382979, "grad_norm": 0.726475314985023, "learning_rate": 5.295203513432858e-06, "loss": 0.2708, "step": 14477 }, { "epoch": 0.49684282772820865, "grad_norm": 0.7416152661458356, "learning_rate": 5.294648742039596e-06, "loss": 0.3529, "step": 14478 }, { "epoch": 0.4968771448181194, "grad_norm": 0.8661505243172207, "learning_rate": 5.294093967006293e-06, "loss": 0.2719, "step": 14479 }, { "epoch": 0.4969114619080302, "grad_norm": 0.789626112913475, "learning_rate": 5.293539188339801e-06, "loss": 0.2826, "step": 14480 }, { "epoch": 0.49694577899794096, "grad_norm": 0.9337444079204567, "learning_rate": 5.292984406046976e-06, "loss": 0.2699, "step": 14481 }, { "epoch": 0.49698009608785176, "grad_norm": 0.7124467334468857, "learning_rate": 5.2924296201346696e-06, "loss": 0.2823, "step": 14482 }, { "epoch": 0.4970144131777625, "grad_norm": 0.7257858094338934, "learning_rate": 5.291874830609738e-06, "loss": 0.2569, "step": 14483 }, { "epoch": 0.4970487302676733, "grad_norm": 0.8119423242523885, "learning_rate": 5.29132003747903e-06, "loss": 0.284, "step": 14484 }, { "epoch": 0.4970830473575841, "grad_norm": 0.7654432880548115, "learning_rate": 5.290765240749405e-06, "loss": 0.2612, "step": 14485 }, { "epoch": 0.4971173644474948, "grad_norm": 0.7878597142883519, "learning_rate": 5.290210440427713e-06, "loss": 0.3044, "step": 14486 }, { "epoch": 0.49715168153740563, "grad_norm": 0.8600791497523954, "learning_rate": 5.289655636520811e-06, "loss": 0.3229, "step": 14487 }, { "epoch": 0.4971859986273164, "grad_norm": 0.7760552953590494, "learning_rate": 5.289100829035549e-06, "loss": 0.3175, "step": 14488 }, { "epoch": 0.4972203157172272, "grad_norm": 0.695496308137249, "learning_rate": 5.288546017978784e-06, "loss": 0.2892, "step": 14489 }, { "epoch": 0.49725463280713794, "grad_norm": 1.0509202588963886, "learning_rate": 5.287991203357371e-06, "loss": 0.2935, "step": 14490 }, { "epoch": 0.49728894989704875, "grad_norm": 0.637592734571768, "learning_rate": 5.28743638517816e-06, "loss": 0.2531, "step": 14491 }, { "epoch": 0.4973232669869595, "grad_norm": 0.7549556921923536, "learning_rate": 5.2868815634480096e-06, "loss": 0.3033, "step": 14492 }, { "epoch": 0.4973575840768703, "grad_norm": 0.6640438085846379, "learning_rate": 5.286326738173773e-06, "loss": 0.3134, "step": 14493 }, { "epoch": 0.49739190116678106, "grad_norm": 0.7383517463040824, "learning_rate": 5.2857719093623005e-06, "loss": 0.3192, "step": 14494 }, { "epoch": 0.4974262182566918, "grad_norm": 0.6672745178936126, "learning_rate": 5.285217077020451e-06, "loss": 0.2326, "step": 14495 }, { "epoch": 0.4974605353466026, "grad_norm": 0.7455470864582584, "learning_rate": 5.284662241155077e-06, "loss": 0.2767, "step": 14496 }, { "epoch": 0.49749485243651337, "grad_norm": 0.783731330524261, "learning_rate": 5.284107401773031e-06, "loss": 0.3042, "step": 14497 }, { "epoch": 0.4975291695264242, "grad_norm": 0.7565619033282499, "learning_rate": 5.283552558881171e-06, "loss": 0.2754, "step": 14498 }, { "epoch": 0.4975634866163349, "grad_norm": 0.8274844058552708, "learning_rate": 5.282997712486349e-06, "loss": 0.3907, "step": 14499 }, { "epoch": 0.49759780370624573, "grad_norm": 0.8142792537366178, "learning_rate": 5.282442862595421e-06, "loss": 0.3415, "step": 14500 }, { "epoch": 0.4976321207961565, "grad_norm": 0.789749518700623, "learning_rate": 5.281888009215241e-06, "loss": 0.2597, "step": 14501 }, { "epoch": 0.49766643788606724, "grad_norm": 0.78807898106747, "learning_rate": 5.281333152352661e-06, "loss": 0.3076, "step": 14502 }, { "epoch": 0.49770075497597804, "grad_norm": 0.7056079298339177, "learning_rate": 5.28077829201454e-06, "loss": 0.265, "step": 14503 }, { "epoch": 0.4977350720658888, "grad_norm": 0.7414624490336164, "learning_rate": 5.28022342820773e-06, "loss": 0.2501, "step": 14504 }, { "epoch": 0.4977693891557996, "grad_norm": 0.75700248404756, "learning_rate": 5.279668560939087e-06, "loss": 0.2999, "step": 14505 }, { "epoch": 0.49780370624571035, "grad_norm": 0.734455564585182, "learning_rate": 5.2791136902154635e-06, "loss": 0.2691, "step": 14506 }, { "epoch": 0.49783802333562116, "grad_norm": 0.7216987761741392, "learning_rate": 5.278558816043717e-06, "loss": 0.3009, "step": 14507 }, { "epoch": 0.4978723404255319, "grad_norm": 0.8158725805948964, "learning_rate": 5.2780039384307e-06, "loss": 0.2824, "step": 14508 }, { "epoch": 0.49790665751544266, "grad_norm": 0.7842554713082649, "learning_rate": 5.277449057383271e-06, "loss": 0.3604, "step": 14509 }, { "epoch": 0.49794097460535347, "grad_norm": 0.8559551346258116, "learning_rate": 5.27689417290828e-06, "loss": 0.2937, "step": 14510 }, { "epoch": 0.4979752916952642, "grad_norm": 0.8311694466345841, "learning_rate": 5.276339285012584e-06, "loss": 0.2813, "step": 14511 }, { "epoch": 0.49800960878517503, "grad_norm": 0.79758600341738, "learning_rate": 5.27578439370304e-06, "loss": 0.2824, "step": 14512 }, { "epoch": 0.4980439258750858, "grad_norm": 0.7530787482302935, "learning_rate": 5.2752294989864994e-06, "loss": 0.3294, "step": 14513 }, { "epoch": 0.4980782429649966, "grad_norm": 0.7685102705856609, "learning_rate": 5.2746746008698205e-06, "loss": 0.2634, "step": 14514 }, { "epoch": 0.49811256005490734, "grad_norm": 0.8854524345467653, "learning_rate": 5.2741196993598565e-06, "loss": 0.2551, "step": 14515 }, { "epoch": 0.49814687714481815, "grad_norm": 0.7326078944783112, "learning_rate": 5.273564794463465e-06, "loss": 0.3061, "step": 14516 }, { "epoch": 0.4981811942347289, "grad_norm": 0.6960968711174309, "learning_rate": 5.273009886187498e-06, "loss": 0.3112, "step": 14517 }, { "epoch": 0.49821551132463965, "grad_norm": 0.7864142940857898, "learning_rate": 5.272454974538812e-06, "loss": 0.2719, "step": 14518 }, { "epoch": 0.49824982841455046, "grad_norm": 0.7788482529356026, "learning_rate": 5.271900059524262e-06, "loss": 0.2823, "step": 14519 }, { "epoch": 0.4982841455044612, "grad_norm": 0.8154566757754994, "learning_rate": 5.271345141150704e-06, "loss": 0.2701, "step": 14520 }, { "epoch": 0.498318462594372, "grad_norm": 0.8280632355430538, "learning_rate": 5.2707902194249935e-06, "loss": 0.309, "step": 14521 }, { "epoch": 0.49835277968428277, "grad_norm": 0.6893982496350195, "learning_rate": 5.270235294353987e-06, "loss": 0.264, "step": 14522 }, { "epoch": 0.49838709677419357, "grad_norm": 0.7154975931242852, "learning_rate": 5.269680365944537e-06, "loss": 0.2885, "step": 14523 }, { "epoch": 0.4984214138641043, "grad_norm": 0.8962874913408186, "learning_rate": 5.2691254342035e-06, "loss": 0.2847, "step": 14524 }, { "epoch": 0.4984557309540151, "grad_norm": 0.7536119198145134, "learning_rate": 5.268570499137733e-06, "loss": 0.3628, "step": 14525 }, { "epoch": 0.4984900480439259, "grad_norm": 0.8054223849164587, "learning_rate": 5.26801556075409e-06, "loss": 0.3015, "step": 14526 }, { "epoch": 0.49852436513383663, "grad_norm": 0.79986792382497, "learning_rate": 5.2674606190594265e-06, "loss": 0.3624, "step": 14527 }, { "epoch": 0.49855868222374744, "grad_norm": 0.7233896913856467, "learning_rate": 5.266905674060601e-06, "loss": 0.3604, "step": 14528 }, { "epoch": 0.4985929993136582, "grad_norm": 0.7020148846767063, "learning_rate": 5.266350725764467e-06, "loss": 0.2751, "step": 14529 }, { "epoch": 0.498627316403569, "grad_norm": 0.6626853155827084, "learning_rate": 5.265795774177878e-06, "loss": 0.3261, "step": 14530 }, { "epoch": 0.49866163349347975, "grad_norm": 0.8007897865581577, "learning_rate": 5.265240819307693e-06, "loss": 0.3012, "step": 14531 }, { "epoch": 0.4986959505833905, "grad_norm": 0.6918799851112987, "learning_rate": 5.264685861160769e-06, "loss": 0.2448, "step": 14532 }, { "epoch": 0.4987302676733013, "grad_norm": 0.7397923933860183, "learning_rate": 5.264130899743956e-06, "loss": 0.2444, "step": 14533 }, { "epoch": 0.49876458476321206, "grad_norm": 0.896275570216226, "learning_rate": 5.263575935064116e-06, "loss": 0.3166, "step": 14534 }, { "epoch": 0.49879890185312287, "grad_norm": 0.8398662676632317, "learning_rate": 5.263020967128103e-06, "loss": 0.3342, "step": 14535 }, { "epoch": 0.4988332189430336, "grad_norm": 0.8156296241443362, "learning_rate": 5.262465995942773e-06, "loss": 0.2628, "step": 14536 }, { "epoch": 0.4988675360329444, "grad_norm": 0.7850528156982995, "learning_rate": 5.2619110215149795e-06, "loss": 0.2571, "step": 14537 }, { "epoch": 0.4989018531228552, "grad_norm": 0.7764699889576734, "learning_rate": 5.261356043851583e-06, "loss": 0.2878, "step": 14538 }, { "epoch": 0.498936170212766, "grad_norm": 0.7937829393877476, "learning_rate": 5.260801062959435e-06, "loss": 0.3498, "step": 14539 }, { "epoch": 0.49897048730267674, "grad_norm": 0.7599803476483141, "learning_rate": 5.260246078845395e-06, "loss": 0.275, "step": 14540 }, { "epoch": 0.4990048043925875, "grad_norm": 0.6808778156364415, "learning_rate": 5.259691091516318e-06, "loss": 0.2538, "step": 14541 }, { "epoch": 0.4990391214824983, "grad_norm": 0.7822875242981414, "learning_rate": 5.259136100979061e-06, "loss": 0.3055, "step": 14542 }, { "epoch": 0.49907343857240904, "grad_norm": 0.800453396291014, "learning_rate": 5.258581107240478e-06, "loss": 0.2728, "step": 14543 }, { "epoch": 0.49910775566231985, "grad_norm": 0.8088286561750645, "learning_rate": 5.258026110307429e-06, "loss": 0.2781, "step": 14544 }, { "epoch": 0.4991420727522306, "grad_norm": 0.7220083236605118, "learning_rate": 5.257471110186767e-06, "loss": 0.3249, "step": 14545 }, { "epoch": 0.4991763898421414, "grad_norm": 0.738990862550926, "learning_rate": 5.25691610688535e-06, "loss": 0.3297, "step": 14546 }, { "epoch": 0.49921070693205216, "grad_norm": 0.7358467946682542, "learning_rate": 5.2563611004100335e-06, "loss": 0.2905, "step": 14547 }, { "epoch": 0.4992450240219629, "grad_norm": 0.7571010198837209, "learning_rate": 5.255806090767675e-06, "loss": 0.3361, "step": 14548 }, { "epoch": 0.4992793411118737, "grad_norm": 0.7188973085967453, "learning_rate": 5.25525107796513e-06, "loss": 0.2565, "step": 14549 }, { "epoch": 0.49931365820178447, "grad_norm": 0.8066936097389964, "learning_rate": 5.254696062009256e-06, "loss": 0.2573, "step": 14550 }, { "epoch": 0.4993479752916953, "grad_norm": 0.8482337635400903, "learning_rate": 5.2541410429069095e-06, "loss": 0.2698, "step": 14551 }, { "epoch": 0.49938229238160603, "grad_norm": 0.8027884073245529, "learning_rate": 5.253586020664946e-06, "loss": 0.2971, "step": 14552 }, { "epoch": 0.49941660947151684, "grad_norm": 0.7773778851146051, "learning_rate": 5.2530309952902225e-06, "loss": 0.2833, "step": 14553 }, { "epoch": 0.4994509265614276, "grad_norm": 0.8778977471204987, "learning_rate": 5.252475966789598e-06, "loss": 0.3179, "step": 14554 }, { "epoch": 0.49948524365133834, "grad_norm": 0.7517309695615737, "learning_rate": 5.251920935169924e-06, "loss": 0.2401, "step": 14555 }, { "epoch": 0.49951956074124915, "grad_norm": 0.8076830310191462, "learning_rate": 5.251365900438062e-06, "loss": 0.2835, "step": 14556 }, { "epoch": 0.4995538778311599, "grad_norm": 0.7604565697078941, "learning_rate": 5.2508108626008695e-06, "loss": 0.3102, "step": 14557 }, { "epoch": 0.4995881949210707, "grad_norm": 0.7545504066630997, "learning_rate": 5.250255821665199e-06, "loss": 0.3013, "step": 14558 }, { "epoch": 0.49962251201098146, "grad_norm": 0.7559463217932435, "learning_rate": 5.2497007776379095e-06, "loss": 0.2858, "step": 14559 }, { "epoch": 0.49965682910089226, "grad_norm": 0.795907997275962, "learning_rate": 5.24914573052586e-06, "loss": 0.2735, "step": 14560 }, { "epoch": 0.499691146190803, "grad_norm": 0.7877345752491128, "learning_rate": 5.248590680335903e-06, "loss": 0.2714, "step": 14561 }, { "epoch": 0.4997254632807138, "grad_norm": 0.8595198995060136, "learning_rate": 5.2480356270749e-06, "loss": 0.3381, "step": 14562 }, { "epoch": 0.4997597803706246, "grad_norm": 1.1646429784923067, "learning_rate": 5.247480570749704e-06, "loss": 0.2814, "step": 14563 }, { "epoch": 0.4997940974605353, "grad_norm": 0.7862323279391925, "learning_rate": 5.246925511367176e-06, "loss": 0.2972, "step": 14564 }, { "epoch": 0.49982841455044613, "grad_norm": 0.7857354153865199, "learning_rate": 5.24637044893417e-06, "loss": 0.2891, "step": 14565 }, { "epoch": 0.4998627316403569, "grad_norm": 0.7664614930529173, "learning_rate": 5.2458153834575446e-06, "loss": 0.3233, "step": 14566 }, { "epoch": 0.4998970487302677, "grad_norm": 0.7684834612249181, "learning_rate": 5.245260314944157e-06, "loss": 0.2902, "step": 14567 }, { "epoch": 0.49993136582017844, "grad_norm": 0.752498334728335, "learning_rate": 5.244705243400864e-06, "loss": 0.2677, "step": 14568 }, { "epoch": 0.49996568291008925, "grad_norm": 0.9460177972781438, "learning_rate": 5.244150168834523e-06, "loss": 0.2954, "step": 14569 }, { "epoch": 0.5, "grad_norm": 0.7737249154082096, "learning_rate": 5.243595091251991e-06, "loss": 0.2753, "step": 14570 }, { "epoch": 0.5000343170899108, "grad_norm": 0.863800559787414, "learning_rate": 5.2430400106601255e-06, "loss": 0.3043, "step": 14571 }, { "epoch": 0.5000686341798215, "grad_norm": 0.7346608065499036, "learning_rate": 5.242484927065784e-06, "loss": 0.305, "step": 14572 }, { "epoch": 0.5001029512697324, "grad_norm": 0.8031963222060021, "learning_rate": 5.241929840475825e-06, "loss": 0.2642, "step": 14573 }, { "epoch": 0.5001372683596431, "grad_norm": 0.898939155558038, "learning_rate": 5.241374750897105e-06, "loss": 0.3038, "step": 14574 }, { "epoch": 0.5001715854495539, "grad_norm": 0.7661117157958874, "learning_rate": 5.24081965833648e-06, "loss": 0.297, "step": 14575 }, { "epoch": 0.5002059025394646, "grad_norm": 0.8165400586593227, "learning_rate": 5.240264562800811e-06, "loss": 0.3, "step": 14576 }, { "epoch": 0.5002402196293755, "grad_norm": 0.8266668462524819, "learning_rate": 5.239709464296951e-06, "loss": 0.2845, "step": 14577 }, { "epoch": 0.5002745367192862, "grad_norm": 0.737396356874079, "learning_rate": 5.23915436283176e-06, "loss": 0.2913, "step": 14578 }, { "epoch": 0.500308853809197, "grad_norm": 0.7991311462212457, "learning_rate": 5.238599258412097e-06, "loss": 0.2774, "step": 14579 }, { "epoch": 0.5003431708991077, "grad_norm": 0.834071364549906, "learning_rate": 5.2380441510448185e-06, "loss": 0.3147, "step": 14580 }, { "epoch": 0.5003774879890185, "grad_norm": 0.8058584829122203, "learning_rate": 5.237489040736782e-06, "loss": 0.2981, "step": 14581 }, { "epoch": 0.5004118050789294, "grad_norm": 0.7772678295213625, "learning_rate": 5.236933927494845e-06, "loss": 0.3417, "step": 14582 }, { "epoch": 0.5004461221688401, "grad_norm": 0.8949279689402, "learning_rate": 5.236378811325866e-06, "loss": 0.2981, "step": 14583 }, { "epoch": 0.5004804392587509, "grad_norm": 0.7564764844701819, "learning_rate": 5.2358236922367015e-06, "loss": 0.2337, "step": 14584 }, { "epoch": 0.5005147563486616, "grad_norm": 0.7110355048929551, "learning_rate": 5.2352685702342095e-06, "loss": 0.3089, "step": 14585 }, { "epoch": 0.5005490734385724, "grad_norm": 0.7772318203631202, "learning_rate": 5.23471344532525e-06, "loss": 0.3132, "step": 14586 }, { "epoch": 0.5005833905284832, "grad_norm": 0.8171371472719786, "learning_rate": 5.234158317516679e-06, "loss": 0.3102, "step": 14587 }, { "epoch": 0.500617707618394, "grad_norm": 0.7689057737941973, "learning_rate": 5.233603186815356e-06, "loss": 0.2793, "step": 14588 }, { "epoch": 0.5006520247083047, "grad_norm": 0.7780431962726514, "learning_rate": 5.233048053228137e-06, "loss": 0.2541, "step": 14589 }, { "epoch": 0.5006863417982155, "grad_norm": 0.7998232618761246, "learning_rate": 5.232492916761881e-06, "loss": 0.3525, "step": 14590 }, { "epoch": 0.5007206588881263, "grad_norm": 0.898674217098982, "learning_rate": 5.231937777423447e-06, "loss": 0.326, "step": 14591 }, { "epoch": 0.5007549759780371, "grad_norm": 0.7532860437000947, "learning_rate": 5.231382635219691e-06, "loss": 0.2431, "step": 14592 }, { "epoch": 0.5007892930679478, "grad_norm": 0.6602239594042667, "learning_rate": 5.230827490157471e-06, "loss": 0.2481, "step": 14593 }, { "epoch": 0.5008236101578586, "grad_norm": 0.7304984803442003, "learning_rate": 5.230272342243649e-06, "loss": 0.2257, "step": 14594 }, { "epoch": 0.5008579272477693, "grad_norm": 0.7970292994803567, "learning_rate": 5.22971719148508e-06, "loss": 0.2983, "step": 14595 }, { "epoch": 0.5008922443376802, "grad_norm": 0.7448511499459409, "learning_rate": 5.229162037888624e-06, "loss": 0.3059, "step": 14596 }, { "epoch": 0.500926561427591, "grad_norm": 0.7392104413189079, "learning_rate": 5.228606881461137e-06, "loss": 0.2628, "step": 14597 }, { "epoch": 0.5009608785175017, "grad_norm": 0.7010102702781, "learning_rate": 5.228051722209479e-06, "loss": 0.2652, "step": 14598 }, { "epoch": 0.5009951956074125, "grad_norm": 0.7350852180693666, "learning_rate": 5.227496560140507e-06, "loss": 0.297, "step": 14599 }, { "epoch": 0.5010295126973233, "grad_norm": 0.7251862686494027, "learning_rate": 5.2269413952610795e-06, "loss": 0.2553, "step": 14600 }, { "epoch": 0.5010638297872341, "grad_norm": 0.747386199707455, "learning_rate": 5.2263862275780565e-06, "loss": 0.3079, "step": 14601 }, { "epoch": 0.5010981468771448, "grad_norm": 0.8589601918448158, "learning_rate": 5.225831057098298e-06, "loss": 0.3267, "step": 14602 }, { "epoch": 0.5011324639670556, "grad_norm": 0.7843375621708206, "learning_rate": 5.225275883828656e-06, "loss": 0.2837, "step": 14603 }, { "epoch": 0.5011667810569663, "grad_norm": 0.8146785535809044, "learning_rate": 5.224720707775996e-06, "loss": 0.2915, "step": 14604 }, { "epoch": 0.5012010981468772, "grad_norm": 0.789367488497583, "learning_rate": 5.224165528947173e-06, "loss": 0.2469, "step": 14605 }, { "epoch": 0.5012354152367879, "grad_norm": 0.8278269933584484, "learning_rate": 5.223610347349046e-06, "loss": 0.2788, "step": 14606 }, { "epoch": 0.5012697323266987, "grad_norm": 0.6716218577216543, "learning_rate": 5.223055162988474e-06, "loss": 0.2691, "step": 14607 }, { "epoch": 0.5013040494166094, "grad_norm": 0.6496672504104195, "learning_rate": 5.222499975872315e-06, "loss": 0.2981, "step": 14608 }, { "epoch": 0.5013383665065202, "grad_norm": 0.7695502946816956, "learning_rate": 5.221944786007428e-06, "loss": 0.2713, "step": 14609 }, { "epoch": 0.5013726835964311, "grad_norm": 0.7594939133286221, "learning_rate": 5.221389593400672e-06, "loss": 0.3206, "step": 14610 }, { "epoch": 0.5014070006863418, "grad_norm": 0.6999349488300525, "learning_rate": 5.220834398058906e-06, "loss": 0.2663, "step": 14611 }, { "epoch": 0.5014413177762526, "grad_norm": 0.955024432130914, "learning_rate": 5.22027919998899e-06, "loss": 0.3373, "step": 14612 }, { "epoch": 0.5014756348661633, "grad_norm": 0.8090018405286269, "learning_rate": 5.21972399919778e-06, "loss": 0.3183, "step": 14613 }, { "epoch": 0.5015099519560742, "grad_norm": 0.8821332317138945, "learning_rate": 5.2191687956921355e-06, "loss": 0.3078, "step": 14614 }, { "epoch": 0.5015442690459849, "grad_norm": 0.7935179961759947, "learning_rate": 5.218613589478917e-06, "loss": 0.288, "step": 14615 }, { "epoch": 0.5015785861358957, "grad_norm": 0.8400017456199006, "learning_rate": 5.218058380564981e-06, "loss": 0.2996, "step": 14616 }, { "epoch": 0.5016129032258064, "grad_norm": 0.7285220684661906, "learning_rate": 5.21750316895719e-06, "loss": 0.2987, "step": 14617 }, { "epoch": 0.5016472203157172, "grad_norm": 0.7360834510722211, "learning_rate": 5.2169479546624e-06, "loss": 0.2823, "step": 14618 }, { "epoch": 0.501681537405628, "grad_norm": 0.8252454331605734, "learning_rate": 5.216392737687471e-06, "loss": 0.2574, "step": 14619 }, { "epoch": 0.5017158544955388, "grad_norm": 0.749164784708814, "learning_rate": 5.215837518039262e-06, "loss": 0.3221, "step": 14620 }, { "epoch": 0.5017501715854495, "grad_norm": 0.8388752888062904, "learning_rate": 5.215282295724633e-06, "loss": 0.2674, "step": 14621 }, { "epoch": 0.5017844886753603, "grad_norm": 0.7440095734678251, "learning_rate": 5.21472707075044e-06, "loss": 0.2596, "step": 14622 }, { "epoch": 0.5018188057652712, "grad_norm": 0.7761042468981723, "learning_rate": 5.214171843123545e-06, "loss": 0.2559, "step": 14623 }, { "epoch": 0.5018531228551819, "grad_norm": 0.7688072103994199, "learning_rate": 5.213616612850809e-06, "loss": 0.2781, "step": 14624 }, { "epoch": 0.5018874399450927, "grad_norm": 0.7863188169370875, "learning_rate": 5.213061379939086e-06, "loss": 0.2951, "step": 14625 }, { "epoch": 0.5019217570350034, "grad_norm": 0.7530873659689062, "learning_rate": 5.21250614439524e-06, "loss": 0.303, "step": 14626 }, { "epoch": 0.5019560741249142, "grad_norm": 0.7331429661209623, "learning_rate": 5.211950906226126e-06, "loss": 0.2856, "step": 14627 }, { "epoch": 0.501990391214825, "grad_norm": 0.8602401091216655, "learning_rate": 5.2113956654386085e-06, "loss": 0.3368, "step": 14628 }, { "epoch": 0.5020247083047358, "grad_norm": 0.8133772571182307, "learning_rate": 5.2108404220395425e-06, "loss": 0.313, "step": 14629 }, { "epoch": 0.5020590253946465, "grad_norm": 0.8247966164033212, "learning_rate": 5.210285176035788e-06, "loss": 0.2911, "step": 14630 }, { "epoch": 0.5020933424845573, "grad_norm": 0.7459266102786, "learning_rate": 5.209729927434207e-06, "loss": 0.2876, "step": 14631 }, { "epoch": 0.502127659574468, "grad_norm": 0.7810841445558273, "learning_rate": 5.209174676241657e-06, "loss": 0.2682, "step": 14632 }, { "epoch": 0.5021619766643789, "grad_norm": 0.8149057545871651, "learning_rate": 5.208619422464998e-06, "loss": 0.312, "step": 14633 }, { "epoch": 0.5021962937542896, "grad_norm": 0.7396768114549462, "learning_rate": 5.208064166111088e-06, "loss": 0.2527, "step": 14634 }, { "epoch": 0.5022306108442004, "grad_norm": 0.7641449913783416, "learning_rate": 5.207508907186789e-06, "loss": 0.296, "step": 14635 }, { "epoch": 0.5022649279341111, "grad_norm": 0.7350011200580469, "learning_rate": 5.206953645698959e-06, "loss": 0.2803, "step": 14636 }, { "epoch": 0.502299245024022, "grad_norm": 0.7689596312643393, "learning_rate": 5.206398381654457e-06, "loss": 0.2251, "step": 14637 }, { "epoch": 0.5023335621139328, "grad_norm": 0.9329535995009268, "learning_rate": 5.205843115060145e-06, "loss": 0.3168, "step": 14638 }, { "epoch": 0.5023678792038435, "grad_norm": 0.7292148700785764, "learning_rate": 5.20528784592288e-06, "loss": 0.2664, "step": 14639 }, { "epoch": 0.5024021962937543, "grad_norm": 0.7822460983550763, "learning_rate": 5.204732574249523e-06, "loss": 0.2978, "step": 14640 }, { "epoch": 0.502436513383665, "grad_norm": 0.7436892833536545, "learning_rate": 5.204177300046936e-06, "loss": 0.3075, "step": 14641 }, { "epoch": 0.5024708304735759, "grad_norm": 0.8035096846916172, "learning_rate": 5.2036220233219746e-06, "loss": 0.2856, "step": 14642 }, { "epoch": 0.5025051475634866, "grad_norm": 0.8558935342401345, "learning_rate": 5.203066744081502e-06, "loss": 0.3176, "step": 14643 }, { "epoch": 0.5025394646533974, "grad_norm": 0.8598390371028267, "learning_rate": 5.202511462332375e-06, "loss": 0.3053, "step": 14644 }, { "epoch": 0.5025737817433081, "grad_norm": 0.8546532236078448, "learning_rate": 5.201956178081455e-06, "loss": 0.2298, "step": 14645 }, { "epoch": 0.502608098833219, "grad_norm": 0.9916774821609635, "learning_rate": 5.201400891335602e-06, "loss": 0.3765, "step": 14646 }, { "epoch": 0.5026424159231297, "grad_norm": 0.7568732027790778, "learning_rate": 5.200845602101677e-06, "loss": 0.2658, "step": 14647 }, { "epoch": 0.5026767330130405, "grad_norm": 0.687203831898396, "learning_rate": 5.200290310386537e-06, "loss": 0.2457, "step": 14648 }, { "epoch": 0.5027110501029513, "grad_norm": 0.9695528461600836, "learning_rate": 5.199735016197045e-06, "loss": 0.2429, "step": 14649 }, { "epoch": 0.502745367192862, "grad_norm": 0.7844401421114323, "learning_rate": 5.1991797195400605e-06, "loss": 0.2549, "step": 14650 }, { "epoch": 0.5027796842827729, "grad_norm": 0.7566674573384454, "learning_rate": 5.198624420422441e-06, "loss": 0.2936, "step": 14651 }, { "epoch": 0.5028140013726836, "grad_norm": 0.8194800546600902, "learning_rate": 5.198069118851048e-06, "loss": 0.3282, "step": 14652 }, { "epoch": 0.5028483184625944, "grad_norm": 0.8182886053098242, "learning_rate": 5.1975138148327445e-06, "loss": 0.3006, "step": 14653 }, { "epoch": 0.5028826355525051, "grad_norm": 0.7711170706179111, "learning_rate": 5.196958508374385e-06, "loss": 0.2572, "step": 14654 }, { "epoch": 0.5029169526424159, "grad_norm": 0.7358753512108557, "learning_rate": 5.196403199482836e-06, "loss": 0.3174, "step": 14655 }, { "epoch": 0.5029512697323267, "grad_norm": 0.8332317157975936, "learning_rate": 5.195847888164952e-06, "loss": 0.3168, "step": 14656 }, { "epoch": 0.5029855868222375, "grad_norm": 0.8076799690607487, "learning_rate": 5.195292574427597e-06, "loss": 0.2905, "step": 14657 }, { "epoch": 0.5030199039121482, "grad_norm": 0.8623035336501124, "learning_rate": 5.194737258277629e-06, "loss": 0.2812, "step": 14658 }, { "epoch": 0.503054221002059, "grad_norm": 0.7076750592369512, "learning_rate": 5.194181939721909e-06, "loss": 0.2805, "step": 14659 }, { "epoch": 0.5030885380919699, "grad_norm": 0.7758794874994199, "learning_rate": 5.193626618767299e-06, "loss": 0.2779, "step": 14660 }, { "epoch": 0.5031228551818806, "grad_norm": 0.7540328440255527, "learning_rate": 5.193071295420656e-06, "loss": 0.2962, "step": 14661 }, { "epoch": 0.5031571722717914, "grad_norm": 0.8797270875027763, "learning_rate": 5.192515969688844e-06, "loss": 0.3008, "step": 14662 }, { "epoch": 0.5031914893617021, "grad_norm": 0.784898018018224, "learning_rate": 5.191960641578722e-06, "loss": 0.2599, "step": 14663 }, { "epoch": 0.5032258064516129, "grad_norm": 0.7537886370746077, "learning_rate": 5.191405311097148e-06, "loss": 0.2948, "step": 14664 }, { "epoch": 0.5032601235415237, "grad_norm": 0.7705779654624956, "learning_rate": 5.190849978250986e-06, "loss": 0.2999, "step": 14665 }, { "epoch": 0.5032944406314345, "grad_norm": 0.7004825668452258, "learning_rate": 5.190294643047094e-06, "loss": 0.2775, "step": 14666 }, { "epoch": 0.5033287577213452, "grad_norm": 0.6941928823698607, "learning_rate": 5.189739305492334e-06, "loss": 0.2671, "step": 14667 }, { "epoch": 0.503363074811256, "grad_norm": 0.7119265377631897, "learning_rate": 5.189183965593566e-06, "loss": 0.2523, "step": 14668 }, { "epoch": 0.5033973919011668, "grad_norm": 0.8196145938990046, "learning_rate": 5.188628623357651e-06, "loss": 0.3444, "step": 14669 }, { "epoch": 0.5034317089910776, "grad_norm": 0.7576236022677982, "learning_rate": 5.18807327879145e-06, "loss": 0.2826, "step": 14670 }, { "epoch": 0.5034660260809883, "grad_norm": 0.7246476874491627, "learning_rate": 5.1875179319018205e-06, "loss": 0.2872, "step": 14671 }, { "epoch": 0.5035003431708991, "grad_norm": 0.7759465112670918, "learning_rate": 5.186962582695628e-06, "loss": 0.2812, "step": 14672 }, { "epoch": 0.5035346602608098, "grad_norm": 0.7525971186462617, "learning_rate": 5.186407231179731e-06, "loss": 0.3157, "step": 14673 }, { "epoch": 0.5035689773507207, "grad_norm": 0.7246136746099604, "learning_rate": 5.185851877360988e-06, "loss": 0.3074, "step": 14674 }, { "epoch": 0.5036032944406315, "grad_norm": 0.7717033868742652, "learning_rate": 5.185296521246261e-06, "loss": 0.2623, "step": 14675 }, { "epoch": 0.5036376115305422, "grad_norm": 0.8202748598190166, "learning_rate": 5.184741162842415e-06, "loss": 0.2252, "step": 14676 }, { "epoch": 0.503671928620453, "grad_norm": 0.7211227905970601, "learning_rate": 5.184185802156305e-06, "loss": 0.3445, "step": 14677 }, { "epoch": 0.5037062457103637, "grad_norm": 1.6371358425725293, "learning_rate": 5.1836304391947945e-06, "loss": 0.2989, "step": 14678 }, { "epoch": 0.5037405628002746, "grad_norm": 0.7767317258637584, "learning_rate": 5.183075073964745e-06, "loss": 0.262, "step": 14679 }, { "epoch": 0.5037748798901853, "grad_norm": 0.7513664936168334, "learning_rate": 5.182519706473015e-06, "loss": 0.2761, "step": 14680 }, { "epoch": 0.5038091969800961, "grad_norm": 0.9132441223656815, "learning_rate": 5.181964336726467e-06, "loss": 0.2152, "step": 14681 }, { "epoch": 0.5038435140700068, "grad_norm": 0.8468050067076497, "learning_rate": 5.181408964731962e-06, "loss": 0.2967, "step": 14682 }, { "epoch": 0.5038778311599177, "grad_norm": 0.7968412056525778, "learning_rate": 5.180853590496361e-06, "loss": 0.315, "step": 14683 }, { "epoch": 0.5039121482498284, "grad_norm": 0.9028510791438985, "learning_rate": 5.180298214026525e-06, "loss": 0.3613, "step": 14684 }, { "epoch": 0.5039464653397392, "grad_norm": 0.6996948104860119, "learning_rate": 5.179742835329315e-06, "loss": 0.2583, "step": 14685 }, { "epoch": 0.5039807824296499, "grad_norm": 0.8390163516510218, "learning_rate": 5.17918745441159e-06, "loss": 0.2708, "step": 14686 }, { "epoch": 0.5040150995195607, "grad_norm": 0.8681921305173328, "learning_rate": 5.178632071280215e-06, "loss": 0.35, "step": 14687 }, { "epoch": 0.5040494166094716, "grad_norm": 0.7801316866639799, "learning_rate": 5.178076685942048e-06, "loss": 0.2802, "step": 14688 }, { "epoch": 0.5040837336993823, "grad_norm": 0.8155097239906246, "learning_rate": 5.177521298403952e-06, "loss": 0.348, "step": 14689 }, { "epoch": 0.5041180507892931, "grad_norm": 0.7185806148172939, "learning_rate": 5.176965908672785e-06, "loss": 0.3398, "step": 14690 }, { "epoch": 0.5041523678792038, "grad_norm": 0.756379462497003, "learning_rate": 5.1764105167554125e-06, "loss": 0.3228, "step": 14691 }, { "epoch": 0.5041866849691147, "grad_norm": 0.7215847537283628, "learning_rate": 5.175855122658694e-06, "loss": 0.2574, "step": 14692 }, { "epoch": 0.5042210020590254, "grad_norm": 0.6921966317960434, "learning_rate": 5.17529972638949e-06, "loss": 0.2578, "step": 14693 }, { "epoch": 0.5042553191489362, "grad_norm": 0.8645262218067734, "learning_rate": 5.174744327954661e-06, "loss": 0.2821, "step": 14694 }, { "epoch": 0.5042896362388469, "grad_norm": 0.789819156708641, "learning_rate": 5.174188927361072e-06, "loss": 0.3345, "step": 14695 }, { "epoch": 0.5043239533287577, "grad_norm": 0.7614371670923705, "learning_rate": 5.173633524615581e-06, "loss": 0.2642, "step": 14696 }, { "epoch": 0.5043582704186685, "grad_norm": 0.7590064891552121, "learning_rate": 5.173078119725048e-06, "loss": 0.2979, "step": 14697 }, { "epoch": 0.5043925875085793, "grad_norm": 0.8147537679158556, "learning_rate": 5.172522712696339e-06, "loss": 0.282, "step": 14698 }, { "epoch": 0.50442690459849, "grad_norm": 0.8905626977803075, "learning_rate": 5.171967303536313e-06, "loss": 0.3249, "step": 14699 }, { "epoch": 0.5044612216884008, "grad_norm": 0.7777299595684837, "learning_rate": 5.171411892251831e-06, "loss": 0.2843, "step": 14700 }, { "epoch": 0.5044955387783115, "grad_norm": 0.8273699286129698, "learning_rate": 5.170856478849755e-06, "loss": 0.2559, "step": 14701 }, { "epoch": 0.5045298558682224, "grad_norm": 0.7871763313555733, "learning_rate": 5.170301063336947e-06, "loss": 0.2825, "step": 14702 }, { "epoch": 0.5045641729581332, "grad_norm": 0.7001993578282512, "learning_rate": 5.169745645720267e-06, "loss": 0.278, "step": 14703 }, { "epoch": 0.5045984900480439, "grad_norm": 0.8233284428231625, "learning_rate": 5.1691902260065775e-06, "loss": 0.3164, "step": 14704 }, { "epoch": 0.5046328071379547, "grad_norm": 0.8245614811903498, "learning_rate": 5.168634804202739e-06, "loss": 0.3209, "step": 14705 }, { "epoch": 0.5046671242278655, "grad_norm": 0.7496189672026767, "learning_rate": 5.168079380315616e-06, "loss": 0.3071, "step": 14706 }, { "epoch": 0.5047014413177763, "grad_norm": 0.7624766562442441, "learning_rate": 5.167523954352067e-06, "loss": 0.2798, "step": 14707 }, { "epoch": 0.504735758407687, "grad_norm": 0.7765249045658049, "learning_rate": 5.166968526318956e-06, "loss": 0.3194, "step": 14708 }, { "epoch": 0.5047700754975978, "grad_norm": 0.8209358489737434, "learning_rate": 5.166413096223143e-06, "loss": 0.2858, "step": 14709 }, { "epoch": 0.5048043925875085, "grad_norm": 0.8799984387463697, "learning_rate": 5.165857664071489e-06, "loss": 0.2963, "step": 14710 }, { "epoch": 0.5048387096774194, "grad_norm": 0.7872280969916297, "learning_rate": 5.165302229870859e-06, "loss": 0.3426, "step": 14711 }, { "epoch": 0.5048730267673301, "grad_norm": 0.769643219645419, "learning_rate": 5.16474679362811e-06, "loss": 0.2776, "step": 14712 }, { "epoch": 0.5049073438572409, "grad_norm": 0.7529862034833362, "learning_rate": 5.164191355350109e-06, "loss": 0.3108, "step": 14713 }, { "epoch": 0.5049416609471516, "grad_norm": 0.7573210109230117, "learning_rate": 5.163635915043715e-06, "loss": 0.2859, "step": 14714 }, { "epoch": 0.5049759780370625, "grad_norm": 0.8728274034156713, "learning_rate": 5.163080472715789e-06, "loss": 0.3161, "step": 14715 }, { "epoch": 0.5050102951269733, "grad_norm": 0.7811340081594744, "learning_rate": 5.162525028373194e-06, "loss": 0.2737, "step": 14716 }, { "epoch": 0.505044612216884, "grad_norm": 0.7183414105604939, "learning_rate": 5.1619695820227935e-06, "loss": 0.2678, "step": 14717 }, { "epoch": 0.5050789293067948, "grad_norm": 0.6908095890470983, "learning_rate": 5.161414133671446e-06, "loss": 0.2541, "step": 14718 }, { "epoch": 0.5051132463967055, "grad_norm": 0.7313749447842702, "learning_rate": 5.160858683326013e-06, "loss": 0.2462, "step": 14719 }, { "epoch": 0.5051475634866164, "grad_norm": 0.6877451016969461, "learning_rate": 5.160303230993361e-06, "loss": 0.2873, "step": 14720 }, { "epoch": 0.5051818805765271, "grad_norm": 0.8804261872810817, "learning_rate": 5.159747776680351e-06, "loss": 0.2785, "step": 14721 }, { "epoch": 0.5052161976664379, "grad_norm": 0.8029380107252719, "learning_rate": 5.159192320393841e-06, "loss": 0.2777, "step": 14722 }, { "epoch": 0.5052505147563486, "grad_norm": 0.7455392417058264, "learning_rate": 5.158636862140696e-06, "loss": 0.2847, "step": 14723 }, { "epoch": 0.5052848318462594, "grad_norm": 0.7884865835696584, "learning_rate": 5.158081401927779e-06, "loss": 0.3214, "step": 14724 }, { "epoch": 0.5053191489361702, "grad_norm": 0.7077401793585486, "learning_rate": 5.15752593976195e-06, "loss": 0.2264, "step": 14725 }, { "epoch": 0.505353466026081, "grad_norm": 0.8644644904372553, "learning_rate": 5.1569704756500695e-06, "loss": 0.316, "step": 14726 }, { "epoch": 0.5053877831159918, "grad_norm": 0.7932842742261446, "learning_rate": 5.156415009599003e-06, "loss": 0.282, "step": 14727 }, { "epoch": 0.5054221002059025, "grad_norm": 0.761516736460739, "learning_rate": 5.155859541615612e-06, "loss": 0.2556, "step": 14728 }, { "epoch": 0.5054564172958134, "grad_norm": 0.8184378094052386, "learning_rate": 5.155304071706757e-06, "loss": 0.2536, "step": 14729 }, { "epoch": 0.5054907343857241, "grad_norm": 0.8080389222778684, "learning_rate": 5.154748599879304e-06, "loss": 0.3305, "step": 14730 }, { "epoch": 0.5055250514756349, "grad_norm": 0.8397868889442279, "learning_rate": 5.154193126140109e-06, "loss": 0.2933, "step": 14731 }, { "epoch": 0.5055593685655456, "grad_norm": 0.74041245441124, "learning_rate": 5.1536376504960395e-06, "loss": 0.2673, "step": 14732 }, { "epoch": 0.5055936856554564, "grad_norm": 0.8507948259610784, "learning_rate": 5.153082172953956e-06, "loss": 0.2397, "step": 14733 }, { "epoch": 0.5056280027453672, "grad_norm": 0.723337633140449, "learning_rate": 5.15252669352072e-06, "loss": 0.2801, "step": 14734 }, { "epoch": 0.505662319835278, "grad_norm": 0.7648469406239023, "learning_rate": 5.151971212203194e-06, "loss": 0.2458, "step": 14735 }, { "epoch": 0.5056966369251887, "grad_norm": 0.7386383431324665, "learning_rate": 5.151415729008243e-06, "loss": 0.2575, "step": 14736 }, { "epoch": 0.5057309540150995, "grad_norm": 0.7753438696675037, "learning_rate": 5.150860243942726e-06, "loss": 0.28, "step": 14737 }, { "epoch": 0.5057652711050104, "grad_norm": 0.8506567100445691, "learning_rate": 5.150304757013506e-06, "loss": 0.3446, "step": 14738 }, { "epoch": 0.5057995881949211, "grad_norm": 0.7166375960020175, "learning_rate": 5.1497492682274466e-06, "loss": 0.2828, "step": 14739 }, { "epoch": 0.5058339052848319, "grad_norm": 0.7285454225626231, "learning_rate": 5.149193777591411e-06, "loss": 0.2601, "step": 14740 }, { "epoch": 0.5058682223747426, "grad_norm": 0.7877208148806516, "learning_rate": 5.1486382851122564e-06, "loss": 0.3011, "step": 14741 }, { "epoch": 0.5059025394646534, "grad_norm": 0.7704259072431064, "learning_rate": 5.148082790796851e-06, "loss": 0.2529, "step": 14742 }, { "epoch": 0.5059368565545642, "grad_norm": 0.8639262492562536, "learning_rate": 5.147527294652057e-06, "loss": 0.2834, "step": 14743 }, { "epoch": 0.505971173644475, "grad_norm": 0.7559752179703713, "learning_rate": 5.1469717966847344e-06, "loss": 0.264, "step": 14744 }, { "epoch": 0.5060054907343857, "grad_norm": 0.9723078034570487, "learning_rate": 5.146416296901745e-06, "loss": 0.2547, "step": 14745 }, { "epoch": 0.5060398078242965, "grad_norm": 0.7736748229203149, "learning_rate": 5.145860795309956e-06, "loss": 0.3409, "step": 14746 }, { "epoch": 0.5060741249142072, "grad_norm": 0.7325527750219619, "learning_rate": 5.145305291916225e-06, "loss": 0.2733, "step": 14747 }, { "epoch": 0.5061084420041181, "grad_norm": 0.8317060228430123, "learning_rate": 5.144749786727417e-06, "loss": 0.3203, "step": 14748 }, { "epoch": 0.5061427590940288, "grad_norm": 0.7373912210475858, "learning_rate": 5.144194279750393e-06, "loss": 0.2906, "step": 14749 }, { "epoch": 0.5061770761839396, "grad_norm": 0.8257529208000959, "learning_rate": 5.143638770992016e-06, "loss": 0.2729, "step": 14750 }, { "epoch": 0.5062113932738503, "grad_norm": 0.7775982415358548, "learning_rate": 5.143083260459152e-06, "loss": 0.2663, "step": 14751 }, { "epoch": 0.5062457103637612, "grad_norm": 0.7664391103301753, "learning_rate": 5.14252774815866e-06, "loss": 0.2629, "step": 14752 }, { "epoch": 0.506280027453672, "grad_norm": 0.803537231377667, "learning_rate": 5.1419722340974045e-06, "loss": 0.2976, "step": 14753 }, { "epoch": 0.5063143445435827, "grad_norm": 0.7757428173721493, "learning_rate": 5.141416718282247e-06, "loss": 0.2884, "step": 14754 }, { "epoch": 0.5063486616334935, "grad_norm": 0.9365824081031983, "learning_rate": 5.140861200720051e-06, "loss": 0.3014, "step": 14755 }, { "epoch": 0.5063829787234042, "grad_norm": 0.8612954202818631, "learning_rate": 5.140305681417679e-06, "loss": 0.2745, "step": 14756 }, { "epoch": 0.5064172958133151, "grad_norm": 0.7474807773576675, "learning_rate": 5.139750160381993e-06, "loss": 0.3326, "step": 14757 }, { "epoch": 0.5064516129032258, "grad_norm": 0.7476257866910896, "learning_rate": 5.139194637619858e-06, "loss": 0.3083, "step": 14758 }, { "epoch": 0.5064859299931366, "grad_norm": 0.6718307515031017, "learning_rate": 5.138639113138135e-06, "loss": 0.2425, "step": 14759 }, { "epoch": 0.5065202470830473, "grad_norm": 0.8072674623873368, "learning_rate": 5.138083586943688e-06, "loss": 0.2873, "step": 14760 }, { "epoch": 0.5065545641729582, "grad_norm": 0.7632488895017316, "learning_rate": 5.137528059043379e-06, "loss": 0.3091, "step": 14761 }, { "epoch": 0.5065888812628689, "grad_norm": 0.7811536249856479, "learning_rate": 5.136972529444072e-06, "loss": 0.3837, "step": 14762 }, { "epoch": 0.5066231983527797, "grad_norm": 0.7358007088810743, "learning_rate": 5.1364169981526266e-06, "loss": 0.2597, "step": 14763 }, { "epoch": 0.5066575154426904, "grad_norm": 0.7182153064585964, "learning_rate": 5.135861465175909e-06, "loss": 0.3164, "step": 14764 }, { "epoch": 0.5066918325326012, "grad_norm": 0.806261573418872, "learning_rate": 5.135305930520783e-06, "loss": 0.2607, "step": 14765 }, { "epoch": 0.5067261496225121, "grad_norm": 0.7224570539200015, "learning_rate": 5.134750394194109e-06, "loss": 0.2764, "step": 14766 }, { "epoch": 0.5067604667124228, "grad_norm": 0.9443410305786463, "learning_rate": 5.134194856202751e-06, "loss": 0.3016, "step": 14767 }, { "epoch": 0.5067947838023336, "grad_norm": 0.7298847982068559, "learning_rate": 5.133639316553572e-06, "loss": 0.2312, "step": 14768 }, { "epoch": 0.5068291008922443, "grad_norm": 0.8059661505103605, "learning_rate": 5.133083775253436e-06, "loss": 0.2467, "step": 14769 }, { "epoch": 0.5068634179821551, "grad_norm": 0.8280996685795394, "learning_rate": 5.132528232309204e-06, "loss": 0.251, "step": 14770 }, { "epoch": 0.5068977350720659, "grad_norm": 0.745454545390735, "learning_rate": 5.13197268772774e-06, "loss": 0.2965, "step": 14771 }, { "epoch": 0.5069320521619767, "grad_norm": 0.7557432393889338, "learning_rate": 5.131417141515907e-06, "loss": 0.3101, "step": 14772 }, { "epoch": 0.5069663692518874, "grad_norm": 0.7600850212308474, "learning_rate": 5.130861593680569e-06, "loss": 0.418, "step": 14773 }, { "epoch": 0.5070006863417982, "grad_norm": 0.829764411566174, "learning_rate": 5.130306044228589e-06, "loss": 0.3131, "step": 14774 }, { "epoch": 0.507035003431709, "grad_norm": 0.77762111683691, "learning_rate": 5.129750493166831e-06, "loss": 0.3525, "step": 14775 }, { "epoch": 0.5070693205216198, "grad_norm": 0.774351979326832, "learning_rate": 5.129194940502155e-06, "loss": 0.3005, "step": 14776 }, { "epoch": 0.5071036376115305, "grad_norm": 0.7714970302888933, "learning_rate": 5.128639386241426e-06, "loss": 0.2762, "step": 14777 }, { "epoch": 0.5071379547014413, "grad_norm": 0.8311885098351582, "learning_rate": 5.128083830391508e-06, "loss": 0.2647, "step": 14778 }, { "epoch": 0.507172271791352, "grad_norm": 0.7642450507836698, "learning_rate": 5.127528272959264e-06, "loss": 0.247, "step": 14779 }, { "epoch": 0.5072065888812629, "grad_norm": 0.7001793994143283, "learning_rate": 5.126972713951556e-06, "loss": 0.253, "step": 14780 }, { "epoch": 0.5072409059711737, "grad_norm": 0.7436274389870491, "learning_rate": 5.12641715337525e-06, "loss": 0.2646, "step": 14781 }, { "epoch": 0.5072752230610844, "grad_norm": 0.7536797027803303, "learning_rate": 5.125861591237204e-06, "loss": 0.279, "step": 14782 }, { "epoch": 0.5073095401509952, "grad_norm": 0.8558330414342127, "learning_rate": 5.125306027544288e-06, "loss": 0.323, "step": 14783 }, { "epoch": 0.507343857240906, "grad_norm": 0.96701746001378, "learning_rate": 5.12475046230336e-06, "loss": 0.2959, "step": 14784 }, { "epoch": 0.5073781743308168, "grad_norm": 0.7767203084010773, "learning_rate": 5.124194895521288e-06, "loss": 0.3114, "step": 14785 }, { "epoch": 0.5074124914207275, "grad_norm": 0.822524475058411, "learning_rate": 5.123639327204929e-06, "loss": 0.2739, "step": 14786 }, { "epoch": 0.5074468085106383, "grad_norm": 0.7719099159656004, "learning_rate": 5.123083757361152e-06, "loss": 0.2824, "step": 14787 }, { "epoch": 0.507481125600549, "grad_norm": 0.856083164117203, "learning_rate": 5.122528185996821e-06, "loss": 0.3159, "step": 14788 }, { "epoch": 0.5075154426904599, "grad_norm": 0.7979770749561522, "learning_rate": 5.1219726131187954e-06, "loss": 0.2842, "step": 14789 }, { "epoch": 0.5075497597803706, "grad_norm": 0.8123176030793766, "learning_rate": 5.1214170387339405e-06, "loss": 0.309, "step": 14790 }, { "epoch": 0.5075840768702814, "grad_norm": 0.8166693189462425, "learning_rate": 5.12086146284912e-06, "loss": 0.2537, "step": 14791 }, { "epoch": 0.5076183939601921, "grad_norm": 0.7037145080995408, "learning_rate": 5.120305885471196e-06, "loss": 0.2705, "step": 14792 }, { "epoch": 0.5076527110501029, "grad_norm": 0.8227441245905919, "learning_rate": 5.119750306607033e-06, "loss": 0.3335, "step": 14793 }, { "epoch": 0.5076870281400138, "grad_norm": 0.719251415093908, "learning_rate": 5.119194726263496e-06, "loss": 0.3182, "step": 14794 }, { "epoch": 0.5077213452299245, "grad_norm": 0.7356800046899379, "learning_rate": 5.1186391444474444e-06, "loss": 0.2556, "step": 14795 }, { "epoch": 0.5077556623198353, "grad_norm": 0.7333027849161657, "learning_rate": 5.118083561165747e-06, "loss": 0.2339, "step": 14796 }, { "epoch": 0.507789979409746, "grad_norm": 0.7873670741209928, "learning_rate": 5.117527976425264e-06, "loss": 0.2735, "step": 14797 }, { "epoch": 0.5078242964996569, "grad_norm": 0.7564274585999383, "learning_rate": 5.116972390232861e-06, "loss": 0.275, "step": 14798 }, { "epoch": 0.5078586135895676, "grad_norm": 0.8309683946486345, "learning_rate": 5.1164168025953985e-06, "loss": 0.3021, "step": 14799 }, { "epoch": 0.5078929306794784, "grad_norm": 0.7968776653247729, "learning_rate": 5.115861213519744e-06, "loss": 0.2947, "step": 14800 }, { "epoch": 0.5079272477693891, "grad_norm": 0.7466431096426341, "learning_rate": 5.1153056230127586e-06, "loss": 0.2738, "step": 14801 }, { "epoch": 0.5079615648592999, "grad_norm": 0.8678339602280487, "learning_rate": 5.114750031081306e-06, "loss": 0.2597, "step": 14802 }, { "epoch": 0.5079958819492107, "grad_norm": 0.8440393713093922, "learning_rate": 5.114194437732251e-06, "loss": 0.2686, "step": 14803 }, { "epoch": 0.5080301990391215, "grad_norm": 0.8324520115473213, "learning_rate": 5.113638842972457e-06, "loss": 0.287, "step": 14804 }, { "epoch": 0.5080645161290323, "grad_norm": 0.7946138518724678, "learning_rate": 5.1130832468087875e-06, "loss": 0.3407, "step": 14805 }, { "epoch": 0.508098833218943, "grad_norm": 0.7217427941935974, "learning_rate": 5.1125276492481065e-06, "loss": 0.2666, "step": 14806 }, { "epoch": 0.5081331503088538, "grad_norm": 0.7127664820996843, "learning_rate": 5.1119720502972785e-06, "loss": 0.2666, "step": 14807 }, { "epoch": 0.5081674673987646, "grad_norm": 0.7404044983532184, "learning_rate": 5.1114164499631634e-06, "loss": 0.2669, "step": 14808 }, { "epoch": 0.5082017844886754, "grad_norm": 0.8601643879714854, "learning_rate": 5.11086084825263e-06, "loss": 0.3056, "step": 14809 }, { "epoch": 0.5082361015785861, "grad_norm": 0.8164119092865509, "learning_rate": 5.110305245172541e-06, "loss": 0.2911, "step": 14810 }, { "epoch": 0.5082704186684969, "grad_norm": 0.786125026256203, "learning_rate": 5.109749640729758e-06, "loss": 0.29, "step": 14811 }, { "epoch": 0.5083047357584077, "grad_norm": 0.7080151146311409, "learning_rate": 5.109194034931146e-06, "loss": 0.2194, "step": 14812 }, { "epoch": 0.5083390528483185, "grad_norm": 0.7853527297504584, "learning_rate": 5.10863842778357e-06, "loss": 0.322, "step": 14813 }, { "epoch": 0.5083733699382292, "grad_norm": 0.7754777769909584, "learning_rate": 5.108082819293892e-06, "loss": 0.2724, "step": 14814 }, { "epoch": 0.50840768702814, "grad_norm": 0.7328323591620851, "learning_rate": 5.107527209468977e-06, "loss": 0.2922, "step": 14815 }, { "epoch": 0.5084420041180507, "grad_norm": 0.6929915260298414, "learning_rate": 5.1069715983156876e-06, "loss": 0.2657, "step": 14816 }, { "epoch": 0.5084763212079616, "grad_norm": 0.8168900358069096, "learning_rate": 5.106415985840891e-06, "loss": 0.3287, "step": 14817 }, { "epoch": 0.5085106382978724, "grad_norm": 0.8482771666536044, "learning_rate": 5.1058603720514474e-06, "loss": 0.3026, "step": 14818 }, { "epoch": 0.5085449553877831, "grad_norm": 0.746405556998089, "learning_rate": 5.105304756954223e-06, "loss": 0.2607, "step": 14819 }, { "epoch": 0.5085792724776939, "grad_norm": 0.7061187671163844, "learning_rate": 5.104749140556083e-06, "loss": 0.2977, "step": 14820 }, { "epoch": 0.5086135895676047, "grad_norm": 0.7674051386795714, "learning_rate": 5.1041935228638866e-06, "loss": 0.2904, "step": 14821 }, { "epoch": 0.5086479066575155, "grad_norm": 0.7240613413519966, "learning_rate": 5.103637903884501e-06, "loss": 0.2874, "step": 14822 }, { "epoch": 0.5086822237474262, "grad_norm": 0.9742326709244999, "learning_rate": 5.103082283624791e-06, "loss": 0.3064, "step": 14823 }, { "epoch": 0.508716540837337, "grad_norm": 0.7942608736247424, "learning_rate": 5.102526662091618e-06, "loss": 0.2803, "step": 14824 }, { "epoch": 0.5087508579272477, "grad_norm": 0.8216778650242071, "learning_rate": 5.101971039291849e-06, "loss": 0.3046, "step": 14825 }, { "epoch": 0.5087851750171586, "grad_norm": 0.6527051017762756, "learning_rate": 5.101415415232347e-06, "loss": 0.2443, "step": 14826 }, { "epoch": 0.5088194921070693, "grad_norm": 0.7774250027806792, "learning_rate": 5.1008597899199755e-06, "loss": 0.2919, "step": 14827 }, { "epoch": 0.5088538091969801, "grad_norm": 0.8030635893416587, "learning_rate": 5.100304163361598e-06, "loss": 0.3179, "step": 14828 }, { "epoch": 0.5088881262868908, "grad_norm": 0.8289102773265865, "learning_rate": 5.09974853556408e-06, "loss": 0.2931, "step": 14829 }, { "epoch": 0.5089224433768016, "grad_norm": 0.7681450757370278, "learning_rate": 5.099192906534285e-06, "loss": 0.295, "step": 14830 }, { "epoch": 0.5089567604667125, "grad_norm": 0.7342833319467815, "learning_rate": 5.098637276279077e-06, "loss": 0.2711, "step": 14831 }, { "epoch": 0.5089910775566232, "grad_norm": 0.775726219498152, "learning_rate": 5.09808164480532e-06, "loss": 0.2968, "step": 14832 }, { "epoch": 0.509025394646534, "grad_norm": 0.8001848626914593, "learning_rate": 5.097526012119881e-06, "loss": 0.3004, "step": 14833 }, { "epoch": 0.5090597117364447, "grad_norm": 0.7139643658913042, "learning_rate": 5.096970378229621e-06, "loss": 0.2509, "step": 14834 }, { "epoch": 0.5090940288263556, "grad_norm": 0.7872761852717551, "learning_rate": 5.0964147431414035e-06, "loss": 0.2781, "step": 14835 }, { "epoch": 0.5091283459162663, "grad_norm": 0.843814077877887, "learning_rate": 5.095859106862097e-06, "loss": 0.2693, "step": 14836 }, { "epoch": 0.5091626630061771, "grad_norm": 0.6420685755079693, "learning_rate": 5.09530346939856e-06, "loss": 0.2654, "step": 14837 }, { "epoch": 0.5091969800960878, "grad_norm": 0.7284568822393307, "learning_rate": 5.0947478307576606e-06, "loss": 0.3087, "step": 14838 }, { "epoch": 0.5092312971859986, "grad_norm": 0.733312587821165, "learning_rate": 5.094192190946263e-06, "loss": 0.322, "step": 14839 }, { "epoch": 0.5092656142759094, "grad_norm": 0.8891538969959919, "learning_rate": 5.09363654997123e-06, "loss": 0.2778, "step": 14840 }, { "epoch": 0.5092999313658202, "grad_norm": 0.793447733350275, "learning_rate": 5.0930809078394265e-06, "loss": 0.3115, "step": 14841 }, { "epoch": 0.5093342484557309, "grad_norm": 0.7600898356144854, "learning_rate": 5.092525264557718e-06, "loss": 0.328, "step": 14842 }, { "epoch": 0.5093685655456417, "grad_norm": 0.8202111624389363, "learning_rate": 5.091969620132967e-06, "loss": 0.3433, "step": 14843 }, { "epoch": 0.5094028826355526, "grad_norm": 0.8441538366113909, "learning_rate": 5.091413974572038e-06, "loss": 0.3138, "step": 14844 }, { "epoch": 0.5094371997254633, "grad_norm": 0.7784248821710902, "learning_rate": 5.090858327881797e-06, "loss": 0.253, "step": 14845 }, { "epoch": 0.5094715168153741, "grad_norm": 0.7867573576514585, "learning_rate": 5.090302680069106e-06, "loss": 0.2929, "step": 14846 }, { "epoch": 0.5095058339052848, "grad_norm": 0.7679002951742586, "learning_rate": 5.089747031140832e-06, "loss": 0.2619, "step": 14847 }, { "epoch": 0.5095401509951956, "grad_norm": 0.7033300104252246, "learning_rate": 5.089191381103836e-06, "loss": 0.3057, "step": 14848 }, { "epoch": 0.5095744680851064, "grad_norm": 0.6716091668078111, "learning_rate": 5.088635729964988e-06, "loss": 0.2893, "step": 14849 }, { "epoch": 0.5096087851750172, "grad_norm": 0.8171309998909295, "learning_rate": 5.088080077731147e-06, "loss": 0.3004, "step": 14850 }, { "epoch": 0.5096431022649279, "grad_norm": 0.8033513218545146, "learning_rate": 5.087524424409178e-06, "loss": 0.2749, "step": 14851 }, { "epoch": 0.5096774193548387, "grad_norm": 0.6882479240300826, "learning_rate": 5.086968770005948e-06, "loss": 0.2559, "step": 14852 }, { "epoch": 0.5097117364447494, "grad_norm": 0.8452674045974088, "learning_rate": 5.08641311452832e-06, "loss": 0.3844, "step": 14853 }, { "epoch": 0.5097460535346603, "grad_norm": 0.7784519307596309, "learning_rate": 5.085857457983159e-06, "loss": 0.2535, "step": 14854 }, { "epoch": 0.509780370624571, "grad_norm": 0.8235062989576375, "learning_rate": 5.08530180037733e-06, "loss": 0.2967, "step": 14855 }, { "epoch": 0.5098146877144818, "grad_norm": 0.7289495223837309, "learning_rate": 5.0847461417176954e-06, "loss": 0.2954, "step": 14856 }, { "epoch": 0.5098490048043925, "grad_norm": 0.789345726190304, "learning_rate": 5.084190482011121e-06, "loss": 0.246, "step": 14857 }, { "epoch": 0.5098833218943034, "grad_norm": 0.6632318140110476, "learning_rate": 5.083634821264472e-06, "loss": 0.2663, "step": 14858 }, { "epoch": 0.5099176389842142, "grad_norm": 0.7738668948141355, "learning_rate": 5.083079159484613e-06, "loss": 0.3408, "step": 14859 }, { "epoch": 0.5099519560741249, "grad_norm": 0.7362596928842436, "learning_rate": 5.082523496678406e-06, "loss": 0.2724, "step": 14860 }, { "epoch": 0.5099862731640357, "grad_norm": 0.7555570975106916, "learning_rate": 5.081967832852717e-06, "loss": 0.3147, "step": 14861 }, { "epoch": 0.5100205902539464, "grad_norm": 0.7647270058735012, "learning_rate": 5.081412168014414e-06, "loss": 0.3178, "step": 14862 }, { "epoch": 0.5100549073438573, "grad_norm": 0.7409589382558354, "learning_rate": 5.080856502170357e-06, "loss": 0.2256, "step": 14863 }, { "epoch": 0.510089224433768, "grad_norm": 0.8180861274132956, "learning_rate": 5.080300835327412e-06, "loss": 0.3101, "step": 14864 }, { "epoch": 0.5101235415236788, "grad_norm": 0.7808238257814202, "learning_rate": 5.0797451674924445e-06, "loss": 0.299, "step": 14865 }, { "epoch": 0.5101578586135895, "grad_norm": 0.7985515584053337, "learning_rate": 5.079189498672317e-06, "loss": 0.2696, "step": 14866 }, { "epoch": 0.5101921757035004, "grad_norm": 0.7081676489491817, "learning_rate": 5.078633828873897e-06, "loss": 0.2595, "step": 14867 }, { "epoch": 0.5102264927934111, "grad_norm": 0.734449987600238, "learning_rate": 5.078078158104047e-06, "loss": 0.2745, "step": 14868 }, { "epoch": 0.5102608098833219, "grad_norm": 0.7126232428311655, "learning_rate": 5.077522486369633e-06, "loss": 0.283, "step": 14869 }, { "epoch": 0.5102951269732326, "grad_norm": 0.7124791310615188, "learning_rate": 5.0769668136775176e-06, "loss": 0.3543, "step": 14870 }, { "epoch": 0.5103294440631434, "grad_norm": 0.7432873647075251, "learning_rate": 5.0764111400345695e-06, "loss": 0.2949, "step": 14871 }, { "epoch": 0.5103637611530543, "grad_norm": 0.7587512682477998, "learning_rate": 5.075855465447649e-06, "loss": 0.2895, "step": 14872 }, { "epoch": 0.510398078242965, "grad_norm": 0.7456333952864949, "learning_rate": 5.075299789923622e-06, "loss": 0.2845, "step": 14873 }, { "epoch": 0.5104323953328758, "grad_norm": 0.7071663714550043, "learning_rate": 5.0747441134693555e-06, "loss": 0.2772, "step": 14874 }, { "epoch": 0.5104667124227865, "grad_norm": 0.7688152363827274, "learning_rate": 5.074188436091711e-06, "loss": 0.265, "step": 14875 }, { "epoch": 0.5105010295126973, "grad_norm": 0.7716991625260986, "learning_rate": 5.073632757797556e-06, "loss": 0.274, "step": 14876 }, { "epoch": 0.5105353466026081, "grad_norm": 0.7308658746848004, "learning_rate": 5.073077078593753e-06, "loss": 0.3089, "step": 14877 }, { "epoch": 0.5105696636925189, "grad_norm": 0.7828682174194111, "learning_rate": 5.07252139848717e-06, "loss": 0.3003, "step": 14878 }, { "epoch": 0.5106039807824296, "grad_norm": 0.7585531576027383, "learning_rate": 5.07196571748467e-06, "loss": 0.2562, "step": 14879 }, { "epoch": 0.5106382978723404, "grad_norm": 0.7781439001654904, "learning_rate": 5.071410035593115e-06, "loss": 0.2601, "step": 14880 }, { "epoch": 0.5106726149622512, "grad_norm": 0.7897463064641718, "learning_rate": 5.070854352819374e-06, "loss": 0.3004, "step": 14881 }, { "epoch": 0.510706932052162, "grad_norm": 0.8063020235182392, "learning_rate": 5.070298669170309e-06, "loss": 0.2384, "step": 14882 }, { "epoch": 0.5107412491420728, "grad_norm": 0.8318027717345527, "learning_rate": 5.0697429846527846e-06, "loss": 0.3038, "step": 14883 }, { "epoch": 0.5107755662319835, "grad_norm": 0.8628545457867984, "learning_rate": 5.06918729927367e-06, "loss": 0.2733, "step": 14884 }, { "epoch": 0.5108098833218943, "grad_norm": 0.7293787975599856, "learning_rate": 5.068631613039825e-06, "loss": 0.2432, "step": 14885 }, { "epoch": 0.5108442004118051, "grad_norm": 0.7320176387066853, "learning_rate": 5.068075925958117e-06, "loss": 0.3189, "step": 14886 }, { "epoch": 0.5108785175017159, "grad_norm": 0.7268638915684381, "learning_rate": 5.067520238035411e-06, "loss": 0.2922, "step": 14887 }, { "epoch": 0.5109128345916266, "grad_norm": 0.7512823290575995, "learning_rate": 5.0669645492785705e-06, "loss": 0.2845, "step": 14888 }, { "epoch": 0.5109471516815374, "grad_norm": 0.8880144551538826, "learning_rate": 5.066408859694461e-06, "loss": 0.2865, "step": 14889 }, { "epoch": 0.5109814687714482, "grad_norm": 0.6874402572956554, "learning_rate": 5.065853169289947e-06, "loss": 0.2838, "step": 14890 }, { "epoch": 0.511015785861359, "grad_norm": 0.7515761676932707, "learning_rate": 5.0652974780718944e-06, "loss": 0.2939, "step": 14891 }, { "epoch": 0.5110501029512697, "grad_norm": 0.7892801621015179, "learning_rate": 5.064741786047167e-06, "loss": 0.2935, "step": 14892 }, { "epoch": 0.5110844200411805, "grad_norm": 0.8236011067351912, "learning_rate": 5.064186093222631e-06, "loss": 0.2611, "step": 14893 }, { "epoch": 0.5111187371310912, "grad_norm": 0.8820401658622179, "learning_rate": 5.063630399605152e-06, "loss": 0.3762, "step": 14894 }, { "epoch": 0.5111530542210021, "grad_norm": 0.724005296019067, "learning_rate": 5.063074705201591e-06, "loss": 0.2828, "step": 14895 }, { "epoch": 0.5111873713109129, "grad_norm": 0.8222830005212534, "learning_rate": 5.062519010018817e-06, "loss": 0.3418, "step": 14896 }, { "epoch": 0.5112216884008236, "grad_norm": 0.6839130963147976, "learning_rate": 5.061963314063692e-06, "loss": 0.2546, "step": 14897 }, { "epoch": 0.5112560054907344, "grad_norm": 0.7518421945545595, "learning_rate": 5.061407617343085e-06, "loss": 0.3054, "step": 14898 }, { "epoch": 0.5112903225806451, "grad_norm": 0.8050370148282253, "learning_rate": 5.060851919863856e-06, "loss": 0.2266, "step": 14899 }, { "epoch": 0.511324639670556, "grad_norm": 0.7263064838940518, "learning_rate": 5.060296221632875e-06, "loss": 0.2326, "step": 14900 }, { "epoch": 0.5113589567604667, "grad_norm": 0.7173109272436886, "learning_rate": 5.059740522657003e-06, "loss": 0.2396, "step": 14901 }, { "epoch": 0.5113932738503775, "grad_norm": 0.9528357066740619, "learning_rate": 5.059184822943106e-06, "loss": 0.3081, "step": 14902 }, { "epoch": 0.5114275909402882, "grad_norm": 0.889724991504842, "learning_rate": 5.058629122498051e-06, "loss": 0.2808, "step": 14903 }, { "epoch": 0.5114619080301991, "grad_norm": 0.7851835981961175, "learning_rate": 5.058073421328701e-06, "loss": 0.2818, "step": 14904 }, { "epoch": 0.5114962251201098, "grad_norm": 0.7447334430423114, "learning_rate": 5.05751771944192e-06, "loss": 0.2656, "step": 14905 }, { "epoch": 0.5115305422100206, "grad_norm": 0.7238128197709592, "learning_rate": 5.056962016844577e-06, "loss": 0.2804, "step": 14906 }, { "epoch": 0.5115648592999313, "grad_norm": 0.8063961318485203, "learning_rate": 5.056406313543533e-06, "loss": 0.2748, "step": 14907 }, { "epoch": 0.5115991763898421, "grad_norm": 0.8413388590906893, "learning_rate": 5.055850609545655e-06, "loss": 0.2485, "step": 14908 }, { "epoch": 0.511633493479753, "grad_norm": 0.8194109744780517, "learning_rate": 5.055294904857808e-06, "loss": 0.2664, "step": 14909 }, { "epoch": 0.5116678105696637, "grad_norm": 0.7861123289830854, "learning_rate": 5.054739199486859e-06, "loss": 0.3234, "step": 14910 }, { "epoch": 0.5117021276595745, "grad_norm": 0.8076956379549585, "learning_rate": 5.0541834934396685e-06, "loss": 0.3069, "step": 14911 }, { "epoch": 0.5117364447494852, "grad_norm": 0.7520061973593507, "learning_rate": 5.053627786723104e-06, "loss": 0.2829, "step": 14912 }, { "epoch": 0.5117707618393961, "grad_norm": 0.7254851580890729, "learning_rate": 5.053072079344032e-06, "loss": 0.2441, "step": 14913 }, { "epoch": 0.5118050789293068, "grad_norm": 0.7245219454630775, "learning_rate": 5.052516371309315e-06, "loss": 0.2342, "step": 14914 }, { "epoch": 0.5118393960192176, "grad_norm": 0.7483137907609355, "learning_rate": 5.05196066262582e-06, "loss": 0.2745, "step": 14915 }, { "epoch": 0.5118737131091283, "grad_norm": 0.6914290721962176, "learning_rate": 5.051404953300413e-06, "loss": 0.2854, "step": 14916 }, { "epoch": 0.5119080301990391, "grad_norm": 0.6741124639798441, "learning_rate": 5.050849243339957e-06, "loss": 0.242, "step": 14917 }, { "epoch": 0.5119423472889499, "grad_norm": 0.7564380520336104, "learning_rate": 5.050293532751317e-06, "loss": 0.2851, "step": 14918 }, { "epoch": 0.5119766643788607, "grad_norm": 0.6979269982693951, "learning_rate": 5.049737821541359e-06, "loss": 0.2524, "step": 14919 }, { "epoch": 0.5120109814687714, "grad_norm": 0.7302929305725873, "learning_rate": 5.049182109716949e-06, "loss": 0.308, "step": 14920 }, { "epoch": 0.5120452985586822, "grad_norm": 0.714545103385547, "learning_rate": 5.0486263972849505e-06, "loss": 0.2892, "step": 14921 }, { "epoch": 0.512079615648593, "grad_norm": 0.8365411111967542, "learning_rate": 5.048070684252231e-06, "loss": 0.2609, "step": 14922 }, { "epoch": 0.5121139327385038, "grad_norm": 0.7603347599386693, "learning_rate": 5.0475149706256535e-06, "loss": 0.2837, "step": 14923 }, { "epoch": 0.5121482498284146, "grad_norm": 0.7506741666343038, "learning_rate": 5.046959256412084e-06, "loss": 0.3072, "step": 14924 }, { "epoch": 0.5121825669183253, "grad_norm": 0.7069385774458135, "learning_rate": 5.046403541618386e-06, "loss": 0.2504, "step": 14925 }, { "epoch": 0.5122168840082361, "grad_norm": 0.7633990692403105, "learning_rate": 5.0458478262514295e-06, "loss": 0.3151, "step": 14926 }, { "epoch": 0.5122512010981469, "grad_norm": 0.713691458964682, "learning_rate": 5.045292110318074e-06, "loss": 0.2872, "step": 14927 }, { "epoch": 0.5122855181880577, "grad_norm": 0.7487350816278968, "learning_rate": 5.044736393825188e-06, "loss": 0.2993, "step": 14928 }, { "epoch": 0.5123198352779684, "grad_norm": 0.7423406480141571, "learning_rate": 5.044180676779637e-06, "loss": 0.2654, "step": 14929 }, { "epoch": 0.5123541523678792, "grad_norm": 0.9023152531966334, "learning_rate": 5.0436249591882835e-06, "loss": 0.3135, "step": 14930 }, { "epoch": 0.5123884694577899, "grad_norm": 0.8792650475645666, "learning_rate": 5.043069241057995e-06, "loss": 0.2744, "step": 14931 }, { "epoch": 0.5124227865477008, "grad_norm": 0.8861804493427106, "learning_rate": 5.042513522395638e-06, "loss": 0.3214, "step": 14932 }, { "epoch": 0.5124571036376115, "grad_norm": 0.7522509683129605, "learning_rate": 5.041957803208075e-06, "loss": 0.2964, "step": 14933 }, { "epoch": 0.5124914207275223, "grad_norm": 0.7267367706679217, "learning_rate": 5.041402083502172e-06, "loss": 0.2655, "step": 14934 }, { "epoch": 0.512525737817433, "grad_norm": 0.8404813804853093, "learning_rate": 5.0408463632847935e-06, "loss": 0.2707, "step": 14935 }, { "epoch": 0.5125600549073439, "grad_norm": 0.8303379332321309, "learning_rate": 5.040290642562806e-06, "loss": 0.2657, "step": 14936 }, { "epoch": 0.5125943719972547, "grad_norm": 0.8080064549785833, "learning_rate": 5.039734921343076e-06, "loss": 0.2586, "step": 14937 }, { "epoch": 0.5126286890871654, "grad_norm": 0.7980233765231264, "learning_rate": 5.0391791996324655e-06, "loss": 0.2627, "step": 14938 }, { "epoch": 0.5126630061770762, "grad_norm": 0.6562565262221223, "learning_rate": 5.038623477437844e-06, "loss": 0.258, "step": 14939 }, { "epoch": 0.5126973232669869, "grad_norm": 0.744160057447033, "learning_rate": 5.038067754766074e-06, "loss": 0.2651, "step": 14940 }, { "epoch": 0.5127316403568978, "grad_norm": 0.7374866861378419, "learning_rate": 5.03751203162402e-06, "loss": 0.2769, "step": 14941 }, { "epoch": 0.5127659574468085, "grad_norm": 0.6898200894160561, "learning_rate": 5.036956308018549e-06, "loss": 0.3059, "step": 14942 }, { "epoch": 0.5128002745367193, "grad_norm": 0.7838815657971447, "learning_rate": 5.036400583956525e-06, "loss": 0.3231, "step": 14943 }, { "epoch": 0.51283459162663, "grad_norm": 0.9442270880813035, "learning_rate": 5.035844859444816e-06, "loss": 0.2748, "step": 14944 }, { "epoch": 0.5128689087165408, "grad_norm": 0.7796507041542307, "learning_rate": 5.035289134490285e-06, "loss": 0.3033, "step": 14945 }, { "epoch": 0.5129032258064516, "grad_norm": 0.7644827968489721, "learning_rate": 5.034733409099798e-06, "loss": 0.2506, "step": 14946 }, { "epoch": 0.5129375428963624, "grad_norm": 0.7983638238303633, "learning_rate": 5.03417768328022e-06, "loss": 0.2799, "step": 14947 }, { "epoch": 0.5129718599862731, "grad_norm": 0.9136125466772517, "learning_rate": 5.033621957038417e-06, "loss": 0.3337, "step": 14948 }, { "epoch": 0.5130061770761839, "grad_norm": 0.7198503522445201, "learning_rate": 5.033066230381253e-06, "loss": 0.2556, "step": 14949 }, { "epoch": 0.5130404941660948, "grad_norm": 0.8087896805711429, "learning_rate": 5.0325105033155945e-06, "loss": 0.2597, "step": 14950 }, { "epoch": 0.5130748112560055, "grad_norm": 0.7844938549166897, "learning_rate": 5.031954775848308e-06, "loss": 0.2732, "step": 14951 }, { "epoch": 0.5131091283459163, "grad_norm": 0.7953667036383232, "learning_rate": 5.031399047986257e-06, "loss": 0.3594, "step": 14952 }, { "epoch": 0.513143445435827, "grad_norm": 0.7511896278613024, "learning_rate": 5.030843319736305e-06, "loss": 0.2742, "step": 14953 }, { "epoch": 0.5131777625257378, "grad_norm": 0.7746772662100988, "learning_rate": 5.030287591105322e-06, "loss": 0.2473, "step": 14954 }, { "epoch": 0.5132120796156486, "grad_norm": 0.8452713107105554, "learning_rate": 5.029731862100171e-06, "loss": 0.2917, "step": 14955 }, { "epoch": 0.5132463967055594, "grad_norm": 0.7768156662403475, "learning_rate": 5.029176132727717e-06, "loss": 0.2657, "step": 14956 }, { "epoch": 0.5132807137954701, "grad_norm": 0.7899775632750666, "learning_rate": 5.028620402994826e-06, "loss": 0.3094, "step": 14957 }, { "epoch": 0.5133150308853809, "grad_norm": 0.8207801486550006, "learning_rate": 5.028064672908362e-06, "loss": 0.3288, "step": 14958 }, { "epoch": 0.5133493479752917, "grad_norm": 0.8081905745978274, "learning_rate": 5.027508942475193e-06, "loss": 0.227, "step": 14959 }, { "epoch": 0.5133836650652025, "grad_norm": 0.8008055103544941, "learning_rate": 5.026953211702184e-06, "loss": 0.2835, "step": 14960 }, { "epoch": 0.5134179821551133, "grad_norm": 0.7509624763796161, "learning_rate": 5.026397480596199e-06, "loss": 0.2588, "step": 14961 }, { "epoch": 0.513452299245024, "grad_norm": 0.7080442806998123, "learning_rate": 5.025841749164103e-06, "loss": 0.2631, "step": 14962 }, { "epoch": 0.5134866163349348, "grad_norm": 0.7605388697721621, "learning_rate": 5.025286017412763e-06, "loss": 0.3012, "step": 14963 }, { "epoch": 0.5135209334248456, "grad_norm": 0.7810219377252675, "learning_rate": 5.024730285349043e-06, "loss": 0.2886, "step": 14964 }, { "epoch": 0.5135552505147564, "grad_norm": 0.9248349264752688, "learning_rate": 5.0241745529798106e-06, "loss": 0.3134, "step": 14965 }, { "epoch": 0.5135895676046671, "grad_norm": 0.7414922020311889, "learning_rate": 5.023618820311927e-06, "loss": 0.2858, "step": 14966 }, { "epoch": 0.5136238846945779, "grad_norm": 0.7291022381309764, "learning_rate": 5.023063087352265e-06, "loss": 0.3211, "step": 14967 }, { "epoch": 0.5136582017844886, "grad_norm": 0.7856503274614363, "learning_rate": 5.022507354107682e-06, "loss": 0.2538, "step": 14968 }, { "epoch": 0.5136925188743995, "grad_norm": 0.847703848340854, "learning_rate": 5.021951620585047e-06, "loss": 0.2663, "step": 14969 }, { "epoch": 0.5137268359643102, "grad_norm": 0.7568996578599554, "learning_rate": 5.021395886791226e-06, "loss": 0.2738, "step": 14970 }, { "epoch": 0.513761153054221, "grad_norm": 0.6785768304689399, "learning_rate": 5.020840152733083e-06, "loss": 0.2229, "step": 14971 }, { "epoch": 0.5137954701441317, "grad_norm": 0.7276674666432189, "learning_rate": 5.020284418417485e-06, "loss": 0.2646, "step": 14972 }, { "epoch": 0.5138297872340426, "grad_norm": 0.7355068257739221, "learning_rate": 5.019728683851296e-06, "loss": 0.2623, "step": 14973 }, { "epoch": 0.5138641043239534, "grad_norm": 0.892173707112567, "learning_rate": 5.0191729490413845e-06, "loss": 0.2603, "step": 14974 }, { "epoch": 0.5138984214138641, "grad_norm": 0.8177186774925714, "learning_rate": 5.018617213994611e-06, "loss": 0.288, "step": 14975 }, { "epoch": 0.5139327385037749, "grad_norm": 0.7564397383456549, "learning_rate": 5.018061478717844e-06, "loss": 0.2961, "step": 14976 }, { "epoch": 0.5139670555936856, "grad_norm": 0.8471330217462615, "learning_rate": 5.01750574321795e-06, "loss": 0.3161, "step": 14977 }, { "epoch": 0.5140013726835965, "grad_norm": 0.7644121363547138, "learning_rate": 5.016950007501791e-06, "loss": 0.3137, "step": 14978 }, { "epoch": 0.5140356897735072, "grad_norm": 0.8613753355216945, "learning_rate": 5.016394271576235e-06, "loss": 0.2559, "step": 14979 }, { "epoch": 0.514070006863418, "grad_norm": 0.7913100514938557, "learning_rate": 5.0158385354481465e-06, "loss": 0.2556, "step": 14980 }, { "epoch": 0.5141043239533287, "grad_norm": 0.8248124468842215, "learning_rate": 5.015282799124393e-06, "loss": 0.2481, "step": 14981 }, { "epoch": 0.5141386410432396, "grad_norm": 0.7997163148044636, "learning_rate": 5.014727062611836e-06, "loss": 0.2361, "step": 14982 }, { "epoch": 0.5141729581331503, "grad_norm": 0.8106970539408022, "learning_rate": 5.014171325917345e-06, "loss": 0.3285, "step": 14983 }, { "epoch": 0.5142072752230611, "grad_norm": 0.7423457361009255, "learning_rate": 5.013615589047783e-06, "loss": 0.3061, "step": 14984 }, { "epoch": 0.5142415923129718, "grad_norm": 0.6390738003343998, "learning_rate": 5.013059852010017e-06, "loss": 0.2341, "step": 14985 }, { "epoch": 0.5142759094028826, "grad_norm": 0.8214802626243901, "learning_rate": 5.012504114810911e-06, "loss": 0.2385, "step": 14986 }, { "epoch": 0.5143102264927935, "grad_norm": 0.874700172089423, "learning_rate": 5.011948377457332e-06, "loss": 0.2961, "step": 14987 }, { "epoch": 0.5143445435827042, "grad_norm": 0.7846814210371879, "learning_rate": 5.0113926399561444e-06, "loss": 0.3034, "step": 14988 }, { "epoch": 0.514378860672615, "grad_norm": 0.70635215422116, "learning_rate": 5.010836902314213e-06, "loss": 0.2893, "step": 14989 }, { "epoch": 0.5144131777625257, "grad_norm": 0.7608749470873275, "learning_rate": 5.010281164538406e-06, "loss": 0.259, "step": 14990 }, { "epoch": 0.5144474948524365, "grad_norm": 0.7551716670983974, "learning_rate": 5.009725426635587e-06, "loss": 0.3057, "step": 14991 }, { "epoch": 0.5144818119423473, "grad_norm": 0.6469079177578583, "learning_rate": 5.00916968861262e-06, "loss": 0.2717, "step": 14992 }, { "epoch": 0.5145161290322581, "grad_norm": 0.7366651062336951, "learning_rate": 5.0086139504763745e-06, "loss": 0.2489, "step": 14993 }, { "epoch": 0.5145504461221688, "grad_norm": 0.7539698068910028, "learning_rate": 5.008058212233712e-06, "loss": 0.3008, "step": 14994 }, { "epoch": 0.5145847632120796, "grad_norm": 0.9283867810563978, "learning_rate": 5.007502473891499e-06, "loss": 0.2732, "step": 14995 }, { "epoch": 0.5146190803019904, "grad_norm": 0.8126775898618218, "learning_rate": 5.006946735456603e-06, "loss": 0.2764, "step": 14996 }, { "epoch": 0.5146533973919012, "grad_norm": 0.7130620266677915, "learning_rate": 5.006390996935889e-06, "loss": 0.2257, "step": 14997 }, { "epoch": 0.5146877144818119, "grad_norm": 0.7515354827065318, "learning_rate": 5.005835258336221e-06, "loss": 0.3331, "step": 14998 }, { "epoch": 0.5147220315717227, "grad_norm": 0.7181986761080581, "learning_rate": 5.005279519664465e-06, "loss": 0.2438, "step": 14999 }, { "epoch": 0.5147563486616334, "grad_norm": 0.7380161099043194, "learning_rate": 5.004723780927486e-06, "loss": 0.2915, "step": 15000 }, { "epoch": 0.5147906657515443, "grad_norm": 0.724760831319285, "learning_rate": 5.004168042132152e-06, "loss": 0.2403, "step": 15001 }, { "epoch": 0.5148249828414551, "grad_norm": 0.7457868208378181, "learning_rate": 5.003612303285323e-06, "loss": 0.3102, "step": 15002 }, { "epoch": 0.5148592999313658, "grad_norm": 0.6624975953231655, "learning_rate": 5.003056564393872e-06, "loss": 0.2653, "step": 15003 }, { "epoch": 0.5148936170212766, "grad_norm": 0.7634889200591103, "learning_rate": 5.002500825464659e-06, "loss": 0.268, "step": 15004 }, { "epoch": 0.5149279341111874, "grad_norm": 0.7768807687438972, "learning_rate": 5.001945086504552e-06, "loss": 0.254, "step": 15005 }, { "epoch": 0.5149622512010982, "grad_norm": 0.6654367622994214, "learning_rate": 5.001389347520416e-06, "loss": 0.2259, "step": 15006 }, { "epoch": 0.5149965682910089, "grad_norm": 0.7290317720119365, "learning_rate": 5.0008336085191156e-06, "loss": 0.2325, "step": 15007 }, { "epoch": 0.5150308853809197, "grad_norm": 0.8214845906562793, "learning_rate": 5.000277869507516e-06, "loss": 0.3061, "step": 15008 }, { "epoch": 0.5150652024708304, "grad_norm": 0.7515633842574707, "learning_rate": 4.999722130492485e-06, "loss": 0.2741, "step": 15009 }, { "epoch": 0.5150995195607413, "grad_norm": 0.7460846574535309, "learning_rate": 4.999166391480886e-06, "loss": 0.3257, "step": 15010 }, { "epoch": 0.515133836650652, "grad_norm": 0.933512105259811, "learning_rate": 4.998610652479585e-06, "loss": 0.3069, "step": 15011 }, { "epoch": 0.5151681537405628, "grad_norm": 0.8575309759632597, "learning_rate": 4.99805491349545e-06, "loss": 0.3227, "step": 15012 }, { "epoch": 0.5152024708304735, "grad_norm": 0.7023788527687604, "learning_rate": 4.997499174535341e-06, "loss": 0.2776, "step": 15013 }, { "epoch": 0.5152367879203843, "grad_norm": 0.7756016639371534, "learning_rate": 4.996943435606129e-06, "loss": 0.3329, "step": 15014 }, { "epoch": 0.5152711050102952, "grad_norm": 0.7443233140229394, "learning_rate": 4.996387696714678e-06, "loss": 0.3181, "step": 15015 }, { "epoch": 0.5153054221002059, "grad_norm": 0.8370920951144719, "learning_rate": 4.99583195786785e-06, "loss": 0.3048, "step": 15016 }, { "epoch": 0.5153397391901167, "grad_norm": 0.7289405923831993, "learning_rate": 4.995276219072515e-06, "loss": 0.2533, "step": 15017 }, { "epoch": 0.5153740562800274, "grad_norm": 0.7509756085312507, "learning_rate": 4.994720480335537e-06, "loss": 0.2663, "step": 15018 }, { "epoch": 0.5154083733699383, "grad_norm": 0.7160638053519579, "learning_rate": 4.99416474166378e-06, "loss": 0.3065, "step": 15019 }, { "epoch": 0.515442690459849, "grad_norm": 0.7777053117049387, "learning_rate": 4.9936090030641124e-06, "loss": 0.2642, "step": 15020 }, { "epoch": 0.5154770075497598, "grad_norm": 0.8437980643053699, "learning_rate": 4.993053264543398e-06, "loss": 0.294, "step": 15021 }, { "epoch": 0.5155113246396705, "grad_norm": 0.8310335615034171, "learning_rate": 4.992497526108501e-06, "loss": 0.2941, "step": 15022 }, { "epoch": 0.5155456417295813, "grad_norm": 0.8482761000297242, "learning_rate": 4.99194178776629e-06, "loss": 0.3023, "step": 15023 }, { "epoch": 0.5155799588194921, "grad_norm": 0.7163627045614409, "learning_rate": 4.991386049523628e-06, "loss": 0.2813, "step": 15024 }, { "epoch": 0.5156142759094029, "grad_norm": 0.7614554298923936, "learning_rate": 4.9908303113873804e-06, "loss": 0.2823, "step": 15025 }, { "epoch": 0.5156485929993136, "grad_norm": 0.7926682661421289, "learning_rate": 4.990274573364415e-06, "loss": 0.2626, "step": 15026 }, { "epoch": 0.5156829100892244, "grad_norm": 0.8567665615153605, "learning_rate": 4.989718835461596e-06, "loss": 0.305, "step": 15027 }, { "epoch": 0.5157172271791353, "grad_norm": 1.029371792474436, "learning_rate": 4.989163097685787e-06, "loss": 0.317, "step": 15028 }, { "epoch": 0.515751544269046, "grad_norm": 0.7353094129693347, "learning_rate": 4.988607360043858e-06, "loss": 0.2697, "step": 15029 }, { "epoch": 0.5157858613589568, "grad_norm": 0.9182150483673167, "learning_rate": 4.988051622542669e-06, "loss": 0.3457, "step": 15030 }, { "epoch": 0.5158201784488675, "grad_norm": 0.8007577670119959, "learning_rate": 4.98749588518909e-06, "loss": 0.2845, "step": 15031 }, { "epoch": 0.5158544955387783, "grad_norm": 0.8544191955144506, "learning_rate": 4.986940147989985e-06, "loss": 0.3111, "step": 15032 }, { "epoch": 0.5158888126286891, "grad_norm": 0.7715578722829803, "learning_rate": 4.9863844109522186e-06, "loss": 0.2789, "step": 15033 }, { "epoch": 0.5159231297185999, "grad_norm": 0.706788292549454, "learning_rate": 4.985828674082655e-06, "loss": 0.2925, "step": 15034 }, { "epoch": 0.5159574468085106, "grad_norm": 0.7933537955052193, "learning_rate": 4.9852729373881655e-06, "loss": 0.2528, "step": 15035 }, { "epoch": 0.5159917638984214, "grad_norm": 0.7653867271652658, "learning_rate": 4.984717200875611e-06, "loss": 0.2404, "step": 15036 }, { "epoch": 0.5160260809883321, "grad_norm": 0.7633116432608985, "learning_rate": 4.984161464551854e-06, "loss": 0.2913, "step": 15037 }, { "epoch": 0.516060398078243, "grad_norm": 0.851251476800774, "learning_rate": 4.983605728423766e-06, "loss": 0.2715, "step": 15038 }, { "epoch": 0.5160947151681538, "grad_norm": 0.7306341890603265, "learning_rate": 4.983049992498211e-06, "loss": 0.2782, "step": 15039 }, { "epoch": 0.5161290322580645, "grad_norm": 0.813777734166509, "learning_rate": 4.982494256782052e-06, "loss": 0.3347, "step": 15040 }, { "epoch": 0.5161633493479753, "grad_norm": 0.8001372667897257, "learning_rate": 4.981938521282158e-06, "loss": 0.2594, "step": 15041 }, { "epoch": 0.5161976664378861, "grad_norm": 0.7521383302966428, "learning_rate": 4.981382786005389e-06, "loss": 0.2947, "step": 15042 }, { "epoch": 0.5162319835277969, "grad_norm": 0.7058713631342491, "learning_rate": 4.980827050958618e-06, "loss": 0.2353, "step": 15043 }, { "epoch": 0.5162663006177076, "grad_norm": 0.7771260834275682, "learning_rate": 4.980271316148705e-06, "loss": 0.2801, "step": 15044 }, { "epoch": 0.5163006177076184, "grad_norm": 0.8859551007105677, "learning_rate": 4.979715581582515e-06, "loss": 0.2927, "step": 15045 }, { "epoch": 0.5163349347975291, "grad_norm": 0.743468209638207, "learning_rate": 4.979159847266918e-06, "loss": 0.2765, "step": 15046 }, { "epoch": 0.51636925188744, "grad_norm": 0.7603179168778675, "learning_rate": 4.978604113208776e-06, "loss": 0.3025, "step": 15047 }, { "epoch": 0.5164035689773507, "grad_norm": 0.7706147824733671, "learning_rate": 4.978048379414953e-06, "loss": 0.2746, "step": 15048 }, { "epoch": 0.5164378860672615, "grad_norm": 0.8048260250253926, "learning_rate": 4.97749264589232e-06, "loss": 0.2593, "step": 15049 }, { "epoch": 0.5164722031571722, "grad_norm": 0.7806156944803634, "learning_rate": 4.976936912647739e-06, "loss": 0.3338, "step": 15050 }, { "epoch": 0.5165065202470831, "grad_norm": 0.8463619313713651, "learning_rate": 4.976381179688073e-06, "loss": 0.304, "step": 15051 }, { "epoch": 0.5165408373369939, "grad_norm": 0.7306181712560164, "learning_rate": 4.975825447020192e-06, "loss": 0.2876, "step": 15052 }, { "epoch": 0.5165751544269046, "grad_norm": 0.7846480235413029, "learning_rate": 4.975269714650958e-06, "loss": 0.277, "step": 15053 }, { "epoch": 0.5166094715168154, "grad_norm": 0.7906000997558087, "learning_rate": 4.974713982587238e-06, "loss": 0.2743, "step": 15054 }, { "epoch": 0.5166437886067261, "grad_norm": 0.8268552197616353, "learning_rate": 4.974158250835898e-06, "loss": 0.2734, "step": 15055 }, { "epoch": 0.516678105696637, "grad_norm": 0.7535200009914163, "learning_rate": 4.973602519403803e-06, "loss": 0.2553, "step": 15056 }, { "epoch": 0.5167124227865477, "grad_norm": 0.7727261214367698, "learning_rate": 4.973046788297816e-06, "loss": 0.2623, "step": 15057 }, { "epoch": 0.5167467398764585, "grad_norm": 0.8018073658040367, "learning_rate": 4.9724910575248075e-06, "loss": 0.337, "step": 15058 }, { "epoch": 0.5167810569663692, "grad_norm": 0.7787211589251384, "learning_rate": 4.97193532709164e-06, "loss": 0.301, "step": 15059 }, { "epoch": 0.51681537405628, "grad_norm": 0.8803009647311776, "learning_rate": 4.971379597005175e-06, "loss": 0.2952, "step": 15060 }, { "epoch": 0.5168496911461908, "grad_norm": 0.8996431635606482, "learning_rate": 4.970823867272284e-06, "loss": 0.2881, "step": 15061 }, { "epoch": 0.5168840082361016, "grad_norm": 0.7667327073684695, "learning_rate": 4.9702681378998315e-06, "loss": 0.3397, "step": 15062 }, { "epoch": 0.5169183253260123, "grad_norm": 0.7288556890694962, "learning_rate": 4.969712408894679e-06, "loss": 0.3307, "step": 15063 }, { "epoch": 0.5169526424159231, "grad_norm": 0.7239082514657101, "learning_rate": 4.969156680263696e-06, "loss": 0.2914, "step": 15064 }, { "epoch": 0.516986959505834, "grad_norm": 0.7626293676401651, "learning_rate": 4.968600952013747e-06, "loss": 0.3359, "step": 15065 }, { "epoch": 0.5170212765957447, "grad_norm": 0.7501036877303172, "learning_rate": 4.968045224151694e-06, "loss": 0.265, "step": 15066 }, { "epoch": 0.5170555936856555, "grad_norm": 0.7867595912653137, "learning_rate": 4.967489496684407e-06, "loss": 0.2619, "step": 15067 }, { "epoch": 0.5170899107755662, "grad_norm": 0.740241249694913, "learning_rate": 4.966933769618749e-06, "loss": 0.3139, "step": 15068 }, { "epoch": 0.517124227865477, "grad_norm": 0.8763866742135096, "learning_rate": 4.966378042961584e-06, "loss": 0.2785, "step": 15069 }, { "epoch": 0.5171585449553878, "grad_norm": 0.7608485110028348, "learning_rate": 4.9658223167197815e-06, "loss": 0.2776, "step": 15070 }, { "epoch": 0.5171928620452986, "grad_norm": 0.7851235169821782, "learning_rate": 4.965266590900204e-06, "loss": 0.2387, "step": 15071 }, { "epoch": 0.5172271791352093, "grad_norm": 0.9090643334125843, "learning_rate": 4.964710865509716e-06, "loss": 0.2694, "step": 15072 }, { "epoch": 0.5172614962251201, "grad_norm": 0.7669252539240812, "learning_rate": 4.964155140555185e-06, "loss": 0.267, "step": 15073 }, { "epoch": 0.5172958133150309, "grad_norm": 1.243792495255763, "learning_rate": 4.963599416043474e-06, "loss": 0.2846, "step": 15074 }, { "epoch": 0.5173301304049417, "grad_norm": 0.7758141121649117, "learning_rate": 4.963043691981452e-06, "loss": 0.2785, "step": 15075 }, { "epoch": 0.5173644474948524, "grad_norm": 0.7518521834890668, "learning_rate": 4.962487968375982e-06, "loss": 0.2399, "step": 15076 }, { "epoch": 0.5173987645847632, "grad_norm": 0.7537231017437194, "learning_rate": 4.961932245233927e-06, "loss": 0.2898, "step": 15077 }, { "epoch": 0.517433081674674, "grad_norm": 0.8484056231630956, "learning_rate": 4.961376522562157e-06, "loss": 0.2693, "step": 15078 }, { "epoch": 0.5174673987645848, "grad_norm": 0.7542124915940233, "learning_rate": 4.960820800367535e-06, "loss": 0.3337, "step": 15079 }, { "epoch": 0.5175017158544956, "grad_norm": 0.7842805517740705, "learning_rate": 4.960265078656924e-06, "loss": 0.2464, "step": 15080 }, { "epoch": 0.5175360329444063, "grad_norm": 0.804523212380476, "learning_rate": 4.9597093574371945e-06, "loss": 0.2335, "step": 15081 }, { "epoch": 0.5175703500343171, "grad_norm": 0.846447045048709, "learning_rate": 4.959153636715207e-06, "loss": 0.3866, "step": 15082 }, { "epoch": 0.5176046671242278, "grad_norm": 0.7617664999779565, "learning_rate": 4.95859791649783e-06, "loss": 0.3025, "step": 15083 }, { "epoch": 0.5176389842141387, "grad_norm": 0.7043320886849888, "learning_rate": 4.958042196791927e-06, "loss": 0.2553, "step": 15084 }, { "epoch": 0.5176733013040494, "grad_norm": 0.7619579437475162, "learning_rate": 4.957486477604364e-06, "loss": 0.293, "step": 15085 }, { "epoch": 0.5177076183939602, "grad_norm": 0.7042604370274362, "learning_rate": 4.956930758942005e-06, "loss": 0.2859, "step": 15086 }, { "epoch": 0.5177419354838709, "grad_norm": 0.8331663071018692, "learning_rate": 4.956375040811717e-06, "loss": 0.2842, "step": 15087 }, { "epoch": 0.5177762525737818, "grad_norm": 0.7651662027670622, "learning_rate": 4.955819323220366e-06, "loss": 0.2587, "step": 15088 }, { "epoch": 0.5178105696636925, "grad_norm": 0.7770874048601244, "learning_rate": 4.9552636061748125e-06, "loss": 0.3361, "step": 15089 }, { "epoch": 0.5178448867536033, "grad_norm": 0.875668643112975, "learning_rate": 4.954707889681928e-06, "loss": 0.2763, "step": 15090 }, { "epoch": 0.517879203843514, "grad_norm": 0.8424173533979659, "learning_rate": 4.954152173748573e-06, "loss": 0.2555, "step": 15091 }, { "epoch": 0.5179135209334248, "grad_norm": 0.787701579703789, "learning_rate": 4.953596458381613e-06, "loss": 0.2818, "step": 15092 }, { "epoch": 0.5179478380233357, "grad_norm": 0.7980428859367571, "learning_rate": 4.953040743587918e-06, "loss": 0.2693, "step": 15093 }, { "epoch": 0.5179821551132464, "grad_norm": 0.7386389226265053, "learning_rate": 4.952485029374349e-06, "loss": 0.2565, "step": 15094 }, { "epoch": 0.5180164722031572, "grad_norm": 0.7749703557333424, "learning_rate": 4.95192931574777e-06, "loss": 0.3668, "step": 15095 }, { "epoch": 0.5180507892930679, "grad_norm": 0.7461009385249964, "learning_rate": 4.95137360271505e-06, "loss": 0.273, "step": 15096 }, { "epoch": 0.5180851063829788, "grad_norm": 0.7290240225806124, "learning_rate": 4.9508178902830526e-06, "loss": 0.2435, "step": 15097 }, { "epoch": 0.5181194234728895, "grad_norm": 0.8090483327719737, "learning_rate": 4.950262178458643e-06, "loss": 0.3296, "step": 15098 }, { "epoch": 0.5181537405628003, "grad_norm": 0.7876564976542003, "learning_rate": 4.949706467248685e-06, "loss": 0.324, "step": 15099 }, { "epoch": 0.518188057652711, "grad_norm": 0.7514366325273739, "learning_rate": 4.949150756660046e-06, "loss": 0.3103, "step": 15100 }, { "epoch": 0.5182223747426218, "grad_norm": 0.7637940389619765, "learning_rate": 4.948595046699588e-06, "loss": 0.2673, "step": 15101 }, { "epoch": 0.5182566918325326, "grad_norm": 0.7386094630907712, "learning_rate": 4.948039337374181e-06, "loss": 0.2979, "step": 15102 }, { "epoch": 0.5182910089224434, "grad_norm": 0.9243102304823708, "learning_rate": 4.947483628690685e-06, "loss": 0.3287, "step": 15103 }, { "epoch": 0.5183253260123541, "grad_norm": 0.9537530154989193, "learning_rate": 4.946927920655969e-06, "loss": 0.3188, "step": 15104 }, { "epoch": 0.5183596431022649, "grad_norm": 0.8058097400081516, "learning_rate": 4.946372213276897e-06, "loss": 0.3156, "step": 15105 }, { "epoch": 0.5183939601921757, "grad_norm": 0.7949207968965808, "learning_rate": 4.945816506560333e-06, "loss": 0.2333, "step": 15106 }, { "epoch": 0.5184282772820865, "grad_norm": 0.6919520015597315, "learning_rate": 4.945260800513143e-06, "loss": 0.2708, "step": 15107 }, { "epoch": 0.5184625943719973, "grad_norm": 0.8563178812573788, "learning_rate": 4.944705095142193e-06, "loss": 0.2957, "step": 15108 }, { "epoch": 0.518496911461908, "grad_norm": 0.8059073084140002, "learning_rate": 4.944149390454345e-06, "loss": 0.2619, "step": 15109 }, { "epoch": 0.5185312285518188, "grad_norm": 0.7548674585856854, "learning_rate": 4.943593686456468e-06, "loss": 0.2779, "step": 15110 }, { "epoch": 0.5185655456417296, "grad_norm": 0.7884383583730307, "learning_rate": 4.943037983155426e-06, "loss": 0.3185, "step": 15111 }, { "epoch": 0.5185998627316404, "grad_norm": 0.7579683164760411, "learning_rate": 4.94248228055808e-06, "loss": 0.2188, "step": 15112 }, { "epoch": 0.5186341798215511, "grad_norm": 0.7649819616864239, "learning_rate": 4.9419265786713e-06, "loss": 0.2848, "step": 15113 }, { "epoch": 0.5186684969114619, "grad_norm": 0.7514369079193586, "learning_rate": 4.941370877501951e-06, "loss": 0.3032, "step": 15114 }, { "epoch": 0.5187028140013726, "grad_norm": 0.8291676103425437, "learning_rate": 4.940815177056894e-06, "loss": 0.2944, "step": 15115 }, { "epoch": 0.5187371310912835, "grad_norm": 0.7640702784340923, "learning_rate": 4.940259477342999e-06, "loss": 0.3462, "step": 15116 }, { "epoch": 0.5187714481811943, "grad_norm": 0.8877921713281665, "learning_rate": 4.939703778367127e-06, "loss": 0.2954, "step": 15117 }, { "epoch": 0.518805765271105, "grad_norm": 0.8186220760337719, "learning_rate": 4.939148080136144e-06, "loss": 0.2768, "step": 15118 }, { "epoch": 0.5188400823610158, "grad_norm": 0.7821199301264083, "learning_rate": 4.938592382656917e-06, "loss": 0.2877, "step": 15119 }, { "epoch": 0.5188743994509266, "grad_norm": 0.7624239919188854, "learning_rate": 4.9380366859363085e-06, "loss": 0.2839, "step": 15120 }, { "epoch": 0.5189087165408374, "grad_norm": 0.7851759011738622, "learning_rate": 4.9374809899811845e-06, "loss": 0.2786, "step": 15121 }, { "epoch": 0.5189430336307481, "grad_norm": 0.704605396648202, "learning_rate": 4.9369252947984105e-06, "loss": 0.2681, "step": 15122 }, { "epoch": 0.5189773507206589, "grad_norm": 0.7531121868676824, "learning_rate": 4.936369600394852e-06, "loss": 0.2893, "step": 15123 }, { "epoch": 0.5190116678105696, "grad_norm": 0.7581308844589698, "learning_rate": 4.935813906777369e-06, "loss": 0.2824, "step": 15124 }, { "epoch": 0.5190459849004805, "grad_norm": 0.6995039070528173, "learning_rate": 4.935258213952834e-06, "loss": 0.2763, "step": 15125 }, { "epoch": 0.5190803019903912, "grad_norm": 0.7284803791733127, "learning_rate": 4.934702521928108e-06, "loss": 0.2858, "step": 15126 }, { "epoch": 0.519114619080302, "grad_norm": 0.9361816007448561, "learning_rate": 4.934146830710054e-06, "loss": 0.3402, "step": 15127 }, { "epoch": 0.5191489361702127, "grad_norm": 0.7642273891127761, "learning_rate": 4.933591140305541e-06, "loss": 0.3109, "step": 15128 }, { "epoch": 0.5191832532601235, "grad_norm": 0.7317983971686078, "learning_rate": 4.933035450721432e-06, "loss": 0.2889, "step": 15129 }, { "epoch": 0.5192175703500344, "grad_norm": 0.7662004880705139, "learning_rate": 4.9324797619645905e-06, "loss": 0.2687, "step": 15130 }, { "epoch": 0.5192518874399451, "grad_norm": 0.7414707533946944, "learning_rate": 4.931924074041884e-06, "loss": 0.2688, "step": 15131 }, { "epoch": 0.5192862045298559, "grad_norm": 0.7306270999802796, "learning_rate": 4.931368386960177e-06, "loss": 0.3113, "step": 15132 }, { "epoch": 0.5193205216197666, "grad_norm": 0.8002879979658206, "learning_rate": 4.930812700726332e-06, "loss": 0.309, "step": 15133 }, { "epoch": 0.5193548387096775, "grad_norm": 0.7586635160232729, "learning_rate": 4.930257015347217e-06, "loss": 0.2919, "step": 15134 }, { "epoch": 0.5193891557995882, "grad_norm": 0.788325125194285, "learning_rate": 4.9297013308296924e-06, "loss": 0.3087, "step": 15135 }, { "epoch": 0.519423472889499, "grad_norm": 0.7661460838082306, "learning_rate": 4.929145647180627e-06, "loss": 0.2462, "step": 15136 }, { "epoch": 0.5194577899794097, "grad_norm": 0.7985894784727777, "learning_rate": 4.928589964406887e-06, "loss": 0.3199, "step": 15137 }, { "epoch": 0.5194921070693205, "grad_norm": 0.6572104361398213, "learning_rate": 4.928034282515331e-06, "loss": 0.3381, "step": 15138 }, { "epoch": 0.5195264241592313, "grad_norm": 0.8182241577285262, "learning_rate": 4.9274786015128315e-06, "loss": 0.3326, "step": 15139 }, { "epoch": 0.5195607412491421, "grad_norm": 0.76372033219768, "learning_rate": 4.926922921406248e-06, "loss": 0.2902, "step": 15140 }, { "epoch": 0.5195950583390528, "grad_norm": 0.6921883968899551, "learning_rate": 4.926367242202445e-06, "loss": 0.2224, "step": 15141 }, { "epoch": 0.5196293754289636, "grad_norm": 0.8169407921818935, "learning_rate": 4.92581156390829e-06, "loss": 0.3277, "step": 15142 }, { "epoch": 0.5196636925188745, "grad_norm": 0.8721083753735848, "learning_rate": 4.925255886530646e-06, "loss": 0.324, "step": 15143 }, { "epoch": 0.5196980096087852, "grad_norm": 0.8008280929095696, "learning_rate": 4.924700210076378e-06, "loss": 0.254, "step": 15144 }, { "epoch": 0.519732326698696, "grad_norm": 0.7643564141863267, "learning_rate": 4.924144534552353e-06, "loss": 0.2551, "step": 15145 }, { "epoch": 0.5197666437886067, "grad_norm": 0.7193810928130765, "learning_rate": 4.923588859965434e-06, "loss": 0.2981, "step": 15146 }, { "epoch": 0.5198009608785175, "grad_norm": 0.79828478522021, "learning_rate": 4.923033186322483e-06, "loss": 0.2746, "step": 15147 }, { "epoch": 0.5198352779684283, "grad_norm": 0.7471478540677845, "learning_rate": 4.9224775136303695e-06, "loss": 0.2626, "step": 15148 }, { "epoch": 0.5198695950583391, "grad_norm": 0.6970393475074034, "learning_rate": 4.921921841895954e-06, "loss": 0.325, "step": 15149 }, { "epoch": 0.5199039121482498, "grad_norm": 0.7768185709781186, "learning_rate": 4.921366171126105e-06, "loss": 0.3261, "step": 15150 }, { "epoch": 0.5199382292381606, "grad_norm": 0.7548933702144736, "learning_rate": 4.9208105013276834e-06, "loss": 0.2678, "step": 15151 }, { "epoch": 0.5199725463280713, "grad_norm": 0.7779347584136244, "learning_rate": 4.920254832507558e-06, "loss": 0.2533, "step": 15152 }, { "epoch": 0.5200068634179822, "grad_norm": 0.7470647665080725, "learning_rate": 4.919699164672589e-06, "loss": 0.2496, "step": 15153 }, { "epoch": 0.5200411805078929, "grad_norm": 0.820387241395235, "learning_rate": 4.919143497829645e-06, "loss": 0.2881, "step": 15154 }, { "epoch": 0.5200754975978037, "grad_norm": 0.7888029111354884, "learning_rate": 4.918587831985588e-06, "loss": 0.2789, "step": 15155 }, { "epoch": 0.5201098146877144, "grad_norm": 0.699135960700243, "learning_rate": 4.918032167147283e-06, "loss": 0.2896, "step": 15156 }, { "epoch": 0.5201441317776253, "grad_norm": 0.6951788808358081, "learning_rate": 4.917476503321595e-06, "loss": 0.2673, "step": 15157 }, { "epoch": 0.5201784488675361, "grad_norm": 0.7716039161483403, "learning_rate": 4.9169208405153896e-06, "loss": 0.2773, "step": 15158 }, { "epoch": 0.5202127659574468, "grad_norm": 0.7078431376075917, "learning_rate": 4.916365178735528e-06, "loss": 0.2746, "step": 15159 }, { "epoch": 0.5202470830473576, "grad_norm": 0.6718959215530984, "learning_rate": 4.91580951798888e-06, "loss": 0.2869, "step": 15160 }, { "epoch": 0.5202814001372683, "grad_norm": 0.8930780014231121, "learning_rate": 4.915253858282307e-06, "loss": 0.2988, "step": 15161 }, { "epoch": 0.5203157172271792, "grad_norm": 0.648098812777296, "learning_rate": 4.914698199622671e-06, "loss": 0.2585, "step": 15162 }, { "epoch": 0.5203500343170899, "grad_norm": 0.7659483708396257, "learning_rate": 4.914142542016843e-06, "loss": 0.2631, "step": 15163 }, { "epoch": 0.5203843514070007, "grad_norm": 0.7572907642304468, "learning_rate": 4.913586885471681e-06, "loss": 0.2972, "step": 15164 }, { "epoch": 0.5204186684969114, "grad_norm": 0.7384087695630455, "learning_rate": 4.913031229994053e-06, "loss": 0.3147, "step": 15165 }, { "epoch": 0.5204529855868223, "grad_norm": 0.7425168440068798, "learning_rate": 4.912475575590823e-06, "loss": 0.2504, "step": 15166 }, { "epoch": 0.520487302676733, "grad_norm": 0.6941845979788583, "learning_rate": 4.911919922268854e-06, "loss": 0.2857, "step": 15167 }, { "epoch": 0.5205216197666438, "grad_norm": 0.7581473825395466, "learning_rate": 4.911364270035014e-06, "loss": 0.2994, "step": 15168 }, { "epoch": 0.5205559368565545, "grad_norm": 0.7488318678859459, "learning_rate": 4.9108086188961644e-06, "loss": 0.2414, "step": 15169 }, { "epoch": 0.5205902539464653, "grad_norm": 0.7271204173641734, "learning_rate": 4.910252968859169e-06, "loss": 0.2473, "step": 15170 }, { "epoch": 0.5206245710363762, "grad_norm": 0.7732614249967948, "learning_rate": 4.909697319930895e-06, "loss": 0.2735, "step": 15171 }, { "epoch": 0.5206588881262869, "grad_norm": 0.8024998135644867, "learning_rate": 4.909141672118205e-06, "loss": 0.2685, "step": 15172 }, { "epoch": 0.5206932052161977, "grad_norm": 0.7683061377151335, "learning_rate": 4.908586025427963e-06, "loss": 0.2405, "step": 15173 }, { "epoch": 0.5207275223061084, "grad_norm": 0.7244097509848979, "learning_rate": 4.908030379867034e-06, "loss": 0.2753, "step": 15174 }, { "epoch": 0.5207618393960192, "grad_norm": 0.8671840634629294, "learning_rate": 4.907474735442284e-06, "loss": 0.2656, "step": 15175 }, { "epoch": 0.52079615648593, "grad_norm": 0.6856456675201562, "learning_rate": 4.9069190921605735e-06, "loss": 0.3049, "step": 15176 }, { "epoch": 0.5208304735758408, "grad_norm": 0.7956479979109422, "learning_rate": 4.9063634500287715e-06, "loss": 0.3511, "step": 15177 }, { "epoch": 0.5208647906657515, "grad_norm": 0.730055374847433, "learning_rate": 4.9058078090537395e-06, "loss": 0.2954, "step": 15178 }, { "epoch": 0.5208991077556623, "grad_norm": 0.7355471407517252, "learning_rate": 4.90525216924234e-06, "loss": 0.3339, "step": 15179 }, { "epoch": 0.5209334248455731, "grad_norm": 0.7485127760000009, "learning_rate": 4.904696530601441e-06, "loss": 0.2699, "step": 15180 }, { "epoch": 0.5209677419354839, "grad_norm": 0.6945356049188817, "learning_rate": 4.904140893137906e-06, "loss": 0.2725, "step": 15181 }, { "epoch": 0.5210020590253946, "grad_norm": 0.8180941123794776, "learning_rate": 4.9035852568585965e-06, "loss": 0.2781, "step": 15182 }, { "epoch": 0.5210363761153054, "grad_norm": 0.787067826742513, "learning_rate": 4.903029621770381e-06, "loss": 0.3518, "step": 15183 }, { "epoch": 0.5210706932052162, "grad_norm": 0.7666790776993695, "learning_rate": 4.9024739878801204e-06, "loss": 0.3024, "step": 15184 }, { "epoch": 0.521105010295127, "grad_norm": 0.7532195175681959, "learning_rate": 4.901918355194679e-06, "loss": 0.2548, "step": 15185 }, { "epoch": 0.5211393273850378, "grad_norm": 0.7693276409218569, "learning_rate": 4.901362723720925e-06, "loss": 0.3422, "step": 15186 }, { "epoch": 0.5211736444749485, "grad_norm": 0.7519083348666447, "learning_rate": 4.9008070934657164e-06, "loss": 0.3211, "step": 15187 }, { "epoch": 0.5212079615648593, "grad_norm": 0.8041997884536634, "learning_rate": 4.900251464435921e-06, "loss": 0.2808, "step": 15188 }, { "epoch": 0.5212422786547701, "grad_norm": 0.7193312540664621, "learning_rate": 4.899695836638403e-06, "loss": 0.2706, "step": 15189 }, { "epoch": 0.5212765957446809, "grad_norm": 0.6477748213774996, "learning_rate": 4.899140210080027e-06, "loss": 0.2595, "step": 15190 }, { "epoch": 0.5213109128345916, "grad_norm": 0.804301656881028, "learning_rate": 4.898584584767654e-06, "loss": 0.2804, "step": 15191 }, { "epoch": 0.5213452299245024, "grad_norm": 0.7193350993356696, "learning_rate": 4.898028960708153e-06, "loss": 0.3035, "step": 15192 }, { "epoch": 0.5213795470144131, "grad_norm": 0.7620711051980441, "learning_rate": 4.897473337908383e-06, "loss": 0.2605, "step": 15193 }, { "epoch": 0.521413864104324, "grad_norm": 0.9059912544773937, "learning_rate": 4.896917716375211e-06, "loss": 0.2695, "step": 15194 }, { "epoch": 0.5214481811942348, "grad_norm": 0.7460852751196243, "learning_rate": 4.8963620961155e-06, "loss": 0.2916, "step": 15195 }, { "epoch": 0.5214824982841455, "grad_norm": 0.7747503578001523, "learning_rate": 4.895806477136116e-06, "loss": 0.2951, "step": 15196 }, { "epoch": 0.5215168153740563, "grad_norm": 0.7260574320219964, "learning_rate": 4.895250859443919e-06, "loss": 0.2666, "step": 15197 }, { "epoch": 0.521551132463967, "grad_norm": 0.8665347667354648, "learning_rate": 4.894695243045778e-06, "loss": 0.322, "step": 15198 }, { "epoch": 0.5215854495538779, "grad_norm": 0.8074066843101423, "learning_rate": 4.894139627948552e-06, "loss": 0.293, "step": 15199 }, { "epoch": 0.5216197666437886, "grad_norm": 0.7229379614551107, "learning_rate": 4.89358401415911e-06, "loss": 0.306, "step": 15200 }, { "epoch": 0.5216540837336994, "grad_norm": 0.8381608029180543, "learning_rate": 4.893028401684313e-06, "loss": 0.2364, "step": 15201 }, { "epoch": 0.5216884008236101, "grad_norm": 0.8212621510380826, "learning_rate": 4.8924727905310235e-06, "loss": 0.2655, "step": 15202 }, { "epoch": 0.521722717913521, "grad_norm": 0.8331033435704185, "learning_rate": 4.891917180706109e-06, "loss": 0.2895, "step": 15203 }, { "epoch": 0.5217570350034317, "grad_norm": 0.8116384191509932, "learning_rate": 4.891361572216432e-06, "loss": 0.3249, "step": 15204 }, { "epoch": 0.5217913520933425, "grad_norm": 0.7767048643114569, "learning_rate": 4.890805965068854e-06, "loss": 0.2742, "step": 15205 }, { "epoch": 0.5218256691832532, "grad_norm": 0.6952580800406942, "learning_rate": 4.890250359270243e-06, "loss": 0.2709, "step": 15206 }, { "epoch": 0.521859986273164, "grad_norm": 0.887028849403569, "learning_rate": 4.889694754827462e-06, "loss": 0.303, "step": 15207 }, { "epoch": 0.5218943033630749, "grad_norm": 0.8199704905632892, "learning_rate": 4.88913915174737e-06, "loss": 0.2257, "step": 15208 }, { "epoch": 0.5219286204529856, "grad_norm": 0.8130579441482473, "learning_rate": 4.888583550036838e-06, "loss": 0.2809, "step": 15209 }, { "epoch": 0.5219629375428964, "grad_norm": 0.7367834693452986, "learning_rate": 4.888027949702724e-06, "loss": 0.2722, "step": 15210 }, { "epoch": 0.5219972546328071, "grad_norm": 1.1170582632901265, "learning_rate": 4.887472350751894e-06, "loss": 0.2885, "step": 15211 }, { "epoch": 0.522031571722718, "grad_norm": 0.7450771315708509, "learning_rate": 4.886916753191213e-06, "loss": 0.273, "step": 15212 }, { "epoch": 0.5220658888126287, "grad_norm": 0.8232969891325675, "learning_rate": 4.886361157027545e-06, "loss": 0.2827, "step": 15213 }, { "epoch": 0.5221002059025395, "grad_norm": 0.7722751450952108, "learning_rate": 4.88580556226775e-06, "loss": 0.2845, "step": 15214 }, { "epoch": 0.5221345229924502, "grad_norm": 0.8195420437384332, "learning_rate": 4.8852499689186956e-06, "loss": 0.2981, "step": 15215 }, { "epoch": 0.522168840082361, "grad_norm": 0.8363528139992404, "learning_rate": 4.884694376987243e-06, "loss": 0.2863, "step": 15216 }, { "epoch": 0.5222031571722718, "grad_norm": 0.775697832665345, "learning_rate": 4.884138786480258e-06, "loss": 0.2771, "step": 15217 }, { "epoch": 0.5222374742621826, "grad_norm": 0.7107268474242856, "learning_rate": 4.883583197404602e-06, "loss": 0.3288, "step": 15218 }, { "epoch": 0.5222717913520933, "grad_norm": 0.7530174875538698, "learning_rate": 4.883027609767142e-06, "loss": 0.2744, "step": 15219 }, { "epoch": 0.5223061084420041, "grad_norm": 0.7349477471737036, "learning_rate": 4.8824720235747365e-06, "loss": 0.2275, "step": 15220 }, { "epoch": 0.5223404255319148, "grad_norm": 1.1114789776199678, "learning_rate": 4.881916438834254e-06, "loss": 0.2932, "step": 15221 }, { "epoch": 0.5223747426218257, "grad_norm": 0.7582725849874659, "learning_rate": 4.881360855552558e-06, "loss": 0.2657, "step": 15222 }, { "epoch": 0.5224090597117365, "grad_norm": 0.8195572238103738, "learning_rate": 4.880805273736506e-06, "loss": 0.2584, "step": 15223 }, { "epoch": 0.5224433768016472, "grad_norm": 0.8110967099593211, "learning_rate": 4.880249693392968e-06, "loss": 0.2294, "step": 15224 }, { "epoch": 0.522477693891558, "grad_norm": 0.7801129833605899, "learning_rate": 4.8796941145288066e-06, "loss": 0.2237, "step": 15225 }, { "epoch": 0.5225120109814688, "grad_norm": 0.7847257685178017, "learning_rate": 4.879138537150882e-06, "loss": 0.2807, "step": 15226 }, { "epoch": 0.5225463280713796, "grad_norm": 0.8208538061331981, "learning_rate": 4.878582961266062e-06, "loss": 0.3285, "step": 15227 }, { "epoch": 0.5225806451612903, "grad_norm": 0.7322461747365284, "learning_rate": 4.878027386881208e-06, "loss": 0.2854, "step": 15228 }, { "epoch": 0.5226149622512011, "grad_norm": 0.7281036082588795, "learning_rate": 4.8774718140031805e-06, "loss": 0.2402, "step": 15229 }, { "epoch": 0.5226492793411118, "grad_norm": 0.810784425008225, "learning_rate": 4.876916242638849e-06, "loss": 0.2612, "step": 15230 }, { "epoch": 0.5226835964310227, "grad_norm": 0.7616918147720775, "learning_rate": 4.876360672795071e-06, "loss": 0.2114, "step": 15231 }, { "epoch": 0.5227179135209334, "grad_norm": 0.9002335750439385, "learning_rate": 4.875805104478714e-06, "loss": 0.3267, "step": 15232 }, { "epoch": 0.5227522306108442, "grad_norm": 0.7425912058645814, "learning_rate": 4.875249537696641e-06, "loss": 0.2808, "step": 15233 }, { "epoch": 0.5227865477007549, "grad_norm": 0.7298832566601425, "learning_rate": 4.874693972455713e-06, "loss": 0.2274, "step": 15234 }, { "epoch": 0.5228208647906658, "grad_norm": 0.8286948472617758, "learning_rate": 4.874138408762797e-06, "loss": 0.2492, "step": 15235 }, { "epoch": 0.5228551818805766, "grad_norm": 0.7192055574899067, "learning_rate": 4.873582846624754e-06, "loss": 0.2928, "step": 15236 }, { "epoch": 0.5228894989704873, "grad_norm": 0.7928449975718695, "learning_rate": 4.873027286048445e-06, "loss": 0.2384, "step": 15237 }, { "epoch": 0.5229238160603981, "grad_norm": 0.8284815651719141, "learning_rate": 4.872471727040739e-06, "loss": 0.2811, "step": 15238 }, { "epoch": 0.5229581331503088, "grad_norm": 0.7605906967301873, "learning_rate": 4.871916169608494e-06, "loss": 0.3154, "step": 15239 }, { "epoch": 0.5229924502402197, "grad_norm": 0.663457126024747, "learning_rate": 4.871360613758575e-06, "loss": 0.2792, "step": 15240 }, { "epoch": 0.5230267673301304, "grad_norm": 0.7147157282147326, "learning_rate": 4.870805059497847e-06, "loss": 0.263, "step": 15241 }, { "epoch": 0.5230610844200412, "grad_norm": 0.8028180500767174, "learning_rate": 4.8702495068331715e-06, "loss": 0.2718, "step": 15242 }, { "epoch": 0.5230954015099519, "grad_norm": 0.8044086175171775, "learning_rate": 4.869693955771411e-06, "loss": 0.2508, "step": 15243 }, { "epoch": 0.5231297185998627, "grad_norm": 0.769479157474944, "learning_rate": 4.869138406319432e-06, "loss": 0.3016, "step": 15244 }, { "epoch": 0.5231640356897735, "grad_norm": 0.6420702098849809, "learning_rate": 4.868582858484095e-06, "loss": 0.228, "step": 15245 }, { "epoch": 0.5231983527796843, "grad_norm": 0.7383376739545118, "learning_rate": 4.868027312272262e-06, "loss": 0.2941, "step": 15246 }, { "epoch": 0.523232669869595, "grad_norm": 0.7931171568767582, "learning_rate": 4.867471767690798e-06, "loss": 0.3052, "step": 15247 }, { "epoch": 0.5232669869595058, "grad_norm": 0.8198200500757229, "learning_rate": 4.866916224746567e-06, "loss": 0.2667, "step": 15248 }, { "epoch": 0.5233013040494167, "grad_norm": 0.6863714676072824, "learning_rate": 4.866360683446428e-06, "loss": 0.2499, "step": 15249 }, { "epoch": 0.5233356211393274, "grad_norm": 0.7751929157695152, "learning_rate": 4.86580514379725e-06, "loss": 0.309, "step": 15250 }, { "epoch": 0.5233699382292382, "grad_norm": 0.7718991460653587, "learning_rate": 4.865249605805893e-06, "loss": 0.2721, "step": 15251 }, { "epoch": 0.5234042553191489, "grad_norm": 0.8249987568149739, "learning_rate": 4.864694069479218e-06, "loss": 0.2925, "step": 15252 }, { "epoch": 0.5234385724090597, "grad_norm": 0.8676311677813766, "learning_rate": 4.864138534824092e-06, "loss": 0.3018, "step": 15253 }, { "epoch": 0.5234728894989705, "grad_norm": 0.7117067969529184, "learning_rate": 4.863583001847375e-06, "loss": 0.2393, "step": 15254 }, { "epoch": 0.5235072065888813, "grad_norm": 0.7383185908185892, "learning_rate": 4.86302747055593e-06, "loss": 0.3293, "step": 15255 }, { "epoch": 0.523541523678792, "grad_norm": 0.7532610347073656, "learning_rate": 4.862471940956623e-06, "loss": 0.3281, "step": 15256 }, { "epoch": 0.5235758407687028, "grad_norm": 0.8437868887259614, "learning_rate": 4.861916413056314e-06, "loss": 0.287, "step": 15257 }, { "epoch": 0.5236101578586135, "grad_norm": 0.8719294118039499, "learning_rate": 4.861360886861866e-06, "loss": 0.2875, "step": 15258 }, { "epoch": 0.5236444749485244, "grad_norm": 0.8024739923349539, "learning_rate": 4.860805362380144e-06, "loss": 0.2936, "step": 15259 }, { "epoch": 0.5236787920384351, "grad_norm": 0.8370071320981429, "learning_rate": 4.860249839618007e-06, "loss": 0.2394, "step": 15260 }, { "epoch": 0.5237131091283459, "grad_norm": 0.796491137159741, "learning_rate": 4.859694318582323e-06, "loss": 0.2824, "step": 15261 }, { "epoch": 0.5237474262182567, "grad_norm": 0.7066985209953375, "learning_rate": 4.859138799279951e-06, "loss": 0.258, "step": 15262 }, { "epoch": 0.5237817433081675, "grad_norm": 0.7330889506146234, "learning_rate": 4.858583281717753e-06, "loss": 0.3392, "step": 15263 }, { "epoch": 0.5238160603980783, "grad_norm": 0.7850132930458064, "learning_rate": 4.858027765902596e-06, "loss": 0.295, "step": 15264 }, { "epoch": 0.523850377487989, "grad_norm": 0.7661003588695243, "learning_rate": 4.857472251841342e-06, "loss": 0.278, "step": 15265 }, { "epoch": 0.5238846945778998, "grad_norm": 0.7476951919264624, "learning_rate": 4.856916739540848e-06, "loss": 0.2741, "step": 15266 }, { "epoch": 0.5239190116678105, "grad_norm": 0.7623312034432151, "learning_rate": 4.856361229007985e-06, "loss": 0.2586, "step": 15267 }, { "epoch": 0.5239533287577214, "grad_norm": 0.9053769580393926, "learning_rate": 4.855805720249609e-06, "loss": 0.3209, "step": 15268 }, { "epoch": 0.5239876458476321, "grad_norm": 0.7040120137878193, "learning_rate": 4.855250213272585e-06, "loss": 0.3316, "step": 15269 }, { "epoch": 0.5240219629375429, "grad_norm": 0.73171431181178, "learning_rate": 4.854694708083777e-06, "loss": 0.2872, "step": 15270 }, { "epoch": 0.5240562800274536, "grad_norm": 0.7448286515435103, "learning_rate": 4.854139204690047e-06, "loss": 0.2953, "step": 15271 }, { "epoch": 0.5240905971173645, "grad_norm": 0.983608796270636, "learning_rate": 4.853583703098254e-06, "loss": 0.2463, "step": 15272 }, { "epoch": 0.5241249142072752, "grad_norm": 0.8063266775259459, "learning_rate": 4.853028203315267e-06, "loss": 0.319, "step": 15273 }, { "epoch": 0.524159231297186, "grad_norm": 0.8038837248740334, "learning_rate": 4.852472705347945e-06, "loss": 0.2408, "step": 15274 }, { "epoch": 0.5241935483870968, "grad_norm": 0.6885301437137686, "learning_rate": 4.851917209203149e-06, "loss": 0.2713, "step": 15275 }, { "epoch": 0.5242278654770075, "grad_norm": 0.6912759415737406, "learning_rate": 4.851361714887744e-06, "loss": 0.2294, "step": 15276 }, { "epoch": 0.5242621825669184, "grad_norm": 0.6844634396398892, "learning_rate": 4.850806222408592e-06, "loss": 0.2545, "step": 15277 }, { "epoch": 0.5242964996568291, "grad_norm": 0.7256210907250189, "learning_rate": 4.850250731772554e-06, "loss": 0.2359, "step": 15278 }, { "epoch": 0.5243308167467399, "grad_norm": 0.7371862286546488, "learning_rate": 4.849695242986495e-06, "loss": 0.2218, "step": 15279 }, { "epoch": 0.5243651338366506, "grad_norm": 0.8366804936946197, "learning_rate": 4.849139756057277e-06, "loss": 0.3385, "step": 15280 }, { "epoch": 0.5243994509265614, "grad_norm": 0.8057004800187532, "learning_rate": 4.848584270991758e-06, "loss": 0.2999, "step": 15281 }, { "epoch": 0.5244337680164722, "grad_norm": 0.7774404747337104, "learning_rate": 4.848028787796807e-06, "loss": 0.2592, "step": 15282 }, { "epoch": 0.524468085106383, "grad_norm": 0.7804475279650469, "learning_rate": 4.847473306479282e-06, "loss": 0.2737, "step": 15283 }, { "epoch": 0.5245024021962937, "grad_norm": 0.7972499354261903, "learning_rate": 4.846917827046046e-06, "loss": 0.3586, "step": 15284 }, { "epoch": 0.5245367192862045, "grad_norm": 0.8726884490921492, "learning_rate": 4.846362349503962e-06, "loss": 0.282, "step": 15285 }, { "epoch": 0.5245710363761154, "grad_norm": 0.8412302989912569, "learning_rate": 4.845806873859893e-06, "loss": 0.2682, "step": 15286 }, { "epoch": 0.5246053534660261, "grad_norm": 0.8797196636791288, "learning_rate": 4.845251400120698e-06, "loss": 0.2899, "step": 15287 }, { "epoch": 0.5246396705559369, "grad_norm": 0.7800981850495532, "learning_rate": 4.8446959282932445e-06, "loss": 0.2748, "step": 15288 }, { "epoch": 0.5246739876458476, "grad_norm": 0.7941328115312173, "learning_rate": 4.844140458384391e-06, "loss": 0.2568, "step": 15289 }, { "epoch": 0.5247083047357584, "grad_norm": 0.8231642645270882, "learning_rate": 4.843584990400998e-06, "loss": 0.2706, "step": 15290 }, { "epoch": 0.5247426218256692, "grad_norm": 0.8425353913641467, "learning_rate": 4.843029524349932e-06, "loss": 0.2802, "step": 15291 }, { "epoch": 0.52477693891558, "grad_norm": 0.773626176605249, "learning_rate": 4.842474060238053e-06, "loss": 0.2756, "step": 15292 }, { "epoch": 0.5248112560054907, "grad_norm": 0.7980717732212388, "learning_rate": 4.841918598072222e-06, "loss": 0.3296, "step": 15293 }, { "epoch": 0.5248455730954015, "grad_norm": 0.7139524532849898, "learning_rate": 4.841363137859305e-06, "loss": 0.3133, "step": 15294 }, { "epoch": 0.5248798901853123, "grad_norm": 0.7607407903981415, "learning_rate": 4.8408076796061595e-06, "loss": 0.2314, "step": 15295 }, { "epoch": 0.5249142072752231, "grad_norm": 0.7330843836456187, "learning_rate": 4.840252223319651e-06, "loss": 0.2625, "step": 15296 }, { "epoch": 0.5249485243651338, "grad_norm": 0.7670226535343962, "learning_rate": 4.83969676900664e-06, "loss": 0.2736, "step": 15297 }, { "epoch": 0.5249828414550446, "grad_norm": 0.7899186709017639, "learning_rate": 4.839141316673986e-06, "loss": 0.3449, "step": 15298 }, { "epoch": 0.5250171585449553, "grad_norm": 0.729172398284891, "learning_rate": 4.8385858663285555e-06, "loss": 0.2619, "step": 15299 }, { "epoch": 0.5250514756348662, "grad_norm": 0.767873366622194, "learning_rate": 4.838030417977209e-06, "loss": 0.2574, "step": 15300 }, { "epoch": 0.525085792724777, "grad_norm": 0.7384411512736154, "learning_rate": 4.837474971626806e-06, "loss": 0.2347, "step": 15301 }, { "epoch": 0.5251201098146877, "grad_norm": 0.822418801842769, "learning_rate": 4.836919527284212e-06, "loss": 0.2635, "step": 15302 }, { "epoch": 0.5251544269045985, "grad_norm": 0.7854656667263747, "learning_rate": 4.836364084956287e-06, "loss": 0.23, "step": 15303 }, { "epoch": 0.5251887439945092, "grad_norm": 0.834439812553293, "learning_rate": 4.835808644649891e-06, "loss": 0.317, "step": 15304 }, { "epoch": 0.5252230610844201, "grad_norm": 0.6977924790826227, "learning_rate": 4.835253206371891e-06, "loss": 0.2635, "step": 15305 }, { "epoch": 0.5252573781743308, "grad_norm": 0.7747461377663077, "learning_rate": 4.834697770129143e-06, "loss": 0.3063, "step": 15306 }, { "epoch": 0.5252916952642416, "grad_norm": 0.6855879746504961, "learning_rate": 4.834142335928511e-06, "loss": 0.2891, "step": 15307 }, { "epoch": 0.5253260123541523, "grad_norm": 0.7211041477131923, "learning_rate": 4.833586903776858e-06, "loss": 0.2797, "step": 15308 }, { "epoch": 0.5253603294440632, "grad_norm": 0.7753768071634147, "learning_rate": 4.833031473681046e-06, "loss": 0.2923, "step": 15309 }, { "epoch": 0.5253946465339739, "grad_norm": 0.8750660520921807, "learning_rate": 4.832476045647934e-06, "loss": 0.2914, "step": 15310 }, { "epoch": 0.5254289636238847, "grad_norm": 0.8103786118089743, "learning_rate": 4.831920619684386e-06, "loss": 0.3849, "step": 15311 }, { "epoch": 0.5254632807137954, "grad_norm": 0.8064071514845907, "learning_rate": 4.831365195797263e-06, "loss": 0.3102, "step": 15312 }, { "epoch": 0.5254975978037062, "grad_norm": 0.9001581361040049, "learning_rate": 4.830809773993424e-06, "loss": 0.2833, "step": 15313 }, { "epoch": 0.5255319148936171, "grad_norm": 0.7505517379849523, "learning_rate": 4.830254354279735e-06, "loss": 0.3001, "step": 15314 }, { "epoch": 0.5255662319835278, "grad_norm": 0.7364547788183138, "learning_rate": 4.829698936663056e-06, "loss": 0.265, "step": 15315 }, { "epoch": 0.5256005490734386, "grad_norm": 0.7250770562545621, "learning_rate": 4.829143521150246e-06, "loss": 0.2918, "step": 15316 }, { "epoch": 0.5256348661633493, "grad_norm": 0.8157910898281476, "learning_rate": 4.828588107748171e-06, "loss": 0.3242, "step": 15317 }, { "epoch": 0.5256691832532602, "grad_norm": 0.9160758084313032, "learning_rate": 4.82803269646369e-06, "loss": 0.2916, "step": 15318 }, { "epoch": 0.5257035003431709, "grad_norm": 0.759140673749414, "learning_rate": 4.8274772873036616e-06, "loss": 0.2449, "step": 15319 }, { "epoch": 0.5257378174330817, "grad_norm": 0.682183681191399, "learning_rate": 4.826921880274953e-06, "loss": 0.2538, "step": 15320 }, { "epoch": 0.5257721345229924, "grad_norm": 0.6892637421840766, "learning_rate": 4.826366475384422e-06, "loss": 0.2692, "step": 15321 }, { "epoch": 0.5258064516129032, "grad_norm": 0.8540078194033159, "learning_rate": 4.825811072638929e-06, "loss": 0.3342, "step": 15322 }, { "epoch": 0.525840768702814, "grad_norm": 0.8147765611318258, "learning_rate": 4.8252556720453395e-06, "loss": 0.3332, "step": 15323 }, { "epoch": 0.5258750857927248, "grad_norm": 0.785436350736979, "learning_rate": 4.82470027361051e-06, "loss": 0.2469, "step": 15324 }, { "epoch": 0.5259094028826355, "grad_norm": 0.8383847355431018, "learning_rate": 4.824144877341307e-06, "loss": 0.2769, "step": 15325 }, { "epoch": 0.5259437199725463, "grad_norm": 0.821155785750445, "learning_rate": 4.823589483244589e-06, "loss": 0.3785, "step": 15326 }, { "epoch": 0.525978037062457, "grad_norm": 0.8426887178803224, "learning_rate": 4.823034091327215e-06, "loss": 0.2459, "step": 15327 }, { "epoch": 0.5260123541523679, "grad_norm": 0.8192475763692727, "learning_rate": 4.822478701596051e-06, "loss": 0.3314, "step": 15328 }, { "epoch": 0.5260466712422787, "grad_norm": 0.8325220578472045, "learning_rate": 4.821923314057953e-06, "loss": 0.2759, "step": 15329 }, { "epoch": 0.5260809883321894, "grad_norm": 0.8195473337117876, "learning_rate": 4.8213679287197856e-06, "loss": 0.2637, "step": 15330 }, { "epoch": 0.5261153054221002, "grad_norm": 0.6952212012986941, "learning_rate": 4.820812545588411e-06, "loss": 0.2633, "step": 15331 }, { "epoch": 0.526149622512011, "grad_norm": 0.6720836475128282, "learning_rate": 4.820257164670688e-06, "loss": 0.2449, "step": 15332 }, { "epoch": 0.5261839396019218, "grad_norm": 0.8483909205676996, "learning_rate": 4.819701785973476e-06, "loss": 0.2446, "step": 15333 }, { "epoch": 0.5262182566918325, "grad_norm": 0.7771188721386532, "learning_rate": 4.81914640950364e-06, "loss": 0.2744, "step": 15334 }, { "epoch": 0.5262525737817433, "grad_norm": 0.7506584212384688, "learning_rate": 4.818591035268039e-06, "loss": 0.2683, "step": 15335 }, { "epoch": 0.526286890871654, "grad_norm": 0.7902099403639432, "learning_rate": 4.818035663273535e-06, "loss": 0.2631, "step": 15336 }, { "epoch": 0.5263212079615649, "grad_norm": 0.7927575618928975, "learning_rate": 4.817480293526986e-06, "loss": 0.3398, "step": 15337 }, { "epoch": 0.5263555250514756, "grad_norm": 0.743969814815088, "learning_rate": 4.816924926035257e-06, "loss": 0.2851, "step": 15338 }, { "epoch": 0.5263898421413864, "grad_norm": 0.7248273347831017, "learning_rate": 4.8163695608052055e-06, "loss": 0.2405, "step": 15339 }, { "epoch": 0.5264241592312972, "grad_norm": 0.7952819807103361, "learning_rate": 4.815814197843697e-06, "loss": 0.306, "step": 15340 }, { "epoch": 0.526458476321208, "grad_norm": 0.8455137146779048, "learning_rate": 4.815258837157588e-06, "loss": 0.3077, "step": 15341 }, { "epoch": 0.5264927934111188, "grad_norm": 0.6792693084677188, "learning_rate": 4.814703478753739e-06, "loss": 0.2741, "step": 15342 }, { "epoch": 0.5265271105010295, "grad_norm": 0.7247645523296153, "learning_rate": 4.814148122639013e-06, "loss": 0.3362, "step": 15343 }, { "epoch": 0.5265614275909403, "grad_norm": 0.7443022750602599, "learning_rate": 4.813592768820272e-06, "loss": 0.2996, "step": 15344 }, { "epoch": 0.526595744680851, "grad_norm": 0.7710494227223986, "learning_rate": 4.813037417304373e-06, "loss": 0.2941, "step": 15345 }, { "epoch": 0.5266300617707619, "grad_norm": 0.8313078521627713, "learning_rate": 4.81248206809818e-06, "loss": 0.2955, "step": 15346 }, { "epoch": 0.5266643788606726, "grad_norm": 0.7887715803689935, "learning_rate": 4.811926721208553e-06, "loss": 0.2871, "step": 15347 }, { "epoch": 0.5266986959505834, "grad_norm": 0.7769789834622503, "learning_rate": 4.81137137664235e-06, "loss": 0.3461, "step": 15348 }, { "epoch": 0.5267330130404941, "grad_norm": 0.8176443601534024, "learning_rate": 4.810816034406436e-06, "loss": 0.2908, "step": 15349 }, { "epoch": 0.5267673301304049, "grad_norm": 0.6939324946998908, "learning_rate": 4.810260694507668e-06, "loss": 0.2863, "step": 15350 }, { "epoch": 0.5268016472203157, "grad_norm": 0.7828851998132739, "learning_rate": 4.809705356952907e-06, "loss": 0.3015, "step": 15351 }, { "epoch": 0.5268359643102265, "grad_norm": 0.7514755358987171, "learning_rate": 4.809150021749016e-06, "loss": 0.3029, "step": 15352 }, { "epoch": 0.5268702814001373, "grad_norm": 0.6501770782847979, "learning_rate": 4.808594688902854e-06, "loss": 0.2559, "step": 15353 }, { "epoch": 0.526904598490048, "grad_norm": 0.8443666467124695, "learning_rate": 4.80803935842128e-06, "loss": 0.2899, "step": 15354 }, { "epoch": 0.5269389155799589, "grad_norm": 0.706971465263339, "learning_rate": 4.807484030311158e-06, "loss": 0.2471, "step": 15355 }, { "epoch": 0.5269732326698696, "grad_norm": 0.8979420019932103, "learning_rate": 4.806928704579344e-06, "loss": 0.2969, "step": 15356 }, { "epoch": 0.5270075497597804, "grad_norm": 0.826190404481763, "learning_rate": 4.806373381232703e-06, "loss": 0.3188, "step": 15357 }, { "epoch": 0.5270418668496911, "grad_norm": 0.6691217146426237, "learning_rate": 4.805818060278092e-06, "loss": 0.2615, "step": 15358 }, { "epoch": 0.5270761839396019, "grad_norm": 0.7127902182566778, "learning_rate": 4.8052627417223715e-06, "loss": 0.2933, "step": 15359 }, { "epoch": 0.5271105010295127, "grad_norm": 0.8864190872264531, "learning_rate": 4.804707425572404e-06, "loss": 0.3085, "step": 15360 }, { "epoch": 0.5271448181194235, "grad_norm": 0.8043339020898637, "learning_rate": 4.80415211183505e-06, "loss": 0.324, "step": 15361 }, { "epoch": 0.5271791352093342, "grad_norm": 0.721999417334792, "learning_rate": 4.803596800517165e-06, "loss": 0.261, "step": 15362 }, { "epoch": 0.527213452299245, "grad_norm": 0.7372859910452774, "learning_rate": 4.803041491625615e-06, "loss": 0.2512, "step": 15363 }, { "epoch": 0.5272477693891559, "grad_norm": 0.7807266350844427, "learning_rate": 4.802486185167258e-06, "loss": 0.2677, "step": 15364 }, { "epoch": 0.5272820864790666, "grad_norm": 0.7658247970914662, "learning_rate": 4.801930881148952e-06, "loss": 0.2456, "step": 15365 }, { "epoch": 0.5273164035689774, "grad_norm": 0.799753115756155, "learning_rate": 4.8013755795775595e-06, "loss": 0.2926, "step": 15366 }, { "epoch": 0.5273507206588881, "grad_norm": 0.8434330409330506, "learning_rate": 4.800820280459942e-06, "loss": 0.3011, "step": 15367 }, { "epoch": 0.5273850377487989, "grad_norm": 0.8738501116563936, "learning_rate": 4.800264983802955e-06, "loss": 0.2627, "step": 15368 }, { "epoch": 0.5274193548387097, "grad_norm": 0.7370529764894207, "learning_rate": 4.7997096896134635e-06, "loss": 0.2791, "step": 15369 }, { "epoch": 0.5274536719286205, "grad_norm": 0.7907883509490633, "learning_rate": 4.799154397898325e-06, "loss": 0.3176, "step": 15370 }, { "epoch": 0.5274879890185312, "grad_norm": 0.7561124495867012, "learning_rate": 4.798599108664398e-06, "loss": 0.271, "step": 15371 }, { "epoch": 0.527522306108442, "grad_norm": 0.72849866165262, "learning_rate": 4.798043821918546e-06, "loss": 0.3195, "step": 15372 }, { "epoch": 0.5275566231983527, "grad_norm": 0.7455481586866071, "learning_rate": 4.797488537667627e-06, "loss": 0.295, "step": 15373 }, { "epoch": 0.5275909402882636, "grad_norm": 0.9237358540793239, "learning_rate": 4.796933255918499e-06, "loss": 0.2929, "step": 15374 }, { "epoch": 0.5276252573781743, "grad_norm": 0.8229184513104246, "learning_rate": 4.796377976678026e-06, "loss": 0.2346, "step": 15375 }, { "epoch": 0.5276595744680851, "grad_norm": 0.7966882903728789, "learning_rate": 4.795822699953066e-06, "loss": 0.2887, "step": 15376 }, { "epoch": 0.5276938915579958, "grad_norm": 0.7524468366454995, "learning_rate": 4.795267425750476e-06, "loss": 0.2869, "step": 15377 }, { "epoch": 0.5277282086479067, "grad_norm": 0.6828096692028389, "learning_rate": 4.794712154077121e-06, "loss": 0.2521, "step": 15378 }, { "epoch": 0.5277625257378175, "grad_norm": 0.774317151914421, "learning_rate": 4.794156884939856e-06, "loss": 0.3209, "step": 15379 }, { "epoch": 0.5277968428277282, "grad_norm": 0.741100728896878, "learning_rate": 4.7936016183455444e-06, "loss": 0.2748, "step": 15380 }, { "epoch": 0.527831159917639, "grad_norm": 0.6899055895662052, "learning_rate": 4.793046354301042e-06, "loss": 0.2695, "step": 15381 }, { "epoch": 0.5278654770075497, "grad_norm": 0.7906284262289638, "learning_rate": 4.792491092813213e-06, "loss": 0.2975, "step": 15382 }, { "epoch": 0.5278997940974606, "grad_norm": 0.8146556512959426, "learning_rate": 4.7919358338889125e-06, "loss": 0.2981, "step": 15383 }, { "epoch": 0.5279341111873713, "grad_norm": 0.8134385753279849, "learning_rate": 4.791380577535004e-06, "loss": 0.3375, "step": 15384 }, { "epoch": 0.5279684282772821, "grad_norm": 0.7969584573958081, "learning_rate": 4.790825323758344e-06, "loss": 0.3071, "step": 15385 }, { "epoch": 0.5280027453671928, "grad_norm": 0.6844924186807081, "learning_rate": 4.790270072565794e-06, "loss": 0.286, "step": 15386 }, { "epoch": 0.5280370624571037, "grad_norm": 0.7884393076031372, "learning_rate": 4.789714823964213e-06, "loss": 0.3394, "step": 15387 }, { "epoch": 0.5280713795470144, "grad_norm": 0.9128181421733179, "learning_rate": 4.789159577960458e-06, "loss": 0.316, "step": 15388 }, { "epoch": 0.5281056966369252, "grad_norm": 0.7848886230732356, "learning_rate": 4.788604334561392e-06, "loss": 0.2925, "step": 15389 }, { "epoch": 0.5281400137268359, "grad_norm": 0.8082560018620837, "learning_rate": 4.7880490937738745e-06, "loss": 0.2923, "step": 15390 }, { "epoch": 0.5281743308167467, "grad_norm": 0.8175176262463111, "learning_rate": 4.787493855604761e-06, "loss": 0.3144, "step": 15391 }, { "epoch": 0.5282086479066576, "grad_norm": 0.8029797816151565, "learning_rate": 4.786938620060915e-06, "loss": 0.3039, "step": 15392 }, { "epoch": 0.5282429649965683, "grad_norm": 0.7720894056241256, "learning_rate": 4.7863833871491934e-06, "loss": 0.2374, "step": 15393 }, { "epoch": 0.5282772820864791, "grad_norm": 0.8478678530653959, "learning_rate": 4.785828156876455e-06, "loss": 0.2666, "step": 15394 }, { "epoch": 0.5283115991763898, "grad_norm": 0.8058482851269476, "learning_rate": 4.7852729292495614e-06, "loss": 0.2769, "step": 15395 }, { "epoch": 0.5283459162663006, "grad_norm": 0.8166176014398077, "learning_rate": 4.7847177042753695e-06, "loss": 0.2891, "step": 15396 }, { "epoch": 0.5283802333562114, "grad_norm": 0.8165221738679876, "learning_rate": 4.784162481960738e-06, "loss": 0.2446, "step": 15397 }, { "epoch": 0.5284145504461222, "grad_norm": 0.7664969960593995, "learning_rate": 4.78360726231253e-06, "loss": 0.2992, "step": 15398 }, { "epoch": 0.5284488675360329, "grad_norm": 0.7369356066355865, "learning_rate": 4.783052045337602e-06, "loss": 0.2807, "step": 15399 }, { "epoch": 0.5284831846259437, "grad_norm": 0.612951432357934, "learning_rate": 4.782496831042811e-06, "loss": 0.2121, "step": 15400 }, { "epoch": 0.5285175017158545, "grad_norm": 0.8748879195615388, "learning_rate": 4.78194161943502e-06, "loss": 0.3145, "step": 15401 }, { "epoch": 0.5285518188057653, "grad_norm": 0.7873405232933387, "learning_rate": 4.781386410521085e-06, "loss": 0.2462, "step": 15402 }, { "epoch": 0.528586135895676, "grad_norm": 0.7840011725237809, "learning_rate": 4.780831204307866e-06, "loss": 0.3179, "step": 15403 }, { "epoch": 0.5286204529855868, "grad_norm": 0.7004300221980454, "learning_rate": 4.780276000802222e-06, "loss": 0.2702, "step": 15404 }, { "epoch": 0.5286547700754975, "grad_norm": 0.8111590389186107, "learning_rate": 4.7797208000110125e-06, "loss": 0.2957, "step": 15405 }, { "epoch": 0.5286890871654084, "grad_norm": 0.6489786659021397, "learning_rate": 4.779165601941094e-06, "loss": 0.2723, "step": 15406 }, { "epoch": 0.5287234042553192, "grad_norm": 0.7605394257361626, "learning_rate": 4.7786104065993285e-06, "loss": 0.2435, "step": 15407 }, { "epoch": 0.5287577213452299, "grad_norm": 0.8257740070380284, "learning_rate": 4.7780552139925745e-06, "loss": 0.2516, "step": 15408 }, { "epoch": 0.5287920384351407, "grad_norm": 0.7143328185151857, "learning_rate": 4.777500024127687e-06, "loss": 0.2406, "step": 15409 }, { "epoch": 0.5288263555250515, "grad_norm": 0.7321188594233836, "learning_rate": 4.776944837011528e-06, "loss": 0.3056, "step": 15410 }, { "epoch": 0.5288606726149623, "grad_norm": 0.6792441455139383, "learning_rate": 4.7763896526509565e-06, "loss": 0.1934, "step": 15411 }, { "epoch": 0.528894989704873, "grad_norm": 0.7832807982542601, "learning_rate": 4.775834471052828e-06, "loss": 0.3014, "step": 15412 }, { "epoch": 0.5289293067947838, "grad_norm": 0.7481190886608893, "learning_rate": 4.775279292224006e-06, "loss": 0.2767, "step": 15413 }, { "epoch": 0.5289636238846945, "grad_norm": 0.7712437365625062, "learning_rate": 4.774724116171346e-06, "loss": 0.2818, "step": 15414 }, { "epoch": 0.5289979409746054, "grad_norm": 0.7599700216412351, "learning_rate": 4.774168942901705e-06, "loss": 0.28, "step": 15415 }, { "epoch": 0.5290322580645161, "grad_norm": 0.6956157350588179, "learning_rate": 4.773613772421945e-06, "loss": 0.2669, "step": 15416 }, { "epoch": 0.5290665751544269, "grad_norm": 0.7834589225982134, "learning_rate": 4.7730586047389205e-06, "loss": 0.2694, "step": 15417 }, { "epoch": 0.5291008922443377, "grad_norm": 0.7672286341470183, "learning_rate": 4.772503439859494e-06, "loss": 0.2901, "step": 15418 }, { "epoch": 0.5291352093342484, "grad_norm": 0.7251011909924819, "learning_rate": 4.771948277790524e-06, "loss": 0.2576, "step": 15419 }, { "epoch": 0.5291695264241593, "grad_norm": 0.7640057903306782, "learning_rate": 4.771393118538864e-06, "loss": 0.2883, "step": 15420 }, { "epoch": 0.52920384351407, "grad_norm": 0.805265360963826, "learning_rate": 4.770837962111378e-06, "loss": 0.2941, "step": 15421 }, { "epoch": 0.5292381606039808, "grad_norm": 0.7523853114817513, "learning_rate": 4.770282808514922e-06, "loss": 0.236, "step": 15422 }, { "epoch": 0.5292724776938915, "grad_norm": 0.7367420594356686, "learning_rate": 4.769727657756351e-06, "loss": 0.2594, "step": 15423 }, { "epoch": 0.5293067947838024, "grad_norm": 0.7203461048493045, "learning_rate": 4.769172509842529e-06, "loss": 0.2835, "step": 15424 }, { "epoch": 0.5293411118737131, "grad_norm": 0.6943447912735493, "learning_rate": 4.768617364780311e-06, "loss": 0.2393, "step": 15425 }, { "epoch": 0.5293754289636239, "grad_norm": 0.858324486851987, "learning_rate": 4.768062222576555e-06, "loss": 0.2738, "step": 15426 }, { "epoch": 0.5294097460535346, "grad_norm": 0.9029440974915329, "learning_rate": 4.76750708323812e-06, "loss": 0.2891, "step": 15427 }, { "epoch": 0.5294440631434454, "grad_norm": 0.8268052428652418, "learning_rate": 4.766951946771865e-06, "loss": 0.2755, "step": 15428 }, { "epoch": 0.5294783802333562, "grad_norm": 0.9282211561244527, "learning_rate": 4.766396813184645e-06, "loss": 0.3257, "step": 15429 }, { "epoch": 0.529512697323267, "grad_norm": 0.8845584593253228, "learning_rate": 4.765841682483322e-06, "loss": 0.3099, "step": 15430 }, { "epoch": 0.5295470144131778, "grad_norm": 0.7221502831360819, "learning_rate": 4.7652865546747524e-06, "loss": 0.2471, "step": 15431 }, { "epoch": 0.5295813315030885, "grad_norm": 0.764507336460586, "learning_rate": 4.764731429765791e-06, "loss": 0.3167, "step": 15432 }, { "epoch": 0.5296156485929994, "grad_norm": 0.7920875341680639, "learning_rate": 4.7641763077633e-06, "loss": 0.2904, "step": 15433 }, { "epoch": 0.5296499656829101, "grad_norm": 0.7919865410688447, "learning_rate": 4.763621188674137e-06, "loss": 0.2924, "step": 15434 }, { "epoch": 0.5296842827728209, "grad_norm": 0.76659339554773, "learning_rate": 4.763066072505156e-06, "loss": 0.2773, "step": 15435 }, { "epoch": 0.5297185998627316, "grad_norm": 0.6880294923269478, "learning_rate": 4.76251095926322e-06, "loss": 0.2778, "step": 15436 }, { "epoch": 0.5297529169526424, "grad_norm": 0.8012598649453935, "learning_rate": 4.761955848955184e-06, "loss": 0.2429, "step": 15437 }, { "epoch": 0.5297872340425532, "grad_norm": 0.80857863251913, "learning_rate": 4.7614007415879034e-06, "loss": 0.3071, "step": 15438 }, { "epoch": 0.529821551132464, "grad_norm": 0.7638054767491235, "learning_rate": 4.760845637168241e-06, "loss": 0.3086, "step": 15439 }, { "epoch": 0.5298558682223747, "grad_norm": 0.7492105747179525, "learning_rate": 4.760290535703051e-06, "loss": 0.2413, "step": 15440 }, { "epoch": 0.5298901853122855, "grad_norm": 0.769131865300474, "learning_rate": 4.759735437199191e-06, "loss": 0.275, "step": 15441 }, { "epoch": 0.5299245024021962, "grad_norm": 0.7976034340837675, "learning_rate": 4.759180341663521e-06, "loss": 0.2705, "step": 15442 }, { "epoch": 0.5299588194921071, "grad_norm": 0.6853192355218046, "learning_rate": 4.758625249102898e-06, "loss": 0.2705, "step": 15443 }, { "epoch": 0.5299931365820179, "grad_norm": 0.8001152233531755, "learning_rate": 4.758070159524175e-06, "loss": 0.2986, "step": 15444 }, { "epoch": 0.5300274536719286, "grad_norm": 0.7750533934612552, "learning_rate": 4.7575150729342174e-06, "loss": 0.3016, "step": 15445 }, { "epoch": 0.5300617707618394, "grad_norm": 0.7580534424880189, "learning_rate": 4.756959989339876e-06, "loss": 0.3174, "step": 15446 }, { "epoch": 0.5300960878517502, "grad_norm": 0.8204680508856244, "learning_rate": 4.756404908748011e-06, "loss": 0.3445, "step": 15447 }, { "epoch": 0.530130404941661, "grad_norm": 0.7301589300735458, "learning_rate": 4.755849831165479e-06, "loss": 0.2631, "step": 15448 }, { "epoch": 0.5301647220315717, "grad_norm": 0.8840508160048417, "learning_rate": 4.755294756599136e-06, "loss": 0.3029, "step": 15449 }, { "epoch": 0.5301990391214825, "grad_norm": 0.7448019599757135, "learning_rate": 4.754739685055844e-06, "loss": 0.2681, "step": 15450 }, { "epoch": 0.5302333562113932, "grad_norm": 0.8250031313782958, "learning_rate": 4.754184616542458e-06, "loss": 0.2634, "step": 15451 }, { "epoch": 0.5302676733013041, "grad_norm": 0.6874409532616746, "learning_rate": 4.753629551065831e-06, "loss": 0.2575, "step": 15452 }, { "epoch": 0.5303019903912148, "grad_norm": 0.8575971965860192, "learning_rate": 4.753074488632826e-06, "loss": 0.2969, "step": 15453 }, { "epoch": 0.5303363074811256, "grad_norm": 0.7166919717140201, "learning_rate": 4.752519429250297e-06, "loss": 0.2883, "step": 15454 }, { "epoch": 0.5303706245710363, "grad_norm": 0.7307119463252794, "learning_rate": 4.751964372925102e-06, "loss": 0.2745, "step": 15455 }, { "epoch": 0.5304049416609472, "grad_norm": 0.8096552103927755, "learning_rate": 4.751409319664098e-06, "loss": 0.3513, "step": 15456 }, { "epoch": 0.530439258750858, "grad_norm": 0.718706248281449, "learning_rate": 4.7508542694741425e-06, "loss": 0.2693, "step": 15457 }, { "epoch": 0.5304735758407687, "grad_norm": 0.7965169961591824, "learning_rate": 4.7502992223620905e-06, "loss": 0.2825, "step": 15458 }, { "epoch": 0.5305078929306795, "grad_norm": 0.8847834384360636, "learning_rate": 4.749744178334803e-06, "loss": 0.3335, "step": 15459 }, { "epoch": 0.5305422100205902, "grad_norm": 0.7183864558252192, "learning_rate": 4.749189137399133e-06, "loss": 0.2423, "step": 15460 }, { "epoch": 0.5305765271105011, "grad_norm": 0.7881931924191704, "learning_rate": 4.748634099561938e-06, "loss": 0.2953, "step": 15461 }, { "epoch": 0.5306108442004118, "grad_norm": 0.6932710333301232, "learning_rate": 4.748079064830078e-06, "loss": 0.2746, "step": 15462 }, { "epoch": 0.5306451612903226, "grad_norm": 0.7360668413086118, "learning_rate": 4.747524033210405e-06, "loss": 0.3124, "step": 15463 }, { "epoch": 0.5306794783802333, "grad_norm": 0.736568876837038, "learning_rate": 4.7469690047097774e-06, "loss": 0.2898, "step": 15464 }, { "epoch": 0.5307137954701441, "grad_norm": 0.7869358452501322, "learning_rate": 4.746413979335056e-06, "loss": 0.2428, "step": 15465 }, { "epoch": 0.5307481125600549, "grad_norm": 0.691502189997893, "learning_rate": 4.745858957093094e-06, "loss": 0.3561, "step": 15466 }, { "epoch": 0.5307824296499657, "grad_norm": 0.7788398568314805, "learning_rate": 4.745303937990745e-06, "loss": 0.2976, "step": 15467 }, { "epoch": 0.5308167467398764, "grad_norm": 0.8099401111165324, "learning_rate": 4.744748922034872e-06, "loss": 0.3748, "step": 15468 }, { "epoch": 0.5308510638297872, "grad_norm": 0.7715723383017882, "learning_rate": 4.744193909232327e-06, "loss": 0.2904, "step": 15469 }, { "epoch": 0.5308853809196981, "grad_norm": 0.7724662127762998, "learning_rate": 4.743638899589968e-06, "loss": 0.2629, "step": 15470 }, { "epoch": 0.5309196980096088, "grad_norm": 0.9569811060091955, "learning_rate": 4.743083893114651e-06, "loss": 0.2633, "step": 15471 }, { "epoch": 0.5309540150995196, "grad_norm": 0.7592943363236624, "learning_rate": 4.7425288898132355e-06, "loss": 0.2836, "step": 15472 }, { "epoch": 0.5309883321894303, "grad_norm": 0.8974366652667489, "learning_rate": 4.741973889692572e-06, "loss": 0.2905, "step": 15473 }, { "epoch": 0.5310226492793411, "grad_norm": 0.8134570607182051, "learning_rate": 4.741418892759523e-06, "loss": 0.2686, "step": 15474 }, { "epoch": 0.5310569663692519, "grad_norm": 0.8243566888759716, "learning_rate": 4.740863899020942e-06, "loss": 0.2734, "step": 15475 }, { "epoch": 0.5310912834591627, "grad_norm": 0.7904527488245204, "learning_rate": 4.740308908483683e-06, "loss": 0.3208, "step": 15476 }, { "epoch": 0.5311256005490734, "grad_norm": 0.82081862504202, "learning_rate": 4.739753921154606e-06, "loss": 0.3052, "step": 15477 }, { "epoch": 0.5311599176389842, "grad_norm": 0.7185979401319049, "learning_rate": 4.739198937040566e-06, "loss": 0.2863, "step": 15478 }, { "epoch": 0.531194234728895, "grad_norm": 0.7614320336580651, "learning_rate": 4.738643956148418e-06, "loss": 0.2478, "step": 15479 }, { "epoch": 0.5312285518188058, "grad_norm": 0.6770691350010737, "learning_rate": 4.738088978485022e-06, "loss": 0.2456, "step": 15480 }, { "epoch": 0.5312628689087165, "grad_norm": 0.7771899644939881, "learning_rate": 4.737534004057228e-06, "loss": 0.3074, "step": 15481 }, { "epoch": 0.5312971859986273, "grad_norm": 0.7821125487448695, "learning_rate": 4.7369790328718975e-06, "loss": 0.3194, "step": 15482 }, { "epoch": 0.531331503088538, "grad_norm": 0.8740320158004051, "learning_rate": 4.736424064935886e-06, "loss": 0.2846, "step": 15483 }, { "epoch": 0.5313658201784489, "grad_norm": 0.7967806231865576, "learning_rate": 4.735869100256044e-06, "loss": 0.2884, "step": 15484 }, { "epoch": 0.5314001372683597, "grad_norm": 0.6836533190190418, "learning_rate": 4.735314138839232e-06, "loss": 0.236, "step": 15485 }, { "epoch": 0.5314344543582704, "grad_norm": 0.7786835331979761, "learning_rate": 4.734759180692308e-06, "loss": 0.2706, "step": 15486 }, { "epoch": 0.5314687714481812, "grad_norm": 0.6854695890068543, "learning_rate": 4.7342042258221225e-06, "loss": 0.2866, "step": 15487 }, { "epoch": 0.5315030885380919, "grad_norm": 0.733242997461089, "learning_rate": 4.733649274235535e-06, "loss": 0.341, "step": 15488 }, { "epoch": 0.5315374056280028, "grad_norm": 0.8362179095715591, "learning_rate": 4.733094325939401e-06, "loss": 0.2788, "step": 15489 }, { "epoch": 0.5315717227179135, "grad_norm": 0.7552619176487194, "learning_rate": 4.732539380940573e-06, "loss": 0.3058, "step": 15490 }, { "epoch": 0.5316060398078243, "grad_norm": 0.7193246606691738, "learning_rate": 4.731984439245911e-06, "loss": 0.3148, "step": 15491 }, { "epoch": 0.531640356897735, "grad_norm": 0.7471366992996842, "learning_rate": 4.7314295008622685e-06, "loss": 0.2575, "step": 15492 }, { "epoch": 0.5316746739876459, "grad_norm": 0.9036727443212297, "learning_rate": 4.7308745657965e-06, "loss": 0.2873, "step": 15493 }, { "epoch": 0.5317089910775566, "grad_norm": 0.7426299318458109, "learning_rate": 4.730319634055465e-06, "loss": 0.2681, "step": 15494 }, { "epoch": 0.5317433081674674, "grad_norm": 0.8254637263215939, "learning_rate": 4.729764705646015e-06, "loss": 0.3024, "step": 15495 }, { "epoch": 0.5317776252573782, "grad_norm": 0.7277767667657801, "learning_rate": 4.7292097805750065e-06, "loss": 0.3179, "step": 15496 }, { "epoch": 0.5318119423472889, "grad_norm": 0.7788628950229871, "learning_rate": 4.728654858849297e-06, "loss": 0.2967, "step": 15497 }, { "epoch": 0.5318462594371998, "grad_norm": 0.7347881563261415, "learning_rate": 4.72809994047574e-06, "loss": 0.2449, "step": 15498 }, { "epoch": 0.5318805765271105, "grad_norm": 0.7748406761099775, "learning_rate": 4.727545025461189e-06, "loss": 0.2761, "step": 15499 }, { "epoch": 0.5319148936170213, "grad_norm": 0.7945979556505925, "learning_rate": 4.7269901138125035e-06, "loss": 0.2509, "step": 15500 }, { "epoch": 0.531949210706932, "grad_norm": 0.8142250664676621, "learning_rate": 4.726435205536538e-06, "loss": 0.3262, "step": 15501 }, { "epoch": 0.5319835277968429, "grad_norm": 0.7837752140612984, "learning_rate": 4.7258803006401435e-06, "loss": 0.2655, "step": 15502 }, { "epoch": 0.5320178448867536, "grad_norm": 0.6951847134984639, "learning_rate": 4.72532539913018e-06, "loss": 0.284, "step": 15503 }, { "epoch": 0.5320521619766644, "grad_norm": 0.7962483808685987, "learning_rate": 4.724770501013502e-06, "loss": 0.3408, "step": 15504 }, { "epoch": 0.5320864790665751, "grad_norm": 0.6654006172838058, "learning_rate": 4.724215606296962e-06, "loss": 0.2619, "step": 15505 }, { "epoch": 0.5321207961564859, "grad_norm": 0.7560019229668022, "learning_rate": 4.723660714987418e-06, "loss": 0.2901, "step": 15506 }, { "epoch": 0.5321551132463967, "grad_norm": 0.7132285586103436, "learning_rate": 4.7231058270917224e-06, "loss": 0.3116, "step": 15507 }, { "epoch": 0.5321894303363075, "grad_norm": 0.7769090748251947, "learning_rate": 4.722550942616731e-06, "loss": 0.2746, "step": 15508 }, { "epoch": 0.5322237474262183, "grad_norm": 0.800982532617026, "learning_rate": 4.721996061569301e-06, "loss": 0.2769, "step": 15509 }, { "epoch": 0.532258064516129, "grad_norm": 0.6430865279215194, "learning_rate": 4.721441183956285e-06, "loss": 0.2548, "step": 15510 }, { "epoch": 0.5322923816060398, "grad_norm": 0.6978093910493732, "learning_rate": 4.720886309784537e-06, "loss": 0.2782, "step": 15511 }, { "epoch": 0.5323266986959506, "grad_norm": 0.7482631909504969, "learning_rate": 4.720331439060916e-06, "loss": 0.274, "step": 15512 }, { "epoch": 0.5323610157858614, "grad_norm": 0.8319928620702232, "learning_rate": 4.7197765717922705e-06, "loss": 0.2526, "step": 15513 }, { "epoch": 0.5323953328757721, "grad_norm": 0.8249833299608814, "learning_rate": 4.7192217079854616e-06, "loss": 0.3262, "step": 15514 }, { "epoch": 0.5324296499656829, "grad_norm": 0.7131813262700313, "learning_rate": 4.71866684764734e-06, "loss": 0.2321, "step": 15515 }, { "epoch": 0.5324639670555937, "grad_norm": 0.8603741358726325, "learning_rate": 4.71811199078476e-06, "loss": 0.2876, "step": 15516 }, { "epoch": 0.5324982841455045, "grad_norm": 0.7126256230749207, "learning_rate": 4.71755713740458e-06, "loss": 0.289, "step": 15517 }, { "epoch": 0.5325326012354152, "grad_norm": 0.7523979221187596, "learning_rate": 4.717002287513653e-06, "loss": 0.3018, "step": 15518 }, { "epoch": 0.532566918325326, "grad_norm": 0.8138391685865008, "learning_rate": 4.71644744111883e-06, "loss": 0.2908, "step": 15519 }, { "epoch": 0.5326012354152367, "grad_norm": 0.6903465560572692, "learning_rate": 4.715892598226971e-06, "loss": 0.2419, "step": 15520 }, { "epoch": 0.5326355525051476, "grad_norm": 0.7411044656964715, "learning_rate": 4.715337758844926e-06, "loss": 0.2358, "step": 15521 }, { "epoch": 0.5326698695950584, "grad_norm": 0.6814778186630795, "learning_rate": 4.71478292297955e-06, "loss": 0.2516, "step": 15522 }, { "epoch": 0.5327041866849691, "grad_norm": 0.7633355188986698, "learning_rate": 4.7142280906377e-06, "loss": 0.2883, "step": 15523 }, { "epoch": 0.5327385037748799, "grad_norm": 0.7817039402424673, "learning_rate": 4.7136732618262296e-06, "loss": 0.2746, "step": 15524 }, { "epoch": 0.5327728208647907, "grad_norm": 0.8232516830695231, "learning_rate": 4.71311843655199e-06, "loss": 0.2783, "step": 15525 }, { "epoch": 0.5328071379547015, "grad_norm": 0.8207236866657028, "learning_rate": 4.71256361482184e-06, "loss": 0.2933, "step": 15526 }, { "epoch": 0.5328414550446122, "grad_norm": 0.7119752254557709, "learning_rate": 4.712008796642631e-06, "loss": 0.3069, "step": 15527 }, { "epoch": 0.532875772134523, "grad_norm": 0.7490279889994699, "learning_rate": 4.711453982021216e-06, "loss": 0.2906, "step": 15528 }, { "epoch": 0.5329100892244337, "grad_norm": 0.8705153550331765, "learning_rate": 4.7108991709644514e-06, "loss": 0.2888, "step": 15529 }, { "epoch": 0.5329444063143446, "grad_norm": 0.7792703884102904, "learning_rate": 4.710344363479192e-06, "loss": 0.3001, "step": 15530 }, { "epoch": 0.5329787234042553, "grad_norm": 0.9354435389561738, "learning_rate": 4.709789559572288e-06, "loss": 0.3059, "step": 15531 }, { "epoch": 0.5330130404941661, "grad_norm": 0.7466245982867185, "learning_rate": 4.7092347592505965e-06, "loss": 0.2805, "step": 15532 }, { "epoch": 0.5330473575840768, "grad_norm": 0.8047852794802168, "learning_rate": 4.708679962520971e-06, "loss": 0.2314, "step": 15533 }, { "epoch": 0.5330816746739876, "grad_norm": 0.7803043017208788, "learning_rate": 4.7081251693902645e-06, "loss": 0.2719, "step": 15534 }, { "epoch": 0.5331159917638985, "grad_norm": 0.7990501001670922, "learning_rate": 4.707570379865331e-06, "loss": 0.2584, "step": 15535 }, { "epoch": 0.5331503088538092, "grad_norm": 0.7925700236465999, "learning_rate": 4.707015593953026e-06, "loss": 0.2776, "step": 15536 }, { "epoch": 0.53318462594372, "grad_norm": 0.6327798788715736, "learning_rate": 4.7064608116601995e-06, "loss": 0.2599, "step": 15537 }, { "epoch": 0.5332189430336307, "grad_norm": 0.8161198750602188, "learning_rate": 4.7059060329937085e-06, "loss": 0.2528, "step": 15538 }, { "epoch": 0.5332532601235416, "grad_norm": 0.772326529642788, "learning_rate": 4.705351257960406e-06, "loss": 0.2461, "step": 15539 }, { "epoch": 0.5332875772134523, "grad_norm": 0.8221032734376535, "learning_rate": 4.704796486567144e-06, "loss": 0.3021, "step": 15540 }, { "epoch": 0.5333218943033631, "grad_norm": 0.8699119026664234, "learning_rate": 4.704241718820779e-06, "loss": 0.2941, "step": 15541 }, { "epoch": 0.5333562113932738, "grad_norm": 0.7358151713110005, "learning_rate": 4.703686954728161e-06, "loss": 0.2671, "step": 15542 }, { "epoch": 0.5333905284831846, "grad_norm": 0.823980858520432, "learning_rate": 4.703132194296146e-06, "loss": 0.3069, "step": 15543 }, { "epoch": 0.5334248455730954, "grad_norm": 0.8355951746186594, "learning_rate": 4.702577437531587e-06, "loss": 0.2842, "step": 15544 }, { "epoch": 0.5334591626630062, "grad_norm": 0.802652603760271, "learning_rate": 4.702022684441336e-06, "loss": 0.2311, "step": 15545 }, { "epoch": 0.5334934797529169, "grad_norm": 0.8164558079983363, "learning_rate": 4.7014679350322474e-06, "loss": 0.2764, "step": 15546 }, { "epoch": 0.5335277968428277, "grad_norm": 0.8541480937553216, "learning_rate": 4.700913189311176e-06, "loss": 0.3217, "step": 15547 }, { "epoch": 0.5335621139327386, "grad_norm": 0.7795679508609917, "learning_rate": 4.700358447284971e-06, "loss": 0.2676, "step": 15548 }, { "epoch": 0.5335964310226493, "grad_norm": 0.8242525404831428, "learning_rate": 4.69980370896049e-06, "loss": 0.3002, "step": 15549 }, { "epoch": 0.5336307481125601, "grad_norm": 0.6936740456548108, "learning_rate": 4.699248974344585e-06, "loss": 0.2439, "step": 15550 }, { "epoch": 0.5336650652024708, "grad_norm": 0.8235260820633921, "learning_rate": 4.698694243444105e-06, "loss": 0.2801, "step": 15551 }, { "epoch": 0.5336993822923816, "grad_norm": 1.0371018479458889, "learning_rate": 4.698139516265908e-06, "loss": 0.1896, "step": 15552 }, { "epoch": 0.5337336993822924, "grad_norm": 0.9229619639764628, "learning_rate": 4.697584792816847e-06, "loss": 0.3403, "step": 15553 }, { "epoch": 0.5337680164722032, "grad_norm": 0.842892932391533, "learning_rate": 4.69703007310377e-06, "loss": 0.2915, "step": 15554 }, { "epoch": 0.5338023335621139, "grad_norm": 0.7976290229705825, "learning_rate": 4.696475357133536e-06, "loss": 0.2383, "step": 15555 }, { "epoch": 0.5338366506520247, "grad_norm": 0.7833540533565343, "learning_rate": 4.695920644912995e-06, "loss": 0.3124, "step": 15556 }, { "epoch": 0.5338709677419354, "grad_norm": 0.6893200556339781, "learning_rate": 4.695365936448998e-06, "loss": 0.2881, "step": 15557 }, { "epoch": 0.5339052848318463, "grad_norm": 0.7638486604863384, "learning_rate": 4.694811231748401e-06, "loss": 0.2335, "step": 15558 }, { "epoch": 0.533939601921757, "grad_norm": 0.8017080077409298, "learning_rate": 4.694256530818056e-06, "loss": 0.323, "step": 15559 }, { "epoch": 0.5339739190116678, "grad_norm": 0.7762105011557161, "learning_rate": 4.693701833664812e-06, "loss": 0.2313, "step": 15560 }, { "epoch": 0.5340082361015785, "grad_norm": 0.7294807191296665, "learning_rate": 4.6931471402955284e-06, "loss": 0.2515, "step": 15561 }, { "epoch": 0.5340425531914894, "grad_norm": 0.7887034673091682, "learning_rate": 4.692592450717053e-06, "loss": 0.2791, "step": 15562 }, { "epoch": 0.5340768702814002, "grad_norm": 0.7885202819286145, "learning_rate": 4.692037764936238e-06, "loss": 0.2597, "step": 15563 }, { "epoch": 0.5341111873713109, "grad_norm": 0.8097479530221668, "learning_rate": 4.691483082959939e-06, "loss": 0.3048, "step": 15564 }, { "epoch": 0.5341455044612217, "grad_norm": 0.7745446030921049, "learning_rate": 4.690928404795007e-06, "loss": 0.242, "step": 15565 }, { "epoch": 0.5341798215511324, "grad_norm": 0.7494230559747228, "learning_rate": 4.690373730448293e-06, "loss": 0.2681, "step": 15566 }, { "epoch": 0.5342141386410433, "grad_norm": 0.828997278172082, "learning_rate": 4.689819059926651e-06, "loss": 0.3659, "step": 15567 }, { "epoch": 0.534248455730954, "grad_norm": 0.7308911530660858, "learning_rate": 4.689264393236934e-06, "loss": 0.288, "step": 15568 }, { "epoch": 0.5342827728208648, "grad_norm": 0.7052593234942441, "learning_rate": 4.68870973038599e-06, "loss": 0.2391, "step": 15569 }, { "epoch": 0.5343170899107755, "grad_norm": 0.9626749401905377, "learning_rate": 4.688155071380678e-06, "loss": 0.2898, "step": 15570 }, { "epoch": 0.5343514070006864, "grad_norm": 0.7517570558445996, "learning_rate": 4.687600416227846e-06, "loss": 0.2761, "step": 15571 }, { "epoch": 0.5343857240905971, "grad_norm": 0.802034843622396, "learning_rate": 4.687045764934344e-06, "loss": 0.3191, "step": 15572 }, { "epoch": 0.5344200411805079, "grad_norm": 0.805447941343955, "learning_rate": 4.6864911175070295e-06, "loss": 0.3031, "step": 15573 }, { "epoch": 0.5344543582704187, "grad_norm": 0.7098540112741359, "learning_rate": 4.685936473952749e-06, "loss": 0.2622, "step": 15574 }, { "epoch": 0.5344886753603294, "grad_norm": 0.8462689419773746, "learning_rate": 4.685381834278359e-06, "loss": 0.2988, "step": 15575 }, { "epoch": 0.5345229924502403, "grad_norm": 0.7286242816297551, "learning_rate": 4.68482719849071e-06, "loss": 0.2336, "step": 15576 }, { "epoch": 0.534557309540151, "grad_norm": 0.8097891913292518, "learning_rate": 4.684272566596652e-06, "loss": 0.2488, "step": 15577 }, { "epoch": 0.5345916266300618, "grad_norm": 0.7902585013619485, "learning_rate": 4.68371793860304e-06, "loss": 0.2845, "step": 15578 }, { "epoch": 0.5346259437199725, "grad_norm": 0.7882889893733803, "learning_rate": 4.683163314516724e-06, "loss": 0.2551, "step": 15579 }, { "epoch": 0.5346602608098833, "grad_norm": 0.7240444115491234, "learning_rate": 4.682608694344555e-06, "loss": 0.2789, "step": 15580 }, { "epoch": 0.5346945778997941, "grad_norm": 0.7613064559572259, "learning_rate": 4.682054078093386e-06, "loss": 0.3293, "step": 15581 }, { "epoch": 0.5347288949897049, "grad_norm": 0.7909862371999858, "learning_rate": 4.681499465770067e-06, "loss": 0.2916, "step": 15582 }, { "epoch": 0.5347632120796156, "grad_norm": 0.7369089412069918, "learning_rate": 4.680944857381451e-06, "loss": 0.2342, "step": 15583 }, { "epoch": 0.5347975291695264, "grad_norm": 0.7818874868400213, "learning_rate": 4.680390252934391e-06, "loss": 0.2772, "step": 15584 }, { "epoch": 0.5348318462594372, "grad_norm": 0.7890711234893064, "learning_rate": 4.679835652435737e-06, "loss": 0.2647, "step": 15585 }, { "epoch": 0.534866163349348, "grad_norm": 0.7354700692407447, "learning_rate": 4.679281055892337e-06, "loss": 0.2938, "step": 15586 }, { "epoch": 0.5349004804392588, "grad_norm": 0.676621439510081, "learning_rate": 4.678726463311049e-06, "loss": 0.2927, "step": 15587 }, { "epoch": 0.5349347975291695, "grad_norm": 0.7804385577502723, "learning_rate": 4.678171874698719e-06, "loss": 0.2841, "step": 15588 }, { "epoch": 0.5349691146190803, "grad_norm": 0.745282980595003, "learning_rate": 4.677617290062202e-06, "loss": 0.2893, "step": 15589 }, { "epoch": 0.5350034317089911, "grad_norm": 0.6531687568131693, "learning_rate": 4.677062709408345e-06, "loss": 0.2244, "step": 15590 }, { "epoch": 0.5350377487989019, "grad_norm": 0.727501653045266, "learning_rate": 4.676508132744004e-06, "loss": 0.2562, "step": 15591 }, { "epoch": 0.5350720658888126, "grad_norm": 0.8356045815554597, "learning_rate": 4.675953560076026e-06, "loss": 0.2668, "step": 15592 }, { "epoch": 0.5351063829787234, "grad_norm": 0.802135924436506, "learning_rate": 4.675398991411266e-06, "loss": 0.3473, "step": 15593 }, { "epoch": 0.5351407000686342, "grad_norm": 0.8429197267261452, "learning_rate": 4.674844426756573e-06, "loss": 0.257, "step": 15594 }, { "epoch": 0.535175017158545, "grad_norm": 0.8883057338587322, "learning_rate": 4.674289866118796e-06, "loss": 0.2725, "step": 15595 }, { "epoch": 0.5352093342484557, "grad_norm": 0.7310168060349698, "learning_rate": 4.673735309504789e-06, "loss": 0.3023, "step": 15596 }, { "epoch": 0.5352436513383665, "grad_norm": 0.7498352388567198, "learning_rate": 4.673180756921402e-06, "loss": 0.2797, "step": 15597 }, { "epoch": 0.5352779684282772, "grad_norm": 0.7194192293800719, "learning_rate": 4.672626208375485e-06, "loss": 0.2906, "step": 15598 }, { "epoch": 0.5353122855181881, "grad_norm": 0.8082582622620462, "learning_rate": 4.672071663873891e-06, "loss": 0.3512, "step": 15599 }, { "epoch": 0.5353466026080989, "grad_norm": 0.859182891928249, "learning_rate": 4.671517123423469e-06, "loss": 0.3006, "step": 15600 }, { "epoch": 0.5353809196980096, "grad_norm": 0.801810901573227, "learning_rate": 4.670962587031067e-06, "loss": 0.293, "step": 15601 }, { "epoch": 0.5354152367879204, "grad_norm": 0.736920564429264, "learning_rate": 4.670408054703542e-06, "loss": 0.2624, "step": 15602 }, { "epoch": 0.5354495538778311, "grad_norm": 0.7652280572011, "learning_rate": 4.669853526447741e-06, "loss": 0.2705, "step": 15603 }, { "epoch": 0.535483870967742, "grad_norm": 0.9141623420465559, "learning_rate": 4.669299002270512e-06, "loss": 0.28, "step": 15604 }, { "epoch": 0.5355181880576527, "grad_norm": 0.7086568737129751, "learning_rate": 4.668744482178711e-06, "loss": 0.2645, "step": 15605 }, { "epoch": 0.5355525051475635, "grad_norm": 0.836409435377765, "learning_rate": 4.668189966179184e-06, "loss": 0.3194, "step": 15606 }, { "epoch": 0.5355868222374742, "grad_norm": 0.6988800434387754, "learning_rate": 4.667635454278785e-06, "loss": 0.2762, "step": 15607 }, { "epoch": 0.5356211393273851, "grad_norm": 0.7396603277330498, "learning_rate": 4.667080946484362e-06, "loss": 0.2611, "step": 15608 }, { "epoch": 0.5356554564172958, "grad_norm": 0.8043042160837154, "learning_rate": 4.666526442802764e-06, "loss": 0.2657, "step": 15609 }, { "epoch": 0.5356897735072066, "grad_norm": 0.6817384663546714, "learning_rate": 4.665971943240845e-06, "loss": 0.2414, "step": 15610 }, { "epoch": 0.5357240905971173, "grad_norm": 0.8306786559740585, "learning_rate": 4.665417447805453e-06, "loss": 0.3254, "step": 15611 }, { "epoch": 0.5357584076870281, "grad_norm": 0.7581458944608789, "learning_rate": 4.664862956503437e-06, "loss": 0.2719, "step": 15612 }, { "epoch": 0.535792724776939, "grad_norm": 0.7526390332166961, "learning_rate": 4.664308469341649e-06, "loss": 0.271, "step": 15613 }, { "epoch": 0.5358270418668497, "grad_norm": 0.9457665624484419, "learning_rate": 4.66375398632694e-06, "loss": 0.2522, "step": 15614 }, { "epoch": 0.5358613589567605, "grad_norm": 0.7777749976142556, "learning_rate": 4.663199507466156e-06, "loss": 0.2613, "step": 15615 }, { "epoch": 0.5358956760466712, "grad_norm": 0.8356878986291367, "learning_rate": 4.662645032766151e-06, "loss": 0.3624, "step": 15616 }, { "epoch": 0.5359299931365821, "grad_norm": 0.8445786134760889, "learning_rate": 4.6620905622337735e-06, "loss": 0.3015, "step": 15617 }, { "epoch": 0.5359643102264928, "grad_norm": 0.8020869908840195, "learning_rate": 4.661536095875871e-06, "loss": 0.321, "step": 15618 }, { "epoch": 0.5359986273164036, "grad_norm": 0.8198209595984307, "learning_rate": 4.660981633699297e-06, "loss": 0.3004, "step": 15619 }, { "epoch": 0.5360329444063143, "grad_norm": 0.8280689156825881, "learning_rate": 4.660427175710901e-06, "loss": 0.293, "step": 15620 }, { "epoch": 0.5360672614962251, "grad_norm": 0.8164950871564186, "learning_rate": 4.659872721917529e-06, "loss": 0.3063, "step": 15621 }, { "epoch": 0.5361015785861359, "grad_norm": 0.7575537523081148, "learning_rate": 4.659318272326035e-06, "loss": 0.2975, "step": 15622 }, { "epoch": 0.5361358956760467, "grad_norm": 0.8601046262233023, "learning_rate": 4.658763826943265e-06, "loss": 0.3039, "step": 15623 }, { "epoch": 0.5361702127659574, "grad_norm": 0.8576386217420773, "learning_rate": 4.65820938577607e-06, "loss": 0.2773, "step": 15624 }, { "epoch": 0.5362045298558682, "grad_norm": 0.775426514053829, "learning_rate": 4.657654948831301e-06, "loss": 0.2758, "step": 15625 }, { "epoch": 0.536238846945779, "grad_norm": 0.7462830565860857, "learning_rate": 4.657100516115805e-06, "loss": 0.2762, "step": 15626 }, { "epoch": 0.5362731640356898, "grad_norm": 0.8123609927767073, "learning_rate": 4.656546087636431e-06, "loss": 0.2927, "step": 15627 }, { "epoch": 0.5363074811256006, "grad_norm": 0.7836300785903447, "learning_rate": 4.6559916634000315e-06, "loss": 0.2358, "step": 15628 }, { "epoch": 0.5363417982155113, "grad_norm": 0.7161452069387589, "learning_rate": 4.655437243413455e-06, "loss": 0.2944, "step": 15629 }, { "epoch": 0.5363761153054221, "grad_norm": 0.781890011327683, "learning_rate": 4.654882827683547e-06, "loss": 0.2983, "step": 15630 }, { "epoch": 0.5364104323953329, "grad_norm": 0.821988489621053, "learning_rate": 4.654328416217161e-06, "loss": 0.2293, "step": 15631 }, { "epoch": 0.5364447494852437, "grad_norm": 0.7806643281443649, "learning_rate": 4.653774009021144e-06, "loss": 0.2819, "step": 15632 }, { "epoch": 0.5364790665751544, "grad_norm": 0.6789287772643862, "learning_rate": 4.653219606102345e-06, "loss": 0.2747, "step": 15633 }, { "epoch": 0.5365133836650652, "grad_norm": 0.74420468222931, "learning_rate": 4.652665207467613e-06, "loss": 0.2394, "step": 15634 }, { "epoch": 0.5365477007549759, "grad_norm": 0.7893911919383739, "learning_rate": 4.6521108131238e-06, "loss": 0.333, "step": 15635 }, { "epoch": 0.5365820178448868, "grad_norm": 0.7555750868060576, "learning_rate": 4.651556423077749e-06, "loss": 0.3312, "step": 15636 }, { "epoch": 0.5366163349347975, "grad_norm": 0.7392190537851185, "learning_rate": 4.651002037336314e-06, "loss": 0.2804, "step": 15637 }, { "epoch": 0.5366506520247083, "grad_norm": 0.7546542548181134, "learning_rate": 4.650447655906341e-06, "loss": 0.2339, "step": 15638 }, { "epoch": 0.536684969114619, "grad_norm": 0.8423026604439205, "learning_rate": 4.64989327879468e-06, "loss": 0.3227, "step": 15639 }, { "epoch": 0.5367192862045299, "grad_norm": 0.7331979484582598, "learning_rate": 4.64933890600818e-06, "loss": 0.2628, "step": 15640 }, { "epoch": 0.5367536032944407, "grad_norm": 0.7608744329516409, "learning_rate": 4.648784537553688e-06, "loss": 0.2941, "step": 15641 }, { "epoch": 0.5367879203843514, "grad_norm": 0.7008536062263462, "learning_rate": 4.648230173438054e-06, "loss": 0.2462, "step": 15642 }, { "epoch": 0.5368222374742622, "grad_norm": 0.7181117949185667, "learning_rate": 4.6476758136681275e-06, "loss": 0.2396, "step": 15643 }, { "epoch": 0.5368565545641729, "grad_norm": 0.7845443363693911, "learning_rate": 4.647121458250753e-06, "loss": 0.312, "step": 15644 }, { "epoch": 0.5368908716540838, "grad_norm": 0.7132208783529108, "learning_rate": 4.646567107192783e-06, "loss": 0.2691, "step": 15645 }, { "epoch": 0.5369251887439945, "grad_norm": 0.8053239997516511, "learning_rate": 4.6460127605010655e-06, "loss": 0.2539, "step": 15646 }, { "epoch": 0.5369595058339053, "grad_norm": 0.7811702867768677, "learning_rate": 4.6454584181824445e-06, "loss": 0.2815, "step": 15647 }, { "epoch": 0.536993822923816, "grad_norm": 0.8055060807934513, "learning_rate": 4.644904080243774e-06, "loss": 0.2905, "step": 15648 }, { "epoch": 0.5370281400137268, "grad_norm": 0.8246010244389936, "learning_rate": 4.644349746691898e-06, "loss": 0.2684, "step": 15649 }, { "epoch": 0.5370624571036376, "grad_norm": 0.8755684349010594, "learning_rate": 4.6437954175336664e-06, "loss": 0.2506, "step": 15650 }, { "epoch": 0.5370967741935484, "grad_norm": 0.7599842039093496, "learning_rate": 4.643241092775928e-06, "loss": 0.282, "step": 15651 }, { "epoch": 0.5371310912834591, "grad_norm": 0.8744504719033652, "learning_rate": 4.642686772425531e-06, "loss": 0.3004, "step": 15652 }, { "epoch": 0.5371654083733699, "grad_norm": 0.725985345220352, "learning_rate": 4.642132456489319e-06, "loss": 0.2831, "step": 15653 }, { "epoch": 0.5371997254632808, "grad_norm": 0.7064234923780177, "learning_rate": 4.641578144974146e-06, "loss": 0.2516, "step": 15654 }, { "epoch": 0.5372340425531915, "grad_norm": 0.894520363634624, "learning_rate": 4.641023837886856e-06, "loss": 0.2873, "step": 15655 }, { "epoch": 0.5372683596431023, "grad_norm": 0.8091766089075142, "learning_rate": 4.640469535234299e-06, "loss": 0.284, "step": 15656 }, { "epoch": 0.537302676733013, "grad_norm": 0.7025859268808319, "learning_rate": 4.639915237023321e-06, "loss": 0.2566, "step": 15657 }, { "epoch": 0.5373369938229238, "grad_norm": 0.7854520124102249, "learning_rate": 4.639360943260771e-06, "loss": 0.2948, "step": 15658 }, { "epoch": 0.5373713109128346, "grad_norm": 0.8047306224661634, "learning_rate": 4.638806653953495e-06, "loss": 0.3176, "step": 15659 }, { "epoch": 0.5374056280027454, "grad_norm": 0.7117439601174937, "learning_rate": 4.638252369108343e-06, "loss": 0.3029, "step": 15660 }, { "epoch": 0.5374399450926561, "grad_norm": 0.7482103686615245, "learning_rate": 4.63769808873216e-06, "loss": 0.2597, "step": 15661 }, { "epoch": 0.5374742621825669, "grad_norm": 0.7579420509299633, "learning_rate": 4.637143812831794e-06, "loss": 0.2581, "step": 15662 }, { "epoch": 0.5375085792724777, "grad_norm": 0.8207891687717767, "learning_rate": 4.636589541414094e-06, "loss": 0.3088, "step": 15663 }, { "epoch": 0.5375428963623885, "grad_norm": 0.7429782835213171, "learning_rate": 4.636035274485908e-06, "loss": 0.288, "step": 15664 }, { "epoch": 0.5375772134522993, "grad_norm": 0.7515784755436792, "learning_rate": 4.63548101205408e-06, "loss": 0.2666, "step": 15665 }, { "epoch": 0.53761153054221, "grad_norm": 0.75858150581516, "learning_rate": 4.63492675412546e-06, "loss": 0.2733, "step": 15666 }, { "epoch": 0.5376458476321208, "grad_norm": 0.8322780623599384, "learning_rate": 4.634372500706895e-06, "loss": 0.2858, "step": 15667 }, { "epoch": 0.5376801647220316, "grad_norm": 0.7608962408985516, "learning_rate": 4.633818251805229e-06, "loss": 0.3046, "step": 15668 }, { "epoch": 0.5377144818119424, "grad_norm": 0.806132081790386, "learning_rate": 4.633264007427314e-06, "loss": 0.3268, "step": 15669 }, { "epoch": 0.5377487989018531, "grad_norm": 0.7660528425015052, "learning_rate": 4.632709767579992e-06, "loss": 0.2703, "step": 15670 }, { "epoch": 0.5377831159917639, "grad_norm": 0.7467246467668522, "learning_rate": 4.632155532270115e-06, "loss": 0.2533, "step": 15671 }, { "epoch": 0.5378174330816746, "grad_norm": 0.8375985547569178, "learning_rate": 4.631601301504528e-06, "loss": 0.2777, "step": 15672 }, { "epoch": 0.5378517501715855, "grad_norm": 0.7398315816064149, "learning_rate": 4.631047075290075e-06, "loss": 0.2865, "step": 15673 }, { "epoch": 0.5378860672614962, "grad_norm": 0.8850051432198066, "learning_rate": 4.630492853633607e-06, "loss": 0.2666, "step": 15674 }, { "epoch": 0.537920384351407, "grad_norm": 0.8792138456079146, "learning_rate": 4.629938636541969e-06, "loss": 0.2961, "step": 15675 }, { "epoch": 0.5379547014413177, "grad_norm": 0.7175517637344522, "learning_rate": 4.629384424022006e-06, "loss": 0.2155, "step": 15676 }, { "epoch": 0.5379890185312286, "grad_norm": 0.7865440938406424, "learning_rate": 4.628830216080569e-06, "loss": 0.2723, "step": 15677 }, { "epoch": 0.5380233356211394, "grad_norm": 0.813185009266405, "learning_rate": 4.6282760127244995e-06, "loss": 0.2745, "step": 15678 }, { "epoch": 0.5380576527110501, "grad_norm": 0.7276315385162176, "learning_rate": 4.627721813960646e-06, "loss": 0.3269, "step": 15679 }, { "epoch": 0.5380919698009609, "grad_norm": 0.8097724280364946, "learning_rate": 4.6271676197958585e-06, "loss": 0.3225, "step": 15680 }, { "epoch": 0.5381262868908716, "grad_norm": 0.745911388928391, "learning_rate": 4.62661343023698e-06, "loss": 0.2646, "step": 15681 }, { "epoch": 0.5381606039807825, "grad_norm": 0.7760024970536845, "learning_rate": 4.626059245290854e-06, "loss": 0.2735, "step": 15682 }, { "epoch": 0.5381949210706932, "grad_norm": 0.8731207800116648, "learning_rate": 4.625505064964333e-06, "loss": 0.2473, "step": 15683 }, { "epoch": 0.538229238160604, "grad_norm": 0.7695089902166258, "learning_rate": 4.6249508892642605e-06, "loss": 0.3011, "step": 15684 }, { "epoch": 0.5382635552505147, "grad_norm": 0.7890973784740285, "learning_rate": 4.62439671819748e-06, "loss": 0.2695, "step": 15685 }, { "epoch": 0.5382978723404256, "grad_norm": 0.8359761944442277, "learning_rate": 4.623842551770841e-06, "loss": 0.2825, "step": 15686 }, { "epoch": 0.5383321894303363, "grad_norm": 0.8264194063845223, "learning_rate": 4.623288389991191e-06, "loss": 0.3092, "step": 15687 }, { "epoch": 0.5383665065202471, "grad_norm": 0.9091698791615477, "learning_rate": 4.62273423286537e-06, "loss": 0.3143, "step": 15688 }, { "epoch": 0.5384008236101578, "grad_norm": 0.7236585476995925, "learning_rate": 4.622180080400231e-06, "loss": 0.2575, "step": 15689 }, { "epoch": 0.5384351407000686, "grad_norm": 0.7263450624007274, "learning_rate": 4.621625932602616e-06, "loss": 0.2979, "step": 15690 }, { "epoch": 0.5384694577899795, "grad_norm": 0.7311103345209634, "learning_rate": 4.6210717894793696e-06, "loss": 0.2735, "step": 15691 }, { "epoch": 0.5385037748798902, "grad_norm": 0.8084190468152996, "learning_rate": 4.620517651037341e-06, "loss": 0.2737, "step": 15692 }, { "epoch": 0.538538091969801, "grad_norm": 0.7577465811978739, "learning_rate": 4.619963517283374e-06, "loss": 0.2279, "step": 15693 }, { "epoch": 0.5385724090597117, "grad_norm": 0.7431676507810042, "learning_rate": 4.619409388224314e-06, "loss": 0.2738, "step": 15694 }, { "epoch": 0.5386067261496225, "grad_norm": 0.7733740873656613, "learning_rate": 4.6188552638670085e-06, "loss": 0.2882, "step": 15695 }, { "epoch": 0.5386410432395333, "grad_norm": 0.7839982087713834, "learning_rate": 4.618301144218302e-06, "loss": 0.3098, "step": 15696 }, { "epoch": 0.5386753603294441, "grad_norm": 0.7617631835156561, "learning_rate": 4.617747029285038e-06, "loss": 0.3237, "step": 15697 }, { "epoch": 0.5387096774193548, "grad_norm": 0.7900936643674359, "learning_rate": 4.617192919074065e-06, "loss": 0.2935, "step": 15698 }, { "epoch": 0.5387439945092656, "grad_norm": 0.7947940394598371, "learning_rate": 4.616638813592226e-06, "loss": 0.3309, "step": 15699 }, { "epoch": 0.5387783115991764, "grad_norm": 0.8583873119157119, "learning_rate": 4.61608471284637e-06, "loss": 0.3101, "step": 15700 }, { "epoch": 0.5388126286890872, "grad_norm": 0.8700188136831581, "learning_rate": 4.6155306168433375e-06, "loss": 0.2815, "step": 15701 }, { "epoch": 0.5388469457789979, "grad_norm": 0.798825713845326, "learning_rate": 4.6149765255899755e-06, "loss": 0.2669, "step": 15702 }, { "epoch": 0.5388812628689087, "grad_norm": 0.7339161797783348, "learning_rate": 4.614422439093132e-06, "loss": 0.2838, "step": 15703 }, { "epoch": 0.5389155799588194, "grad_norm": 0.8322899668263899, "learning_rate": 4.61386835735965e-06, "loss": 0.3209, "step": 15704 }, { "epoch": 0.5389498970487303, "grad_norm": 0.8089302757774258, "learning_rate": 4.613314280396371e-06, "loss": 0.2865, "step": 15705 }, { "epoch": 0.5389842141386411, "grad_norm": 0.7697141703334465, "learning_rate": 4.612760208210146e-06, "loss": 0.3341, "step": 15706 }, { "epoch": 0.5390185312285518, "grad_norm": 0.789158518203092, "learning_rate": 4.6122061408078165e-06, "loss": 0.336, "step": 15707 }, { "epoch": 0.5390528483184626, "grad_norm": 0.7079544498186653, "learning_rate": 4.611652078196227e-06, "loss": 0.284, "step": 15708 }, { "epoch": 0.5390871654083733, "grad_norm": 0.8174092418771282, "learning_rate": 4.611098020382225e-06, "loss": 0.2864, "step": 15709 }, { "epoch": 0.5391214824982842, "grad_norm": 0.6429342766985598, "learning_rate": 4.610543967372653e-06, "loss": 0.248, "step": 15710 }, { "epoch": 0.5391557995881949, "grad_norm": 0.8663882404341559, "learning_rate": 4.609989919174354e-06, "loss": 0.2866, "step": 15711 }, { "epoch": 0.5391901166781057, "grad_norm": 0.7917936751519354, "learning_rate": 4.609435875794179e-06, "loss": 0.3066, "step": 15712 }, { "epoch": 0.5392244337680164, "grad_norm": 0.8069340373904197, "learning_rate": 4.608881837238967e-06, "loss": 0.2569, "step": 15713 }, { "epoch": 0.5392587508579273, "grad_norm": 0.6921577321418656, "learning_rate": 4.608327803515562e-06, "loss": 0.2646, "step": 15714 }, { "epoch": 0.539293067947838, "grad_norm": 0.7502457852998393, "learning_rate": 4.60777377463081e-06, "loss": 0.2726, "step": 15715 }, { "epoch": 0.5393273850377488, "grad_norm": 0.7741715602622032, "learning_rate": 4.607219750591558e-06, "loss": 0.2743, "step": 15716 }, { "epoch": 0.5393617021276595, "grad_norm": 0.7914118967652728, "learning_rate": 4.606665731404647e-06, "loss": 0.2928, "step": 15717 }, { "epoch": 0.5393960192175703, "grad_norm": 0.7343926335184391, "learning_rate": 4.606111717076922e-06, "loss": 0.2616, "step": 15718 }, { "epoch": 0.5394303363074812, "grad_norm": 0.7667827706783604, "learning_rate": 4.60555770761523e-06, "loss": 0.2584, "step": 15719 }, { "epoch": 0.5394646533973919, "grad_norm": 0.7432208385152453, "learning_rate": 4.605003703026409e-06, "loss": 0.2852, "step": 15720 }, { "epoch": 0.5394989704873027, "grad_norm": 0.7252495063018882, "learning_rate": 4.6044497033173094e-06, "loss": 0.2676, "step": 15721 }, { "epoch": 0.5395332875772134, "grad_norm": 0.817179087776228, "learning_rate": 4.603895708494771e-06, "loss": 0.2421, "step": 15722 }, { "epoch": 0.5395676046671243, "grad_norm": 0.8627334756761238, "learning_rate": 4.60334171856564e-06, "loss": 0.2708, "step": 15723 }, { "epoch": 0.539601921757035, "grad_norm": 0.7773985122268596, "learning_rate": 4.6027877335367585e-06, "loss": 0.349, "step": 15724 }, { "epoch": 0.5396362388469458, "grad_norm": 0.8161489557133644, "learning_rate": 4.602233753414974e-06, "loss": 0.3162, "step": 15725 }, { "epoch": 0.5396705559368565, "grad_norm": 0.7634133021297868, "learning_rate": 4.601679778207125e-06, "loss": 0.2278, "step": 15726 }, { "epoch": 0.5397048730267673, "grad_norm": 0.8333348152858457, "learning_rate": 4.601125807920059e-06, "loss": 0.2763, "step": 15727 }, { "epoch": 0.5397391901166781, "grad_norm": 0.7409014191268063, "learning_rate": 4.6005718425606195e-06, "loss": 0.2776, "step": 15728 }, { "epoch": 0.5397735072065889, "grad_norm": 0.8034836658527097, "learning_rate": 4.6000178821356475e-06, "loss": 0.2792, "step": 15729 }, { "epoch": 0.5398078242964996, "grad_norm": 0.8933179374157811, "learning_rate": 4.599463926651989e-06, "loss": 0.269, "step": 15730 }, { "epoch": 0.5398421413864104, "grad_norm": 0.6917030258162966, "learning_rate": 4.598909976116486e-06, "loss": 0.2346, "step": 15731 }, { "epoch": 0.5398764584763212, "grad_norm": 0.7900410238779088, "learning_rate": 4.598356030535984e-06, "loss": 0.3021, "step": 15732 }, { "epoch": 0.539910775566232, "grad_norm": 0.7000812433513582, "learning_rate": 4.5978020899173245e-06, "loss": 0.2415, "step": 15733 }, { "epoch": 0.5399450926561428, "grad_norm": 0.6955010932392071, "learning_rate": 4.5972481542673505e-06, "loss": 0.2793, "step": 15734 }, { "epoch": 0.5399794097460535, "grad_norm": 0.7209654442761418, "learning_rate": 4.596694223592907e-06, "loss": 0.2309, "step": 15735 }, { "epoch": 0.5400137268359643, "grad_norm": 0.8317423107341532, "learning_rate": 4.596140297900837e-06, "loss": 0.2705, "step": 15736 }, { "epoch": 0.5400480439258751, "grad_norm": 0.7513223758773664, "learning_rate": 4.59558637719798e-06, "loss": 0.2645, "step": 15737 }, { "epoch": 0.5400823610157859, "grad_norm": 0.9016801824001165, "learning_rate": 4.595032461491183e-06, "loss": 0.3249, "step": 15738 }, { "epoch": 0.5401166781056966, "grad_norm": 0.7973534123496936, "learning_rate": 4.594478550787289e-06, "loss": 0.2501, "step": 15739 }, { "epoch": 0.5401509951956074, "grad_norm": 0.7860578036296471, "learning_rate": 4.593924645093138e-06, "loss": 0.3157, "step": 15740 }, { "epoch": 0.5401853122855181, "grad_norm": 0.7833623127073929, "learning_rate": 4.593370744415576e-06, "loss": 0.2626, "step": 15741 }, { "epoch": 0.540219629375429, "grad_norm": 0.7798258347632685, "learning_rate": 4.592816848761445e-06, "loss": 0.2899, "step": 15742 }, { "epoch": 0.5402539464653398, "grad_norm": 0.7528354660886293, "learning_rate": 4.592262958137584e-06, "loss": 0.2864, "step": 15743 }, { "epoch": 0.5402882635552505, "grad_norm": 0.9370877590959594, "learning_rate": 4.591709072550842e-06, "loss": 0.3314, "step": 15744 }, { "epoch": 0.5403225806451613, "grad_norm": 0.8090467873595749, "learning_rate": 4.591155192008056e-06, "loss": 0.2398, "step": 15745 }, { "epoch": 0.5403568977350721, "grad_norm": 0.70267855808235, "learning_rate": 4.590601316516072e-06, "loss": 0.2574, "step": 15746 }, { "epoch": 0.5403912148249829, "grad_norm": 0.8068634940456927, "learning_rate": 4.5900474460817315e-06, "loss": 0.2987, "step": 15747 }, { "epoch": 0.5404255319148936, "grad_norm": 0.8300494901438396, "learning_rate": 4.589493580711878e-06, "loss": 0.3227, "step": 15748 }, { "epoch": 0.5404598490048044, "grad_norm": 0.686272772328448, "learning_rate": 4.58893972041335e-06, "loss": 0.2865, "step": 15749 }, { "epoch": 0.5404941660947151, "grad_norm": 0.7982770704205423, "learning_rate": 4.5883858651929955e-06, "loss": 0.3402, "step": 15750 }, { "epoch": 0.540528483184626, "grad_norm": 0.7722865937974807, "learning_rate": 4.587832015057652e-06, "loss": 0.3018, "step": 15751 }, { "epoch": 0.5405628002745367, "grad_norm": 0.6635240946932477, "learning_rate": 4.587278170014163e-06, "loss": 0.2493, "step": 15752 }, { "epoch": 0.5405971173644475, "grad_norm": 0.7341812278666016, "learning_rate": 4.586724330069372e-06, "loss": 0.2635, "step": 15753 }, { "epoch": 0.5406314344543582, "grad_norm": 0.7782978934953292, "learning_rate": 4.58617049523012e-06, "loss": 0.2715, "step": 15754 }, { "epoch": 0.540665751544269, "grad_norm": 0.7242357901692852, "learning_rate": 4.585616665503247e-06, "loss": 0.2763, "step": 15755 }, { "epoch": 0.5407000686341799, "grad_norm": 0.8018730974104923, "learning_rate": 4.5850628408955995e-06, "loss": 0.2772, "step": 15756 }, { "epoch": 0.5407343857240906, "grad_norm": 0.712968094465905, "learning_rate": 4.584509021414017e-06, "loss": 0.2912, "step": 15757 }, { "epoch": 0.5407687028140014, "grad_norm": 0.8208728590140751, "learning_rate": 4.58395520706534e-06, "loss": 0.2401, "step": 15758 }, { "epoch": 0.5408030199039121, "grad_norm": 0.7120031549848502, "learning_rate": 4.583401397856412e-06, "loss": 0.2319, "step": 15759 }, { "epoch": 0.540837336993823, "grad_norm": 0.6998990819066848, "learning_rate": 4.582847593794073e-06, "loss": 0.2658, "step": 15760 }, { "epoch": 0.5408716540837337, "grad_norm": 0.8908033026917808, "learning_rate": 4.582293794885165e-06, "loss": 0.3146, "step": 15761 }, { "epoch": 0.5409059711736445, "grad_norm": 0.6632020590555608, "learning_rate": 4.581740001136533e-06, "loss": 0.3348, "step": 15762 }, { "epoch": 0.5409402882635552, "grad_norm": 0.8431175196525026, "learning_rate": 4.581186212555012e-06, "loss": 0.3131, "step": 15763 }, { "epoch": 0.540974605353466, "grad_norm": 0.7223298065029087, "learning_rate": 4.58063242914745e-06, "loss": 0.2605, "step": 15764 }, { "epoch": 0.5410089224433768, "grad_norm": 0.8354842630975698, "learning_rate": 4.580078650920686e-06, "loss": 0.3137, "step": 15765 }, { "epoch": 0.5410432395332876, "grad_norm": 0.7641797720100355, "learning_rate": 4.5795248778815575e-06, "loss": 0.2739, "step": 15766 }, { "epoch": 0.5410775566231983, "grad_norm": 0.7644355345058651, "learning_rate": 4.578971110036911e-06, "loss": 0.2661, "step": 15767 }, { "epoch": 0.5411118737131091, "grad_norm": 0.76962179385998, "learning_rate": 4.578417347393585e-06, "loss": 0.2425, "step": 15768 }, { "epoch": 0.54114619080302, "grad_norm": 0.7500945685268029, "learning_rate": 4.57786358995842e-06, "loss": 0.3103, "step": 15769 }, { "epoch": 0.5411805078929307, "grad_norm": 0.7601463325134191, "learning_rate": 4.5773098377382606e-06, "loss": 0.262, "step": 15770 }, { "epoch": 0.5412148249828415, "grad_norm": 0.8129308699098619, "learning_rate": 4.576756090739945e-06, "loss": 0.3827, "step": 15771 }, { "epoch": 0.5412491420727522, "grad_norm": 0.7090937102826883, "learning_rate": 4.576202348970313e-06, "loss": 0.2585, "step": 15772 }, { "epoch": 0.541283459162663, "grad_norm": 0.7980081381583426, "learning_rate": 4.575648612436208e-06, "loss": 0.2369, "step": 15773 }, { "epoch": 0.5413177762525738, "grad_norm": 0.6881296086898668, "learning_rate": 4.575094881144469e-06, "loss": 0.2602, "step": 15774 }, { "epoch": 0.5413520933424846, "grad_norm": 0.7567212660398024, "learning_rate": 4.574541155101937e-06, "loss": 0.3004, "step": 15775 }, { "epoch": 0.5413864104323953, "grad_norm": 0.7223610297052591, "learning_rate": 4.573987434315453e-06, "loss": 0.3193, "step": 15776 }, { "epoch": 0.5414207275223061, "grad_norm": 0.7976901334082003, "learning_rate": 4.57343371879186e-06, "loss": 0.2839, "step": 15777 }, { "epoch": 0.5414550446122168, "grad_norm": 0.7222189463893579, "learning_rate": 4.572880008537992e-06, "loss": 0.3108, "step": 15778 }, { "epoch": 0.5414893617021277, "grad_norm": 0.7675468116212596, "learning_rate": 4.5723263035606965e-06, "loss": 0.2316, "step": 15779 }, { "epoch": 0.5415236787920384, "grad_norm": 0.7626405153943061, "learning_rate": 4.571772603866812e-06, "loss": 0.3559, "step": 15780 }, { "epoch": 0.5415579958819492, "grad_norm": 0.8013613705864845, "learning_rate": 4.571218909463174e-06, "loss": 0.3426, "step": 15781 }, { "epoch": 0.54159231297186, "grad_norm": 0.9805677747152609, "learning_rate": 4.5706652203566286e-06, "loss": 0.2452, "step": 15782 }, { "epoch": 0.5416266300617708, "grad_norm": 0.8776449970386859, "learning_rate": 4.570111536554014e-06, "loss": 0.3159, "step": 15783 }, { "epoch": 0.5416609471516816, "grad_norm": 0.6873526477786779, "learning_rate": 4.569557858062168e-06, "loss": 0.3285, "step": 15784 }, { "epoch": 0.5416952642415923, "grad_norm": 0.7210202568181869, "learning_rate": 4.569004184887936e-06, "loss": 0.2634, "step": 15785 }, { "epoch": 0.5417295813315031, "grad_norm": 0.7646739054468503, "learning_rate": 4.568450517038154e-06, "loss": 0.2686, "step": 15786 }, { "epoch": 0.5417638984214138, "grad_norm": 0.8489445512471304, "learning_rate": 4.56789685451966e-06, "loss": 0.2987, "step": 15787 }, { "epoch": 0.5417982155113247, "grad_norm": 0.6787239085848045, "learning_rate": 4.5673431973393e-06, "loss": 0.2858, "step": 15788 }, { "epoch": 0.5418325326012354, "grad_norm": 0.7807307500989278, "learning_rate": 4.566789545503909e-06, "loss": 0.26, "step": 15789 }, { "epoch": 0.5418668496911462, "grad_norm": 0.8322108092675802, "learning_rate": 4.5662358990203266e-06, "loss": 0.274, "step": 15790 }, { "epoch": 0.5419011667810569, "grad_norm": 0.8696169474715901, "learning_rate": 4.5656822578953965e-06, "loss": 0.3105, "step": 15791 }, { "epoch": 0.5419354838709678, "grad_norm": 0.7789932544112351, "learning_rate": 4.565128622135955e-06, "loss": 0.3049, "step": 15792 }, { "epoch": 0.5419698009608785, "grad_norm": 0.8175721248954279, "learning_rate": 4.5645749917488406e-06, "loss": 0.2852, "step": 15793 }, { "epoch": 0.5420041180507893, "grad_norm": 0.6949478850555781, "learning_rate": 4.564021366740898e-06, "loss": 0.2424, "step": 15794 }, { "epoch": 0.5420384351407, "grad_norm": 0.7515819653718289, "learning_rate": 4.5634677471189594e-06, "loss": 0.26, "step": 15795 }, { "epoch": 0.5420727522306108, "grad_norm": 0.8341500685205099, "learning_rate": 4.56291413288987e-06, "loss": 0.297, "step": 15796 }, { "epoch": 0.5421070693205217, "grad_norm": 0.8379287121286392, "learning_rate": 4.562360524060466e-06, "loss": 0.308, "step": 15797 }, { "epoch": 0.5421413864104324, "grad_norm": 0.7708012920238791, "learning_rate": 4.561806920637588e-06, "loss": 0.2325, "step": 15798 }, { "epoch": 0.5421757035003432, "grad_norm": 0.6773821639991626, "learning_rate": 4.561253322628074e-06, "loss": 0.2725, "step": 15799 }, { "epoch": 0.5422100205902539, "grad_norm": 0.880482322081521, "learning_rate": 4.560699730038765e-06, "loss": 0.2509, "step": 15800 }, { "epoch": 0.5422443376801647, "grad_norm": 0.6632578928935967, "learning_rate": 4.560146142876497e-06, "loss": 0.2391, "step": 15801 }, { "epoch": 0.5422786547700755, "grad_norm": 0.8540343782846374, "learning_rate": 4.559592561148112e-06, "loss": 0.2673, "step": 15802 }, { "epoch": 0.5423129718599863, "grad_norm": 0.7957881994583391, "learning_rate": 4.559038984860448e-06, "loss": 0.3446, "step": 15803 }, { "epoch": 0.542347288949897, "grad_norm": 1.2415162814989418, "learning_rate": 4.55848541402034e-06, "loss": 0.264, "step": 15804 }, { "epoch": 0.5423816060398078, "grad_norm": 0.7800218732702578, "learning_rate": 4.557931848634632e-06, "loss": 0.3229, "step": 15805 }, { "epoch": 0.5424159231297186, "grad_norm": 0.8612634258107799, "learning_rate": 4.5573782887101615e-06, "loss": 0.3145, "step": 15806 }, { "epoch": 0.5424502402196294, "grad_norm": 0.7439658210377076, "learning_rate": 4.556824734253764e-06, "loss": 0.2531, "step": 15807 }, { "epoch": 0.5424845573095401, "grad_norm": 0.6936137548525313, "learning_rate": 4.556271185272282e-06, "loss": 0.3056, "step": 15808 }, { "epoch": 0.5425188743994509, "grad_norm": 0.7857395733019664, "learning_rate": 4.5557176417725536e-06, "loss": 0.3017, "step": 15809 }, { "epoch": 0.5425531914893617, "grad_norm": 0.8513867907945598, "learning_rate": 4.555164103761412e-06, "loss": 0.2643, "step": 15810 }, { "epoch": 0.5425875085792725, "grad_norm": 0.7843166742022698, "learning_rate": 4.554610571245702e-06, "loss": 0.3433, "step": 15811 }, { "epoch": 0.5426218256691833, "grad_norm": 0.7191756486492412, "learning_rate": 4.554057044232259e-06, "loss": 0.2734, "step": 15812 }, { "epoch": 0.542656142759094, "grad_norm": 0.6618327720473974, "learning_rate": 4.5535035227279185e-06, "loss": 0.2431, "step": 15813 }, { "epoch": 0.5426904598490048, "grad_norm": 1.2038254606996743, "learning_rate": 4.5529500067395245e-06, "loss": 0.3243, "step": 15814 }, { "epoch": 0.5427247769389156, "grad_norm": 0.7024719023841645, "learning_rate": 4.552396496273912e-06, "loss": 0.2953, "step": 15815 }, { "epoch": 0.5427590940288264, "grad_norm": 0.6708008365912478, "learning_rate": 4.551842991337916e-06, "loss": 0.2011, "step": 15816 }, { "epoch": 0.5427934111187371, "grad_norm": 1.0926315951193548, "learning_rate": 4.55128949193838e-06, "loss": 0.3068, "step": 15817 }, { "epoch": 0.5428277282086479, "grad_norm": 0.8536125840958391, "learning_rate": 4.550735998082138e-06, "loss": 0.3211, "step": 15818 }, { "epoch": 0.5428620452985586, "grad_norm": 0.7475694874910338, "learning_rate": 4.55018250977603e-06, "loss": 0.295, "step": 15819 }, { "epoch": 0.5428963623884695, "grad_norm": 0.7643782213394468, "learning_rate": 4.549629027026891e-06, "loss": 0.2697, "step": 15820 }, { "epoch": 0.5429306794783803, "grad_norm": 0.7699923281406199, "learning_rate": 4.549075549841563e-06, "loss": 0.2707, "step": 15821 }, { "epoch": 0.542964996568291, "grad_norm": 0.8563993950132982, "learning_rate": 4.548522078226877e-06, "loss": 0.2879, "step": 15822 }, { "epoch": 0.5429993136582018, "grad_norm": 0.7795672675354954, "learning_rate": 4.547968612189677e-06, "loss": 0.2481, "step": 15823 }, { "epoch": 0.5430336307481125, "grad_norm": 0.8910029279634615, "learning_rate": 4.547415151736799e-06, "loss": 0.3066, "step": 15824 }, { "epoch": 0.5430679478380234, "grad_norm": 0.7737579535479697, "learning_rate": 4.546861696875076e-06, "loss": 0.2519, "step": 15825 }, { "epoch": 0.5431022649279341, "grad_norm": 0.7723693506075082, "learning_rate": 4.546308247611349e-06, "loss": 0.239, "step": 15826 }, { "epoch": 0.5431365820178449, "grad_norm": 0.8131166472047164, "learning_rate": 4.545754803952456e-06, "loss": 0.2866, "step": 15827 }, { "epoch": 0.5431708991077556, "grad_norm": 0.859434840368301, "learning_rate": 4.545201365905232e-06, "loss": 0.2834, "step": 15828 }, { "epoch": 0.5432052161976665, "grad_norm": 0.8427147264571406, "learning_rate": 4.544647933476516e-06, "loss": 0.2523, "step": 15829 }, { "epoch": 0.5432395332875772, "grad_norm": 0.7936583026207144, "learning_rate": 4.544094506673142e-06, "loss": 0.2959, "step": 15830 }, { "epoch": 0.543273850377488, "grad_norm": 0.8118842934017939, "learning_rate": 4.5435410855019505e-06, "loss": 0.2697, "step": 15831 }, { "epoch": 0.5433081674673987, "grad_norm": 0.7218377657956103, "learning_rate": 4.542987669969777e-06, "loss": 0.2518, "step": 15832 }, { "epoch": 0.5433424845573095, "grad_norm": 0.7894208286026297, "learning_rate": 4.542434260083456e-06, "loss": 0.3684, "step": 15833 }, { "epoch": 0.5433768016472204, "grad_norm": 0.7868652959960151, "learning_rate": 4.5418808558498275e-06, "loss": 0.3066, "step": 15834 }, { "epoch": 0.5434111187371311, "grad_norm": 0.8454637993155467, "learning_rate": 4.541327457275727e-06, "loss": 0.2961, "step": 15835 }, { "epoch": 0.5434454358270419, "grad_norm": 0.7753564314301483, "learning_rate": 4.540774064367989e-06, "loss": 0.2224, "step": 15836 }, { "epoch": 0.5434797529169526, "grad_norm": 0.8090341468036633, "learning_rate": 4.540220677133455e-06, "loss": 0.3247, "step": 15837 }, { "epoch": 0.5435140700068635, "grad_norm": 0.9556611061993211, "learning_rate": 4.5396672955789584e-06, "loss": 0.2897, "step": 15838 }, { "epoch": 0.5435483870967742, "grad_norm": 0.7041981224985746, "learning_rate": 4.539113919711333e-06, "loss": 0.2538, "step": 15839 }, { "epoch": 0.543582704186685, "grad_norm": 0.8303213561665438, "learning_rate": 4.53856054953742e-06, "loss": 0.2723, "step": 15840 }, { "epoch": 0.5436170212765957, "grad_norm": 0.7650724999536572, "learning_rate": 4.538007185064053e-06, "loss": 0.3216, "step": 15841 }, { "epoch": 0.5436513383665065, "grad_norm": 0.7592783392664589, "learning_rate": 4.5374538262980685e-06, "loss": 0.2973, "step": 15842 }, { "epoch": 0.5436856554564173, "grad_norm": 0.8621204629778141, "learning_rate": 4.536900473246302e-06, "loss": 0.2943, "step": 15843 }, { "epoch": 0.5437199725463281, "grad_norm": 0.8215767476935926, "learning_rate": 4.536347125915592e-06, "loss": 0.2679, "step": 15844 }, { "epoch": 0.5437542896362388, "grad_norm": 0.9195481401757943, "learning_rate": 4.53579378431277e-06, "loss": 0.3162, "step": 15845 }, { "epoch": 0.5437886067261496, "grad_norm": 0.7816102300593072, "learning_rate": 4.535240448444677e-06, "loss": 0.2622, "step": 15846 }, { "epoch": 0.5438229238160603, "grad_norm": 0.7766313471106917, "learning_rate": 4.5346871183181465e-06, "loss": 0.2853, "step": 15847 }, { "epoch": 0.5438572409059712, "grad_norm": 0.8101540374892381, "learning_rate": 4.534133793940011e-06, "loss": 0.2932, "step": 15848 }, { "epoch": 0.543891557995882, "grad_norm": 0.8360906604499567, "learning_rate": 4.533580475317111e-06, "loss": 0.3409, "step": 15849 }, { "epoch": 0.5439258750857927, "grad_norm": 0.7975264909401313, "learning_rate": 4.533027162456282e-06, "loss": 0.3061, "step": 15850 }, { "epoch": 0.5439601921757035, "grad_norm": 0.8829790284400092, "learning_rate": 4.532473855364356e-06, "loss": 0.2915, "step": 15851 }, { "epoch": 0.5439945092656143, "grad_norm": 0.7831028999836434, "learning_rate": 4.531920554048172e-06, "loss": 0.2469, "step": 15852 }, { "epoch": 0.5440288263555251, "grad_norm": 0.867736489210222, "learning_rate": 4.531367258514564e-06, "loss": 0.3062, "step": 15853 }, { "epoch": 0.5440631434454358, "grad_norm": 0.7572565544764507, "learning_rate": 4.530813968770365e-06, "loss": 0.2776, "step": 15854 }, { "epoch": 0.5440974605353466, "grad_norm": 0.8333614346395358, "learning_rate": 4.5302606848224145e-06, "loss": 0.3026, "step": 15855 }, { "epoch": 0.5441317776252573, "grad_norm": 0.8395310605988295, "learning_rate": 4.529707406677543e-06, "loss": 0.2891, "step": 15856 }, { "epoch": 0.5441660947151682, "grad_norm": 0.7965284609211913, "learning_rate": 4.5291541343425906e-06, "loss": 0.3042, "step": 15857 }, { "epoch": 0.5442004118050789, "grad_norm": 0.7051191051732091, "learning_rate": 4.52860086782439e-06, "loss": 0.2696, "step": 15858 }, { "epoch": 0.5442347288949897, "grad_norm": 0.9129100977769565, "learning_rate": 4.528047607129773e-06, "loss": 0.2823, "step": 15859 }, { "epoch": 0.5442690459849004, "grad_norm": 0.7698881802219615, "learning_rate": 4.527494352265581e-06, "loss": 0.2781, "step": 15860 }, { "epoch": 0.5443033630748113, "grad_norm": 0.6468732872328913, "learning_rate": 4.526941103238646e-06, "loss": 0.2154, "step": 15861 }, { "epoch": 0.5443376801647221, "grad_norm": 0.8090795606689065, "learning_rate": 4.526387860055798e-06, "loss": 0.3111, "step": 15862 }, { "epoch": 0.5443719972546328, "grad_norm": 0.8029876911643101, "learning_rate": 4.52583462272388e-06, "loss": 0.2492, "step": 15863 }, { "epoch": 0.5444063143445436, "grad_norm": 0.7370178487874112, "learning_rate": 4.52528139124972e-06, "loss": 0.2761, "step": 15864 }, { "epoch": 0.5444406314344543, "grad_norm": 0.6960936591930704, "learning_rate": 4.524728165640154e-06, "loss": 0.288, "step": 15865 }, { "epoch": 0.5444749485243652, "grad_norm": 0.791493357631877, "learning_rate": 4.5241749459020205e-06, "loss": 0.2839, "step": 15866 }, { "epoch": 0.5445092656142759, "grad_norm": 0.7789869846328156, "learning_rate": 4.52362173204215e-06, "loss": 0.247, "step": 15867 }, { "epoch": 0.5445435827041867, "grad_norm": 0.7725632032028393, "learning_rate": 4.523068524067375e-06, "loss": 0.2848, "step": 15868 }, { "epoch": 0.5445778997940974, "grad_norm": 0.7793207262471975, "learning_rate": 4.5225153219845355e-06, "loss": 0.3491, "step": 15869 }, { "epoch": 0.5446122168840082, "grad_norm": 0.7016414491113315, "learning_rate": 4.521962125800462e-06, "loss": 0.269, "step": 15870 }, { "epoch": 0.544646533973919, "grad_norm": 0.7143163492510063, "learning_rate": 4.521408935521987e-06, "loss": 0.2758, "step": 15871 }, { "epoch": 0.5446808510638298, "grad_norm": 0.7234297172577555, "learning_rate": 4.520855751155948e-06, "loss": 0.2877, "step": 15872 }, { "epoch": 0.5447151681537405, "grad_norm": 0.6631769192944368, "learning_rate": 4.5203025727091785e-06, "loss": 0.3256, "step": 15873 }, { "epoch": 0.5447494852436513, "grad_norm": 0.7939588346849109, "learning_rate": 4.519749400188509e-06, "loss": 0.2974, "step": 15874 }, { "epoch": 0.5447838023335622, "grad_norm": 0.8663904358580955, "learning_rate": 4.519196233600778e-06, "loss": 0.2594, "step": 15875 }, { "epoch": 0.5448181194234729, "grad_norm": 0.7957049391733768, "learning_rate": 4.518643072952817e-06, "loss": 0.2941, "step": 15876 }, { "epoch": 0.5448524365133837, "grad_norm": 0.7704588536602088, "learning_rate": 4.518089918251457e-06, "loss": 0.3172, "step": 15877 }, { "epoch": 0.5448867536032944, "grad_norm": 0.8444526884928021, "learning_rate": 4.517536769503537e-06, "loss": 0.3165, "step": 15878 }, { "epoch": 0.5449210706932052, "grad_norm": 0.7769413929725216, "learning_rate": 4.516983626715887e-06, "loss": 0.2888, "step": 15879 }, { "epoch": 0.544955387783116, "grad_norm": 0.7210814098493351, "learning_rate": 4.51643048989534e-06, "loss": 0.2818, "step": 15880 }, { "epoch": 0.5449897048730268, "grad_norm": 0.7574410788984333, "learning_rate": 4.5158773590487315e-06, "loss": 0.239, "step": 15881 }, { "epoch": 0.5450240219629375, "grad_norm": 0.8611503008855349, "learning_rate": 4.515324234182895e-06, "loss": 0.2897, "step": 15882 }, { "epoch": 0.5450583390528483, "grad_norm": 0.7557324599374787, "learning_rate": 4.514771115304659e-06, "loss": 0.2828, "step": 15883 }, { "epoch": 0.5450926561427591, "grad_norm": 0.7909042908530338, "learning_rate": 4.514218002420863e-06, "loss": 0.2289, "step": 15884 }, { "epoch": 0.5451269732326699, "grad_norm": 0.8927197720060847, "learning_rate": 4.513664895538337e-06, "loss": 0.2617, "step": 15885 }, { "epoch": 0.5451612903225806, "grad_norm": 0.7934827491198607, "learning_rate": 4.513111794663913e-06, "loss": 0.2585, "step": 15886 }, { "epoch": 0.5451956074124914, "grad_norm": 0.8116390914013583, "learning_rate": 4.5125586998044255e-06, "loss": 0.2924, "step": 15887 }, { "epoch": 0.5452299245024022, "grad_norm": 0.6916853721135455, "learning_rate": 4.512005610966707e-06, "loss": 0.2379, "step": 15888 }, { "epoch": 0.545264241592313, "grad_norm": 0.7907264524219804, "learning_rate": 4.51145252815759e-06, "loss": 0.2966, "step": 15889 }, { "epoch": 0.5452985586822238, "grad_norm": 0.8337324707476734, "learning_rate": 4.510899451383909e-06, "loss": 0.3759, "step": 15890 }, { "epoch": 0.5453328757721345, "grad_norm": 0.8109794641191553, "learning_rate": 4.510346380652493e-06, "loss": 0.2964, "step": 15891 }, { "epoch": 0.5453671928620453, "grad_norm": 0.7477319683567603, "learning_rate": 4.509793315970179e-06, "loss": 0.2393, "step": 15892 }, { "epoch": 0.545401509951956, "grad_norm": 0.8888738212810952, "learning_rate": 4.509240257343795e-06, "loss": 0.2721, "step": 15893 }, { "epoch": 0.5454358270418669, "grad_norm": 0.8356261487221315, "learning_rate": 4.508687204780176e-06, "loss": 0.303, "step": 15894 }, { "epoch": 0.5454701441317776, "grad_norm": 0.8515114791322521, "learning_rate": 4.508134158286154e-06, "loss": 0.3243, "step": 15895 }, { "epoch": 0.5455044612216884, "grad_norm": 0.7724295147092992, "learning_rate": 4.507581117868561e-06, "loss": 0.327, "step": 15896 }, { "epoch": 0.5455387783115991, "grad_norm": 0.8428473453794971, "learning_rate": 4.5070280835342285e-06, "loss": 0.2677, "step": 15897 }, { "epoch": 0.54557309540151, "grad_norm": 0.7596575905908608, "learning_rate": 4.506475055289991e-06, "loss": 0.2699, "step": 15898 }, { "epoch": 0.5456074124914208, "grad_norm": 0.7603829363243501, "learning_rate": 4.505922033142678e-06, "loss": 0.2874, "step": 15899 }, { "epoch": 0.5456417295813315, "grad_norm": 0.8308708956636577, "learning_rate": 4.5053690170991205e-06, "loss": 0.2946, "step": 15900 }, { "epoch": 0.5456760466712423, "grad_norm": 0.7688917789790045, "learning_rate": 4.504816007166154e-06, "loss": 0.2732, "step": 15901 }, { "epoch": 0.545710363761153, "grad_norm": 0.7056693441988655, "learning_rate": 4.504263003350608e-06, "loss": 0.2491, "step": 15902 }, { "epoch": 0.5457446808510639, "grad_norm": 0.730152402940037, "learning_rate": 4.503710005659313e-06, "loss": 0.305, "step": 15903 }, { "epoch": 0.5457789979409746, "grad_norm": 0.7867625347967306, "learning_rate": 4.503157014099105e-06, "loss": 0.3251, "step": 15904 }, { "epoch": 0.5458133150308854, "grad_norm": 0.6975374479358959, "learning_rate": 4.502604028676812e-06, "loss": 0.2139, "step": 15905 }, { "epoch": 0.5458476321207961, "grad_norm": 0.8021692935383864, "learning_rate": 4.5020510493992645e-06, "loss": 0.2566, "step": 15906 }, { "epoch": 0.545881949210707, "grad_norm": 0.7640295474734936, "learning_rate": 4.501498076273299e-06, "loss": 0.2772, "step": 15907 }, { "epoch": 0.5459162663006177, "grad_norm": 0.7697692357747112, "learning_rate": 4.500945109305741e-06, "loss": 0.2707, "step": 15908 }, { "epoch": 0.5459505833905285, "grad_norm": 0.7323946695559059, "learning_rate": 4.500392148503425e-06, "loss": 0.2435, "step": 15909 }, { "epoch": 0.5459849004804392, "grad_norm": 0.8273432546649432, "learning_rate": 4.499839193873181e-06, "loss": 0.3008, "step": 15910 }, { "epoch": 0.54601921757035, "grad_norm": 0.7662002697578757, "learning_rate": 4.499286245421842e-06, "loss": 0.2673, "step": 15911 }, { "epoch": 0.5460535346602609, "grad_norm": 0.8325202396743445, "learning_rate": 4.498733303156236e-06, "loss": 0.317, "step": 15912 }, { "epoch": 0.5460878517501716, "grad_norm": 0.8432204803288545, "learning_rate": 4.498180367083197e-06, "loss": 0.2697, "step": 15913 }, { "epoch": 0.5461221688400824, "grad_norm": 0.8167018814877366, "learning_rate": 4.497627437209555e-06, "loss": 0.2863, "step": 15914 }, { "epoch": 0.5461564859299931, "grad_norm": 0.762884352265475, "learning_rate": 4.497074513542138e-06, "loss": 0.2405, "step": 15915 }, { "epoch": 0.5461908030199039, "grad_norm": 0.7568040698236526, "learning_rate": 4.49652159608778e-06, "loss": 0.2724, "step": 15916 }, { "epoch": 0.5462251201098147, "grad_norm": 0.8089839137860083, "learning_rate": 4.495968684853312e-06, "loss": 0.27, "step": 15917 }, { "epoch": 0.5462594371997255, "grad_norm": 0.8128538967419612, "learning_rate": 4.495415779845561e-06, "loss": 0.2949, "step": 15918 }, { "epoch": 0.5462937542896362, "grad_norm": 0.7348765788635873, "learning_rate": 4.494862881071362e-06, "loss": 0.2793, "step": 15919 }, { "epoch": 0.546328071379547, "grad_norm": 0.7772207512170535, "learning_rate": 4.49430998853754e-06, "loss": 0.2757, "step": 15920 }, { "epoch": 0.5463623884694578, "grad_norm": 0.7701376055008622, "learning_rate": 4.493757102250932e-06, "loss": 0.3012, "step": 15921 }, { "epoch": 0.5463967055593686, "grad_norm": 0.8721586789866457, "learning_rate": 4.493204222218365e-06, "loss": 0.2719, "step": 15922 }, { "epoch": 0.5464310226492793, "grad_norm": 0.756698555166681, "learning_rate": 4.492651348446665e-06, "loss": 0.2923, "step": 15923 }, { "epoch": 0.5464653397391901, "grad_norm": 0.7363381879191487, "learning_rate": 4.492098480942669e-06, "loss": 0.2663, "step": 15924 }, { "epoch": 0.5464996568291008, "grad_norm": 0.7898112126311897, "learning_rate": 4.491545619713205e-06, "loss": 0.2977, "step": 15925 }, { "epoch": 0.5465339739190117, "grad_norm": 0.704050971864092, "learning_rate": 4.490992764765099e-06, "loss": 0.2608, "step": 15926 }, { "epoch": 0.5465682910089225, "grad_norm": 0.7740559320836687, "learning_rate": 4.490439916105187e-06, "loss": 0.2756, "step": 15927 }, { "epoch": 0.5466026080988332, "grad_norm": 0.644598898941594, "learning_rate": 4.489887073740294e-06, "loss": 0.2578, "step": 15928 }, { "epoch": 0.546636925188744, "grad_norm": 0.7969929284313791, "learning_rate": 4.489334237677251e-06, "loss": 0.2766, "step": 15929 }, { "epoch": 0.5466712422786548, "grad_norm": 0.7883366013933616, "learning_rate": 4.48878140792289e-06, "loss": 0.2946, "step": 15930 }, { "epoch": 0.5467055593685656, "grad_norm": 0.6962030900196939, "learning_rate": 4.488228584484038e-06, "loss": 0.2946, "step": 15931 }, { "epoch": 0.5467398764584763, "grad_norm": 0.8224594372937888, "learning_rate": 4.4876757673675235e-06, "loss": 0.2729, "step": 15932 }, { "epoch": 0.5467741935483871, "grad_norm": 0.8804750355774503, "learning_rate": 4.487122956580179e-06, "loss": 0.3092, "step": 15933 }, { "epoch": 0.5468085106382978, "grad_norm": 0.8487634446095015, "learning_rate": 4.486570152128833e-06, "loss": 0.262, "step": 15934 }, { "epoch": 0.5468428277282087, "grad_norm": 0.7319072969875011, "learning_rate": 4.486017354020313e-06, "loss": 0.2912, "step": 15935 }, { "epoch": 0.5468771448181194, "grad_norm": 0.7766772454441766, "learning_rate": 4.485464562261451e-06, "loss": 0.3177, "step": 15936 }, { "epoch": 0.5469114619080302, "grad_norm": 0.7802747834278612, "learning_rate": 4.484911776859073e-06, "loss": 0.2679, "step": 15937 }, { "epoch": 0.546945778997941, "grad_norm": 0.791284810404315, "learning_rate": 4.484358997820009e-06, "loss": 0.2744, "step": 15938 }, { "epoch": 0.5469800960878517, "grad_norm": 0.7831415802078835, "learning_rate": 4.483806225151089e-06, "loss": 0.2558, "step": 15939 }, { "epoch": 0.5470144131777626, "grad_norm": 0.7787928630012345, "learning_rate": 4.483253458859142e-06, "loss": 0.302, "step": 15940 }, { "epoch": 0.5470487302676733, "grad_norm": 0.8555946406896804, "learning_rate": 4.4827006989509945e-06, "loss": 0.2489, "step": 15941 }, { "epoch": 0.5470830473575841, "grad_norm": 0.8200927730104975, "learning_rate": 4.482147945433479e-06, "loss": 0.2874, "step": 15942 }, { "epoch": 0.5471173644474948, "grad_norm": 0.8370682950820936, "learning_rate": 4.481595198313421e-06, "loss": 0.2775, "step": 15943 }, { "epoch": 0.5471516815374057, "grad_norm": 0.7052779351835263, "learning_rate": 4.481042457597648e-06, "loss": 0.2577, "step": 15944 }, { "epoch": 0.5471859986273164, "grad_norm": 0.8047638800961298, "learning_rate": 4.480489723292993e-06, "loss": 0.3101, "step": 15945 }, { "epoch": 0.5472203157172272, "grad_norm": 0.7593814527858382, "learning_rate": 4.479936995406281e-06, "loss": 0.3025, "step": 15946 }, { "epoch": 0.5472546328071379, "grad_norm": 0.7633045250792821, "learning_rate": 4.479384273944339e-06, "loss": 0.317, "step": 15947 }, { "epoch": 0.5472889498970487, "grad_norm": 0.758905944919099, "learning_rate": 4.478831558914e-06, "loss": 0.2568, "step": 15948 }, { "epoch": 0.5473232669869595, "grad_norm": 0.8736135102778093, "learning_rate": 4.4782788503220895e-06, "loss": 0.3022, "step": 15949 }, { "epoch": 0.5473575840768703, "grad_norm": 0.7491959009167929, "learning_rate": 4.477726148175433e-06, "loss": 0.2871, "step": 15950 }, { "epoch": 0.547391901166781, "grad_norm": 0.8274462101636961, "learning_rate": 4.477173452480864e-06, "loss": 0.2901, "step": 15951 }, { "epoch": 0.5474262182566918, "grad_norm": 0.8082517617786419, "learning_rate": 4.476620763245204e-06, "loss": 0.2811, "step": 15952 }, { "epoch": 0.5474605353466027, "grad_norm": 0.7703319039312874, "learning_rate": 4.476068080475288e-06, "loss": 0.3774, "step": 15953 }, { "epoch": 0.5474948524365134, "grad_norm": 0.767246858979141, "learning_rate": 4.475515404177937e-06, "loss": 0.3016, "step": 15954 }, { "epoch": 0.5475291695264242, "grad_norm": 0.8180952296148704, "learning_rate": 4.474962734359982e-06, "loss": 0.3253, "step": 15955 }, { "epoch": 0.5475634866163349, "grad_norm": 0.7358011724705796, "learning_rate": 4.4744100710282525e-06, "loss": 0.2791, "step": 15956 }, { "epoch": 0.5475978037062457, "grad_norm": 0.8517794227658068, "learning_rate": 4.473857414189573e-06, "loss": 0.2884, "step": 15957 }, { "epoch": 0.5476321207961565, "grad_norm": 0.6642302431062558, "learning_rate": 4.473304763850771e-06, "loss": 0.2773, "step": 15958 }, { "epoch": 0.5476664378860673, "grad_norm": 0.7809848878432123, "learning_rate": 4.472752120018675e-06, "loss": 0.3166, "step": 15959 }, { "epoch": 0.547700754975978, "grad_norm": 0.7757548934026142, "learning_rate": 4.472199482700112e-06, "loss": 0.2444, "step": 15960 }, { "epoch": 0.5477350720658888, "grad_norm": 0.7560799622618246, "learning_rate": 4.471646851901908e-06, "loss": 0.2517, "step": 15961 }, { "epoch": 0.5477693891557995, "grad_norm": 0.724282339345541, "learning_rate": 4.471094227630892e-06, "loss": 0.3027, "step": 15962 }, { "epoch": 0.5478037062457104, "grad_norm": 0.7815060218980896, "learning_rate": 4.470541609893891e-06, "loss": 0.2949, "step": 15963 }, { "epoch": 0.5478380233356211, "grad_norm": 0.8463952560280126, "learning_rate": 4.469988998697729e-06, "loss": 0.3421, "step": 15964 }, { "epoch": 0.5478723404255319, "grad_norm": 0.7841940822779709, "learning_rate": 4.469436394049238e-06, "loss": 0.258, "step": 15965 }, { "epoch": 0.5479066575154427, "grad_norm": 0.780953271418812, "learning_rate": 4.46888379595524e-06, "loss": 0.3411, "step": 15966 }, { "epoch": 0.5479409746053535, "grad_norm": 0.7049928190258006, "learning_rate": 4.468331204422562e-06, "loss": 0.2376, "step": 15967 }, { "epoch": 0.5479752916952643, "grad_norm": 0.7306482802047264, "learning_rate": 4.467778619458033e-06, "loss": 0.2476, "step": 15968 }, { "epoch": 0.548009608785175, "grad_norm": 0.8040246136591545, "learning_rate": 4.467226041068481e-06, "loss": 0.3044, "step": 15969 }, { "epoch": 0.5480439258750858, "grad_norm": 0.7682401248551396, "learning_rate": 4.466673469260727e-06, "loss": 0.2847, "step": 15970 }, { "epoch": 0.5480782429649965, "grad_norm": 0.6678969583587399, "learning_rate": 4.466120904041603e-06, "loss": 0.2891, "step": 15971 }, { "epoch": 0.5481125600549074, "grad_norm": 0.8796934651382604, "learning_rate": 4.465568345417932e-06, "loss": 0.29, "step": 15972 }, { "epoch": 0.5481468771448181, "grad_norm": 0.9186907521530245, "learning_rate": 4.465015793396539e-06, "loss": 0.3005, "step": 15973 }, { "epoch": 0.5481811942347289, "grad_norm": 0.6822855225655632, "learning_rate": 4.464463247984254e-06, "loss": 0.2661, "step": 15974 }, { "epoch": 0.5482155113246396, "grad_norm": 0.7427572027228474, "learning_rate": 4.463910709187902e-06, "loss": 0.2912, "step": 15975 }, { "epoch": 0.5482498284145505, "grad_norm": 0.7206395058001673, "learning_rate": 4.463358177014305e-06, "loss": 0.3176, "step": 15976 }, { "epoch": 0.5482841455044613, "grad_norm": 0.8565106366716287, "learning_rate": 4.462805651470296e-06, "loss": 0.3214, "step": 15977 }, { "epoch": 0.548318462594372, "grad_norm": 0.8510318660993877, "learning_rate": 4.462253132562695e-06, "loss": 0.293, "step": 15978 }, { "epoch": 0.5483527796842828, "grad_norm": 0.7352125661546688, "learning_rate": 4.4617006202983285e-06, "loss": 0.2746, "step": 15979 }, { "epoch": 0.5483870967741935, "grad_norm": 0.7874507172964925, "learning_rate": 4.461148114684026e-06, "loss": 0.2803, "step": 15980 }, { "epoch": 0.5484214138641044, "grad_norm": 0.7671282320860167, "learning_rate": 4.460595615726607e-06, "loss": 0.2466, "step": 15981 }, { "epoch": 0.5484557309540151, "grad_norm": 0.7790343153790839, "learning_rate": 4.460043123432903e-06, "loss": 0.2775, "step": 15982 }, { "epoch": 0.5484900480439259, "grad_norm": 0.7731590800389904, "learning_rate": 4.459490637809736e-06, "loss": 0.2342, "step": 15983 }, { "epoch": 0.5485243651338366, "grad_norm": 0.7552133936400847, "learning_rate": 4.458938158863931e-06, "loss": 0.3485, "step": 15984 }, { "epoch": 0.5485586822237474, "grad_norm": 0.7697797603332805, "learning_rate": 4.4583856866023145e-06, "loss": 0.2631, "step": 15985 }, { "epoch": 0.5485929993136582, "grad_norm": 0.7610904825388999, "learning_rate": 4.457833221031713e-06, "loss": 0.304, "step": 15986 }, { "epoch": 0.548627316403569, "grad_norm": 0.9505974428647401, "learning_rate": 4.457280762158947e-06, "loss": 0.3009, "step": 15987 }, { "epoch": 0.5486616334934797, "grad_norm": 0.7708596371384298, "learning_rate": 4.456728309990847e-06, "loss": 0.298, "step": 15988 }, { "epoch": 0.5486959505833905, "grad_norm": 0.7416041288916209, "learning_rate": 4.456175864534235e-06, "loss": 0.2934, "step": 15989 }, { "epoch": 0.5487302676733014, "grad_norm": 0.7784527539251679, "learning_rate": 4.455623425795935e-06, "loss": 0.2659, "step": 15990 }, { "epoch": 0.5487645847632121, "grad_norm": 0.7975972786752259, "learning_rate": 4.455070993782773e-06, "loss": 0.2836, "step": 15991 }, { "epoch": 0.5487989018531229, "grad_norm": 0.7663162092342998, "learning_rate": 4.4545185685015755e-06, "loss": 0.2665, "step": 15992 }, { "epoch": 0.5488332189430336, "grad_norm": 0.7548448300061497, "learning_rate": 4.453966149959162e-06, "loss": 0.2702, "step": 15993 }, { "epoch": 0.5488675360329444, "grad_norm": 1.0216524668087494, "learning_rate": 4.453413738162363e-06, "loss": 0.2653, "step": 15994 }, { "epoch": 0.5489018531228552, "grad_norm": 0.8799963278586288, "learning_rate": 4.452861333117999e-06, "loss": 0.2923, "step": 15995 }, { "epoch": 0.548936170212766, "grad_norm": 0.8251127283234404, "learning_rate": 4.452308934832894e-06, "loss": 0.2912, "step": 15996 }, { "epoch": 0.5489704873026767, "grad_norm": 0.8416707877914054, "learning_rate": 4.451756543313876e-06, "loss": 0.2817, "step": 15997 }, { "epoch": 0.5490048043925875, "grad_norm": 0.771345571456728, "learning_rate": 4.451204158567764e-06, "loss": 0.3025, "step": 15998 }, { "epoch": 0.5490391214824983, "grad_norm": 0.7794947685409171, "learning_rate": 4.450651780601385e-06, "loss": 0.2794, "step": 15999 }, { "epoch": 0.5490734385724091, "grad_norm": 0.8392848538507532, "learning_rate": 4.450099409421566e-06, "loss": 0.2769, "step": 16000 }, { "epoch": 0.5491077556623198, "grad_norm": 0.7198968344190586, "learning_rate": 4.449547045035125e-06, "loss": 0.2224, "step": 16001 }, { "epoch": 0.5491420727522306, "grad_norm": 0.7908636597201875, "learning_rate": 4.4489946874488885e-06, "loss": 0.2498, "step": 16002 }, { "epoch": 0.5491763898421413, "grad_norm": 0.7557938279861152, "learning_rate": 4.448442336669682e-06, "loss": 0.2928, "step": 16003 }, { "epoch": 0.5492107069320522, "grad_norm": 0.7742008671441374, "learning_rate": 4.447889992704326e-06, "loss": 0.2614, "step": 16004 }, { "epoch": 0.549245024021963, "grad_norm": 0.7586338223233946, "learning_rate": 4.447337655559645e-06, "loss": 0.2426, "step": 16005 }, { "epoch": 0.5492793411118737, "grad_norm": 0.7743405315434438, "learning_rate": 4.446785325242464e-06, "loss": 0.2665, "step": 16006 }, { "epoch": 0.5493136582017845, "grad_norm": 0.7529918416088978, "learning_rate": 4.446233001759605e-06, "loss": 0.2476, "step": 16007 }, { "epoch": 0.5493479752916952, "grad_norm": 0.7948033342599824, "learning_rate": 4.4456806851178915e-06, "loss": 0.2992, "step": 16008 }, { "epoch": 0.5493822923816061, "grad_norm": 0.9489414917730485, "learning_rate": 4.445128375324148e-06, "loss": 0.2995, "step": 16009 }, { "epoch": 0.5494166094715168, "grad_norm": 0.9257575725645351, "learning_rate": 4.444576072385195e-06, "loss": 0.2718, "step": 16010 }, { "epoch": 0.5494509265614276, "grad_norm": 0.8960854959735576, "learning_rate": 4.444023776307857e-06, "loss": 0.2933, "step": 16011 }, { "epoch": 0.5494852436513383, "grad_norm": 0.8266482519221341, "learning_rate": 4.443471487098957e-06, "loss": 0.3178, "step": 16012 }, { "epoch": 0.5495195607412492, "grad_norm": 0.7859259604909703, "learning_rate": 4.442919204765319e-06, "loss": 0.2775, "step": 16013 }, { "epoch": 0.5495538778311599, "grad_norm": 0.8213308420730577, "learning_rate": 4.442366929313764e-06, "loss": 0.3549, "step": 16014 }, { "epoch": 0.5495881949210707, "grad_norm": 0.810032559035493, "learning_rate": 4.4418146607511156e-06, "loss": 0.304, "step": 16015 }, { "epoch": 0.5496225120109814, "grad_norm": 0.7529317545852855, "learning_rate": 4.441262399084195e-06, "loss": 0.3365, "step": 16016 }, { "epoch": 0.5496568291008922, "grad_norm": 0.7764945571514668, "learning_rate": 4.440710144319829e-06, "loss": 0.2479, "step": 16017 }, { "epoch": 0.5496911461908031, "grad_norm": 0.7660205185315335, "learning_rate": 4.4401578964648364e-06, "loss": 0.3125, "step": 16018 }, { "epoch": 0.5497254632807138, "grad_norm": 0.7281129979495932, "learning_rate": 4.439605655526038e-06, "loss": 0.3071, "step": 16019 }, { "epoch": 0.5497597803706246, "grad_norm": 0.877384340416151, "learning_rate": 4.43905342151026e-06, "loss": 0.2838, "step": 16020 }, { "epoch": 0.5497940974605353, "grad_norm": 0.7752365565283835, "learning_rate": 4.438501194424323e-06, "loss": 0.3085, "step": 16021 }, { "epoch": 0.5498284145504462, "grad_norm": 0.7816800734609182, "learning_rate": 4.437948974275047e-06, "loss": 0.2641, "step": 16022 }, { "epoch": 0.5498627316403569, "grad_norm": 0.7902707182484877, "learning_rate": 4.437396761069259e-06, "loss": 0.2856, "step": 16023 }, { "epoch": 0.5498970487302677, "grad_norm": 0.8059317111073101, "learning_rate": 4.436844554813778e-06, "loss": 0.3201, "step": 16024 }, { "epoch": 0.5499313658201784, "grad_norm": 0.7755722179224482, "learning_rate": 4.4362923555154235e-06, "loss": 0.3006, "step": 16025 }, { "epoch": 0.5499656829100892, "grad_norm": 0.8008872694988213, "learning_rate": 4.435740163181022e-06, "loss": 0.2895, "step": 16026 }, { "epoch": 0.55, "grad_norm": 0.7488137922059146, "learning_rate": 4.435187977817392e-06, "loss": 0.2373, "step": 16027 }, { "epoch": 0.5500343170899108, "grad_norm": 0.774823835453743, "learning_rate": 4.434635799431356e-06, "loss": 0.2447, "step": 16028 }, { "epoch": 0.5500686341798215, "grad_norm": 0.7113595799067448, "learning_rate": 4.434083628029735e-06, "loss": 0.2686, "step": 16029 }, { "epoch": 0.5501029512697323, "grad_norm": 0.7537344798584409, "learning_rate": 4.4335314636193525e-06, "loss": 0.3007, "step": 16030 }, { "epoch": 0.550137268359643, "grad_norm": 0.7405869447820452, "learning_rate": 4.4329793062070266e-06, "loss": 0.2776, "step": 16031 }, { "epoch": 0.5501715854495539, "grad_norm": 0.775020335399689, "learning_rate": 4.432427155799582e-06, "loss": 0.2382, "step": 16032 }, { "epoch": 0.5502059025394647, "grad_norm": 0.8025911782482343, "learning_rate": 4.431875012403838e-06, "loss": 0.2439, "step": 16033 }, { "epoch": 0.5502402196293754, "grad_norm": 0.8616138549482032, "learning_rate": 4.431322876026614e-06, "loss": 0.2797, "step": 16034 }, { "epoch": 0.5502745367192862, "grad_norm": 0.8911669160748305, "learning_rate": 4.430770746674734e-06, "loss": 0.2727, "step": 16035 }, { "epoch": 0.550308853809197, "grad_norm": 0.7046153815728338, "learning_rate": 4.430218624355019e-06, "loss": 0.2474, "step": 16036 }, { "epoch": 0.5503431708991078, "grad_norm": 0.8202650976328227, "learning_rate": 4.429666509074286e-06, "loss": 0.2954, "step": 16037 }, { "epoch": 0.5503774879890185, "grad_norm": 0.8491116432750081, "learning_rate": 4.429114400839361e-06, "loss": 0.2455, "step": 16038 }, { "epoch": 0.5504118050789293, "grad_norm": 0.740914478788359, "learning_rate": 4.428562299657061e-06, "loss": 0.3307, "step": 16039 }, { "epoch": 0.55044612216884, "grad_norm": 0.754017740084293, "learning_rate": 4.428010205534206e-06, "loss": 0.292, "step": 16040 }, { "epoch": 0.5504804392587509, "grad_norm": 0.7379121237400571, "learning_rate": 4.42745811847762e-06, "loss": 0.237, "step": 16041 }, { "epoch": 0.5505147563486616, "grad_norm": 1.1458479493954028, "learning_rate": 4.426906038494121e-06, "loss": 0.26, "step": 16042 }, { "epoch": 0.5505490734385724, "grad_norm": 0.796163882797932, "learning_rate": 4.426353965590528e-06, "loss": 0.2696, "step": 16043 }, { "epoch": 0.5505833905284832, "grad_norm": 0.8012961302977791, "learning_rate": 4.425801899773665e-06, "loss": 0.2734, "step": 16044 }, { "epoch": 0.550617707618394, "grad_norm": 0.7424976225927117, "learning_rate": 4.425249841050349e-06, "loss": 0.2951, "step": 16045 }, { "epoch": 0.5506520247083048, "grad_norm": 0.7674169808946903, "learning_rate": 4.4246977894274015e-06, "loss": 0.3069, "step": 16046 }, { "epoch": 0.5506863417982155, "grad_norm": 0.7930407683876688, "learning_rate": 4.424145744911643e-06, "loss": 0.2491, "step": 16047 }, { "epoch": 0.5507206588881263, "grad_norm": 0.7435510889326976, "learning_rate": 4.4235937075098906e-06, "loss": 0.2817, "step": 16048 }, { "epoch": 0.550754975978037, "grad_norm": 0.7706516527342718, "learning_rate": 4.423041677228968e-06, "loss": 0.3301, "step": 16049 }, { "epoch": 0.5507892930679479, "grad_norm": 0.6994965767084023, "learning_rate": 4.422489654075692e-06, "loss": 0.2675, "step": 16050 }, { "epoch": 0.5508236101578586, "grad_norm": 0.7350807747635274, "learning_rate": 4.421937638056881e-06, "loss": 0.2503, "step": 16051 }, { "epoch": 0.5508579272477694, "grad_norm": 0.8207042690708964, "learning_rate": 4.421385629179359e-06, "loss": 0.2446, "step": 16052 }, { "epoch": 0.5508922443376801, "grad_norm": 0.8581187033878882, "learning_rate": 4.420833627449943e-06, "loss": 0.3084, "step": 16053 }, { "epoch": 0.5509265614275909, "grad_norm": 0.8155974063862981, "learning_rate": 4.42028163287545e-06, "loss": 0.3012, "step": 16054 }, { "epoch": 0.5509608785175018, "grad_norm": 0.7068955327366115, "learning_rate": 4.4197296454627045e-06, "loss": 0.263, "step": 16055 }, { "epoch": 0.5509951956074125, "grad_norm": 0.7100044774465557, "learning_rate": 4.419177665218522e-06, "loss": 0.2982, "step": 16056 }, { "epoch": 0.5510295126973233, "grad_norm": 0.8583949710890137, "learning_rate": 4.418625692149721e-06, "loss": 0.2824, "step": 16057 }, { "epoch": 0.551063829787234, "grad_norm": 1.0184630678330993, "learning_rate": 4.418073726263122e-06, "loss": 0.2698, "step": 16058 }, { "epoch": 0.5510981468771449, "grad_norm": 0.8037850836525904, "learning_rate": 4.417521767565545e-06, "loss": 0.2876, "step": 16059 }, { "epoch": 0.5511324639670556, "grad_norm": 0.9612176589787982, "learning_rate": 4.4169698160638045e-06, "loss": 0.2967, "step": 16060 }, { "epoch": 0.5511667810569664, "grad_norm": 0.7452156233047265, "learning_rate": 4.416417871764726e-06, "loss": 0.2711, "step": 16061 }, { "epoch": 0.5512010981468771, "grad_norm": 0.7435881580390951, "learning_rate": 4.415865934675122e-06, "loss": 0.3052, "step": 16062 }, { "epoch": 0.5512354152367879, "grad_norm": 0.8578844310528805, "learning_rate": 4.415314004801813e-06, "loss": 0.2708, "step": 16063 }, { "epoch": 0.5512697323266987, "grad_norm": 0.8682916681136655, "learning_rate": 4.41476208215162e-06, "loss": 0.2826, "step": 16064 }, { "epoch": 0.5513040494166095, "grad_norm": 0.8268668335017423, "learning_rate": 4.414210166731357e-06, "loss": 0.2714, "step": 16065 }, { "epoch": 0.5513383665065202, "grad_norm": 0.741758365360034, "learning_rate": 4.413658258547844e-06, "loss": 0.2515, "step": 16066 }, { "epoch": 0.551372683596431, "grad_norm": 0.717121505623656, "learning_rate": 4.413106357607901e-06, "loss": 0.25, "step": 16067 }, { "epoch": 0.5514070006863419, "grad_norm": 0.7353724338745293, "learning_rate": 4.412554463918346e-06, "loss": 0.2829, "step": 16068 }, { "epoch": 0.5514413177762526, "grad_norm": 0.8185885743421168, "learning_rate": 4.4120025774859926e-06, "loss": 0.3224, "step": 16069 }, { "epoch": 0.5514756348661634, "grad_norm": 0.6990042775828286, "learning_rate": 4.411450698317664e-06, "loss": 0.2644, "step": 16070 }, { "epoch": 0.5515099519560741, "grad_norm": 0.8626150907274589, "learning_rate": 4.410898826420175e-06, "loss": 0.2987, "step": 16071 }, { "epoch": 0.5515442690459849, "grad_norm": 0.8807819139635437, "learning_rate": 4.410346961800344e-06, "loss": 0.3222, "step": 16072 }, { "epoch": 0.5515785861358957, "grad_norm": 0.8785157631835242, "learning_rate": 4.4097951044649895e-06, "loss": 0.283, "step": 16073 }, { "epoch": 0.5516129032258065, "grad_norm": 0.7638234622438864, "learning_rate": 4.4092432544209284e-06, "loss": 0.2888, "step": 16074 }, { "epoch": 0.5516472203157172, "grad_norm": 0.7786408808154408, "learning_rate": 4.408691411674977e-06, "loss": 0.2762, "step": 16075 }, { "epoch": 0.551681537405628, "grad_norm": 0.8222596615971077, "learning_rate": 4.408139576233955e-06, "loss": 0.2535, "step": 16076 }, { "epoch": 0.5517158544955387, "grad_norm": 0.7777967765326314, "learning_rate": 4.407587748104677e-06, "loss": 0.2721, "step": 16077 }, { "epoch": 0.5517501715854496, "grad_norm": 0.7231086409748706, "learning_rate": 4.4070359272939635e-06, "loss": 0.2626, "step": 16078 }, { "epoch": 0.5517844886753603, "grad_norm": 0.7891336991625233, "learning_rate": 4.406484113808629e-06, "loss": 0.2414, "step": 16079 }, { "epoch": 0.5518188057652711, "grad_norm": 0.7930850308373217, "learning_rate": 4.405932307655491e-06, "loss": 0.2891, "step": 16080 }, { "epoch": 0.5518531228551818, "grad_norm": 0.68317129941549, "learning_rate": 4.405380508841367e-06, "loss": 0.2838, "step": 16081 }, { "epoch": 0.5518874399450927, "grad_norm": 0.8328261909899699, "learning_rate": 4.4048287173730744e-06, "loss": 0.3017, "step": 16082 }, { "epoch": 0.5519217570350035, "grad_norm": 0.7713777560445818, "learning_rate": 4.404276933257427e-06, "loss": 0.2962, "step": 16083 }, { "epoch": 0.5519560741249142, "grad_norm": 0.7566383664778568, "learning_rate": 4.403725156501246e-06, "loss": 0.2857, "step": 16084 }, { "epoch": 0.551990391214825, "grad_norm": 0.7869106955200164, "learning_rate": 4.403173387111346e-06, "loss": 0.3193, "step": 16085 }, { "epoch": 0.5520247083047357, "grad_norm": 0.8302723637328594, "learning_rate": 4.40262162509454e-06, "loss": 0.3197, "step": 16086 }, { "epoch": 0.5520590253946466, "grad_norm": 0.7691866521377942, "learning_rate": 4.4020698704576484e-06, "loss": 0.3045, "step": 16087 }, { "epoch": 0.5520933424845573, "grad_norm": 0.7181195335165865, "learning_rate": 4.401518123207487e-06, "loss": 0.263, "step": 16088 }, { "epoch": 0.5521276595744681, "grad_norm": 0.8621855023339628, "learning_rate": 4.4009663833508694e-06, "loss": 0.3004, "step": 16089 }, { "epoch": 0.5521619766643788, "grad_norm": 0.7031349505796984, "learning_rate": 4.400414650894617e-06, "loss": 0.2399, "step": 16090 }, { "epoch": 0.5521962937542897, "grad_norm": 0.7045738984028467, "learning_rate": 4.399862925845542e-06, "loss": 0.2722, "step": 16091 }, { "epoch": 0.5522306108442004, "grad_norm": 0.8134453431907137, "learning_rate": 4.3993112082104584e-06, "loss": 0.301, "step": 16092 }, { "epoch": 0.5522649279341112, "grad_norm": 0.7256517259418361, "learning_rate": 4.398759497996187e-06, "loss": 0.2995, "step": 16093 }, { "epoch": 0.552299245024022, "grad_norm": 0.7540033838653708, "learning_rate": 4.39820779520954e-06, "loss": 0.2729, "step": 16094 }, { "epoch": 0.5523335621139327, "grad_norm": 0.7361290653594059, "learning_rate": 4.397656099857334e-06, "loss": 0.2676, "step": 16095 }, { "epoch": 0.5523678792038436, "grad_norm": 0.7207612717213786, "learning_rate": 4.397104411946384e-06, "loss": 0.2583, "step": 16096 }, { "epoch": 0.5524021962937543, "grad_norm": 0.770477806542101, "learning_rate": 4.396552731483508e-06, "loss": 0.2507, "step": 16097 }, { "epoch": 0.5524365133836651, "grad_norm": 0.7776232919739653, "learning_rate": 4.396001058475517e-06, "loss": 0.2925, "step": 16098 }, { "epoch": 0.5524708304735758, "grad_norm": 0.7532908985543594, "learning_rate": 4.395449392929231e-06, "loss": 0.2952, "step": 16099 }, { "epoch": 0.5525051475634866, "grad_norm": 0.7625754155927674, "learning_rate": 4.394897734851463e-06, "loss": 0.3209, "step": 16100 }, { "epoch": 0.5525394646533974, "grad_norm": 0.7994563758467298, "learning_rate": 4.394346084249025e-06, "loss": 0.3166, "step": 16101 }, { "epoch": 0.5525737817433082, "grad_norm": 0.7189289486312743, "learning_rate": 4.393794441128736e-06, "loss": 0.2468, "step": 16102 }, { "epoch": 0.5526080988332189, "grad_norm": 0.7904345013436277, "learning_rate": 4.393242805497412e-06, "loss": 0.2908, "step": 16103 }, { "epoch": 0.5526424159231297, "grad_norm": 0.7466180605234544, "learning_rate": 4.3926911773618635e-06, "loss": 0.2514, "step": 16104 }, { "epoch": 0.5526767330130405, "grad_norm": 0.7395445754308165, "learning_rate": 4.39213955672891e-06, "loss": 0.2739, "step": 16105 }, { "epoch": 0.5527110501029513, "grad_norm": 0.7506550459656659, "learning_rate": 4.391587943605362e-06, "loss": 0.2751, "step": 16106 }, { "epoch": 0.552745367192862, "grad_norm": 0.7316801214807226, "learning_rate": 4.391036337998035e-06, "loss": 0.283, "step": 16107 }, { "epoch": 0.5527796842827728, "grad_norm": 0.8328374204060166, "learning_rate": 4.390484739913745e-06, "loss": 0.292, "step": 16108 }, { "epoch": 0.5528140013726835, "grad_norm": 0.7843210533124306, "learning_rate": 4.3899331493593046e-06, "loss": 0.2395, "step": 16109 }, { "epoch": 0.5528483184625944, "grad_norm": 0.85314635701201, "learning_rate": 4.389381566341529e-06, "loss": 0.2867, "step": 16110 }, { "epoch": 0.5528826355525052, "grad_norm": 0.7426904247983562, "learning_rate": 4.388829990867233e-06, "loss": 0.2754, "step": 16111 }, { "epoch": 0.5529169526424159, "grad_norm": 0.6890400474291886, "learning_rate": 4.388278422943229e-06, "loss": 0.231, "step": 16112 }, { "epoch": 0.5529512697323267, "grad_norm": 0.8013583528020423, "learning_rate": 4.387726862576333e-06, "loss": 0.3097, "step": 16113 }, { "epoch": 0.5529855868222375, "grad_norm": 0.8646255471744567, "learning_rate": 4.387175309773358e-06, "loss": 0.2969, "step": 16114 }, { "epoch": 0.5530199039121483, "grad_norm": 0.8390254889972903, "learning_rate": 4.386623764541115e-06, "loss": 0.2885, "step": 16115 }, { "epoch": 0.553054221002059, "grad_norm": 0.8484868873283408, "learning_rate": 4.3860722268864234e-06, "loss": 0.2507, "step": 16116 }, { "epoch": 0.5530885380919698, "grad_norm": 0.7605863415640463, "learning_rate": 4.385520696816092e-06, "loss": 0.2607, "step": 16117 }, { "epoch": 0.5531228551818805, "grad_norm": 0.7463441067722372, "learning_rate": 4.384969174336935e-06, "loss": 0.248, "step": 16118 }, { "epoch": 0.5531571722717914, "grad_norm": 0.8053365766613537, "learning_rate": 4.3844176594557695e-06, "loss": 0.2793, "step": 16119 }, { "epoch": 0.5531914893617021, "grad_norm": 0.777452644150478, "learning_rate": 4.3838661521794055e-06, "loss": 0.3232, "step": 16120 }, { "epoch": 0.5532258064516129, "grad_norm": 0.7426094602085532, "learning_rate": 4.383314652514654e-06, "loss": 0.266, "step": 16121 }, { "epoch": 0.5532601235415237, "grad_norm": 0.7811215054663455, "learning_rate": 4.382763160468334e-06, "loss": 0.2782, "step": 16122 }, { "epoch": 0.5532944406314344, "grad_norm": 0.7771677025848186, "learning_rate": 4.382211676047254e-06, "loss": 0.3158, "step": 16123 }, { "epoch": 0.5533287577213453, "grad_norm": 0.8034607158557433, "learning_rate": 4.38166019925823e-06, "loss": 0.3289, "step": 16124 }, { "epoch": 0.553363074811256, "grad_norm": 0.7740685964980583, "learning_rate": 4.381108730108072e-06, "loss": 0.264, "step": 16125 }, { "epoch": 0.5533973919011668, "grad_norm": 0.8150500716753923, "learning_rate": 4.3805572686035944e-06, "loss": 0.2575, "step": 16126 }, { "epoch": 0.5534317089910775, "grad_norm": 0.7456510204834542, "learning_rate": 4.380005814751609e-06, "loss": 0.3245, "step": 16127 }, { "epoch": 0.5534660260809884, "grad_norm": 0.7833880081367841, "learning_rate": 4.379454368558931e-06, "loss": 0.2995, "step": 16128 }, { "epoch": 0.5535003431708991, "grad_norm": 0.7951740343174547, "learning_rate": 4.378902930032369e-06, "loss": 0.235, "step": 16129 }, { "epoch": 0.5535346602608099, "grad_norm": 0.8670275299523285, "learning_rate": 4.3783514991787365e-06, "loss": 0.2692, "step": 16130 }, { "epoch": 0.5535689773507206, "grad_norm": 0.9204049763802044, "learning_rate": 4.377800076004848e-06, "loss": 0.2721, "step": 16131 }, { "epoch": 0.5536032944406314, "grad_norm": 0.799260131991508, "learning_rate": 4.3772486605175135e-06, "loss": 0.3017, "step": 16132 }, { "epoch": 0.5536376115305423, "grad_norm": 0.7876619856746269, "learning_rate": 4.376697252723545e-06, "loss": 0.3297, "step": 16133 }, { "epoch": 0.553671928620453, "grad_norm": 0.73924271302554, "learning_rate": 4.3761458526297565e-06, "loss": 0.2836, "step": 16134 }, { "epoch": 0.5537062457103638, "grad_norm": 0.6565364472762093, "learning_rate": 4.375594460242959e-06, "loss": 0.2337, "step": 16135 }, { "epoch": 0.5537405628002745, "grad_norm": 0.8526187414848572, "learning_rate": 4.375043075569962e-06, "loss": 0.2837, "step": 16136 }, { "epoch": 0.5537748798901854, "grad_norm": 0.7009615782022446, "learning_rate": 4.374491698617581e-06, "loss": 0.2875, "step": 16137 }, { "epoch": 0.5538091969800961, "grad_norm": 0.7844661831502708, "learning_rate": 4.373940329392624e-06, "loss": 0.2905, "step": 16138 }, { "epoch": 0.5538435140700069, "grad_norm": 0.730646668723309, "learning_rate": 4.373388967901906e-06, "loss": 0.2266, "step": 16139 }, { "epoch": 0.5538778311599176, "grad_norm": 0.7435595316247677, "learning_rate": 4.372837614152236e-06, "loss": 0.2802, "step": 16140 }, { "epoch": 0.5539121482498284, "grad_norm": 0.7972103979668352, "learning_rate": 4.372286268150426e-06, "loss": 0.3587, "step": 16141 }, { "epoch": 0.5539464653397392, "grad_norm": 0.7730196516979805, "learning_rate": 4.371734929903288e-06, "loss": 0.2793, "step": 16142 }, { "epoch": 0.55398078242965, "grad_norm": 0.7345244747620336, "learning_rate": 4.371183599417633e-06, "loss": 0.2455, "step": 16143 }, { "epoch": 0.5540150995195607, "grad_norm": 0.7433033440231462, "learning_rate": 4.370632276700268e-06, "loss": 0.2442, "step": 16144 }, { "epoch": 0.5540494166094715, "grad_norm": 0.7954181644617689, "learning_rate": 4.370080961758011e-06, "loss": 0.2731, "step": 16145 }, { "epoch": 0.5540837336993822, "grad_norm": 0.7164191713253886, "learning_rate": 4.369529654597669e-06, "loss": 0.2936, "step": 16146 }, { "epoch": 0.5541180507892931, "grad_norm": 0.7960349512915486, "learning_rate": 4.3689783552260514e-06, "loss": 0.2469, "step": 16147 }, { "epoch": 0.5541523678792039, "grad_norm": 0.7929370921837954, "learning_rate": 4.3684270636499716e-06, "loss": 0.2933, "step": 16148 }, { "epoch": 0.5541866849691146, "grad_norm": 0.7561368008628764, "learning_rate": 4.367875779876239e-06, "loss": 0.3004, "step": 16149 }, { "epoch": 0.5542210020590254, "grad_norm": 0.7810124697902001, "learning_rate": 4.367324503911663e-06, "loss": 0.2234, "step": 16150 }, { "epoch": 0.5542553191489362, "grad_norm": 0.647263868824923, "learning_rate": 4.366773235763058e-06, "loss": 0.2674, "step": 16151 }, { "epoch": 0.554289636238847, "grad_norm": 0.8156728676793993, "learning_rate": 4.36622197543723e-06, "loss": 0.2648, "step": 16152 }, { "epoch": 0.5543239533287577, "grad_norm": 0.8396547559011067, "learning_rate": 4.365670722940989e-06, "loss": 0.2681, "step": 16153 }, { "epoch": 0.5543582704186685, "grad_norm": 0.755139514225257, "learning_rate": 4.365119478281148e-06, "loss": 0.3127, "step": 16154 }, { "epoch": 0.5543925875085792, "grad_norm": 0.9779823695824512, "learning_rate": 4.364568241464516e-06, "loss": 0.3255, "step": 16155 }, { "epoch": 0.5544269045984901, "grad_norm": 0.9038299419746837, "learning_rate": 4.364017012497901e-06, "loss": 0.2775, "step": 16156 }, { "epoch": 0.5544612216884008, "grad_norm": 0.803794624109316, "learning_rate": 4.363465791388116e-06, "loss": 0.2808, "step": 16157 }, { "epoch": 0.5544955387783116, "grad_norm": 0.6958965674346779, "learning_rate": 4.362914578141969e-06, "loss": 0.2841, "step": 16158 }, { "epoch": 0.5545298558682223, "grad_norm": 0.7722034634387528, "learning_rate": 4.362363372766268e-06, "loss": 0.2819, "step": 16159 }, { "epoch": 0.5545641729581332, "grad_norm": 0.7693220493103677, "learning_rate": 4.361812175267826e-06, "loss": 0.2827, "step": 16160 }, { "epoch": 0.554598490048044, "grad_norm": 0.9625221421525307, "learning_rate": 4.361260985653449e-06, "loss": 0.2509, "step": 16161 }, { "epoch": 0.5546328071379547, "grad_norm": 0.7331517325126743, "learning_rate": 4.360709803929948e-06, "loss": 0.3201, "step": 16162 }, { "epoch": 0.5546671242278655, "grad_norm": 0.8343713199969623, "learning_rate": 4.360158630104133e-06, "loss": 0.3312, "step": 16163 }, { "epoch": 0.5547014413177762, "grad_norm": 0.8019857501018098, "learning_rate": 4.359607464182812e-06, "loss": 0.2244, "step": 16164 }, { "epoch": 0.5547357584076871, "grad_norm": 0.7338231375711343, "learning_rate": 4.359056306172793e-06, "loss": 0.2774, "step": 16165 }, { "epoch": 0.5547700754975978, "grad_norm": 0.8241943818713434, "learning_rate": 4.358505156080888e-06, "loss": 0.3207, "step": 16166 }, { "epoch": 0.5548043925875086, "grad_norm": 0.7795216643006039, "learning_rate": 4.357954013913903e-06, "loss": 0.2799, "step": 16167 }, { "epoch": 0.5548387096774193, "grad_norm": 0.9035336529216335, "learning_rate": 4.357402879678647e-06, "loss": 0.3042, "step": 16168 }, { "epoch": 0.5548730267673301, "grad_norm": 0.8054985853001918, "learning_rate": 4.356851753381928e-06, "loss": 0.2578, "step": 16169 }, { "epoch": 0.5549073438572409, "grad_norm": 0.7261307653546222, "learning_rate": 4.356300635030558e-06, "loss": 0.2415, "step": 16170 }, { "epoch": 0.5549416609471517, "grad_norm": 0.8359844714595893, "learning_rate": 4.355749524631342e-06, "loss": 0.2733, "step": 16171 }, { "epoch": 0.5549759780370624, "grad_norm": 0.7274519362692661, "learning_rate": 4.355198422191091e-06, "loss": 0.2632, "step": 16172 }, { "epoch": 0.5550102951269732, "grad_norm": 0.7443995306298385, "learning_rate": 4.354647327716609e-06, "loss": 0.2839, "step": 16173 }, { "epoch": 0.5550446122168841, "grad_norm": 0.7632345386628511, "learning_rate": 4.35409624121471e-06, "loss": 0.2637, "step": 16174 }, { "epoch": 0.5550789293067948, "grad_norm": 0.7356903212170758, "learning_rate": 4.353545162692199e-06, "loss": 0.3019, "step": 16175 }, { "epoch": 0.5551132463967056, "grad_norm": 0.7618048343255399, "learning_rate": 4.35299409215588e-06, "loss": 0.2467, "step": 16176 }, { "epoch": 0.5551475634866163, "grad_norm": 0.7971200008375441, "learning_rate": 4.352443029612568e-06, "loss": 0.2732, "step": 16177 }, { "epoch": 0.5551818805765271, "grad_norm": 0.7677290149277333, "learning_rate": 4.351891975069066e-06, "loss": 0.2404, "step": 16178 }, { "epoch": 0.5552161976664379, "grad_norm": 1.032213089549237, "learning_rate": 4.351340928532183e-06, "loss": 0.2797, "step": 16179 }, { "epoch": 0.5552505147563487, "grad_norm": 0.8481555324649871, "learning_rate": 4.350789890008727e-06, "loss": 0.2677, "step": 16180 }, { "epoch": 0.5552848318462594, "grad_norm": 0.7951240855684638, "learning_rate": 4.350238859505506e-06, "loss": 0.2801, "step": 16181 }, { "epoch": 0.5553191489361702, "grad_norm": 0.8822069670645492, "learning_rate": 4.349687837029324e-06, "loss": 0.2673, "step": 16182 }, { "epoch": 0.5553534660260809, "grad_norm": 0.879283086841569, "learning_rate": 4.3491368225869925e-06, "loss": 0.2775, "step": 16183 }, { "epoch": 0.5553877831159918, "grad_norm": 0.8249218345460628, "learning_rate": 4.348585816185316e-06, "loss": 0.3031, "step": 16184 }, { "epoch": 0.5554221002059025, "grad_norm": 0.6877861718916684, "learning_rate": 4.348034817831101e-06, "loss": 0.2823, "step": 16185 }, { "epoch": 0.5554564172958133, "grad_norm": 0.8214267607539216, "learning_rate": 4.3474838275311585e-06, "loss": 0.2614, "step": 16186 }, { "epoch": 0.555490734385724, "grad_norm": 0.87653707588759, "learning_rate": 4.346932845292292e-06, "loss": 0.3791, "step": 16187 }, { "epoch": 0.5555250514756349, "grad_norm": 0.7961612212409424, "learning_rate": 4.3463818711213066e-06, "loss": 0.2602, "step": 16188 }, { "epoch": 0.5555593685655457, "grad_norm": 0.7455731320496949, "learning_rate": 4.345830905025013e-06, "loss": 0.2975, "step": 16189 }, { "epoch": 0.5555936856554564, "grad_norm": 0.7691498501864622, "learning_rate": 4.345279947010215e-06, "loss": 0.3257, "step": 16190 }, { "epoch": 0.5556280027453672, "grad_norm": 0.6801233559015687, "learning_rate": 4.34472899708372e-06, "loss": 0.2794, "step": 16191 }, { "epoch": 0.5556623198352779, "grad_norm": 0.8351118325956632, "learning_rate": 4.344178055252335e-06, "loss": 0.3346, "step": 16192 }, { "epoch": 0.5556966369251888, "grad_norm": 0.8669968071746414, "learning_rate": 4.343627121522866e-06, "loss": 0.2788, "step": 16193 }, { "epoch": 0.5557309540150995, "grad_norm": 0.7711233555473895, "learning_rate": 4.3430761959021165e-06, "loss": 0.2775, "step": 16194 }, { "epoch": 0.5557652711050103, "grad_norm": 0.7749173161999782, "learning_rate": 4.342525278396897e-06, "loss": 0.2482, "step": 16195 }, { "epoch": 0.555799588194921, "grad_norm": 0.7431052038093254, "learning_rate": 4.3419743690140105e-06, "loss": 0.3338, "step": 16196 }, { "epoch": 0.5558339052848319, "grad_norm": 0.7299551177098818, "learning_rate": 4.341423467760261e-06, "loss": 0.3006, "step": 16197 }, { "epoch": 0.5558682223747426, "grad_norm": 0.7454282831363398, "learning_rate": 4.340872574642459e-06, "loss": 0.3016, "step": 16198 }, { "epoch": 0.5559025394646534, "grad_norm": 0.7698155875217422, "learning_rate": 4.340321689667409e-06, "loss": 0.2605, "step": 16199 }, { "epoch": 0.5559368565545642, "grad_norm": 0.7161859030604708, "learning_rate": 4.3397708128419126e-06, "loss": 0.2487, "step": 16200 }, { "epoch": 0.5559711736444749, "grad_norm": 0.7377644269830156, "learning_rate": 4.33921994417278e-06, "loss": 0.2739, "step": 16201 }, { "epoch": 0.5560054907343858, "grad_norm": 0.7026865446895245, "learning_rate": 4.3386690836668125e-06, "loss": 0.2812, "step": 16202 }, { "epoch": 0.5560398078242965, "grad_norm": 0.8412583494086107, "learning_rate": 4.33811823133082e-06, "loss": 0.2913, "step": 16203 }, { "epoch": 0.5560741249142073, "grad_norm": 0.907630818677615, "learning_rate": 4.337567387171605e-06, "loss": 0.3138, "step": 16204 }, { "epoch": 0.556108442004118, "grad_norm": 0.8237710252750134, "learning_rate": 4.33701655119597e-06, "loss": 0.3228, "step": 16205 }, { "epoch": 0.5561427590940288, "grad_norm": 0.7343410449663065, "learning_rate": 4.336465723410726e-06, "loss": 0.2779, "step": 16206 }, { "epoch": 0.5561770761839396, "grad_norm": 0.7729180194095521, "learning_rate": 4.335914903822672e-06, "loss": 0.2588, "step": 16207 }, { "epoch": 0.5562113932738504, "grad_norm": 0.7736136022307153, "learning_rate": 4.335364092438615e-06, "loss": 0.2879, "step": 16208 }, { "epoch": 0.5562457103637611, "grad_norm": 0.8949726974456691, "learning_rate": 4.334813289265361e-06, "loss": 0.3214, "step": 16209 }, { "epoch": 0.5562800274536719, "grad_norm": 0.8076319947412659, "learning_rate": 4.334262494309713e-06, "loss": 0.2966, "step": 16210 }, { "epoch": 0.5563143445435828, "grad_norm": 0.7474664580218799, "learning_rate": 4.333711707578475e-06, "loss": 0.2752, "step": 16211 }, { "epoch": 0.5563486616334935, "grad_norm": 0.7564719071072024, "learning_rate": 4.333160929078453e-06, "loss": 0.264, "step": 16212 }, { "epoch": 0.5563829787234043, "grad_norm": 0.7697491656304746, "learning_rate": 4.33261015881645e-06, "loss": 0.2444, "step": 16213 }, { "epoch": 0.556417295813315, "grad_norm": 0.7216164955479869, "learning_rate": 4.33205939679927e-06, "loss": 0.2664, "step": 16214 }, { "epoch": 0.5564516129032258, "grad_norm": 0.9000388930660987, "learning_rate": 4.331508643033717e-06, "loss": 0.2989, "step": 16215 }, { "epoch": 0.5564859299931366, "grad_norm": 0.714517630714107, "learning_rate": 4.330957897526597e-06, "loss": 0.251, "step": 16216 }, { "epoch": 0.5565202470830474, "grad_norm": 0.7113155640806635, "learning_rate": 4.33040716028471e-06, "loss": 0.2636, "step": 16217 }, { "epoch": 0.5565545641729581, "grad_norm": 0.7505102740909242, "learning_rate": 4.329856431314863e-06, "loss": 0.3399, "step": 16218 }, { "epoch": 0.5565888812628689, "grad_norm": 0.8185178934148906, "learning_rate": 4.32930571062386e-06, "loss": 0.267, "step": 16219 }, { "epoch": 0.5566231983527797, "grad_norm": 0.858681730373104, "learning_rate": 4.3287549982185e-06, "loss": 0.2671, "step": 16220 }, { "epoch": 0.5566575154426905, "grad_norm": 0.7116254014485474, "learning_rate": 4.32820429410559e-06, "loss": 0.243, "step": 16221 }, { "epoch": 0.5566918325326012, "grad_norm": 0.842162374032902, "learning_rate": 4.327653598291935e-06, "loss": 0.2799, "step": 16222 }, { "epoch": 0.556726149622512, "grad_norm": 0.8862688762061979, "learning_rate": 4.327102910784333e-06, "loss": 0.2855, "step": 16223 }, { "epoch": 0.5567604667124227, "grad_norm": 0.7313019706708133, "learning_rate": 4.326552231589593e-06, "loss": 0.2629, "step": 16224 }, { "epoch": 0.5567947838023336, "grad_norm": 0.8103768464860132, "learning_rate": 4.326001560714513e-06, "loss": 0.2945, "step": 16225 }, { "epoch": 0.5568291008922444, "grad_norm": 0.8125908680347929, "learning_rate": 4.325450898165898e-06, "loss": 0.2854, "step": 16226 }, { "epoch": 0.5568634179821551, "grad_norm": 0.8412329139779648, "learning_rate": 4.324900243950551e-06, "loss": 0.2934, "step": 16227 }, { "epoch": 0.5568977350720659, "grad_norm": 0.7523922271267064, "learning_rate": 4.324349598075275e-06, "loss": 0.2511, "step": 16228 }, { "epoch": 0.5569320521619766, "grad_norm": 0.716959402285927, "learning_rate": 4.32379896054687e-06, "loss": 0.2908, "step": 16229 }, { "epoch": 0.5569663692518875, "grad_norm": 0.8461271609926517, "learning_rate": 4.323248331372143e-06, "loss": 0.2414, "step": 16230 }, { "epoch": 0.5570006863417982, "grad_norm": 0.7823524876021997, "learning_rate": 4.322697710557894e-06, "loss": 0.2891, "step": 16231 }, { "epoch": 0.557035003431709, "grad_norm": 0.7940075017697972, "learning_rate": 4.322147098110922e-06, "loss": 0.3036, "step": 16232 }, { "epoch": 0.5570693205216197, "grad_norm": 0.8642573782395562, "learning_rate": 4.321596494038035e-06, "loss": 0.3166, "step": 16233 }, { "epoch": 0.5571036376115306, "grad_norm": 0.7121984353945684, "learning_rate": 4.32104589834603e-06, "loss": 0.2659, "step": 16234 }, { "epoch": 0.5571379547014413, "grad_norm": 0.7238532843572709, "learning_rate": 4.320495311041714e-06, "loss": 0.2345, "step": 16235 }, { "epoch": 0.5571722717913521, "grad_norm": 0.8460873430229695, "learning_rate": 4.3199447321318856e-06, "loss": 0.2723, "step": 16236 }, { "epoch": 0.5572065888812628, "grad_norm": 0.8157996151078807, "learning_rate": 4.319394161623345e-06, "loss": 0.2664, "step": 16237 }, { "epoch": 0.5572409059711736, "grad_norm": 0.7301531840863267, "learning_rate": 4.318843599522898e-06, "loss": 0.2255, "step": 16238 }, { "epoch": 0.5572752230610845, "grad_norm": 0.8050061676171859, "learning_rate": 4.318293045837345e-06, "loss": 0.2797, "step": 16239 }, { "epoch": 0.5573095401509952, "grad_norm": 0.8536924429330036, "learning_rate": 4.317742500573484e-06, "loss": 0.2726, "step": 16240 }, { "epoch": 0.557343857240906, "grad_norm": 0.809204083155292, "learning_rate": 4.317191963738122e-06, "loss": 0.2567, "step": 16241 }, { "epoch": 0.5573781743308167, "grad_norm": 0.7353752370864809, "learning_rate": 4.316641435338057e-06, "loss": 0.3299, "step": 16242 }, { "epoch": 0.5574124914207276, "grad_norm": 0.7754710741757399, "learning_rate": 4.316090915380088e-06, "loss": 0.2888, "step": 16243 }, { "epoch": 0.5574468085106383, "grad_norm": 0.8019482915343236, "learning_rate": 4.315540403871019e-06, "loss": 0.3131, "step": 16244 }, { "epoch": 0.5574811256005491, "grad_norm": 0.7800146067878108, "learning_rate": 4.314989900817651e-06, "loss": 0.304, "step": 16245 }, { "epoch": 0.5575154426904598, "grad_norm": 0.8830731127066735, "learning_rate": 4.314439406226783e-06, "loss": 0.2927, "step": 16246 }, { "epoch": 0.5575497597803706, "grad_norm": 0.786896384954378, "learning_rate": 4.313888920105218e-06, "loss": 0.2466, "step": 16247 }, { "epoch": 0.5575840768702814, "grad_norm": 0.8342290972606643, "learning_rate": 4.313338442459756e-06, "loss": 0.2549, "step": 16248 }, { "epoch": 0.5576183939601922, "grad_norm": 0.7997675978503771, "learning_rate": 4.312787973297195e-06, "loss": 0.2529, "step": 16249 }, { "epoch": 0.557652711050103, "grad_norm": 0.723069162939137, "learning_rate": 4.31223751262434e-06, "loss": 0.2404, "step": 16250 }, { "epoch": 0.5576870281400137, "grad_norm": 0.8040301893100659, "learning_rate": 4.311687060447986e-06, "loss": 0.3086, "step": 16251 }, { "epoch": 0.5577213452299244, "grad_norm": 0.784774124530361, "learning_rate": 4.311136616774936e-06, "loss": 0.2432, "step": 16252 }, { "epoch": 0.5577556623198353, "grad_norm": 0.7516724437636322, "learning_rate": 4.310586181611992e-06, "loss": 0.2892, "step": 16253 }, { "epoch": 0.5577899794097461, "grad_norm": 0.8069804460029866, "learning_rate": 4.310035754965952e-06, "loss": 0.2942, "step": 16254 }, { "epoch": 0.5578242964996568, "grad_norm": 0.7889784626166855, "learning_rate": 4.309485336843614e-06, "loss": 0.2825, "step": 16255 }, { "epoch": 0.5578586135895676, "grad_norm": 0.8509367285601593, "learning_rate": 4.308934927251781e-06, "loss": 0.3229, "step": 16256 }, { "epoch": 0.5578929306794784, "grad_norm": 0.8054512202582157, "learning_rate": 4.30838452619725e-06, "loss": 0.296, "step": 16257 }, { "epoch": 0.5579272477693892, "grad_norm": 0.7423565833212606, "learning_rate": 4.307834133686823e-06, "loss": 0.2749, "step": 16258 }, { "epoch": 0.5579615648592999, "grad_norm": 0.8446317751540162, "learning_rate": 4.307283749727298e-06, "loss": 0.3387, "step": 16259 }, { "epoch": 0.5579958819492107, "grad_norm": 0.7953233948580294, "learning_rate": 4.306733374325475e-06, "loss": 0.2874, "step": 16260 }, { "epoch": 0.5580301990391214, "grad_norm": 0.7517844007222804, "learning_rate": 4.30618300748815e-06, "loss": 0.2795, "step": 16261 }, { "epoch": 0.5580645161290323, "grad_norm": 0.8090546041578124, "learning_rate": 4.305632649222129e-06, "loss": 0.2091, "step": 16262 }, { "epoch": 0.558098833218943, "grad_norm": 0.8038075028537669, "learning_rate": 4.305082299534206e-06, "loss": 0.2834, "step": 16263 }, { "epoch": 0.5581331503088538, "grad_norm": 0.8651745302714285, "learning_rate": 4.30453195843118e-06, "loss": 0.322, "step": 16264 }, { "epoch": 0.5581674673987645, "grad_norm": 0.8015527840781478, "learning_rate": 4.303981625919851e-06, "loss": 0.364, "step": 16265 }, { "epoch": 0.5582017844886754, "grad_norm": 0.7193377173206935, "learning_rate": 4.303431302007017e-06, "loss": 0.3013, "step": 16266 }, { "epoch": 0.5582361015785862, "grad_norm": 0.7846151970936139, "learning_rate": 4.302880986699478e-06, "loss": 0.2888, "step": 16267 }, { "epoch": 0.5582704186684969, "grad_norm": 0.7621589317326647, "learning_rate": 4.3023306800040325e-06, "loss": 0.2423, "step": 16268 }, { "epoch": 0.5583047357584077, "grad_norm": 0.895840103262716, "learning_rate": 4.301780381927476e-06, "loss": 0.2675, "step": 16269 }, { "epoch": 0.5583390528483184, "grad_norm": 0.8789110919926699, "learning_rate": 4.30123009247661e-06, "loss": 0.2718, "step": 16270 }, { "epoch": 0.5583733699382293, "grad_norm": 0.7448599370773958, "learning_rate": 4.300679811658233e-06, "loss": 0.291, "step": 16271 }, { "epoch": 0.55840768702814, "grad_norm": 0.6941701524935576, "learning_rate": 4.300129539479139e-06, "loss": 0.2955, "step": 16272 }, { "epoch": 0.5584420041180508, "grad_norm": 0.8835934278624071, "learning_rate": 4.299579275946129e-06, "loss": 0.2964, "step": 16273 }, { "epoch": 0.5584763212079615, "grad_norm": 0.7173042375706843, "learning_rate": 4.299029021066002e-06, "loss": 0.2469, "step": 16274 }, { "epoch": 0.5585106382978723, "grad_norm": 0.6866633989771836, "learning_rate": 4.298478774845552e-06, "loss": 0.2602, "step": 16275 }, { "epoch": 0.5585449553877831, "grad_norm": 0.8258266301904434, "learning_rate": 4.2979285372915805e-06, "loss": 0.3163, "step": 16276 }, { "epoch": 0.5585792724776939, "grad_norm": 0.7679927008291698, "learning_rate": 4.297378308410884e-06, "loss": 0.2337, "step": 16277 }, { "epoch": 0.5586135895676047, "grad_norm": 0.7504942345953898, "learning_rate": 4.296828088210258e-06, "loss": 0.2995, "step": 16278 }, { "epoch": 0.5586479066575154, "grad_norm": 0.8239508398809682, "learning_rate": 4.296277876696502e-06, "loss": 0.265, "step": 16279 }, { "epoch": 0.5586822237474263, "grad_norm": 0.7978585733446347, "learning_rate": 4.2957276738764116e-06, "loss": 0.2695, "step": 16280 }, { "epoch": 0.558716540837337, "grad_norm": 0.8314292524228486, "learning_rate": 4.295177479756784e-06, "loss": 0.2669, "step": 16281 }, { "epoch": 0.5587508579272478, "grad_norm": 0.6723096476003988, "learning_rate": 4.294627294344418e-06, "loss": 0.2242, "step": 16282 }, { "epoch": 0.5587851750171585, "grad_norm": 0.7861694839085724, "learning_rate": 4.294077117646111e-06, "loss": 0.3206, "step": 16283 }, { "epoch": 0.5588194921070693, "grad_norm": 0.8381952842442801, "learning_rate": 4.2935269496686545e-06, "loss": 0.254, "step": 16284 }, { "epoch": 0.5588538091969801, "grad_norm": 0.8402729543557867, "learning_rate": 4.292976790418851e-06, "loss": 0.2876, "step": 16285 }, { "epoch": 0.5588881262868909, "grad_norm": 0.7870178351369992, "learning_rate": 4.292426639903496e-06, "loss": 0.339, "step": 16286 }, { "epoch": 0.5589224433768016, "grad_norm": 0.87348082009811, "learning_rate": 4.291876498129382e-06, "loss": 0.2721, "step": 16287 }, { "epoch": 0.5589567604667124, "grad_norm": 0.761401572945026, "learning_rate": 4.29132636510331e-06, "loss": 0.242, "step": 16288 }, { "epoch": 0.5589910775566233, "grad_norm": 0.8158033048679855, "learning_rate": 4.290776240832075e-06, "loss": 0.294, "step": 16289 }, { "epoch": 0.559025394646534, "grad_norm": 0.8670048614973641, "learning_rate": 4.290226125322471e-06, "loss": 0.2844, "step": 16290 }, { "epoch": 0.5590597117364448, "grad_norm": 0.9057096737864042, "learning_rate": 4.289676018581298e-06, "loss": 0.2972, "step": 16291 }, { "epoch": 0.5590940288263555, "grad_norm": 0.7703641946664931, "learning_rate": 4.289125920615349e-06, "loss": 0.3186, "step": 16292 }, { "epoch": 0.5591283459162663, "grad_norm": 0.8837763895630449, "learning_rate": 4.2885758314314184e-06, "loss": 0.3135, "step": 16293 }, { "epoch": 0.5591626630061771, "grad_norm": 0.7350032667811464, "learning_rate": 4.2880257510363066e-06, "loss": 0.2485, "step": 16294 }, { "epoch": 0.5591969800960879, "grad_norm": 0.7582484767039078, "learning_rate": 4.287475679436804e-06, "loss": 0.2504, "step": 16295 }, { "epoch": 0.5592312971859986, "grad_norm": 0.8231727185501038, "learning_rate": 4.28692561663971e-06, "loss": 0.2745, "step": 16296 }, { "epoch": 0.5592656142759094, "grad_norm": 0.8303378335131386, "learning_rate": 4.28637556265182e-06, "loss": 0.2601, "step": 16297 }, { "epoch": 0.5592999313658201, "grad_norm": 0.8825096796876833, "learning_rate": 4.285825517479925e-06, "loss": 0.344, "step": 16298 }, { "epoch": 0.559334248455731, "grad_norm": 0.8582158292492258, "learning_rate": 4.285275481130826e-06, "loss": 0.2871, "step": 16299 }, { "epoch": 0.5593685655456417, "grad_norm": 0.698912645086378, "learning_rate": 4.284725453611314e-06, "loss": 0.2558, "step": 16300 }, { "epoch": 0.5594028826355525, "grad_norm": 0.7464180601264238, "learning_rate": 4.284175434928184e-06, "loss": 0.3771, "step": 16301 }, { "epoch": 0.5594371997254632, "grad_norm": 0.6123961703159131, "learning_rate": 4.283625425088234e-06, "loss": 0.24, "step": 16302 }, { "epoch": 0.5594715168153741, "grad_norm": 0.7357303775638732, "learning_rate": 4.283075424098255e-06, "loss": 0.3172, "step": 16303 }, { "epoch": 0.5595058339052849, "grad_norm": 0.6726263963672386, "learning_rate": 4.282525431965043e-06, "loss": 0.2596, "step": 16304 }, { "epoch": 0.5595401509951956, "grad_norm": 0.6992279897942293, "learning_rate": 4.281975448695394e-06, "loss": 0.266, "step": 16305 }, { "epoch": 0.5595744680851064, "grad_norm": 0.7392732442451415, "learning_rate": 4.281425474296102e-06, "loss": 0.2701, "step": 16306 }, { "epoch": 0.5596087851750171, "grad_norm": 0.7584432612100211, "learning_rate": 4.280875508773958e-06, "loss": 0.2865, "step": 16307 }, { "epoch": 0.559643102264928, "grad_norm": 0.7494659545881381, "learning_rate": 4.2803255521357614e-06, "loss": 0.2598, "step": 16308 }, { "epoch": 0.5596774193548387, "grad_norm": 0.8290032158308722, "learning_rate": 4.279775604388301e-06, "loss": 0.2997, "step": 16309 }, { "epoch": 0.5597117364447495, "grad_norm": 0.9256459309950507, "learning_rate": 4.279225665538375e-06, "loss": 0.2713, "step": 16310 }, { "epoch": 0.5597460535346602, "grad_norm": 0.8208892865717058, "learning_rate": 4.278675735592775e-06, "loss": 0.2601, "step": 16311 }, { "epoch": 0.5597803706245711, "grad_norm": 0.8409315072853447, "learning_rate": 4.278125814558295e-06, "loss": 0.2701, "step": 16312 }, { "epoch": 0.5598146877144818, "grad_norm": 0.7229432462964557, "learning_rate": 4.277575902441728e-06, "loss": 0.2759, "step": 16313 }, { "epoch": 0.5598490048043926, "grad_norm": 0.7283287975437552, "learning_rate": 4.277025999249871e-06, "loss": 0.257, "step": 16314 }, { "epoch": 0.5598833218943033, "grad_norm": 0.6397868823036273, "learning_rate": 4.276476104989514e-06, "loss": 0.2797, "step": 16315 }, { "epoch": 0.5599176389842141, "grad_norm": 0.6562522107066866, "learning_rate": 4.27592621966745e-06, "loss": 0.2513, "step": 16316 }, { "epoch": 0.559951956074125, "grad_norm": 0.7665534691153522, "learning_rate": 4.275376343290475e-06, "loss": 0.3054, "step": 16317 }, { "epoch": 0.5599862731640357, "grad_norm": 0.8447590509775483, "learning_rate": 4.274826475865379e-06, "loss": 0.3492, "step": 16318 }, { "epoch": 0.5600205902539465, "grad_norm": 0.793667410104376, "learning_rate": 4.274276617398956e-06, "loss": 0.2619, "step": 16319 }, { "epoch": 0.5600549073438572, "grad_norm": 0.696283420491661, "learning_rate": 4.273726767898001e-06, "loss": 0.2274, "step": 16320 }, { "epoch": 0.560089224433768, "grad_norm": 0.8197078894212894, "learning_rate": 4.273176927369305e-06, "loss": 0.2885, "step": 16321 }, { "epoch": 0.5601235415236788, "grad_norm": 0.8082718108805347, "learning_rate": 4.2726270958196595e-06, "loss": 0.2562, "step": 16322 }, { "epoch": 0.5601578586135896, "grad_norm": 0.7159186101975616, "learning_rate": 4.272077273255859e-06, "loss": 0.2971, "step": 16323 }, { "epoch": 0.5601921757035003, "grad_norm": 0.9332229399105022, "learning_rate": 4.271527459684694e-06, "loss": 0.3169, "step": 16324 }, { "epoch": 0.5602264927934111, "grad_norm": 0.8058204079605957, "learning_rate": 4.270977655112959e-06, "loss": 0.2904, "step": 16325 }, { "epoch": 0.5602608098833219, "grad_norm": 0.7810851460532596, "learning_rate": 4.270427859547444e-06, "loss": 0.2977, "step": 16326 }, { "epoch": 0.5602951269732327, "grad_norm": 0.8371218666753528, "learning_rate": 4.269878072994942e-06, "loss": 0.2949, "step": 16327 }, { "epoch": 0.5603294440631434, "grad_norm": 0.7424700916662008, "learning_rate": 4.269328295462248e-06, "loss": 0.2802, "step": 16328 }, { "epoch": 0.5603637611530542, "grad_norm": 0.9012217118911999, "learning_rate": 4.268778526956149e-06, "loss": 0.3055, "step": 16329 }, { "epoch": 0.560398078242965, "grad_norm": 0.749330725630174, "learning_rate": 4.268228767483438e-06, "loss": 0.286, "step": 16330 }, { "epoch": 0.5604323953328758, "grad_norm": 0.7769339714543617, "learning_rate": 4.2676790170509095e-06, "loss": 0.3115, "step": 16331 }, { "epoch": 0.5604667124227866, "grad_norm": 0.8053430766121111, "learning_rate": 4.267129275665352e-06, "loss": 0.2719, "step": 16332 }, { "epoch": 0.5605010295126973, "grad_norm": 0.7841504526060108, "learning_rate": 4.266579543333557e-06, "loss": 0.2507, "step": 16333 }, { "epoch": 0.5605353466026081, "grad_norm": 0.7386992524654576, "learning_rate": 4.2660298200623176e-06, "loss": 0.2644, "step": 16334 }, { "epoch": 0.5605696636925189, "grad_norm": 0.8122972520779594, "learning_rate": 4.265480105858424e-06, "loss": 0.2968, "step": 16335 }, { "epoch": 0.5606039807824297, "grad_norm": 0.7963963757962188, "learning_rate": 4.264930400728667e-06, "loss": 0.2852, "step": 16336 }, { "epoch": 0.5606382978723404, "grad_norm": 0.8050366584611773, "learning_rate": 4.264380704679838e-06, "loss": 0.2935, "step": 16337 }, { "epoch": 0.5606726149622512, "grad_norm": 0.8285597882206168, "learning_rate": 4.2638310177187285e-06, "loss": 0.3197, "step": 16338 }, { "epoch": 0.5607069320521619, "grad_norm": 0.8528094444662904, "learning_rate": 4.263281339852127e-06, "loss": 0.2399, "step": 16339 }, { "epoch": 0.5607412491420728, "grad_norm": 0.8567058567555018, "learning_rate": 4.262731671086826e-06, "loss": 0.2879, "step": 16340 }, { "epoch": 0.5607755662319835, "grad_norm": 0.6675622020630475, "learning_rate": 4.2621820114296165e-06, "loss": 0.2519, "step": 16341 }, { "epoch": 0.5608098833218943, "grad_norm": 0.7934701309151845, "learning_rate": 4.261632360887287e-06, "loss": 0.2614, "step": 16342 }, { "epoch": 0.560844200411805, "grad_norm": 0.8309561176743089, "learning_rate": 4.261082719466629e-06, "loss": 0.3058, "step": 16343 }, { "epoch": 0.5608785175017158, "grad_norm": 0.7885125158026286, "learning_rate": 4.260533087174434e-06, "loss": 0.2714, "step": 16344 }, { "epoch": 0.5609128345916267, "grad_norm": 0.7306036620288253, "learning_rate": 4.259983464017489e-06, "loss": 0.2567, "step": 16345 }, { "epoch": 0.5609471516815374, "grad_norm": 0.7744197410251659, "learning_rate": 4.259433850002587e-06, "loss": 0.3111, "step": 16346 }, { "epoch": 0.5609814687714482, "grad_norm": 0.7638593020958508, "learning_rate": 4.258884245136514e-06, "loss": 0.2787, "step": 16347 }, { "epoch": 0.5610157858613589, "grad_norm": 0.7821186923672107, "learning_rate": 4.258334649426063e-06, "loss": 0.3087, "step": 16348 }, { "epoch": 0.5610501029512698, "grad_norm": 0.7648315120463621, "learning_rate": 4.257785062878024e-06, "loss": 0.2707, "step": 16349 }, { "epoch": 0.5610844200411805, "grad_norm": 0.7203312833169255, "learning_rate": 4.257235485499185e-06, "loss": 0.2464, "step": 16350 }, { "epoch": 0.5611187371310913, "grad_norm": 0.8492746950960757, "learning_rate": 4.2566859172963335e-06, "loss": 0.2664, "step": 16351 }, { "epoch": 0.561153054221002, "grad_norm": 0.9476715212340803, "learning_rate": 4.256136358276263e-06, "loss": 0.3258, "step": 16352 }, { "epoch": 0.5611873713109128, "grad_norm": 0.7837994816257497, "learning_rate": 4.25558680844576e-06, "loss": 0.2847, "step": 16353 }, { "epoch": 0.5612216884008236, "grad_norm": 0.7848398731321358, "learning_rate": 4.255037267811614e-06, "loss": 0.3271, "step": 16354 }, { "epoch": 0.5612560054907344, "grad_norm": 0.7015642416430629, "learning_rate": 4.2544877363806135e-06, "loss": 0.2509, "step": 16355 }, { "epoch": 0.5612903225806452, "grad_norm": 0.8754295485433515, "learning_rate": 4.253938214159549e-06, "loss": 0.2467, "step": 16356 }, { "epoch": 0.5613246396705559, "grad_norm": 0.6949520949065584, "learning_rate": 4.253388701155206e-06, "loss": 0.2703, "step": 16357 }, { "epoch": 0.5613589567604668, "grad_norm": 0.8734613162271365, "learning_rate": 4.2528391973743775e-06, "loss": 0.3263, "step": 16358 }, { "epoch": 0.5613932738503775, "grad_norm": 0.7489975272652343, "learning_rate": 4.252289702823847e-06, "loss": 0.2169, "step": 16359 }, { "epoch": 0.5614275909402883, "grad_norm": 0.7321940475613262, "learning_rate": 4.251740217510408e-06, "loss": 0.3159, "step": 16360 }, { "epoch": 0.561461908030199, "grad_norm": 0.7823751758077617, "learning_rate": 4.251190741440846e-06, "loss": 0.2484, "step": 16361 }, { "epoch": 0.5614962251201098, "grad_norm": 0.8782934966982685, "learning_rate": 4.2506412746219475e-06, "loss": 0.2906, "step": 16362 }, { "epoch": 0.5615305422100206, "grad_norm": 0.7262187458878921, "learning_rate": 4.250091817060503e-06, "loss": 0.2748, "step": 16363 }, { "epoch": 0.5615648592999314, "grad_norm": 0.7973069241987459, "learning_rate": 4.249542368763301e-06, "loss": 0.2586, "step": 16364 }, { "epoch": 0.5615991763898421, "grad_norm": 0.8745623846600865, "learning_rate": 4.248992929737127e-06, "loss": 0.2617, "step": 16365 }, { "epoch": 0.5616334934797529, "grad_norm": 0.8061961884902268, "learning_rate": 4.2484434999887706e-06, "loss": 0.2893, "step": 16366 }, { "epoch": 0.5616678105696636, "grad_norm": 0.7970564147793643, "learning_rate": 4.247894079525019e-06, "loss": 0.2655, "step": 16367 }, { "epoch": 0.5617021276595745, "grad_norm": 0.8319998464199823, "learning_rate": 4.247344668352657e-06, "loss": 0.2762, "step": 16368 }, { "epoch": 0.5617364447494853, "grad_norm": 0.7950938542955847, "learning_rate": 4.246795266478476e-06, "loss": 0.3234, "step": 16369 }, { "epoch": 0.561770761839396, "grad_norm": 0.8822619365859933, "learning_rate": 4.24624587390926e-06, "loss": 0.2857, "step": 16370 }, { "epoch": 0.5618050789293068, "grad_norm": 0.8467208264805277, "learning_rate": 4.245696490651797e-06, "loss": 0.3145, "step": 16371 }, { "epoch": 0.5618393960192176, "grad_norm": 0.781097269647721, "learning_rate": 4.245147116712877e-06, "loss": 0.3193, "step": 16372 }, { "epoch": 0.5618737131091284, "grad_norm": 0.8782828277531597, "learning_rate": 4.2445977520992836e-06, "loss": 0.2894, "step": 16373 }, { "epoch": 0.5619080301990391, "grad_norm": 0.8006133900296779, "learning_rate": 4.244048396817802e-06, "loss": 0.2726, "step": 16374 }, { "epoch": 0.5619423472889499, "grad_norm": 0.7226816273635123, "learning_rate": 4.243499050875224e-06, "loss": 0.2731, "step": 16375 }, { "epoch": 0.5619766643788606, "grad_norm": 0.7903449004309964, "learning_rate": 4.242949714278332e-06, "loss": 0.2194, "step": 16376 }, { "epoch": 0.5620109814687715, "grad_norm": 0.7896089192144842, "learning_rate": 4.242400387033913e-06, "loss": 0.2827, "step": 16377 }, { "epoch": 0.5620452985586822, "grad_norm": 0.8171605112702229, "learning_rate": 4.241851069148754e-06, "loss": 0.2651, "step": 16378 }, { "epoch": 0.562079615648593, "grad_norm": 0.7852579188494136, "learning_rate": 4.241301760629643e-06, "loss": 0.3121, "step": 16379 }, { "epoch": 0.5621139327385037, "grad_norm": 0.7844447306520411, "learning_rate": 4.2407524614833606e-06, "loss": 0.2743, "step": 16380 }, { "epoch": 0.5621482498284146, "grad_norm": 0.8121419428051109, "learning_rate": 4.240203171716699e-06, "loss": 0.2741, "step": 16381 }, { "epoch": 0.5621825669183254, "grad_norm": 0.7811020845185784, "learning_rate": 4.239653891336442e-06, "loss": 0.3162, "step": 16382 }, { "epoch": 0.5622168840082361, "grad_norm": 0.7347626148811343, "learning_rate": 4.239104620349372e-06, "loss": 0.2844, "step": 16383 }, { "epoch": 0.5622512010981469, "grad_norm": 0.7081682515308545, "learning_rate": 4.238555358762279e-06, "loss": 0.2456, "step": 16384 }, { "epoch": 0.5622855181880576, "grad_norm": 0.8326415859286992, "learning_rate": 4.238006106581948e-06, "loss": 0.275, "step": 16385 }, { "epoch": 0.5623198352779685, "grad_norm": 0.8820684228567377, "learning_rate": 4.23745686381516e-06, "loss": 0.2935, "step": 16386 }, { "epoch": 0.5623541523678792, "grad_norm": 0.7992000558421243, "learning_rate": 4.2369076304687065e-06, "loss": 0.2597, "step": 16387 }, { "epoch": 0.56238846945779, "grad_norm": 0.7497311704800291, "learning_rate": 4.236358406549369e-06, "loss": 0.2524, "step": 16388 }, { "epoch": 0.5624227865477007, "grad_norm": 0.7735882175672552, "learning_rate": 4.235809192063931e-06, "loss": 0.3227, "step": 16389 }, { "epoch": 0.5624571036376115, "grad_norm": 0.8208046404470587, "learning_rate": 4.235259987019182e-06, "loss": 0.2581, "step": 16390 }, { "epoch": 0.5624914207275223, "grad_norm": 0.7698030136915245, "learning_rate": 4.234710791421901e-06, "loss": 0.2642, "step": 16391 }, { "epoch": 0.5625257378174331, "grad_norm": 0.8548045286482931, "learning_rate": 4.234161605278879e-06, "loss": 0.2963, "step": 16392 }, { "epoch": 0.5625600549073438, "grad_norm": 0.8238639287925659, "learning_rate": 4.233612428596897e-06, "loss": 0.244, "step": 16393 }, { "epoch": 0.5625943719972546, "grad_norm": 0.7710936555464083, "learning_rate": 4.2330632613827376e-06, "loss": 0.2762, "step": 16394 }, { "epoch": 0.5626286890871655, "grad_norm": 0.7756872641481907, "learning_rate": 4.23251410364319e-06, "loss": 0.3242, "step": 16395 }, { "epoch": 0.5626630061770762, "grad_norm": 0.6988866731610628, "learning_rate": 4.231964955385036e-06, "loss": 0.294, "step": 16396 }, { "epoch": 0.562697323266987, "grad_norm": 0.7417888729709238, "learning_rate": 4.231415816615057e-06, "loss": 0.2648, "step": 16397 }, { "epoch": 0.5627316403568977, "grad_norm": 0.8506909485496367, "learning_rate": 4.230866687340043e-06, "loss": 0.3031, "step": 16398 }, { "epoch": 0.5627659574468085, "grad_norm": 0.7401176103757112, "learning_rate": 4.230317567566772e-06, "loss": 0.2724, "step": 16399 }, { "epoch": 0.5628002745367193, "grad_norm": 0.7940065226108912, "learning_rate": 4.229768457302031e-06, "loss": 0.2932, "step": 16400 }, { "epoch": 0.5628345916266301, "grad_norm": 0.7596834658894712, "learning_rate": 4.229219356552601e-06, "loss": 0.2437, "step": 16401 }, { "epoch": 0.5628689087165408, "grad_norm": 0.7269188584904948, "learning_rate": 4.22867026532527e-06, "loss": 0.274, "step": 16402 }, { "epoch": 0.5629032258064516, "grad_norm": 0.9149851663067426, "learning_rate": 4.228121183626815e-06, "loss": 0.2691, "step": 16403 }, { "epoch": 0.5629375428963624, "grad_norm": 0.8198854829817517, "learning_rate": 4.227572111464026e-06, "loss": 0.2633, "step": 16404 }, { "epoch": 0.5629718599862732, "grad_norm": 0.8181471097201741, "learning_rate": 4.227023048843682e-06, "loss": 0.2902, "step": 16405 }, { "epoch": 0.563006177076184, "grad_norm": 0.709414033387166, "learning_rate": 4.226473995772567e-06, "loss": 0.2857, "step": 16406 }, { "epoch": 0.5630404941660947, "grad_norm": 0.7856103101115598, "learning_rate": 4.225924952257463e-06, "loss": 0.2561, "step": 16407 }, { "epoch": 0.5630748112560054, "grad_norm": 0.7697446948932138, "learning_rate": 4.225375918305156e-06, "loss": 0.2503, "step": 16408 }, { "epoch": 0.5631091283459163, "grad_norm": 0.8325339728065901, "learning_rate": 4.224826893922424e-06, "loss": 0.2599, "step": 16409 }, { "epoch": 0.5631434454358271, "grad_norm": 0.8265227433440804, "learning_rate": 4.224277879116053e-06, "loss": 0.2841, "step": 16410 }, { "epoch": 0.5631777625257378, "grad_norm": 0.784192035895344, "learning_rate": 4.223728873892826e-06, "loss": 0.3212, "step": 16411 }, { "epoch": 0.5632120796156486, "grad_norm": 0.8088029031608059, "learning_rate": 4.2231798782595204e-06, "loss": 0.3321, "step": 16412 }, { "epoch": 0.5632463967055593, "grad_norm": 0.7449897526101608, "learning_rate": 4.2226308922229245e-06, "loss": 0.2545, "step": 16413 }, { "epoch": 0.5632807137954702, "grad_norm": 0.734266546353918, "learning_rate": 4.222081915789816e-06, "loss": 0.2358, "step": 16414 }, { "epoch": 0.5633150308853809, "grad_norm": 0.7669468272331739, "learning_rate": 4.221532948966978e-06, "loss": 0.2313, "step": 16415 }, { "epoch": 0.5633493479752917, "grad_norm": 0.8231938531365767, "learning_rate": 4.220983991761195e-06, "loss": 0.3107, "step": 16416 }, { "epoch": 0.5633836650652024, "grad_norm": 0.7609199569257603, "learning_rate": 4.220435044179246e-06, "loss": 0.2974, "step": 16417 }, { "epoch": 0.5634179821551133, "grad_norm": 0.8042627784312003, "learning_rate": 4.2198861062279106e-06, "loss": 0.2944, "step": 16418 }, { "epoch": 0.563452299245024, "grad_norm": 0.874170931132448, "learning_rate": 4.219337177913976e-06, "loss": 0.2686, "step": 16419 }, { "epoch": 0.5634866163349348, "grad_norm": 0.8551661138984928, "learning_rate": 4.218788259244218e-06, "loss": 0.2775, "step": 16420 }, { "epoch": 0.5635209334248455, "grad_norm": 0.7401809442865972, "learning_rate": 4.218239350225422e-06, "loss": 0.2505, "step": 16421 }, { "epoch": 0.5635552505147563, "grad_norm": 0.7591747174040924, "learning_rate": 4.217690450864367e-06, "loss": 0.2528, "step": 16422 }, { "epoch": 0.5635895676046672, "grad_norm": 0.7476889619595223, "learning_rate": 4.217141561167832e-06, "loss": 0.2767, "step": 16423 }, { "epoch": 0.5636238846945779, "grad_norm": 0.693596548639777, "learning_rate": 4.2165926811426035e-06, "loss": 0.2421, "step": 16424 }, { "epoch": 0.5636582017844887, "grad_norm": 0.8217966744426307, "learning_rate": 4.216043810795459e-06, "loss": 0.2835, "step": 16425 }, { "epoch": 0.5636925188743994, "grad_norm": 0.8908016880796041, "learning_rate": 4.2154949501331775e-06, "loss": 0.2486, "step": 16426 }, { "epoch": 0.5637268359643103, "grad_norm": 0.7621732711599747, "learning_rate": 4.214946099162543e-06, "loss": 0.2824, "step": 16427 }, { "epoch": 0.563761153054221, "grad_norm": 0.7949144157541725, "learning_rate": 4.214397257890334e-06, "loss": 0.2571, "step": 16428 }, { "epoch": 0.5637954701441318, "grad_norm": 0.7684699498104649, "learning_rate": 4.213848426323328e-06, "loss": 0.2785, "step": 16429 }, { "epoch": 0.5638297872340425, "grad_norm": 0.6642770769632, "learning_rate": 4.21329960446831e-06, "loss": 0.2828, "step": 16430 }, { "epoch": 0.5638641043239533, "grad_norm": 0.7967744918926563, "learning_rate": 4.21275079233206e-06, "loss": 0.2634, "step": 16431 }, { "epoch": 0.5638984214138641, "grad_norm": 0.9389564371884456, "learning_rate": 4.212201989921353e-06, "loss": 0.3145, "step": 16432 }, { "epoch": 0.5639327385037749, "grad_norm": 0.793976166364632, "learning_rate": 4.211653197242974e-06, "loss": 0.2909, "step": 16433 }, { "epoch": 0.5639670555936857, "grad_norm": 0.721974481421704, "learning_rate": 4.2111044143037015e-06, "loss": 0.3052, "step": 16434 }, { "epoch": 0.5640013726835964, "grad_norm": 0.7099835085071252, "learning_rate": 4.210555641110311e-06, "loss": 0.2454, "step": 16435 }, { "epoch": 0.5640356897735072, "grad_norm": 0.9069074420145223, "learning_rate": 4.210006877669588e-06, "loss": 0.2977, "step": 16436 }, { "epoch": 0.564070006863418, "grad_norm": 0.7840146899294081, "learning_rate": 4.209458123988308e-06, "loss": 0.2589, "step": 16437 }, { "epoch": 0.5641043239533288, "grad_norm": 0.7440878358403509, "learning_rate": 4.208909380073249e-06, "loss": 0.251, "step": 16438 }, { "epoch": 0.5641386410432395, "grad_norm": 0.6939281088452407, "learning_rate": 4.2083606459311955e-06, "loss": 0.2325, "step": 16439 }, { "epoch": 0.5641729581331503, "grad_norm": 0.8039062672722044, "learning_rate": 4.2078119215689225e-06, "loss": 0.3022, "step": 16440 }, { "epoch": 0.5642072752230611, "grad_norm": 0.794546369368941, "learning_rate": 4.207263206993208e-06, "loss": 0.2778, "step": 16441 }, { "epoch": 0.5642415923129719, "grad_norm": 0.700627800264969, "learning_rate": 4.206714502210834e-06, "loss": 0.3091, "step": 16442 }, { "epoch": 0.5642759094028826, "grad_norm": 0.7218880736575792, "learning_rate": 4.2061658072285776e-06, "loss": 0.2303, "step": 16443 }, { "epoch": 0.5643102264927934, "grad_norm": 0.8122042320306034, "learning_rate": 4.205617122053217e-06, "loss": 0.256, "step": 16444 }, { "epoch": 0.5643445435827041, "grad_norm": 0.8423498497107776, "learning_rate": 4.2050684466915294e-06, "loss": 0.2596, "step": 16445 }, { "epoch": 0.564378860672615, "grad_norm": 0.7991352687179962, "learning_rate": 4.2045197811502965e-06, "loss": 0.3055, "step": 16446 }, { "epoch": 0.5644131777625258, "grad_norm": 0.7481325454226071, "learning_rate": 4.2039711254362915e-06, "loss": 0.2772, "step": 16447 }, { "epoch": 0.5644474948524365, "grad_norm": 0.7023075483103889, "learning_rate": 4.2034224795562975e-06, "loss": 0.2375, "step": 16448 }, { "epoch": 0.5644818119423473, "grad_norm": 0.8254431196852166, "learning_rate": 4.20287384351709e-06, "loss": 0.2978, "step": 16449 }, { "epoch": 0.5645161290322581, "grad_norm": 0.7115215327106041, "learning_rate": 4.202325217325444e-06, "loss": 0.2877, "step": 16450 }, { "epoch": 0.5645504461221689, "grad_norm": 0.7753710730002206, "learning_rate": 4.201776600988142e-06, "loss": 0.2995, "step": 16451 }, { "epoch": 0.5645847632120796, "grad_norm": 0.7517305458557259, "learning_rate": 4.20122799451196e-06, "loss": 0.3038, "step": 16452 }, { "epoch": 0.5646190803019904, "grad_norm": 0.8204932133571688, "learning_rate": 4.200679397903674e-06, "loss": 0.2729, "step": 16453 }, { "epoch": 0.5646533973919011, "grad_norm": 0.8501548315466012, "learning_rate": 4.200130811170063e-06, "loss": 0.3267, "step": 16454 }, { "epoch": 0.564687714481812, "grad_norm": 0.7192547846335132, "learning_rate": 4.199582234317901e-06, "loss": 0.2525, "step": 16455 }, { "epoch": 0.5647220315717227, "grad_norm": 0.7151300426048846, "learning_rate": 4.19903366735397e-06, "loss": 0.2824, "step": 16456 }, { "epoch": 0.5647563486616335, "grad_norm": 0.6822493684033574, "learning_rate": 4.198485110285044e-06, "loss": 0.2475, "step": 16457 }, { "epoch": 0.5647906657515442, "grad_norm": 0.7370710005509467, "learning_rate": 4.1979365631178975e-06, "loss": 0.2787, "step": 16458 }, { "epoch": 0.564824982841455, "grad_norm": 0.753953627613636, "learning_rate": 4.1973880258593114e-06, "loss": 0.3102, "step": 16459 }, { "epoch": 0.5648592999313659, "grad_norm": 0.748771678469579, "learning_rate": 4.196839498516061e-06, "loss": 0.2541, "step": 16460 }, { "epoch": 0.5648936170212766, "grad_norm": 0.705516357648183, "learning_rate": 4.19629098109492e-06, "loss": 0.2493, "step": 16461 }, { "epoch": 0.5649279341111874, "grad_norm": 0.7483993270104395, "learning_rate": 4.19574247360267e-06, "loss": 0.2493, "step": 16462 }, { "epoch": 0.5649622512010981, "grad_norm": 0.8287504790846257, "learning_rate": 4.195193976046083e-06, "loss": 0.304, "step": 16463 }, { "epoch": 0.564996568291009, "grad_norm": 0.8290130979584945, "learning_rate": 4.194645488431935e-06, "loss": 0.2734, "step": 16464 }, { "epoch": 0.5650308853809197, "grad_norm": 0.9144067945074733, "learning_rate": 4.194097010767004e-06, "loss": 0.2704, "step": 16465 }, { "epoch": 0.5650652024708305, "grad_norm": 0.7641183644737732, "learning_rate": 4.193548543058064e-06, "loss": 0.2903, "step": 16466 }, { "epoch": 0.5650995195607412, "grad_norm": 0.7870109246309958, "learning_rate": 4.193000085311892e-06, "loss": 0.2463, "step": 16467 }, { "epoch": 0.565133836650652, "grad_norm": 0.7391913586183746, "learning_rate": 4.192451637535263e-06, "loss": 0.2792, "step": 16468 }, { "epoch": 0.5651681537405628, "grad_norm": 0.7834160021519687, "learning_rate": 4.191903199734953e-06, "loss": 0.2401, "step": 16469 }, { "epoch": 0.5652024708304736, "grad_norm": 0.9199703200796254, "learning_rate": 4.191354771917735e-06, "loss": 0.256, "step": 16470 }, { "epoch": 0.5652367879203843, "grad_norm": 0.675677861844373, "learning_rate": 4.190806354090388e-06, "loss": 0.2469, "step": 16471 }, { "epoch": 0.5652711050102951, "grad_norm": 0.756910690150962, "learning_rate": 4.190257946259686e-06, "loss": 0.2651, "step": 16472 }, { "epoch": 0.565305422100206, "grad_norm": 0.8259724299061018, "learning_rate": 4.1897095484324e-06, "loss": 0.3235, "step": 16473 }, { "epoch": 0.5653397391901167, "grad_norm": 0.7794678340802214, "learning_rate": 4.1891611606153095e-06, "loss": 0.2791, "step": 16474 }, { "epoch": 0.5653740562800275, "grad_norm": 0.895543671711016, "learning_rate": 4.188612782815188e-06, "loss": 0.264, "step": 16475 }, { "epoch": 0.5654083733699382, "grad_norm": 0.7592743966673368, "learning_rate": 4.188064415038808e-06, "loss": 0.2734, "step": 16476 }, { "epoch": 0.565442690459849, "grad_norm": 0.7579678265062324, "learning_rate": 4.187516057292947e-06, "loss": 0.2895, "step": 16477 }, { "epoch": 0.5654770075497598, "grad_norm": 0.7936243979039924, "learning_rate": 4.186967709584378e-06, "loss": 0.2718, "step": 16478 }, { "epoch": 0.5655113246396706, "grad_norm": 0.751611932722626, "learning_rate": 4.186419371919873e-06, "loss": 0.2696, "step": 16479 }, { "epoch": 0.5655456417295813, "grad_norm": 0.7904599753471396, "learning_rate": 4.1858710443062105e-06, "loss": 0.2911, "step": 16480 }, { "epoch": 0.5655799588194921, "grad_norm": 0.7471729525099842, "learning_rate": 4.18532272675016e-06, "loss": 0.2296, "step": 16481 }, { "epoch": 0.5656142759094028, "grad_norm": 0.6849046482792607, "learning_rate": 4.184774419258497e-06, "loss": 0.2668, "step": 16482 }, { "epoch": 0.5656485929993137, "grad_norm": 0.8840570967025322, "learning_rate": 4.184226121837998e-06, "loss": 0.2757, "step": 16483 }, { "epoch": 0.5656829100892244, "grad_norm": 0.6839426083404112, "learning_rate": 4.183677834495432e-06, "loss": 0.2347, "step": 16484 }, { "epoch": 0.5657172271791352, "grad_norm": 0.7523901889704437, "learning_rate": 4.183129557237578e-06, "loss": 0.3009, "step": 16485 }, { "epoch": 0.565751544269046, "grad_norm": 0.7293763173392429, "learning_rate": 4.182581290071203e-06, "loss": 0.2663, "step": 16486 }, { "epoch": 0.5657858613589568, "grad_norm": 0.8517777731006556, "learning_rate": 4.182033033003083e-06, "loss": 0.276, "step": 16487 }, { "epoch": 0.5658201784488676, "grad_norm": 0.8374951313925991, "learning_rate": 4.181484786039993e-06, "loss": 0.3035, "step": 16488 }, { "epoch": 0.5658544955387783, "grad_norm": 0.7774098235777579, "learning_rate": 4.180936549188703e-06, "loss": 0.2697, "step": 16489 }, { "epoch": 0.5658888126286891, "grad_norm": 0.7798044373210722, "learning_rate": 4.180388322455986e-06, "loss": 0.2651, "step": 16490 }, { "epoch": 0.5659231297185998, "grad_norm": 0.6576969177467192, "learning_rate": 4.179840105848618e-06, "loss": 0.2545, "step": 16491 }, { "epoch": 0.5659574468085107, "grad_norm": 0.8849749389779183, "learning_rate": 4.179291899373369e-06, "loss": 0.2489, "step": 16492 }, { "epoch": 0.5659917638984214, "grad_norm": 0.7657396383423203, "learning_rate": 4.1787437030370096e-06, "loss": 0.2834, "step": 16493 }, { "epoch": 0.5660260809883322, "grad_norm": 0.7045782963341523, "learning_rate": 4.178195516846317e-06, "loss": 0.2783, "step": 16494 }, { "epoch": 0.5660603980782429, "grad_norm": 0.8291544463043429, "learning_rate": 4.177647340808059e-06, "loss": 0.2922, "step": 16495 }, { "epoch": 0.5660947151681538, "grad_norm": 0.7341585626580913, "learning_rate": 4.177099174929009e-06, "loss": 0.2722, "step": 16496 }, { "epoch": 0.5661290322580645, "grad_norm": 0.9011993579529272, "learning_rate": 4.1765510192159395e-06, "loss": 0.3144, "step": 16497 }, { "epoch": 0.5661633493479753, "grad_norm": 0.9301712931614169, "learning_rate": 4.176002873675624e-06, "loss": 0.2877, "step": 16498 }, { "epoch": 0.566197666437886, "grad_norm": 0.7503699224325757, "learning_rate": 4.1754547383148295e-06, "loss": 0.3137, "step": 16499 }, { "epoch": 0.5662319835277968, "grad_norm": 0.7754205846120718, "learning_rate": 4.174906613140332e-06, "loss": 0.3106, "step": 16500 }, { "epoch": 0.5662663006177077, "grad_norm": 0.7400587251591421, "learning_rate": 4.174358498158903e-06, "loss": 0.2776, "step": 16501 }, { "epoch": 0.5663006177076184, "grad_norm": 0.682431965015732, "learning_rate": 4.173810393377308e-06, "loss": 0.2576, "step": 16502 }, { "epoch": 0.5663349347975292, "grad_norm": 0.7789434009480505, "learning_rate": 4.1732622988023254e-06, "loss": 0.2742, "step": 16503 }, { "epoch": 0.5663692518874399, "grad_norm": 0.821207554649293, "learning_rate": 4.172714214440722e-06, "loss": 0.314, "step": 16504 }, { "epoch": 0.5664035689773507, "grad_norm": 0.7652295080454148, "learning_rate": 4.1721661402992695e-06, "loss": 0.2755, "step": 16505 }, { "epoch": 0.5664378860672615, "grad_norm": 0.7726868275350568, "learning_rate": 4.17161807638474e-06, "loss": 0.2906, "step": 16506 }, { "epoch": 0.5664722031571723, "grad_norm": 0.7461016902343316, "learning_rate": 4.171070022703904e-06, "loss": 0.2618, "step": 16507 }, { "epoch": 0.566506520247083, "grad_norm": 0.7309038693118484, "learning_rate": 4.17052197926353e-06, "loss": 0.2572, "step": 16508 }, { "epoch": 0.5665408373369938, "grad_norm": 0.6752003938742437, "learning_rate": 4.169973946070391e-06, "loss": 0.2699, "step": 16509 }, { "epoch": 0.5665751544269046, "grad_norm": 0.7412522173303081, "learning_rate": 4.169425923131256e-06, "loss": 0.2643, "step": 16510 }, { "epoch": 0.5666094715168154, "grad_norm": 0.7601345513332513, "learning_rate": 4.1688779104528955e-06, "loss": 0.3131, "step": 16511 }, { "epoch": 0.5666437886067262, "grad_norm": 0.7053183291255044, "learning_rate": 4.168329908042079e-06, "loss": 0.2319, "step": 16512 }, { "epoch": 0.5666781056966369, "grad_norm": 0.9089005711029515, "learning_rate": 4.167781915905578e-06, "loss": 0.3008, "step": 16513 }, { "epoch": 0.5667124227865477, "grad_norm": 0.6957583733868354, "learning_rate": 4.167233934050158e-06, "loss": 0.2887, "step": 16514 }, { "epoch": 0.5667467398764585, "grad_norm": 0.7915216740223636, "learning_rate": 4.166685962482595e-06, "loss": 0.2871, "step": 16515 }, { "epoch": 0.5667810569663693, "grad_norm": 0.6975648225097664, "learning_rate": 4.166138001209653e-06, "loss": 0.2141, "step": 16516 }, { "epoch": 0.56681537405628, "grad_norm": 0.7607665589566893, "learning_rate": 4.1655900502381066e-06, "loss": 0.2708, "step": 16517 }, { "epoch": 0.5668496911461908, "grad_norm": 0.8208836327383361, "learning_rate": 4.1650421095747204e-06, "loss": 0.2514, "step": 16518 }, { "epoch": 0.5668840082361016, "grad_norm": 0.8112942638878721, "learning_rate": 4.164494179226265e-06, "loss": 0.2884, "step": 16519 }, { "epoch": 0.5669183253260124, "grad_norm": 0.7874190360680688, "learning_rate": 4.163946259199511e-06, "loss": 0.297, "step": 16520 }, { "epoch": 0.5669526424159231, "grad_norm": 0.6897441992552783, "learning_rate": 4.163398349501227e-06, "loss": 0.2648, "step": 16521 }, { "epoch": 0.5669869595058339, "grad_norm": 0.7344193068454257, "learning_rate": 4.162850450138179e-06, "loss": 0.2312, "step": 16522 }, { "epoch": 0.5670212765957446, "grad_norm": 0.7349008230152028, "learning_rate": 4.162302561117139e-06, "loss": 0.253, "step": 16523 }, { "epoch": 0.5670555936856555, "grad_norm": 0.7504884510411148, "learning_rate": 4.1617546824448755e-06, "loss": 0.2892, "step": 16524 }, { "epoch": 0.5670899107755663, "grad_norm": 1.2486118853293071, "learning_rate": 4.161206814128152e-06, "loss": 0.2679, "step": 16525 }, { "epoch": 0.567124227865477, "grad_norm": 0.7966891067363295, "learning_rate": 4.160658956173743e-06, "loss": 0.2767, "step": 16526 }, { "epoch": 0.5671585449553878, "grad_norm": 0.6968610658293314, "learning_rate": 4.160111108588414e-06, "loss": 0.291, "step": 16527 }, { "epoch": 0.5671928620452985, "grad_norm": 0.7200647861544114, "learning_rate": 4.159563271378931e-06, "loss": 0.2741, "step": 16528 }, { "epoch": 0.5672271791352094, "grad_norm": 0.7015755536126232, "learning_rate": 4.159015444552066e-06, "loss": 0.26, "step": 16529 }, { "epoch": 0.5672614962251201, "grad_norm": 0.7721558656512931, "learning_rate": 4.158467628114585e-06, "loss": 0.2845, "step": 16530 }, { "epoch": 0.5672958133150309, "grad_norm": 0.8535214320298684, "learning_rate": 4.157919822073253e-06, "loss": 0.3484, "step": 16531 }, { "epoch": 0.5673301304049416, "grad_norm": 0.7899468007347703, "learning_rate": 4.157372026434842e-06, "loss": 0.3143, "step": 16532 }, { "epoch": 0.5673644474948525, "grad_norm": 0.7391708972053902, "learning_rate": 4.156824241206117e-06, "loss": 0.2823, "step": 16533 }, { "epoch": 0.5673987645847632, "grad_norm": 0.806939149085576, "learning_rate": 4.1562764663938435e-06, "loss": 0.2908, "step": 16534 }, { "epoch": 0.567433081674674, "grad_norm": 0.8114182814086607, "learning_rate": 4.155728702004792e-06, "loss": 0.2949, "step": 16535 }, { "epoch": 0.5674673987645847, "grad_norm": 0.7665212935851772, "learning_rate": 4.155180948045729e-06, "loss": 0.2431, "step": 16536 }, { "epoch": 0.5675017158544955, "grad_norm": 0.7694897190359938, "learning_rate": 4.154633204523419e-06, "loss": 0.2926, "step": 16537 }, { "epoch": 0.5675360329444064, "grad_norm": 0.7594929713976478, "learning_rate": 4.154085471444631e-06, "loss": 0.317, "step": 16538 }, { "epoch": 0.5675703500343171, "grad_norm": 0.779537960359706, "learning_rate": 4.15353774881613e-06, "loss": 0.2722, "step": 16539 }, { "epoch": 0.5676046671242279, "grad_norm": 0.7390088879030101, "learning_rate": 4.152990036644683e-06, "loss": 0.2855, "step": 16540 }, { "epoch": 0.5676389842141386, "grad_norm": 0.7294200221974666, "learning_rate": 4.152442334937057e-06, "loss": 0.2649, "step": 16541 }, { "epoch": 0.5676733013040495, "grad_norm": 0.7545651561158393, "learning_rate": 4.151894643700019e-06, "loss": 0.2711, "step": 16542 }, { "epoch": 0.5677076183939602, "grad_norm": 0.8173443742262831, "learning_rate": 4.151346962940331e-06, "loss": 0.3214, "step": 16543 }, { "epoch": 0.567741935483871, "grad_norm": 0.7290982482765616, "learning_rate": 4.150799292664765e-06, "loss": 0.2753, "step": 16544 }, { "epoch": 0.5677762525737817, "grad_norm": 0.7565962111994656, "learning_rate": 4.150251632880083e-06, "loss": 0.3402, "step": 16545 }, { "epoch": 0.5678105696636925, "grad_norm": 0.7771127218886333, "learning_rate": 4.14970398359305e-06, "loss": 0.2701, "step": 16546 }, { "epoch": 0.5678448867536033, "grad_norm": 0.7914731716153814, "learning_rate": 4.149156344810434e-06, "loss": 0.3214, "step": 16547 }, { "epoch": 0.5678792038435141, "grad_norm": 0.8076715097087619, "learning_rate": 4.148608716538998e-06, "loss": 0.2517, "step": 16548 }, { "epoch": 0.5679135209334248, "grad_norm": 0.7577763949695403, "learning_rate": 4.14806109878551e-06, "loss": 0.3056, "step": 16549 }, { "epoch": 0.5679478380233356, "grad_norm": 0.7275088065871627, "learning_rate": 4.147513491556734e-06, "loss": 0.2276, "step": 16550 }, { "epoch": 0.5679821551132463, "grad_norm": 0.7958823352936394, "learning_rate": 4.146965894859433e-06, "loss": 0.2707, "step": 16551 }, { "epoch": 0.5680164722031572, "grad_norm": 0.8319980118733111, "learning_rate": 4.1464183087003765e-06, "loss": 0.2721, "step": 16552 }, { "epoch": 0.568050789293068, "grad_norm": 0.7411501665135461, "learning_rate": 4.145870733086325e-06, "loss": 0.2892, "step": 16553 }, { "epoch": 0.5680851063829787, "grad_norm": 0.730225758518976, "learning_rate": 4.145323168024045e-06, "loss": 0.2732, "step": 16554 }, { "epoch": 0.5681194234728895, "grad_norm": 0.6832323375886373, "learning_rate": 4.144775613520302e-06, "loss": 0.3122, "step": 16555 }, { "epoch": 0.5681537405628003, "grad_norm": 0.7528358199069657, "learning_rate": 4.1442280695818576e-06, "loss": 0.2593, "step": 16556 }, { "epoch": 0.5681880576527111, "grad_norm": 0.7693910806372186, "learning_rate": 4.143680536215477e-06, "loss": 0.2319, "step": 16557 }, { "epoch": 0.5682223747426218, "grad_norm": 0.712966793088643, "learning_rate": 4.143133013427927e-06, "loss": 0.2826, "step": 16558 }, { "epoch": 0.5682566918325326, "grad_norm": 0.9137687277087773, "learning_rate": 4.1425855012259695e-06, "loss": 0.2899, "step": 16559 }, { "epoch": 0.5682910089224433, "grad_norm": 0.7801910777200787, "learning_rate": 4.142037999616367e-06, "loss": 0.3034, "step": 16560 }, { "epoch": 0.5683253260123542, "grad_norm": 0.8338825749711349, "learning_rate": 4.141490508605887e-06, "loss": 0.3254, "step": 16561 }, { "epoch": 0.5683596431022649, "grad_norm": 0.7164199389750517, "learning_rate": 4.140943028201288e-06, "loss": 0.2693, "step": 16562 }, { "epoch": 0.5683939601921757, "grad_norm": 0.8106371504481881, "learning_rate": 4.140395558409338e-06, "loss": 0.2638, "step": 16563 }, { "epoch": 0.5684282772820864, "grad_norm": 0.690215875821657, "learning_rate": 4.139848099236798e-06, "loss": 0.2315, "step": 16564 }, { "epoch": 0.5684625943719973, "grad_norm": 0.6926389042044072, "learning_rate": 4.1393006506904334e-06, "loss": 0.2258, "step": 16565 }, { "epoch": 0.5684969114619081, "grad_norm": 0.8258452253852215, "learning_rate": 4.138753212777004e-06, "loss": 0.3559, "step": 16566 }, { "epoch": 0.5685312285518188, "grad_norm": 0.8667102950173667, "learning_rate": 4.138205785503275e-06, "loss": 0.2671, "step": 16567 }, { "epoch": 0.5685655456417296, "grad_norm": 0.7957368362386852, "learning_rate": 4.137658368876011e-06, "loss": 0.3102, "step": 16568 }, { "epoch": 0.5685998627316403, "grad_norm": 0.8103071225105424, "learning_rate": 4.137110962901969e-06, "loss": 0.2784, "step": 16569 }, { "epoch": 0.5686341798215512, "grad_norm": 0.9029762466563734, "learning_rate": 4.136563567587915e-06, "loss": 0.282, "step": 16570 }, { "epoch": 0.5686684969114619, "grad_norm": 0.7604616461420143, "learning_rate": 4.136016182940615e-06, "loss": 0.2615, "step": 16571 }, { "epoch": 0.5687028140013727, "grad_norm": 0.6818681687802601, "learning_rate": 4.1354688089668246e-06, "loss": 0.2621, "step": 16572 }, { "epoch": 0.5687371310912834, "grad_norm": 0.778367565536325, "learning_rate": 4.1349214456733105e-06, "loss": 0.3356, "step": 16573 }, { "epoch": 0.5687714481811942, "grad_norm": 0.8343495785769629, "learning_rate": 4.134374093066834e-06, "loss": 0.2228, "step": 16574 }, { "epoch": 0.568805765271105, "grad_norm": 0.7482625653891178, "learning_rate": 4.133826751154154e-06, "loss": 0.2937, "step": 16575 }, { "epoch": 0.5688400823610158, "grad_norm": 0.7383814748168843, "learning_rate": 4.133279419942037e-06, "loss": 0.2686, "step": 16576 }, { "epoch": 0.5688743994509265, "grad_norm": 0.8242422657368992, "learning_rate": 4.132732099437241e-06, "loss": 0.298, "step": 16577 }, { "epoch": 0.5689087165408373, "grad_norm": 0.8787402242290465, "learning_rate": 4.132184789646529e-06, "loss": 0.2747, "step": 16578 }, { "epoch": 0.5689430336307482, "grad_norm": 0.7657299882111622, "learning_rate": 4.131637490576662e-06, "loss": 0.2436, "step": 16579 }, { "epoch": 0.5689773507206589, "grad_norm": 0.8080208048158927, "learning_rate": 4.1310902022344e-06, "loss": 0.3407, "step": 16580 }, { "epoch": 0.5690116678105697, "grad_norm": 0.6885292191423691, "learning_rate": 4.130542924626509e-06, "loss": 0.2497, "step": 16581 }, { "epoch": 0.5690459849004804, "grad_norm": 0.772635785712239, "learning_rate": 4.129995657759745e-06, "loss": 0.2835, "step": 16582 }, { "epoch": 0.5690803019903912, "grad_norm": 0.7072472786496834, "learning_rate": 4.129448401640869e-06, "loss": 0.2514, "step": 16583 }, { "epoch": 0.569114619080302, "grad_norm": 0.8655766142223157, "learning_rate": 4.128901156276644e-06, "loss": 0.2777, "step": 16584 }, { "epoch": 0.5691489361702128, "grad_norm": 0.7431783761947203, "learning_rate": 4.12835392167383e-06, "loss": 0.2548, "step": 16585 }, { "epoch": 0.5691832532601235, "grad_norm": 0.7692864154511946, "learning_rate": 4.127806697839187e-06, "loss": 0.3156, "step": 16586 }, { "epoch": 0.5692175703500343, "grad_norm": 0.8184545521420267, "learning_rate": 4.127259484779474e-06, "loss": 0.248, "step": 16587 }, { "epoch": 0.5692518874399451, "grad_norm": 0.8302646687088495, "learning_rate": 4.126712282501455e-06, "loss": 0.2812, "step": 16588 }, { "epoch": 0.5692862045298559, "grad_norm": 0.7518780965963805, "learning_rate": 4.126165091011885e-06, "loss": 0.2887, "step": 16589 }, { "epoch": 0.5693205216197667, "grad_norm": 0.7347075987788276, "learning_rate": 4.125617910317528e-06, "loss": 0.2109, "step": 16590 }, { "epoch": 0.5693548387096774, "grad_norm": 0.8246026571687057, "learning_rate": 4.125070740425143e-06, "loss": 0.2664, "step": 16591 }, { "epoch": 0.5693891557995882, "grad_norm": 0.7467521023972744, "learning_rate": 4.124523581341486e-06, "loss": 0.223, "step": 16592 }, { "epoch": 0.569423472889499, "grad_norm": 1.1700098058661996, "learning_rate": 4.123976433073321e-06, "loss": 0.2584, "step": 16593 }, { "epoch": 0.5694577899794098, "grad_norm": 0.735678512091645, "learning_rate": 4.123429295627408e-06, "loss": 0.3057, "step": 16594 }, { "epoch": 0.5694921070693205, "grad_norm": 1.0380951898196409, "learning_rate": 4.122882169010501e-06, "loss": 0.2616, "step": 16595 }, { "epoch": 0.5695264241592313, "grad_norm": 0.7885904291278053, "learning_rate": 4.122335053229364e-06, "loss": 0.27, "step": 16596 }, { "epoch": 0.569560741249142, "grad_norm": 1.0518022198099857, "learning_rate": 4.121787948290754e-06, "loss": 0.2465, "step": 16597 }, { "epoch": 0.5695950583390529, "grad_norm": 0.7449202811593985, "learning_rate": 4.1212408542014285e-06, "loss": 0.2566, "step": 16598 }, { "epoch": 0.5696293754289636, "grad_norm": 0.7919988455450814, "learning_rate": 4.12069377096815e-06, "loss": 0.3236, "step": 16599 }, { "epoch": 0.5696636925188744, "grad_norm": 0.6968675118825378, "learning_rate": 4.120146698597673e-06, "loss": 0.2808, "step": 16600 }, { "epoch": 0.5696980096087851, "grad_norm": 0.7878812057139833, "learning_rate": 4.119599637096756e-06, "loss": 0.2333, "step": 16601 }, { "epoch": 0.569732326698696, "grad_norm": 0.858427206783361, "learning_rate": 4.119052586472163e-06, "loss": 0.2054, "step": 16602 }, { "epoch": 0.5697666437886068, "grad_norm": 0.7793235550002475, "learning_rate": 4.118505546730647e-06, "loss": 0.2774, "step": 16603 }, { "epoch": 0.5698009608785175, "grad_norm": 0.7423678191710027, "learning_rate": 4.117958517878966e-06, "loss": 0.2826, "step": 16604 }, { "epoch": 0.5698352779684283, "grad_norm": 0.7370190987463926, "learning_rate": 4.117411499923881e-06, "loss": 0.2572, "step": 16605 }, { "epoch": 0.569869595058339, "grad_norm": 0.836165833897277, "learning_rate": 4.1168644928721465e-06, "loss": 0.2709, "step": 16606 }, { "epoch": 0.5699039121482499, "grad_norm": 0.839574528932765, "learning_rate": 4.116317496730522e-06, "loss": 0.2571, "step": 16607 }, { "epoch": 0.5699382292381606, "grad_norm": 0.7516930356844808, "learning_rate": 4.115770511505763e-06, "loss": 0.2644, "step": 16608 }, { "epoch": 0.5699725463280714, "grad_norm": 0.810730260201798, "learning_rate": 4.115223537204629e-06, "loss": 0.3063, "step": 16609 }, { "epoch": 0.5700068634179821, "grad_norm": 0.8977135667892288, "learning_rate": 4.114676573833879e-06, "loss": 0.2873, "step": 16610 }, { "epoch": 0.570041180507893, "grad_norm": 0.8284807547376903, "learning_rate": 4.114129621400267e-06, "loss": 0.3243, "step": 16611 }, { "epoch": 0.5700754975978037, "grad_norm": 0.8195373880494867, "learning_rate": 4.113582679910548e-06, "loss": 0.2922, "step": 16612 }, { "epoch": 0.5701098146877145, "grad_norm": 0.7952276852846377, "learning_rate": 4.113035749371484e-06, "loss": 0.2499, "step": 16613 }, { "epoch": 0.5701441317776252, "grad_norm": 0.7549479622334339, "learning_rate": 4.1124888297898295e-06, "loss": 0.2717, "step": 16614 }, { "epoch": 0.570178448867536, "grad_norm": 0.9452024929070482, "learning_rate": 4.11194192117234e-06, "loss": 0.3304, "step": 16615 }, { "epoch": 0.5702127659574469, "grad_norm": 0.7255037908330044, "learning_rate": 4.111395023525772e-06, "loss": 0.2857, "step": 16616 }, { "epoch": 0.5702470830473576, "grad_norm": 0.7351694280483745, "learning_rate": 4.110848136856885e-06, "loss": 0.2798, "step": 16617 }, { "epoch": 0.5702814001372684, "grad_norm": 0.7400749375788507, "learning_rate": 4.1103012611724285e-06, "loss": 0.3205, "step": 16618 }, { "epoch": 0.5703157172271791, "grad_norm": 0.7566094715427674, "learning_rate": 4.109754396479166e-06, "loss": 0.2786, "step": 16619 }, { "epoch": 0.5703500343170899, "grad_norm": 0.7944980219178032, "learning_rate": 4.109207542783849e-06, "loss": 0.2968, "step": 16620 }, { "epoch": 0.5703843514070007, "grad_norm": 0.778013322102884, "learning_rate": 4.108660700093233e-06, "loss": 0.3014, "step": 16621 }, { "epoch": 0.5704186684969115, "grad_norm": 0.727161724924623, "learning_rate": 4.1081138684140776e-06, "loss": 0.2392, "step": 16622 }, { "epoch": 0.5704529855868222, "grad_norm": 0.7981323048470637, "learning_rate": 4.107567047753134e-06, "loss": 0.2736, "step": 16623 }, { "epoch": 0.570487302676733, "grad_norm": 0.7254359081227737, "learning_rate": 4.107020238117157e-06, "loss": 0.2815, "step": 16624 }, { "epoch": 0.5705216197666438, "grad_norm": 0.7611629176225267, "learning_rate": 4.106473439512907e-06, "loss": 0.2824, "step": 16625 }, { "epoch": 0.5705559368565546, "grad_norm": 0.7400885124913466, "learning_rate": 4.1059266519471355e-06, "loss": 0.2678, "step": 16626 }, { "epoch": 0.5705902539464653, "grad_norm": 0.843075000049482, "learning_rate": 4.105379875426596e-06, "loss": 0.2967, "step": 16627 }, { "epoch": 0.5706245710363761, "grad_norm": 0.7264071106310006, "learning_rate": 4.1048331099580466e-06, "loss": 0.2748, "step": 16628 }, { "epoch": 0.5706588881262868, "grad_norm": 0.817780017341264, "learning_rate": 4.10428635554824e-06, "loss": 0.3129, "step": 16629 }, { "epoch": 0.5706932052161977, "grad_norm": 0.709548526602045, "learning_rate": 4.103739612203931e-06, "loss": 0.2424, "step": 16630 }, { "epoch": 0.5707275223061085, "grad_norm": 0.7642449447479766, "learning_rate": 4.103192879931874e-06, "loss": 0.272, "step": 16631 }, { "epoch": 0.5707618393960192, "grad_norm": 0.7691445230230489, "learning_rate": 4.102646158738825e-06, "loss": 0.2846, "step": 16632 }, { "epoch": 0.57079615648593, "grad_norm": 0.7382471271861757, "learning_rate": 4.102099448631533e-06, "loss": 0.2466, "step": 16633 }, { "epoch": 0.5708304735758407, "grad_norm": 0.8024637883536591, "learning_rate": 4.101552749616759e-06, "loss": 0.2758, "step": 16634 }, { "epoch": 0.5708647906657516, "grad_norm": 0.7861369603465211, "learning_rate": 4.1010060617012536e-06, "loss": 0.2918, "step": 16635 }, { "epoch": 0.5708991077556623, "grad_norm": 0.8543429144612562, "learning_rate": 4.1004593848917665e-06, "loss": 0.2707, "step": 16636 }, { "epoch": 0.5709334248455731, "grad_norm": 0.7048330704851743, "learning_rate": 4.099912719195057e-06, "loss": 0.2663, "step": 16637 }, { "epoch": 0.5709677419354838, "grad_norm": 0.8037546851342223, "learning_rate": 4.099366064617877e-06, "loss": 0.2489, "step": 16638 }, { "epoch": 0.5710020590253947, "grad_norm": 0.7858201534100432, "learning_rate": 4.098819421166978e-06, "loss": 0.2774, "step": 16639 }, { "epoch": 0.5710363761153054, "grad_norm": 0.8431415371050264, "learning_rate": 4.098272788849116e-06, "loss": 0.3021, "step": 16640 }, { "epoch": 0.5710706932052162, "grad_norm": 0.7638872117916488, "learning_rate": 4.09772616767104e-06, "loss": 0.2908, "step": 16641 }, { "epoch": 0.571105010295127, "grad_norm": 0.7180917495309512, "learning_rate": 4.097179557639507e-06, "loss": 0.2604, "step": 16642 }, { "epoch": 0.5711393273850377, "grad_norm": 0.6943115406058576, "learning_rate": 4.096632958761269e-06, "loss": 0.272, "step": 16643 }, { "epoch": 0.5711736444749486, "grad_norm": 0.7764695384775829, "learning_rate": 4.0960863710430744e-06, "loss": 0.2966, "step": 16644 }, { "epoch": 0.5712079615648593, "grad_norm": 0.7888000379381013, "learning_rate": 4.095539794491681e-06, "loss": 0.2742, "step": 16645 }, { "epoch": 0.5712422786547701, "grad_norm": 0.7840190076707703, "learning_rate": 4.094993229113839e-06, "loss": 0.3256, "step": 16646 }, { "epoch": 0.5712765957446808, "grad_norm": 0.6966545820203153, "learning_rate": 4.094446674916299e-06, "loss": 0.2508, "step": 16647 }, { "epoch": 0.5713109128345917, "grad_norm": 0.8895789657703044, "learning_rate": 4.093900131905817e-06, "loss": 0.309, "step": 16648 }, { "epoch": 0.5713452299245024, "grad_norm": 0.8859652551607493, "learning_rate": 4.093353600089142e-06, "loss": 0.2901, "step": 16649 }, { "epoch": 0.5713795470144132, "grad_norm": 0.7285982770011441, "learning_rate": 4.092807079473024e-06, "loss": 0.3136, "step": 16650 }, { "epoch": 0.5714138641043239, "grad_norm": 0.7324596535215393, "learning_rate": 4.092260570064219e-06, "loss": 0.3196, "step": 16651 }, { "epoch": 0.5714481811942347, "grad_norm": 0.7753134321662674, "learning_rate": 4.091714071869475e-06, "loss": 0.2715, "step": 16652 }, { "epoch": 0.5714824982841455, "grad_norm": 0.9300005322867696, "learning_rate": 4.091167584895546e-06, "loss": 0.2784, "step": 16653 }, { "epoch": 0.5715168153740563, "grad_norm": 0.7246137135203328, "learning_rate": 4.090621109149181e-06, "loss": 0.3004, "step": 16654 }, { "epoch": 0.571551132463967, "grad_norm": 0.7001302018735283, "learning_rate": 4.090074644637132e-06, "loss": 0.2877, "step": 16655 }, { "epoch": 0.5715854495538778, "grad_norm": 0.7827742259626211, "learning_rate": 4.0895281913661485e-06, "loss": 0.2975, "step": 16656 }, { "epoch": 0.5716197666437886, "grad_norm": 0.7381376780880415, "learning_rate": 4.0889817493429856e-06, "loss": 0.3181, "step": 16657 }, { "epoch": 0.5716540837336994, "grad_norm": 0.7968061434931878, "learning_rate": 4.088435318574391e-06, "loss": 0.2718, "step": 16658 }, { "epoch": 0.5716884008236102, "grad_norm": 0.7714989597388374, "learning_rate": 4.087888899067112e-06, "loss": 0.3764, "step": 16659 }, { "epoch": 0.5717227179135209, "grad_norm": 0.7864779532354758, "learning_rate": 4.087342490827904e-06, "loss": 0.2897, "step": 16660 }, { "epoch": 0.5717570350034317, "grad_norm": 0.8454419270272724, "learning_rate": 4.086796093863517e-06, "loss": 0.2669, "step": 16661 }, { "epoch": 0.5717913520933425, "grad_norm": 0.7495118437834701, "learning_rate": 4.086249708180696e-06, "loss": 0.2969, "step": 16662 }, { "epoch": 0.5718256691832533, "grad_norm": 0.8721011197886697, "learning_rate": 4.085703333786198e-06, "loss": 0.2908, "step": 16663 }, { "epoch": 0.571859986273164, "grad_norm": 0.6954688921788783, "learning_rate": 4.085156970686769e-06, "loss": 0.2568, "step": 16664 }, { "epoch": 0.5718943033630748, "grad_norm": 0.7525646482359649, "learning_rate": 4.084610618889157e-06, "loss": 0.3022, "step": 16665 }, { "epoch": 0.5719286204529855, "grad_norm": 0.7790403938682825, "learning_rate": 4.084064278400115e-06, "loss": 0.2851, "step": 16666 }, { "epoch": 0.5719629375428964, "grad_norm": 0.7897922612462503, "learning_rate": 4.0835179492263905e-06, "loss": 0.2541, "step": 16667 }, { "epoch": 0.5719972546328072, "grad_norm": 0.8259545775603881, "learning_rate": 4.082971631374732e-06, "loss": 0.2769, "step": 16668 }, { "epoch": 0.5720315717227179, "grad_norm": 0.6986733847050419, "learning_rate": 4.0824253248518916e-06, "loss": 0.2809, "step": 16669 }, { "epoch": 0.5720658888126287, "grad_norm": 0.8288303412381522, "learning_rate": 4.0818790296646175e-06, "loss": 0.2822, "step": 16670 }, { "epoch": 0.5721002059025395, "grad_norm": 0.7991030680299246, "learning_rate": 4.081332745819655e-06, "loss": 0.3311, "step": 16671 }, { "epoch": 0.5721345229924503, "grad_norm": 0.7473599490503784, "learning_rate": 4.080786473323757e-06, "loss": 0.2597, "step": 16672 }, { "epoch": 0.572168840082361, "grad_norm": 0.841750358114246, "learning_rate": 4.080240212183669e-06, "loss": 0.3391, "step": 16673 }, { "epoch": 0.5722031571722718, "grad_norm": 0.7575705065735914, "learning_rate": 4.079693962406142e-06, "loss": 0.2572, "step": 16674 }, { "epoch": 0.5722374742621825, "grad_norm": 0.776787229279199, "learning_rate": 4.079147723997923e-06, "loss": 0.2889, "step": 16675 }, { "epoch": 0.5722717913520934, "grad_norm": 0.732523698757376, "learning_rate": 4.078601496965759e-06, "loss": 0.2817, "step": 16676 }, { "epoch": 0.5723061084420041, "grad_norm": 0.7553913030095615, "learning_rate": 4.0780552813164005e-06, "loss": 0.262, "step": 16677 }, { "epoch": 0.5723404255319149, "grad_norm": 0.8782168303872273, "learning_rate": 4.0775090770565955e-06, "loss": 0.2872, "step": 16678 }, { "epoch": 0.5723747426218256, "grad_norm": 0.7548195877530695, "learning_rate": 4.076962884193088e-06, "loss": 0.2382, "step": 16679 }, { "epoch": 0.5724090597117364, "grad_norm": 0.7111230092749854, "learning_rate": 4.076416702732629e-06, "loss": 0.3134, "step": 16680 }, { "epoch": 0.5724433768016473, "grad_norm": 0.816902749602272, "learning_rate": 4.0758705326819645e-06, "loss": 0.2586, "step": 16681 }, { "epoch": 0.572477693891558, "grad_norm": 0.7312020553286707, "learning_rate": 4.075324374047842e-06, "loss": 0.3307, "step": 16682 }, { "epoch": 0.5725120109814688, "grad_norm": 0.8423135171163898, "learning_rate": 4.074778226837009e-06, "loss": 0.2996, "step": 16683 }, { "epoch": 0.5725463280713795, "grad_norm": 0.7466565864105963, "learning_rate": 4.074232091056214e-06, "loss": 0.2649, "step": 16684 }, { "epoch": 0.5725806451612904, "grad_norm": 0.7503909808284402, "learning_rate": 4.073685966712199e-06, "loss": 0.2257, "step": 16685 }, { "epoch": 0.5726149622512011, "grad_norm": 0.827000463363094, "learning_rate": 4.0731398538117155e-06, "loss": 0.3058, "step": 16686 }, { "epoch": 0.5726492793411119, "grad_norm": 0.7686064163406396, "learning_rate": 4.072593752361509e-06, "loss": 0.2445, "step": 16687 }, { "epoch": 0.5726835964310226, "grad_norm": 0.7452956812700796, "learning_rate": 4.072047662368324e-06, "loss": 0.2446, "step": 16688 }, { "epoch": 0.5727179135209334, "grad_norm": 0.878337018314957, "learning_rate": 4.07150158383891e-06, "loss": 0.2608, "step": 16689 }, { "epoch": 0.5727522306108442, "grad_norm": 0.8653982797934531, "learning_rate": 4.07095551678001e-06, "loss": 0.3024, "step": 16690 }, { "epoch": 0.572786547700755, "grad_norm": 0.7367671615238746, "learning_rate": 4.07040946119837e-06, "loss": 0.2926, "step": 16691 }, { "epoch": 0.5728208647906657, "grad_norm": 0.7311574925982842, "learning_rate": 4.069863417100741e-06, "loss": 0.2617, "step": 16692 }, { "epoch": 0.5728551818805765, "grad_norm": 1.216248441207752, "learning_rate": 4.069317384493864e-06, "loss": 0.2635, "step": 16693 }, { "epoch": 0.5728894989704874, "grad_norm": 0.8122283559748554, "learning_rate": 4.068771363384484e-06, "loss": 0.3405, "step": 16694 }, { "epoch": 0.5729238160603981, "grad_norm": 0.8061997519315778, "learning_rate": 4.06822535377935e-06, "loss": 0.2746, "step": 16695 }, { "epoch": 0.5729581331503089, "grad_norm": 0.820069475191652, "learning_rate": 4.067679355685204e-06, "loss": 0.2689, "step": 16696 }, { "epoch": 0.5729924502402196, "grad_norm": 0.9942221885911067, "learning_rate": 4.067133369108793e-06, "loss": 0.3782, "step": 16697 }, { "epoch": 0.5730267673301304, "grad_norm": 0.7351573606954577, "learning_rate": 4.066587394056861e-06, "loss": 0.2652, "step": 16698 }, { "epoch": 0.5730610844200412, "grad_norm": 0.7510051101687951, "learning_rate": 4.066041430536155e-06, "loss": 0.2841, "step": 16699 }, { "epoch": 0.573095401509952, "grad_norm": 0.7353015678020924, "learning_rate": 4.065495478553416e-06, "loss": 0.268, "step": 16700 }, { "epoch": 0.5731297185998627, "grad_norm": 0.8100924167794271, "learning_rate": 4.064949538115393e-06, "loss": 0.2911, "step": 16701 }, { "epoch": 0.5731640356897735, "grad_norm": 0.7766720619932006, "learning_rate": 4.064403609228829e-06, "loss": 0.287, "step": 16702 }, { "epoch": 0.5731983527796842, "grad_norm": 0.7716550514733044, "learning_rate": 4.063857691900465e-06, "loss": 0.2821, "step": 16703 }, { "epoch": 0.5732326698695951, "grad_norm": 0.7719257520794606, "learning_rate": 4.063311786137048e-06, "loss": 0.3867, "step": 16704 }, { "epoch": 0.5732669869595058, "grad_norm": 0.8549487510063302, "learning_rate": 4.062765891945323e-06, "loss": 0.2731, "step": 16705 }, { "epoch": 0.5733013040494166, "grad_norm": 0.7793357954783118, "learning_rate": 4.062220009332032e-06, "loss": 0.3361, "step": 16706 }, { "epoch": 0.5733356211393273, "grad_norm": 0.7098730715341364, "learning_rate": 4.06167413830392e-06, "loss": 0.2907, "step": 16707 }, { "epoch": 0.5733699382292382, "grad_norm": 0.8003906655141582, "learning_rate": 4.061128278867729e-06, "loss": 0.3038, "step": 16708 }, { "epoch": 0.573404255319149, "grad_norm": 0.8071222094417313, "learning_rate": 4.060582431030205e-06, "loss": 0.249, "step": 16709 }, { "epoch": 0.5734385724090597, "grad_norm": 0.8219023558477231, "learning_rate": 4.06003659479809e-06, "loss": 0.3223, "step": 16710 }, { "epoch": 0.5734728894989705, "grad_norm": 0.7724012035372186, "learning_rate": 4.059490770178125e-06, "loss": 0.2528, "step": 16711 }, { "epoch": 0.5735072065888812, "grad_norm": 0.7807318419371266, "learning_rate": 4.058944957177056e-06, "loss": 0.3006, "step": 16712 }, { "epoch": 0.5735415236787921, "grad_norm": 0.7925559863169641, "learning_rate": 4.0583991558016265e-06, "loss": 0.2986, "step": 16713 }, { "epoch": 0.5735758407687028, "grad_norm": 0.749543731552454, "learning_rate": 4.057853366058574e-06, "loss": 0.2704, "step": 16714 }, { "epoch": 0.5736101578586136, "grad_norm": 0.7774376235927237, "learning_rate": 4.057307587954648e-06, "loss": 0.2934, "step": 16715 }, { "epoch": 0.5736444749485243, "grad_norm": 0.8553632167651317, "learning_rate": 4.056761821496588e-06, "loss": 0.2058, "step": 16716 }, { "epoch": 0.5736787920384352, "grad_norm": 0.7683372998614451, "learning_rate": 4.056216066691133e-06, "loss": 0.252, "step": 16717 }, { "epoch": 0.5737131091283459, "grad_norm": 0.8533793391639407, "learning_rate": 4.05567032354503e-06, "loss": 0.3585, "step": 16718 }, { "epoch": 0.5737474262182567, "grad_norm": 0.745254336235855, "learning_rate": 4.055124592065019e-06, "loss": 0.2793, "step": 16719 }, { "epoch": 0.5737817433081674, "grad_norm": 0.8235109911741435, "learning_rate": 4.054578872257841e-06, "loss": 0.2701, "step": 16720 }, { "epoch": 0.5738160603980782, "grad_norm": 0.9012978233373933, "learning_rate": 4.05403316413024e-06, "loss": 0.2828, "step": 16721 }, { "epoch": 0.5738503774879891, "grad_norm": 0.7756847917600077, "learning_rate": 4.053487467688958e-06, "loss": 0.3113, "step": 16722 }, { "epoch": 0.5738846945778998, "grad_norm": 0.7850701539664101, "learning_rate": 4.052941782940731e-06, "loss": 0.2345, "step": 16723 }, { "epoch": 0.5739190116678106, "grad_norm": 0.7332709419276374, "learning_rate": 4.052396109892307e-06, "loss": 0.2393, "step": 16724 }, { "epoch": 0.5739533287577213, "grad_norm": 0.8197436169913114, "learning_rate": 4.051850448550423e-06, "loss": 0.3157, "step": 16725 }, { "epoch": 0.5739876458476321, "grad_norm": 0.8020047562952448, "learning_rate": 4.051304798921821e-06, "loss": 0.2798, "step": 16726 }, { "epoch": 0.5740219629375429, "grad_norm": 0.8454130466017584, "learning_rate": 4.050759161013242e-06, "loss": 0.2777, "step": 16727 }, { "epoch": 0.5740562800274537, "grad_norm": 0.7791650829877275, "learning_rate": 4.050213534831429e-06, "loss": 0.2616, "step": 16728 }, { "epoch": 0.5740905971173644, "grad_norm": 0.8382877967745286, "learning_rate": 4.049667920383117e-06, "loss": 0.2881, "step": 16729 }, { "epoch": 0.5741249142072752, "grad_norm": 0.8007425773560158, "learning_rate": 4.049122317675053e-06, "loss": 0.2489, "step": 16730 }, { "epoch": 0.574159231297186, "grad_norm": 0.7862344959856968, "learning_rate": 4.048576726713974e-06, "loss": 0.2695, "step": 16731 }, { "epoch": 0.5741935483870968, "grad_norm": 0.6962044527309783, "learning_rate": 4.048031147506618e-06, "loss": 0.2799, "step": 16732 }, { "epoch": 0.5742278654770075, "grad_norm": 0.7554006124192705, "learning_rate": 4.04748558005973e-06, "loss": 0.2688, "step": 16733 }, { "epoch": 0.5742621825669183, "grad_norm": 0.7827223568796036, "learning_rate": 4.046940024380044e-06, "loss": 0.2979, "step": 16734 }, { "epoch": 0.574296499656829, "grad_norm": 0.6947036679042483, "learning_rate": 4.046394480474305e-06, "loss": 0.2864, "step": 16735 }, { "epoch": 0.5743308167467399, "grad_norm": 0.79711364501028, "learning_rate": 4.04584894834925e-06, "loss": 0.3073, "step": 16736 }, { "epoch": 0.5743651338366507, "grad_norm": 0.8102295569391434, "learning_rate": 4.045303428011618e-06, "loss": 0.2513, "step": 16737 }, { "epoch": 0.5743994509265614, "grad_norm": 0.7720934501740431, "learning_rate": 4.04475791946815e-06, "loss": 0.2803, "step": 16738 }, { "epoch": 0.5744337680164722, "grad_norm": 0.6997862522135921, "learning_rate": 4.044212422725585e-06, "loss": 0.2746, "step": 16739 }, { "epoch": 0.574468085106383, "grad_norm": 1.1905463485697636, "learning_rate": 4.043666937790659e-06, "loss": 0.1998, "step": 16740 }, { "epoch": 0.5745024021962938, "grad_norm": 0.8809138899718814, "learning_rate": 4.043121464670115e-06, "loss": 0.2723, "step": 16741 }, { "epoch": 0.5745367192862045, "grad_norm": 0.6678270097176418, "learning_rate": 4.042576003370688e-06, "loss": 0.239, "step": 16742 }, { "epoch": 0.5745710363761153, "grad_norm": 0.8201308579191106, "learning_rate": 4.0420305538991185e-06, "loss": 0.2816, "step": 16743 }, { "epoch": 0.574605353466026, "grad_norm": 0.8980573574604607, "learning_rate": 4.041485116262146e-06, "loss": 0.2711, "step": 16744 }, { "epoch": 0.5746396705559369, "grad_norm": 0.7517423131130506, "learning_rate": 4.040939690466508e-06, "loss": 0.3011, "step": 16745 }, { "epoch": 0.5746739876458477, "grad_norm": 0.8713412584483623, "learning_rate": 4.0403942765189384e-06, "loss": 0.3323, "step": 16746 }, { "epoch": 0.5747083047357584, "grad_norm": 1.2095658415699744, "learning_rate": 4.039848874426182e-06, "loss": 0.2732, "step": 16747 }, { "epoch": 0.5747426218256692, "grad_norm": 0.7510254762395004, "learning_rate": 4.0393034841949725e-06, "loss": 0.2555, "step": 16748 }, { "epoch": 0.5747769389155799, "grad_norm": 1.003324351766365, "learning_rate": 4.038758105832048e-06, "loss": 0.2366, "step": 16749 }, { "epoch": 0.5748112560054908, "grad_norm": 0.798668137263796, "learning_rate": 4.038212739344146e-06, "loss": 0.3225, "step": 16750 }, { "epoch": 0.5748455730954015, "grad_norm": 0.7661454549965047, "learning_rate": 4.037667384738005e-06, "loss": 0.2779, "step": 16751 }, { "epoch": 0.5748798901853123, "grad_norm": 0.8930530569984324, "learning_rate": 4.03712204202036e-06, "loss": 0.3392, "step": 16752 }, { "epoch": 0.574914207275223, "grad_norm": 0.912684888573042, "learning_rate": 4.036576711197953e-06, "loss": 0.2554, "step": 16753 }, { "epoch": 0.5749485243651339, "grad_norm": 0.7096963601213367, "learning_rate": 4.0360313922775155e-06, "loss": 0.2682, "step": 16754 }, { "epoch": 0.5749828414550446, "grad_norm": 0.7324450134439927, "learning_rate": 4.035486085265785e-06, "loss": 0.2915, "step": 16755 }, { "epoch": 0.5750171585449554, "grad_norm": 0.7428748263877026, "learning_rate": 4.034940790169501e-06, "loss": 0.3042, "step": 16756 }, { "epoch": 0.5750514756348661, "grad_norm": 0.8252846559569462, "learning_rate": 4.034395506995398e-06, "loss": 0.3034, "step": 16757 }, { "epoch": 0.5750857927247769, "grad_norm": 0.8386397981413444, "learning_rate": 4.033850235750212e-06, "loss": 0.295, "step": 16758 }, { "epoch": 0.5751201098146878, "grad_norm": 0.8008274111945108, "learning_rate": 4.033304976440681e-06, "loss": 0.2812, "step": 16759 }, { "epoch": 0.5751544269045985, "grad_norm": 0.8347901839102612, "learning_rate": 4.03275972907354e-06, "loss": 0.3023, "step": 16760 }, { "epoch": 0.5751887439945093, "grad_norm": 0.6964101600754352, "learning_rate": 4.032214493655523e-06, "loss": 0.2582, "step": 16761 }, { "epoch": 0.57522306108442, "grad_norm": 0.7684600644276663, "learning_rate": 4.03166927019337e-06, "loss": 0.2523, "step": 16762 }, { "epoch": 0.5752573781743309, "grad_norm": 0.7594136695161678, "learning_rate": 4.031124058693812e-06, "loss": 0.2698, "step": 16763 }, { "epoch": 0.5752916952642416, "grad_norm": 0.8121624921775218, "learning_rate": 4.030578859163589e-06, "loss": 0.3049, "step": 16764 }, { "epoch": 0.5753260123541524, "grad_norm": 0.6888230295278276, "learning_rate": 4.030033671609432e-06, "loss": 0.2594, "step": 16765 }, { "epoch": 0.5753603294440631, "grad_norm": 0.6323744893773394, "learning_rate": 4.029488496038077e-06, "loss": 0.2132, "step": 16766 }, { "epoch": 0.5753946465339739, "grad_norm": 0.6863272476122187, "learning_rate": 4.028943332456263e-06, "loss": 0.2588, "step": 16767 }, { "epoch": 0.5754289636238847, "grad_norm": 0.7677477471971749, "learning_rate": 4.028398180870722e-06, "loss": 0.2711, "step": 16768 }, { "epoch": 0.5754632807137955, "grad_norm": 0.7445159102924267, "learning_rate": 4.027853041288186e-06, "loss": 0.2479, "step": 16769 }, { "epoch": 0.5754975978037062, "grad_norm": 0.8552731777471826, "learning_rate": 4.027307913715396e-06, "loss": 0.3159, "step": 16770 }, { "epoch": 0.575531914893617, "grad_norm": 0.8052142409640043, "learning_rate": 4.0267627981590805e-06, "loss": 0.2459, "step": 16771 }, { "epoch": 0.5755662319835277, "grad_norm": 0.8173108064501311, "learning_rate": 4.026217694625976e-06, "loss": 0.2624, "step": 16772 }, { "epoch": 0.5756005490734386, "grad_norm": 0.7159706923725937, "learning_rate": 4.025672603122816e-06, "loss": 0.283, "step": 16773 }, { "epoch": 0.5756348661633494, "grad_norm": 0.7331230931999309, "learning_rate": 4.025127523656337e-06, "loss": 0.2849, "step": 16774 }, { "epoch": 0.5756691832532601, "grad_norm": 0.7955461428108714, "learning_rate": 4.024582456233267e-06, "loss": 0.264, "step": 16775 }, { "epoch": 0.5757035003431709, "grad_norm": 0.8510858617314602, "learning_rate": 4.0240374008603475e-06, "loss": 0.3773, "step": 16776 }, { "epoch": 0.5757378174330817, "grad_norm": 0.8134650733660038, "learning_rate": 4.023492357544307e-06, "loss": 0.2665, "step": 16777 }, { "epoch": 0.5757721345229925, "grad_norm": 0.9264518307982206, "learning_rate": 4.022947326291879e-06, "loss": 0.2763, "step": 16778 }, { "epoch": 0.5758064516129032, "grad_norm": 0.7884685522726163, "learning_rate": 4.022402307109798e-06, "loss": 0.2795, "step": 16779 }, { "epoch": 0.575840768702814, "grad_norm": 0.7514324865910088, "learning_rate": 4.021857300004798e-06, "loss": 0.2934, "step": 16780 }, { "epoch": 0.5758750857927247, "grad_norm": 0.7615470240385656, "learning_rate": 4.021312304983608e-06, "loss": 0.2679, "step": 16781 }, { "epoch": 0.5759094028826356, "grad_norm": 0.8145897843470625, "learning_rate": 4.020767322052965e-06, "loss": 0.2951, "step": 16782 }, { "epoch": 0.5759437199725463, "grad_norm": 0.7252246542482862, "learning_rate": 4.020222351219601e-06, "loss": 0.2467, "step": 16783 }, { "epoch": 0.5759780370624571, "grad_norm": 0.7777011813767553, "learning_rate": 4.019677392490246e-06, "loss": 0.2879, "step": 16784 }, { "epoch": 0.5760123541523678, "grad_norm": 0.7397517244064603, "learning_rate": 4.019132445871635e-06, "loss": 0.2426, "step": 16785 }, { "epoch": 0.5760466712422787, "grad_norm": 0.9590846257794287, "learning_rate": 4.018587511370499e-06, "loss": 0.3103, "step": 16786 }, { "epoch": 0.5760809883321895, "grad_norm": 0.764590982494237, "learning_rate": 4.018042588993568e-06, "loss": 0.293, "step": 16787 }, { "epoch": 0.5761153054221002, "grad_norm": 0.7505378091088573, "learning_rate": 4.017497678747578e-06, "loss": 0.2619, "step": 16788 }, { "epoch": 0.576149622512011, "grad_norm": 0.7760285124653904, "learning_rate": 4.016952780639259e-06, "loss": 0.2691, "step": 16789 }, { "epoch": 0.5761839396019217, "grad_norm": 0.6518109960375407, "learning_rate": 4.0164078946753405e-06, "loss": 0.2509, "step": 16790 }, { "epoch": 0.5762182566918326, "grad_norm": 0.7799757374573433, "learning_rate": 4.0158630208625574e-06, "loss": 0.2693, "step": 16791 }, { "epoch": 0.5762525737817433, "grad_norm": 0.8051500038627943, "learning_rate": 4.015318159207638e-06, "loss": 0.2493, "step": 16792 }, { "epoch": 0.5762868908716541, "grad_norm": 0.7333587885630052, "learning_rate": 4.0147733097173155e-06, "loss": 0.2689, "step": 16793 }, { "epoch": 0.5763212079615648, "grad_norm": 0.8887747437140885, "learning_rate": 4.01422847239832e-06, "loss": 0.2614, "step": 16794 }, { "epoch": 0.5763555250514756, "grad_norm": 0.8396147450469539, "learning_rate": 4.0136836472573835e-06, "loss": 0.2566, "step": 16795 }, { "epoch": 0.5763898421413864, "grad_norm": 0.6614689042399711, "learning_rate": 4.013138834301233e-06, "loss": 0.3135, "step": 16796 }, { "epoch": 0.5764241592312972, "grad_norm": 0.8542542813910217, "learning_rate": 4.012594033536604e-06, "loss": 0.2973, "step": 16797 }, { "epoch": 0.576458476321208, "grad_norm": 0.8175095368701811, "learning_rate": 4.012049244970222e-06, "loss": 0.3108, "step": 16798 }, { "epoch": 0.5764927934111187, "grad_norm": 0.6532809379093594, "learning_rate": 4.011504468608822e-06, "loss": 0.2714, "step": 16799 }, { "epoch": 0.5765271105010296, "grad_norm": 0.7020768038914552, "learning_rate": 4.010959704459131e-06, "loss": 0.2815, "step": 16800 }, { "epoch": 0.5765614275909403, "grad_norm": 0.7456265718270818, "learning_rate": 4.01041495252788e-06, "loss": 0.3023, "step": 16801 }, { "epoch": 0.5765957446808511, "grad_norm": 0.7627558168483402, "learning_rate": 4.0098702128217984e-06, "loss": 0.3005, "step": 16802 }, { "epoch": 0.5766300617707618, "grad_norm": 0.7376064067004379, "learning_rate": 4.009325485347616e-06, "loss": 0.25, "step": 16803 }, { "epoch": 0.5766643788606726, "grad_norm": 1.0632404426537114, "learning_rate": 4.008780770112062e-06, "loss": 0.301, "step": 16804 }, { "epoch": 0.5766986959505834, "grad_norm": 0.8822283121813254, "learning_rate": 4.0082360671218665e-06, "loss": 0.3427, "step": 16805 }, { "epoch": 0.5767330130404942, "grad_norm": 0.8383160359883776, "learning_rate": 4.007691376383759e-06, "loss": 0.2718, "step": 16806 }, { "epoch": 0.5767673301304049, "grad_norm": 0.6530784546933242, "learning_rate": 4.007146697904465e-06, "loss": 0.2642, "step": 16807 }, { "epoch": 0.5768016472203157, "grad_norm": 0.8656921835027892, "learning_rate": 4.006602031690717e-06, "loss": 0.2874, "step": 16808 }, { "epoch": 0.5768359643102265, "grad_norm": 0.8238260754485466, "learning_rate": 4.006057377749244e-06, "loss": 0.2423, "step": 16809 }, { "epoch": 0.5768702814001373, "grad_norm": 0.8559763317507059, "learning_rate": 4.0055127360867705e-06, "loss": 0.3108, "step": 16810 }, { "epoch": 0.576904598490048, "grad_norm": 0.8007360955623404, "learning_rate": 4.00496810671003e-06, "loss": 0.3125, "step": 16811 }, { "epoch": 0.5769389155799588, "grad_norm": 0.7697547143014754, "learning_rate": 4.004423489625749e-06, "loss": 0.2637, "step": 16812 }, { "epoch": 0.5769732326698696, "grad_norm": 0.7970007867316573, "learning_rate": 4.003878884840653e-06, "loss": 0.2859, "step": 16813 }, { "epoch": 0.5770075497597804, "grad_norm": 0.6791298548714934, "learning_rate": 4.003334292361474e-06, "loss": 0.2598, "step": 16814 }, { "epoch": 0.5770418668496912, "grad_norm": 0.9515748203841903, "learning_rate": 4.002789712194937e-06, "loss": 0.3117, "step": 16815 }, { "epoch": 0.5770761839396019, "grad_norm": 0.704900835249869, "learning_rate": 4.0022451443477695e-06, "loss": 0.2795, "step": 16816 }, { "epoch": 0.5771105010295127, "grad_norm": 0.9721233581864237, "learning_rate": 4.001700588826701e-06, "loss": 0.2572, "step": 16817 }, { "epoch": 0.5771448181194234, "grad_norm": 0.8240837256249813, "learning_rate": 4.001156045638458e-06, "loss": 0.2712, "step": 16818 }, { "epoch": 0.5771791352093343, "grad_norm": 0.7000588714557081, "learning_rate": 4.000611514789766e-06, "loss": 0.2651, "step": 16819 }, { "epoch": 0.577213452299245, "grad_norm": 0.6451470275152815, "learning_rate": 4.000066996287355e-06, "loss": 0.2275, "step": 16820 }, { "epoch": 0.5772477693891558, "grad_norm": 0.840774074008583, "learning_rate": 3.999522490137952e-06, "loss": 0.3174, "step": 16821 }, { "epoch": 0.5772820864790665, "grad_norm": 0.7559189734671458, "learning_rate": 3.998977996348279e-06, "loss": 0.3256, "step": 16822 }, { "epoch": 0.5773164035689774, "grad_norm": 0.8012926113663579, "learning_rate": 3.998433514925067e-06, "loss": 0.2757, "step": 16823 }, { "epoch": 0.5773507206588882, "grad_norm": 0.7254302533969396, "learning_rate": 3.997889045875042e-06, "loss": 0.2411, "step": 16824 }, { "epoch": 0.5773850377487989, "grad_norm": 0.6848989387256645, "learning_rate": 3.9973445892049274e-06, "loss": 0.266, "step": 16825 }, { "epoch": 0.5774193548387097, "grad_norm": 0.9425817059708084, "learning_rate": 3.996800144921453e-06, "loss": 0.2619, "step": 16826 }, { "epoch": 0.5774536719286204, "grad_norm": 1.0751749040067997, "learning_rate": 3.996255713031344e-06, "loss": 0.3082, "step": 16827 }, { "epoch": 0.5774879890185313, "grad_norm": 0.7655304908927928, "learning_rate": 3.995711293541323e-06, "loss": 0.2868, "step": 16828 }, { "epoch": 0.577522306108442, "grad_norm": 0.7472098252914268, "learning_rate": 3.995166886458121e-06, "loss": 0.2747, "step": 16829 }, { "epoch": 0.5775566231983528, "grad_norm": 0.8015036058775254, "learning_rate": 3.994622491788458e-06, "loss": 0.2558, "step": 16830 }, { "epoch": 0.5775909402882635, "grad_norm": 0.7548493763479808, "learning_rate": 3.9940781095390625e-06, "loss": 0.3011, "step": 16831 }, { "epoch": 0.5776252573781744, "grad_norm": 0.7041490388285998, "learning_rate": 3.993533739716661e-06, "loss": 0.2751, "step": 16832 }, { "epoch": 0.5776595744680851, "grad_norm": 0.798068257913918, "learning_rate": 3.992989382327974e-06, "loss": 0.3252, "step": 16833 }, { "epoch": 0.5776938915579959, "grad_norm": 0.8182355548690298, "learning_rate": 3.9924450373797315e-06, "loss": 0.3041, "step": 16834 }, { "epoch": 0.5777282086479066, "grad_norm": 0.6807779872364831, "learning_rate": 3.991900704878656e-06, "loss": 0.2171, "step": 16835 }, { "epoch": 0.5777625257378174, "grad_norm": 0.7941428276814402, "learning_rate": 3.991356384831469e-06, "loss": 0.2701, "step": 16836 }, { "epoch": 0.5777968428277283, "grad_norm": 0.7863426575916247, "learning_rate": 3.990812077244901e-06, "loss": 0.2914, "step": 16837 }, { "epoch": 0.577831159917639, "grad_norm": 0.8515970722813113, "learning_rate": 3.990267782125672e-06, "loss": 0.3778, "step": 16838 }, { "epoch": 0.5778654770075498, "grad_norm": 0.7954181439007734, "learning_rate": 3.9897234994805064e-06, "loss": 0.2792, "step": 16839 }, { "epoch": 0.5778997940974605, "grad_norm": 0.9818662118597744, "learning_rate": 3.98917922931613e-06, "loss": 0.3252, "step": 16840 }, { "epoch": 0.5779341111873713, "grad_norm": 0.7330243674643515, "learning_rate": 3.988634971639267e-06, "loss": 0.2809, "step": 16841 }, { "epoch": 0.5779684282772821, "grad_norm": 0.7777832232496791, "learning_rate": 3.988090726456637e-06, "loss": 0.2572, "step": 16842 }, { "epoch": 0.5780027453671929, "grad_norm": 0.6967641369329086, "learning_rate": 3.987546493774969e-06, "loss": 0.2513, "step": 16843 }, { "epoch": 0.5780370624571036, "grad_norm": 0.8649740797798291, "learning_rate": 3.987002273600983e-06, "loss": 0.242, "step": 16844 }, { "epoch": 0.5780713795470144, "grad_norm": 0.9169267887388978, "learning_rate": 3.9864580659414006e-06, "loss": 0.315, "step": 16845 }, { "epoch": 0.5781056966369252, "grad_norm": 0.7295475111718941, "learning_rate": 3.985913870802949e-06, "loss": 0.2932, "step": 16846 }, { "epoch": 0.578140013726836, "grad_norm": 0.6396930427539207, "learning_rate": 3.9853696881923495e-06, "loss": 0.2977, "step": 16847 }, { "epoch": 0.5781743308167467, "grad_norm": 0.7481919319493968, "learning_rate": 3.984825518116323e-06, "loss": 0.2828, "step": 16848 }, { "epoch": 0.5782086479066575, "grad_norm": 0.8365083190246914, "learning_rate": 3.984281360581596e-06, "loss": 0.2921, "step": 16849 }, { "epoch": 0.5782429649965682, "grad_norm": 0.7075891113036852, "learning_rate": 3.983737215594889e-06, "loss": 0.2982, "step": 16850 }, { "epoch": 0.5782772820864791, "grad_norm": 0.7661997037240126, "learning_rate": 3.98319308316292e-06, "loss": 0.251, "step": 16851 }, { "epoch": 0.5783115991763899, "grad_norm": 0.7286313266639437, "learning_rate": 3.982648963292418e-06, "loss": 0.2565, "step": 16852 }, { "epoch": 0.5783459162663006, "grad_norm": 0.8087833183386178, "learning_rate": 3.982104855990101e-06, "loss": 0.3108, "step": 16853 }, { "epoch": 0.5783802333562114, "grad_norm": 0.751619059375067, "learning_rate": 3.981560761262691e-06, "loss": 0.2645, "step": 16854 }, { "epoch": 0.5784145504461222, "grad_norm": 0.7226741071026228, "learning_rate": 3.981016679116912e-06, "loss": 0.2995, "step": 16855 }, { "epoch": 0.578448867536033, "grad_norm": 0.7402971612305912, "learning_rate": 3.980472609559483e-06, "loss": 0.2866, "step": 16856 }, { "epoch": 0.5784831846259437, "grad_norm": 0.7367850664267139, "learning_rate": 3.979928552597126e-06, "loss": 0.3004, "step": 16857 }, { "epoch": 0.5785175017158545, "grad_norm": 0.7384762674948043, "learning_rate": 3.979384508236564e-06, "loss": 0.2451, "step": 16858 }, { "epoch": 0.5785518188057652, "grad_norm": 0.7264980831608208, "learning_rate": 3.978840476484515e-06, "loss": 0.2588, "step": 16859 }, { "epoch": 0.5785861358956761, "grad_norm": 0.922133120329162, "learning_rate": 3.978296457347701e-06, "loss": 0.25, "step": 16860 }, { "epoch": 0.5786204529855868, "grad_norm": 0.832196261835304, "learning_rate": 3.977752450832844e-06, "loss": 0.3077, "step": 16861 }, { "epoch": 0.5786547700754976, "grad_norm": 0.8146806455693245, "learning_rate": 3.977208456946663e-06, "loss": 0.3251, "step": 16862 }, { "epoch": 0.5786890871654083, "grad_norm": 0.8738376597652652, "learning_rate": 3.9766644756958795e-06, "loss": 0.322, "step": 16863 }, { "epoch": 0.5787234042553191, "grad_norm": 0.866619679813148, "learning_rate": 3.976120507087215e-06, "loss": 0.2507, "step": 16864 }, { "epoch": 0.57875772134523, "grad_norm": 0.6427690078154297, "learning_rate": 3.975576551127384e-06, "loss": 0.2342, "step": 16865 }, { "epoch": 0.5787920384351407, "grad_norm": 0.7054764571180315, "learning_rate": 3.975032607823114e-06, "loss": 0.2912, "step": 16866 }, { "epoch": 0.5788263555250515, "grad_norm": 0.8292431565100777, "learning_rate": 3.9744886771811195e-06, "loss": 0.2638, "step": 16867 }, { "epoch": 0.5788606726149622, "grad_norm": 0.7367031350127026, "learning_rate": 3.9739447592081215e-06, "loss": 0.2986, "step": 16868 }, { "epoch": 0.5788949897048731, "grad_norm": 0.7818751010589269, "learning_rate": 3.9734008539108395e-06, "loss": 0.2639, "step": 16869 }, { "epoch": 0.5789293067947838, "grad_norm": 0.7387530320285008, "learning_rate": 3.9728569612959946e-06, "loss": 0.2777, "step": 16870 }, { "epoch": 0.5789636238846946, "grad_norm": 0.7327889195724991, "learning_rate": 3.972313081370303e-06, "loss": 0.2601, "step": 16871 }, { "epoch": 0.5789979409746053, "grad_norm": 0.7572555325271833, "learning_rate": 3.9717692141404865e-06, "loss": 0.3215, "step": 16872 }, { "epoch": 0.5790322580645161, "grad_norm": 0.8120418817331555, "learning_rate": 3.971225359613262e-06, "loss": 0.2564, "step": 16873 }, { "epoch": 0.5790665751544269, "grad_norm": 0.8966646676114127, "learning_rate": 3.9706815177953476e-06, "loss": 0.2497, "step": 16874 }, { "epoch": 0.5791008922443377, "grad_norm": 0.8906945912820657, "learning_rate": 3.970137688693464e-06, "loss": 0.298, "step": 16875 }, { "epoch": 0.5791352093342484, "grad_norm": 0.8972491032430427, "learning_rate": 3.969593872314329e-06, "loss": 0.2581, "step": 16876 }, { "epoch": 0.5791695264241592, "grad_norm": 0.8938360197959727, "learning_rate": 3.969050068664659e-06, "loss": 0.2418, "step": 16877 }, { "epoch": 0.5792038435140701, "grad_norm": 0.7811468627272763, "learning_rate": 3.968506277751174e-06, "loss": 0.2358, "step": 16878 }, { "epoch": 0.5792381606039808, "grad_norm": 0.7440070787690868, "learning_rate": 3.967962499580593e-06, "loss": 0.2887, "step": 16879 }, { "epoch": 0.5792724776938916, "grad_norm": 0.8105764227338034, "learning_rate": 3.967418734159629e-06, "loss": 0.2935, "step": 16880 }, { "epoch": 0.5793067947838023, "grad_norm": 0.7913507102763944, "learning_rate": 3.966874981495006e-06, "loss": 0.2756, "step": 16881 }, { "epoch": 0.5793411118737131, "grad_norm": 0.911563664137742, "learning_rate": 3.966331241593436e-06, "loss": 0.3414, "step": 16882 }, { "epoch": 0.5793754289636239, "grad_norm": 0.7181573032716039, "learning_rate": 3.9657875144616385e-06, "loss": 0.2973, "step": 16883 }, { "epoch": 0.5794097460535347, "grad_norm": 0.699286936409058, "learning_rate": 3.96524380010633e-06, "loss": 0.2887, "step": 16884 }, { "epoch": 0.5794440631434454, "grad_norm": 0.710103028934476, "learning_rate": 3.96470009853423e-06, "loss": 0.3072, "step": 16885 }, { "epoch": 0.5794783802333562, "grad_norm": 0.7336670306950287, "learning_rate": 3.964156409752051e-06, "loss": 0.3154, "step": 16886 }, { "epoch": 0.5795126973232669, "grad_norm": 0.8388241657295338, "learning_rate": 3.963612733766513e-06, "loss": 0.3003, "step": 16887 }, { "epoch": 0.5795470144131778, "grad_norm": 0.8708971928623931, "learning_rate": 3.9630690705843325e-06, "loss": 0.2927, "step": 16888 }, { "epoch": 0.5795813315030885, "grad_norm": 0.8891082677494136, "learning_rate": 3.9625254202122225e-06, "loss": 0.2772, "step": 16889 }, { "epoch": 0.5796156485929993, "grad_norm": 0.8084683467978883, "learning_rate": 3.961981782656901e-06, "loss": 0.2574, "step": 16890 }, { "epoch": 0.57964996568291, "grad_norm": 0.781626117323803, "learning_rate": 3.961438157925086e-06, "loss": 0.2758, "step": 16891 }, { "epoch": 0.5796842827728209, "grad_norm": 0.768876138330284, "learning_rate": 3.960894546023491e-06, "loss": 0.263, "step": 16892 }, { "epoch": 0.5797185998627317, "grad_norm": 0.7355839015197593, "learning_rate": 3.960350946958834e-06, "loss": 0.2478, "step": 16893 }, { "epoch": 0.5797529169526424, "grad_norm": 0.8638658360753687, "learning_rate": 3.959807360737826e-06, "loss": 0.287, "step": 16894 }, { "epoch": 0.5797872340425532, "grad_norm": 0.8223853898252554, "learning_rate": 3.959263787367188e-06, "loss": 0.2706, "step": 16895 }, { "epoch": 0.5798215511324639, "grad_norm": 0.7622547916361871, "learning_rate": 3.9587202268536315e-06, "loss": 0.289, "step": 16896 }, { "epoch": 0.5798558682223748, "grad_norm": 0.7389331170210354, "learning_rate": 3.958176679203872e-06, "loss": 0.2621, "step": 16897 }, { "epoch": 0.5798901853122855, "grad_norm": 0.8024947658141256, "learning_rate": 3.957633144424625e-06, "loss": 0.3064, "step": 16898 }, { "epoch": 0.5799245024021963, "grad_norm": 0.8308094437317811, "learning_rate": 3.957089622522607e-06, "loss": 0.2915, "step": 16899 }, { "epoch": 0.579958819492107, "grad_norm": 0.8561866898151089, "learning_rate": 3.956546113504528e-06, "loss": 0.252, "step": 16900 }, { "epoch": 0.5799931365820179, "grad_norm": 0.7762297907542653, "learning_rate": 3.956002617377108e-06, "loss": 0.248, "step": 16901 }, { "epoch": 0.5800274536719287, "grad_norm": 0.8278276920874807, "learning_rate": 3.9554591341470575e-06, "loss": 0.3008, "step": 16902 }, { "epoch": 0.5800617707618394, "grad_norm": 0.7547973235190563, "learning_rate": 3.954915663821091e-06, "loss": 0.219, "step": 16903 }, { "epoch": 0.5800960878517502, "grad_norm": 0.7383081169904331, "learning_rate": 3.9543722064059236e-06, "loss": 0.2832, "step": 16904 }, { "epoch": 0.5801304049416609, "grad_norm": 0.8634955530873327, "learning_rate": 3.953828761908268e-06, "loss": 0.2344, "step": 16905 }, { "epoch": 0.5801647220315718, "grad_norm": 0.801435682942371, "learning_rate": 3.953285330334837e-06, "loss": 0.2834, "step": 16906 }, { "epoch": 0.5801990391214825, "grad_norm": 0.7837399559997069, "learning_rate": 3.952741911692348e-06, "loss": 0.2699, "step": 16907 }, { "epoch": 0.5802333562113933, "grad_norm": 0.7261847253492651, "learning_rate": 3.952198505987512e-06, "loss": 0.2625, "step": 16908 }, { "epoch": 0.580267673301304, "grad_norm": 0.709609277765438, "learning_rate": 3.9516551132270395e-06, "loss": 0.2928, "step": 16909 }, { "epoch": 0.5803019903912148, "grad_norm": 0.7673060604522669, "learning_rate": 3.951111733417648e-06, "loss": 0.2234, "step": 16910 }, { "epoch": 0.5803363074811256, "grad_norm": 0.8271911467468093, "learning_rate": 3.9505683665660464e-06, "loss": 0.3006, "step": 16911 }, { "epoch": 0.5803706245710364, "grad_norm": 0.8004064666844154, "learning_rate": 3.950025012678949e-06, "loss": 0.2842, "step": 16912 }, { "epoch": 0.5804049416609471, "grad_norm": 0.7978843289146287, "learning_rate": 3.949481671763069e-06, "loss": 0.2802, "step": 16913 }, { "epoch": 0.5804392587508579, "grad_norm": 0.7575380604068241, "learning_rate": 3.94893834382512e-06, "loss": 0.278, "step": 16914 }, { "epoch": 0.5804735758407688, "grad_norm": 0.6966497770230247, "learning_rate": 3.948395028871809e-06, "loss": 0.2275, "step": 16915 }, { "epoch": 0.5805078929306795, "grad_norm": 0.7304166007407962, "learning_rate": 3.9478517269098535e-06, "loss": 0.2694, "step": 16916 }, { "epoch": 0.5805422100205903, "grad_norm": 0.6618125364987124, "learning_rate": 3.947308437945963e-06, "loss": 0.2744, "step": 16917 }, { "epoch": 0.580576527110501, "grad_norm": 0.7324780787685102, "learning_rate": 3.946765161986849e-06, "loss": 0.2435, "step": 16918 }, { "epoch": 0.5806108442004118, "grad_norm": 0.8217291794619624, "learning_rate": 3.946221899039224e-06, "loss": 0.2669, "step": 16919 }, { "epoch": 0.5806451612903226, "grad_norm": 0.8764916250091472, "learning_rate": 3.9456786491097985e-06, "loss": 0.3003, "step": 16920 }, { "epoch": 0.5806794783802334, "grad_norm": 0.7632712772065996, "learning_rate": 3.9451354122052825e-06, "loss": 0.3006, "step": 16921 }, { "epoch": 0.5807137954701441, "grad_norm": 0.6866765854980198, "learning_rate": 3.944592188332391e-06, "loss": 0.2803, "step": 16922 }, { "epoch": 0.5807481125600549, "grad_norm": 0.7407790606503661, "learning_rate": 3.94404897749783e-06, "loss": 0.3112, "step": 16923 }, { "epoch": 0.5807824296499657, "grad_norm": 0.7584900935859243, "learning_rate": 3.9435057797083155e-06, "loss": 0.286, "step": 16924 }, { "epoch": 0.5808167467398765, "grad_norm": 0.8248759775761967, "learning_rate": 3.942962594970556e-06, "loss": 0.2643, "step": 16925 }, { "epoch": 0.5808510638297872, "grad_norm": 0.7271088567931402, "learning_rate": 3.942419423291258e-06, "loss": 0.2972, "step": 16926 }, { "epoch": 0.580885380919698, "grad_norm": 0.7545313946961364, "learning_rate": 3.9418762646771365e-06, "loss": 0.2854, "step": 16927 }, { "epoch": 0.5809196980096087, "grad_norm": 0.6882727243878958, "learning_rate": 3.9413331191349e-06, "loss": 0.2846, "step": 16928 }, { "epoch": 0.5809540150995196, "grad_norm": 0.8595405271484892, "learning_rate": 3.940789986671258e-06, "loss": 0.2684, "step": 16929 }, { "epoch": 0.5809883321894304, "grad_norm": 0.8422272902267718, "learning_rate": 3.9402468672929225e-06, "loss": 0.2679, "step": 16930 }, { "epoch": 0.5810226492793411, "grad_norm": 0.767689383543045, "learning_rate": 3.939703761006601e-06, "loss": 0.2831, "step": 16931 }, { "epoch": 0.5810569663692519, "grad_norm": 0.8534046739912915, "learning_rate": 3.939160667819001e-06, "loss": 0.2351, "step": 16932 }, { "epoch": 0.5810912834591626, "grad_norm": 0.7463316394847951, "learning_rate": 3.938617587736837e-06, "loss": 0.2774, "step": 16933 }, { "epoch": 0.5811256005490735, "grad_norm": 0.7719303823741626, "learning_rate": 3.938074520766814e-06, "loss": 0.2285, "step": 16934 }, { "epoch": 0.5811599176389842, "grad_norm": 0.8267268941266869, "learning_rate": 3.937531466915642e-06, "loss": 0.2993, "step": 16935 }, { "epoch": 0.581194234728895, "grad_norm": 0.7852397711764645, "learning_rate": 3.93698842619003e-06, "loss": 0.2859, "step": 16936 }, { "epoch": 0.5812285518188057, "grad_norm": 0.6540949732758182, "learning_rate": 3.936445398596688e-06, "loss": 0.2612, "step": 16937 }, { "epoch": 0.5812628689087166, "grad_norm": 0.6889181946665998, "learning_rate": 3.935902384142321e-06, "loss": 0.2753, "step": 16938 }, { "epoch": 0.5812971859986273, "grad_norm": 0.8334971598208262, "learning_rate": 3.935359382833641e-06, "loss": 0.298, "step": 16939 }, { "epoch": 0.5813315030885381, "grad_norm": 0.7716838642110851, "learning_rate": 3.934816394677355e-06, "loss": 0.2916, "step": 16940 }, { "epoch": 0.5813658201784488, "grad_norm": 0.7906364999512148, "learning_rate": 3.934273419680169e-06, "loss": 0.3633, "step": 16941 }, { "epoch": 0.5814001372683596, "grad_norm": 0.8377393302305749, "learning_rate": 3.933730457848793e-06, "loss": 0.3221, "step": 16942 }, { "epoch": 0.5814344543582705, "grad_norm": 0.716689771024583, "learning_rate": 3.933187509189935e-06, "loss": 0.266, "step": 16943 }, { "epoch": 0.5814687714481812, "grad_norm": 0.7819707393303598, "learning_rate": 3.932644573710299e-06, "loss": 0.2649, "step": 16944 }, { "epoch": 0.581503088538092, "grad_norm": 0.7300988005556305, "learning_rate": 3.932101651416597e-06, "loss": 0.2416, "step": 16945 }, { "epoch": 0.5815374056280027, "grad_norm": 0.8528877682185586, "learning_rate": 3.931558742315535e-06, "loss": 0.3267, "step": 16946 }, { "epoch": 0.5815717227179136, "grad_norm": 0.8615268499902015, "learning_rate": 3.931015846413816e-06, "loss": 0.3163, "step": 16947 }, { "epoch": 0.5816060398078243, "grad_norm": 0.7584956220082688, "learning_rate": 3.930472963718153e-06, "loss": 0.268, "step": 16948 }, { "epoch": 0.5816403568977351, "grad_norm": 0.7604526219685447, "learning_rate": 3.929930094235248e-06, "loss": 0.2756, "step": 16949 }, { "epoch": 0.5816746739876458, "grad_norm": 0.821798990634856, "learning_rate": 3.929387237971807e-06, "loss": 0.3088, "step": 16950 }, { "epoch": 0.5817089910775566, "grad_norm": 0.7218728090086403, "learning_rate": 3.928844394934542e-06, "loss": 0.2251, "step": 16951 }, { "epoch": 0.5817433081674674, "grad_norm": 0.841461397713792, "learning_rate": 3.928301565130155e-06, "loss": 0.2822, "step": 16952 }, { "epoch": 0.5817776252573782, "grad_norm": 0.7830993338601672, "learning_rate": 3.927758748565351e-06, "loss": 0.3039, "step": 16953 }, { "epoch": 0.581811942347289, "grad_norm": 0.775887690259033, "learning_rate": 3.927215945246839e-06, "loss": 0.3277, "step": 16954 }, { "epoch": 0.5818462594371997, "grad_norm": 0.8709876682438077, "learning_rate": 3.926673155181321e-06, "loss": 0.2585, "step": 16955 }, { "epoch": 0.5818805765271104, "grad_norm": 0.8504316947153104, "learning_rate": 3.926130378375508e-06, "loss": 0.3552, "step": 16956 }, { "epoch": 0.5819148936170213, "grad_norm": 0.7782838297518291, "learning_rate": 3.925587614836101e-06, "loss": 0.3208, "step": 16957 }, { "epoch": 0.5819492107069321, "grad_norm": 0.7614706881786105, "learning_rate": 3.925044864569806e-06, "loss": 0.2768, "step": 16958 }, { "epoch": 0.5819835277968428, "grad_norm": 0.6522569322338335, "learning_rate": 3.924502127583328e-06, "loss": 0.3035, "step": 16959 }, { "epoch": 0.5820178448867536, "grad_norm": 0.7575625901087351, "learning_rate": 3.923959403883374e-06, "loss": 0.271, "step": 16960 }, { "epoch": 0.5820521619766644, "grad_norm": 0.7340673535135469, "learning_rate": 3.923416693476644e-06, "loss": 0.2413, "step": 16961 }, { "epoch": 0.5820864790665752, "grad_norm": 0.7989662848880301, "learning_rate": 3.922873996369849e-06, "loss": 0.3342, "step": 16962 }, { "epoch": 0.5821207961564859, "grad_norm": 0.7739442330549385, "learning_rate": 3.922331312569689e-06, "loss": 0.235, "step": 16963 }, { "epoch": 0.5821551132463967, "grad_norm": 0.8968748755177022, "learning_rate": 3.921788642082867e-06, "loss": 0.2577, "step": 16964 }, { "epoch": 0.5821894303363074, "grad_norm": 0.8292738179117379, "learning_rate": 3.921245984916091e-06, "loss": 0.2333, "step": 16965 }, { "epoch": 0.5822237474262183, "grad_norm": 0.879792224859367, "learning_rate": 3.920703341076063e-06, "loss": 0.2741, "step": 16966 }, { "epoch": 0.582258064516129, "grad_norm": 0.7143000259130339, "learning_rate": 3.920160710569486e-06, "loss": 0.2724, "step": 16967 }, { "epoch": 0.5822923816060398, "grad_norm": 0.8045461248491433, "learning_rate": 3.9196180934030666e-06, "loss": 0.2525, "step": 16968 }, { "epoch": 0.5823266986959506, "grad_norm": 0.6804995406351357, "learning_rate": 3.9190754895835056e-06, "loss": 0.2433, "step": 16969 }, { "epoch": 0.5823610157858614, "grad_norm": 1.0777313477368842, "learning_rate": 3.9185328991175045e-06, "loss": 0.2555, "step": 16970 }, { "epoch": 0.5823953328757722, "grad_norm": 0.76439975714627, "learning_rate": 3.917990322011771e-06, "loss": 0.2756, "step": 16971 }, { "epoch": 0.5824296499656829, "grad_norm": 0.7791059221374169, "learning_rate": 3.9174477582730044e-06, "loss": 0.2208, "step": 16972 }, { "epoch": 0.5824639670555937, "grad_norm": 0.832046057924654, "learning_rate": 3.916905207907907e-06, "loss": 0.353, "step": 16973 }, { "epoch": 0.5824982841455044, "grad_norm": 0.9152682238784468, "learning_rate": 3.916362670923186e-06, "loss": 0.2775, "step": 16974 }, { "epoch": 0.5825326012354153, "grad_norm": 0.8253213200131979, "learning_rate": 3.91582014732554e-06, "loss": 0.3119, "step": 16975 }, { "epoch": 0.582566918325326, "grad_norm": 0.820216540457072, "learning_rate": 3.915277637121671e-06, "loss": 0.3039, "step": 16976 }, { "epoch": 0.5826012354152368, "grad_norm": 0.757218690338647, "learning_rate": 3.914735140318283e-06, "loss": 0.2906, "step": 16977 }, { "epoch": 0.5826355525051475, "grad_norm": 1.0806637391170402, "learning_rate": 3.914192656922077e-06, "loss": 0.3178, "step": 16978 }, { "epoch": 0.5826698695950583, "grad_norm": 0.7360343992720796, "learning_rate": 3.913650186939754e-06, "loss": 0.2468, "step": 16979 }, { "epoch": 0.5827041866849691, "grad_norm": 0.8065849933560061, "learning_rate": 3.913107730378017e-06, "loss": 0.2679, "step": 16980 }, { "epoch": 0.5827385037748799, "grad_norm": 0.770930022866922, "learning_rate": 3.912565287243567e-06, "loss": 0.2553, "step": 16981 }, { "epoch": 0.5827728208647907, "grad_norm": 0.6605522402999608, "learning_rate": 3.912022857543104e-06, "loss": 0.2414, "step": 16982 }, { "epoch": 0.5828071379547014, "grad_norm": 0.650866145156351, "learning_rate": 3.911480441283332e-06, "loss": 0.2478, "step": 16983 }, { "epoch": 0.5828414550446123, "grad_norm": 0.813376322001345, "learning_rate": 3.91093803847095e-06, "loss": 0.3015, "step": 16984 }, { "epoch": 0.582875772134523, "grad_norm": 0.7808045317824447, "learning_rate": 3.910395649112656e-06, "loss": 0.2623, "step": 16985 }, { "epoch": 0.5829100892244338, "grad_norm": 0.6975726855988308, "learning_rate": 3.909853273215155e-06, "loss": 0.2291, "step": 16986 }, { "epoch": 0.5829444063143445, "grad_norm": 0.9176073045656422, "learning_rate": 3.909310910785146e-06, "loss": 0.2987, "step": 16987 }, { "epoch": 0.5829787234042553, "grad_norm": 0.7769724122955262, "learning_rate": 3.9087685618293295e-06, "loss": 0.2794, "step": 16988 }, { "epoch": 0.5830130404941661, "grad_norm": 0.8256456100323661, "learning_rate": 3.9082262263544055e-06, "loss": 0.3187, "step": 16989 }, { "epoch": 0.5830473575840769, "grad_norm": 0.8102802325288516, "learning_rate": 3.907683904367071e-06, "loss": 0.294, "step": 16990 }, { "epoch": 0.5830816746739876, "grad_norm": 0.7617244505796343, "learning_rate": 3.907141595874031e-06, "loss": 0.2556, "step": 16991 }, { "epoch": 0.5831159917638984, "grad_norm": 0.7340019546621237, "learning_rate": 3.906599300881983e-06, "loss": 0.2547, "step": 16992 }, { "epoch": 0.5831503088538093, "grad_norm": 0.7760505575264798, "learning_rate": 3.9060570193976225e-06, "loss": 0.2783, "step": 16993 }, { "epoch": 0.58318462594372, "grad_norm": 0.7547009540511295, "learning_rate": 3.905514751427655e-06, "loss": 0.2437, "step": 16994 }, { "epoch": 0.5832189430336308, "grad_norm": 0.9004677666338815, "learning_rate": 3.904972496978776e-06, "loss": 0.2975, "step": 16995 }, { "epoch": 0.5832532601235415, "grad_norm": 0.8227121900109529, "learning_rate": 3.904430256057684e-06, "loss": 0.3308, "step": 16996 }, { "epoch": 0.5832875772134523, "grad_norm": 0.7942702086439646, "learning_rate": 3.90388802867108e-06, "loss": 0.2486, "step": 16997 }, { "epoch": 0.5833218943033631, "grad_norm": 0.759463183592047, "learning_rate": 3.903345814825662e-06, "loss": 0.2469, "step": 16998 }, { "epoch": 0.5833562113932739, "grad_norm": 0.7216376774293786, "learning_rate": 3.902803614528126e-06, "loss": 0.264, "step": 16999 }, { "epoch": 0.5833905284831846, "grad_norm": 1.095126781940013, "learning_rate": 3.902261427785175e-06, "loss": 0.2425, "step": 17000 }, { "epoch": 0.5834248455730954, "grad_norm": 0.822272997094654, "learning_rate": 3.901719254603502e-06, "loss": 0.2982, "step": 17001 }, { "epoch": 0.5834591626630061, "grad_norm": 0.7764148667730896, "learning_rate": 3.901177094989807e-06, "loss": 0.2734, "step": 17002 }, { "epoch": 0.583493479752917, "grad_norm": 0.7281977036738853, "learning_rate": 3.900634948950788e-06, "loss": 0.2081, "step": 17003 }, { "epoch": 0.5835277968428277, "grad_norm": 0.7855573042452189, "learning_rate": 3.900092816493144e-06, "loss": 0.3414, "step": 17004 }, { "epoch": 0.5835621139327385, "grad_norm": 0.7190865330805419, "learning_rate": 3.899550697623569e-06, "loss": 0.3128, "step": 17005 }, { "epoch": 0.5835964310226492, "grad_norm": 0.7537169735338544, "learning_rate": 3.899008592348763e-06, "loss": 0.2788, "step": 17006 }, { "epoch": 0.5836307481125601, "grad_norm": 0.7933758373383283, "learning_rate": 3.898466500675424e-06, "loss": 0.29, "step": 17007 }, { "epoch": 0.5836650652024709, "grad_norm": 0.9804197442113521, "learning_rate": 3.897924422610244e-06, "loss": 0.3077, "step": 17008 }, { "epoch": 0.5836993822923816, "grad_norm": 0.7357848299920247, "learning_rate": 3.897382358159924e-06, "loss": 0.2867, "step": 17009 }, { "epoch": 0.5837336993822924, "grad_norm": 0.7870956089647552, "learning_rate": 3.8968403073311606e-06, "loss": 0.2434, "step": 17010 }, { "epoch": 0.5837680164722031, "grad_norm": 0.8238353236412344, "learning_rate": 3.8962982701306465e-06, "loss": 0.3006, "step": 17011 }, { "epoch": 0.583802333562114, "grad_norm": 0.7127748262087, "learning_rate": 3.8957562465650836e-06, "loss": 0.2546, "step": 17012 }, { "epoch": 0.5838366506520247, "grad_norm": 0.7922091015910306, "learning_rate": 3.895214236641164e-06, "loss": 0.246, "step": 17013 }, { "epoch": 0.5838709677419355, "grad_norm": 0.7764308329779231, "learning_rate": 3.894672240365582e-06, "loss": 0.2701, "step": 17014 }, { "epoch": 0.5839052848318462, "grad_norm": 0.8103251802111253, "learning_rate": 3.894130257745039e-06, "loss": 0.3016, "step": 17015 }, { "epoch": 0.5839396019217571, "grad_norm": 0.775000533895524, "learning_rate": 3.8935882887862255e-06, "loss": 0.2833, "step": 17016 }, { "epoch": 0.5839739190116678, "grad_norm": 0.7480298542412039, "learning_rate": 3.893046333495837e-06, "loss": 0.2482, "step": 17017 }, { "epoch": 0.5840082361015786, "grad_norm": 0.7432797984645357, "learning_rate": 3.892504391880574e-06, "loss": 0.2903, "step": 17018 }, { "epoch": 0.5840425531914893, "grad_norm": 0.7676000308288463, "learning_rate": 3.891962463947125e-06, "loss": 0.2247, "step": 17019 }, { "epoch": 0.5840768702814001, "grad_norm": 0.7252684549042843, "learning_rate": 3.891420549702191e-06, "loss": 0.3029, "step": 17020 }, { "epoch": 0.584111187371311, "grad_norm": 0.7957406108499409, "learning_rate": 3.890878649152463e-06, "loss": 0.2561, "step": 17021 }, { "epoch": 0.5841455044612217, "grad_norm": 0.8097516175987944, "learning_rate": 3.890336762304634e-06, "loss": 0.2383, "step": 17022 }, { "epoch": 0.5841798215511325, "grad_norm": 0.792588256433346, "learning_rate": 3.889794889165403e-06, "loss": 0.2636, "step": 17023 }, { "epoch": 0.5842141386410432, "grad_norm": 0.7403640193445576, "learning_rate": 3.8892530297414596e-06, "loss": 0.3096, "step": 17024 }, { "epoch": 0.584248455730954, "grad_norm": 0.841661073411607, "learning_rate": 3.888711184039501e-06, "loss": 0.3205, "step": 17025 }, { "epoch": 0.5842827728208648, "grad_norm": 0.7266583168321903, "learning_rate": 3.88816935206622e-06, "loss": 0.2668, "step": 17026 }, { "epoch": 0.5843170899107756, "grad_norm": 0.7806022468756635, "learning_rate": 3.887627533828312e-06, "loss": 0.2294, "step": 17027 }, { "epoch": 0.5843514070006863, "grad_norm": 0.6958214921682428, "learning_rate": 3.887085729332466e-06, "loss": 0.2657, "step": 17028 }, { "epoch": 0.5843857240905971, "grad_norm": 0.7899228971195406, "learning_rate": 3.886543938585381e-06, "loss": 0.2807, "step": 17029 }, { "epoch": 0.5844200411805079, "grad_norm": 0.7842356530429169, "learning_rate": 3.886002161593747e-06, "loss": 0.2874, "step": 17030 }, { "epoch": 0.5844543582704187, "grad_norm": 0.8985143262875919, "learning_rate": 3.8854603983642555e-06, "loss": 0.294, "step": 17031 }, { "epoch": 0.5844886753603294, "grad_norm": 0.7654959672924635, "learning_rate": 3.884918648903603e-06, "loss": 0.3021, "step": 17032 }, { "epoch": 0.5845229924502402, "grad_norm": 0.7834765620929165, "learning_rate": 3.88437691321848e-06, "loss": 0.2377, "step": 17033 }, { "epoch": 0.584557309540151, "grad_norm": 0.7083585577280079, "learning_rate": 3.8838351913155785e-06, "loss": 0.2809, "step": 17034 }, { "epoch": 0.5845916266300618, "grad_norm": 0.6892698279118866, "learning_rate": 3.883293483201594e-06, "loss": 0.2796, "step": 17035 }, { "epoch": 0.5846259437199726, "grad_norm": 0.8452248229420118, "learning_rate": 3.882751788883217e-06, "loss": 0.3125, "step": 17036 }, { "epoch": 0.5846602608098833, "grad_norm": 0.7523224080800442, "learning_rate": 3.882210108367136e-06, "loss": 0.3083, "step": 17037 }, { "epoch": 0.5846945778997941, "grad_norm": 0.9260209257848961, "learning_rate": 3.881668441660048e-06, "loss": 0.3009, "step": 17038 }, { "epoch": 0.5847288949897049, "grad_norm": 0.731515402000625, "learning_rate": 3.881126788768642e-06, "loss": 0.2408, "step": 17039 }, { "epoch": 0.5847632120796157, "grad_norm": 0.7211980824622557, "learning_rate": 3.8805851496996085e-06, "loss": 0.2645, "step": 17040 }, { "epoch": 0.5847975291695264, "grad_norm": 0.6775147126036278, "learning_rate": 3.880043524459643e-06, "loss": 0.2624, "step": 17041 }, { "epoch": 0.5848318462594372, "grad_norm": 0.706130249570022, "learning_rate": 3.8795019130554344e-06, "loss": 0.2742, "step": 17042 }, { "epoch": 0.5848661633493479, "grad_norm": 0.8859452261611894, "learning_rate": 3.878960315493671e-06, "loss": 0.2581, "step": 17043 }, { "epoch": 0.5849004804392588, "grad_norm": 0.7598808898287888, "learning_rate": 3.878418731781048e-06, "loss": 0.3243, "step": 17044 }, { "epoch": 0.5849347975291695, "grad_norm": 0.6995077184279226, "learning_rate": 3.877877161924252e-06, "loss": 0.2722, "step": 17045 }, { "epoch": 0.5849691146190803, "grad_norm": 0.7233482448389947, "learning_rate": 3.877335605929976e-06, "loss": 0.2506, "step": 17046 }, { "epoch": 0.585003431708991, "grad_norm": 0.8249046034189691, "learning_rate": 3.876794063804909e-06, "loss": 0.2527, "step": 17047 }, { "epoch": 0.5850377487989018, "grad_norm": 0.8704111188550278, "learning_rate": 3.876252535555742e-06, "loss": 0.2578, "step": 17048 }, { "epoch": 0.5850720658888127, "grad_norm": 0.7685389915469736, "learning_rate": 3.875711021189166e-06, "loss": 0.2018, "step": 17049 }, { "epoch": 0.5851063829787234, "grad_norm": 0.7781312470852068, "learning_rate": 3.87516952071187e-06, "loss": 0.3155, "step": 17050 }, { "epoch": 0.5851407000686342, "grad_norm": 0.7778233416185434, "learning_rate": 3.874628034130541e-06, "loss": 0.3118, "step": 17051 }, { "epoch": 0.5851750171585449, "grad_norm": 0.7824843985032012, "learning_rate": 3.874086561451872e-06, "loss": 0.2391, "step": 17052 }, { "epoch": 0.5852093342484558, "grad_norm": 0.7935597562724134, "learning_rate": 3.873545102682551e-06, "loss": 0.2998, "step": 17053 }, { "epoch": 0.5852436513383665, "grad_norm": 0.8472567498156406, "learning_rate": 3.8730036578292665e-06, "loss": 0.2558, "step": 17054 }, { "epoch": 0.5852779684282773, "grad_norm": 0.7907949662334949, "learning_rate": 3.872462226898707e-06, "loss": 0.2782, "step": 17055 }, { "epoch": 0.585312285518188, "grad_norm": 0.8383415197189121, "learning_rate": 3.871920809897564e-06, "loss": 0.2599, "step": 17056 }, { "epoch": 0.5853466026080988, "grad_norm": 0.7378184697267569, "learning_rate": 3.8713794068325215e-06, "loss": 0.2574, "step": 17057 }, { "epoch": 0.5853809196980096, "grad_norm": 0.837624561598277, "learning_rate": 3.870838017710273e-06, "loss": 0.2688, "step": 17058 }, { "epoch": 0.5854152367879204, "grad_norm": 0.7388869142770438, "learning_rate": 3.870296642537505e-06, "loss": 0.2883, "step": 17059 }, { "epoch": 0.5854495538778312, "grad_norm": 0.7154213352931512, "learning_rate": 3.8697552813209015e-06, "loss": 0.3003, "step": 17060 }, { "epoch": 0.5854838709677419, "grad_norm": 0.7634314371693062, "learning_rate": 3.869213934067156e-06, "loss": 0.2677, "step": 17061 }, { "epoch": 0.5855181880576528, "grad_norm": 0.7591059714638115, "learning_rate": 3.8686726007829545e-06, "loss": 0.2481, "step": 17062 }, { "epoch": 0.5855525051475635, "grad_norm": 0.8282371780120084, "learning_rate": 3.868131281474982e-06, "loss": 0.3399, "step": 17063 }, { "epoch": 0.5855868222374743, "grad_norm": 0.885332037398398, "learning_rate": 3.86758997614993e-06, "loss": 0.3312, "step": 17064 }, { "epoch": 0.585621139327385, "grad_norm": 0.9207202284953727, "learning_rate": 3.8670486848144836e-06, "loss": 0.2733, "step": 17065 }, { "epoch": 0.5856554564172958, "grad_norm": 0.8785592695153912, "learning_rate": 3.866507407475327e-06, "loss": 0.3042, "step": 17066 }, { "epoch": 0.5856897735072066, "grad_norm": 0.7811386165331154, "learning_rate": 3.865966144139153e-06, "loss": 0.2951, "step": 17067 }, { "epoch": 0.5857240905971174, "grad_norm": 0.7800356405425202, "learning_rate": 3.865424894812644e-06, "loss": 0.2741, "step": 17068 }, { "epoch": 0.5857584076870281, "grad_norm": 0.8061295617752289, "learning_rate": 3.864883659502486e-06, "loss": 0.2456, "step": 17069 }, { "epoch": 0.5857927247769389, "grad_norm": 0.8485452341415839, "learning_rate": 3.864342438215369e-06, "loss": 0.3008, "step": 17070 }, { "epoch": 0.5858270418668496, "grad_norm": 0.7615374935495135, "learning_rate": 3.863801230957977e-06, "loss": 0.2631, "step": 17071 }, { "epoch": 0.5858613589567605, "grad_norm": 0.7086212508450744, "learning_rate": 3.863260037736995e-06, "loss": 0.2602, "step": 17072 }, { "epoch": 0.5858956760466713, "grad_norm": 0.7552319489116603, "learning_rate": 3.862718858559111e-06, "loss": 0.236, "step": 17073 }, { "epoch": 0.585929993136582, "grad_norm": 0.7690745466024259, "learning_rate": 3.8621776934310105e-06, "loss": 0.2739, "step": 17074 }, { "epoch": 0.5859643102264928, "grad_norm": 0.7990467055495857, "learning_rate": 3.861636542359375e-06, "loss": 0.2895, "step": 17075 }, { "epoch": 0.5859986273164036, "grad_norm": 0.8190739912092037, "learning_rate": 3.861095405350894e-06, "loss": 0.2824, "step": 17076 }, { "epoch": 0.5860329444063144, "grad_norm": 0.7302656983434217, "learning_rate": 3.860554282412253e-06, "loss": 0.2724, "step": 17077 }, { "epoch": 0.5860672614962251, "grad_norm": 0.7871333953486943, "learning_rate": 3.860013173550133e-06, "loss": 0.3218, "step": 17078 }, { "epoch": 0.5861015785861359, "grad_norm": 0.7416804726413044, "learning_rate": 3.859472078771223e-06, "loss": 0.2977, "step": 17079 }, { "epoch": 0.5861358956760466, "grad_norm": 0.7756412900631228, "learning_rate": 3.858930998082204e-06, "loss": 0.2698, "step": 17080 }, { "epoch": 0.5861702127659575, "grad_norm": 0.8380468048207796, "learning_rate": 3.858389931489764e-06, "loss": 0.2799, "step": 17081 }, { "epoch": 0.5862045298558682, "grad_norm": 0.9524228388875136, "learning_rate": 3.857848879000587e-06, "loss": 0.3038, "step": 17082 }, { "epoch": 0.586238846945779, "grad_norm": 0.7531995194771667, "learning_rate": 3.857307840621351e-06, "loss": 0.2129, "step": 17083 }, { "epoch": 0.5862731640356897, "grad_norm": 0.7393200961417851, "learning_rate": 3.856766816358747e-06, "loss": 0.2424, "step": 17084 }, { "epoch": 0.5863074811256005, "grad_norm": 0.831762900878347, "learning_rate": 3.8562258062194555e-06, "loss": 0.3229, "step": 17085 }, { "epoch": 0.5863417982155114, "grad_norm": 0.6870030412778415, "learning_rate": 3.85568481021016e-06, "loss": 0.2772, "step": 17086 }, { "epoch": 0.5863761153054221, "grad_norm": 0.7005156578548211, "learning_rate": 3.855143828337547e-06, "loss": 0.2961, "step": 17087 }, { "epoch": 0.5864104323953329, "grad_norm": 0.7880994380675237, "learning_rate": 3.854602860608296e-06, "loss": 0.34, "step": 17088 }, { "epoch": 0.5864447494852436, "grad_norm": 0.808536496460415, "learning_rate": 3.85406190702909e-06, "loss": 0.2728, "step": 17089 }, { "epoch": 0.5864790665751545, "grad_norm": 0.7671202906493013, "learning_rate": 3.8535209676066145e-06, "loss": 0.2488, "step": 17090 }, { "epoch": 0.5865133836650652, "grad_norm": 0.7781459966780586, "learning_rate": 3.85298004234755e-06, "loss": 0.2935, "step": 17091 }, { "epoch": 0.586547700754976, "grad_norm": 0.7718211947077878, "learning_rate": 3.852439131258578e-06, "loss": 0.283, "step": 17092 }, { "epoch": 0.5865820178448867, "grad_norm": 0.7721635523031728, "learning_rate": 3.851898234346386e-06, "loss": 0.2809, "step": 17093 }, { "epoch": 0.5866163349347975, "grad_norm": 0.7331903969635805, "learning_rate": 3.851357351617653e-06, "loss": 0.304, "step": 17094 }, { "epoch": 0.5866506520247083, "grad_norm": 0.8667513684651433, "learning_rate": 3.850816483079057e-06, "loss": 0.2838, "step": 17095 }, { "epoch": 0.5866849691146191, "grad_norm": 0.8693956139407739, "learning_rate": 3.850275628737287e-06, "loss": 0.2473, "step": 17096 }, { "epoch": 0.5867192862045298, "grad_norm": 0.8334863491014375, "learning_rate": 3.84973478859902e-06, "loss": 0.3421, "step": 17097 }, { "epoch": 0.5867536032944406, "grad_norm": 0.7456994698581322, "learning_rate": 3.849193962670938e-06, "loss": 0.3453, "step": 17098 }, { "epoch": 0.5867879203843515, "grad_norm": 0.7687479767073389, "learning_rate": 3.848653150959723e-06, "loss": 0.2573, "step": 17099 }, { "epoch": 0.5868222374742622, "grad_norm": 0.7372807321815684, "learning_rate": 3.848112353472057e-06, "loss": 0.2381, "step": 17100 }, { "epoch": 0.586856554564173, "grad_norm": 0.7813111431930131, "learning_rate": 3.847571570214618e-06, "loss": 0.264, "step": 17101 }, { "epoch": 0.5868908716540837, "grad_norm": 0.7997073546580736, "learning_rate": 3.847030801194091e-06, "loss": 0.2604, "step": 17102 }, { "epoch": 0.5869251887439945, "grad_norm": 0.8303119613877888, "learning_rate": 3.846490046417154e-06, "loss": 0.3223, "step": 17103 }, { "epoch": 0.5869595058339053, "grad_norm": 0.7614387752239322, "learning_rate": 3.845949305890485e-06, "loss": 0.2451, "step": 17104 }, { "epoch": 0.5869938229238161, "grad_norm": 0.820108976936511, "learning_rate": 3.84540857962077e-06, "loss": 0.2523, "step": 17105 }, { "epoch": 0.5870281400137268, "grad_norm": 0.8597450466885018, "learning_rate": 3.844867867614684e-06, "loss": 0.2963, "step": 17106 }, { "epoch": 0.5870624571036376, "grad_norm": 0.7079672514975126, "learning_rate": 3.844327169878907e-06, "loss": 0.2825, "step": 17107 }, { "epoch": 0.5870967741935483, "grad_norm": 0.8891673782356937, "learning_rate": 3.843786486420124e-06, "loss": 0.2473, "step": 17108 }, { "epoch": 0.5871310912834592, "grad_norm": 0.8360072645091975, "learning_rate": 3.843245817245011e-06, "loss": 0.3185, "step": 17109 }, { "epoch": 0.58716540837337, "grad_norm": 0.6965828159845362, "learning_rate": 3.842705162360244e-06, "loss": 0.2386, "step": 17110 }, { "epoch": 0.5871997254632807, "grad_norm": 0.7664398659964523, "learning_rate": 3.842164521772508e-06, "loss": 0.2671, "step": 17111 }, { "epoch": 0.5872340425531914, "grad_norm": 0.7411062558409426, "learning_rate": 3.841623895488478e-06, "loss": 0.2357, "step": 17112 }, { "epoch": 0.5872683596431023, "grad_norm": 0.8092614324350638, "learning_rate": 3.841083283514835e-06, "loss": 0.3155, "step": 17113 }, { "epoch": 0.5873026767330131, "grad_norm": 0.680686026864754, "learning_rate": 3.840542685858257e-06, "loss": 0.2517, "step": 17114 }, { "epoch": 0.5873369938229238, "grad_norm": 0.7522720446368317, "learning_rate": 3.840002102525421e-06, "loss": 0.2643, "step": 17115 }, { "epoch": 0.5873713109128346, "grad_norm": 0.8246002237926974, "learning_rate": 3.8394615335230085e-06, "loss": 0.2709, "step": 17116 }, { "epoch": 0.5874056280027453, "grad_norm": 0.7396842056875094, "learning_rate": 3.838920978857696e-06, "loss": 0.2803, "step": 17117 }, { "epoch": 0.5874399450926562, "grad_norm": 1.0306423654755255, "learning_rate": 3.838380438536158e-06, "loss": 0.2647, "step": 17118 }, { "epoch": 0.5874742621825669, "grad_norm": 0.8192176310050018, "learning_rate": 3.837839912565078e-06, "loss": 0.3188, "step": 17119 }, { "epoch": 0.5875085792724777, "grad_norm": 0.7277644138650947, "learning_rate": 3.837299400951131e-06, "loss": 0.2339, "step": 17120 }, { "epoch": 0.5875428963623884, "grad_norm": 0.8006860518978054, "learning_rate": 3.836758903700993e-06, "loss": 0.2611, "step": 17121 }, { "epoch": 0.5875772134522993, "grad_norm": 0.9425553398459288, "learning_rate": 3.836218420821343e-06, "loss": 0.283, "step": 17122 }, { "epoch": 0.58761153054221, "grad_norm": 0.7858324956219765, "learning_rate": 3.835677952318859e-06, "loss": 0.2435, "step": 17123 }, { "epoch": 0.5876458476321208, "grad_norm": 0.7350469055613366, "learning_rate": 3.835137498200213e-06, "loss": 0.2407, "step": 17124 }, { "epoch": 0.5876801647220316, "grad_norm": 0.7425703641269169, "learning_rate": 3.8345970584720885e-06, "loss": 0.2667, "step": 17125 }, { "epoch": 0.5877144818119423, "grad_norm": 0.7936533201736187, "learning_rate": 3.834056633141159e-06, "loss": 0.3479, "step": 17126 }, { "epoch": 0.5877487989018532, "grad_norm": 0.7406757229857563, "learning_rate": 3.833516222214097e-06, "loss": 0.2796, "step": 17127 }, { "epoch": 0.5877831159917639, "grad_norm": 0.804409927651068, "learning_rate": 3.8329758256975836e-06, "loss": 0.3173, "step": 17128 }, { "epoch": 0.5878174330816747, "grad_norm": 0.7599250367019937, "learning_rate": 3.832435443598295e-06, "loss": 0.2738, "step": 17129 }, { "epoch": 0.5878517501715854, "grad_norm": 0.7121057382154399, "learning_rate": 3.831895075922902e-06, "loss": 0.2274, "step": 17130 }, { "epoch": 0.5878860672614962, "grad_norm": 0.8160148167629578, "learning_rate": 3.8313547226780864e-06, "loss": 0.3006, "step": 17131 }, { "epoch": 0.587920384351407, "grad_norm": 0.8054192078511283, "learning_rate": 3.83081438387052e-06, "loss": 0.2746, "step": 17132 }, { "epoch": 0.5879547014413178, "grad_norm": 0.845197232085027, "learning_rate": 3.830274059506878e-06, "loss": 0.3436, "step": 17133 }, { "epoch": 0.5879890185312285, "grad_norm": 0.7829959939062681, "learning_rate": 3.829733749593837e-06, "loss": 0.3184, "step": 17134 }, { "epoch": 0.5880233356211393, "grad_norm": 0.7553896276789879, "learning_rate": 3.8291934541380705e-06, "loss": 0.2639, "step": 17135 }, { "epoch": 0.5880576527110501, "grad_norm": 0.753196190817595, "learning_rate": 3.828653173146253e-06, "loss": 0.3108, "step": 17136 }, { "epoch": 0.5880919698009609, "grad_norm": 0.8596862736013317, "learning_rate": 3.828112906625061e-06, "loss": 0.2933, "step": 17137 }, { "epoch": 0.5881262868908717, "grad_norm": 0.8376017819568992, "learning_rate": 3.8275726545811685e-06, "loss": 0.2817, "step": 17138 }, { "epoch": 0.5881606039807824, "grad_norm": 0.7336872364052086, "learning_rate": 3.827032417021247e-06, "loss": 0.2696, "step": 17139 }, { "epoch": 0.5881949210706932, "grad_norm": 0.8646939474869759, "learning_rate": 3.826492193951975e-06, "loss": 0.2204, "step": 17140 }, { "epoch": 0.588229238160604, "grad_norm": 0.9585599907025022, "learning_rate": 3.825951985380023e-06, "loss": 0.2488, "step": 17141 }, { "epoch": 0.5882635552505148, "grad_norm": 0.8609954696152591, "learning_rate": 3.825411791312064e-06, "loss": 0.255, "step": 17142 }, { "epoch": 0.5882978723404255, "grad_norm": 0.75479324817224, "learning_rate": 3.8248716117547725e-06, "loss": 0.2573, "step": 17143 }, { "epoch": 0.5883321894303363, "grad_norm": 0.7868290007470844, "learning_rate": 3.824331446714824e-06, "loss": 0.3017, "step": 17144 }, { "epoch": 0.5883665065202471, "grad_norm": 0.7899362781601682, "learning_rate": 3.823791296198888e-06, "loss": 0.2623, "step": 17145 }, { "epoch": 0.5884008236101579, "grad_norm": 0.8636530840902886, "learning_rate": 3.823251160213641e-06, "loss": 0.2791, "step": 17146 }, { "epoch": 0.5884351407000686, "grad_norm": 0.7779271000036393, "learning_rate": 3.822711038765752e-06, "loss": 0.3639, "step": 17147 }, { "epoch": 0.5884694577899794, "grad_norm": 0.7540206641951382, "learning_rate": 3.822170931861898e-06, "loss": 0.2579, "step": 17148 }, { "epoch": 0.5885037748798901, "grad_norm": 0.7091900363136607, "learning_rate": 3.821630839508749e-06, "loss": 0.2543, "step": 17149 }, { "epoch": 0.588538091969801, "grad_norm": 0.7398781841053259, "learning_rate": 3.821090761712974e-06, "loss": 0.2566, "step": 17150 }, { "epoch": 0.5885724090597118, "grad_norm": 0.8477391889948368, "learning_rate": 3.820550698481249e-06, "loss": 0.2528, "step": 17151 }, { "epoch": 0.5886067261496225, "grad_norm": 0.7943594763996817, "learning_rate": 3.820010649820248e-06, "loss": 0.2866, "step": 17152 }, { "epoch": 0.5886410432395333, "grad_norm": 0.7899082269041978, "learning_rate": 3.819470615736636e-06, "loss": 0.2962, "step": 17153 }, { "epoch": 0.588675360329444, "grad_norm": 0.6933336165942869, "learning_rate": 3.81893059623709e-06, "loss": 0.2563, "step": 17154 }, { "epoch": 0.5887096774193549, "grad_norm": 0.7594826979781848, "learning_rate": 3.818390591328281e-06, "loss": 0.2242, "step": 17155 }, { "epoch": 0.5887439945092656, "grad_norm": 0.7442636000469153, "learning_rate": 3.817850601016876e-06, "loss": 0.2491, "step": 17156 }, { "epoch": 0.5887783115991764, "grad_norm": 0.657936752198367, "learning_rate": 3.81731062530955e-06, "loss": 0.2603, "step": 17157 }, { "epoch": 0.5888126286890871, "grad_norm": 0.7852081494299482, "learning_rate": 3.816770664212971e-06, "loss": 0.2572, "step": 17158 }, { "epoch": 0.588846945778998, "grad_norm": 0.7454938700783176, "learning_rate": 3.816230717733811e-06, "loss": 0.2655, "step": 17159 }, { "epoch": 0.5888812628689087, "grad_norm": 0.7623166471775207, "learning_rate": 3.815690785878742e-06, "loss": 0.2533, "step": 17160 }, { "epoch": 0.5889155799588195, "grad_norm": 0.8572054649062888, "learning_rate": 3.8151508686544325e-06, "loss": 0.2502, "step": 17161 }, { "epoch": 0.5889498970487302, "grad_norm": 0.74383775747492, "learning_rate": 3.8146109660675513e-06, "loss": 0.2629, "step": 17162 }, { "epoch": 0.588984214138641, "grad_norm": 0.7259203013656133, "learning_rate": 3.8140710781247704e-06, "loss": 0.2602, "step": 17163 }, { "epoch": 0.5890185312285519, "grad_norm": 0.7647329618419672, "learning_rate": 3.8135312048327584e-06, "loss": 0.2512, "step": 17164 }, { "epoch": 0.5890528483184626, "grad_norm": 0.7740411687875605, "learning_rate": 3.812991346198185e-06, "loss": 0.294, "step": 17165 }, { "epoch": 0.5890871654083734, "grad_norm": 0.722440102212734, "learning_rate": 3.8124515022277193e-06, "loss": 0.2907, "step": 17166 }, { "epoch": 0.5891214824982841, "grad_norm": 0.873624838856684, "learning_rate": 3.811911672928032e-06, "loss": 0.237, "step": 17167 }, { "epoch": 0.589155799588195, "grad_norm": 0.8005021460233728, "learning_rate": 3.8113718583057885e-06, "loss": 0.329, "step": 17168 }, { "epoch": 0.5891901166781057, "grad_norm": 0.7772968386237631, "learning_rate": 3.8108320583676623e-06, "loss": 0.2776, "step": 17169 }, { "epoch": 0.5892244337680165, "grad_norm": 0.6492868770948211, "learning_rate": 3.8102922731203198e-06, "loss": 0.2735, "step": 17170 }, { "epoch": 0.5892587508579272, "grad_norm": 0.7883663435590395, "learning_rate": 3.809752502570426e-06, "loss": 0.2522, "step": 17171 }, { "epoch": 0.589293067947838, "grad_norm": 0.808704517354971, "learning_rate": 3.8092127467246542e-06, "loss": 0.2908, "step": 17172 }, { "epoch": 0.5893273850377488, "grad_norm": 0.7891827109846522, "learning_rate": 3.808673005589672e-06, "loss": 0.2601, "step": 17173 }, { "epoch": 0.5893617021276596, "grad_norm": 0.7880741252946489, "learning_rate": 3.808133279172143e-06, "loss": 0.2679, "step": 17174 }, { "epoch": 0.5893960192175703, "grad_norm": 0.7895959524574548, "learning_rate": 3.80759356747874e-06, "loss": 0.3313, "step": 17175 }, { "epoch": 0.5894303363074811, "grad_norm": 0.700587672300498, "learning_rate": 3.8070538705161265e-06, "loss": 0.2612, "step": 17176 }, { "epoch": 0.5894646533973918, "grad_norm": 0.8535925280767025, "learning_rate": 3.806514188290974e-06, "loss": 0.2971, "step": 17177 }, { "epoch": 0.5894989704873027, "grad_norm": 0.7485562181712164, "learning_rate": 3.8059745208099464e-06, "loss": 0.2604, "step": 17178 }, { "epoch": 0.5895332875772135, "grad_norm": 0.7635819329923383, "learning_rate": 3.80543486807971e-06, "loss": 0.3351, "step": 17179 }, { "epoch": 0.5895676046671242, "grad_norm": 0.6974856663287351, "learning_rate": 3.8048952301069346e-06, "loss": 0.2183, "step": 17180 }, { "epoch": 0.589601921757035, "grad_norm": 0.6946558558975104, "learning_rate": 3.8043556068982844e-06, "loss": 0.2654, "step": 17181 }, { "epoch": 0.5896362388469458, "grad_norm": 0.7836815105760087, "learning_rate": 3.803815998460426e-06, "loss": 0.3136, "step": 17182 }, { "epoch": 0.5896705559368566, "grad_norm": 0.8186369632808022, "learning_rate": 3.803276404800028e-06, "loss": 0.2702, "step": 17183 }, { "epoch": 0.5897048730267673, "grad_norm": 0.7830982484576999, "learning_rate": 3.8027368259237556e-06, "loss": 0.2652, "step": 17184 }, { "epoch": 0.5897391901166781, "grad_norm": 0.8055216095439632, "learning_rate": 3.8021972618382707e-06, "loss": 0.3076, "step": 17185 }, { "epoch": 0.5897735072065888, "grad_norm": 0.8982424778049866, "learning_rate": 3.801657712550245e-06, "loss": 0.3313, "step": 17186 }, { "epoch": 0.5898078242964997, "grad_norm": 0.7099786369728095, "learning_rate": 3.8011181780663396e-06, "loss": 0.2568, "step": 17187 }, { "epoch": 0.5898421413864104, "grad_norm": 0.8139978227161764, "learning_rate": 3.8005786583932215e-06, "loss": 0.2971, "step": 17188 }, { "epoch": 0.5898764584763212, "grad_norm": 0.669947023716907, "learning_rate": 3.800039153537556e-06, "loss": 0.2911, "step": 17189 }, { "epoch": 0.589910775566232, "grad_norm": 0.7228064522818404, "learning_rate": 3.799499663506008e-06, "loss": 0.2331, "step": 17190 }, { "epoch": 0.5899450926561428, "grad_norm": 0.8723066806141123, "learning_rate": 3.7989601883052406e-06, "loss": 0.3343, "step": 17191 }, { "epoch": 0.5899794097460536, "grad_norm": 0.6552666299361585, "learning_rate": 3.7984207279419216e-06, "loss": 0.2438, "step": 17192 }, { "epoch": 0.5900137268359643, "grad_norm": 0.7679933312041938, "learning_rate": 3.797881282422714e-06, "loss": 0.3058, "step": 17193 }, { "epoch": 0.5900480439258751, "grad_norm": 0.8560053775857065, "learning_rate": 3.7973418517542794e-06, "loss": 0.2621, "step": 17194 }, { "epoch": 0.5900823610157858, "grad_norm": 0.8838097567165267, "learning_rate": 3.796802435943285e-06, "loss": 0.272, "step": 17195 }, { "epoch": 0.5901166781056967, "grad_norm": 0.7091995115279461, "learning_rate": 3.796263034996395e-06, "loss": 0.2312, "step": 17196 }, { "epoch": 0.5901509951956074, "grad_norm": 0.7903375099681876, "learning_rate": 3.795723648920269e-06, "loss": 0.3074, "step": 17197 }, { "epoch": 0.5901853122855182, "grad_norm": 0.860721272213532, "learning_rate": 3.7951842777215752e-06, "loss": 0.3006, "step": 17198 }, { "epoch": 0.5902196293754289, "grad_norm": 0.8107473582413994, "learning_rate": 3.794644921406976e-06, "loss": 0.2505, "step": 17199 }, { "epoch": 0.5902539464653397, "grad_norm": 0.8601093023824655, "learning_rate": 3.7941055799831306e-06, "loss": 0.326, "step": 17200 }, { "epoch": 0.5902882635552505, "grad_norm": 0.802762758443978, "learning_rate": 3.7935662534567073e-06, "loss": 0.2936, "step": 17201 }, { "epoch": 0.5903225806451613, "grad_norm": 0.8740473596378515, "learning_rate": 3.7930269418343653e-06, "loss": 0.2858, "step": 17202 }, { "epoch": 0.590356897735072, "grad_norm": 0.8845509110495734, "learning_rate": 3.792487645122767e-06, "loss": 0.2773, "step": 17203 }, { "epoch": 0.5903912148249828, "grad_norm": 0.7839146061610018, "learning_rate": 3.791948363328578e-06, "loss": 0.2778, "step": 17204 }, { "epoch": 0.5904255319148937, "grad_norm": 0.8102683615468116, "learning_rate": 3.791409096458457e-06, "loss": 0.282, "step": 17205 }, { "epoch": 0.5904598490048044, "grad_norm": 0.7083384689859838, "learning_rate": 3.7908698445190693e-06, "loss": 0.2289, "step": 17206 }, { "epoch": 0.5904941660947152, "grad_norm": 0.7415429504761509, "learning_rate": 3.7903306075170748e-06, "loss": 0.2457, "step": 17207 }, { "epoch": 0.5905284831846259, "grad_norm": 0.71719080874841, "learning_rate": 3.7897913854591335e-06, "loss": 0.2256, "step": 17208 }, { "epoch": 0.5905628002745367, "grad_norm": 0.7386165245225512, "learning_rate": 3.7892521783519103e-06, "loss": 0.2748, "step": 17209 }, { "epoch": 0.5905971173644475, "grad_norm": 0.7715396570949152, "learning_rate": 3.7887129862020645e-06, "loss": 0.3387, "step": 17210 }, { "epoch": 0.5906314344543583, "grad_norm": 0.7232295852800507, "learning_rate": 3.7881738090162556e-06, "loss": 0.2542, "step": 17211 }, { "epoch": 0.590665751544269, "grad_norm": 0.7106318149763013, "learning_rate": 3.7876346468011492e-06, "loss": 0.2409, "step": 17212 }, { "epoch": 0.5907000686341798, "grad_norm": 0.8807429129716066, "learning_rate": 3.7870954995634035e-06, "loss": 0.3435, "step": 17213 }, { "epoch": 0.5907343857240906, "grad_norm": 0.6729523708812987, "learning_rate": 3.786556367309676e-06, "loss": 0.2485, "step": 17214 }, { "epoch": 0.5907687028140014, "grad_norm": 0.780027327033979, "learning_rate": 3.7860172500466325e-06, "loss": 0.3427, "step": 17215 }, { "epoch": 0.5908030199039122, "grad_norm": 0.822779231374258, "learning_rate": 3.78547814778093e-06, "loss": 0.294, "step": 17216 }, { "epoch": 0.5908373369938229, "grad_norm": 0.7398043197121233, "learning_rate": 3.784939060519227e-06, "loss": 0.2466, "step": 17217 }, { "epoch": 0.5908716540837337, "grad_norm": 0.7231816838355986, "learning_rate": 3.784399988268187e-06, "loss": 0.277, "step": 17218 }, { "epoch": 0.5909059711736445, "grad_norm": 0.8301221682667584, "learning_rate": 3.783860931034468e-06, "loss": 0.2607, "step": 17219 }, { "epoch": 0.5909402882635553, "grad_norm": 0.742427184342318, "learning_rate": 3.783321888824728e-06, "loss": 0.2687, "step": 17220 }, { "epoch": 0.590974605353466, "grad_norm": 0.7331505362492798, "learning_rate": 3.7827828616456293e-06, "loss": 0.2427, "step": 17221 }, { "epoch": 0.5910089224433768, "grad_norm": 0.8147522874956098, "learning_rate": 3.782243849503829e-06, "loss": 0.2828, "step": 17222 }, { "epoch": 0.5910432395332875, "grad_norm": 0.9460568021612556, "learning_rate": 3.781704852405984e-06, "loss": 0.3084, "step": 17223 }, { "epoch": 0.5910775566231984, "grad_norm": 0.6921617121183238, "learning_rate": 3.781165870358758e-06, "loss": 0.2622, "step": 17224 }, { "epoch": 0.5911118737131091, "grad_norm": 0.9030781883342903, "learning_rate": 3.7806269033688046e-06, "loss": 0.307, "step": 17225 }, { "epoch": 0.5911461908030199, "grad_norm": 0.8591640132588735, "learning_rate": 3.780087951442784e-06, "loss": 0.3552, "step": 17226 }, { "epoch": 0.5911805078929306, "grad_norm": 0.9344715563418817, "learning_rate": 3.779549014587356e-06, "loss": 0.2762, "step": 17227 }, { "epoch": 0.5912148249828415, "grad_norm": 0.8239758091875947, "learning_rate": 3.7790100928091773e-06, "loss": 0.2594, "step": 17228 }, { "epoch": 0.5912491420727523, "grad_norm": 0.6942798184470116, "learning_rate": 3.778471186114903e-06, "loss": 0.2554, "step": 17229 }, { "epoch": 0.591283459162663, "grad_norm": 0.7849583678195329, "learning_rate": 3.7779322945111957e-06, "loss": 0.2917, "step": 17230 }, { "epoch": 0.5913177762525738, "grad_norm": 0.844519593409405, "learning_rate": 3.7773934180047083e-06, "loss": 0.3065, "step": 17231 }, { "epoch": 0.5913520933424845, "grad_norm": 0.7308050048884748, "learning_rate": 3.7768545566021004e-06, "loss": 0.2443, "step": 17232 }, { "epoch": 0.5913864104323954, "grad_norm": 0.8872192045011725, "learning_rate": 3.776315710310028e-06, "loss": 0.2615, "step": 17233 }, { "epoch": 0.5914207275223061, "grad_norm": 0.7197989805900666, "learning_rate": 3.77577687913515e-06, "loss": 0.281, "step": 17234 }, { "epoch": 0.5914550446122169, "grad_norm": 0.7583244151129334, "learning_rate": 3.7752380630841184e-06, "loss": 0.2632, "step": 17235 }, { "epoch": 0.5914893617021276, "grad_norm": 0.8019825797733688, "learning_rate": 3.7746992621635952e-06, "loss": 0.24, "step": 17236 }, { "epoch": 0.5915236787920385, "grad_norm": 0.7594871410642718, "learning_rate": 3.774160476380232e-06, "loss": 0.2714, "step": 17237 }, { "epoch": 0.5915579958819492, "grad_norm": 0.7692251437370089, "learning_rate": 3.773621705740689e-06, "loss": 0.2973, "step": 17238 }, { "epoch": 0.59159231297186, "grad_norm": 0.7121969230696285, "learning_rate": 3.7730829502516185e-06, "loss": 0.2606, "step": 17239 }, { "epoch": 0.5916266300617707, "grad_norm": 0.819334576799016, "learning_rate": 3.7725442099196784e-06, "loss": 0.3262, "step": 17240 }, { "epoch": 0.5916609471516815, "grad_norm": 0.7888640834826203, "learning_rate": 3.7720054847515232e-06, "loss": 0.2619, "step": 17241 }, { "epoch": 0.5916952642415924, "grad_norm": 0.7861002247062332, "learning_rate": 3.771466774753809e-06, "loss": 0.2401, "step": 17242 }, { "epoch": 0.5917295813315031, "grad_norm": 0.8862741196452796, "learning_rate": 3.7709280799331894e-06, "loss": 0.2602, "step": 17243 }, { "epoch": 0.5917638984214139, "grad_norm": 0.8808860054963183, "learning_rate": 3.7703894002963214e-06, "loss": 0.2961, "step": 17244 }, { "epoch": 0.5917982155113246, "grad_norm": 0.7823862588843169, "learning_rate": 3.7698507358498597e-06, "loss": 0.2919, "step": 17245 }, { "epoch": 0.5918325326012354, "grad_norm": 0.678314664538904, "learning_rate": 3.769312086600456e-06, "loss": 0.2441, "step": 17246 }, { "epoch": 0.5918668496911462, "grad_norm": 0.8420947974056884, "learning_rate": 3.768773452554767e-06, "loss": 0.2826, "step": 17247 }, { "epoch": 0.591901166781057, "grad_norm": 0.749981824251271, "learning_rate": 3.7682348337194476e-06, "loss": 0.3087, "step": 17248 }, { "epoch": 0.5919354838709677, "grad_norm": 0.7026415862226095, "learning_rate": 3.767696230101149e-06, "loss": 0.2892, "step": 17249 }, { "epoch": 0.5919698009608785, "grad_norm": 0.7068256396485003, "learning_rate": 3.7671576417065285e-06, "loss": 0.2617, "step": 17250 }, { "epoch": 0.5920041180507893, "grad_norm": 0.7612956135398194, "learning_rate": 3.766619068542238e-06, "loss": 0.2573, "step": 17251 }, { "epoch": 0.5920384351407001, "grad_norm": 0.7605031863664486, "learning_rate": 3.766080510614929e-06, "loss": 0.2998, "step": 17252 }, { "epoch": 0.5920727522306108, "grad_norm": 0.7522171973363465, "learning_rate": 3.765541967931259e-06, "loss": 0.2954, "step": 17253 }, { "epoch": 0.5921070693205216, "grad_norm": 0.7754778079685394, "learning_rate": 3.7650034404978776e-06, "loss": 0.3189, "step": 17254 }, { "epoch": 0.5921413864104323, "grad_norm": 0.78657726836599, "learning_rate": 3.7644649283214384e-06, "loss": 0.3084, "step": 17255 }, { "epoch": 0.5921757035003432, "grad_norm": 0.813187995102222, "learning_rate": 3.763926431408595e-06, "loss": 0.2953, "step": 17256 }, { "epoch": 0.592210020590254, "grad_norm": 0.9056459190797181, "learning_rate": 3.7633879497660003e-06, "loss": 0.2898, "step": 17257 }, { "epoch": 0.5922443376801647, "grad_norm": 0.6134991485592848, "learning_rate": 3.7628494834003034e-06, "loss": 0.2127, "step": 17258 }, { "epoch": 0.5922786547700755, "grad_norm": 0.7804269153770249, "learning_rate": 3.762311032318161e-06, "loss": 0.2643, "step": 17259 }, { "epoch": 0.5923129718599863, "grad_norm": 0.7946706176971732, "learning_rate": 3.761772596526223e-06, "loss": 0.2526, "step": 17260 }, { "epoch": 0.5923472889498971, "grad_norm": 0.7240275828306112, "learning_rate": 3.761234176031139e-06, "loss": 0.2329, "step": 17261 }, { "epoch": 0.5923816060398078, "grad_norm": 0.7069033190956153, "learning_rate": 3.760695770839563e-06, "loss": 0.2565, "step": 17262 }, { "epoch": 0.5924159231297186, "grad_norm": 0.670968321541192, "learning_rate": 3.760157380958147e-06, "loss": 0.2871, "step": 17263 }, { "epoch": 0.5924502402196293, "grad_norm": 0.7360324575450998, "learning_rate": 3.759619006393539e-06, "loss": 0.2736, "step": 17264 }, { "epoch": 0.5924845573095402, "grad_norm": 0.8388714466907514, "learning_rate": 3.7590806471523943e-06, "loss": 0.3455, "step": 17265 }, { "epoch": 0.592518874399451, "grad_norm": 0.8643700951920079, "learning_rate": 3.7585423032413615e-06, "loss": 0.3015, "step": 17266 }, { "epoch": 0.5925531914893617, "grad_norm": 0.7656159809659163, "learning_rate": 3.758003974667089e-06, "loss": 0.2499, "step": 17267 }, { "epoch": 0.5925875085792724, "grad_norm": 0.7426159454743835, "learning_rate": 3.7574656614362307e-06, "loss": 0.2865, "step": 17268 }, { "epoch": 0.5926218256691832, "grad_norm": 0.72958452407548, "learning_rate": 3.7569273635554345e-06, "loss": 0.3138, "step": 17269 }, { "epoch": 0.5926561427590941, "grad_norm": 0.7494692117456915, "learning_rate": 3.756389081031352e-06, "loss": 0.3198, "step": 17270 }, { "epoch": 0.5926904598490048, "grad_norm": 0.8433116753364739, "learning_rate": 3.7558508138706335e-06, "loss": 0.2141, "step": 17271 }, { "epoch": 0.5927247769389156, "grad_norm": 0.8222458125201162, "learning_rate": 3.755312562079926e-06, "loss": 0.2801, "step": 17272 }, { "epoch": 0.5927590940288263, "grad_norm": 0.848586290865963, "learning_rate": 3.7547743256658818e-06, "loss": 0.2763, "step": 17273 }, { "epoch": 0.5927934111187372, "grad_norm": 0.7157025208592385, "learning_rate": 3.75423610463515e-06, "loss": 0.2319, "step": 17274 }, { "epoch": 0.5928277282086479, "grad_norm": 0.8283564425951019, "learning_rate": 3.7536978989943764e-06, "loss": 0.2518, "step": 17275 }, { "epoch": 0.5928620452985587, "grad_norm": 0.8065701993290759, "learning_rate": 3.7531597087502136e-06, "loss": 0.3276, "step": 17276 }, { "epoch": 0.5928963623884694, "grad_norm": 0.7592963810320879, "learning_rate": 3.7526215339093084e-06, "loss": 0.2431, "step": 17277 }, { "epoch": 0.5929306794783802, "grad_norm": 0.8110358253612805, "learning_rate": 3.7520833744783086e-06, "loss": 0.3067, "step": 17278 }, { "epoch": 0.592964996568291, "grad_norm": 0.6851631190878914, "learning_rate": 3.751545230463866e-06, "loss": 0.256, "step": 17279 }, { "epoch": 0.5929993136582018, "grad_norm": 0.6857268354014057, "learning_rate": 3.751007101872627e-06, "loss": 0.3013, "step": 17280 }, { "epoch": 0.5930336307481126, "grad_norm": 0.7152300270905928, "learning_rate": 3.7504689887112367e-06, "loss": 0.2941, "step": 17281 }, { "epoch": 0.5930679478380233, "grad_norm": 0.8224750035683307, "learning_rate": 3.749930890986347e-06, "loss": 0.2661, "step": 17282 }, { "epoch": 0.5931022649279342, "grad_norm": 0.8048771505743206, "learning_rate": 3.749392808704603e-06, "loss": 0.2869, "step": 17283 }, { "epoch": 0.5931365820178449, "grad_norm": 0.8329527926687545, "learning_rate": 3.748854741872652e-06, "loss": 0.2493, "step": 17284 }, { "epoch": 0.5931708991077557, "grad_norm": 0.7737825441547186, "learning_rate": 3.748316690497143e-06, "loss": 0.2553, "step": 17285 }, { "epoch": 0.5932052161976664, "grad_norm": 0.7750824361278975, "learning_rate": 3.7477786545847227e-06, "loss": 0.2813, "step": 17286 }, { "epoch": 0.5932395332875772, "grad_norm": 0.8056742643481054, "learning_rate": 3.747240634142035e-06, "loss": 0.2742, "step": 17287 }, { "epoch": 0.593273850377488, "grad_norm": 0.834694983211835, "learning_rate": 3.7467026291757303e-06, "loss": 0.3272, "step": 17288 }, { "epoch": 0.5933081674673988, "grad_norm": 0.8157101270244478, "learning_rate": 3.7461646396924545e-06, "loss": 0.2611, "step": 17289 }, { "epoch": 0.5933424845573095, "grad_norm": 0.8409043061536678, "learning_rate": 3.7456266656988494e-06, "loss": 0.2752, "step": 17290 }, { "epoch": 0.5933768016472203, "grad_norm": 0.7351250994230273, "learning_rate": 3.7450887072015673e-06, "loss": 0.2895, "step": 17291 }, { "epoch": 0.593411118737131, "grad_norm": 0.7837233547378555, "learning_rate": 3.7445507642072497e-06, "loss": 0.2618, "step": 17292 }, { "epoch": 0.5934454358270419, "grad_norm": 0.7594170531709508, "learning_rate": 3.7440128367225433e-06, "loss": 0.2696, "step": 17293 }, { "epoch": 0.5934797529169527, "grad_norm": 0.8322554322281169, "learning_rate": 3.743474924754096e-06, "loss": 0.3095, "step": 17294 }, { "epoch": 0.5935140700068634, "grad_norm": 0.7767295695911945, "learning_rate": 3.74293702830855e-06, "loss": 0.265, "step": 17295 }, { "epoch": 0.5935483870967742, "grad_norm": 0.7469701633780831, "learning_rate": 3.74239914739255e-06, "loss": 0.3089, "step": 17296 }, { "epoch": 0.593582704186685, "grad_norm": 0.8349717057763251, "learning_rate": 3.741861282012744e-06, "loss": 0.2984, "step": 17297 }, { "epoch": 0.5936170212765958, "grad_norm": 0.7465285839877251, "learning_rate": 3.7413234321757754e-06, "loss": 0.2568, "step": 17298 }, { "epoch": 0.5936513383665065, "grad_norm": 0.83556798055656, "learning_rate": 3.740785597888287e-06, "loss": 0.262, "step": 17299 }, { "epoch": 0.5936856554564173, "grad_norm": 0.802693826851776, "learning_rate": 3.7402477791569247e-06, "loss": 0.2569, "step": 17300 }, { "epoch": 0.593719972546328, "grad_norm": 0.6815448325844495, "learning_rate": 3.739709975988331e-06, "loss": 0.2341, "step": 17301 }, { "epoch": 0.5937542896362389, "grad_norm": 0.8151568678657642, "learning_rate": 3.7391721883891537e-06, "loss": 0.289, "step": 17302 }, { "epoch": 0.5937886067261496, "grad_norm": 0.8765254515858679, "learning_rate": 3.738634416366034e-06, "loss": 0.3016, "step": 17303 }, { "epoch": 0.5938229238160604, "grad_norm": 0.7564046130022437, "learning_rate": 3.738096659925613e-06, "loss": 0.2855, "step": 17304 }, { "epoch": 0.5938572409059711, "grad_norm": 0.8052147637800173, "learning_rate": 3.7375589190745386e-06, "loss": 0.2782, "step": 17305 }, { "epoch": 0.593891557995882, "grad_norm": 0.88099809159675, "learning_rate": 3.7370211938194515e-06, "loss": 0.3338, "step": 17306 }, { "epoch": 0.5939258750857928, "grad_norm": 0.7237502733398182, "learning_rate": 3.736483484166995e-06, "loss": 0.2526, "step": 17307 }, { "epoch": 0.5939601921757035, "grad_norm": 0.7144237411218782, "learning_rate": 3.7359457901238116e-06, "loss": 0.2399, "step": 17308 }, { "epoch": 0.5939945092656143, "grad_norm": 0.7726528368045986, "learning_rate": 3.735408111696546e-06, "loss": 0.2658, "step": 17309 }, { "epoch": 0.594028826355525, "grad_norm": 0.7795965254338098, "learning_rate": 3.7348704488918355e-06, "loss": 0.2824, "step": 17310 }, { "epoch": 0.5940631434454359, "grad_norm": 0.82362390802055, "learning_rate": 3.7343328017163284e-06, "loss": 0.3213, "step": 17311 }, { "epoch": 0.5940974605353466, "grad_norm": 0.6384230614489925, "learning_rate": 3.7337951701766637e-06, "loss": 0.2739, "step": 17312 }, { "epoch": 0.5941317776252574, "grad_norm": 0.7763225569177251, "learning_rate": 3.7332575542794814e-06, "loss": 0.2524, "step": 17313 }, { "epoch": 0.5941660947151681, "grad_norm": 0.8307215536011255, "learning_rate": 3.7327199540314262e-06, "loss": 0.2404, "step": 17314 }, { "epoch": 0.5942004118050789, "grad_norm": 0.7492496225779671, "learning_rate": 3.732182369439139e-06, "loss": 0.331, "step": 17315 }, { "epoch": 0.5942347288949897, "grad_norm": 0.827425823131923, "learning_rate": 3.7316448005092594e-06, "loss": 0.2887, "step": 17316 }, { "epoch": 0.5942690459849005, "grad_norm": 0.7951529590680867, "learning_rate": 3.7311072472484312e-06, "loss": 0.3248, "step": 17317 }, { "epoch": 0.5943033630748112, "grad_norm": 0.7386142226888809, "learning_rate": 3.7305697096632933e-06, "loss": 0.2866, "step": 17318 }, { "epoch": 0.594337680164722, "grad_norm": 0.8588293452782518, "learning_rate": 3.730032187760485e-06, "loss": 0.2849, "step": 17319 }, { "epoch": 0.5943719972546329, "grad_norm": 0.7161733302794575, "learning_rate": 3.7294946815466498e-06, "loss": 0.3041, "step": 17320 }, { "epoch": 0.5944063143445436, "grad_norm": 0.7156586389230627, "learning_rate": 3.728957191028426e-06, "loss": 0.2385, "step": 17321 }, { "epoch": 0.5944406314344544, "grad_norm": 0.7540526754386722, "learning_rate": 3.728419716212453e-06, "loss": 0.2738, "step": 17322 }, { "epoch": 0.5944749485243651, "grad_norm": 0.7476323747431528, "learning_rate": 3.727882257105374e-06, "loss": 0.2293, "step": 17323 }, { "epoch": 0.5945092656142759, "grad_norm": 0.7349295693145669, "learning_rate": 3.727344813713827e-06, "loss": 0.2564, "step": 17324 }, { "epoch": 0.5945435827041867, "grad_norm": 0.7166012157226356, "learning_rate": 3.726807386044448e-06, "loss": 0.2907, "step": 17325 }, { "epoch": 0.5945778997940975, "grad_norm": 0.7538505102665064, "learning_rate": 3.726269974103882e-06, "loss": 0.2783, "step": 17326 }, { "epoch": 0.5946122168840082, "grad_norm": 0.733046610087378, "learning_rate": 3.7257325778987656e-06, "loss": 0.2782, "step": 17327 }, { "epoch": 0.594646533973919, "grad_norm": 0.7711822980315545, "learning_rate": 3.725195197435736e-06, "loss": 0.2602, "step": 17328 }, { "epoch": 0.5946808510638298, "grad_norm": 0.8282172141700216, "learning_rate": 3.7246578327214332e-06, "loss": 0.3526, "step": 17329 }, { "epoch": 0.5947151681537406, "grad_norm": 0.7924219943916133, "learning_rate": 3.724120483762497e-06, "loss": 0.3092, "step": 17330 }, { "epoch": 0.5947494852436513, "grad_norm": 0.7646452170998601, "learning_rate": 3.7235831505655638e-06, "loss": 0.253, "step": 17331 }, { "epoch": 0.5947838023335621, "grad_norm": 0.9247549145770374, "learning_rate": 3.7230458331372743e-06, "loss": 0.2673, "step": 17332 }, { "epoch": 0.5948181194234728, "grad_norm": 0.7577552780497785, "learning_rate": 3.7225085314842624e-06, "loss": 0.3095, "step": 17333 }, { "epoch": 0.5948524365133837, "grad_norm": 0.7123905238806723, "learning_rate": 3.721971245613171e-06, "loss": 0.2881, "step": 17334 }, { "epoch": 0.5948867536032945, "grad_norm": 0.8443439213904719, "learning_rate": 3.7214339755306337e-06, "loss": 0.2496, "step": 17335 }, { "epoch": 0.5949210706932052, "grad_norm": 0.797317929185469, "learning_rate": 3.7208967212432874e-06, "loss": 0.2783, "step": 17336 }, { "epoch": 0.594955387783116, "grad_norm": 0.8730668485260907, "learning_rate": 3.7203594827577715e-06, "loss": 0.2511, "step": 17337 }, { "epoch": 0.5949897048730267, "grad_norm": 0.7854489568808878, "learning_rate": 3.719822260080724e-06, "loss": 0.3482, "step": 17338 }, { "epoch": 0.5950240219629376, "grad_norm": 0.8190512694143985, "learning_rate": 3.7192850532187776e-06, "loss": 0.305, "step": 17339 }, { "epoch": 0.5950583390528483, "grad_norm": 0.8124472369904715, "learning_rate": 3.7187478621785734e-06, "loss": 0.2655, "step": 17340 }, { "epoch": 0.5950926561427591, "grad_norm": 0.8205260912366497, "learning_rate": 3.7182106869667452e-06, "loss": 0.2717, "step": 17341 }, { "epoch": 0.5951269732326698, "grad_norm": 0.8028856252210703, "learning_rate": 3.7176735275899273e-06, "loss": 0.2971, "step": 17342 }, { "epoch": 0.5951612903225807, "grad_norm": 1.28584016112217, "learning_rate": 3.7171363840547605e-06, "loss": 0.3038, "step": 17343 }, { "epoch": 0.5951956074124914, "grad_norm": 0.7649148148268184, "learning_rate": 3.7165992563678765e-06, "loss": 0.3043, "step": 17344 }, { "epoch": 0.5952299245024022, "grad_norm": 0.7168448610691253, "learning_rate": 3.716062144535912e-06, "loss": 0.2962, "step": 17345 }, { "epoch": 0.595264241592313, "grad_norm": 0.8576742263059811, "learning_rate": 3.7155250485655042e-06, "loss": 0.2912, "step": 17346 }, { "epoch": 0.5952985586822237, "grad_norm": 0.8018882220251113, "learning_rate": 3.714987968463287e-06, "loss": 0.2952, "step": 17347 }, { "epoch": 0.5953328757721346, "grad_norm": 0.7122741476533013, "learning_rate": 3.7144509042358936e-06, "loss": 0.2608, "step": 17348 }, { "epoch": 0.5953671928620453, "grad_norm": 0.781054149233056, "learning_rate": 3.7139138558899624e-06, "loss": 0.2829, "step": 17349 }, { "epoch": 0.5954015099519561, "grad_norm": 0.7817801813934324, "learning_rate": 3.713376823432125e-06, "loss": 0.2799, "step": 17350 }, { "epoch": 0.5954358270418668, "grad_norm": 0.7382504133110532, "learning_rate": 3.7128398068690164e-06, "loss": 0.3456, "step": 17351 }, { "epoch": 0.5954701441317777, "grad_norm": 0.8033565692369884, "learning_rate": 3.7123028062072717e-06, "loss": 0.2733, "step": 17352 }, { "epoch": 0.5955044612216884, "grad_norm": 0.809056040088465, "learning_rate": 3.7117658214535258e-06, "loss": 0.3251, "step": 17353 }, { "epoch": 0.5955387783115992, "grad_norm": 0.7374741798232936, "learning_rate": 3.711228852614409e-06, "loss": 0.3096, "step": 17354 }, { "epoch": 0.5955730954015099, "grad_norm": 0.736482983364182, "learning_rate": 3.710691899696559e-06, "loss": 0.2573, "step": 17355 }, { "epoch": 0.5956074124914207, "grad_norm": 0.7806188689057502, "learning_rate": 3.710154962706608e-06, "loss": 0.2915, "step": 17356 }, { "epoch": 0.5956417295813315, "grad_norm": 0.7273414415321307, "learning_rate": 3.7096180416511864e-06, "loss": 0.2255, "step": 17357 }, { "epoch": 0.5956760466712423, "grad_norm": 0.7263506968850957, "learning_rate": 3.7090811365369295e-06, "loss": 0.2764, "step": 17358 }, { "epoch": 0.595710363761153, "grad_norm": 0.8609834229584837, "learning_rate": 3.7085442473704726e-06, "loss": 0.2485, "step": 17359 }, { "epoch": 0.5957446808510638, "grad_norm": 0.7606729612977112, "learning_rate": 3.7080073741584428e-06, "loss": 0.2869, "step": 17360 }, { "epoch": 0.5957789979409746, "grad_norm": 0.867880519277688, "learning_rate": 3.7074705169074775e-06, "loss": 0.2379, "step": 17361 }, { "epoch": 0.5958133150308854, "grad_norm": 0.6493472519678927, "learning_rate": 3.706933675624206e-06, "loss": 0.2295, "step": 17362 }, { "epoch": 0.5958476321207962, "grad_norm": 0.7742932438175126, "learning_rate": 3.7063968503152627e-06, "loss": 0.2879, "step": 17363 }, { "epoch": 0.5958819492107069, "grad_norm": 0.8272814086423587, "learning_rate": 3.705860040987278e-06, "loss": 0.3351, "step": 17364 }, { "epoch": 0.5959162663006177, "grad_norm": 0.7578133088334436, "learning_rate": 3.7053232476468812e-06, "loss": 0.296, "step": 17365 }, { "epoch": 0.5959505833905285, "grad_norm": 0.7931504005705584, "learning_rate": 3.7047864703007093e-06, "loss": 0.2627, "step": 17366 }, { "epoch": 0.5959849004804393, "grad_norm": 0.9029264023404665, "learning_rate": 3.7042497089553887e-06, "loss": 0.2733, "step": 17367 }, { "epoch": 0.59601921757035, "grad_norm": 0.7727913347199802, "learning_rate": 3.7037129636175517e-06, "loss": 0.2551, "step": 17368 }, { "epoch": 0.5960535346602608, "grad_norm": 0.8450561215148652, "learning_rate": 3.7031762342938314e-06, "loss": 0.2732, "step": 17369 }, { "epoch": 0.5960878517501715, "grad_norm": 0.8029121494902125, "learning_rate": 3.702639520990857e-06, "loss": 0.2789, "step": 17370 }, { "epoch": 0.5961221688400824, "grad_norm": 0.760332285209347, "learning_rate": 3.702102823715257e-06, "loss": 0.2948, "step": 17371 }, { "epoch": 0.5961564859299932, "grad_norm": 0.8051879587841557, "learning_rate": 3.7015661424736647e-06, "loss": 0.2818, "step": 17372 }, { "epoch": 0.5961908030199039, "grad_norm": 0.7946590130822176, "learning_rate": 3.7010294772727085e-06, "loss": 0.2993, "step": 17373 }, { "epoch": 0.5962251201098147, "grad_norm": 0.7759255605675478, "learning_rate": 3.700492828119019e-06, "loss": 0.2706, "step": 17374 }, { "epoch": 0.5962594371997255, "grad_norm": 0.7659890521988202, "learning_rate": 3.699956195019224e-06, "loss": 0.278, "step": 17375 }, { "epoch": 0.5962937542896363, "grad_norm": 0.8492937344573801, "learning_rate": 3.699419577979957e-06, "loss": 0.2757, "step": 17376 }, { "epoch": 0.596328071379547, "grad_norm": 1.114172719118037, "learning_rate": 3.6988829770078425e-06, "loss": 0.2726, "step": 17377 }, { "epoch": 0.5963623884694578, "grad_norm": 0.8025674506353332, "learning_rate": 3.6983463921095137e-06, "loss": 0.2851, "step": 17378 }, { "epoch": 0.5963967055593685, "grad_norm": 0.757786277007555, "learning_rate": 3.697809823291597e-06, "loss": 0.3021, "step": 17379 }, { "epoch": 0.5964310226492794, "grad_norm": 0.898964098994197, "learning_rate": 3.6972732705607205e-06, "loss": 0.2687, "step": 17380 }, { "epoch": 0.5964653397391901, "grad_norm": 0.7346168718752287, "learning_rate": 3.696736733923515e-06, "loss": 0.2457, "step": 17381 }, { "epoch": 0.5964996568291009, "grad_norm": 0.7508377209275565, "learning_rate": 3.696200213386608e-06, "loss": 0.2716, "step": 17382 }, { "epoch": 0.5965339739190116, "grad_norm": 0.804241915610372, "learning_rate": 3.695663708956626e-06, "loss": 0.3227, "step": 17383 }, { "epoch": 0.5965682910089224, "grad_norm": 0.8197775545984971, "learning_rate": 3.6951272206402e-06, "loss": 0.2727, "step": 17384 }, { "epoch": 0.5966026080988333, "grad_norm": 0.7111130092116068, "learning_rate": 3.694590748443956e-06, "loss": 0.301, "step": 17385 }, { "epoch": 0.596636925188744, "grad_norm": 1.0053743547029939, "learning_rate": 3.6940542923745187e-06, "loss": 0.2682, "step": 17386 }, { "epoch": 0.5966712422786548, "grad_norm": 0.8010666423845791, "learning_rate": 3.6935178524385206e-06, "loss": 0.339, "step": 17387 }, { "epoch": 0.5967055593685655, "grad_norm": 0.711153257892959, "learning_rate": 3.6929814286425846e-06, "loss": 0.256, "step": 17388 }, { "epoch": 0.5967398764584764, "grad_norm": 0.760550274731633, "learning_rate": 3.6924450209933387e-06, "loss": 0.2426, "step": 17389 }, { "epoch": 0.5967741935483871, "grad_norm": 0.8300643497613597, "learning_rate": 3.691908629497412e-06, "loss": 0.2872, "step": 17390 }, { "epoch": 0.5968085106382979, "grad_norm": 0.6918640852422855, "learning_rate": 3.6913722541614295e-06, "loss": 0.2318, "step": 17391 }, { "epoch": 0.5968428277282086, "grad_norm": 0.7334626458960491, "learning_rate": 3.6908358949920144e-06, "loss": 0.2469, "step": 17392 }, { "epoch": 0.5968771448181194, "grad_norm": 0.8442889126148433, "learning_rate": 3.690299551995798e-06, "loss": 0.3514, "step": 17393 }, { "epoch": 0.5969114619080302, "grad_norm": 0.8146187464692082, "learning_rate": 3.6897632251794013e-06, "loss": 0.2406, "step": 17394 }, { "epoch": 0.596945778997941, "grad_norm": 0.797264968563603, "learning_rate": 3.689226914549455e-06, "loss": 0.2944, "step": 17395 }, { "epoch": 0.5969800960878517, "grad_norm": 0.7785699938582341, "learning_rate": 3.6886906201125806e-06, "loss": 0.2915, "step": 17396 }, { "epoch": 0.5970144131777625, "grad_norm": 0.7675027468807541, "learning_rate": 3.688154341875403e-06, "loss": 0.2807, "step": 17397 }, { "epoch": 0.5970487302676734, "grad_norm": 0.7554858427678571, "learning_rate": 3.6876180798445517e-06, "loss": 0.291, "step": 17398 }, { "epoch": 0.5970830473575841, "grad_norm": 0.7836369719946854, "learning_rate": 3.687081834026649e-06, "loss": 0.265, "step": 17399 }, { "epoch": 0.5971173644474949, "grad_norm": 0.7909538346450211, "learning_rate": 3.6865456044283175e-06, "loss": 0.2842, "step": 17400 }, { "epoch": 0.5971516815374056, "grad_norm": 0.7840801210446212, "learning_rate": 3.6860093910561856e-06, "loss": 0.2605, "step": 17401 }, { "epoch": 0.5971859986273164, "grad_norm": 0.8111512497904773, "learning_rate": 3.6854731939168765e-06, "loss": 0.2385, "step": 17402 }, { "epoch": 0.5972203157172272, "grad_norm": 0.7026380724013984, "learning_rate": 3.6849370130170105e-06, "loss": 0.2683, "step": 17403 }, { "epoch": 0.597254632807138, "grad_norm": 0.7515643669694944, "learning_rate": 3.6844008483632153e-06, "loss": 0.2784, "step": 17404 }, { "epoch": 0.5972889498970487, "grad_norm": 0.694668455241713, "learning_rate": 3.6838646999621155e-06, "loss": 0.3504, "step": 17405 }, { "epoch": 0.5973232669869595, "grad_norm": 0.7256731346254635, "learning_rate": 3.6833285678203306e-06, "loss": 0.2717, "step": 17406 }, { "epoch": 0.5973575840768702, "grad_norm": 0.7657438329809898, "learning_rate": 3.682792451944488e-06, "loss": 0.2852, "step": 17407 }, { "epoch": 0.5973919011667811, "grad_norm": 0.7489524176587271, "learning_rate": 3.682256352341209e-06, "loss": 0.2489, "step": 17408 }, { "epoch": 0.5974262182566918, "grad_norm": 0.791193816747378, "learning_rate": 3.681720269017115e-06, "loss": 0.2752, "step": 17409 }, { "epoch": 0.5974605353466026, "grad_norm": 0.7813103179992102, "learning_rate": 3.6811842019788315e-06, "loss": 0.315, "step": 17410 }, { "epoch": 0.5974948524365133, "grad_norm": 0.745129107572304, "learning_rate": 3.6806481512329784e-06, "loss": 0.235, "step": 17411 }, { "epoch": 0.5975291695264242, "grad_norm": 0.8054891838007736, "learning_rate": 3.6801121167861788e-06, "loss": 0.3024, "step": 17412 }, { "epoch": 0.597563486616335, "grad_norm": 0.7642019877612855, "learning_rate": 3.679576098645057e-06, "loss": 0.2776, "step": 17413 }, { "epoch": 0.5975978037062457, "grad_norm": 0.7286320890431955, "learning_rate": 3.679040096816233e-06, "loss": 0.2558, "step": 17414 }, { "epoch": 0.5976321207961565, "grad_norm": 0.7671586592848676, "learning_rate": 3.6785041113063263e-06, "loss": 0.297, "step": 17415 }, { "epoch": 0.5976664378860672, "grad_norm": 0.7815960460552743, "learning_rate": 3.6779681421219626e-06, "loss": 0.2859, "step": 17416 }, { "epoch": 0.5977007549759781, "grad_norm": 0.8066685265150095, "learning_rate": 3.677432189269761e-06, "loss": 0.2822, "step": 17417 }, { "epoch": 0.5977350720658888, "grad_norm": 0.8436007602698614, "learning_rate": 3.6768962527563414e-06, "loss": 0.3853, "step": 17418 }, { "epoch": 0.5977693891557996, "grad_norm": 0.8654770561362641, "learning_rate": 3.676360332588327e-06, "loss": 0.2924, "step": 17419 }, { "epoch": 0.5978037062457103, "grad_norm": 0.7564478495975745, "learning_rate": 3.675824428772338e-06, "loss": 0.2696, "step": 17420 }, { "epoch": 0.5978380233356212, "grad_norm": 0.7331756989305636, "learning_rate": 3.675288541314992e-06, "loss": 0.2418, "step": 17421 }, { "epoch": 0.597872340425532, "grad_norm": 0.7335295582351004, "learning_rate": 3.674752670222914e-06, "loss": 0.2788, "step": 17422 }, { "epoch": 0.5979066575154427, "grad_norm": 0.7964814780817976, "learning_rate": 3.674216815502722e-06, "loss": 0.2233, "step": 17423 }, { "epoch": 0.5979409746053534, "grad_norm": 0.8415768086906585, "learning_rate": 3.6736809771610332e-06, "loss": 0.2798, "step": 17424 }, { "epoch": 0.5979752916952642, "grad_norm": 0.8588348937712664, "learning_rate": 3.6731451552044704e-06, "loss": 0.2627, "step": 17425 }, { "epoch": 0.5980096087851751, "grad_norm": 0.7753629545162039, "learning_rate": 3.672609349639653e-06, "loss": 0.2341, "step": 17426 }, { "epoch": 0.5980439258750858, "grad_norm": 0.8737693009419248, "learning_rate": 3.672073560473198e-06, "loss": 0.2571, "step": 17427 }, { "epoch": 0.5980782429649966, "grad_norm": 0.851719387076377, "learning_rate": 3.6715377877117286e-06, "loss": 0.2793, "step": 17428 }, { "epoch": 0.5981125600549073, "grad_norm": 0.8071599405516187, "learning_rate": 3.6710020313618583e-06, "loss": 0.2973, "step": 17429 }, { "epoch": 0.5981468771448181, "grad_norm": 0.8082054715843521, "learning_rate": 3.6704662914302102e-06, "loss": 0.2594, "step": 17430 }, { "epoch": 0.5981811942347289, "grad_norm": 0.7756142078276769, "learning_rate": 3.669930567923402e-06, "loss": 0.3215, "step": 17431 }, { "epoch": 0.5982155113246397, "grad_norm": 0.7985874593661535, "learning_rate": 3.6693948608480477e-06, "loss": 0.2482, "step": 17432 }, { "epoch": 0.5982498284145504, "grad_norm": 0.8615553158245107, "learning_rate": 3.6688591702107702e-06, "loss": 0.2895, "step": 17433 }, { "epoch": 0.5982841455044612, "grad_norm": 0.7583815975373402, "learning_rate": 3.668323496018187e-06, "loss": 0.2673, "step": 17434 }, { "epoch": 0.598318462594372, "grad_norm": 0.8030930959884413, "learning_rate": 3.6677878382769116e-06, "loss": 0.3005, "step": 17435 }, { "epoch": 0.5983527796842828, "grad_norm": 0.7752585805101111, "learning_rate": 3.6672521969935665e-06, "loss": 0.2623, "step": 17436 }, { "epoch": 0.5983870967741935, "grad_norm": 0.7674759777192894, "learning_rate": 3.6667165721747666e-06, "loss": 0.2825, "step": 17437 }, { "epoch": 0.5984214138641043, "grad_norm": 0.8052148228467088, "learning_rate": 3.6661809638271275e-06, "loss": 0.2452, "step": 17438 }, { "epoch": 0.598455730954015, "grad_norm": 0.7566279694856691, "learning_rate": 3.6656453719572693e-06, "loss": 0.3131, "step": 17439 }, { "epoch": 0.5984900480439259, "grad_norm": 0.7288229068059088, "learning_rate": 3.665109796571805e-06, "loss": 0.2749, "step": 17440 }, { "epoch": 0.5985243651338367, "grad_norm": 0.8243680838691022, "learning_rate": 3.664574237677354e-06, "loss": 0.2557, "step": 17441 }, { "epoch": 0.5985586822237474, "grad_norm": 0.7965944711295952, "learning_rate": 3.664038695280531e-06, "loss": 0.226, "step": 17442 }, { "epoch": 0.5985929993136582, "grad_norm": 0.7492404829759594, "learning_rate": 3.663503169387953e-06, "loss": 0.2634, "step": 17443 }, { "epoch": 0.598627316403569, "grad_norm": 0.7019867236805912, "learning_rate": 3.6629676600062337e-06, "loss": 0.2468, "step": 17444 }, { "epoch": 0.5986616334934798, "grad_norm": 0.8482870727053976, "learning_rate": 3.6624321671419917e-06, "loss": 0.2881, "step": 17445 }, { "epoch": 0.5986959505833905, "grad_norm": 0.7047543745277924, "learning_rate": 3.6618966908018405e-06, "loss": 0.2634, "step": 17446 }, { "epoch": 0.5987302676733013, "grad_norm": 0.7599749441234985, "learning_rate": 3.661361230992394e-06, "loss": 0.2981, "step": 17447 }, { "epoch": 0.598764584763212, "grad_norm": 1.2521257714022032, "learning_rate": 3.66082578772027e-06, "loss": 0.271, "step": 17448 }, { "epoch": 0.5987989018531229, "grad_norm": 0.7663121097964846, "learning_rate": 3.6602903609920824e-06, "loss": 0.248, "step": 17449 }, { "epoch": 0.5988332189430337, "grad_norm": 0.8246792154577643, "learning_rate": 3.659754950814444e-06, "loss": 0.302, "step": 17450 }, { "epoch": 0.5988675360329444, "grad_norm": 0.850844288317984, "learning_rate": 3.6592195571939724e-06, "loss": 0.3108, "step": 17451 }, { "epoch": 0.5989018531228552, "grad_norm": 0.7901022810979428, "learning_rate": 3.6586841801372808e-06, "loss": 0.2912, "step": 17452 }, { "epoch": 0.5989361702127659, "grad_norm": 0.9532336095711148, "learning_rate": 3.6581488196509795e-06, "loss": 0.3504, "step": 17453 }, { "epoch": 0.5989704873026768, "grad_norm": 0.7920272958412572, "learning_rate": 3.657613475741687e-06, "loss": 0.2629, "step": 17454 }, { "epoch": 0.5990048043925875, "grad_norm": 0.7168304270655373, "learning_rate": 3.6570781484160143e-06, "loss": 0.2548, "step": 17455 }, { "epoch": 0.5990391214824983, "grad_norm": 0.8441028772242534, "learning_rate": 3.6565428376805744e-06, "loss": 0.2823, "step": 17456 }, { "epoch": 0.599073438572409, "grad_norm": 0.7116287758794736, "learning_rate": 3.6560075435419827e-06, "loss": 0.2707, "step": 17457 }, { "epoch": 0.5991077556623199, "grad_norm": 0.7921579254002613, "learning_rate": 3.65547226600685e-06, "loss": 0.2971, "step": 17458 }, { "epoch": 0.5991420727522306, "grad_norm": 0.7222309207165809, "learning_rate": 3.654937005081791e-06, "loss": 0.3034, "step": 17459 }, { "epoch": 0.5991763898421414, "grad_norm": 0.7621376295738279, "learning_rate": 3.654401760773417e-06, "loss": 0.2848, "step": 17460 }, { "epoch": 0.5992107069320521, "grad_norm": 1.1224359715147816, "learning_rate": 3.6538665330883395e-06, "loss": 0.3005, "step": 17461 }, { "epoch": 0.5992450240219629, "grad_norm": 0.8268474746131448, "learning_rate": 3.6533313220331725e-06, "loss": 0.2735, "step": 17462 }, { "epoch": 0.5992793411118738, "grad_norm": 0.790274984354203, "learning_rate": 3.6527961276145262e-06, "loss": 0.2931, "step": 17463 }, { "epoch": 0.5993136582017845, "grad_norm": 0.9585192973475976, "learning_rate": 3.6522609498390127e-06, "loss": 0.2585, "step": 17464 }, { "epoch": 0.5993479752916953, "grad_norm": 0.7549227401476232, "learning_rate": 3.651725788713245e-06, "loss": 0.2566, "step": 17465 }, { "epoch": 0.599382292381606, "grad_norm": 0.7712036280003156, "learning_rate": 3.651190644243834e-06, "loss": 0.2435, "step": 17466 }, { "epoch": 0.5994166094715169, "grad_norm": 0.7534908903731092, "learning_rate": 3.650655516437387e-06, "loss": 0.2711, "step": 17467 }, { "epoch": 0.5994509265614276, "grad_norm": 0.6683448035577506, "learning_rate": 3.650120405300521e-06, "loss": 0.2389, "step": 17468 }, { "epoch": 0.5994852436513384, "grad_norm": 0.8851318942464144, "learning_rate": 3.6495853108398417e-06, "loss": 0.2924, "step": 17469 }, { "epoch": 0.5995195607412491, "grad_norm": 0.8344699296185083, "learning_rate": 3.649050233061962e-06, "loss": 0.2935, "step": 17470 }, { "epoch": 0.5995538778311599, "grad_norm": 0.8349087235913797, "learning_rate": 3.648515171973491e-06, "loss": 0.2616, "step": 17471 }, { "epoch": 0.5995881949210707, "grad_norm": 0.8452940830449492, "learning_rate": 3.647980127581041e-06, "loss": 0.2938, "step": 17472 }, { "epoch": 0.5996225120109815, "grad_norm": 0.9762569913891838, "learning_rate": 3.6474450998912187e-06, "loss": 0.2625, "step": 17473 }, { "epoch": 0.5996568291008922, "grad_norm": 0.7509011033791417, "learning_rate": 3.646910088910637e-06, "loss": 0.2639, "step": 17474 }, { "epoch": 0.599691146190803, "grad_norm": 0.7013195374808964, "learning_rate": 3.6463750946459033e-06, "loss": 0.2848, "step": 17475 }, { "epoch": 0.5997254632807137, "grad_norm": 0.7344128105844712, "learning_rate": 3.6458401171036244e-06, "loss": 0.3021, "step": 17476 }, { "epoch": 0.5997597803706246, "grad_norm": 0.764641531228134, "learning_rate": 3.645305156290415e-06, "loss": 0.2745, "step": 17477 }, { "epoch": 0.5997940974605354, "grad_norm": 0.6800468896434919, "learning_rate": 3.644770212212879e-06, "loss": 0.2226, "step": 17478 }, { "epoch": 0.5998284145504461, "grad_norm": 0.8563721356428207, "learning_rate": 3.6442352848776276e-06, "loss": 0.3147, "step": 17479 }, { "epoch": 0.5998627316403569, "grad_norm": 0.7811610617068964, "learning_rate": 3.643700374291269e-06, "loss": 0.3324, "step": 17480 }, { "epoch": 0.5998970487302677, "grad_norm": 0.7017120845231672, "learning_rate": 3.6431654804604122e-06, "loss": 0.2116, "step": 17481 }, { "epoch": 0.5999313658201785, "grad_norm": 0.8241875730588085, "learning_rate": 3.6426306033916615e-06, "loss": 0.2234, "step": 17482 }, { "epoch": 0.5999656829100892, "grad_norm": 0.7813254914221839, "learning_rate": 3.642095743091629e-06, "loss": 0.286, "step": 17483 }, { "epoch": 0.6, "grad_norm": 0.7414075066271258, "learning_rate": 3.6415608995669193e-06, "loss": 0.2573, "step": 17484 }, { "epoch": 0.6000343170899107, "grad_norm": 0.7752148072048061, "learning_rate": 3.6410260728241407e-06, "loss": 0.2557, "step": 17485 }, { "epoch": 0.6000686341798216, "grad_norm": 0.7541731503275335, "learning_rate": 3.640491262869902e-06, "loss": 0.2696, "step": 17486 }, { "epoch": 0.6001029512697323, "grad_norm": 0.9622072800494885, "learning_rate": 3.6399564697108057e-06, "loss": 0.2933, "step": 17487 }, { "epoch": 0.6001372683596431, "grad_norm": 0.7462378835267104, "learning_rate": 3.639421693353465e-06, "loss": 0.2934, "step": 17488 }, { "epoch": 0.6001715854495538, "grad_norm": 0.7061894287469098, "learning_rate": 3.638886933804483e-06, "loss": 0.289, "step": 17489 }, { "epoch": 0.6002059025394647, "grad_norm": 0.756549338099714, "learning_rate": 3.6383521910704635e-06, "loss": 0.2552, "step": 17490 }, { "epoch": 0.6002402196293755, "grad_norm": 0.8148917666159815, "learning_rate": 3.637817465158017e-06, "loss": 0.2947, "step": 17491 }, { "epoch": 0.6002745367192862, "grad_norm": 0.8012119984343523, "learning_rate": 3.6372827560737465e-06, "loss": 0.2514, "step": 17492 }, { "epoch": 0.600308853809197, "grad_norm": 0.7639098634579905, "learning_rate": 3.636748063824259e-06, "loss": 0.2409, "step": 17493 }, { "epoch": 0.6003431708991077, "grad_norm": 0.8335550005831139, "learning_rate": 3.63621338841616e-06, "loss": 0.3197, "step": 17494 }, { "epoch": 0.6003774879890186, "grad_norm": 0.7347209316042049, "learning_rate": 3.635678729856056e-06, "loss": 0.312, "step": 17495 }, { "epoch": 0.6004118050789293, "grad_norm": 0.9083337864693819, "learning_rate": 3.6351440881505474e-06, "loss": 0.3235, "step": 17496 }, { "epoch": 0.6004461221688401, "grad_norm": 0.8121133315720654, "learning_rate": 3.634609463306245e-06, "loss": 0.3546, "step": 17497 }, { "epoch": 0.6004804392587508, "grad_norm": 0.80876123543993, "learning_rate": 3.6340748553297512e-06, "loss": 0.2594, "step": 17498 }, { "epoch": 0.6005147563486616, "grad_norm": 0.6979828786715793, "learning_rate": 3.633540264227667e-06, "loss": 0.2509, "step": 17499 }, { "epoch": 0.6005490734385724, "grad_norm": 0.8733237726488797, "learning_rate": 3.6330056900066012e-06, "loss": 0.2966, "step": 17500 }, { "epoch": 0.6005833905284832, "grad_norm": 0.8020792098237248, "learning_rate": 3.6324711326731577e-06, "loss": 0.2439, "step": 17501 }, { "epoch": 0.600617707618394, "grad_norm": 0.728865997715989, "learning_rate": 3.6319365922339366e-06, "loss": 0.2672, "step": 17502 }, { "epoch": 0.6006520247083047, "grad_norm": 0.718789633986134, "learning_rate": 3.6314020686955466e-06, "loss": 0.2328, "step": 17503 }, { "epoch": 0.6006863417982156, "grad_norm": 0.7785517507263383, "learning_rate": 3.6308675620645876e-06, "loss": 0.2656, "step": 17504 }, { "epoch": 0.6007206588881263, "grad_norm": 0.7901219236237712, "learning_rate": 3.6303330723476616e-06, "loss": 0.2953, "step": 17505 }, { "epoch": 0.6007549759780371, "grad_norm": 0.6837635743600992, "learning_rate": 3.629798599551376e-06, "loss": 0.253, "step": 17506 }, { "epoch": 0.6007892930679478, "grad_norm": 0.7657061051452151, "learning_rate": 3.6292641436823305e-06, "loss": 0.2385, "step": 17507 }, { "epoch": 0.6008236101578586, "grad_norm": 0.9909730606212795, "learning_rate": 3.628729704747127e-06, "loss": 0.2961, "step": 17508 }, { "epoch": 0.6008579272477694, "grad_norm": 0.7446636018831152, "learning_rate": 3.628195282752371e-06, "loss": 0.2548, "step": 17509 }, { "epoch": 0.6008922443376802, "grad_norm": 0.7472286583797222, "learning_rate": 3.627660877704663e-06, "loss": 0.2971, "step": 17510 }, { "epoch": 0.6009265614275909, "grad_norm": 0.8229504355289067, "learning_rate": 3.627126489610603e-06, "loss": 0.3445, "step": 17511 }, { "epoch": 0.6009608785175017, "grad_norm": 0.9120899214362846, "learning_rate": 3.6265921184767967e-06, "loss": 0.3161, "step": 17512 }, { "epoch": 0.6009951956074125, "grad_norm": 0.7398470588883659, "learning_rate": 3.626057764309844e-06, "loss": 0.2959, "step": 17513 }, { "epoch": 0.6010295126973233, "grad_norm": 0.7613349675190277, "learning_rate": 3.6255234271163423e-06, "loss": 0.2771, "step": 17514 }, { "epoch": 0.601063829787234, "grad_norm": 0.7009903110912296, "learning_rate": 3.6249891069028987e-06, "loss": 0.2852, "step": 17515 }, { "epoch": 0.6010981468771448, "grad_norm": 0.7908385884591427, "learning_rate": 3.6244548036761117e-06, "loss": 0.267, "step": 17516 }, { "epoch": 0.6011324639670556, "grad_norm": 0.7469948501550325, "learning_rate": 3.6239205174425797e-06, "loss": 0.2693, "step": 17517 }, { "epoch": 0.6011667810569664, "grad_norm": 0.8328445296022329, "learning_rate": 3.6233862482089075e-06, "loss": 0.2473, "step": 17518 }, { "epoch": 0.6012010981468772, "grad_norm": 0.6887991029558735, "learning_rate": 3.6228519959816922e-06, "loss": 0.2247, "step": 17519 }, { "epoch": 0.6012354152367879, "grad_norm": 0.7920473788061719, "learning_rate": 3.622317760767536e-06, "loss": 0.2812, "step": 17520 }, { "epoch": 0.6012697323266987, "grad_norm": 0.73923313230525, "learning_rate": 3.6217835425730373e-06, "loss": 0.3013, "step": 17521 }, { "epoch": 0.6013040494166094, "grad_norm": 0.7908502028216623, "learning_rate": 3.621249341404795e-06, "loss": 0.293, "step": 17522 }, { "epoch": 0.6013383665065203, "grad_norm": 0.7926215241891087, "learning_rate": 3.62071515726941e-06, "loss": 0.2616, "step": 17523 }, { "epoch": 0.601372683596431, "grad_norm": 0.8118952862764757, "learning_rate": 3.620180990173483e-06, "loss": 0.2879, "step": 17524 }, { "epoch": 0.6014070006863418, "grad_norm": 0.782295117661865, "learning_rate": 3.6196468401236085e-06, "loss": 0.2847, "step": 17525 }, { "epoch": 0.6014413177762525, "grad_norm": 0.7583121157547767, "learning_rate": 3.61911270712639e-06, "loss": 0.2622, "step": 17526 }, { "epoch": 0.6014756348661634, "grad_norm": 0.7884851198745293, "learning_rate": 3.618578591188424e-06, "loss": 0.3294, "step": 17527 }, { "epoch": 0.6015099519560742, "grad_norm": 0.7018860983664389, "learning_rate": 3.6180444923163066e-06, "loss": 0.2317, "step": 17528 }, { "epoch": 0.6015442690459849, "grad_norm": 0.8136817070930481, "learning_rate": 3.617510410516641e-06, "loss": 0.2667, "step": 17529 }, { "epoch": 0.6015785861358957, "grad_norm": 0.7129087200182019, "learning_rate": 3.6169763457960207e-06, "loss": 0.2849, "step": 17530 }, { "epoch": 0.6016129032258064, "grad_norm": 0.8406744455891156, "learning_rate": 3.616442298161045e-06, "loss": 0.2798, "step": 17531 }, { "epoch": 0.6016472203157173, "grad_norm": 0.7785855427404483, "learning_rate": 3.6159082676183133e-06, "loss": 0.3095, "step": 17532 }, { "epoch": 0.601681537405628, "grad_norm": 0.9080681202405557, "learning_rate": 3.6153742541744207e-06, "loss": 0.325, "step": 17533 }, { "epoch": 0.6017158544955388, "grad_norm": 0.7900675816779275, "learning_rate": 3.614840257835963e-06, "loss": 0.2694, "step": 17534 }, { "epoch": 0.6017501715854495, "grad_norm": 0.7819511288080615, "learning_rate": 3.614306278609542e-06, "loss": 0.2718, "step": 17535 }, { "epoch": 0.6017844886753603, "grad_norm": 0.761348521028212, "learning_rate": 3.6137723165017497e-06, "loss": 0.2655, "step": 17536 }, { "epoch": 0.6018188057652711, "grad_norm": 0.7848102697730199, "learning_rate": 3.613238371519184e-06, "loss": 0.2409, "step": 17537 }, { "epoch": 0.6018531228551819, "grad_norm": 0.7458505433590956, "learning_rate": 3.612704443668442e-06, "loss": 0.288, "step": 17538 }, { "epoch": 0.6018874399450926, "grad_norm": 0.6915699164719881, "learning_rate": 3.6121705329561195e-06, "loss": 0.2267, "step": 17539 }, { "epoch": 0.6019217570350034, "grad_norm": 0.7734668804189501, "learning_rate": 3.6116366393888105e-06, "loss": 0.3025, "step": 17540 }, { "epoch": 0.6019560741249143, "grad_norm": 0.8376892431963168, "learning_rate": 3.6111027629731143e-06, "loss": 0.2888, "step": 17541 }, { "epoch": 0.601990391214825, "grad_norm": 0.7964301707593148, "learning_rate": 3.610568903715623e-06, "loss": 0.2836, "step": 17542 }, { "epoch": 0.6020247083047358, "grad_norm": 0.8276216893149345, "learning_rate": 3.6100350616229324e-06, "loss": 0.2555, "step": 17543 }, { "epoch": 0.6020590253946465, "grad_norm": 0.8153184397674921, "learning_rate": 3.6095012367016386e-06, "loss": 0.237, "step": 17544 }, { "epoch": 0.6020933424845573, "grad_norm": 0.7735512307858319, "learning_rate": 3.6089674289583366e-06, "loss": 0.3319, "step": 17545 }, { "epoch": 0.6021276595744681, "grad_norm": 0.7487179696216459, "learning_rate": 3.608433638399618e-06, "loss": 0.2932, "step": 17546 }, { "epoch": 0.6021619766643789, "grad_norm": 0.6395212064458499, "learning_rate": 3.607899865032082e-06, "loss": 0.2562, "step": 17547 }, { "epoch": 0.6021962937542896, "grad_norm": 0.7209426913619557, "learning_rate": 3.60736610886232e-06, "loss": 0.2111, "step": 17548 }, { "epoch": 0.6022306108442004, "grad_norm": 0.7025328245932306, "learning_rate": 3.6068323698969244e-06, "loss": 0.2536, "step": 17549 }, { "epoch": 0.6022649279341112, "grad_norm": 0.8086076048904576, "learning_rate": 3.6062986481424924e-06, "loss": 0.2633, "step": 17550 }, { "epoch": 0.602299245024022, "grad_norm": 0.7642540697185594, "learning_rate": 3.605764943605613e-06, "loss": 0.3105, "step": 17551 }, { "epoch": 0.6023335621139327, "grad_norm": 0.7918051095254295, "learning_rate": 3.605231256292885e-06, "loss": 0.2695, "step": 17552 }, { "epoch": 0.6023678792038435, "grad_norm": 0.6653733106651527, "learning_rate": 3.6046975862108967e-06, "loss": 0.2852, "step": 17553 }, { "epoch": 0.6024021962937542, "grad_norm": 0.7010375157700462, "learning_rate": 3.6041639333662427e-06, "loss": 0.2211, "step": 17554 }, { "epoch": 0.6024365133836651, "grad_norm": 0.6932449534283712, "learning_rate": 3.603630297765518e-06, "loss": 0.2649, "step": 17555 }, { "epoch": 0.6024708304735759, "grad_norm": 0.7322387683345823, "learning_rate": 3.603096679415313e-06, "loss": 0.2455, "step": 17556 }, { "epoch": 0.6025051475634866, "grad_norm": 0.8248761461972198, "learning_rate": 3.602563078322218e-06, "loss": 0.2476, "step": 17557 }, { "epoch": 0.6025394646533974, "grad_norm": 0.7551691218212809, "learning_rate": 3.602029494492828e-06, "loss": 0.2349, "step": 17558 }, { "epoch": 0.6025737817433081, "grad_norm": 0.7396366660456555, "learning_rate": 3.6014959279337334e-06, "loss": 0.2791, "step": 17559 }, { "epoch": 0.602608098833219, "grad_norm": 0.7876104119757984, "learning_rate": 3.600962378651526e-06, "loss": 0.2663, "step": 17560 }, { "epoch": 0.6026424159231297, "grad_norm": 0.8019098456348872, "learning_rate": 3.6004288466527974e-06, "loss": 0.2467, "step": 17561 }, { "epoch": 0.6026767330130405, "grad_norm": 0.7256086671914509, "learning_rate": 3.59989533194414e-06, "loss": 0.2959, "step": 17562 }, { "epoch": 0.6027110501029512, "grad_norm": 0.7343448862107367, "learning_rate": 3.599361834532141e-06, "loss": 0.2874, "step": 17563 }, { "epoch": 0.6027453671928621, "grad_norm": 0.7553720855861533, "learning_rate": 3.598828354423395e-06, "loss": 0.276, "step": 17564 }, { "epoch": 0.6027796842827728, "grad_norm": 0.8303089451774508, "learning_rate": 3.598294891624492e-06, "loss": 0.2532, "step": 17565 }, { "epoch": 0.6028140013726836, "grad_norm": 0.7018477585536472, "learning_rate": 3.5977614461420184e-06, "loss": 0.2447, "step": 17566 }, { "epoch": 0.6028483184625943, "grad_norm": 0.8312661049091611, "learning_rate": 3.597228017982569e-06, "loss": 0.2998, "step": 17567 }, { "epoch": 0.6028826355525051, "grad_norm": 0.8147347144086518, "learning_rate": 3.596694607152733e-06, "loss": 0.3012, "step": 17568 }, { "epoch": 0.602916952642416, "grad_norm": 0.8319016113528513, "learning_rate": 3.5961612136590973e-06, "loss": 0.2855, "step": 17569 }, { "epoch": 0.6029512697323267, "grad_norm": 0.7067934548289455, "learning_rate": 3.595627837508254e-06, "loss": 0.2288, "step": 17570 }, { "epoch": 0.6029855868222375, "grad_norm": 0.8303391961914497, "learning_rate": 3.5950944787067924e-06, "loss": 0.2663, "step": 17571 }, { "epoch": 0.6030199039121482, "grad_norm": 0.6732713431643136, "learning_rate": 3.594561137261299e-06, "loss": 0.2719, "step": 17572 }, { "epoch": 0.6030542210020591, "grad_norm": 0.7916306827068388, "learning_rate": 3.5940278131783646e-06, "loss": 0.2723, "step": 17573 }, { "epoch": 0.6030885380919698, "grad_norm": 0.9293970250024468, "learning_rate": 3.593494506464578e-06, "loss": 0.2964, "step": 17574 }, { "epoch": 0.6031228551818806, "grad_norm": 0.8000628684070321, "learning_rate": 3.592961217126525e-06, "loss": 0.3061, "step": 17575 }, { "epoch": 0.6031571722717913, "grad_norm": 0.7653342121545749, "learning_rate": 3.5924279451707976e-06, "loss": 0.2951, "step": 17576 }, { "epoch": 0.6031914893617021, "grad_norm": 0.8194378812250608, "learning_rate": 3.591894690603982e-06, "loss": 0.267, "step": 17577 }, { "epoch": 0.603225806451613, "grad_norm": 0.7718247212654666, "learning_rate": 3.5913614534326634e-06, "loss": 0.2953, "step": 17578 }, { "epoch": 0.6032601235415237, "grad_norm": 0.7956165159704693, "learning_rate": 3.590828233663435e-06, "loss": 0.2896, "step": 17579 }, { "epoch": 0.6032944406314344, "grad_norm": 0.8072519249436133, "learning_rate": 3.590295031302879e-06, "loss": 0.3049, "step": 17580 }, { "epoch": 0.6033287577213452, "grad_norm": 0.845428938643032, "learning_rate": 3.5897618463575845e-06, "loss": 0.2507, "step": 17581 }, { "epoch": 0.603363074811256, "grad_norm": 0.7171087439731523, "learning_rate": 3.589228678834138e-06, "loss": 0.2811, "step": 17582 }, { "epoch": 0.6033973919011668, "grad_norm": 0.7415306683978047, "learning_rate": 3.5886955287391255e-06, "loss": 0.2431, "step": 17583 }, { "epoch": 0.6034317089910776, "grad_norm": 0.782913014593457, "learning_rate": 3.5881623960791364e-06, "loss": 0.3178, "step": 17584 }, { "epoch": 0.6034660260809883, "grad_norm": 0.7523969324531807, "learning_rate": 3.587629280860755e-06, "loss": 0.279, "step": 17585 }, { "epoch": 0.6035003431708991, "grad_norm": 0.6803184719717641, "learning_rate": 3.587096183090565e-06, "loss": 0.2771, "step": 17586 }, { "epoch": 0.6035346602608099, "grad_norm": 1.0220605076690423, "learning_rate": 3.5865631027751557e-06, "loss": 0.2915, "step": 17587 }, { "epoch": 0.6035689773507207, "grad_norm": 0.7019767628267546, "learning_rate": 3.586030039921112e-06, "loss": 0.276, "step": 17588 }, { "epoch": 0.6036032944406314, "grad_norm": 0.9847910107948196, "learning_rate": 3.585496994535017e-06, "loss": 0.2749, "step": 17589 }, { "epoch": 0.6036376115305422, "grad_norm": 0.7150410480849958, "learning_rate": 3.584963966623458e-06, "loss": 0.2762, "step": 17590 }, { "epoch": 0.6036719286204529, "grad_norm": 0.7656668717004979, "learning_rate": 3.58443095619302e-06, "loss": 0.2273, "step": 17591 }, { "epoch": 0.6037062457103638, "grad_norm": 0.7347482537752672, "learning_rate": 3.5838979632502856e-06, "loss": 0.2693, "step": 17592 }, { "epoch": 0.6037405628002745, "grad_norm": 0.7782319295257462, "learning_rate": 3.5833649878018433e-06, "loss": 0.2571, "step": 17593 }, { "epoch": 0.6037748798901853, "grad_norm": 0.8786800661181644, "learning_rate": 3.582832029854275e-06, "loss": 0.327, "step": 17594 }, { "epoch": 0.603809196980096, "grad_norm": 0.7587327741646616, "learning_rate": 3.582299089414162e-06, "loss": 0.275, "step": 17595 }, { "epoch": 0.6038435140700069, "grad_norm": 0.7212896932632545, "learning_rate": 3.581766166488093e-06, "loss": 0.2679, "step": 17596 }, { "epoch": 0.6038778311599177, "grad_norm": 0.771463731270892, "learning_rate": 3.5812332610826484e-06, "loss": 0.2704, "step": 17597 }, { "epoch": 0.6039121482498284, "grad_norm": 0.7750570950201504, "learning_rate": 3.580700373204412e-06, "loss": 0.3095, "step": 17598 }, { "epoch": 0.6039464653397392, "grad_norm": 0.8601670228545498, "learning_rate": 3.58016750285997e-06, "loss": 0.2723, "step": 17599 }, { "epoch": 0.6039807824296499, "grad_norm": 0.8330791263241939, "learning_rate": 3.579634650055903e-06, "loss": 0.3018, "step": 17600 }, { "epoch": 0.6040150995195608, "grad_norm": 0.7405585592338204, "learning_rate": 3.5791018147987916e-06, "loss": 0.2406, "step": 17601 }, { "epoch": 0.6040494166094715, "grad_norm": 0.7437257800852548, "learning_rate": 3.578568997095223e-06, "loss": 0.2476, "step": 17602 }, { "epoch": 0.6040837336993823, "grad_norm": 0.7416355494255213, "learning_rate": 3.578036196951776e-06, "loss": 0.2688, "step": 17603 }, { "epoch": 0.604118050789293, "grad_norm": 0.8188176338832679, "learning_rate": 3.5775034143750344e-06, "loss": 0.2844, "step": 17604 }, { "epoch": 0.6041523678792038, "grad_norm": 0.7986829836073623, "learning_rate": 3.576970649371579e-06, "loss": 0.3125, "step": 17605 }, { "epoch": 0.6041866849691147, "grad_norm": 0.7608598553009038, "learning_rate": 3.5764379019479933e-06, "loss": 0.2915, "step": 17606 }, { "epoch": 0.6042210020590254, "grad_norm": 0.7738187077080447, "learning_rate": 3.5759051721108555e-06, "loss": 0.2672, "step": 17607 }, { "epoch": 0.6042553191489362, "grad_norm": 0.7934549938683039, "learning_rate": 3.5753724598667517e-06, "loss": 0.2781, "step": 17608 }, { "epoch": 0.6042896362388469, "grad_norm": 0.6882934613592017, "learning_rate": 3.5748397652222595e-06, "loss": 0.2499, "step": 17609 }, { "epoch": 0.6043239533287578, "grad_norm": 0.7958932778488484, "learning_rate": 3.5743070881839583e-06, "loss": 0.2339, "step": 17610 }, { "epoch": 0.6043582704186685, "grad_norm": 0.8042968725713927, "learning_rate": 3.573774428758432e-06, "loss": 0.2791, "step": 17611 }, { "epoch": 0.6043925875085793, "grad_norm": 0.842263899835762, "learning_rate": 3.5732417869522612e-06, "loss": 0.2858, "step": 17612 }, { "epoch": 0.60442690459849, "grad_norm": 0.774992783683004, "learning_rate": 3.5727091627720224e-06, "loss": 0.2667, "step": 17613 }, { "epoch": 0.6044612216884008, "grad_norm": 0.8068300269338553, "learning_rate": 3.5721765562242994e-06, "loss": 0.2781, "step": 17614 }, { "epoch": 0.6044955387783116, "grad_norm": 0.8367316243254792, "learning_rate": 3.571643967315669e-06, "loss": 0.2685, "step": 17615 }, { "epoch": 0.6045298558682224, "grad_norm": 0.7536347971092426, "learning_rate": 3.5711113960527145e-06, "loss": 0.2628, "step": 17616 }, { "epoch": 0.6045641729581331, "grad_norm": 0.8244691239836291, "learning_rate": 3.5705788424420117e-06, "loss": 0.3354, "step": 17617 }, { "epoch": 0.6045984900480439, "grad_norm": 0.7070614583027636, "learning_rate": 3.570046306490139e-06, "loss": 0.2727, "step": 17618 }, { "epoch": 0.6046328071379548, "grad_norm": 0.9431921669739196, "learning_rate": 3.5695137882036783e-06, "loss": 0.2951, "step": 17619 }, { "epoch": 0.6046671242278655, "grad_norm": 0.7890897044781338, "learning_rate": 3.5689812875892084e-06, "loss": 0.3198, "step": 17620 }, { "epoch": 0.6047014413177763, "grad_norm": 0.775395654981782, "learning_rate": 3.568448804653304e-06, "loss": 0.3391, "step": 17621 }, { "epoch": 0.604735758407687, "grad_norm": 0.7329020153787814, "learning_rate": 3.567916339402547e-06, "loss": 0.2912, "step": 17622 }, { "epoch": 0.6047700754975978, "grad_norm": 0.9047012978884666, "learning_rate": 3.5673838918435144e-06, "loss": 0.242, "step": 17623 }, { "epoch": 0.6048043925875086, "grad_norm": 0.778834670525869, "learning_rate": 3.5668514619827815e-06, "loss": 0.2874, "step": 17624 }, { "epoch": 0.6048387096774194, "grad_norm": 0.786156360640727, "learning_rate": 3.5663190498269295e-06, "loss": 0.2889, "step": 17625 }, { "epoch": 0.6048730267673301, "grad_norm": 0.7765407940782694, "learning_rate": 3.565786655382534e-06, "loss": 0.2917, "step": 17626 }, { "epoch": 0.6049073438572409, "grad_norm": 0.7406757728549241, "learning_rate": 3.565254278656172e-06, "loss": 0.2669, "step": 17627 }, { "epoch": 0.6049416609471516, "grad_norm": 0.7672816124313444, "learning_rate": 3.56472191965442e-06, "loss": 0.2625, "step": 17628 }, { "epoch": 0.6049759780370625, "grad_norm": 0.6423609257149381, "learning_rate": 3.564189578383857e-06, "loss": 0.2405, "step": 17629 }, { "epoch": 0.6050102951269732, "grad_norm": 0.7625053090645127, "learning_rate": 3.563657254851055e-06, "loss": 0.2689, "step": 17630 }, { "epoch": 0.605044612216884, "grad_norm": 0.8196193498558157, "learning_rate": 3.5631249490625957e-06, "loss": 0.3292, "step": 17631 }, { "epoch": 0.6050789293067947, "grad_norm": 0.8605444868597288, "learning_rate": 3.5625926610250526e-06, "loss": 0.322, "step": 17632 }, { "epoch": 0.6051132463967056, "grad_norm": 0.8041331409671876, "learning_rate": 3.562060390744999e-06, "loss": 0.3055, "step": 17633 }, { "epoch": 0.6051475634866164, "grad_norm": 0.7411940364587384, "learning_rate": 3.5615281382290135e-06, "loss": 0.3146, "step": 17634 }, { "epoch": 0.6051818805765271, "grad_norm": 0.800612310164031, "learning_rate": 3.560995903483672e-06, "loss": 0.2282, "step": 17635 }, { "epoch": 0.6052161976664379, "grad_norm": 0.7961615874693593, "learning_rate": 3.560463686515546e-06, "loss": 0.2614, "step": 17636 }, { "epoch": 0.6052505147563486, "grad_norm": 0.7451417449223151, "learning_rate": 3.5599314873312157e-06, "loss": 0.2751, "step": 17637 }, { "epoch": 0.6052848318462595, "grad_norm": 0.8008354873165168, "learning_rate": 3.559399305937252e-06, "loss": 0.3181, "step": 17638 }, { "epoch": 0.6053191489361702, "grad_norm": 0.6684331610296179, "learning_rate": 3.558867142340229e-06, "loss": 0.2895, "step": 17639 }, { "epoch": 0.605353466026081, "grad_norm": 0.700138176536868, "learning_rate": 3.558334996546724e-06, "loss": 0.3905, "step": 17640 }, { "epoch": 0.6053877831159917, "grad_norm": 0.7586639124078735, "learning_rate": 3.557802868563308e-06, "loss": 0.3097, "step": 17641 }, { "epoch": 0.6054221002059026, "grad_norm": 0.73373735323266, "learning_rate": 3.557270758396556e-06, "loss": 0.3052, "step": 17642 }, { "epoch": 0.6054564172958133, "grad_norm": 0.8495894799703706, "learning_rate": 3.556738666053043e-06, "loss": 0.2902, "step": 17643 }, { "epoch": 0.6054907343857241, "grad_norm": 0.805294082207361, "learning_rate": 3.556206591539339e-06, "loss": 0.3074, "step": 17644 }, { "epoch": 0.6055250514756348, "grad_norm": 0.8191299674633252, "learning_rate": 3.5556745348620215e-06, "loss": 0.2881, "step": 17645 }, { "epoch": 0.6055593685655456, "grad_norm": 0.6512420913434744, "learning_rate": 3.5551424960276616e-06, "loss": 0.2467, "step": 17646 }, { "epoch": 0.6055936856554565, "grad_norm": 0.732664550936907, "learning_rate": 3.554610475042829e-06, "loss": 0.2444, "step": 17647 }, { "epoch": 0.6056280027453672, "grad_norm": 0.7156103621803469, "learning_rate": 3.554078471914101e-06, "loss": 0.2513, "step": 17648 }, { "epoch": 0.605662319835278, "grad_norm": 0.7454663966630005, "learning_rate": 3.553546486648047e-06, "loss": 0.2404, "step": 17649 }, { "epoch": 0.6056966369251887, "grad_norm": 0.8662399309595534, "learning_rate": 3.5530145192512375e-06, "loss": 0.3185, "step": 17650 }, { "epoch": 0.6057309540150995, "grad_norm": 0.7288715582960542, "learning_rate": 3.55248256973025e-06, "loss": 0.2736, "step": 17651 }, { "epoch": 0.6057652711050103, "grad_norm": 0.7104714200012514, "learning_rate": 3.5519506380916523e-06, "loss": 0.258, "step": 17652 }, { "epoch": 0.6057995881949211, "grad_norm": 0.7164479332734989, "learning_rate": 3.5514187243420147e-06, "loss": 0.2622, "step": 17653 }, { "epoch": 0.6058339052848318, "grad_norm": 0.7608718490795726, "learning_rate": 3.550886828487911e-06, "loss": 0.2812, "step": 17654 }, { "epoch": 0.6058682223747426, "grad_norm": 0.8361004957442365, "learning_rate": 3.5503549505359103e-06, "loss": 0.2512, "step": 17655 }, { "epoch": 0.6059025394646534, "grad_norm": 0.816321782122327, "learning_rate": 3.5498230904925844e-06, "loss": 0.2868, "step": 17656 }, { "epoch": 0.6059368565545642, "grad_norm": 0.6992628566339513, "learning_rate": 3.549291248364503e-06, "loss": 0.2253, "step": 17657 }, { "epoch": 0.605971173644475, "grad_norm": 0.7371981736986689, "learning_rate": 3.5487594241582385e-06, "loss": 0.319, "step": 17658 }, { "epoch": 0.6060054907343857, "grad_norm": 0.8866859404583907, "learning_rate": 3.5482276178803564e-06, "loss": 0.2647, "step": 17659 }, { "epoch": 0.6060398078242965, "grad_norm": 0.7668332036652658, "learning_rate": 3.5476958295374326e-06, "loss": 0.3103, "step": 17660 }, { "epoch": 0.6060741249142073, "grad_norm": 0.7612202715075549, "learning_rate": 3.547164059136033e-06, "loss": 0.3182, "step": 17661 }, { "epoch": 0.6061084420041181, "grad_norm": 0.7309011151114487, "learning_rate": 3.5466323066827256e-06, "loss": 0.2447, "step": 17662 }, { "epoch": 0.6061427590940288, "grad_norm": 0.7763430351742042, "learning_rate": 3.5461005721840837e-06, "loss": 0.2269, "step": 17663 }, { "epoch": 0.6061770761839396, "grad_norm": 0.7920673076634258, "learning_rate": 3.5455688556466726e-06, "loss": 0.2371, "step": 17664 }, { "epoch": 0.6062113932738504, "grad_norm": 0.7084522218027757, "learning_rate": 3.5450371570770626e-06, "loss": 0.2374, "step": 17665 }, { "epoch": 0.6062457103637612, "grad_norm": 0.7146351547287488, "learning_rate": 3.5445054764818233e-06, "loss": 0.256, "step": 17666 }, { "epoch": 0.6062800274536719, "grad_norm": 0.8357722709259165, "learning_rate": 3.5439738138675223e-06, "loss": 0.2898, "step": 17667 }, { "epoch": 0.6063143445435827, "grad_norm": 0.8107320546281175, "learning_rate": 3.543442169240725e-06, "loss": 0.2826, "step": 17668 }, { "epoch": 0.6063486616334934, "grad_norm": 0.7027025227602577, "learning_rate": 3.542910542608004e-06, "loss": 0.2508, "step": 17669 }, { "epoch": 0.6063829787234043, "grad_norm": 0.8643477512218969, "learning_rate": 3.542378933975923e-06, "loss": 0.278, "step": 17670 }, { "epoch": 0.606417295813315, "grad_norm": 0.6821812719169404, "learning_rate": 3.5418473433510516e-06, "loss": 0.2664, "step": 17671 }, { "epoch": 0.6064516129032258, "grad_norm": 0.8479844820170539, "learning_rate": 3.5413157707399554e-06, "loss": 0.2993, "step": 17672 }, { "epoch": 0.6064859299931366, "grad_norm": 0.7022965190111681, "learning_rate": 3.5407842161492035e-06, "loss": 0.2873, "step": 17673 }, { "epoch": 0.6065202470830473, "grad_norm": 0.812354906175218, "learning_rate": 3.540252679585359e-06, "loss": 0.2931, "step": 17674 }, { "epoch": 0.6065545641729582, "grad_norm": 0.7149716132892447, "learning_rate": 3.539721161054993e-06, "loss": 0.2642, "step": 17675 }, { "epoch": 0.6065888812628689, "grad_norm": 0.8401038403721593, "learning_rate": 3.539189660564668e-06, "loss": 0.2544, "step": 17676 }, { "epoch": 0.6066231983527797, "grad_norm": 0.7500523938533294, "learning_rate": 3.5386581781209527e-06, "loss": 0.2843, "step": 17677 }, { "epoch": 0.6066575154426904, "grad_norm": 0.8363284449561191, "learning_rate": 3.5381267137304116e-06, "loss": 0.2641, "step": 17678 }, { "epoch": 0.6066918325326013, "grad_norm": 0.7574328215451721, "learning_rate": 3.5375952673996108e-06, "loss": 0.2761, "step": 17679 }, { "epoch": 0.606726149622512, "grad_norm": 0.8022094396667453, "learning_rate": 3.5370638391351142e-06, "loss": 0.2952, "step": 17680 }, { "epoch": 0.6067604667124228, "grad_norm": 0.7615331514846513, "learning_rate": 3.53653242894349e-06, "loss": 0.2515, "step": 17681 }, { "epoch": 0.6067947838023335, "grad_norm": 0.7826378709068899, "learning_rate": 3.536001036831299e-06, "loss": 0.314, "step": 17682 }, { "epoch": 0.6068291008922443, "grad_norm": 0.7944896889419814, "learning_rate": 3.535469662805111e-06, "loss": 0.2668, "step": 17683 }, { "epoch": 0.6068634179821552, "grad_norm": 0.8582708113351285, "learning_rate": 3.534938306871488e-06, "loss": 0.2798, "step": 17684 }, { "epoch": 0.6068977350720659, "grad_norm": 0.7245929718175347, "learning_rate": 3.534406969036992e-06, "loss": 0.3216, "step": 17685 }, { "epoch": 0.6069320521619767, "grad_norm": 0.9070280987915907, "learning_rate": 3.5338756493081894e-06, "loss": 0.2808, "step": 17686 }, { "epoch": 0.6069663692518874, "grad_norm": 0.813722386886856, "learning_rate": 3.5333443476916453e-06, "loss": 0.2574, "step": 17687 }, { "epoch": 0.6070006863417983, "grad_norm": 0.8271516240788511, "learning_rate": 3.5328130641939195e-06, "loss": 0.2824, "step": 17688 }, { "epoch": 0.607035003431709, "grad_norm": 0.7691028635444136, "learning_rate": 3.5322817988215808e-06, "loss": 0.3188, "step": 17689 }, { "epoch": 0.6070693205216198, "grad_norm": 0.6965711850367442, "learning_rate": 3.5317505515811884e-06, "loss": 0.2319, "step": 17690 }, { "epoch": 0.6071036376115305, "grad_norm": 0.7721477753777671, "learning_rate": 3.531219322479304e-06, "loss": 0.2917, "step": 17691 }, { "epoch": 0.6071379547014413, "grad_norm": 0.7688975152160709, "learning_rate": 3.5306881115224945e-06, "loss": 0.2637, "step": 17692 }, { "epoch": 0.6071722717913521, "grad_norm": 0.7469473862579006, "learning_rate": 3.5301569187173203e-06, "loss": 0.2856, "step": 17693 }, { "epoch": 0.6072065888812629, "grad_norm": 0.7457837363431776, "learning_rate": 3.5296257440703413e-06, "loss": 0.2812, "step": 17694 }, { "epoch": 0.6072409059711736, "grad_norm": 0.7316539387308918, "learning_rate": 3.5290945875881244e-06, "loss": 0.2495, "step": 17695 }, { "epoch": 0.6072752230610844, "grad_norm": 0.7456769531494555, "learning_rate": 3.528563449277229e-06, "loss": 0.2736, "step": 17696 }, { "epoch": 0.6073095401509951, "grad_norm": 0.7720167513982863, "learning_rate": 3.528032329144215e-06, "loss": 0.3067, "step": 17697 }, { "epoch": 0.607343857240906, "grad_norm": 0.7667466002725851, "learning_rate": 3.5275012271956477e-06, "loss": 0.2944, "step": 17698 }, { "epoch": 0.6073781743308168, "grad_norm": 0.6822112496795982, "learning_rate": 3.5269701434380855e-06, "loss": 0.258, "step": 17699 }, { "epoch": 0.6074124914207275, "grad_norm": 0.7355628401944231, "learning_rate": 3.5264390778780877e-06, "loss": 0.2209, "step": 17700 }, { "epoch": 0.6074468085106383, "grad_norm": 0.7542397992874728, "learning_rate": 3.5259080305222184e-06, "loss": 0.2986, "step": 17701 }, { "epoch": 0.6074811256005491, "grad_norm": 0.8515256040026474, "learning_rate": 3.5253770013770373e-06, "loss": 0.2658, "step": 17702 }, { "epoch": 0.6075154426904599, "grad_norm": 0.8283614634941988, "learning_rate": 3.5248459904491022e-06, "loss": 0.3267, "step": 17703 }, { "epoch": 0.6075497597803706, "grad_norm": 0.7506390859608847, "learning_rate": 3.5243149977449777e-06, "loss": 0.272, "step": 17704 }, { "epoch": 0.6075840768702814, "grad_norm": 0.8184173982346078, "learning_rate": 3.5237840232712193e-06, "loss": 0.2823, "step": 17705 }, { "epoch": 0.6076183939601921, "grad_norm": 0.9459575574732934, "learning_rate": 3.5232530670343877e-06, "loss": 0.3421, "step": 17706 }, { "epoch": 0.607652711050103, "grad_norm": 0.7085551492447068, "learning_rate": 3.522722129041044e-06, "loss": 0.2578, "step": 17707 }, { "epoch": 0.6076870281400137, "grad_norm": 0.9353549243801743, "learning_rate": 3.522191209297745e-06, "loss": 0.3013, "step": 17708 }, { "epoch": 0.6077213452299245, "grad_norm": 0.6962574145829631, "learning_rate": 3.5216603078110508e-06, "loss": 0.3145, "step": 17709 }, { "epoch": 0.6077556623198352, "grad_norm": 0.8223261264198658, "learning_rate": 3.521129424587521e-06, "loss": 0.2526, "step": 17710 }, { "epoch": 0.6077899794097461, "grad_norm": 0.7531919045300423, "learning_rate": 3.5205985596337113e-06, "loss": 0.2534, "step": 17711 }, { "epoch": 0.6078242964996569, "grad_norm": 0.7160348357244108, "learning_rate": 3.5200677129561835e-06, "loss": 0.2708, "step": 17712 }, { "epoch": 0.6078586135895676, "grad_norm": 0.7131393919121275, "learning_rate": 3.519536884561493e-06, "loss": 0.2374, "step": 17713 }, { "epoch": 0.6078929306794784, "grad_norm": 0.7662834308248833, "learning_rate": 3.519006074456197e-06, "loss": 0.3805, "step": 17714 }, { "epoch": 0.6079272477693891, "grad_norm": 0.7660180213123471, "learning_rate": 3.518475282646856e-06, "loss": 0.2654, "step": 17715 }, { "epoch": 0.6079615648593, "grad_norm": 0.7925510868536588, "learning_rate": 3.5179445091400245e-06, "loss": 0.3095, "step": 17716 }, { "epoch": 0.6079958819492107, "grad_norm": 0.74092814821839, "learning_rate": 3.5174137539422604e-06, "loss": 0.2612, "step": 17717 }, { "epoch": 0.6080301990391215, "grad_norm": 0.6140342200016945, "learning_rate": 3.516883017060122e-06, "loss": 0.2065, "step": 17718 }, { "epoch": 0.6080645161290322, "grad_norm": 0.7856289878674847, "learning_rate": 3.516352298500165e-06, "loss": 0.2152, "step": 17719 }, { "epoch": 0.608098833218943, "grad_norm": 0.7389531359716033, "learning_rate": 3.5158215982689436e-06, "loss": 0.2906, "step": 17720 }, { "epoch": 0.6081331503088538, "grad_norm": 1.016792787102913, "learning_rate": 3.515290916373018e-06, "loss": 0.2953, "step": 17721 }, { "epoch": 0.6081674673987646, "grad_norm": 0.7631929907728339, "learning_rate": 3.514760252818941e-06, "loss": 0.2695, "step": 17722 }, { "epoch": 0.6082017844886753, "grad_norm": 0.7938269052039957, "learning_rate": 3.5142296076132697e-06, "loss": 0.3398, "step": 17723 }, { "epoch": 0.6082361015785861, "grad_norm": 0.8452663889524495, "learning_rate": 3.5136989807625592e-06, "loss": 0.3082, "step": 17724 }, { "epoch": 0.608270418668497, "grad_norm": 0.8186791012104141, "learning_rate": 3.513168372273366e-06, "loss": 0.3135, "step": 17725 }, { "epoch": 0.6083047357584077, "grad_norm": 0.8866291366769826, "learning_rate": 3.5126377821522424e-06, "loss": 0.3018, "step": 17726 }, { "epoch": 0.6083390528483185, "grad_norm": 0.7131830763295978, "learning_rate": 3.5121072104057465e-06, "loss": 0.26, "step": 17727 }, { "epoch": 0.6083733699382292, "grad_norm": 0.7936848753336008, "learning_rate": 3.5115766570404313e-06, "loss": 0.3018, "step": 17728 }, { "epoch": 0.60840768702814, "grad_norm": 0.8289141323068301, "learning_rate": 3.5110461220628487e-06, "loss": 0.2877, "step": 17729 }, { "epoch": 0.6084420041180508, "grad_norm": 0.8097024198993009, "learning_rate": 3.5105156054795573e-06, "loss": 0.3172, "step": 17730 }, { "epoch": 0.6084763212079616, "grad_norm": 0.7823575740093587, "learning_rate": 3.5099851072971097e-06, "loss": 0.2895, "step": 17731 }, { "epoch": 0.6085106382978723, "grad_norm": 0.75015566895182, "learning_rate": 3.5094546275220563e-06, "loss": 0.3428, "step": 17732 }, { "epoch": 0.6085449553877831, "grad_norm": 0.7851258545262482, "learning_rate": 3.5089241661609564e-06, "loss": 0.2629, "step": 17733 }, { "epoch": 0.6085792724776939, "grad_norm": 0.788121297950017, "learning_rate": 3.508393723220359e-06, "loss": 0.282, "step": 17734 }, { "epoch": 0.6086135895676047, "grad_norm": 0.7979021635954395, "learning_rate": 3.5078632987068163e-06, "loss": 0.2644, "step": 17735 }, { "epoch": 0.6086479066575154, "grad_norm": 0.7249672897741138, "learning_rate": 3.507332892626885e-06, "loss": 0.2713, "step": 17736 }, { "epoch": 0.6086822237474262, "grad_norm": 0.7776264524487354, "learning_rate": 3.5068025049871146e-06, "loss": 0.2894, "step": 17737 }, { "epoch": 0.608716540837337, "grad_norm": 0.7179331199532442, "learning_rate": 3.5062721357940583e-06, "loss": 0.2615, "step": 17738 }, { "epoch": 0.6087508579272478, "grad_norm": 0.7307169855668815, "learning_rate": 3.505741785054268e-06, "loss": 0.2672, "step": 17739 }, { "epoch": 0.6087851750171586, "grad_norm": 0.7436842562223216, "learning_rate": 3.5052114527742952e-06, "loss": 0.2917, "step": 17740 }, { "epoch": 0.6088194921070693, "grad_norm": 0.792726163450394, "learning_rate": 3.504681138960694e-06, "loss": 0.3053, "step": 17741 }, { "epoch": 0.6088538091969801, "grad_norm": 0.7860420593789321, "learning_rate": 3.5041508436200132e-06, "loss": 0.2813, "step": 17742 }, { "epoch": 0.6088881262868908, "grad_norm": 0.8321328717431378, "learning_rate": 3.503620566758804e-06, "loss": 0.2527, "step": 17743 }, { "epoch": 0.6089224433768017, "grad_norm": 0.8445002788118265, "learning_rate": 3.5030903083836195e-06, "loss": 0.2955, "step": 17744 }, { "epoch": 0.6089567604667124, "grad_norm": 0.766992320009161, "learning_rate": 3.5025600685010074e-06, "loss": 0.2857, "step": 17745 }, { "epoch": 0.6089910775566232, "grad_norm": 0.7799970101650926, "learning_rate": 3.502029847117521e-06, "loss": 0.2684, "step": 17746 }, { "epoch": 0.6090253946465339, "grad_norm": 0.8283192554590918, "learning_rate": 3.501499644239709e-06, "loss": 0.2853, "step": 17747 }, { "epoch": 0.6090597117364448, "grad_norm": 0.7874545441814121, "learning_rate": 3.500969459874122e-06, "loss": 0.2884, "step": 17748 }, { "epoch": 0.6090940288263555, "grad_norm": 0.806516243579333, "learning_rate": 3.5004392940273083e-06, "loss": 0.297, "step": 17749 }, { "epoch": 0.6091283459162663, "grad_norm": 0.7650910738679466, "learning_rate": 3.499909146705821e-06, "loss": 0.2847, "step": 17750 }, { "epoch": 0.609162663006177, "grad_norm": 0.7287674619227232, "learning_rate": 3.4993790179162067e-06, "loss": 0.2509, "step": 17751 }, { "epoch": 0.6091969800960878, "grad_norm": 0.7568315729973072, "learning_rate": 3.4988489076650135e-06, "loss": 0.3293, "step": 17752 }, { "epoch": 0.6092312971859987, "grad_norm": 0.7891369017693135, "learning_rate": 3.4983188159587932e-06, "loss": 0.2411, "step": 17753 }, { "epoch": 0.6092656142759094, "grad_norm": 0.736937034002983, "learning_rate": 3.4977887428040937e-06, "loss": 0.2342, "step": 17754 }, { "epoch": 0.6092999313658202, "grad_norm": 0.6961976954096093, "learning_rate": 3.4972586882074604e-06, "loss": 0.2925, "step": 17755 }, { "epoch": 0.6093342484557309, "grad_norm": 0.8860198971048205, "learning_rate": 3.496728652175447e-06, "loss": 0.2535, "step": 17756 }, { "epoch": 0.6093685655456418, "grad_norm": 0.7819371361068442, "learning_rate": 3.4961986347145977e-06, "loss": 0.2545, "step": 17757 }, { "epoch": 0.6094028826355525, "grad_norm": 0.9446572768804349, "learning_rate": 3.495668635831459e-06, "loss": 0.3222, "step": 17758 }, { "epoch": 0.6094371997254633, "grad_norm": 0.6740723496782146, "learning_rate": 3.495138655532583e-06, "loss": 0.2612, "step": 17759 }, { "epoch": 0.609471516815374, "grad_norm": 0.778936076138349, "learning_rate": 3.494608693824513e-06, "loss": 0.2623, "step": 17760 }, { "epoch": 0.6095058339052848, "grad_norm": 0.7899038175244233, "learning_rate": 3.4940787507137963e-06, "loss": 0.2831, "step": 17761 }, { "epoch": 0.6095401509951957, "grad_norm": 0.7344833082134049, "learning_rate": 3.4935488262069826e-06, "loss": 0.2417, "step": 17762 }, { "epoch": 0.6095744680851064, "grad_norm": 0.7512723753671143, "learning_rate": 3.493018920310618e-06, "loss": 0.257, "step": 17763 }, { "epoch": 0.6096087851750172, "grad_norm": 0.8096523822417752, "learning_rate": 3.4924890330312444e-06, "loss": 0.3287, "step": 17764 }, { "epoch": 0.6096431022649279, "grad_norm": 0.8792737208270328, "learning_rate": 3.491959164375414e-06, "loss": 0.2653, "step": 17765 }, { "epoch": 0.6096774193548387, "grad_norm": 0.8459585820024221, "learning_rate": 3.491429314349669e-06, "loss": 0.2837, "step": 17766 }, { "epoch": 0.6097117364447495, "grad_norm": 0.7608817882416681, "learning_rate": 3.4908994829605556e-06, "loss": 0.2883, "step": 17767 }, { "epoch": 0.6097460535346603, "grad_norm": 0.7048089479609903, "learning_rate": 3.4903696702146195e-06, "loss": 0.225, "step": 17768 }, { "epoch": 0.609780370624571, "grad_norm": 0.7680273449856994, "learning_rate": 3.489839876118408e-06, "loss": 0.2987, "step": 17769 }, { "epoch": 0.6098146877144818, "grad_norm": 0.7562765981796891, "learning_rate": 3.4893101006784615e-06, "loss": 0.2574, "step": 17770 }, { "epoch": 0.6098490048043926, "grad_norm": 0.8052686150272182, "learning_rate": 3.488780343901329e-06, "loss": 0.2477, "step": 17771 }, { "epoch": 0.6098833218943034, "grad_norm": 0.8181619006693016, "learning_rate": 3.4882506057935517e-06, "loss": 0.2669, "step": 17772 }, { "epoch": 0.6099176389842141, "grad_norm": 0.7435826115836165, "learning_rate": 3.487720886361678e-06, "loss": 0.2708, "step": 17773 }, { "epoch": 0.6099519560741249, "grad_norm": 0.9125539629614212, "learning_rate": 3.4871911856122498e-06, "loss": 0.2661, "step": 17774 }, { "epoch": 0.6099862731640356, "grad_norm": 0.84613332764621, "learning_rate": 3.4866615035518083e-06, "loss": 0.2734, "step": 17775 }, { "epoch": 0.6100205902539465, "grad_norm": 0.7028809914338264, "learning_rate": 3.4861318401869006e-06, "loss": 0.2551, "step": 17776 }, { "epoch": 0.6100549073438573, "grad_norm": 0.7648043848624382, "learning_rate": 3.485602195524069e-06, "loss": 0.2828, "step": 17777 }, { "epoch": 0.610089224433768, "grad_norm": 0.8239173909847821, "learning_rate": 3.485072569569856e-06, "loss": 0.2431, "step": 17778 }, { "epoch": 0.6101235415236788, "grad_norm": 0.6931946647682994, "learning_rate": 3.484542962330807e-06, "loss": 0.2748, "step": 17779 }, { "epoch": 0.6101578586135896, "grad_norm": 0.7778106508444664, "learning_rate": 3.484013373813463e-06, "loss": 0.3147, "step": 17780 }, { "epoch": 0.6101921757035004, "grad_norm": 0.7909534436750704, "learning_rate": 3.4834838040243635e-06, "loss": 0.3139, "step": 17781 }, { "epoch": 0.6102264927934111, "grad_norm": 0.9259254244124533, "learning_rate": 3.482954252970056e-06, "loss": 0.3192, "step": 17782 }, { "epoch": 0.6102608098833219, "grad_norm": 0.8505164846131705, "learning_rate": 3.4824247206570792e-06, "loss": 0.273, "step": 17783 }, { "epoch": 0.6102951269732326, "grad_norm": 0.7397429180472336, "learning_rate": 3.4818952070919744e-06, "loss": 0.2912, "step": 17784 }, { "epoch": 0.6103294440631435, "grad_norm": 0.893064884334142, "learning_rate": 3.481365712281286e-06, "loss": 0.3271, "step": 17785 }, { "epoch": 0.6103637611530542, "grad_norm": 0.7875823150155591, "learning_rate": 3.480836236231554e-06, "loss": 0.277, "step": 17786 }, { "epoch": 0.610398078242965, "grad_norm": 0.7835112719877799, "learning_rate": 3.4803067789493174e-06, "loss": 0.256, "step": 17787 }, { "epoch": 0.6104323953328757, "grad_norm": 0.7369418439087057, "learning_rate": 3.4797773404411204e-06, "loss": 0.306, "step": 17788 }, { "epoch": 0.6104667124227865, "grad_norm": 0.8787157897600946, "learning_rate": 3.479247920713501e-06, "loss": 0.259, "step": 17789 }, { "epoch": 0.6105010295126974, "grad_norm": 0.8083345733732106, "learning_rate": 3.478718519773001e-06, "loss": 0.3308, "step": 17790 }, { "epoch": 0.6105353466026081, "grad_norm": 0.8015011404474518, "learning_rate": 3.4781891376261597e-06, "loss": 0.2549, "step": 17791 }, { "epoch": 0.6105696636925189, "grad_norm": 0.6988297438623244, "learning_rate": 3.477659774279518e-06, "loss": 0.2815, "step": 17792 }, { "epoch": 0.6106039807824296, "grad_norm": 0.7525352319203844, "learning_rate": 3.4771304297396134e-06, "loss": 0.2672, "step": 17793 }, { "epoch": 0.6106382978723405, "grad_norm": 0.7083208810491426, "learning_rate": 3.476601104012989e-06, "loss": 0.2428, "step": 17794 }, { "epoch": 0.6106726149622512, "grad_norm": 0.7241434391423202, "learning_rate": 3.4760717971061812e-06, "loss": 0.3037, "step": 17795 }, { "epoch": 0.610706932052162, "grad_norm": 0.7029249526418914, "learning_rate": 3.4755425090257277e-06, "loss": 0.2348, "step": 17796 }, { "epoch": 0.6107412491420727, "grad_norm": 0.8520624112546759, "learning_rate": 3.4750132397781705e-06, "loss": 0.2935, "step": 17797 }, { "epoch": 0.6107755662319835, "grad_norm": 0.748497110661671, "learning_rate": 3.4744839893700477e-06, "loss": 0.2633, "step": 17798 }, { "epoch": 0.6108098833218943, "grad_norm": 0.9271481127801736, "learning_rate": 3.4739547578078947e-06, "loss": 0.3604, "step": 17799 }, { "epoch": 0.6108442004118051, "grad_norm": 0.7482370359559835, "learning_rate": 3.4734255450982536e-06, "loss": 0.3031, "step": 17800 }, { "epoch": 0.6108785175017158, "grad_norm": 0.86464003241761, "learning_rate": 3.472896351247658e-06, "loss": 0.2554, "step": 17801 }, { "epoch": 0.6109128345916266, "grad_norm": 0.804549218888384, "learning_rate": 3.4723671762626488e-06, "loss": 0.2902, "step": 17802 }, { "epoch": 0.6109471516815375, "grad_norm": 0.8089051977076442, "learning_rate": 3.4718380201497624e-06, "loss": 0.2446, "step": 17803 }, { "epoch": 0.6109814687714482, "grad_norm": 0.7649089235211638, "learning_rate": 3.4713088829155337e-06, "loss": 0.2485, "step": 17804 }, { "epoch": 0.611015785861359, "grad_norm": 0.7682909382310972, "learning_rate": 3.470779764566503e-06, "loss": 0.2406, "step": 17805 }, { "epoch": 0.6110501029512697, "grad_norm": 0.7117591969660392, "learning_rate": 3.470250665109206e-06, "loss": 0.266, "step": 17806 }, { "epoch": 0.6110844200411805, "grad_norm": 0.7892062751910989, "learning_rate": 3.469721584550177e-06, "loss": 0.2726, "step": 17807 }, { "epoch": 0.6111187371310913, "grad_norm": 0.6982038003123436, "learning_rate": 3.4691925228959554e-06, "loss": 0.2445, "step": 17808 }, { "epoch": 0.6111530542210021, "grad_norm": 0.7125958925170807, "learning_rate": 3.468663480153075e-06, "loss": 0.2575, "step": 17809 }, { "epoch": 0.6111873713109128, "grad_norm": 0.7543779535549541, "learning_rate": 3.4681344563280706e-06, "loss": 0.2648, "step": 17810 }, { "epoch": 0.6112216884008236, "grad_norm": 0.7357352871259097, "learning_rate": 3.46760545142748e-06, "loss": 0.2801, "step": 17811 }, { "epoch": 0.6112560054907343, "grad_norm": 0.7624534825360807, "learning_rate": 3.467076465457837e-06, "loss": 0.2715, "step": 17812 }, { "epoch": 0.6112903225806452, "grad_norm": 0.7968613236446084, "learning_rate": 3.4665474984256776e-06, "loss": 0.3364, "step": 17813 }, { "epoch": 0.611324639670556, "grad_norm": 0.7430462327251955, "learning_rate": 3.4660185503375355e-06, "loss": 0.3065, "step": 17814 }, { "epoch": 0.6113589567604667, "grad_norm": 0.8195734818521349, "learning_rate": 3.465489621199947e-06, "loss": 0.2793, "step": 17815 }, { "epoch": 0.6113932738503774, "grad_norm": 0.8002333685146615, "learning_rate": 3.464960711019443e-06, "loss": 0.2738, "step": 17816 }, { "epoch": 0.6114275909402883, "grad_norm": 0.7795900615156012, "learning_rate": 3.4644318198025617e-06, "loss": 0.2778, "step": 17817 }, { "epoch": 0.6114619080301991, "grad_norm": 0.8030015234299656, "learning_rate": 3.463902947555835e-06, "loss": 0.2529, "step": 17818 }, { "epoch": 0.6114962251201098, "grad_norm": 0.8099056925077017, "learning_rate": 3.463374094285794e-06, "loss": 0.2264, "step": 17819 }, { "epoch": 0.6115305422100206, "grad_norm": 0.8906332735464375, "learning_rate": 3.462845259998976e-06, "loss": 0.2687, "step": 17820 }, { "epoch": 0.6115648592999313, "grad_norm": 0.8177980812385363, "learning_rate": 3.462316444701914e-06, "loss": 0.2763, "step": 17821 }, { "epoch": 0.6115991763898422, "grad_norm": 0.8944828409120538, "learning_rate": 3.461787648401137e-06, "loss": 0.2767, "step": 17822 }, { "epoch": 0.6116334934797529, "grad_norm": 0.8543526266639049, "learning_rate": 3.4612588711031824e-06, "loss": 0.2791, "step": 17823 }, { "epoch": 0.6116678105696637, "grad_norm": 0.6594101392146071, "learning_rate": 3.460730112814581e-06, "loss": 0.2577, "step": 17824 }, { "epoch": 0.6117021276595744, "grad_norm": 0.6956173782149464, "learning_rate": 3.4602013735418615e-06, "loss": 0.282, "step": 17825 }, { "epoch": 0.6117364447494853, "grad_norm": 0.9028133293453946, "learning_rate": 3.4596726532915613e-06, "loss": 0.2578, "step": 17826 }, { "epoch": 0.611770761839396, "grad_norm": 0.7660520415216945, "learning_rate": 3.4591439520702087e-06, "loss": 0.2702, "step": 17827 }, { "epoch": 0.6118050789293068, "grad_norm": 0.7545536316473003, "learning_rate": 3.4586152698843346e-06, "loss": 0.2127, "step": 17828 }, { "epoch": 0.6118393960192176, "grad_norm": 1.3696745034798008, "learning_rate": 3.4580866067404744e-06, "loss": 0.258, "step": 17829 }, { "epoch": 0.6118737131091283, "grad_norm": 0.7490384005672196, "learning_rate": 3.457557962645156e-06, "loss": 0.3453, "step": 17830 }, { "epoch": 0.6119080301990392, "grad_norm": 0.9989072520415618, "learning_rate": 3.457029337604909e-06, "loss": 0.2485, "step": 17831 }, { "epoch": 0.6119423472889499, "grad_norm": 0.8992714466955339, "learning_rate": 3.456500731626268e-06, "loss": 0.2735, "step": 17832 }, { "epoch": 0.6119766643788607, "grad_norm": 0.683717264727195, "learning_rate": 3.455972144715758e-06, "loss": 0.2492, "step": 17833 }, { "epoch": 0.6120109814687714, "grad_norm": 0.6935793079705916, "learning_rate": 3.4554435768799144e-06, "loss": 0.2312, "step": 17834 }, { "epoch": 0.6120452985586822, "grad_norm": 0.7079776377288991, "learning_rate": 3.4549150281252635e-06, "loss": 0.2603, "step": 17835 }, { "epoch": 0.612079615648593, "grad_norm": 0.7672244845350683, "learning_rate": 3.4543864984583353e-06, "loss": 0.2523, "step": 17836 }, { "epoch": 0.6121139327385038, "grad_norm": 0.7828967617303841, "learning_rate": 3.4538579878856617e-06, "loss": 0.2517, "step": 17837 }, { "epoch": 0.6121482498284145, "grad_norm": 0.7799922319183223, "learning_rate": 3.45332949641377e-06, "loss": 0.289, "step": 17838 }, { "epoch": 0.6121825669183253, "grad_norm": 0.8022163761635713, "learning_rate": 3.4528010240491873e-06, "loss": 0.2655, "step": 17839 }, { "epoch": 0.6122168840082362, "grad_norm": 0.8487989653431531, "learning_rate": 3.452272570798446e-06, "loss": 0.3405, "step": 17840 }, { "epoch": 0.6122512010981469, "grad_norm": 0.7506244438506599, "learning_rate": 3.4517441366680713e-06, "loss": 0.247, "step": 17841 }, { "epoch": 0.6122855181880577, "grad_norm": 0.7391047685591511, "learning_rate": 3.451215721664593e-06, "loss": 0.2691, "step": 17842 }, { "epoch": 0.6123198352779684, "grad_norm": 0.7690320150853905, "learning_rate": 3.450687325794539e-06, "loss": 0.2378, "step": 17843 }, { "epoch": 0.6123541523678792, "grad_norm": 0.740589677869693, "learning_rate": 3.4501589490644373e-06, "loss": 0.2227, "step": 17844 }, { "epoch": 0.61238846945779, "grad_norm": 0.7790744215492942, "learning_rate": 3.4496305914808133e-06, "loss": 0.2616, "step": 17845 }, { "epoch": 0.6124227865477008, "grad_norm": 0.6932774967681089, "learning_rate": 3.449102253050197e-06, "loss": 0.2529, "step": 17846 }, { "epoch": 0.6124571036376115, "grad_norm": 0.7403120885614135, "learning_rate": 3.4485739337791154e-06, "loss": 0.2906, "step": 17847 }, { "epoch": 0.6124914207275223, "grad_norm": 0.8644596623854235, "learning_rate": 3.448045633674091e-06, "loss": 0.2818, "step": 17848 }, { "epoch": 0.6125257378174331, "grad_norm": 0.699753502920528, "learning_rate": 3.447517352741656e-06, "loss": 0.2389, "step": 17849 }, { "epoch": 0.6125600549073439, "grad_norm": 0.7398549070350597, "learning_rate": 3.446989090988333e-06, "loss": 0.2653, "step": 17850 }, { "epoch": 0.6125943719972546, "grad_norm": 0.848135491422889, "learning_rate": 3.4464608484206474e-06, "loss": 0.292, "step": 17851 }, { "epoch": 0.6126286890871654, "grad_norm": 0.8078778976646543, "learning_rate": 3.44593262504513e-06, "loss": 0.2898, "step": 17852 }, { "epoch": 0.6126630061770761, "grad_norm": 0.6082667765750571, "learning_rate": 3.4454044208683024e-06, "loss": 0.2673, "step": 17853 }, { "epoch": 0.612697323266987, "grad_norm": 0.8697558701433123, "learning_rate": 3.444876235896689e-06, "loss": 0.2319, "step": 17854 }, { "epoch": 0.6127316403568978, "grad_norm": 0.7520569606107331, "learning_rate": 3.444348070136818e-06, "loss": 0.2917, "step": 17855 }, { "epoch": 0.6127659574468085, "grad_norm": 0.7884029888122135, "learning_rate": 3.443819923595213e-06, "loss": 0.2587, "step": 17856 }, { "epoch": 0.6128002745367193, "grad_norm": 0.7929265794934945, "learning_rate": 3.4432917962783973e-06, "loss": 0.2497, "step": 17857 }, { "epoch": 0.61283459162663, "grad_norm": 0.7778271720376277, "learning_rate": 3.4427636881928972e-06, "loss": 0.2976, "step": 17858 }, { "epoch": 0.6128689087165409, "grad_norm": 0.899593453329682, "learning_rate": 3.4422355993452374e-06, "loss": 0.3048, "step": 17859 }, { "epoch": 0.6129032258064516, "grad_norm": 0.7116809405290837, "learning_rate": 3.441707529741938e-06, "loss": 0.3232, "step": 17860 }, { "epoch": 0.6129375428963624, "grad_norm": 0.8400515875245445, "learning_rate": 3.441179479389527e-06, "loss": 0.2686, "step": 17861 }, { "epoch": 0.6129718599862731, "grad_norm": 0.7072437031446195, "learning_rate": 3.440651448294527e-06, "loss": 0.2569, "step": 17862 }, { "epoch": 0.613006177076184, "grad_norm": 0.7588412293020258, "learning_rate": 3.4401234364634577e-06, "loss": 0.2897, "step": 17863 }, { "epoch": 0.6130404941660947, "grad_norm": 0.7423562815604943, "learning_rate": 3.4395954439028462e-06, "loss": 0.2937, "step": 17864 }, { "epoch": 0.6130748112560055, "grad_norm": 0.9025045689048752, "learning_rate": 3.4390674706192135e-06, "loss": 0.2396, "step": 17865 }, { "epoch": 0.6131091283459162, "grad_norm": 0.7525427734180813, "learning_rate": 3.438539516619082e-06, "loss": 0.2629, "step": 17866 }, { "epoch": 0.613143445435827, "grad_norm": 0.7962773743800667, "learning_rate": 3.438011581908976e-06, "loss": 0.3108, "step": 17867 }, { "epoch": 0.6131777625257379, "grad_norm": 0.6736996822369166, "learning_rate": 3.437483666495413e-06, "loss": 0.2475, "step": 17868 }, { "epoch": 0.6132120796156486, "grad_norm": 0.7449051613866341, "learning_rate": 3.43695577038492e-06, "loss": 0.3366, "step": 17869 }, { "epoch": 0.6132463967055594, "grad_norm": 0.7663216515718416, "learning_rate": 3.4364278935840166e-06, "loss": 0.265, "step": 17870 }, { "epoch": 0.6132807137954701, "grad_norm": 0.7351665834460333, "learning_rate": 3.435900036099221e-06, "loss": 0.2575, "step": 17871 }, { "epoch": 0.613315030885381, "grad_norm": 0.7860906505269067, "learning_rate": 3.435372197937058e-06, "loss": 0.2202, "step": 17872 }, { "epoch": 0.6133493479752917, "grad_norm": 0.7529892194228994, "learning_rate": 3.4348443791040485e-06, "loss": 0.2823, "step": 17873 }, { "epoch": 0.6133836650652025, "grad_norm": 0.780832443617899, "learning_rate": 3.4343165796067098e-06, "loss": 0.2779, "step": 17874 }, { "epoch": 0.6134179821551132, "grad_norm": 0.927281012927879, "learning_rate": 3.433788799451566e-06, "loss": 0.2865, "step": 17875 }, { "epoch": 0.613452299245024, "grad_norm": 0.7919012856497316, "learning_rate": 3.433261038645136e-06, "loss": 0.3085, "step": 17876 }, { "epoch": 0.6134866163349348, "grad_norm": 0.8227203856933708, "learning_rate": 3.432733297193937e-06, "loss": 0.2988, "step": 17877 }, { "epoch": 0.6135209334248456, "grad_norm": 0.7310681063687142, "learning_rate": 3.432205575104493e-06, "loss": 0.2379, "step": 17878 }, { "epoch": 0.6135552505147563, "grad_norm": 0.7706302710579714, "learning_rate": 3.43167787238332e-06, "loss": 0.2333, "step": 17879 }, { "epoch": 0.6135895676046671, "grad_norm": 0.7226661442807333, "learning_rate": 3.4311501890369373e-06, "loss": 0.254, "step": 17880 }, { "epoch": 0.6136238846945778, "grad_norm": 0.7144761249676147, "learning_rate": 3.430622525071867e-06, "loss": 0.2512, "step": 17881 }, { "epoch": 0.6136582017844887, "grad_norm": 0.8320790798327088, "learning_rate": 3.4300948804946266e-06, "loss": 0.2388, "step": 17882 }, { "epoch": 0.6136925188743995, "grad_norm": 0.7514013186488495, "learning_rate": 3.429567255311731e-06, "loss": 0.2524, "step": 17883 }, { "epoch": 0.6137268359643102, "grad_norm": 0.7287741492991524, "learning_rate": 3.429039649529703e-06, "loss": 0.2446, "step": 17884 }, { "epoch": 0.613761153054221, "grad_norm": 0.7006312129393341, "learning_rate": 3.4285120631550584e-06, "loss": 0.2841, "step": 17885 }, { "epoch": 0.6137954701441318, "grad_norm": 0.8085829432142619, "learning_rate": 3.4279844961943133e-06, "loss": 0.2798, "step": 17886 }, { "epoch": 0.6138297872340426, "grad_norm": 0.7076915767474428, "learning_rate": 3.4274569486539877e-06, "loss": 0.2789, "step": 17887 }, { "epoch": 0.6138641043239533, "grad_norm": 0.7674716388904337, "learning_rate": 3.426929420540599e-06, "loss": 0.273, "step": 17888 }, { "epoch": 0.6138984214138641, "grad_norm": 0.7655049348211848, "learning_rate": 3.426401911860662e-06, "loss": 0.3206, "step": 17889 }, { "epoch": 0.6139327385037748, "grad_norm": 0.7364480651691327, "learning_rate": 3.425874422620696e-06, "loss": 0.2412, "step": 17890 }, { "epoch": 0.6139670555936857, "grad_norm": 0.8564079464443152, "learning_rate": 3.4253469528272167e-06, "loss": 0.2644, "step": 17891 }, { "epoch": 0.6140013726835964, "grad_norm": 0.8693161709442477, "learning_rate": 3.424819502486737e-06, "loss": 0.2938, "step": 17892 }, { "epoch": 0.6140356897735072, "grad_norm": 0.8397063483925983, "learning_rate": 3.4242920716057794e-06, "loss": 0.3128, "step": 17893 }, { "epoch": 0.614070006863418, "grad_norm": 0.7504678009627553, "learning_rate": 3.4237646601908537e-06, "loss": 0.3031, "step": 17894 }, { "epoch": 0.6141043239533288, "grad_norm": 0.7214331287861995, "learning_rate": 3.4232372682484777e-06, "loss": 0.2517, "step": 17895 }, { "epoch": 0.6141386410432396, "grad_norm": 0.769455567230467, "learning_rate": 3.422709895785169e-06, "loss": 0.2515, "step": 17896 }, { "epoch": 0.6141729581331503, "grad_norm": 0.8155201095728412, "learning_rate": 3.4221825428074384e-06, "loss": 0.3237, "step": 17897 }, { "epoch": 0.6142072752230611, "grad_norm": 0.7337637624682714, "learning_rate": 3.4216552093218046e-06, "loss": 0.2982, "step": 17898 }, { "epoch": 0.6142415923129718, "grad_norm": 0.8279661490216199, "learning_rate": 3.421127895334781e-06, "loss": 0.2648, "step": 17899 }, { "epoch": 0.6142759094028827, "grad_norm": 0.7873068476422931, "learning_rate": 3.420600600852879e-06, "loss": 0.3317, "step": 17900 }, { "epoch": 0.6143102264927934, "grad_norm": 1.0457009131901924, "learning_rate": 3.420073325882618e-06, "loss": 0.2798, "step": 17901 }, { "epoch": 0.6143445435827042, "grad_norm": 0.7472810929514244, "learning_rate": 3.419546070430507e-06, "loss": 0.227, "step": 17902 }, { "epoch": 0.6143788606726149, "grad_norm": 0.6744170103067035, "learning_rate": 3.419018834503062e-06, "loss": 0.271, "step": 17903 }, { "epoch": 0.6144131777625257, "grad_norm": 0.8185392620467923, "learning_rate": 3.4184916181067967e-06, "loss": 0.3169, "step": 17904 }, { "epoch": 0.6144474948524365, "grad_norm": 0.9451223711388061, "learning_rate": 3.4179644212482244e-06, "loss": 0.2991, "step": 17905 }, { "epoch": 0.6144818119423473, "grad_norm": 0.7818533278576008, "learning_rate": 3.4174372439338544e-06, "loss": 0.2601, "step": 17906 }, { "epoch": 0.614516129032258, "grad_norm": 0.841155567369184, "learning_rate": 3.4169100861702056e-06, "loss": 0.3049, "step": 17907 }, { "epoch": 0.6145504461221688, "grad_norm": 0.7490940994258057, "learning_rate": 3.4163829479637843e-06, "loss": 0.2553, "step": 17908 }, { "epoch": 0.6145847632120797, "grad_norm": 0.7570403083712899, "learning_rate": 3.4158558293211063e-06, "loss": 0.223, "step": 17909 }, { "epoch": 0.6146190803019904, "grad_norm": 0.761044468277896, "learning_rate": 3.4153287302486825e-06, "loss": 0.2528, "step": 17910 }, { "epoch": 0.6146533973919012, "grad_norm": 0.7508090785991147, "learning_rate": 3.4148016507530256e-06, "loss": 0.2632, "step": 17911 }, { "epoch": 0.6146877144818119, "grad_norm": 0.8190151962586084, "learning_rate": 3.414274590840644e-06, "loss": 0.3131, "step": 17912 }, { "epoch": 0.6147220315717227, "grad_norm": 0.7846613658162738, "learning_rate": 3.4137475505180533e-06, "loss": 0.2463, "step": 17913 }, { "epoch": 0.6147563486616335, "grad_norm": 0.9327978423890132, "learning_rate": 3.413220529791762e-06, "loss": 0.2898, "step": 17914 }, { "epoch": 0.6147906657515443, "grad_norm": 0.7391357991636589, "learning_rate": 3.4126935286682795e-06, "loss": 0.2876, "step": 17915 }, { "epoch": 0.614824982841455, "grad_norm": 0.7793516759346497, "learning_rate": 3.412166547154119e-06, "loss": 0.3077, "step": 17916 }, { "epoch": 0.6148592999313658, "grad_norm": 0.7339323978370114, "learning_rate": 3.411639585255789e-06, "loss": 0.2626, "step": 17917 }, { "epoch": 0.6148936170212767, "grad_norm": 0.8367500877224158, "learning_rate": 3.4111126429797996e-06, "loss": 0.2256, "step": 17918 }, { "epoch": 0.6149279341111874, "grad_norm": 0.8944827949384129, "learning_rate": 3.410585720332663e-06, "loss": 0.3018, "step": 17919 }, { "epoch": 0.6149622512010982, "grad_norm": 0.8127245743427477, "learning_rate": 3.410058817320886e-06, "loss": 0.2538, "step": 17920 }, { "epoch": 0.6149965682910089, "grad_norm": 0.7806195428300139, "learning_rate": 3.409531933950977e-06, "loss": 0.2621, "step": 17921 }, { "epoch": 0.6150308853809197, "grad_norm": 0.7508566423099843, "learning_rate": 3.409005070229449e-06, "loss": 0.2441, "step": 17922 }, { "epoch": 0.6150652024708305, "grad_norm": 1.0578471421736557, "learning_rate": 3.408478226162807e-06, "loss": 0.3028, "step": 17923 }, { "epoch": 0.6150995195607413, "grad_norm": 0.784563398274296, "learning_rate": 3.407951401757561e-06, "loss": 0.2487, "step": 17924 }, { "epoch": 0.615133836650652, "grad_norm": 0.7508043443181299, "learning_rate": 3.407424597020219e-06, "loss": 0.256, "step": 17925 }, { "epoch": 0.6151681537405628, "grad_norm": 0.7183526075996847, "learning_rate": 3.4068978119572893e-06, "loss": 0.255, "step": 17926 }, { "epoch": 0.6152024708304735, "grad_norm": 0.744444655728947, "learning_rate": 3.406371046575281e-06, "loss": 0.2681, "step": 17927 }, { "epoch": 0.6152367879203844, "grad_norm": 0.7475428897194472, "learning_rate": 3.4058443008807006e-06, "loss": 0.2606, "step": 17928 }, { "epoch": 0.6152711050102951, "grad_norm": 0.8001292428686192, "learning_rate": 3.4053175748800527e-06, "loss": 0.2867, "step": 17929 }, { "epoch": 0.6153054221002059, "grad_norm": 0.8273267146949873, "learning_rate": 3.40479086857985e-06, "loss": 0.2881, "step": 17930 }, { "epoch": 0.6153397391901166, "grad_norm": 0.8280168204936766, "learning_rate": 3.4042641819865945e-06, "loss": 0.2279, "step": 17931 }, { "epoch": 0.6153740562800275, "grad_norm": 0.7286722392028777, "learning_rate": 3.403737515106795e-06, "loss": 0.2718, "step": 17932 }, { "epoch": 0.6154083733699383, "grad_norm": 0.7965347547358459, "learning_rate": 3.403210867946957e-06, "loss": 0.2474, "step": 17933 }, { "epoch": 0.615442690459849, "grad_norm": 0.8108793374556327, "learning_rate": 3.4026842405135883e-06, "loss": 0.308, "step": 17934 }, { "epoch": 0.6154770075497598, "grad_norm": 0.7824634395649529, "learning_rate": 3.4021576328131917e-06, "loss": 0.2712, "step": 17935 }, { "epoch": 0.6155113246396705, "grad_norm": 0.81674213256765, "learning_rate": 3.4016310448522766e-06, "loss": 0.2648, "step": 17936 }, { "epoch": 0.6155456417295814, "grad_norm": 0.683176784965261, "learning_rate": 3.401104476637346e-06, "loss": 0.2862, "step": 17937 }, { "epoch": 0.6155799588194921, "grad_norm": 0.7218415023749927, "learning_rate": 3.400577928174904e-06, "loss": 0.2158, "step": 17938 }, { "epoch": 0.6156142759094029, "grad_norm": 0.7268904271464414, "learning_rate": 3.400051399471458e-06, "loss": 0.2904, "step": 17939 }, { "epoch": 0.6156485929993136, "grad_norm": 0.8906147460067955, "learning_rate": 3.3995248905335123e-06, "loss": 0.2647, "step": 17940 }, { "epoch": 0.6156829100892245, "grad_norm": 0.8347627133324876, "learning_rate": 3.3989984013675683e-06, "loss": 0.2667, "step": 17941 }, { "epoch": 0.6157172271791352, "grad_norm": 0.7562614755395252, "learning_rate": 3.398471931980135e-06, "loss": 0.2637, "step": 17942 }, { "epoch": 0.615751544269046, "grad_norm": 1.081991731996564, "learning_rate": 3.3979454823777137e-06, "loss": 0.2912, "step": 17943 }, { "epoch": 0.6157858613589567, "grad_norm": 0.7330433340448957, "learning_rate": 3.397419052566806e-06, "loss": 0.2382, "step": 17944 }, { "epoch": 0.6158201784488675, "grad_norm": 0.7827466296832869, "learning_rate": 3.3968926425539196e-06, "loss": 0.3382, "step": 17945 }, { "epoch": 0.6158544955387784, "grad_norm": 0.760415979507912, "learning_rate": 3.3963662523455545e-06, "loss": 0.3244, "step": 17946 }, { "epoch": 0.6158888126286891, "grad_norm": 0.785286493888216, "learning_rate": 3.395839881948213e-06, "loss": 0.3592, "step": 17947 }, { "epoch": 0.6159231297185999, "grad_norm": 0.7181940746319392, "learning_rate": 3.3953135313684026e-06, "loss": 0.2485, "step": 17948 }, { "epoch": 0.6159574468085106, "grad_norm": 0.8169432535359838, "learning_rate": 3.394787200612622e-06, "loss": 0.2628, "step": 17949 }, { "epoch": 0.6159917638984214, "grad_norm": 0.9069272664409813, "learning_rate": 3.394260889687372e-06, "loss": 0.3389, "step": 17950 }, { "epoch": 0.6160260809883322, "grad_norm": 0.7853700396500717, "learning_rate": 3.3937345985991586e-06, "loss": 0.3409, "step": 17951 }, { "epoch": 0.616060398078243, "grad_norm": 0.9417806402077783, "learning_rate": 3.3932083273544814e-06, "loss": 0.2796, "step": 17952 }, { "epoch": 0.6160947151681537, "grad_norm": 0.8194150984387873, "learning_rate": 3.3926820759598412e-06, "loss": 0.298, "step": 17953 }, { "epoch": 0.6161290322580645, "grad_norm": 0.7298969506668972, "learning_rate": 3.39215584442174e-06, "loss": 0.2755, "step": 17954 }, { "epoch": 0.6161633493479753, "grad_norm": 0.7467254383988862, "learning_rate": 3.39162963274668e-06, "loss": 0.3024, "step": 17955 }, { "epoch": 0.6161976664378861, "grad_norm": 0.7587068805178224, "learning_rate": 3.391103440941158e-06, "loss": 0.2729, "step": 17956 }, { "epoch": 0.6162319835277968, "grad_norm": 0.7239060381370627, "learning_rate": 3.39057726901168e-06, "loss": 0.2775, "step": 17957 }, { "epoch": 0.6162663006177076, "grad_norm": 0.7912169608828034, "learning_rate": 3.3900511169647414e-06, "loss": 0.2914, "step": 17958 }, { "epoch": 0.6163006177076183, "grad_norm": 0.8719322647231718, "learning_rate": 3.3895249848068462e-06, "loss": 0.3193, "step": 17959 }, { "epoch": 0.6163349347975292, "grad_norm": 0.8015656938180884, "learning_rate": 3.388998872544492e-06, "loss": 0.2932, "step": 17960 }, { "epoch": 0.61636925188744, "grad_norm": 0.7725694300672187, "learning_rate": 3.3884727801841765e-06, "loss": 0.2702, "step": 17961 }, { "epoch": 0.6164035689773507, "grad_norm": 0.6877299060219835, "learning_rate": 3.3879467077324018e-06, "loss": 0.2795, "step": 17962 }, { "epoch": 0.6164378860672615, "grad_norm": 0.806860954931913, "learning_rate": 3.387420655195668e-06, "loss": 0.305, "step": 17963 }, { "epoch": 0.6164722031571723, "grad_norm": 0.7951825588050896, "learning_rate": 3.386894622580469e-06, "loss": 0.3036, "step": 17964 }, { "epoch": 0.6165065202470831, "grad_norm": 0.774943274004576, "learning_rate": 3.386368609893308e-06, "loss": 0.2834, "step": 17965 }, { "epoch": 0.6165408373369938, "grad_norm": 0.7282146526673469, "learning_rate": 3.385842617140682e-06, "loss": 0.2511, "step": 17966 }, { "epoch": 0.6165751544269046, "grad_norm": 0.7759954389629734, "learning_rate": 3.3853166443290865e-06, "loss": 0.2312, "step": 17967 }, { "epoch": 0.6166094715168153, "grad_norm": 0.7951937961516051, "learning_rate": 3.384790691465023e-06, "loss": 0.2751, "step": 17968 }, { "epoch": 0.6166437886067262, "grad_norm": 0.8013517873888333, "learning_rate": 3.384264758554987e-06, "loss": 0.2449, "step": 17969 }, { "epoch": 0.616678105696637, "grad_norm": 0.8552062797441129, "learning_rate": 3.3837388456054745e-06, "loss": 0.2988, "step": 17970 }, { "epoch": 0.6167124227865477, "grad_norm": 0.7425880735983189, "learning_rate": 3.3832129526229864e-06, "loss": 0.2488, "step": 17971 }, { "epoch": 0.6167467398764584, "grad_norm": 0.7215157090449018, "learning_rate": 3.3826870796140176e-06, "loss": 0.3426, "step": 17972 }, { "epoch": 0.6167810569663692, "grad_norm": 0.792221525254787, "learning_rate": 3.382161226585062e-06, "loss": 0.2428, "step": 17973 }, { "epoch": 0.6168153740562801, "grad_norm": 0.7636614428400145, "learning_rate": 3.38163539354262e-06, "loss": 0.2484, "step": 17974 }, { "epoch": 0.6168496911461908, "grad_norm": 0.9095003387262396, "learning_rate": 3.3811095804931847e-06, "loss": 0.2558, "step": 17975 }, { "epoch": 0.6168840082361016, "grad_norm": 0.8165071940799634, "learning_rate": 3.380583787443254e-06, "loss": 0.249, "step": 17976 }, { "epoch": 0.6169183253260123, "grad_norm": 0.801938339988152, "learning_rate": 3.3800580143993222e-06, "loss": 0.238, "step": 17977 }, { "epoch": 0.6169526424159232, "grad_norm": 0.7357231692969395, "learning_rate": 3.379532261367886e-06, "loss": 0.2605, "step": 17978 }, { "epoch": 0.6169869595058339, "grad_norm": 0.7555903004311754, "learning_rate": 3.3790065283554376e-06, "loss": 0.3152, "step": 17979 }, { "epoch": 0.6170212765957447, "grad_norm": 0.7533531678970417, "learning_rate": 3.3784808153684745e-06, "loss": 0.2564, "step": 17980 }, { "epoch": 0.6170555936856554, "grad_norm": 0.7773584643493413, "learning_rate": 3.3779551224134916e-06, "loss": 0.2541, "step": 17981 }, { "epoch": 0.6170899107755662, "grad_norm": 0.8017570981254964, "learning_rate": 3.3774294494969797e-06, "loss": 0.2768, "step": 17982 }, { "epoch": 0.617124227865477, "grad_norm": 0.7454140676916571, "learning_rate": 3.376903796625436e-06, "loss": 0.2982, "step": 17983 }, { "epoch": 0.6171585449553878, "grad_norm": 0.8824124634014126, "learning_rate": 3.376378163805355e-06, "loss": 0.3016, "step": 17984 }, { "epoch": 0.6171928620452986, "grad_norm": 0.7643133201219777, "learning_rate": 3.3758525510432267e-06, "loss": 0.2593, "step": 17985 }, { "epoch": 0.6172271791352093, "grad_norm": 0.7779698822796749, "learning_rate": 3.3753269583455482e-06, "loss": 0.212, "step": 17986 }, { "epoch": 0.61726149622512, "grad_norm": 0.7102064021430163, "learning_rate": 3.374801385718811e-06, "loss": 0.2588, "step": 17987 }, { "epoch": 0.6172958133150309, "grad_norm": 0.7635168511751401, "learning_rate": 3.3742758331695065e-06, "loss": 0.2567, "step": 17988 }, { "epoch": 0.6173301304049417, "grad_norm": 0.8162464708429432, "learning_rate": 3.3737503007041304e-06, "loss": 0.325, "step": 17989 }, { "epoch": 0.6173644474948524, "grad_norm": 0.7483804137453522, "learning_rate": 3.373224788329171e-06, "loss": 0.2928, "step": 17990 }, { "epoch": 0.6173987645847632, "grad_norm": 0.8486374382254167, "learning_rate": 3.372699296051124e-06, "loss": 0.2763, "step": 17991 }, { "epoch": 0.617433081674674, "grad_norm": 0.8082300085494983, "learning_rate": 3.3721738238764806e-06, "loss": 0.3108, "step": 17992 }, { "epoch": 0.6174673987645848, "grad_norm": 0.776569696394839, "learning_rate": 3.3716483718117297e-06, "loss": 0.2585, "step": 17993 }, { "epoch": 0.6175017158544955, "grad_norm": 0.7882881977950267, "learning_rate": 3.3711229398633672e-06, "loss": 0.3234, "step": 17994 }, { "epoch": 0.6175360329444063, "grad_norm": 0.7201512407034438, "learning_rate": 3.370597528037881e-06, "loss": 0.3254, "step": 17995 }, { "epoch": 0.617570350034317, "grad_norm": 0.7842910736202647, "learning_rate": 3.3700721363417616e-06, "loss": 0.2132, "step": 17996 }, { "epoch": 0.6176046671242279, "grad_norm": 0.8410175573799135, "learning_rate": 3.3695467647815017e-06, "loss": 0.2922, "step": 17997 }, { "epoch": 0.6176389842141387, "grad_norm": 0.82539379896299, "learning_rate": 3.36902141336359e-06, "loss": 0.2122, "step": 17998 }, { "epoch": 0.6176733013040494, "grad_norm": 0.8500861012075756, "learning_rate": 3.368496082094518e-06, "loss": 0.2992, "step": 17999 }, { "epoch": 0.6177076183939602, "grad_norm": 0.7353205398249463, "learning_rate": 3.3679707709807734e-06, "loss": 0.2539, "step": 18000 }, { "epoch": 0.617741935483871, "grad_norm": 0.7207327860416438, "learning_rate": 3.367445480028849e-06, "loss": 0.2695, "step": 18001 }, { "epoch": 0.6177762525737818, "grad_norm": 0.7941408247422037, "learning_rate": 3.3669202092452303e-06, "loss": 0.2884, "step": 18002 }, { "epoch": 0.6178105696636925, "grad_norm": 0.7788415240040778, "learning_rate": 3.36639495863641e-06, "loss": 0.254, "step": 18003 }, { "epoch": 0.6178448867536033, "grad_norm": 0.6818347983332865, "learning_rate": 3.3658697282088755e-06, "loss": 0.2931, "step": 18004 }, { "epoch": 0.617879203843514, "grad_norm": 0.785892421151048, "learning_rate": 3.3653445179691135e-06, "loss": 0.2596, "step": 18005 }, { "epoch": 0.6179135209334249, "grad_norm": 0.8015371291091671, "learning_rate": 3.364819327923615e-06, "loss": 0.2798, "step": 18006 }, { "epoch": 0.6179478380233356, "grad_norm": 0.721367728434129, "learning_rate": 3.364294158078868e-06, "loss": 0.2571, "step": 18007 }, { "epoch": 0.6179821551132464, "grad_norm": 0.7779735880499673, "learning_rate": 3.3637690084413576e-06, "loss": 0.3087, "step": 18008 }, { "epoch": 0.6180164722031571, "grad_norm": 0.6930931977309174, "learning_rate": 3.363243879017576e-06, "loss": 0.2393, "step": 18009 }, { "epoch": 0.6180507892930679, "grad_norm": 0.8817579490303078, "learning_rate": 3.3627187698140086e-06, "loss": 0.3411, "step": 18010 }, { "epoch": 0.6180851063829788, "grad_norm": 0.8153275699428832, "learning_rate": 3.3621936808371385e-06, "loss": 0.3035, "step": 18011 }, { "epoch": 0.6181194234728895, "grad_norm": 0.8130079591468069, "learning_rate": 3.3616686120934582e-06, "loss": 0.292, "step": 18012 }, { "epoch": 0.6181537405628003, "grad_norm": 0.7521677362962716, "learning_rate": 3.361143563589452e-06, "loss": 0.2524, "step": 18013 }, { "epoch": 0.618188057652711, "grad_norm": 0.7371235542563165, "learning_rate": 3.3606185353316046e-06, "loss": 0.2903, "step": 18014 }, { "epoch": 0.6182223747426219, "grad_norm": 0.885318061644163, "learning_rate": 3.3600935273264056e-06, "loss": 0.2473, "step": 18015 }, { "epoch": 0.6182566918325326, "grad_norm": 0.8012868910263204, "learning_rate": 3.3595685395803403e-06, "loss": 0.2533, "step": 18016 }, { "epoch": 0.6182910089224434, "grad_norm": 0.7649101425080731, "learning_rate": 3.35904357209989e-06, "loss": 0.2287, "step": 18017 }, { "epoch": 0.6183253260123541, "grad_norm": 0.7181514137045375, "learning_rate": 3.3585186248915457e-06, "loss": 0.3077, "step": 18018 }, { "epoch": 0.6183596431022649, "grad_norm": 0.8266970426513226, "learning_rate": 3.3579936979617887e-06, "loss": 0.2394, "step": 18019 }, { "epoch": 0.6183939601921757, "grad_norm": 0.723481622846025, "learning_rate": 3.3574687913171054e-06, "loss": 0.2476, "step": 18020 }, { "epoch": 0.6184282772820865, "grad_norm": 0.7632699701974486, "learning_rate": 3.3569439049639807e-06, "loss": 0.2708, "step": 18021 }, { "epoch": 0.6184625943719972, "grad_norm": 0.8294991218688796, "learning_rate": 3.356419038908897e-06, "loss": 0.2827, "step": 18022 }, { "epoch": 0.618496911461908, "grad_norm": 0.7841281721419576, "learning_rate": 3.3558941931583417e-06, "loss": 0.2973, "step": 18023 }, { "epoch": 0.6185312285518189, "grad_norm": 0.8549471867627482, "learning_rate": 3.355369367718797e-06, "loss": 0.3099, "step": 18024 }, { "epoch": 0.6185655456417296, "grad_norm": 0.6432082098671557, "learning_rate": 3.354844562596744e-06, "loss": 0.2648, "step": 18025 }, { "epoch": 0.6185998627316404, "grad_norm": 0.7155972334503256, "learning_rate": 3.35431977779867e-06, "loss": 0.2663, "step": 18026 }, { "epoch": 0.6186341798215511, "grad_norm": 0.7528836883721493, "learning_rate": 3.3537950133310565e-06, "loss": 0.2629, "step": 18027 }, { "epoch": 0.6186684969114619, "grad_norm": 0.8273801901584232, "learning_rate": 3.353270269200386e-06, "loss": 0.2222, "step": 18028 }, { "epoch": 0.6187028140013727, "grad_norm": 0.8866279360135689, "learning_rate": 3.3527455454131408e-06, "loss": 0.3017, "step": 18029 }, { "epoch": 0.6187371310912835, "grad_norm": 0.7957613937552209, "learning_rate": 3.3522208419758052e-06, "loss": 0.3325, "step": 18030 }, { "epoch": 0.6187714481811942, "grad_norm": 0.6784548159587883, "learning_rate": 3.3516961588948584e-06, "loss": 0.271, "step": 18031 }, { "epoch": 0.618805765271105, "grad_norm": 0.7400139749212473, "learning_rate": 3.3511714961767857e-06, "loss": 0.279, "step": 18032 }, { "epoch": 0.6188400823610157, "grad_norm": 0.8218562015879942, "learning_rate": 3.350646853828067e-06, "loss": 0.2695, "step": 18033 }, { "epoch": 0.6188743994509266, "grad_norm": 0.777994835972692, "learning_rate": 3.3501222318551807e-06, "loss": 0.2686, "step": 18034 }, { "epoch": 0.6189087165408373, "grad_norm": 0.7419473985553763, "learning_rate": 3.349597630264613e-06, "loss": 0.2746, "step": 18035 }, { "epoch": 0.6189430336307481, "grad_norm": 0.7549500550901826, "learning_rate": 3.3490730490628407e-06, "loss": 0.2518, "step": 18036 }, { "epoch": 0.6189773507206588, "grad_norm": 0.7418321398620563, "learning_rate": 3.348548488256347e-06, "loss": 0.2481, "step": 18037 }, { "epoch": 0.6190116678105697, "grad_norm": 0.7662257965031222, "learning_rate": 3.3480239478516117e-06, "loss": 0.2843, "step": 18038 }, { "epoch": 0.6190459849004805, "grad_norm": 0.8559007359534734, "learning_rate": 3.3474994278551148e-06, "loss": 0.2719, "step": 18039 }, { "epoch": 0.6190803019903912, "grad_norm": 0.7750009094104604, "learning_rate": 3.3469749282733346e-06, "loss": 0.2634, "step": 18040 }, { "epoch": 0.619114619080302, "grad_norm": 0.6978280822546974, "learning_rate": 3.3464504491127526e-06, "loss": 0.2586, "step": 18041 }, { "epoch": 0.6191489361702127, "grad_norm": 0.8144554162257017, "learning_rate": 3.3459259903798473e-06, "loss": 0.2424, "step": 18042 }, { "epoch": 0.6191832532601236, "grad_norm": 0.8462719618328213, "learning_rate": 3.345401552081098e-06, "loss": 0.3032, "step": 18043 }, { "epoch": 0.6192175703500343, "grad_norm": 0.7255634496538613, "learning_rate": 3.344877134222983e-06, "loss": 0.26, "step": 18044 }, { "epoch": 0.6192518874399451, "grad_norm": 0.7003583804556545, "learning_rate": 3.3443527368119825e-06, "loss": 0.2702, "step": 18045 }, { "epoch": 0.6192862045298558, "grad_norm": 0.6645999969219237, "learning_rate": 3.3438283598545717e-06, "loss": 0.293, "step": 18046 }, { "epoch": 0.6193205216197667, "grad_norm": 0.725660021055332, "learning_rate": 3.343304003357233e-06, "loss": 0.257, "step": 18047 }, { "epoch": 0.6193548387096774, "grad_norm": 0.7322406060356742, "learning_rate": 3.342779667326441e-06, "loss": 0.2826, "step": 18048 }, { "epoch": 0.6193891557995882, "grad_norm": 0.7593491194947014, "learning_rate": 3.3422553517686728e-06, "loss": 0.2591, "step": 18049 }, { "epoch": 0.619423472889499, "grad_norm": 0.7287773592279575, "learning_rate": 3.3417310566904078e-06, "loss": 0.2427, "step": 18050 }, { "epoch": 0.6194577899794097, "grad_norm": 0.8601725218625574, "learning_rate": 3.341206782098123e-06, "loss": 0.3697, "step": 18051 }, { "epoch": 0.6194921070693206, "grad_norm": 0.7268393974440621, "learning_rate": 3.3406825279982926e-06, "loss": 0.2593, "step": 18052 }, { "epoch": 0.6195264241592313, "grad_norm": 0.8746975666072436, "learning_rate": 3.3401582943973964e-06, "loss": 0.2489, "step": 18053 }, { "epoch": 0.6195607412491421, "grad_norm": 0.7256121750395419, "learning_rate": 3.339634081301908e-06, "loss": 0.23, "step": 18054 }, { "epoch": 0.6195950583390528, "grad_norm": 0.8620785661442251, "learning_rate": 3.339109888718307e-06, "loss": 0.2561, "step": 18055 }, { "epoch": 0.6196293754289636, "grad_norm": 0.7448667903862661, "learning_rate": 3.3385857166530666e-06, "loss": 0.3426, "step": 18056 }, { "epoch": 0.6196636925188744, "grad_norm": 0.7664268077131547, "learning_rate": 3.33806156511266e-06, "loss": 0.261, "step": 18057 }, { "epoch": 0.6196980096087852, "grad_norm": 0.8070232824203015, "learning_rate": 3.337537434103567e-06, "loss": 0.3633, "step": 18058 }, { "epoch": 0.6197323266986959, "grad_norm": 0.7734322104685591, "learning_rate": 3.337013323632261e-06, "loss": 0.2675, "step": 18059 }, { "epoch": 0.6197666437886067, "grad_norm": 0.8219487501302518, "learning_rate": 3.336489233705216e-06, "loss": 0.2656, "step": 18060 }, { "epoch": 0.6198009608785175, "grad_norm": 0.8293306121158781, "learning_rate": 3.335965164328907e-06, "loss": 0.2379, "step": 18061 }, { "epoch": 0.6198352779684283, "grad_norm": 0.7864583355239309, "learning_rate": 3.3354411155098097e-06, "loss": 0.2712, "step": 18062 }, { "epoch": 0.619869595058339, "grad_norm": 0.7200176254806888, "learning_rate": 3.3349170872543944e-06, "loss": 0.3219, "step": 18063 }, { "epoch": 0.6199039121482498, "grad_norm": 0.7375608970224005, "learning_rate": 3.3343930795691394e-06, "loss": 0.3216, "step": 18064 }, { "epoch": 0.6199382292381606, "grad_norm": 0.8226101481788797, "learning_rate": 3.3338690924605145e-06, "loss": 0.2815, "step": 18065 }, { "epoch": 0.6199725463280714, "grad_norm": 0.9301065251601638, "learning_rate": 3.3333451259349935e-06, "loss": 0.257, "step": 18066 }, { "epoch": 0.6200068634179822, "grad_norm": 0.7478253064723734, "learning_rate": 3.332821179999053e-06, "loss": 0.2656, "step": 18067 }, { "epoch": 0.6200411805078929, "grad_norm": 0.7606580552781227, "learning_rate": 3.3322972546591626e-06, "loss": 0.2625, "step": 18068 }, { "epoch": 0.6200754975978037, "grad_norm": 0.8313635060752184, "learning_rate": 3.331773349921793e-06, "loss": 0.3051, "step": 18069 }, { "epoch": 0.6201098146877145, "grad_norm": 0.7253483032129174, "learning_rate": 3.3312494657934214e-06, "loss": 0.2493, "step": 18070 }, { "epoch": 0.6201441317776253, "grad_norm": 1.5239184881249983, "learning_rate": 3.330725602280516e-06, "loss": 0.2268, "step": 18071 }, { "epoch": 0.620178448867536, "grad_norm": 0.7263490426425693, "learning_rate": 3.330201759389549e-06, "loss": 0.243, "step": 18072 }, { "epoch": 0.6202127659574468, "grad_norm": 0.8270038946857674, "learning_rate": 3.3296779371269922e-06, "loss": 0.233, "step": 18073 }, { "epoch": 0.6202470830473575, "grad_norm": 0.7610958215531888, "learning_rate": 3.3291541354993183e-06, "loss": 0.3299, "step": 18074 }, { "epoch": 0.6202814001372684, "grad_norm": 0.7859064827266676, "learning_rate": 3.3286303545129957e-06, "loss": 0.2586, "step": 18075 }, { "epoch": 0.6203157172271792, "grad_norm": 0.8257478717093707, "learning_rate": 3.3281065941744985e-06, "loss": 0.3141, "step": 18076 }, { "epoch": 0.6203500343170899, "grad_norm": 0.8003326961462884, "learning_rate": 3.3275828544902944e-06, "loss": 0.2633, "step": 18077 }, { "epoch": 0.6203843514070007, "grad_norm": 0.7797805415249391, "learning_rate": 3.3270591354668523e-06, "loss": 0.3054, "step": 18078 }, { "epoch": 0.6204186684969114, "grad_norm": 0.8572679491524825, "learning_rate": 3.326535437110646e-06, "loss": 0.2651, "step": 18079 }, { "epoch": 0.6204529855868223, "grad_norm": 0.8276615744818753, "learning_rate": 3.3260117594281425e-06, "loss": 0.2561, "step": 18080 }, { "epoch": 0.620487302676733, "grad_norm": 0.7664316599637994, "learning_rate": 3.325488102425812e-06, "loss": 0.3159, "step": 18081 }, { "epoch": 0.6205216197666438, "grad_norm": 0.7785020336268096, "learning_rate": 3.3249644661101244e-06, "loss": 0.2849, "step": 18082 }, { "epoch": 0.6205559368565545, "grad_norm": 0.8377933784849921, "learning_rate": 3.324440850487547e-06, "loss": 0.2988, "step": 18083 }, { "epoch": 0.6205902539464654, "grad_norm": 0.7729426326335017, "learning_rate": 3.3239172555645505e-06, "loss": 0.2576, "step": 18084 }, { "epoch": 0.6206245710363761, "grad_norm": 0.7764338747636959, "learning_rate": 3.3233936813476025e-06, "loss": 0.2513, "step": 18085 }, { "epoch": 0.6206588881262869, "grad_norm": 0.7087812376409179, "learning_rate": 3.3228701278431685e-06, "loss": 0.2668, "step": 18086 }, { "epoch": 0.6206932052161976, "grad_norm": 0.7378510817900275, "learning_rate": 3.3223465950577216e-06, "loss": 0.2268, "step": 18087 }, { "epoch": 0.6207275223061084, "grad_norm": 0.7817296249399016, "learning_rate": 3.321823082997725e-06, "loss": 0.2541, "step": 18088 }, { "epoch": 0.6207618393960193, "grad_norm": 0.859368833847469, "learning_rate": 3.3212995916696466e-06, "loss": 0.2849, "step": 18089 }, { "epoch": 0.62079615648593, "grad_norm": 0.8024648003658309, "learning_rate": 3.3207761210799562e-06, "loss": 0.2494, "step": 18090 }, { "epoch": 0.6208304735758408, "grad_norm": 0.7741371712732422, "learning_rate": 3.3202526712351196e-06, "loss": 0.2849, "step": 18091 }, { "epoch": 0.6208647906657515, "grad_norm": 0.7790428937901421, "learning_rate": 3.3197292421416003e-06, "loss": 0.2601, "step": 18092 }, { "epoch": 0.6208991077556624, "grad_norm": 0.7877122877523208, "learning_rate": 3.3192058338058703e-06, "loss": 0.2642, "step": 18093 }, { "epoch": 0.6209334248455731, "grad_norm": 0.7632164366045058, "learning_rate": 3.3186824462343907e-06, "loss": 0.2322, "step": 18094 }, { "epoch": 0.6209677419354839, "grad_norm": 0.7192474016596362, "learning_rate": 3.3181590794336294e-06, "loss": 0.2393, "step": 18095 }, { "epoch": 0.6210020590253946, "grad_norm": 0.768049931231294, "learning_rate": 3.3176357334100518e-06, "loss": 0.2755, "step": 18096 }, { "epoch": 0.6210363761153054, "grad_norm": 0.7969112197683379, "learning_rate": 3.3171124081701245e-06, "loss": 0.2219, "step": 18097 }, { "epoch": 0.6210706932052162, "grad_norm": 0.8820179089113745, "learning_rate": 3.316589103720309e-06, "loss": 0.2705, "step": 18098 }, { "epoch": 0.621105010295127, "grad_norm": 0.7881450197339581, "learning_rate": 3.316065820067075e-06, "loss": 0.2522, "step": 18099 }, { "epoch": 0.6211393273850377, "grad_norm": 0.9157410216538605, "learning_rate": 3.315542557216884e-06, "loss": 0.3232, "step": 18100 }, { "epoch": 0.6211736444749485, "grad_norm": 0.7624532979109496, "learning_rate": 3.315019315176199e-06, "loss": 0.288, "step": 18101 }, { "epoch": 0.6212079615648592, "grad_norm": 0.7164526999839996, "learning_rate": 3.314496093951487e-06, "loss": 0.2348, "step": 18102 }, { "epoch": 0.6212422786547701, "grad_norm": 0.7352174743496183, "learning_rate": 3.313972893549212e-06, "loss": 0.2898, "step": 18103 }, { "epoch": 0.6212765957446809, "grad_norm": 0.8096807575698843, "learning_rate": 3.3134497139758338e-06, "loss": 0.2731, "step": 18104 }, { "epoch": 0.6213109128345916, "grad_norm": 0.7791219898030513, "learning_rate": 3.3129265552378197e-06, "loss": 0.2473, "step": 18105 }, { "epoch": 0.6213452299245024, "grad_norm": 1.084800591891905, "learning_rate": 3.3124034173416315e-06, "loss": 0.2615, "step": 18106 }, { "epoch": 0.6213795470144132, "grad_norm": 0.8787470019894258, "learning_rate": 3.3118803002937295e-06, "loss": 0.2865, "step": 18107 }, { "epoch": 0.621413864104324, "grad_norm": 0.8577264310330395, "learning_rate": 3.31135720410058e-06, "loss": 0.2702, "step": 18108 }, { "epoch": 0.6214481811942347, "grad_norm": 0.7550967714210275, "learning_rate": 3.3108341287686424e-06, "loss": 0.2674, "step": 18109 }, { "epoch": 0.6214824982841455, "grad_norm": 0.6855213215360008, "learning_rate": 3.3103110743043796e-06, "loss": 0.2176, "step": 18110 }, { "epoch": 0.6215168153740562, "grad_norm": 0.7607240960064577, "learning_rate": 3.309788040714254e-06, "loss": 0.2663, "step": 18111 }, { "epoch": 0.6215511324639671, "grad_norm": 0.7945798970646689, "learning_rate": 3.3092650280047277e-06, "loss": 0.2419, "step": 18112 }, { "epoch": 0.6215854495538778, "grad_norm": 0.877970149770662, "learning_rate": 3.308742036182259e-06, "loss": 0.3381, "step": 18113 }, { "epoch": 0.6216197666437886, "grad_norm": 0.7249407930420866, "learning_rate": 3.3082190652533115e-06, "loss": 0.2854, "step": 18114 }, { "epoch": 0.6216540837336993, "grad_norm": 0.7795701311913045, "learning_rate": 3.3076961152243436e-06, "loss": 0.2398, "step": 18115 }, { "epoch": 0.6216884008236102, "grad_norm": 0.7369111695893159, "learning_rate": 3.307173186101819e-06, "loss": 0.2933, "step": 18116 }, { "epoch": 0.621722717913521, "grad_norm": 0.7030364928948589, "learning_rate": 3.3066502778921955e-06, "loss": 0.2682, "step": 18117 }, { "epoch": 0.6217570350034317, "grad_norm": 0.8123822917823297, "learning_rate": 3.306127390601933e-06, "loss": 0.2509, "step": 18118 }, { "epoch": 0.6217913520933425, "grad_norm": 0.8445279402003997, "learning_rate": 3.3056045242374924e-06, "loss": 0.3025, "step": 18119 }, { "epoch": 0.6218256691832532, "grad_norm": 0.7871259502007212, "learning_rate": 3.3050816788053332e-06, "loss": 0.2902, "step": 18120 }, { "epoch": 0.6218599862731641, "grad_norm": 0.8259160570192494, "learning_rate": 3.304558854311911e-06, "loss": 0.2965, "step": 18121 }, { "epoch": 0.6218943033630748, "grad_norm": 0.7498202141681604, "learning_rate": 3.3040360507636906e-06, "loss": 0.3471, "step": 18122 }, { "epoch": 0.6219286204529856, "grad_norm": 0.8349474156802629, "learning_rate": 3.303513268167127e-06, "loss": 0.2717, "step": 18123 }, { "epoch": 0.6219629375428963, "grad_norm": 0.699193632080944, "learning_rate": 3.302990506528677e-06, "loss": 0.253, "step": 18124 }, { "epoch": 0.6219972546328071, "grad_norm": 0.7951285342553801, "learning_rate": 3.3024677658548015e-06, "loss": 0.3143, "step": 18125 }, { "epoch": 0.622031571722718, "grad_norm": 0.7587153642282372, "learning_rate": 3.3019450461519592e-06, "loss": 0.2891, "step": 18126 }, { "epoch": 0.6220658888126287, "grad_norm": 0.7526627888174098, "learning_rate": 3.301422347426604e-06, "loss": 0.2531, "step": 18127 }, { "epoch": 0.6221002059025394, "grad_norm": 0.7254469150759186, "learning_rate": 3.300899669685197e-06, "loss": 0.2714, "step": 18128 }, { "epoch": 0.6221345229924502, "grad_norm": 0.8421673455668741, "learning_rate": 3.3003770129341937e-06, "loss": 0.2756, "step": 18129 }, { "epoch": 0.6221688400823611, "grad_norm": 0.7223406670695849, "learning_rate": 3.299854377180049e-06, "loss": 0.2375, "step": 18130 }, { "epoch": 0.6222031571722718, "grad_norm": 0.8783783581276047, "learning_rate": 3.299331762429223e-06, "loss": 0.279, "step": 18131 }, { "epoch": 0.6222374742621826, "grad_norm": 0.7351314659253279, "learning_rate": 3.2988091686881697e-06, "loss": 0.2966, "step": 18132 }, { "epoch": 0.6222717913520933, "grad_norm": 0.722363514777066, "learning_rate": 3.298286595963345e-06, "loss": 0.2457, "step": 18133 }, { "epoch": 0.6223061084420041, "grad_norm": 0.8548828431494744, "learning_rate": 3.2977640442612068e-06, "loss": 0.2133, "step": 18134 }, { "epoch": 0.6223404255319149, "grad_norm": 0.7689272472838372, "learning_rate": 3.29724151358821e-06, "loss": 0.2468, "step": 18135 }, { "epoch": 0.6223747426218257, "grad_norm": 0.7950577194570495, "learning_rate": 3.2967190039508056e-06, "loss": 0.281, "step": 18136 }, { "epoch": 0.6224090597117364, "grad_norm": 0.9658160314552197, "learning_rate": 3.2961965153554553e-06, "loss": 0.2863, "step": 18137 }, { "epoch": 0.6224433768016472, "grad_norm": 0.7837524388145124, "learning_rate": 3.295674047808609e-06, "loss": 0.3099, "step": 18138 }, { "epoch": 0.622477693891558, "grad_norm": 0.8277977221419238, "learning_rate": 3.295151601316723e-06, "loss": 0.3254, "step": 18139 }, { "epoch": 0.6225120109814688, "grad_norm": 0.8702557818890365, "learning_rate": 3.2946291758862512e-06, "loss": 0.2872, "step": 18140 }, { "epoch": 0.6225463280713796, "grad_norm": 0.6516422568663439, "learning_rate": 3.294106771523649e-06, "loss": 0.2565, "step": 18141 }, { "epoch": 0.6225806451612903, "grad_norm": 0.7303383984658468, "learning_rate": 3.293584388235367e-06, "loss": 0.278, "step": 18142 }, { "epoch": 0.622614962251201, "grad_norm": 0.8154237991845772, "learning_rate": 3.293062026027861e-06, "loss": 0.2587, "step": 18143 }, { "epoch": 0.6226492793411119, "grad_norm": 0.7931917948442395, "learning_rate": 3.2925396849075854e-06, "loss": 0.2894, "step": 18144 }, { "epoch": 0.6226835964310227, "grad_norm": 0.7496826617124857, "learning_rate": 3.2920173648809883e-06, "loss": 0.2536, "step": 18145 }, { "epoch": 0.6227179135209334, "grad_norm": 0.7916371982055402, "learning_rate": 3.2914950659545273e-06, "loss": 0.2588, "step": 18146 }, { "epoch": 0.6227522306108442, "grad_norm": 0.8410573824628165, "learning_rate": 3.290972788134651e-06, "loss": 0.2993, "step": 18147 }, { "epoch": 0.6227865477007549, "grad_norm": 0.8271829628612307, "learning_rate": 3.2904505314278146e-06, "loss": 0.2541, "step": 18148 }, { "epoch": 0.6228208647906658, "grad_norm": 0.6851712130875863, "learning_rate": 3.2899282958404687e-06, "loss": 0.2697, "step": 18149 }, { "epoch": 0.6228551818805765, "grad_norm": 0.8519815777771789, "learning_rate": 3.289406081379064e-06, "loss": 0.2615, "step": 18150 }, { "epoch": 0.6228894989704873, "grad_norm": 0.8355195503599899, "learning_rate": 3.288883888050055e-06, "loss": 0.3208, "step": 18151 }, { "epoch": 0.622923816060398, "grad_norm": 0.8329239791365164, "learning_rate": 3.2883617158598903e-06, "loss": 0.263, "step": 18152 }, { "epoch": 0.6229581331503089, "grad_norm": 0.8181017773810026, "learning_rate": 3.2878395648150185e-06, "loss": 0.2427, "step": 18153 }, { "epoch": 0.6229924502402197, "grad_norm": 0.7727325833166304, "learning_rate": 3.287317434921895e-06, "loss": 0.3098, "step": 18154 }, { "epoch": 0.6230267673301304, "grad_norm": 1.0570575153127912, "learning_rate": 3.2867953261869667e-06, "loss": 0.2888, "step": 18155 }, { "epoch": 0.6230610844200412, "grad_norm": 0.8105234403498306, "learning_rate": 3.286273238616684e-06, "loss": 0.2477, "step": 18156 }, { "epoch": 0.6230954015099519, "grad_norm": 0.7876851364300423, "learning_rate": 3.2857511722174997e-06, "loss": 0.2788, "step": 18157 }, { "epoch": 0.6231297185998628, "grad_norm": 0.8169350564932649, "learning_rate": 3.2852291269958603e-06, "loss": 0.2279, "step": 18158 }, { "epoch": 0.6231640356897735, "grad_norm": 0.7269013627269919, "learning_rate": 3.2847071029582143e-06, "loss": 0.2547, "step": 18159 }, { "epoch": 0.6231983527796843, "grad_norm": 0.7230727069633388, "learning_rate": 3.2841851001110142e-06, "loss": 0.2762, "step": 18160 }, { "epoch": 0.623232669869595, "grad_norm": 0.7046904002645709, "learning_rate": 3.283663118460706e-06, "loss": 0.2399, "step": 18161 }, { "epoch": 0.6232669869595059, "grad_norm": 0.8044969869607422, "learning_rate": 3.2831411580137384e-06, "loss": 0.264, "step": 18162 }, { "epoch": 0.6233013040494166, "grad_norm": 0.8308282169138255, "learning_rate": 3.2826192187765604e-06, "loss": 0.2523, "step": 18163 }, { "epoch": 0.6233356211393274, "grad_norm": 0.7365058677214749, "learning_rate": 3.28209730075562e-06, "loss": 0.3026, "step": 18164 }, { "epoch": 0.6233699382292381, "grad_norm": 0.7638948105953707, "learning_rate": 3.281575403957363e-06, "loss": 0.2662, "step": 18165 }, { "epoch": 0.6234042553191489, "grad_norm": 0.8032017063940888, "learning_rate": 3.2810535283882407e-06, "loss": 0.2898, "step": 18166 }, { "epoch": 0.6234385724090598, "grad_norm": 0.7168394672954468, "learning_rate": 3.2805316740546977e-06, "loss": 0.3165, "step": 18167 }, { "epoch": 0.6234728894989705, "grad_norm": 0.7844761942693729, "learning_rate": 3.28000984096318e-06, "loss": 0.2633, "step": 18168 }, { "epoch": 0.6235072065888813, "grad_norm": 0.8584678283191768, "learning_rate": 3.2794880291201353e-06, "loss": 0.2599, "step": 18169 }, { "epoch": 0.623541523678792, "grad_norm": 0.7479186305340891, "learning_rate": 3.2789662385320117e-06, "loss": 0.2611, "step": 18170 }, { "epoch": 0.6235758407687028, "grad_norm": 0.7316432561399876, "learning_rate": 3.278444469205251e-06, "loss": 0.3139, "step": 18171 }, { "epoch": 0.6236101578586136, "grad_norm": 0.7858194793485719, "learning_rate": 3.2779227211463043e-06, "loss": 0.3248, "step": 18172 }, { "epoch": 0.6236444749485244, "grad_norm": 0.8054222353214806, "learning_rate": 3.2774009943616146e-06, "loss": 0.289, "step": 18173 }, { "epoch": 0.6236787920384351, "grad_norm": 0.731821487982123, "learning_rate": 3.2768792888576253e-06, "loss": 0.2329, "step": 18174 }, { "epoch": 0.6237131091283459, "grad_norm": 0.8632178994678199, "learning_rate": 3.276357604640785e-06, "loss": 0.2842, "step": 18175 }, { "epoch": 0.6237474262182567, "grad_norm": 0.7875821140343496, "learning_rate": 3.2758359417175363e-06, "loss": 0.2822, "step": 18176 }, { "epoch": 0.6237817433081675, "grad_norm": 0.801403016213545, "learning_rate": 3.2753143000943235e-06, "loss": 0.3386, "step": 18177 }, { "epoch": 0.6238160603980782, "grad_norm": 0.7941136289466875, "learning_rate": 3.274792679777593e-06, "loss": 0.2798, "step": 18178 }, { "epoch": 0.623850377487989, "grad_norm": 0.8575517927518856, "learning_rate": 3.2742710807737865e-06, "loss": 0.3869, "step": 18179 }, { "epoch": 0.6238846945778997, "grad_norm": 0.7230399219120787, "learning_rate": 3.2737495030893503e-06, "loss": 0.2492, "step": 18180 }, { "epoch": 0.6239190116678106, "grad_norm": 0.7559213954731093, "learning_rate": 3.273227946730726e-06, "loss": 0.2961, "step": 18181 }, { "epoch": 0.6239533287577214, "grad_norm": 0.7821183400477574, "learning_rate": 3.2727064117043563e-06, "loss": 0.2373, "step": 18182 }, { "epoch": 0.6239876458476321, "grad_norm": 0.7954837747148339, "learning_rate": 3.272184898016686e-06, "loss": 0.2737, "step": 18183 }, { "epoch": 0.6240219629375429, "grad_norm": 0.860731466411516, "learning_rate": 3.2716634056741557e-06, "loss": 0.3492, "step": 18184 }, { "epoch": 0.6240562800274537, "grad_norm": 0.8034897567368521, "learning_rate": 3.2711419346832098e-06, "loss": 0.2563, "step": 18185 }, { "epoch": 0.6240905971173645, "grad_norm": 0.8167961013073529, "learning_rate": 3.2706204850502894e-06, "loss": 0.2665, "step": 18186 }, { "epoch": 0.6241249142072752, "grad_norm": 0.7723085995196686, "learning_rate": 3.270099056781838e-06, "loss": 0.3075, "step": 18187 }, { "epoch": 0.624159231297186, "grad_norm": 0.6867780778980224, "learning_rate": 3.269577649884293e-06, "loss": 0.2315, "step": 18188 }, { "epoch": 0.6241935483870967, "grad_norm": 0.6193480797771073, "learning_rate": 3.2690562643641013e-06, "loss": 0.2274, "step": 18189 }, { "epoch": 0.6242278654770076, "grad_norm": 0.7814912362321595, "learning_rate": 3.268534900227701e-06, "loss": 0.274, "step": 18190 }, { "epoch": 0.6242621825669183, "grad_norm": 0.7373406788504351, "learning_rate": 3.268013557481532e-06, "loss": 0.2661, "step": 18191 }, { "epoch": 0.6242964996568291, "grad_norm": 0.8063070819448246, "learning_rate": 3.267492236132036e-06, "loss": 0.281, "step": 18192 }, { "epoch": 0.6243308167467398, "grad_norm": 0.7290345373853729, "learning_rate": 3.2669709361856555e-06, "loss": 0.26, "step": 18193 }, { "epoch": 0.6243651338366506, "grad_norm": 0.7047915541266225, "learning_rate": 3.266449657648826e-06, "loss": 0.2391, "step": 18194 }, { "epoch": 0.6243994509265615, "grad_norm": 0.7792045019343155, "learning_rate": 3.2659284005279925e-06, "loss": 0.2883, "step": 18195 }, { "epoch": 0.6244337680164722, "grad_norm": 0.8394888786916694, "learning_rate": 3.2654071648295915e-06, "loss": 0.2706, "step": 18196 }, { "epoch": 0.624468085106383, "grad_norm": 0.8686516093090121, "learning_rate": 3.2648859505600607e-06, "loss": 0.2981, "step": 18197 }, { "epoch": 0.6245024021962937, "grad_norm": 0.7863594926155414, "learning_rate": 3.264364757725843e-06, "loss": 0.2566, "step": 18198 }, { "epoch": 0.6245367192862046, "grad_norm": 0.7474495792335505, "learning_rate": 3.263843586333374e-06, "loss": 0.2403, "step": 18199 }, { "epoch": 0.6245710363761153, "grad_norm": 0.744543036470232, "learning_rate": 3.2633224363890924e-06, "loss": 0.2475, "step": 18200 }, { "epoch": 0.6246053534660261, "grad_norm": 0.7308479950074072, "learning_rate": 3.26280130789944e-06, "loss": 0.2676, "step": 18201 }, { "epoch": 0.6246396705559368, "grad_norm": 0.8001329929629619, "learning_rate": 3.262280200870851e-06, "loss": 0.305, "step": 18202 }, { "epoch": 0.6246739876458476, "grad_norm": 0.7782527091120454, "learning_rate": 3.2617591153097628e-06, "loss": 0.2564, "step": 18203 }, { "epoch": 0.6247083047357584, "grad_norm": 0.7760881741443212, "learning_rate": 3.2612380512226162e-06, "loss": 0.2664, "step": 18204 }, { "epoch": 0.6247426218256692, "grad_norm": 0.7622684056147031, "learning_rate": 3.2607170086158445e-06, "loss": 0.2382, "step": 18205 }, { "epoch": 0.62477693891558, "grad_norm": 0.7410917799661184, "learning_rate": 3.260195987495887e-06, "loss": 0.2543, "step": 18206 }, { "epoch": 0.6248112560054907, "grad_norm": 0.7310079445803724, "learning_rate": 3.25967498786918e-06, "loss": 0.2269, "step": 18207 }, { "epoch": 0.6248455730954016, "grad_norm": 1.0077160026154162, "learning_rate": 3.2591540097421597e-06, "loss": 0.264, "step": 18208 }, { "epoch": 0.6248798901853123, "grad_norm": 0.8044007803968752, "learning_rate": 3.2586330531212608e-06, "loss": 0.2619, "step": 18209 }, { "epoch": 0.6249142072752231, "grad_norm": 0.7925308035414239, "learning_rate": 3.258112118012922e-06, "loss": 0.2978, "step": 18210 }, { "epoch": 0.6249485243651338, "grad_norm": 0.7102625051930843, "learning_rate": 3.2575912044235747e-06, "loss": 0.2777, "step": 18211 }, { "epoch": 0.6249828414550446, "grad_norm": 0.6667607534432334, "learning_rate": 3.2570703123596582e-06, "loss": 0.2421, "step": 18212 }, { "epoch": 0.6250171585449554, "grad_norm": 0.7671916892714183, "learning_rate": 3.2565494418276056e-06, "loss": 0.2843, "step": 18213 }, { "epoch": 0.6250514756348662, "grad_norm": 0.7690909645748003, "learning_rate": 3.256028592833852e-06, "loss": 0.2627, "step": 18214 }, { "epoch": 0.6250857927247769, "grad_norm": 0.8746692966149984, "learning_rate": 3.255507765384832e-06, "loss": 0.2708, "step": 18215 }, { "epoch": 0.6251201098146877, "grad_norm": 0.8604697304870412, "learning_rate": 3.254986959486981e-06, "loss": 0.2784, "step": 18216 }, { "epoch": 0.6251544269045984, "grad_norm": 0.8337775570098297, "learning_rate": 3.2544661751467287e-06, "loss": 0.2406, "step": 18217 }, { "epoch": 0.6251887439945093, "grad_norm": 0.8460469663048543, "learning_rate": 3.2539454123705145e-06, "loss": 0.2629, "step": 18218 }, { "epoch": 0.62522306108442, "grad_norm": 0.7041046473865059, "learning_rate": 3.253424671164769e-06, "loss": 0.3012, "step": 18219 }, { "epoch": 0.6252573781743308, "grad_norm": 0.727656774081133, "learning_rate": 3.2529039515359227e-06, "loss": 0.2926, "step": 18220 }, { "epoch": 0.6252916952642416, "grad_norm": 0.8720420386914649, "learning_rate": 3.252383253490413e-06, "loss": 0.2628, "step": 18221 }, { "epoch": 0.6253260123541524, "grad_norm": 0.770064826641804, "learning_rate": 3.2518625770346706e-06, "loss": 0.2809, "step": 18222 }, { "epoch": 0.6253603294440632, "grad_norm": 0.8141130929559934, "learning_rate": 3.2513419221751265e-06, "loss": 0.3074, "step": 18223 }, { "epoch": 0.6253946465339739, "grad_norm": 0.8841320202198846, "learning_rate": 3.2508212889182168e-06, "loss": 0.2767, "step": 18224 }, { "epoch": 0.6254289636238847, "grad_norm": 0.6767920041578888, "learning_rate": 3.2503006772703694e-06, "loss": 0.2053, "step": 18225 }, { "epoch": 0.6254632807137954, "grad_norm": 0.7688675759463043, "learning_rate": 3.249780087238016e-06, "loss": 0.2492, "step": 18226 }, { "epoch": 0.6254975978037063, "grad_norm": 0.8259865344097688, "learning_rate": 3.249259518827591e-06, "loss": 0.2641, "step": 18227 }, { "epoch": 0.625531914893617, "grad_norm": 0.9586009636663948, "learning_rate": 3.248738972045523e-06, "loss": 0.2327, "step": 18228 }, { "epoch": 0.6255662319835278, "grad_norm": 0.7459104045201737, "learning_rate": 3.2482184468982423e-06, "loss": 0.2574, "step": 18229 }, { "epoch": 0.6256005490734385, "grad_norm": 0.8328132293824985, "learning_rate": 3.2476979433921806e-06, "loss": 0.2553, "step": 18230 }, { "epoch": 0.6256348661633494, "grad_norm": 0.8561212788360082, "learning_rate": 3.247177461533769e-06, "loss": 0.2592, "step": 18231 }, { "epoch": 0.6256691832532602, "grad_norm": 0.7483675284017504, "learning_rate": 3.2466570013294348e-06, "loss": 0.2704, "step": 18232 }, { "epoch": 0.6257035003431709, "grad_norm": 0.8155920602121385, "learning_rate": 3.24613656278561e-06, "loss": 0.2945, "step": 18233 }, { "epoch": 0.6257378174330817, "grad_norm": 0.7361410147681817, "learning_rate": 3.2456161459087244e-06, "loss": 0.2248, "step": 18234 }, { "epoch": 0.6257721345229924, "grad_norm": 0.7058867648351822, "learning_rate": 3.245095750705203e-06, "loss": 0.2595, "step": 18235 }, { "epoch": 0.6258064516129033, "grad_norm": 0.6805681527520133, "learning_rate": 3.244575377181479e-06, "loss": 0.2501, "step": 18236 }, { "epoch": 0.625840768702814, "grad_norm": 0.8029369374525192, "learning_rate": 3.2440550253439807e-06, "loss": 0.2393, "step": 18237 }, { "epoch": 0.6258750857927248, "grad_norm": 0.8896541439182494, "learning_rate": 3.2435346951991325e-06, "loss": 0.2412, "step": 18238 }, { "epoch": 0.6259094028826355, "grad_norm": 0.8347824604107144, "learning_rate": 3.243014386753368e-06, "loss": 0.263, "step": 18239 }, { "epoch": 0.6259437199725463, "grad_norm": 0.767903487057448, "learning_rate": 3.2424941000131104e-06, "loss": 0.2725, "step": 18240 }, { "epoch": 0.6259780370624571, "grad_norm": 0.8035171364180465, "learning_rate": 3.2419738349847908e-06, "loss": 0.2813, "step": 18241 }, { "epoch": 0.6260123541523679, "grad_norm": 0.786302214491972, "learning_rate": 3.2414535916748347e-06, "loss": 0.3078, "step": 18242 }, { "epoch": 0.6260466712422786, "grad_norm": 0.7933008527673648, "learning_rate": 3.2409333700896672e-06, "loss": 0.2504, "step": 18243 }, { "epoch": 0.6260809883321894, "grad_norm": 0.904594026184782, "learning_rate": 3.2404131702357187e-06, "loss": 0.3375, "step": 18244 }, { "epoch": 0.6261153054221003, "grad_norm": 0.8205303092056798, "learning_rate": 3.2398929921194144e-06, "loss": 0.2527, "step": 18245 }, { "epoch": 0.626149622512011, "grad_norm": 0.7455939808454126, "learning_rate": 3.239372835747178e-06, "loss": 0.2511, "step": 18246 }, { "epoch": 0.6261839396019218, "grad_norm": 0.7578871552361696, "learning_rate": 3.23885270112544e-06, "loss": 0.2849, "step": 18247 }, { "epoch": 0.6262182566918325, "grad_norm": 0.8128820392563528, "learning_rate": 3.238332588260623e-06, "loss": 0.2773, "step": 18248 }, { "epoch": 0.6262525737817433, "grad_norm": 0.7039031057356193, "learning_rate": 3.2378124971591516e-06, "loss": 0.238, "step": 18249 }, { "epoch": 0.6262868908716541, "grad_norm": 0.8317085146509502, "learning_rate": 3.2372924278274543e-06, "loss": 0.2721, "step": 18250 }, { "epoch": 0.6263212079615649, "grad_norm": 0.8056558848070998, "learning_rate": 3.2367723802719525e-06, "loss": 0.2902, "step": 18251 }, { "epoch": 0.6263555250514756, "grad_norm": 0.7386963404658666, "learning_rate": 3.2362523544990714e-06, "loss": 0.2929, "step": 18252 }, { "epoch": 0.6263898421413864, "grad_norm": 0.669934387710004, "learning_rate": 3.2357323505152383e-06, "loss": 0.2661, "step": 18253 }, { "epoch": 0.6264241592312972, "grad_norm": 0.9178462373627383, "learning_rate": 3.235212368326874e-06, "loss": 0.2872, "step": 18254 }, { "epoch": 0.626458476321208, "grad_norm": 0.8176877472528719, "learning_rate": 3.234692407940403e-06, "loss": 0.3082, "step": 18255 }, { "epoch": 0.6264927934111187, "grad_norm": 0.7110126462243592, "learning_rate": 3.23417246936225e-06, "loss": 0.272, "step": 18256 }, { "epoch": 0.6265271105010295, "grad_norm": 0.8148375312130021, "learning_rate": 3.233652552598838e-06, "loss": 0.2968, "step": 18257 }, { "epoch": 0.6265614275909402, "grad_norm": 0.8196186557427982, "learning_rate": 3.233132657656587e-06, "loss": 0.3505, "step": 18258 }, { "epoch": 0.6265957446808511, "grad_norm": 0.8539517266391535, "learning_rate": 3.232612784541923e-06, "loss": 0.2707, "step": 18259 }, { "epoch": 0.6266300617707619, "grad_norm": 0.7281417568345842, "learning_rate": 3.2320929332612695e-06, "loss": 0.2607, "step": 18260 }, { "epoch": 0.6266643788606726, "grad_norm": 0.7845715222576867, "learning_rate": 3.231573103821043e-06, "loss": 0.311, "step": 18261 }, { "epoch": 0.6266986959505834, "grad_norm": 0.7424793325108884, "learning_rate": 3.231053296227672e-06, "loss": 0.2136, "step": 18262 }, { "epoch": 0.6267330130404941, "grad_norm": 0.7886077282055859, "learning_rate": 3.2305335104875746e-06, "loss": 0.3045, "step": 18263 }, { "epoch": 0.626767330130405, "grad_norm": 0.7794380026062034, "learning_rate": 3.230013746607171e-06, "loss": 0.2531, "step": 18264 }, { "epoch": 0.6268016472203157, "grad_norm": 0.7925449230494839, "learning_rate": 3.2294940045928862e-06, "loss": 0.3154, "step": 18265 }, { "epoch": 0.6268359643102265, "grad_norm": 0.8467577444596367, "learning_rate": 3.228974284451137e-06, "loss": 0.2523, "step": 18266 }, { "epoch": 0.6268702814001372, "grad_norm": 0.7931496234621287, "learning_rate": 3.2284545861883453e-06, "loss": 0.2919, "step": 18267 }, { "epoch": 0.6269045984900481, "grad_norm": 0.8786034871555565, "learning_rate": 3.227934909810934e-06, "loss": 0.2286, "step": 18268 }, { "epoch": 0.6269389155799588, "grad_norm": 0.8620265188448168, "learning_rate": 3.2274152553253205e-06, "loss": 0.2647, "step": 18269 }, { "epoch": 0.6269732326698696, "grad_norm": 0.864004537995216, "learning_rate": 3.2268956227379232e-06, "loss": 0.3064, "step": 18270 }, { "epoch": 0.6270075497597803, "grad_norm": 0.7372982808451355, "learning_rate": 3.2263760120551647e-06, "loss": 0.2502, "step": 18271 }, { "epoch": 0.6270418668496911, "grad_norm": 0.8056061162716895, "learning_rate": 3.225856423283461e-06, "loss": 0.2562, "step": 18272 }, { "epoch": 0.627076183939602, "grad_norm": 0.7433315347297198, "learning_rate": 3.225336856429235e-06, "loss": 0.26, "step": 18273 }, { "epoch": 0.6271105010295127, "grad_norm": 1.027863254667971, "learning_rate": 3.224817311498902e-06, "loss": 0.2728, "step": 18274 }, { "epoch": 0.6271448181194235, "grad_norm": 0.7918732322234073, "learning_rate": 3.22429778849888e-06, "loss": 0.266, "step": 18275 }, { "epoch": 0.6271791352093342, "grad_norm": 0.7947460411896774, "learning_rate": 3.2237782874355906e-06, "loss": 0.2826, "step": 18276 }, { "epoch": 0.6272134522992451, "grad_norm": 0.7785167188542658, "learning_rate": 3.2232588083154505e-06, "loss": 0.261, "step": 18277 }, { "epoch": 0.6272477693891558, "grad_norm": 0.8497146663363175, "learning_rate": 3.2227393511448733e-06, "loss": 0.2875, "step": 18278 }, { "epoch": 0.6272820864790666, "grad_norm": 0.803989769551759, "learning_rate": 3.2222199159302812e-06, "loss": 0.2843, "step": 18279 }, { "epoch": 0.6273164035689773, "grad_norm": 0.770018378284829, "learning_rate": 3.221700502678089e-06, "loss": 0.2889, "step": 18280 }, { "epoch": 0.6273507206588881, "grad_norm": 0.6663182102876306, "learning_rate": 3.221181111394714e-06, "loss": 0.2127, "step": 18281 }, { "epoch": 0.627385037748799, "grad_norm": 0.7758971533430896, "learning_rate": 3.2206617420865717e-06, "loss": 0.3161, "step": 18282 }, { "epoch": 0.6274193548387097, "grad_norm": 0.7363084593548731, "learning_rate": 3.220142394760081e-06, "loss": 0.3174, "step": 18283 }, { "epoch": 0.6274536719286204, "grad_norm": 0.7629148357054126, "learning_rate": 3.2196230694216528e-06, "loss": 0.3108, "step": 18284 }, { "epoch": 0.6274879890185312, "grad_norm": 0.8120404416316079, "learning_rate": 3.2191037660777086e-06, "loss": 0.2752, "step": 18285 }, { "epoch": 0.627522306108442, "grad_norm": 0.7923347096344094, "learning_rate": 3.218584484734661e-06, "loss": 0.2837, "step": 18286 }, { "epoch": 0.6275566231983528, "grad_norm": 0.7732706848968174, "learning_rate": 3.218065225398923e-06, "loss": 0.2819, "step": 18287 }, { "epoch": 0.6275909402882636, "grad_norm": 0.7182043556525397, "learning_rate": 3.2175459880769124e-06, "loss": 0.2643, "step": 18288 }, { "epoch": 0.6276252573781743, "grad_norm": 0.8688198700805769, "learning_rate": 3.2170267727750445e-06, "loss": 0.2481, "step": 18289 }, { "epoch": 0.6276595744680851, "grad_norm": 0.769605870442923, "learning_rate": 3.2165075794997296e-06, "loss": 0.2747, "step": 18290 }, { "epoch": 0.6276938915579959, "grad_norm": 0.92327256944607, "learning_rate": 3.2159884082573867e-06, "loss": 0.2457, "step": 18291 }, { "epoch": 0.6277282086479067, "grad_norm": 0.8024631933431897, "learning_rate": 3.215469259054427e-06, "loss": 0.2334, "step": 18292 }, { "epoch": 0.6277625257378174, "grad_norm": 0.8529030794068142, "learning_rate": 3.214950131897261e-06, "loss": 0.3485, "step": 18293 }, { "epoch": 0.6277968428277282, "grad_norm": 0.7585422189937393, "learning_rate": 3.2144310267923085e-06, "loss": 0.279, "step": 18294 }, { "epoch": 0.6278311599176389, "grad_norm": 0.8333982938274757, "learning_rate": 3.2139119437459765e-06, "loss": 0.2769, "step": 18295 }, { "epoch": 0.6278654770075498, "grad_norm": 0.9687210074280606, "learning_rate": 3.213392882764681e-06, "loss": 0.2888, "step": 18296 }, { "epoch": 0.6278997940974606, "grad_norm": 0.7535407380161128, "learning_rate": 3.2128738438548336e-06, "loss": 0.2409, "step": 18297 }, { "epoch": 0.6279341111873713, "grad_norm": 0.6891019867598557, "learning_rate": 3.2123548270228467e-06, "loss": 0.2704, "step": 18298 }, { "epoch": 0.627968428277282, "grad_norm": 0.785136378938882, "learning_rate": 3.2118358322751297e-06, "loss": 0.2802, "step": 18299 }, { "epoch": 0.6280027453671929, "grad_norm": 0.8543178345880789, "learning_rate": 3.211316859618098e-06, "loss": 0.2626, "step": 18300 }, { "epoch": 0.6280370624571037, "grad_norm": 0.7746636505519001, "learning_rate": 3.2107979090581613e-06, "loss": 0.3099, "step": 18301 }, { "epoch": 0.6280713795470144, "grad_norm": 0.6563377571432025, "learning_rate": 3.210278980601729e-06, "loss": 0.2422, "step": 18302 }, { "epoch": 0.6281056966369252, "grad_norm": 0.8237094526987024, "learning_rate": 3.2097600742552135e-06, "loss": 0.241, "step": 18303 }, { "epoch": 0.6281400137268359, "grad_norm": 0.8631203538414358, "learning_rate": 3.2092411900250254e-06, "loss": 0.2807, "step": 18304 }, { "epoch": 0.6281743308167468, "grad_norm": 0.7692075096812583, "learning_rate": 3.2087223279175748e-06, "loss": 0.3147, "step": 18305 }, { "epoch": 0.6282086479066575, "grad_norm": 0.7695659599959871, "learning_rate": 3.208203487939272e-06, "loss": 0.2981, "step": 18306 }, { "epoch": 0.6282429649965683, "grad_norm": 0.8584867432083966, "learning_rate": 3.207684670096525e-06, "loss": 0.2702, "step": 18307 }, { "epoch": 0.628277282086479, "grad_norm": 0.696417867786045, "learning_rate": 3.2071658743957453e-06, "loss": 0.2466, "step": 18308 }, { "epoch": 0.6283115991763898, "grad_norm": 0.7163342269954636, "learning_rate": 3.206647100843341e-06, "loss": 0.2804, "step": 18309 }, { "epoch": 0.6283459162663007, "grad_norm": 0.649057261482061, "learning_rate": 3.2061283494457192e-06, "loss": 0.2456, "step": 18310 }, { "epoch": 0.6283802333562114, "grad_norm": 0.7887887901462544, "learning_rate": 3.2056096202092913e-06, "loss": 0.2779, "step": 18311 }, { "epoch": 0.6284145504461222, "grad_norm": 0.7382666671270297, "learning_rate": 3.205090913140465e-06, "loss": 0.2908, "step": 18312 }, { "epoch": 0.6284488675360329, "grad_norm": 0.8063992122179593, "learning_rate": 3.204572228245646e-06, "loss": 0.2624, "step": 18313 }, { "epoch": 0.6284831846259438, "grad_norm": 0.7296572010033702, "learning_rate": 3.204053565531246e-06, "loss": 0.3015, "step": 18314 }, { "epoch": 0.6285175017158545, "grad_norm": 0.8513172302357448, "learning_rate": 3.203534925003671e-06, "loss": 0.3086, "step": 18315 }, { "epoch": 0.6285518188057653, "grad_norm": 0.7683911498885426, "learning_rate": 3.2030163066693243e-06, "loss": 0.2711, "step": 18316 }, { "epoch": 0.628586135895676, "grad_norm": 0.6657359391982131, "learning_rate": 3.202497710534619e-06, "loss": 0.3011, "step": 18317 }, { "epoch": 0.6286204529855868, "grad_norm": 0.7856718864506843, "learning_rate": 3.2019791366059573e-06, "loss": 0.2598, "step": 18318 }, { "epoch": 0.6286547700754976, "grad_norm": 0.785976011680213, "learning_rate": 3.2014605848897458e-06, "loss": 0.2924, "step": 18319 }, { "epoch": 0.6286890871654084, "grad_norm": 0.7551970284427063, "learning_rate": 3.2009420553923943e-06, "loss": 0.2284, "step": 18320 }, { "epoch": 0.6287234042553191, "grad_norm": 0.7724212078225091, "learning_rate": 3.200423548120306e-06, "loss": 0.2787, "step": 18321 }, { "epoch": 0.6287577213452299, "grad_norm": 0.8284558682707323, "learning_rate": 3.199905063079885e-06, "loss": 0.3361, "step": 18322 }, { "epoch": 0.6287920384351408, "grad_norm": 0.764786463222507, "learning_rate": 3.1993866002775396e-06, "loss": 0.2266, "step": 18323 }, { "epoch": 0.6288263555250515, "grad_norm": 0.8321042304197453, "learning_rate": 3.1988681597196737e-06, "loss": 0.3011, "step": 18324 }, { "epoch": 0.6288606726149623, "grad_norm": 0.8170117712121637, "learning_rate": 3.198349741412691e-06, "loss": 0.2352, "step": 18325 }, { "epoch": 0.628894989704873, "grad_norm": 0.7106850423511776, "learning_rate": 3.197831345362996e-06, "loss": 0.2552, "step": 18326 }, { "epoch": 0.6289293067947838, "grad_norm": 0.7278680924838364, "learning_rate": 3.197312971576996e-06, "loss": 0.3166, "step": 18327 }, { "epoch": 0.6289636238846946, "grad_norm": 0.8504651961321427, "learning_rate": 3.1967946200610893e-06, "loss": 0.2717, "step": 18328 }, { "epoch": 0.6289979409746054, "grad_norm": 0.7951033771346684, "learning_rate": 3.196276290821685e-06, "loss": 0.2612, "step": 18329 }, { "epoch": 0.6290322580645161, "grad_norm": 0.8443219318065011, "learning_rate": 3.1957579838651846e-06, "loss": 0.3279, "step": 18330 }, { "epoch": 0.6290665751544269, "grad_norm": 0.6768629523371752, "learning_rate": 3.1952396991979885e-06, "loss": 0.2411, "step": 18331 }, { "epoch": 0.6291008922443376, "grad_norm": 0.7838870864117602, "learning_rate": 3.1947214368265046e-06, "loss": 0.2366, "step": 18332 }, { "epoch": 0.6291352093342485, "grad_norm": 0.7944060864616853, "learning_rate": 3.19420319675713e-06, "loss": 0.3464, "step": 18333 }, { "epoch": 0.6291695264241592, "grad_norm": 0.7199856185864135, "learning_rate": 3.19368497899627e-06, "loss": 0.2665, "step": 18334 }, { "epoch": 0.62920384351407, "grad_norm": 0.7425920664055297, "learning_rate": 3.193166783550327e-06, "loss": 0.2948, "step": 18335 }, { "epoch": 0.6292381606039807, "grad_norm": 1.0503753477564053, "learning_rate": 3.1926486104257e-06, "loss": 0.3172, "step": 18336 }, { "epoch": 0.6292724776938916, "grad_norm": 0.8514674125501229, "learning_rate": 3.192130459628795e-06, "loss": 0.2338, "step": 18337 }, { "epoch": 0.6293067947838024, "grad_norm": 0.8218463370357922, "learning_rate": 3.19161233116601e-06, "loss": 0.2482, "step": 18338 }, { "epoch": 0.6293411118737131, "grad_norm": 0.8053208679907073, "learning_rate": 3.191094225043745e-06, "loss": 0.2694, "step": 18339 }, { "epoch": 0.6293754289636239, "grad_norm": 0.7464698537070623, "learning_rate": 3.190576141268403e-06, "loss": 0.2811, "step": 18340 }, { "epoch": 0.6294097460535346, "grad_norm": 0.7872054149437516, "learning_rate": 3.1900580798463823e-06, "loss": 0.279, "step": 18341 }, { "epoch": 0.6294440631434455, "grad_norm": 0.7760303548849419, "learning_rate": 3.189540040784083e-06, "loss": 0.2752, "step": 18342 }, { "epoch": 0.6294783802333562, "grad_norm": 0.9349721302057367, "learning_rate": 3.1890220240879073e-06, "loss": 0.2326, "step": 18343 }, { "epoch": 0.629512697323267, "grad_norm": 0.808155563933366, "learning_rate": 3.1885040297642534e-06, "loss": 0.2671, "step": 18344 }, { "epoch": 0.6295470144131777, "grad_norm": 0.8557811382335286, "learning_rate": 3.1879860578195183e-06, "loss": 0.2965, "step": 18345 }, { "epoch": 0.6295813315030886, "grad_norm": 0.7972405701749822, "learning_rate": 3.1874681082601042e-06, "loss": 0.2431, "step": 18346 }, { "epoch": 0.6296156485929993, "grad_norm": 0.7645222827468074, "learning_rate": 3.1869501810924085e-06, "loss": 0.2424, "step": 18347 }, { "epoch": 0.6296499656829101, "grad_norm": 0.7370749480307677, "learning_rate": 3.1864322763228283e-06, "loss": 0.2445, "step": 18348 }, { "epoch": 0.6296842827728208, "grad_norm": 0.8229333264988022, "learning_rate": 3.185914393957764e-06, "loss": 0.3114, "step": 18349 }, { "epoch": 0.6297185998627316, "grad_norm": 0.8017774355053511, "learning_rate": 3.185396534003613e-06, "loss": 0.3044, "step": 18350 }, { "epoch": 0.6297529169526425, "grad_norm": 0.6872608516570231, "learning_rate": 3.18487869646677e-06, "loss": 0.2508, "step": 18351 }, { "epoch": 0.6297872340425532, "grad_norm": 0.7174584761401437, "learning_rate": 3.1843608813536365e-06, "loss": 0.2123, "step": 18352 }, { "epoch": 0.629821551132464, "grad_norm": 0.8210327892929493, "learning_rate": 3.1838430886706078e-06, "loss": 0.2446, "step": 18353 }, { "epoch": 0.6298558682223747, "grad_norm": 0.7768247128604269, "learning_rate": 3.1833253184240783e-06, "loss": 0.2928, "step": 18354 }, { "epoch": 0.6298901853122855, "grad_norm": 0.8372034838835328, "learning_rate": 3.182807570620447e-06, "loss": 0.2827, "step": 18355 }, { "epoch": 0.6299245024021963, "grad_norm": 0.7389158220339104, "learning_rate": 3.182289845266111e-06, "loss": 0.2757, "step": 18356 }, { "epoch": 0.6299588194921071, "grad_norm": 0.8390105614168119, "learning_rate": 3.181772142367462e-06, "loss": 0.2798, "step": 18357 }, { "epoch": 0.6299931365820178, "grad_norm": 0.8665101217666141, "learning_rate": 3.181254461930901e-06, "loss": 0.243, "step": 18358 }, { "epoch": 0.6300274536719286, "grad_norm": 0.808799160963522, "learning_rate": 3.1807368039628207e-06, "loss": 0.2682, "step": 18359 }, { "epoch": 0.6300617707618394, "grad_norm": 0.7999183153670556, "learning_rate": 3.180219168469614e-06, "loss": 0.3149, "step": 18360 }, { "epoch": 0.6300960878517502, "grad_norm": 0.7534802068435511, "learning_rate": 3.1797015554576795e-06, "loss": 0.2669, "step": 18361 }, { "epoch": 0.630130404941661, "grad_norm": 0.7844655541590511, "learning_rate": 3.1791839649334094e-06, "loss": 0.3056, "step": 18362 }, { "epoch": 0.6301647220315717, "grad_norm": 0.7056801966077343, "learning_rate": 3.1786663969031974e-06, "loss": 0.2956, "step": 18363 }, { "epoch": 0.6301990391214825, "grad_norm": 0.8648681103770705, "learning_rate": 3.1781488513734403e-06, "loss": 0.3119, "step": 18364 }, { "epoch": 0.6302333562113933, "grad_norm": 0.6815248583963577, "learning_rate": 3.1776313283505306e-06, "loss": 0.2236, "step": 18365 }, { "epoch": 0.6302676733013041, "grad_norm": 0.9791106279382328, "learning_rate": 3.177113827840859e-06, "loss": 0.2551, "step": 18366 }, { "epoch": 0.6303019903912148, "grad_norm": 0.7805134720880373, "learning_rate": 3.176596349850822e-06, "loss": 0.2919, "step": 18367 }, { "epoch": 0.6303363074811256, "grad_norm": 0.8421592424735834, "learning_rate": 3.1760788943868106e-06, "loss": 0.2971, "step": 18368 }, { "epoch": 0.6303706245710364, "grad_norm": 0.734912130462911, "learning_rate": 3.175561461455219e-06, "loss": 0.3124, "step": 18369 }, { "epoch": 0.6304049416609472, "grad_norm": 0.7561624986548844, "learning_rate": 3.175044051062438e-06, "loss": 0.2285, "step": 18370 }, { "epoch": 0.6304392587508579, "grad_norm": 0.7037184145224649, "learning_rate": 3.1745266632148598e-06, "loss": 0.2343, "step": 18371 }, { "epoch": 0.6304735758407687, "grad_norm": 0.7443041086712453, "learning_rate": 3.1740092979188763e-06, "loss": 0.2485, "step": 18372 }, { "epoch": 0.6305078929306794, "grad_norm": 0.7061977494453348, "learning_rate": 3.17349195518088e-06, "loss": 0.2814, "step": 18373 }, { "epoch": 0.6305422100205903, "grad_norm": 0.713316829003528, "learning_rate": 3.1729746350072594e-06, "loss": 0.2564, "step": 18374 }, { "epoch": 0.630576527110501, "grad_norm": 0.7649633096095676, "learning_rate": 3.172457337404409e-06, "loss": 0.2622, "step": 18375 }, { "epoch": 0.6306108442004118, "grad_norm": 0.8290556094292335, "learning_rate": 3.1719400623787172e-06, "loss": 0.3803, "step": 18376 }, { "epoch": 0.6306451612903226, "grad_norm": 0.8095898218831545, "learning_rate": 3.171422809936573e-06, "loss": 0.2661, "step": 18377 }, { "epoch": 0.6306794783802333, "grad_norm": 0.7898745150803235, "learning_rate": 3.170905580084369e-06, "loss": 0.283, "step": 18378 }, { "epoch": 0.6307137954701442, "grad_norm": 0.8938484871963379, "learning_rate": 3.1703883728284947e-06, "loss": 0.2549, "step": 18379 }, { "epoch": 0.6307481125600549, "grad_norm": 0.8064513627433578, "learning_rate": 3.169871188175337e-06, "loss": 0.3118, "step": 18380 }, { "epoch": 0.6307824296499657, "grad_norm": 0.8463015798351157, "learning_rate": 3.1693540261312892e-06, "loss": 0.2951, "step": 18381 }, { "epoch": 0.6308167467398764, "grad_norm": 0.7700722701407411, "learning_rate": 3.168836886702738e-06, "loss": 0.2622, "step": 18382 }, { "epoch": 0.6308510638297873, "grad_norm": 0.6708119363594397, "learning_rate": 3.16831976989607e-06, "loss": 0.3257, "step": 18383 }, { "epoch": 0.630885380919698, "grad_norm": 0.877215173270797, "learning_rate": 3.1678026757176784e-06, "loss": 0.3016, "step": 18384 }, { "epoch": 0.6309196980096088, "grad_norm": 0.7264233133158888, "learning_rate": 3.167285604173947e-06, "loss": 0.2638, "step": 18385 }, { "epoch": 0.6309540150995195, "grad_norm": 0.8307145431361779, "learning_rate": 3.1667685552712647e-06, "loss": 0.3084, "step": 18386 }, { "epoch": 0.6309883321894303, "grad_norm": 0.6913228132289627, "learning_rate": 3.166251529016021e-06, "loss": 0.2668, "step": 18387 }, { "epoch": 0.6310226492793412, "grad_norm": 0.7725150992684103, "learning_rate": 3.165734525414602e-06, "loss": 0.2203, "step": 18388 }, { "epoch": 0.6310569663692519, "grad_norm": 0.8352653550022687, "learning_rate": 3.165217544473392e-06, "loss": 0.3044, "step": 18389 }, { "epoch": 0.6310912834591627, "grad_norm": 0.7566693516251809, "learning_rate": 3.1647005861987823e-06, "loss": 0.2373, "step": 18390 }, { "epoch": 0.6311256005490734, "grad_norm": 0.7773976573846492, "learning_rate": 3.1641836505971564e-06, "loss": 0.3029, "step": 18391 }, { "epoch": 0.6311599176389843, "grad_norm": 0.7927320626864357, "learning_rate": 3.163666737674901e-06, "loss": 0.2889, "step": 18392 }, { "epoch": 0.631194234728895, "grad_norm": 0.7862511770958184, "learning_rate": 3.163149847438402e-06, "loss": 0.2754, "step": 18393 }, { "epoch": 0.6312285518188058, "grad_norm": 0.7739093795094429, "learning_rate": 3.1626329798940457e-06, "loss": 0.2779, "step": 18394 }, { "epoch": 0.6312628689087165, "grad_norm": 0.7532407077471681, "learning_rate": 3.1621161350482156e-06, "loss": 0.2685, "step": 18395 }, { "epoch": 0.6312971859986273, "grad_norm": 0.8390892648673322, "learning_rate": 3.1615993129072997e-06, "loss": 0.3003, "step": 18396 }, { "epoch": 0.6313315030885381, "grad_norm": 0.7114467514040548, "learning_rate": 3.1610825134776784e-06, "loss": 0.2986, "step": 18397 }, { "epoch": 0.6313658201784489, "grad_norm": 0.739764235450714, "learning_rate": 3.160565736765741e-06, "loss": 0.2811, "step": 18398 }, { "epoch": 0.6314001372683596, "grad_norm": 0.96880053189103, "learning_rate": 3.160048982777868e-06, "loss": 0.2861, "step": 18399 }, { "epoch": 0.6314344543582704, "grad_norm": 0.7025883204839326, "learning_rate": 3.159532251520445e-06, "loss": 0.2677, "step": 18400 }, { "epoch": 0.6314687714481811, "grad_norm": 0.8354603011169436, "learning_rate": 3.159015542999856e-06, "loss": 0.3063, "step": 18401 }, { "epoch": 0.631503088538092, "grad_norm": 1.1415970900067158, "learning_rate": 3.1584988572224833e-06, "loss": 0.3557, "step": 18402 }, { "epoch": 0.6315374056280028, "grad_norm": 0.8226593222684598, "learning_rate": 3.157982194194709e-06, "loss": 0.263, "step": 18403 }, { "epoch": 0.6315717227179135, "grad_norm": 0.8406882508975432, "learning_rate": 3.1574655539229188e-06, "loss": 0.2467, "step": 18404 }, { "epoch": 0.6316060398078243, "grad_norm": 0.7534919881730406, "learning_rate": 3.156948936413493e-06, "loss": 0.2973, "step": 18405 }, { "epoch": 0.6316403568977351, "grad_norm": 0.6940169054308817, "learning_rate": 3.1564323416728134e-06, "loss": 0.2468, "step": 18406 }, { "epoch": 0.6316746739876459, "grad_norm": 0.8211016017359443, "learning_rate": 3.155915769707264e-06, "loss": 0.2724, "step": 18407 }, { "epoch": 0.6317089910775566, "grad_norm": 0.8154936323944046, "learning_rate": 3.155399220523225e-06, "loss": 0.3054, "step": 18408 }, { "epoch": 0.6317433081674674, "grad_norm": 0.7144165834562144, "learning_rate": 3.154882694127076e-06, "loss": 0.268, "step": 18409 }, { "epoch": 0.6317776252573781, "grad_norm": 0.8775046898072686, "learning_rate": 3.1543661905252034e-06, "loss": 0.2776, "step": 18410 }, { "epoch": 0.631811942347289, "grad_norm": 0.7711688295748762, "learning_rate": 3.153849709723984e-06, "loss": 0.266, "step": 18411 }, { "epoch": 0.6318462594371997, "grad_norm": 0.7506505786507366, "learning_rate": 3.1533332517297964e-06, "loss": 0.2347, "step": 18412 }, { "epoch": 0.6318805765271105, "grad_norm": 0.7286218140282489, "learning_rate": 3.1528168165490266e-06, "loss": 0.2122, "step": 18413 }, { "epoch": 0.6319148936170212, "grad_norm": 0.7760872083689981, "learning_rate": 3.15230040418805e-06, "loss": 0.3094, "step": 18414 }, { "epoch": 0.6319492107069321, "grad_norm": 0.6886288741590146, "learning_rate": 3.1517840146532473e-06, "loss": 0.2444, "step": 18415 }, { "epoch": 0.6319835277968429, "grad_norm": 0.7982315462225271, "learning_rate": 3.1512676479509985e-06, "loss": 0.2451, "step": 18416 }, { "epoch": 0.6320178448867536, "grad_norm": 0.7733582276310892, "learning_rate": 3.1507513040876835e-06, "loss": 0.2417, "step": 18417 }, { "epoch": 0.6320521619766644, "grad_norm": 0.6682374793429414, "learning_rate": 3.150234983069679e-06, "loss": 0.2189, "step": 18418 }, { "epoch": 0.6320864790665751, "grad_norm": 0.8278037673135041, "learning_rate": 3.149718684903366e-06, "loss": 0.2634, "step": 18419 }, { "epoch": 0.632120796156486, "grad_norm": 0.7438998842378477, "learning_rate": 3.149202409595121e-06, "loss": 0.2697, "step": 18420 }, { "epoch": 0.6321551132463967, "grad_norm": 0.7209007666099954, "learning_rate": 3.1486861571513204e-06, "loss": 0.2611, "step": 18421 }, { "epoch": 0.6321894303363075, "grad_norm": 0.7632747579760053, "learning_rate": 3.1481699275783453e-06, "loss": 0.2766, "step": 18422 }, { "epoch": 0.6322237474262182, "grad_norm": 0.7448500042207883, "learning_rate": 3.147653720882573e-06, "loss": 0.2329, "step": 18423 }, { "epoch": 0.632258064516129, "grad_norm": 0.7904645663012271, "learning_rate": 3.147137537070377e-06, "loss": 0.2428, "step": 18424 }, { "epoch": 0.6322923816060398, "grad_norm": 0.7054097078231597, "learning_rate": 3.1466213761481386e-06, "loss": 0.242, "step": 18425 }, { "epoch": 0.6323266986959506, "grad_norm": 0.8006125182467964, "learning_rate": 3.1461052381222323e-06, "loss": 0.2413, "step": 18426 }, { "epoch": 0.6323610157858613, "grad_norm": 0.7670047952741842, "learning_rate": 3.145589122999032e-06, "loss": 0.2422, "step": 18427 }, { "epoch": 0.6323953328757721, "grad_norm": 0.8024095843208353, "learning_rate": 3.1450730307849187e-06, "loss": 0.28, "step": 18428 }, { "epoch": 0.632429649965683, "grad_norm": 0.9722617633891165, "learning_rate": 3.1445569614862625e-06, "loss": 0.2967, "step": 18429 }, { "epoch": 0.6324639670555937, "grad_norm": 0.7747991980157075, "learning_rate": 3.1440409151094436e-06, "loss": 0.2806, "step": 18430 }, { "epoch": 0.6324982841455045, "grad_norm": 0.774706299854285, "learning_rate": 3.1435248916608363e-06, "loss": 0.3033, "step": 18431 }, { "epoch": 0.6325326012354152, "grad_norm": 0.759247867820716, "learning_rate": 3.143008891146812e-06, "loss": 0.2929, "step": 18432 }, { "epoch": 0.632566918325326, "grad_norm": 0.8217338703107425, "learning_rate": 3.1424929135737505e-06, "loss": 0.3278, "step": 18433 }, { "epoch": 0.6326012354152368, "grad_norm": 0.8393535625542379, "learning_rate": 3.141976958948023e-06, "loss": 0.2694, "step": 18434 }, { "epoch": 0.6326355525051476, "grad_norm": 0.6926269557433546, "learning_rate": 3.1414610272760015e-06, "loss": 0.2693, "step": 18435 }, { "epoch": 0.6326698695950583, "grad_norm": 0.8581626327489986, "learning_rate": 3.140945118564065e-06, "loss": 0.2085, "step": 18436 }, { "epoch": 0.6327041866849691, "grad_norm": 0.7696352692883145, "learning_rate": 3.1404292328185826e-06, "loss": 0.2834, "step": 18437 }, { "epoch": 0.63273850377488, "grad_norm": 0.8334074396371265, "learning_rate": 3.1399133700459285e-06, "loss": 0.3036, "step": 18438 }, { "epoch": 0.6327728208647907, "grad_norm": 0.8899726346166914, "learning_rate": 3.1393975302524778e-06, "loss": 0.3479, "step": 18439 }, { "epoch": 0.6328071379547014, "grad_norm": 0.7953447212452098, "learning_rate": 3.1388817134446014e-06, "loss": 0.2732, "step": 18440 }, { "epoch": 0.6328414550446122, "grad_norm": 0.7656291632197653, "learning_rate": 3.138365919628669e-06, "loss": 0.2372, "step": 18441 }, { "epoch": 0.632875772134523, "grad_norm": 0.8006941896725686, "learning_rate": 3.137850148811058e-06, "loss": 0.2956, "step": 18442 }, { "epoch": 0.6329100892244338, "grad_norm": 0.8336165300891534, "learning_rate": 3.137334400998136e-06, "loss": 0.2618, "step": 18443 }, { "epoch": 0.6329444063143446, "grad_norm": 0.767006085449289, "learning_rate": 3.136818676196276e-06, "loss": 0.3323, "step": 18444 }, { "epoch": 0.6329787234042553, "grad_norm": 0.7990184432034885, "learning_rate": 3.136302974411849e-06, "loss": 0.2468, "step": 18445 }, { "epoch": 0.6330130404941661, "grad_norm": 0.7478555878103456, "learning_rate": 3.1357872956512263e-06, "loss": 0.2582, "step": 18446 }, { "epoch": 0.6330473575840768, "grad_norm": 0.8263763324196891, "learning_rate": 3.1352716399207765e-06, "loss": 0.2993, "step": 18447 }, { "epoch": 0.6330816746739877, "grad_norm": 0.8490099593208569, "learning_rate": 3.1347560072268733e-06, "loss": 0.247, "step": 18448 }, { "epoch": 0.6331159917638984, "grad_norm": 0.7351034241999311, "learning_rate": 3.1342403975758857e-06, "loss": 0.2845, "step": 18449 }, { "epoch": 0.6331503088538092, "grad_norm": 0.8322932134326471, "learning_rate": 3.1337248109741804e-06, "loss": 0.2575, "step": 18450 }, { "epoch": 0.6331846259437199, "grad_norm": 0.7245356839856938, "learning_rate": 3.133209247428131e-06, "loss": 0.2717, "step": 18451 }, { "epoch": 0.6332189430336308, "grad_norm": 0.8401169503031248, "learning_rate": 3.1326937069441037e-06, "loss": 0.2749, "step": 18452 }, { "epoch": 0.6332532601235416, "grad_norm": 0.7108048283628079, "learning_rate": 3.1321781895284675e-06, "loss": 0.2605, "step": 18453 }, { "epoch": 0.6332875772134523, "grad_norm": 0.7614325387468149, "learning_rate": 3.131662695187595e-06, "loss": 0.2912, "step": 18454 }, { "epoch": 0.633321894303363, "grad_norm": 0.7923413254808035, "learning_rate": 3.1311472239278508e-06, "loss": 0.252, "step": 18455 }, { "epoch": 0.6333562113932738, "grad_norm": 0.8022189828262624, "learning_rate": 3.130631775755602e-06, "loss": 0.2823, "step": 18456 }, { "epoch": 0.6333905284831847, "grad_norm": 0.763580511231861, "learning_rate": 3.1301163506772203e-06, "loss": 0.3062, "step": 18457 }, { "epoch": 0.6334248455730954, "grad_norm": 0.7212820784034795, "learning_rate": 3.1296009486990695e-06, "loss": 0.22, "step": 18458 }, { "epoch": 0.6334591626630062, "grad_norm": 0.7275337730292081, "learning_rate": 3.1290855698275193e-06, "loss": 0.2978, "step": 18459 }, { "epoch": 0.6334934797529169, "grad_norm": 0.7921462537090102, "learning_rate": 3.1285702140689347e-06, "loss": 0.2549, "step": 18460 }, { "epoch": 0.6335277968428277, "grad_norm": 0.9461056196224654, "learning_rate": 3.1280548814296823e-06, "loss": 0.2313, "step": 18461 }, { "epoch": 0.6335621139327385, "grad_norm": 0.7683304570889866, "learning_rate": 3.1275395719161316e-06, "loss": 0.255, "step": 18462 }, { "epoch": 0.6335964310226493, "grad_norm": 0.7476357963945679, "learning_rate": 3.1270242855346465e-06, "loss": 0.2696, "step": 18463 }, { "epoch": 0.63363074811256, "grad_norm": 0.7227860976150948, "learning_rate": 3.1265090222915906e-06, "loss": 0.2804, "step": 18464 }, { "epoch": 0.6336650652024708, "grad_norm": 0.7684126566466905, "learning_rate": 3.1259937821933335e-06, "loss": 0.3379, "step": 18465 }, { "epoch": 0.6336993822923817, "grad_norm": 0.8499500646082963, "learning_rate": 3.125478565246237e-06, "loss": 0.259, "step": 18466 }, { "epoch": 0.6337336993822924, "grad_norm": 0.7704868358211019, "learning_rate": 3.1249633714566676e-06, "loss": 0.2412, "step": 18467 }, { "epoch": 0.6337680164722032, "grad_norm": 0.7378903842582608, "learning_rate": 3.1244482008309893e-06, "loss": 0.244, "step": 18468 }, { "epoch": 0.6338023335621139, "grad_norm": 0.8626123925438833, "learning_rate": 3.1239330533755687e-06, "loss": 0.2805, "step": 18469 }, { "epoch": 0.6338366506520247, "grad_norm": 0.7195174658247226, "learning_rate": 3.123417929096765e-06, "loss": 0.2594, "step": 18470 }, { "epoch": 0.6338709677419355, "grad_norm": 0.6893292875668795, "learning_rate": 3.1229028280009477e-06, "loss": 0.2386, "step": 18471 }, { "epoch": 0.6339052848318463, "grad_norm": 0.7993096823947985, "learning_rate": 3.122387750094477e-06, "loss": 0.2499, "step": 18472 }, { "epoch": 0.633939601921757, "grad_norm": 0.9364205917561779, "learning_rate": 3.1218726953837147e-06, "loss": 0.2702, "step": 18473 }, { "epoch": 0.6339739190116678, "grad_norm": 0.742145432279269, "learning_rate": 3.1213576638750267e-06, "loss": 0.2722, "step": 18474 }, { "epoch": 0.6340082361015786, "grad_norm": 0.871039851163846, "learning_rate": 3.120842655574775e-06, "loss": 0.221, "step": 18475 }, { "epoch": 0.6340425531914894, "grad_norm": 0.7403173140656828, "learning_rate": 3.1203276704893197e-06, "loss": 0.2725, "step": 18476 }, { "epoch": 0.6340768702814001, "grad_norm": 0.7865405041791851, "learning_rate": 3.119812708625026e-06, "loss": 0.2568, "step": 18477 }, { "epoch": 0.6341111873713109, "grad_norm": 0.7422936851987891, "learning_rate": 3.1192977699882556e-06, "loss": 0.2882, "step": 18478 }, { "epoch": 0.6341455044612216, "grad_norm": 0.8160253308736477, "learning_rate": 3.1187828545853654e-06, "loss": 0.3021, "step": 18479 }, { "epoch": 0.6341798215511325, "grad_norm": 0.7578662209792113, "learning_rate": 3.118267962422722e-06, "loss": 0.2293, "step": 18480 }, { "epoch": 0.6342141386410433, "grad_norm": 0.8290665995351678, "learning_rate": 3.1177530935066837e-06, "loss": 0.2662, "step": 18481 }, { "epoch": 0.634248455730954, "grad_norm": 0.8405087846900694, "learning_rate": 3.1172382478436104e-06, "loss": 0.3249, "step": 18482 }, { "epoch": 0.6342827728208648, "grad_norm": 0.8341443330455195, "learning_rate": 3.1167234254398637e-06, "loss": 0.2629, "step": 18483 }, { "epoch": 0.6343170899107755, "grad_norm": 0.7704403975763532, "learning_rate": 3.1162086263018047e-06, "loss": 0.2811, "step": 18484 }, { "epoch": 0.6343514070006864, "grad_norm": 0.7274367650383775, "learning_rate": 3.1156938504357896e-06, "loss": 0.2447, "step": 18485 }, { "epoch": 0.6343857240905971, "grad_norm": 0.6668032512612344, "learning_rate": 3.115179097848182e-06, "loss": 0.2095, "step": 18486 }, { "epoch": 0.6344200411805079, "grad_norm": 0.7080994750006401, "learning_rate": 3.1146643685453396e-06, "loss": 0.2648, "step": 18487 }, { "epoch": 0.6344543582704186, "grad_norm": 0.8741772597952832, "learning_rate": 3.114149662533619e-06, "loss": 0.2978, "step": 18488 }, { "epoch": 0.6344886753603295, "grad_norm": 0.7819997538385938, "learning_rate": 3.113634979819381e-06, "loss": 0.2538, "step": 18489 }, { "epoch": 0.6345229924502402, "grad_norm": 0.8365488781153354, "learning_rate": 3.113120320408985e-06, "loss": 0.2728, "step": 18490 }, { "epoch": 0.634557309540151, "grad_norm": 0.7384427860491362, "learning_rate": 3.112605684308785e-06, "loss": 0.2915, "step": 18491 }, { "epoch": 0.6345916266300617, "grad_norm": 0.7229725067199931, "learning_rate": 3.112091071525144e-06, "loss": 0.315, "step": 18492 }, { "epoch": 0.6346259437199725, "grad_norm": 0.863669261383885, "learning_rate": 3.1115764820644143e-06, "loss": 0.2477, "step": 18493 }, { "epoch": 0.6346602608098834, "grad_norm": 0.7401574223577426, "learning_rate": 3.1110619159329574e-06, "loss": 0.3113, "step": 18494 }, { "epoch": 0.6346945778997941, "grad_norm": 0.7734128792310797, "learning_rate": 3.110547373137128e-06, "loss": 0.2631, "step": 18495 }, { "epoch": 0.6347288949897049, "grad_norm": 0.7527250327296758, "learning_rate": 3.1100328536832815e-06, "loss": 0.2652, "step": 18496 }, { "epoch": 0.6347632120796156, "grad_norm": 0.8942012663688562, "learning_rate": 3.1095183575777765e-06, "loss": 0.2802, "step": 18497 }, { "epoch": 0.6347975291695265, "grad_norm": 0.8914546420119632, "learning_rate": 3.1090038848269688e-06, "loss": 0.3363, "step": 18498 }, { "epoch": 0.6348318462594372, "grad_norm": 0.7481328073344733, "learning_rate": 3.108489435437212e-06, "loss": 0.2769, "step": 18499 }, { "epoch": 0.634866163349348, "grad_norm": 0.7334497904476096, "learning_rate": 3.107975009414864e-06, "loss": 0.2317, "step": 18500 }, { "epoch": 0.6349004804392587, "grad_norm": 0.7246956114748514, "learning_rate": 3.1074606067662793e-06, "loss": 0.2431, "step": 18501 }, { "epoch": 0.6349347975291695, "grad_norm": 0.8494460667512429, "learning_rate": 3.1069462274978105e-06, "loss": 0.3188, "step": 18502 }, { "epoch": 0.6349691146190803, "grad_norm": 0.7933396498867363, "learning_rate": 3.106431871615816e-06, "loss": 0.2499, "step": 18503 }, { "epoch": 0.6350034317089911, "grad_norm": 0.7888415044303345, "learning_rate": 3.105917539126647e-06, "loss": 0.3169, "step": 18504 }, { "epoch": 0.6350377487989018, "grad_norm": 0.781460212031169, "learning_rate": 3.1054032300366575e-06, "loss": 0.258, "step": 18505 }, { "epoch": 0.6350720658888126, "grad_norm": 0.7502789772804558, "learning_rate": 3.1048889443522046e-06, "loss": 0.241, "step": 18506 }, { "epoch": 0.6351063829787233, "grad_norm": 0.9066042124471471, "learning_rate": 3.104374682079638e-06, "loss": 0.3312, "step": 18507 }, { "epoch": 0.6351407000686342, "grad_norm": 0.824914806753118, "learning_rate": 3.103860443225312e-06, "loss": 0.2839, "step": 18508 }, { "epoch": 0.635175017158545, "grad_norm": 0.671242890928204, "learning_rate": 3.10334622779558e-06, "loss": 0.2048, "step": 18509 }, { "epoch": 0.6352093342484557, "grad_norm": 0.7295259040429748, "learning_rate": 3.1028320357967933e-06, "loss": 0.229, "step": 18510 }, { "epoch": 0.6352436513383665, "grad_norm": 0.8171037882792584, "learning_rate": 3.1023178672353054e-06, "loss": 0.3404, "step": 18511 }, { "epoch": 0.6352779684282773, "grad_norm": 0.7171043657773889, "learning_rate": 3.101803722117468e-06, "loss": 0.2267, "step": 18512 }, { "epoch": 0.6353122855181881, "grad_norm": 0.7645955859212631, "learning_rate": 3.101289600449633e-06, "loss": 0.2526, "step": 18513 }, { "epoch": 0.6353466026080988, "grad_norm": 0.8348561683062454, "learning_rate": 3.100775502238149e-06, "loss": 0.2976, "step": 18514 }, { "epoch": 0.6353809196980096, "grad_norm": 0.7796451336187272, "learning_rate": 3.100261427489372e-06, "loss": 0.2419, "step": 18515 }, { "epoch": 0.6354152367879203, "grad_norm": 0.7867729247241816, "learning_rate": 3.0997473762096505e-06, "loss": 0.2706, "step": 18516 }, { "epoch": 0.6354495538778312, "grad_norm": 0.7295557381190075, "learning_rate": 3.0992333484053317e-06, "loss": 0.2426, "step": 18517 }, { "epoch": 0.635483870967742, "grad_norm": 0.7849289411374937, "learning_rate": 3.09871934408277e-06, "loss": 0.2563, "step": 18518 }, { "epoch": 0.6355181880576527, "grad_norm": 0.835716607686362, "learning_rate": 3.0982053632483157e-06, "loss": 0.243, "step": 18519 }, { "epoch": 0.6355525051475635, "grad_norm": 0.8885571237582915, "learning_rate": 3.0976914059083146e-06, "loss": 0.2497, "step": 18520 }, { "epoch": 0.6355868222374743, "grad_norm": 0.8422320449522455, "learning_rate": 3.0971774720691206e-06, "loss": 0.2798, "step": 18521 }, { "epoch": 0.6356211393273851, "grad_norm": 0.8205197157570026, "learning_rate": 3.09666356173708e-06, "loss": 0.2864, "step": 18522 }, { "epoch": 0.6356554564172958, "grad_norm": 0.7849101626221372, "learning_rate": 3.0961496749185398e-06, "loss": 0.3338, "step": 18523 }, { "epoch": 0.6356897735072066, "grad_norm": 0.8089119357680212, "learning_rate": 3.0956358116198533e-06, "loss": 0.2986, "step": 18524 }, { "epoch": 0.6357240905971173, "grad_norm": 0.74525580900092, "learning_rate": 3.0951219718473637e-06, "loss": 0.2984, "step": 18525 }, { "epoch": 0.6357584076870282, "grad_norm": 0.7157507352838381, "learning_rate": 3.094608155607424e-06, "loss": 0.3236, "step": 18526 }, { "epoch": 0.6357927247769389, "grad_norm": 0.6960897358698468, "learning_rate": 3.0940943629063767e-06, "loss": 0.2527, "step": 18527 }, { "epoch": 0.6358270418668497, "grad_norm": 0.7927229084525413, "learning_rate": 3.0935805937505713e-06, "loss": 0.3051, "step": 18528 }, { "epoch": 0.6358613589567604, "grad_norm": 0.719406366768171, "learning_rate": 3.093066848146357e-06, "loss": 0.2701, "step": 18529 }, { "epoch": 0.6358956760466712, "grad_norm": 0.7807002969876506, "learning_rate": 3.0925531261000785e-06, "loss": 0.2563, "step": 18530 }, { "epoch": 0.635929993136582, "grad_norm": 0.7538957579243817, "learning_rate": 3.0920394276180797e-06, "loss": 0.2684, "step": 18531 }, { "epoch": 0.6359643102264928, "grad_norm": 0.6958040274998128, "learning_rate": 3.091525752706712e-06, "loss": 0.2307, "step": 18532 }, { "epoch": 0.6359986273164036, "grad_norm": 0.769921671678329, "learning_rate": 3.091012101372318e-06, "loss": 0.2845, "step": 18533 }, { "epoch": 0.6360329444063143, "grad_norm": 0.7587156863084771, "learning_rate": 3.090498473621243e-06, "loss": 0.2647, "step": 18534 }, { "epoch": 0.6360672614962252, "grad_norm": 0.818480727828649, "learning_rate": 3.089984869459833e-06, "loss": 0.2755, "step": 18535 }, { "epoch": 0.6361015785861359, "grad_norm": 0.7773999068865065, "learning_rate": 3.0894712888944344e-06, "loss": 0.3239, "step": 18536 }, { "epoch": 0.6361358956760467, "grad_norm": 0.7382997510871626, "learning_rate": 3.0889577319313886e-06, "loss": 0.2264, "step": 18537 }, { "epoch": 0.6361702127659574, "grad_norm": 0.96033280549048, "learning_rate": 3.088444198577044e-06, "loss": 0.2432, "step": 18538 }, { "epoch": 0.6362045298558682, "grad_norm": 0.7244925642762399, "learning_rate": 3.0879306888377423e-06, "loss": 0.2334, "step": 18539 }, { "epoch": 0.636238846945779, "grad_norm": 0.8446211316900427, "learning_rate": 3.0874172027198258e-06, "loss": 0.2451, "step": 18540 }, { "epoch": 0.6362731640356898, "grad_norm": 0.7941203975227241, "learning_rate": 3.086903740229641e-06, "loss": 0.247, "step": 18541 }, { "epoch": 0.6363074811256005, "grad_norm": 0.7295340591330389, "learning_rate": 3.0863903013735314e-06, "loss": 0.2747, "step": 18542 }, { "epoch": 0.6363417982155113, "grad_norm": 0.8081243302626193, "learning_rate": 3.085876886157836e-06, "loss": 0.3236, "step": 18543 }, { "epoch": 0.6363761153054222, "grad_norm": 0.7467465343788064, "learning_rate": 3.0853634945889023e-06, "loss": 0.2898, "step": 18544 }, { "epoch": 0.6364104323953329, "grad_norm": 0.7822135997405952, "learning_rate": 3.08485012667307e-06, "loss": 0.3807, "step": 18545 }, { "epoch": 0.6364447494852437, "grad_norm": 0.8007759411907354, "learning_rate": 3.08433678241668e-06, "loss": 0.2795, "step": 18546 }, { "epoch": 0.6364790665751544, "grad_norm": 0.8321218433298375, "learning_rate": 3.0838234618260777e-06, "loss": 0.296, "step": 18547 }, { "epoch": 0.6365133836650652, "grad_norm": 0.7931733993110245, "learning_rate": 3.0833101649076015e-06, "loss": 0.3249, "step": 18548 }, { "epoch": 0.636547700754976, "grad_norm": 0.7816345946959486, "learning_rate": 3.082796891667592e-06, "loss": 0.2606, "step": 18549 }, { "epoch": 0.6365820178448868, "grad_norm": 0.7500176782248567, "learning_rate": 3.0822836421123944e-06, "loss": 0.2552, "step": 18550 }, { "epoch": 0.6366163349347975, "grad_norm": 0.7758679540046125, "learning_rate": 3.0817704162483464e-06, "loss": 0.3011, "step": 18551 }, { "epoch": 0.6366506520247083, "grad_norm": 0.7107218871018073, "learning_rate": 3.0812572140817857e-06, "loss": 0.2347, "step": 18552 }, { "epoch": 0.636684969114619, "grad_norm": 0.8255183637603454, "learning_rate": 3.0807440356190576e-06, "loss": 0.3427, "step": 18553 }, { "epoch": 0.6367192862045299, "grad_norm": 0.7099539836080299, "learning_rate": 3.0802308808664973e-06, "loss": 0.2304, "step": 18554 }, { "epoch": 0.6367536032944406, "grad_norm": 0.7430652569529667, "learning_rate": 3.0797177498304487e-06, "loss": 0.2579, "step": 18555 }, { "epoch": 0.6367879203843514, "grad_norm": 0.6972890466065984, "learning_rate": 3.079204642517247e-06, "loss": 0.271, "step": 18556 }, { "epoch": 0.6368222374742621, "grad_norm": 0.7439982359355751, "learning_rate": 3.078691558933232e-06, "loss": 0.2467, "step": 18557 }, { "epoch": 0.636856554564173, "grad_norm": 0.7856756051128136, "learning_rate": 3.0781784990847436e-06, "loss": 0.2382, "step": 18558 }, { "epoch": 0.6368908716540838, "grad_norm": 0.8184679159268006, "learning_rate": 3.07766546297812e-06, "loss": 0.257, "step": 18559 }, { "epoch": 0.6369251887439945, "grad_norm": 0.7897430656909942, "learning_rate": 3.077152450619696e-06, "loss": 0.2311, "step": 18560 }, { "epoch": 0.6369595058339053, "grad_norm": 0.7414752453298722, "learning_rate": 3.0766394620158142e-06, "loss": 0.2517, "step": 18561 }, { "epoch": 0.636993822923816, "grad_norm": 0.6791993669516432, "learning_rate": 3.076126497172809e-06, "loss": 0.3285, "step": 18562 }, { "epoch": 0.6370281400137269, "grad_norm": 0.965096565359773, "learning_rate": 3.0756135560970165e-06, "loss": 0.2275, "step": 18563 }, { "epoch": 0.6370624571036376, "grad_norm": 0.7938644543740261, "learning_rate": 3.0751006387947752e-06, "loss": 0.2806, "step": 18564 }, { "epoch": 0.6370967741935484, "grad_norm": 0.6788923858091185, "learning_rate": 3.074587745272423e-06, "loss": 0.2156, "step": 18565 }, { "epoch": 0.6371310912834591, "grad_norm": 0.7774881567257373, "learning_rate": 3.0740748755362917e-06, "loss": 0.299, "step": 18566 }, { "epoch": 0.63716540837337, "grad_norm": 0.7837922029315733, "learning_rate": 3.0735620295927222e-06, "loss": 0.2463, "step": 18567 }, { "epoch": 0.6371997254632807, "grad_norm": 0.8493897794154819, "learning_rate": 3.0730492074480478e-06, "loss": 0.2594, "step": 18568 }, { "epoch": 0.6372340425531915, "grad_norm": 0.76852732104872, "learning_rate": 3.072536409108602e-06, "loss": 0.2674, "step": 18569 }, { "epoch": 0.6372683596431022, "grad_norm": 0.7798612787429096, "learning_rate": 3.072023634580723e-06, "loss": 0.3074, "step": 18570 }, { "epoch": 0.637302676733013, "grad_norm": 0.7691786202404859, "learning_rate": 3.0715108838707443e-06, "loss": 0.2848, "step": 18571 }, { "epoch": 0.6373369938229239, "grad_norm": 0.8100841411420542, "learning_rate": 3.0709981569849977e-06, "loss": 0.2706, "step": 18572 }, { "epoch": 0.6373713109128346, "grad_norm": 0.7900719974804086, "learning_rate": 3.0704854539298234e-06, "loss": 0.3051, "step": 18573 }, { "epoch": 0.6374056280027454, "grad_norm": 0.8143334303701816, "learning_rate": 3.069972774711551e-06, "loss": 0.275, "step": 18574 }, { "epoch": 0.6374399450926561, "grad_norm": 0.8652707983732297, "learning_rate": 3.0694601193365125e-06, "loss": 0.2898, "step": 18575 }, { "epoch": 0.6374742621825669, "grad_norm": 0.7439363382640946, "learning_rate": 3.068947487811046e-06, "loss": 0.3086, "step": 18576 }, { "epoch": 0.6375085792724777, "grad_norm": 0.8454157105613478, "learning_rate": 3.068434880141481e-06, "loss": 0.2526, "step": 18577 }, { "epoch": 0.6375428963623885, "grad_norm": 0.7620369205087032, "learning_rate": 3.0679222963341506e-06, "loss": 0.2304, "step": 18578 }, { "epoch": 0.6375772134522992, "grad_norm": 0.8501807127156127, "learning_rate": 3.067409736395388e-06, "loss": 0.2962, "step": 18579 }, { "epoch": 0.63761153054221, "grad_norm": 0.7689049207863479, "learning_rate": 3.0668972003315255e-06, "loss": 0.2547, "step": 18580 }, { "epoch": 0.6376458476321208, "grad_norm": 0.7963618955391512, "learning_rate": 3.0663846881488922e-06, "loss": 0.2815, "step": 18581 }, { "epoch": 0.6376801647220316, "grad_norm": 0.8114836700619807, "learning_rate": 3.0658721998538242e-06, "loss": 0.2381, "step": 18582 }, { "epoch": 0.6377144818119423, "grad_norm": 0.6970556218308802, "learning_rate": 3.06535973545265e-06, "loss": 0.2365, "step": 18583 }, { "epoch": 0.6377487989018531, "grad_norm": 0.8054486819678845, "learning_rate": 3.064847294951699e-06, "loss": 0.2627, "step": 18584 }, { "epoch": 0.6377831159917638, "grad_norm": 0.7958046723367276, "learning_rate": 3.0643348783573045e-06, "loss": 0.3016, "step": 18585 }, { "epoch": 0.6378174330816747, "grad_norm": 0.737143511963419, "learning_rate": 3.0638224856757954e-06, "loss": 0.2417, "step": 18586 }, { "epoch": 0.6378517501715855, "grad_norm": 0.8336680269979458, "learning_rate": 3.063310116913502e-06, "loss": 0.2891, "step": 18587 }, { "epoch": 0.6378860672614962, "grad_norm": 0.7223032294584127, "learning_rate": 3.0627977720767553e-06, "loss": 0.2943, "step": 18588 }, { "epoch": 0.637920384351407, "grad_norm": 0.8354668750846265, "learning_rate": 3.0622854511718818e-06, "loss": 0.2657, "step": 18589 }, { "epoch": 0.6379547014413178, "grad_norm": 0.7729632421287254, "learning_rate": 3.0617731542052144e-06, "loss": 0.3713, "step": 18590 }, { "epoch": 0.6379890185312286, "grad_norm": 0.7703872327181278, "learning_rate": 3.0612608811830803e-06, "loss": 0.2486, "step": 18591 }, { "epoch": 0.6380233356211393, "grad_norm": 0.8222575236983176, "learning_rate": 3.0607486321118045e-06, "loss": 0.2926, "step": 18592 }, { "epoch": 0.6380576527110501, "grad_norm": 0.7367792440284364, "learning_rate": 3.0602364069977215e-06, "loss": 0.2924, "step": 18593 }, { "epoch": 0.6380919698009608, "grad_norm": 0.9796586860267803, "learning_rate": 3.0597242058471542e-06, "loss": 0.2975, "step": 18594 }, { "epoch": 0.6381262868908717, "grad_norm": 0.8440800505353377, "learning_rate": 3.059212028666432e-06, "loss": 0.2598, "step": 18595 }, { "epoch": 0.6381606039807824, "grad_norm": 0.7334148718596024, "learning_rate": 3.0586998754618846e-06, "loss": 0.2911, "step": 18596 }, { "epoch": 0.6381949210706932, "grad_norm": 0.6878949260486136, "learning_rate": 3.058187746239836e-06, "loss": 0.2474, "step": 18597 }, { "epoch": 0.638229238160604, "grad_norm": 0.7247854842142994, "learning_rate": 3.0576756410066124e-06, "loss": 0.2325, "step": 18598 }, { "epoch": 0.6382635552505147, "grad_norm": 0.7402266020540578, "learning_rate": 3.0571635597685433e-06, "loss": 0.2467, "step": 18599 }, { "epoch": 0.6382978723404256, "grad_norm": 0.811847836313975, "learning_rate": 3.056651502531953e-06, "loss": 0.2918, "step": 18600 }, { "epoch": 0.6383321894303363, "grad_norm": 0.8295250293340113, "learning_rate": 3.0561394693031677e-06, "loss": 0.342, "step": 18601 }, { "epoch": 0.6383665065202471, "grad_norm": 0.7279674511497218, "learning_rate": 3.055627460088512e-06, "loss": 0.2379, "step": 18602 }, { "epoch": 0.6384008236101578, "grad_norm": 0.772512813024256, "learning_rate": 3.055115474894314e-06, "loss": 0.306, "step": 18603 }, { "epoch": 0.6384351407000687, "grad_norm": 0.7347091497964773, "learning_rate": 3.0546035137268943e-06, "loss": 0.2705, "step": 18604 }, { "epoch": 0.6384694577899794, "grad_norm": 0.8660703402504616, "learning_rate": 3.0540915765925826e-06, "loss": 0.2585, "step": 18605 }, { "epoch": 0.6385037748798902, "grad_norm": 0.7229949410948919, "learning_rate": 3.0535796634977e-06, "loss": 0.2273, "step": 18606 }, { "epoch": 0.6385380919698009, "grad_norm": 0.7651913372983372, "learning_rate": 3.05306777444857e-06, "loss": 0.2846, "step": 18607 }, { "epoch": 0.6385724090597117, "grad_norm": 0.7930822737556327, "learning_rate": 3.0525559094515184e-06, "loss": 0.275, "step": 18608 }, { "epoch": 0.6386067261496226, "grad_norm": 0.7622488327023994, "learning_rate": 3.0520440685128695e-06, "loss": 0.2822, "step": 18609 }, { "epoch": 0.6386410432395333, "grad_norm": 0.7932587466415612, "learning_rate": 3.0515322516389425e-06, "loss": 0.2481, "step": 18610 }, { "epoch": 0.638675360329444, "grad_norm": 0.7831113319555266, "learning_rate": 3.0510204588360647e-06, "loss": 0.2305, "step": 18611 }, { "epoch": 0.6387096774193548, "grad_norm": 0.8244605752755483, "learning_rate": 3.0505086901105575e-06, "loss": 0.2971, "step": 18612 }, { "epoch": 0.6387439945092657, "grad_norm": 0.8473783601544648, "learning_rate": 3.0499969454687394e-06, "loss": 0.2757, "step": 18613 }, { "epoch": 0.6387783115991764, "grad_norm": 0.7903175172003548, "learning_rate": 3.049485224916939e-06, "loss": 0.2787, "step": 18614 }, { "epoch": 0.6388126286890872, "grad_norm": 0.7595105714548045, "learning_rate": 3.048973528461473e-06, "loss": 0.2536, "step": 18615 }, { "epoch": 0.6388469457789979, "grad_norm": 0.7673081231455503, "learning_rate": 3.048461856108663e-06, "loss": 0.2471, "step": 18616 }, { "epoch": 0.6388812628689087, "grad_norm": 0.8103079730864001, "learning_rate": 3.047950207864834e-06, "loss": 0.3096, "step": 18617 }, { "epoch": 0.6389155799588195, "grad_norm": 0.6790153375411832, "learning_rate": 3.0474385837363017e-06, "loss": 0.2298, "step": 18618 }, { "epoch": 0.6389498970487303, "grad_norm": 0.8044790363552644, "learning_rate": 3.046926983729392e-06, "loss": 0.2138, "step": 18619 }, { "epoch": 0.638984214138641, "grad_norm": 0.7871008032137279, "learning_rate": 3.0464154078504218e-06, "loss": 0.3146, "step": 18620 }, { "epoch": 0.6390185312285518, "grad_norm": 0.6969486050487601, "learning_rate": 3.0459038561057096e-06, "loss": 0.2431, "step": 18621 }, { "epoch": 0.6390528483184625, "grad_norm": 0.7745570881732293, "learning_rate": 3.0453923285015797e-06, "loss": 0.2706, "step": 18622 }, { "epoch": 0.6390871654083734, "grad_norm": 0.9045390215795371, "learning_rate": 3.044880825044347e-06, "loss": 0.3006, "step": 18623 }, { "epoch": 0.6391214824982842, "grad_norm": 0.7055541196631627, "learning_rate": 3.044369345740331e-06, "loss": 0.2764, "step": 18624 }, { "epoch": 0.6391557995881949, "grad_norm": 0.7324880584234279, "learning_rate": 3.0438578905958543e-06, "loss": 0.2499, "step": 18625 }, { "epoch": 0.6391901166781057, "grad_norm": 0.7773279126183268, "learning_rate": 3.0433464596172323e-06, "loss": 0.2837, "step": 18626 }, { "epoch": 0.6392244337680165, "grad_norm": 0.946357595109085, "learning_rate": 3.042835052810782e-06, "loss": 0.2967, "step": 18627 }, { "epoch": 0.6392587508579273, "grad_norm": 0.7248035778159383, "learning_rate": 3.042323670182825e-06, "loss": 0.243, "step": 18628 }, { "epoch": 0.639293067947838, "grad_norm": 0.7717044483276164, "learning_rate": 3.0418123117396746e-06, "loss": 0.2539, "step": 18629 }, { "epoch": 0.6393273850377488, "grad_norm": 0.7755814578198971, "learning_rate": 3.0413009774876513e-06, "loss": 0.3038, "step": 18630 }, { "epoch": 0.6393617021276595, "grad_norm": 0.8980795349631784, "learning_rate": 3.0407896674330702e-06, "loss": 0.2754, "step": 18631 }, { "epoch": 0.6393960192175704, "grad_norm": 0.8338132139093338, "learning_rate": 3.0402783815822494e-06, "loss": 0.3, "step": 18632 }, { "epoch": 0.6394303363074811, "grad_norm": 0.7216148123354005, "learning_rate": 3.0397671199415025e-06, "loss": 0.2915, "step": 18633 }, { "epoch": 0.6394646533973919, "grad_norm": 0.6741805133734601, "learning_rate": 3.0392558825171493e-06, "loss": 0.2197, "step": 18634 }, { "epoch": 0.6394989704873026, "grad_norm": 0.7457281751438378, "learning_rate": 3.0387446693155045e-06, "loss": 0.2326, "step": 18635 }, { "epoch": 0.6395332875772135, "grad_norm": 0.8856900275306329, "learning_rate": 3.0382334803428792e-06, "loss": 0.269, "step": 18636 }, { "epoch": 0.6395676046671243, "grad_norm": 0.8038265203996171, "learning_rate": 3.037722315605595e-06, "loss": 0.2946, "step": 18637 }, { "epoch": 0.639601921757035, "grad_norm": 0.7965507485914317, "learning_rate": 3.037211175109963e-06, "loss": 0.2614, "step": 18638 }, { "epoch": 0.6396362388469458, "grad_norm": 0.8303362245912258, "learning_rate": 3.036700058862297e-06, "loss": 0.3336, "step": 18639 }, { "epoch": 0.6396705559368565, "grad_norm": 0.91093052714302, "learning_rate": 3.036188966868915e-06, "loss": 0.2551, "step": 18640 }, { "epoch": 0.6397048730267674, "grad_norm": 0.7360477958944699, "learning_rate": 3.035677899136129e-06, "loss": 0.358, "step": 18641 }, { "epoch": 0.6397391901166781, "grad_norm": 0.7760070423477248, "learning_rate": 3.0351668556702495e-06, "loss": 0.2876, "step": 18642 }, { "epoch": 0.6397735072065889, "grad_norm": 0.7106659190885697, "learning_rate": 3.034655836477596e-06, "loss": 0.2378, "step": 18643 }, { "epoch": 0.6398078242964996, "grad_norm": 0.6864234895283288, "learning_rate": 3.0341448415644765e-06, "loss": 0.2022, "step": 18644 }, { "epoch": 0.6398421413864104, "grad_norm": 0.7393477138024024, "learning_rate": 3.0336338709372054e-06, "loss": 0.2219, "step": 18645 }, { "epoch": 0.6398764584763212, "grad_norm": 0.7506835780210187, "learning_rate": 3.0331229246020955e-06, "loss": 0.2626, "step": 18646 }, { "epoch": 0.639910775566232, "grad_norm": 0.8086338348941118, "learning_rate": 3.03261200256546e-06, "loss": 0.3356, "step": 18647 }, { "epoch": 0.6399450926561427, "grad_norm": 0.9432788566084159, "learning_rate": 3.032101104833607e-06, "loss": 0.2736, "step": 18648 }, { "epoch": 0.6399794097460535, "grad_norm": 0.7567743852182518, "learning_rate": 3.0315902314128525e-06, "loss": 0.3608, "step": 18649 }, { "epoch": 0.6400137268359644, "grad_norm": 0.726386744103428, "learning_rate": 3.031079382309504e-06, "loss": 0.2257, "step": 18650 }, { "epoch": 0.6400480439258751, "grad_norm": 0.8281583901070401, "learning_rate": 3.030568557529876e-06, "loss": 0.284, "step": 18651 }, { "epoch": 0.6400823610157859, "grad_norm": 0.8246565713323324, "learning_rate": 3.0300577570802763e-06, "loss": 0.2731, "step": 18652 }, { "epoch": 0.6401166781056966, "grad_norm": 0.757040641619264, "learning_rate": 3.0295469809670166e-06, "loss": 0.2693, "step": 18653 }, { "epoch": 0.6401509951956074, "grad_norm": 0.7830273324091482, "learning_rate": 3.0290362291964063e-06, "loss": 0.2874, "step": 18654 }, { "epoch": 0.6401853122855182, "grad_norm": 0.8205270769061457, "learning_rate": 3.028525501774757e-06, "loss": 0.2939, "step": 18655 }, { "epoch": 0.640219629375429, "grad_norm": 0.6165739028325974, "learning_rate": 3.0280147987083742e-06, "loss": 0.2416, "step": 18656 }, { "epoch": 0.6402539464653397, "grad_norm": 0.7262075969535859, "learning_rate": 3.0275041200035714e-06, "loss": 0.2759, "step": 18657 }, { "epoch": 0.6402882635552505, "grad_norm": 0.8889818561468036, "learning_rate": 3.0269934656666555e-06, "loss": 0.2378, "step": 18658 }, { "epoch": 0.6403225806451613, "grad_norm": 0.7719344137990839, "learning_rate": 3.0264828357039333e-06, "loss": 0.2649, "step": 18659 }, { "epoch": 0.6403568977350721, "grad_norm": 0.7097880101753912, "learning_rate": 3.0259722301217154e-06, "loss": 0.239, "step": 18660 }, { "epoch": 0.6403912148249828, "grad_norm": 0.8329457136399389, "learning_rate": 3.02546164892631e-06, "loss": 0.2482, "step": 18661 }, { "epoch": 0.6404255319148936, "grad_norm": 0.7963315591665713, "learning_rate": 3.024951092124022e-06, "loss": 0.2742, "step": 18662 }, { "epoch": 0.6404598490048043, "grad_norm": 0.7736713965246889, "learning_rate": 3.024440559721163e-06, "loss": 0.2907, "step": 18663 }, { "epoch": 0.6404941660947152, "grad_norm": 0.7710689446251748, "learning_rate": 3.0239300517240372e-06, "loss": 0.2605, "step": 18664 }, { "epoch": 0.640528483184626, "grad_norm": 0.7627191675405145, "learning_rate": 3.02341956813895e-06, "loss": 0.2544, "step": 18665 }, { "epoch": 0.6405628002745367, "grad_norm": 0.7684552863366532, "learning_rate": 3.022909108972211e-06, "loss": 0.3069, "step": 18666 }, { "epoch": 0.6405971173644475, "grad_norm": 0.845744886534961, "learning_rate": 3.0223986742301246e-06, "loss": 0.2973, "step": 18667 }, { "epoch": 0.6406314344543582, "grad_norm": 0.6634585160536878, "learning_rate": 3.021888263918996e-06, "loss": 0.222, "step": 18668 }, { "epoch": 0.6406657515442691, "grad_norm": 0.9028769286165905, "learning_rate": 3.021377878045133e-06, "loss": 0.2755, "step": 18669 }, { "epoch": 0.6407000686341798, "grad_norm": 0.7750178407109407, "learning_rate": 3.0208675166148393e-06, "loss": 0.2629, "step": 18670 }, { "epoch": 0.6407343857240906, "grad_norm": 0.8411853869515586, "learning_rate": 3.020357179634419e-06, "loss": 0.2757, "step": 18671 }, { "epoch": 0.6407687028140013, "grad_norm": 0.7729711027447522, "learning_rate": 3.019846867110179e-06, "loss": 0.2718, "step": 18672 }, { "epoch": 0.6408030199039122, "grad_norm": 0.7303287045615383, "learning_rate": 3.0193365790484218e-06, "loss": 0.2481, "step": 18673 }, { "epoch": 0.640837336993823, "grad_norm": 0.8365246834836546, "learning_rate": 3.018826315455451e-06, "loss": 0.2564, "step": 18674 }, { "epoch": 0.6408716540837337, "grad_norm": 0.8116225978019489, "learning_rate": 3.0183160763375717e-06, "loss": 0.2838, "step": 18675 }, { "epoch": 0.6409059711736445, "grad_norm": 0.8226076574612918, "learning_rate": 3.0178058617010876e-06, "loss": 0.2617, "step": 18676 }, { "epoch": 0.6409402882635552, "grad_norm": 0.7415208009644031, "learning_rate": 3.0172956715522993e-06, "loss": 0.2626, "step": 18677 }, { "epoch": 0.6409746053534661, "grad_norm": 0.7904445917861099, "learning_rate": 3.016785505897514e-06, "loss": 0.2247, "step": 18678 }, { "epoch": 0.6410089224433768, "grad_norm": 0.78630982274627, "learning_rate": 3.0162753647430285e-06, "loss": 0.2396, "step": 18679 }, { "epoch": 0.6410432395332876, "grad_norm": 0.8608914910461317, "learning_rate": 3.015765248095151e-06, "loss": 0.3163, "step": 18680 }, { "epoch": 0.6410775566231983, "grad_norm": 0.8443626122252431, "learning_rate": 3.0152551559601807e-06, "loss": 0.2666, "step": 18681 }, { "epoch": 0.6411118737131092, "grad_norm": 0.8120526541005724, "learning_rate": 3.014745088344416e-06, "loss": 0.2884, "step": 18682 }, { "epoch": 0.6411461908030199, "grad_norm": 0.8590019672929322, "learning_rate": 3.0142350452541622e-06, "loss": 0.2903, "step": 18683 }, { "epoch": 0.6411805078929307, "grad_norm": 0.6991979460419233, "learning_rate": 3.0137250266957207e-06, "loss": 0.2787, "step": 18684 }, { "epoch": 0.6412148249828414, "grad_norm": 0.8220582016569199, "learning_rate": 3.013215032675388e-06, "loss": 0.2873, "step": 18685 }, { "epoch": 0.6412491420727522, "grad_norm": 0.7952588111386123, "learning_rate": 3.01270506319947e-06, "loss": 0.298, "step": 18686 }, { "epoch": 0.641283459162663, "grad_norm": 0.6998966447753584, "learning_rate": 3.012195118274264e-06, "loss": 0.2203, "step": 18687 }, { "epoch": 0.6413177762525738, "grad_norm": 0.8697936151780714, "learning_rate": 3.011685197906068e-06, "loss": 0.275, "step": 18688 }, { "epoch": 0.6413520933424846, "grad_norm": 0.7765921088044488, "learning_rate": 3.0111753021011845e-06, "loss": 0.3604, "step": 18689 }, { "epoch": 0.6413864104323953, "grad_norm": 0.7076237203451562, "learning_rate": 3.0106654308659108e-06, "loss": 0.3049, "step": 18690 }, { "epoch": 0.6414207275223061, "grad_norm": 0.8275730975827214, "learning_rate": 3.010155584206545e-06, "loss": 0.2946, "step": 18691 }, { "epoch": 0.6414550446122169, "grad_norm": 0.7189148431950002, "learning_rate": 3.0096457621293895e-06, "loss": 0.3823, "step": 18692 }, { "epoch": 0.6414893617021277, "grad_norm": 0.7534911738301862, "learning_rate": 3.009135964640739e-06, "loss": 0.3248, "step": 18693 }, { "epoch": 0.6415236787920384, "grad_norm": 0.7701500880296315, "learning_rate": 3.0086261917468918e-06, "loss": 0.2949, "step": 18694 }, { "epoch": 0.6415579958819492, "grad_norm": 0.8318581854956439, "learning_rate": 3.008116443454148e-06, "loss": 0.2967, "step": 18695 }, { "epoch": 0.64159231297186, "grad_norm": 0.8258284034496058, "learning_rate": 3.007606719768802e-06, "loss": 0.3089, "step": 18696 }, { "epoch": 0.6416266300617708, "grad_norm": 0.8204571120257865, "learning_rate": 3.0070970206971523e-06, "loss": 0.2351, "step": 18697 }, { "epoch": 0.6416609471516815, "grad_norm": 0.7807637101682978, "learning_rate": 3.0065873462454953e-06, "loss": 0.2881, "step": 18698 }, { "epoch": 0.6416952642415923, "grad_norm": 0.9286072030139635, "learning_rate": 3.0060776964201282e-06, "loss": 0.3356, "step": 18699 }, { "epoch": 0.641729581331503, "grad_norm": 0.7810206782193279, "learning_rate": 3.005568071227345e-06, "loss": 0.2322, "step": 18700 }, { "epoch": 0.6417638984214139, "grad_norm": 0.8188403757562073, "learning_rate": 3.0050584706734447e-06, "loss": 0.27, "step": 18701 }, { "epoch": 0.6417982155113247, "grad_norm": 0.7363929413510745, "learning_rate": 3.004548894764721e-06, "loss": 0.2926, "step": 18702 }, { "epoch": 0.6418325326012354, "grad_norm": 0.667078980662984, "learning_rate": 3.0040393435074677e-06, "loss": 0.2317, "step": 18703 }, { "epoch": 0.6418668496911462, "grad_norm": 0.7573117883097532, "learning_rate": 3.003529816907982e-06, "loss": 0.2842, "step": 18704 }, { "epoch": 0.641901166781057, "grad_norm": 0.7542658255397483, "learning_rate": 3.003020314972559e-06, "loss": 0.2402, "step": 18705 }, { "epoch": 0.6419354838709678, "grad_norm": 0.7082297037168416, "learning_rate": 3.0025108377074892e-06, "loss": 0.2759, "step": 18706 }, { "epoch": 0.6419698009608785, "grad_norm": 0.740553295580713, "learning_rate": 3.002001385119071e-06, "loss": 0.2872, "step": 18707 }, { "epoch": 0.6420041180507893, "grad_norm": 0.7604803892176599, "learning_rate": 3.001491957213597e-06, "loss": 0.2531, "step": 18708 }, { "epoch": 0.6420384351407, "grad_norm": 0.7633247657807898, "learning_rate": 3.0009825539973574e-06, "loss": 0.3113, "step": 18709 }, { "epoch": 0.6420727522306109, "grad_norm": 0.8657036162697402, "learning_rate": 3.00047317547665e-06, "loss": 0.2991, "step": 18710 }, { "epoch": 0.6421070693205216, "grad_norm": 0.9036704103656932, "learning_rate": 2.999963821657763e-06, "loss": 0.2954, "step": 18711 }, { "epoch": 0.6421413864104324, "grad_norm": 0.740499365798643, "learning_rate": 2.999454492546994e-06, "loss": 0.2372, "step": 18712 }, { "epoch": 0.6421757035003431, "grad_norm": 0.8333148039134199, "learning_rate": 2.9989451881506298e-06, "loss": 0.334, "step": 18713 }, { "epoch": 0.6422100205902539, "grad_norm": 0.9251897481385549, "learning_rate": 2.9984359084749646e-06, "loss": 0.2512, "step": 18714 }, { "epoch": 0.6422443376801648, "grad_norm": 0.8990907205772597, "learning_rate": 2.997926653526292e-06, "loss": 0.2789, "step": 18715 }, { "epoch": 0.6422786547700755, "grad_norm": 0.9470037169746703, "learning_rate": 2.9974174233109023e-06, "loss": 0.2519, "step": 18716 }, { "epoch": 0.6423129718599863, "grad_norm": 0.8402408277039456, "learning_rate": 2.9969082178350827e-06, "loss": 0.2413, "step": 18717 }, { "epoch": 0.642347288949897, "grad_norm": 0.7798231418532457, "learning_rate": 2.9963990371051287e-06, "loss": 0.2566, "step": 18718 }, { "epoch": 0.6423816060398079, "grad_norm": 0.8096033245475909, "learning_rate": 2.9958898811273273e-06, "loss": 0.2867, "step": 18719 }, { "epoch": 0.6424159231297186, "grad_norm": 0.8307141036150752, "learning_rate": 2.9953807499079707e-06, "loss": 0.2603, "step": 18720 }, { "epoch": 0.6424502402196294, "grad_norm": 0.7522585339266705, "learning_rate": 2.9948716434533478e-06, "loss": 0.26, "step": 18721 }, { "epoch": 0.6424845573095401, "grad_norm": 0.7634569081204313, "learning_rate": 2.994362561769748e-06, "loss": 0.2424, "step": 18722 }, { "epoch": 0.6425188743994509, "grad_norm": 0.8575825133322934, "learning_rate": 2.9938535048634588e-06, "loss": 0.273, "step": 18723 }, { "epoch": 0.6425531914893617, "grad_norm": 0.9208239676610933, "learning_rate": 2.993344472740773e-06, "loss": 0.2255, "step": 18724 }, { "epoch": 0.6425875085792725, "grad_norm": 0.7826440239433309, "learning_rate": 2.9928354654079766e-06, "loss": 0.2869, "step": 18725 }, { "epoch": 0.6426218256691832, "grad_norm": 0.7276849591637117, "learning_rate": 2.992326482871356e-06, "loss": 0.2649, "step": 18726 }, { "epoch": 0.642656142759094, "grad_norm": 0.7909671707526346, "learning_rate": 2.991817525137202e-06, "loss": 0.2945, "step": 18727 }, { "epoch": 0.6426904598490049, "grad_norm": 0.6787640871835598, "learning_rate": 2.9913085922118013e-06, "loss": 0.2626, "step": 18728 }, { "epoch": 0.6427247769389156, "grad_norm": 0.7919422232759261, "learning_rate": 2.9907996841014398e-06, "loss": 0.2591, "step": 18729 }, { "epoch": 0.6427590940288264, "grad_norm": 0.8035790555136261, "learning_rate": 2.9902908008124076e-06, "loss": 0.2625, "step": 18730 }, { "epoch": 0.6427934111187371, "grad_norm": 0.743376180760586, "learning_rate": 2.9897819423509894e-06, "loss": 0.2374, "step": 18731 }, { "epoch": 0.6428277282086479, "grad_norm": 0.7779540151012094, "learning_rate": 2.9892731087234692e-06, "loss": 0.2548, "step": 18732 }, { "epoch": 0.6428620452985587, "grad_norm": 0.8741561491468686, "learning_rate": 2.988764299936138e-06, "loss": 0.2994, "step": 18733 }, { "epoch": 0.6428963623884695, "grad_norm": 0.7454791846727478, "learning_rate": 2.988255515995278e-06, "loss": 0.3082, "step": 18734 }, { "epoch": 0.6429306794783802, "grad_norm": 0.7940635192942146, "learning_rate": 2.987746756907175e-06, "loss": 0.2828, "step": 18735 }, { "epoch": 0.642964996568291, "grad_norm": 0.8321937533969785, "learning_rate": 2.987238022678116e-06, "loss": 0.3067, "step": 18736 }, { "epoch": 0.6429993136582017, "grad_norm": 0.7178758932105282, "learning_rate": 2.986729313314385e-06, "loss": 0.2756, "step": 18737 }, { "epoch": 0.6430336307481126, "grad_norm": 0.916240597779077, "learning_rate": 2.9862206288222638e-06, "loss": 0.2789, "step": 18738 }, { "epoch": 0.6430679478380233, "grad_norm": 0.8538193495369399, "learning_rate": 2.985711969208041e-06, "loss": 0.3315, "step": 18739 }, { "epoch": 0.6431022649279341, "grad_norm": 0.8028119693764921, "learning_rate": 2.9852033344779975e-06, "loss": 0.2916, "step": 18740 }, { "epoch": 0.6431365820178448, "grad_norm": 0.9122463669149979, "learning_rate": 2.984694724638418e-06, "loss": 0.2935, "step": 18741 }, { "epoch": 0.6431708991077557, "grad_norm": 0.8296251702090223, "learning_rate": 2.984186139695585e-06, "loss": 0.2411, "step": 18742 }, { "epoch": 0.6432052161976665, "grad_norm": 0.7357236132418139, "learning_rate": 2.9836775796557826e-06, "loss": 0.2449, "step": 18743 }, { "epoch": 0.6432395332875772, "grad_norm": 0.9103879530246111, "learning_rate": 2.9831690445252926e-06, "loss": 0.3564, "step": 18744 }, { "epoch": 0.643273850377488, "grad_norm": 0.870914420090084, "learning_rate": 2.9826605343103986e-06, "loss": 0.2621, "step": 18745 }, { "epoch": 0.6433081674673987, "grad_norm": 0.9706603031077851, "learning_rate": 2.9821520490173794e-06, "loss": 0.2892, "step": 18746 }, { "epoch": 0.6433424845573096, "grad_norm": 0.7802767410304686, "learning_rate": 2.981643588652522e-06, "loss": 0.2583, "step": 18747 }, { "epoch": 0.6433768016472203, "grad_norm": 0.8588179634611117, "learning_rate": 2.9811351532221044e-06, "loss": 0.247, "step": 18748 }, { "epoch": 0.6434111187371311, "grad_norm": 0.7994747121654832, "learning_rate": 2.9806267427324054e-06, "loss": 0.2331, "step": 18749 }, { "epoch": 0.6434454358270418, "grad_norm": 0.7787613590062237, "learning_rate": 2.980118357189711e-06, "loss": 0.3077, "step": 18750 }, { "epoch": 0.6434797529169527, "grad_norm": 0.7685815508496773, "learning_rate": 2.9796099966002995e-06, "loss": 0.2573, "step": 18751 }, { "epoch": 0.6435140700068634, "grad_norm": 0.7271079802447203, "learning_rate": 2.979101660970449e-06, "loss": 0.2765, "step": 18752 }, { "epoch": 0.6435483870967742, "grad_norm": 0.7509877099470937, "learning_rate": 2.9785933503064434e-06, "loss": 0.2898, "step": 18753 }, { "epoch": 0.643582704186685, "grad_norm": 0.7375005873291166, "learning_rate": 2.978085064614561e-06, "loss": 0.3195, "step": 18754 }, { "epoch": 0.6436170212765957, "grad_norm": 0.832101917147375, "learning_rate": 2.9775768039010774e-06, "loss": 0.2733, "step": 18755 }, { "epoch": 0.6436513383665066, "grad_norm": 0.7803761555837743, "learning_rate": 2.9770685681722765e-06, "loss": 0.2447, "step": 18756 }, { "epoch": 0.6436856554564173, "grad_norm": 0.6766838208235262, "learning_rate": 2.9765603574344336e-06, "loss": 0.2502, "step": 18757 }, { "epoch": 0.6437199725463281, "grad_norm": 0.7855611256257367, "learning_rate": 2.9760521716938284e-06, "loss": 0.2573, "step": 18758 }, { "epoch": 0.6437542896362388, "grad_norm": 0.7389648341145624, "learning_rate": 2.97554401095674e-06, "loss": 0.2837, "step": 18759 }, { "epoch": 0.6437886067261496, "grad_norm": 0.6803982888243825, "learning_rate": 2.975035875229446e-06, "loss": 0.2609, "step": 18760 }, { "epoch": 0.6438229238160604, "grad_norm": 0.918318670932443, "learning_rate": 2.9745277645182202e-06, "loss": 0.3202, "step": 18761 }, { "epoch": 0.6438572409059712, "grad_norm": 0.770709374011451, "learning_rate": 2.9740196788293446e-06, "loss": 0.2414, "step": 18762 }, { "epoch": 0.6438915579958819, "grad_norm": 0.7820120464692484, "learning_rate": 2.9735116181690928e-06, "loss": 0.3079, "step": 18763 }, { "epoch": 0.6439258750857927, "grad_norm": 0.8430694497320951, "learning_rate": 2.973003582543742e-06, "loss": 0.2473, "step": 18764 }, { "epoch": 0.6439601921757035, "grad_norm": 0.8422063383902154, "learning_rate": 2.9724955719595694e-06, "loss": 0.3398, "step": 18765 }, { "epoch": 0.6439945092656143, "grad_norm": 0.7771681633680178, "learning_rate": 2.971987586422851e-06, "loss": 0.2979, "step": 18766 }, { "epoch": 0.644028826355525, "grad_norm": 0.7275273322090239, "learning_rate": 2.971479625939859e-06, "loss": 0.298, "step": 18767 }, { "epoch": 0.6440631434454358, "grad_norm": 0.9224832180480506, "learning_rate": 2.9709716905168735e-06, "loss": 0.3404, "step": 18768 }, { "epoch": 0.6440974605353466, "grad_norm": 0.7862743908917291, "learning_rate": 2.9704637801601664e-06, "loss": 0.2506, "step": 18769 }, { "epoch": 0.6441317776252574, "grad_norm": 0.7823954390729839, "learning_rate": 2.9699558948760123e-06, "loss": 0.3032, "step": 18770 }, { "epoch": 0.6441660947151682, "grad_norm": 0.7897456981258084, "learning_rate": 2.9694480346706865e-06, "loss": 0.268, "step": 18771 }, { "epoch": 0.6442004118050789, "grad_norm": 0.9150152345440467, "learning_rate": 2.9689401995504646e-06, "loss": 0.2594, "step": 18772 }, { "epoch": 0.6442347288949897, "grad_norm": 0.780191721460666, "learning_rate": 2.968432389521616e-06, "loss": 0.3586, "step": 18773 }, { "epoch": 0.6442690459849005, "grad_norm": 0.7777590205903618, "learning_rate": 2.9679246045904187e-06, "loss": 0.3097, "step": 18774 }, { "epoch": 0.6443033630748113, "grad_norm": 0.7970191599633419, "learning_rate": 2.967416844763142e-06, "loss": 0.2594, "step": 18775 }, { "epoch": 0.644337680164722, "grad_norm": 0.7545410182827088, "learning_rate": 2.9669091100460622e-06, "loss": 0.3147, "step": 18776 }, { "epoch": 0.6443719972546328, "grad_norm": 0.8882452185678175, "learning_rate": 2.96640140044545e-06, "loss": 0.2875, "step": 18777 }, { "epoch": 0.6444063143445435, "grad_norm": 0.78372722681884, "learning_rate": 2.9658937159675755e-06, "loss": 0.2417, "step": 18778 }, { "epoch": 0.6444406314344544, "grad_norm": 0.7291517321917039, "learning_rate": 2.9653860566187138e-06, "loss": 0.2518, "step": 18779 }, { "epoch": 0.6444749485243652, "grad_norm": 0.8320741449277255, "learning_rate": 2.964878422405136e-06, "loss": 0.2689, "step": 18780 }, { "epoch": 0.6445092656142759, "grad_norm": 0.8632090956691334, "learning_rate": 2.964370813333111e-06, "loss": 0.2908, "step": 18781 }, { "epoch": 0.6445435827041867, "grad_norm": 0.7862391997614789, "learning_rate": 2.9638632294089124e-06, "loss": 0.3041, "step": 18782 }, { "epoch": 0.6445778997940974, "grad_norm": 0.7904202913640065, "learning_rate": 2.96335567063881e-06, "loss": 0.3084, "step": 18783 }, { "epoch": 0.6446122168840083, "grad_norm": 0.7909808579149872, "learning_rate": 2.9628481370290726e-06, "loss": 0.3063, "step": 18784 }, { "epoch": 0.644646533973919, "grad_norm": 0.841261002447755, "learning_rate": 2.9623406285859723e-06, "loss": 0.2557, "step": 18785 }, { "epoch": 0.6446808510638298, "grad_norm": 0.7052266475156744, "learning_rate": 2.9618331453157777e-06, "loss": 0.2259, "step": 18786 }, { "epoch": 0.6447151681537405, "grad_norm": 0.7205010709305514, "learning_rate": 2.961325687224758e-06, "loss": 0.2578, "step": 18787 }, { "epoch": 0.6447494852436514, "grad_norm": 0.8269521569692189, "learning_rate": 2.960818254319183e-06, "loss": 0.2696, "step": 18788 }, { "epoch": 0.6447838023335621, "grad_norm": 0.7176559955889037, "learning_rate": 2.960310846605321e-06, "loss": 0.3128, "step": 18789 }, { "epoch": 0.6448181194234729, "grad_norm": 0.7861091516087487, "learning_rate": 2.959803464089439e-06, "loss": 0.2637, "step": 18790 }, { "epoch": 0.6448524365133836, "grad_norm": 0.6702374986390685, "learning_rate": 2.9592961067778086e-06, "loss": 0.2707, "step": 18791 }, { "epoch": 0.6448867536032944, "grad_norm": 0.8069876618724077, "learning_rate": 2.958788774676696e-06, "loss": 0.2943, "step": 18792 }, { "epoch": 0.6449210706932053, "grad_norm": 0.7307778489544314, "learning_rate": 2.9582814677923654e-06, "loss": 0.3251, "step": 18793 }, { "epoch": 0.644955387783116, "grad_norm": 0.7113560575184695, "learning_rate": 2.957774186131088e-06, "loss": 0.2825, "step": 18794 }, { "epoch": 0.6449897048730268, "grad_norm": 0.7322265576707999, "learning_rate": 2.9572669296991315e-06, "loss": 0.2466, "step": 18795 }, { "epoch": 0.6450240219629375, "grad_norm": 0.8194462857983079, "learning_rate": 2.9567596985027576e-06, "loss": 0.3024, "step": 18796 }, { "epoch": 0.6450583390528484, "grad_norm": 0.7329724879238075, "learning_rate": 2.956252492548237e-06, "loss": 0.3059, "step": 18797 }, { "epoch": 0.6450926561427591, "grad_norm": 0.7646969257794299, "learning_rate": 2.9557453118418356e-06, "loss": 0.2737, "step": 18798 }, { "epoch": 0.6451269732326699, "grad_norm": 0.7718286967839136, "learning_rate": 2.9552381563898143e-06, "loss": 0.2861, "step": 18799 }, { "epoch": 0.6451612903225806, "grad_norm": 0.7597529281980666, "learning_rate": 2.954731026198444e-06, "loss": 0.2512, "step": 18800 }, { "epoch": 0.6451956074124914, "grad_norm": 0.7485552930708425, "learning_rate": 2.9542239212739867e-06, "loss": 0.2964, "step": 18801 }, { "epoch": 0.6452299245024022, "grad_norm": 0.8368255563243128, "learning_rate": 2.9537168416227058e-06, "loss": 0.2953, "step": 18802 }, { "epoch": 0.645264241592313, "grad_norm": 0.766577605983557, "learning_rate": 2.9532097872508704e-06, "loss": 0.2123, "step": 18803 }, { "epoch": 0.6452985586822237, "grad_norm": 0.7775722928589995, "learning_rate": 2.9527027581647405e-06, "loss": 0.3184, "step": 18804 }, { "epoch": 0.6453328757721345, "grad_norm": 0.7792705744065609, "learning_rate": 2.9521957543705802e-06, "loss": 0.2702, "step": 18805 }, { "epoch": 0.6453671928620452, "grad_norm": 0.8743607698150511, "learning_rate": 2.951688775874656e-06, "loss": 0.2863, "step": 18806 }, { "epoch": 0.6454015099519561, "grad_norm": 0.9393681451923463, "learning_rate": 2.9511818226832267e-06, "loss": 0.2789, "step": 18807 }, { "epoch": 0.6454358270418669, "grad_norm": 0.8910357493829921, "learning_rate": 2.950674894802559e-06, "loss": 0.2303, "step": 18808 }, { "epoch": 0.6454701441317776, "grad_norm": 0.8830297444466737, "learning_rate": 2.9501679922389125e-06, "loss": 0.2672, "step": 18809 }, { "epoch": 0.6455044612216884, "grad_norm": 0.7474255632695618, "learning_rate": 2.94966111499855e-06, "loss": 0.2815, "step": 18810 }, { "epoch": 0.6455387783115992, "grad_norm": 0.8351204714806415, "learning_rate": 2.9491542630877355e-06, "loss": 0.297, "step": 18811 }, { "epoch": 0.64557309540151, "grad_norm": 0.8751597208928569, "learning_rate": 2.9486474365127295e-06, "loss": 0.3438, "step": 18812 }, { "epoch": 0.6456074124914207, "grad_norm": 0.7174891594016509, "learning_rate": 2.948140635279791e-06, "loss": 0.2598, "step": 18813 }, { "epoch": 0.6456417295813315, "grad_norm": 0.7387972834727178, "learning_rate": 2.947633859395184e-06, "loss": 0.2674, "step": 18814 }, { "epoch": 0.6456760466712422, "grad_norm": 0.9295307682971145, "learning_rate": 2.9471271088651677e-06, "loss": 0.286, "step": 18815 }, { "epoch": 0.6457103637611531, "grad_norm": 0.7486584201604607, "learning_rate": 2.9466203836960017e-06, "loss": 0.2733, "step": 18816 }, { "epoch": 0.6457446808510638, "grad_norm": 0.7602804986258366, "learning_rate": 2.9461136838939474e-06, "loss": 0.2445, "step": 18817 }, { "epoch": 0.6457789979409746, "grad_norm": 0.6740142910657142, "learning_rate": 2.9456070094652643e-06, "loss": 0.2317, "step": 18818 }, { "epoch": 0.6458133150308853, "grad_norm": 0.7800032485768296, "learning_rate": 2.94510036041621e-06, "loss": 0.2324, "step": 18819 }, { "epoch": 0.6458476321207962, "grad_norm": 0.7785535117938505, "learning_rate": 2.944593736753047e-06, "loss": 0.3228, "step": 18820 }, { "epoch": 0.645881949210707, "grad_norm": 0.8314978880729704, "learning_rate": 2.9440871384820317e-06, "loss": 0.2966, "step": 18821 }, { "epoch": 0.6459162663006177, "grad_norm": 0.7693972126048163, "learning_rate": 2.943580565609421e-06, "loss": 0.2609, "step": 18822 }, { "epoch": 0.6459505833905285, "grad_norm": 0.8315826315654836, "learning_rate": 2.9430740181414768e-06, "loss": 0.307, "step": 18823 }, { "epoch": 0.6459849004804392, "grad_norm": 0.7855953227535546, "learning_rate": 2.942567496084454e-06, "loss": 0.2867, "step": 18824 }, { "epoch": 0.6460192175703501, "grad_norm": 0.660321256674237, "learning_rate": 2.9420609994446104e-06, "loss": 0.2741, "step": 18825 }, { "epoch": 0.6460535346602608, "grad_norm": 0.9036950106221048, "learning_rate": 2.9415545282282052e-06, "loss": 0.3813, "step": 18826 }, { "epoch": 0.6460878517501716, "grad_norm": 0.704574016054449, "learning_rate": 2.9410480824414943e-06, "loss": 0.2378, "step": 18827 }, { "epoch": 0.6461221688400823, "grad_norm": 0.873798821087633, "learning_rate": 2.9405416620907314e-06, "loss": 0.3072, "step": 18828 }, { "epoch": 0.6461564859299931, "grad_norm": 0.7917316832065263, "learning_rate": 2.9400352671821777e-06, "loss": 0.3002, "step": 18829 }, { "epoch": 0.646190803019904, "grad_norm": 0.7576213701419159, "learning_rate": 2.9395288977220855e-06, "loss": 0.2562, "step": 18830 }, { "epoch": 0.6462251201098147, "grad_norm": 0.6922091818534463, "learning_rate": 2.939022553716711e-06, "loss": 0.2626, "step": 18831 }, { "epoch": 0.6462594371997255, "grad_norm": 0.7965790878728028, "learning_rate": 2.9385162351723107e-06, "loss": 0.3253, "step": 18832 }, { "epoch": 0.6462937542896362, "grad_norm": 0.7401842270220698, "learning_rate": 2.938009942095139e-06, "loss": 0.2809, "step": 18833 }, { "epoch": 0.6463280713795471, "grad_norm": 0.9323333031652495, "learning_rate": 2.937503674491449e-06, "loss": 0.3048, "step": 18834 }, { "epoch": 0.6463623884694578, "grad_norm": 0.7236674067829795, "learning_rate": 2.936997432367499e-06, "loss": 0.2472, "step": 18835 }, { "epoch": 0.6463967055593686, "grad_norm": 0.8414620924915506, "learning_rate": 2.936491215729538e-06, "loss": 0.2504, "step": 18836 }, { "epoch": 0.6464310226492793, "grad_norm": 0.7086555850373943, "learning_rate": 2.935985024583823e-06, "loss": 0.2968, "step": 18837 }, { "epoch": 0.6464653397391901, "grad_norm": 0.7861521217580765, "learning_rate": 2.935478858936607e-06, "loss": 0.288, "step": 18838 }, { "epoch": 0.6464996568291009, "grad_norm": 0.8141979110968449, "learning_rate": 2.9349727187941423e-06, "loss": 0.3521, "step": 18839 }, { "epoch": 0.6465339739190117, "grad_norm": 0.785068689835301, "learning_rate": 2.9344666041626824e-06, "loss": 0.2702, "step": 18840 }, { "epoch": 0.6465682910089224, "grad_norm": 0.7127241854944493, "learning_rate": 2.93396051504848e-06, "loss": 0.216, "step": 18841 }, { "epoch": 0.6466026080988332, "grad_norm": 0.9227890715815582, "learning_rate": 2.933454451457785e-06, "loss": 0.2511, "step": 18842 }, { "epoch": 0.646636925188744, "grad_norm": 0.7469957188084727, "learning_rate": 2.932948413396853e-06, "loss": 0.2844, "step": 18843 }, { "epoch": 0.6466712422786548, "grad_norm": 0.7553369752374545, "learning_rate": 2.9324424008719333e-06, "loss": 0.2982, "step": 18844 }, { "epoch": 0.6467055593685656, "grad_norm": 0.7856063126232093, "learning_rate": 2.9319364138892746e-06, "loss": 0.331, "step": 18845 }, { "epoch": 0.6467398764584763, "grad_norm": 0.7592766179441632, "learning_rate": 2.931430452455132e-06, "loss": 0.336, "step": 18846 }, { "epoch": 0.646774193548387, "grad_norm": 0.7937383823464845, "learning_rate": 2.930924516575755e-06, "loss": 0.2682, "step": 18847 }, { "epoch": 0.6468085106382979, "grad_norm": 0.8907051534281923, "learning_rate": 2.930418606257392e-06, "loss": 0.2908, "step": 18848 }, { "epoch": 0.6468428277282087, "grad_norm": 0.6958237401141003, "learning_rate": 2.9299127215062957e-06, "loss": 0.2141, "step": 18849 }, { "epoch": 0.6468771448181194, "grad_norm": 0.8171515789020919, "learning_rate": 2.929406862328714e-06, "loss": 0.2586, "step": 18850 }, { "epoch": 0.6469114619080302, "grad_norm": 0.7513448469676381, "learning_rate": 2.928901028730895e-06, "loss": 0.234, "step": 18851 }, { "epoch": 0.6469457789979409, "grad_norm": 0.8960787070804166, "learning_rate": 2.928395220719091e-06, "loss": 0.3167, "step": 18852 }, { "epoch": 0.6469800960878518, "grad_norm": 0.8627649926662802, "learning_rate": 2.9278894382995477e-06, "loss": 0.2415, "step": 18853 }, { "epoch": 0.6470144131777625, "grad_norm": 0.6837311372874523, "learning_rate": 2.927383681478513e-06, "loss": 0.2364, "step": 18854 }, { "epoch": 0.6470487302676733, "grad_norm": 0.6874045322383809, "learning_rate": 2.926877950262239e-06, "loss": 0.2858, "step": 18855 }, { "epoch": 0.647083047357584, "grad_norm": 0.7552769724559503, "learning_rate": 2.9263722446569708e-06, "loss": 0.2442, "step": 18856 }, { "epoch": 0.6471173644474949, "grad_norm": 0.7718235304894225, "learning_rate": 2.9258665646689545e-06, "loss": 0.3082, "step": 18857 }, { "epoch": 0.6471516815374057, "grad_norm": 0.8372396305643512, "learning_rate": 2.92536091030444e-06, "loss": 0.2956, "step": 18858 }, { "epoch": 0.6471859986273164, "grad_norm": 0.84834437987051, "learning_rate": 2.924855281569673e-06, "loss": 0.2829, "step": 18859 }, { "epoch": 0.6472203157172272, "grad_norm": 0.8168641679047273, "learning_rate": 2.924349678470897e-06, "loss": 0.2501, "step": 18860 }, { "epoch": 0.6472546328071379, "grad_norm": 0.7396167813686254, "learning_rate": 2.9238441010143624e-06, "loss": 0.2473, "step": 18861 }, { "epoch": 0.6472889498970488, "grad_norm": 0.8272605941921912, "learning_rate": 2.923338549206314e-06, "loss": 0.2418, "step": 18862 }, { "epoch": 0.6473232669869595, "grad_norm": 0.7853896131149551, "learning_rate": 2.9228330230529944e-06, "loss": 0.2869, "step": 18863 }, { "epoch": 0.6473575840768703, "grad_norm": 0.7974512557981631, "learning_rate": 2.922327522560654e-06, "loss": 0.29, "step": 18864 }, { "epoch": 0.647391901166781, "grad_norm": 0.7289650791209149, "learning_rate": 2.921822047735534e-06, "loss": 0.2554, "step": 18865 }, { "epoch": 0.6474262182566919, "grad_norm": 0.7582898694933662, "learning_rate": 2.9213165985838778e-06, "loss": 0.2635, "step": 18866 }, { "epoch": 0.6474605353466026, "grad_norm": 0.7215205739613506, "learning_rate": 2.9208111751119333e-06, "loss": 0.2943, "step": 18867 }, { "epoch": 0.6474948524365134, "grad_norm": 0.7157692322600846, "learning_rate": 2.920305777325941e-06, "loss": 0.2694, "step": 18868 }, { "epoch": 0.6475291695264241, "grad_norm": 0.7136873129091866, "learning_rate": 2.9198004052321472e-06, "loss": 0.3057, "step": 18869 }, { "epoch": 0.6475634866163349, "grad_norm": 0.811557338339106, "learning_rate": 2.9192950588367947e-06, "loss": 0.3549, "step": 18870 }, { "epoch": 0.6475978037062458, "grad_norm": 0.7946120779201937, "learning_rate": 2.9187897381461248e-06, "loss": 0.2907, "step": 18871 }, { "epoch": 0.6476321207961565, "grad_norm": 0.7850607679513456, "learning_rate": 2.9182844431663815e-06, "loss": 0.2679, "step": 18872 }, { "epoch": 0.6476664378860673, "grad_norm": 0.7155439805335609, "learning_rate": 2.9177791739038074e-06, "loss": 0.2072, "step": 18873 }, { "epoch": 0.647700754975978, "grad_norm": 0.8080153009444047, "learning_rate": 2.9172739303646435e-06, "loss": 0.3178, "step": 18874 }, { "epoch": 0.6477350720658888, "grad_norm": 0.6842144536206409, "learning_rate": 2.916768712555131e-06, "loss": 0.243, "step": 18875 }, { "epoch": 0.6477693891557996, "grad_norm": 0.7598752087946046, "learning_rate": 2.916263520481516e-06, "loss": 0.2455, "step": 18876 }, { "epoch": 0.6478037062457104, "grad_norm": 0.8740395886022244, "learning_rate": 2.915758354150031e-06, "loss": 0.2391, "step": 18877 }, { "epoch": 0.6478380233356211, "grad_norm": 0.9063276953192022, "learning_rate": 2.9152532135669254e-06, "loss": 0.2591, "step": 18878 }, { "epoch": 0.6478723404255319, "grad_norm": 1.002069704798499, "learning_rate": 2.9147480987384336e-06, "loss": 0.2186, "step": 18879 }, { "epoch": 0.6479066575154427, "grad_norm": 0.682804534589719, "learning_rate": 2.9142430096707985e-06, "loss": 0.2711, "step": 18880 }, { "epoch": 0.6479409746053535, "grad_norm": 0.8899054663516823, "learning_rate": 2.913737946370259e-06, "loss": 0.2572, "step": 18881 }, { "epoch": 0.6479752916952642, "grad_norm": 0.8629923023370899, "learning_rate": 2.9132329088430543e-06, "loss": 0.2411, "step": 18882 }, { "epoch": 0.648009608785175, "grad_norm": 0.8697189003420032, "learning_rate": 2.912727897095425e-06, "loss": 0.2941, "step": 18883 }, { "epoch": 0.6480439258750857, "grad_norm": 0.7253163142321042, "learning_rate": 2.9122229111336085e-06, "loss": 0.258, "step": 18884 }, { "epoch": 0.6480782429649966, "grad_norm": 0.8522264038784464, "learning_rate": 2.911717950963846e-06, "loss": 0.2579, "step": 18885 }, { "epoch": 0.6481125600549074, "grad_norm": 0.7528239411996774, "learning_rate": 2.911213016592369e-06, "loss": 0.2636, "step": 18886 }, { "epoch": 0.6481468771448181, "grad_norm": 0.7834014203452156, "learning_rate": 2.9107081080254225e-06, "loss": 0.2738, "step": 18887 }, { "epoch": 0.6481811942347289, "grad_norm": 0.741224792866245, "learning_rate": 2.9102032252692432e-06, "loss": 0.2851, "step": 18888 }, { "epoch": 0.6482155113246397, "grad_norm": 0.9428945403282104, "learning_rate": 2.9096983683300623e-06, "loss": 0.2795, "step": 18889 }, { "epoch": 0.6482498284145505, "grad_norm": 0.8239171987143568, "learning_rate": 2.9091935372141255e-06, "loss": 0.3259, "step": 18890 }, { "epoch": 0.6482841455044612, "grad_norm": 0.7657708446512906, "learning_rate": 2.9086887319276625e-06, "loss": 0.2501, "step": 18891 }, { "epoch": 0.648318462594372, "grad_norm": 0.7334815006031236, "learning_rate": 2.9081839524769123e-06, "loss": 0.2806, "step": 18892 }, { "epoch": 0.6483527796842827, "grad_norm": 0.7844631777822192, "learning_rate": 2.9076791988681104e-06, "loss": 0.284, "step": 18893 }, { "epoch": 0.6483870967741936, "grad_norm": 0.8240807854398171, "learning_rate": 2.9071744711074924e-06, "loss": 0.2576, "step": 18894 }, { "epoch": 0.6484214138641043, "grad_norm": 0.6700064398148642, "learning_rate": 2.906669769201294e-06, "loss": 0.2493, "step": 18895 }, { "epoch": 0.6484557309540151, "grad_norm": 0.7282306335134846, "learning_rate": 2.9061650931557504e-06, "loss": 0.2521, "step": 18896 }, { "epoch": 0.6484900480439258, "grad_norm": 0.7382741766390593, "learning_rate": 2.9056604429770972e-06, "loss": 0.227, "step": 18897 }, { "epoch": 0.6485243651338366, "grad_norm": 0.7576198997184328, "learning_rate": 2.905155818671564e-06, "loss": 0.306, "step": 18898 }, { "epoch": 0.6485586822237475, "grad_norm": 0.8120910945015813, "learning_rate": 2.9046512202453926e-06, "loss": 0.2754, "step": 18899 }, { "epoch": 0.6485929993136582, "grad_norm": 0.7484063203283997, "learning_rate": 2.9041466477048074e-06, "loss": 0.2832, "step": 18900 }, { "epoch": 0.648627316403569, "grad_norm": 0.7319865762167762, "learning_rate": 2.9036421010560513e-06, "loss": 0.2357, "step": 18901 }, { "epoch": 0.6486616334934797, "grad_norm": 0.745566167969335, "learning_rate": 2.903137580305351e-06, "loss": 0.2612, "step": 18902 }, { "epoch": 0.6486959505833906, "grad_norm": 0.7340511434523834, "learning_rate": 2.902633085458939e-06, "loss": 0.3241, "step": 18903 }, { "epoch": 0.6487302676733013, "grad_norm": 0.7314226234092885, "learning_rate": 2.9021286165230545e-06, "loss": 0.2242, "step": 18904 }, { "epoch": 0.6487645847632121, "grad_norm": 0.8466893354658478, "learning_rate": 2.901624173503923e-06, "loss": 0.3508, "step": 18905 }, { "epoch": 0.6487989018531228, "grad_norm": 0.7557384017412425, "learning_rate": 2.9011197564077775e-06, "loss": 0.2515, "step": 18906 }, { "epoch": 0.6488332189430336, "grad_norm": 0.7622329685590519, "learning_rate": 2.900615365240851e-06, "loss": 0.2594, "step": 18907 }, { "epoch": 0.6488675360329444, "grad_norm": 0.7824276348084527, "learning_rate": 2.900111000009374e-06, "loss": 0.2466, "step": 18908 }, { "epoch": 0.6489018531228552, "grad_norm": 0.785491324274538, "learning_rate": 2.899606660719577e-06, "loss": 0.2478, "step": 18909 }, { "epoch": 0.648936170212766, "grad_norm": 0.7080625745927533, "learning_rate": 2.899102347377691e-06, "loss": 0.2582, "step": 18910 }, { "epoch": 0.6489704873026767, "grad_norm": 0.7452020467424173, "learning_rate": 2.898598059989949e-06, "loss": 0.2695, "step": 18911 }, { "epoch": 0.6490048043925875, "grad_norm": 0.7485036189633179, "learning_rate": 2.8980937985625734e-06, "loss": 0.2564, "step": 18912 }, { "epoch": 0.6490391214824983, "grad_norm": 0.9124600919702138, "learning_rate": 2.897589563101802e-06, "loss": 0.2858, "step": 18913 }, { "epoch": 0.6490734385724091, "grad_norm": 0.7302179073660172, "learning_rate": 2.8970853536138586e-06, "loss": 0.2866, "step": 18914 }, { "epoch": 0.6491077556623198, "grad_norm": 0.6864597331559518, "learning_rate": 2.896581170104974e-06, "loss": 0.2768, "step": 18915 }, { "epoch": 0.6491420727522306, "grad_norm": 0.9331794163810451, "learning_rate": 2.8960770125813765e-06, "loss": 0.2261, "step": 18916 }, { "epoch": 0.6491763898421414, "grad_norm": 0.710257608937156, "learning_rate": 2.8955728810492944e-06, "loss": 0.2772, "step": 18917 }, { "epoch": 0.6492107069320522, "grad_norm": 0.7474052288869417, "learning_rate": 2.8950687755149557e-06, "loss": 0.2295, "step": 18918 }, { "epoch": 0.6492450240219629, "grad_norm": 0.7661265356858603, "learning_rate": 2.8945646959845887e-06, "loss": 0.3065, "step": 18919 }, { "epoch": 0.6492793411118737, "grad_norm": 0.6613516173726913, "learning_rate": 2.8940606424644224e-06, "loss": 0.2862, "step": 18920 }, { "epoch": 0.6493136582017844, "grad_norm": 0.7894368508249064, "learning_rate": 2.893556614960677e-06, "loss": 0.2126, "step": 18921 }, { "epoch": 0.6493479752916953, "grad_norm": 0.8385964855610027, "learning_rate": 2.8930526134795878e-06, "loss": 0.2638, "step": 18922 }, { "epoch": 0.649382292381606, "grad_norm": 0.7654276207204603, "learning_rate": 2.8925486380273756e-06, "loss": 0.3319, "step": 18923 }, { "epoch": 0.6494166094715168, "grad_norm": 0.710027823037656, "learning_rate": 2.8920446886102672e-06, "loss": 0.256, "step": 18924 }, { "epoch": 0.6494509265614276, "grad_norm": 0.8536241363006615, "learning_rate": 2.8915407652344894e-06, "loss": 0.3016, "step": 18925 }, { "epoch": 0.6494852436513384, "grad_norm": 0.6983222901606517, "learning_rate": 2.891036867906267e-06, "loss": 0.2457, "step": 18926 }, { "epoch": 0.6495195607412492, "grad_norm": 0.8493038002859891, "learning_rate": 2.8905329966318255e-06, "loss": 0.2394, "step": 18927 }, { "epoch": 0.6495538778311599, "grad_norm": 0.8608491209055708, "learning_rate": 2.8900291514173893e-06, "loss": 0.2604, "step": 18928 }, { "epoch": 0.6495881949210707, "grad_norm": 0.7779393063385159, "learning_rate": 2.8895253322691854e-06, "loss": 0.2873, "step": 18929 }, { "epoch": 0.6496225120109814, "grad_norm": 0.8142770470701198, "learning_rate": 2.889021539193431e-06, "loss": 0.2255, "step": 18930 }, { "epoch": 0.6496568291008923, "grad_norm": 0.7518681076135033, "learning_rate": 2.8885177721963563e-06, "loss": 0.271, "step": 18931 }, { "epoch": 0.649691146190803, "grad_norm": 0.8307261522635927, "learning_rate": 2.8880140312841833e-06, "loss": 0.2619, "step": 18932 }, { "epoch": 0.6497254632807138, "grad_norm": 0.826480577240424, "learning_rate": 2.887510316463134e-06, "loss": 0.3042, "step": 18933 }, { "epoch": 0.6497597803706245, "grad_norm": 0.784354096637098, "learning_rate": 2.8870066277394336e-06, "loss": 0.2822, "step": 18934 }, { "epoch": 0.6497940974605353, "grad_norm": 0.7657017996017337, "learning_rate": 2.8865029651193e-06, "loss": 0.229, "step": 18935 }, { "epoch": 0.6498284145504462, "grad_norm": 0.7673839996841424, "learning_rate": 2.885999328608961e-06, "loss": 0.2477, "step": 18936 }, { "epoch": 0.6498627316403569, "grad_norm": 0.7754603367603407, "learning_rate": 2.8854957182146336e-06, "loss": 0.2517, "step": 18937 }, { "epoch": 0.6498970487302677, "grad_norm": 0.7414241409996659, "learning_rate": 2.8849921339425423e-06, "loss": 0.2748, "step": 18938 }, { "epoch": 0.6499313658201784, "grad_norm": 0.8223949985355496, "learning_rate": 2.8844885757989065e-06, "loss": 0.2836, "step": 18939 }, { "epoch": 0.6499656829100893, "grad_norm": 0.7981452924362975, "learning_rate": 2.8839850437899487e-06, "loss": 0.2904, "step": 18940 }, { "epoch": 0.65, "grad_norm": 0.8563743877700141, "learning_rate": 2.8834815379218876e-06, "loss": 0.3142, "step": 18941 }, { "epoch": 0.6500343170899108, "grad_norm": 0.6657127501524877, "learning_rate": 2.8829780582009446e-06, "loss": 0.1907, "step": 18942 }, { "epoch": 0.6500686341798215, "grad_norm": 0.8380097912428421, "learning_rate": 2.8824746046333423e-06, "loss": 0.2839, "step": 18943 }, { "epoch": 0.6501029512697323, "grad_norm": 0.7326765596836998, "learning_rate": 2.8819711772252935e-06, "loss": 0.2424, "step": 18944 }, { "epoch": 0.6501372683596431, "grad_norm": 0.7523026333102766, "learning_rate": 2.8814677759830245e-06, "loss": 0.295, "step": 18945 }, { "epoch": 0.6501715854495539, "grad_norm": 0.8542746087508268, "learning_rate": 2.8809644009127496e-06, "loss": 0.3608, "step": 18946 }, { "epoch": 0.6502059025394646, "grad_norm": 0.7833167853253745, "learning_rate": 2.880461052020687e-06, "loss": 0.2784, "step": 18947 }, { "epoch": 0.6502402196293754, "grad_norm": 0.7808644969278967, "learning_rate": 2.879957729313061e-06, "loss": 0.2998, "step": 18948 }, { "epoch": 0.6502745367192863, "grad_norm": 0.7286524592853227, "learning_rate": 2.879454432796084e-06, "loss": 0.2402, "step": 18949 }, { "epoch": 0.650308853809197, "grad_norm": 0.75652135880298, "learning_rate": 2.878951162475974e-06, "loss": 0.2464, "step": 18950 }, { "epoch": 0.6503431708991078, "grad_norm": 0.7877475443645807, "learning_rate": 2.8784479183589503e-06, "loss": 0.2439, "step": 18951 }, { "epoch": 0.6503774879890185, "grad_norm": 0.8412696948103519, "learning_rate": 2.8779447004512305e-06, "loss": 0.3328, "step": 18952 }, { "epoch": 0.6504118050789293, "grad_norm": 0.7810770025721775, "learning_rate": 2.8774415087590266e-06, "loss": 0.3434, "step": 18953 }, { "epoch": 0.6504461221688401, "grad_norm": 0.7884494753895344, "learning_rate": 2.87693834328856e-06, "loss": 0.2918, "step": 18954 }, { "epoch": 0.6504804392587509, "grad_norm": 0.7797449892718994, "learning_rate": 2.8764352040460463e-06, "loss": 0.2824, "step": 18955 }, { "epoch": 0.6505147563486616, "grad_norm": 0.735457530296111, "learning_rate": 2.8759320910376963e-06, "loss": 0.2418, "step": 18956 }, { "epoch": 0.6505490734385724, "grad_norm": 0.7229616482620884, "learning_rate": 2.875429004269733e-06, "loss": 0.2814, "step": 18957 }, { "epoch": 0.6505833905284831, "grad_norm": 0.8335504546138689, "learning_rate": 2.8749259437483654e-06, "loss": 0.3038, "step": 18958 }, { "epoch": 0.650617707618394, "grad_norm": 0.7969930106647666, "learning_rate": 2.8744229094798102e-06, "loss": 0.3208, "step": 18959 }, { "epoch": 0.6506520247083047, "grad_norm": 0.9317999604517831, "learning_rate": 2.8739199014702814e-06, "loss": 0.2784, "step": 18960 }, { "epoch": 0.6506863417982155, "grad_norm": 0.8117690697006796, "learning_rate": 2.8734169197259936e-06, "loss": 0.3415, "step": 18961 }, { "epoch": 0.6507206588881262, "grad_norm": 0.91422640990342, "learning_rate": 2.8729139642531602e-06, "loss": 0.3011, "step": 18962 }, { "epoch": 0.6507549759780371, "grad_norm": 0.7930092586497103, "learning_rate": 2.872411035057995e-06, "loss": 0.2598, "step": 18963 }, { "epoch": 0.6507892930679479, "grad_norm": 0.7626092202330896, "learning_rate": 2.8719081321467114e-06, "loss": 0.2876, "step": 18964 }, { "epoch": 0.6508236101578586, "grad_norm": 0.8596082333740714, "learning_rate": 2.871405255525521e-06, "loss": 0.2842, "step": 18965 }, { "epoch": 0.6508579272477694, "grad_norm": 0.7327011368599621, "learning_rate": 2.8709024052006397e-06, "loss": 0.2549, "step": 18966 }, { "epoch": 0.6508922443376801, "grad_norm": 0.8622638144101572, "learning_rate": 2.8703995811782726e-06, "loss": 0.3278, "step": 18967 }, { "epoch": 0.650926561427591, "grad_norm": 0.8525724255922614, "learning_rate": 2.869896783464639e-06, "loss": 0.2942, "step": 18968 }, { "epoch": 0.6509608785175017, "grad_norm": 0.8531080878929509, "learning_rate": 2.8693940120659456e-06, "loss": 0.2879, "step": 18969 }, { "epoch": 0.6509951956074125, "grad_norm": 0.7426255052481336, "learning_rate": 2.868891266988404e-06, "loss": 0.3054, "step": 18970 }, { "epoch": 0.6510295126973232, "grad_norm": 0.7919469378073427, "learning_rate": 2.868388548238229e-06, "loss": 0.2607, "step": 18971 }, { "epoch": 0.6510638297872341, "grad_norm": 0.7472676791343812, "learning_rate": 2.867885855821627e-06, "loss": 0.2877, "step": 18972 }, { "epoch": 0.6510981468771448, "grad_norm": 0.7826224035407444, "learning_rate": 2.867383189744809e-06, "loss": 0.2582, "step": 18973 }, { "epoch": 0.6511324639670556, "grad_norm": 0.7815116841662983, "learning_rate": 2.866880550013986e-06, "loss": 0.3162, "step": 18974 }, { "epoch": 0.6511667810569663, "grad_norm": 0.7668223293116654, "learning_rate": 2.866377936635366e-06, "loss": 0.2656, "step": 18975 }, { "epoch": 0.6512010981468771, "grad_norm": 0.7737752374368065, "learning_rate": 2.8658753496151596e-06, "loss": 0.3108, "step": 18976 }, { "epoch": 0.651235415236788, "grad_norm": 0.790649668685454, "learning_rate": 2.865372788959575e-06, "loss": 0.275, "step": 18977 }, { "epoch": 0.6512697323266987, "grad_norm": 0.7924955739630147, "learning_rate": 2.864870254674823e-06, "loss": 0.2497, "step": 18978 }, { "epoch": 0.6513040494166095, "grad_norm": 0.7377064538698391, "learning_rate": 2.864367746767105e-06, "loss": 0.2615, "step": 18979 }, { "epoch": 0.6513383665065202, "grad_norm": 0.7845112438462931, "learning_rate": 2.863865265242638e-06, "loss": 0.3388, "step": 18980 }, { "epoch": 0.651372683596431, "grad_norm": 0.7044485758659147, "learning_rate": 2.863362810107624e-06, "loss": 0.265, "step": 18981 }, { "epoch": 0.6514070006863418, "grad_norm": 0.8831972875157524, "learning_rate": 2.86286038136827e-06, "loss": 0.275, "step": 18982 }, { "epoch": 0.6514413177762526, "grad_norm": 0.7862483029236368, "learning_rate": 2.8623579790307855e-06, "loss": 0.2887, "step": 18983 }, { "epoch": 0.6514756348661633, "grad_norm": 0.7052004558820817, "learning_rate": 2.861855603101375e-06, "loss": 0.2564, "step": 18984 }, { "epoch": 0.6515099519560741, "grad_norm": 0.7921170701399473, "learning_rate": 2.861353253586246e-06, "loss": 0.2359, "step": 18985 }, { "epoch": 0.651544269045985, "grad_norm": 0.8702255792899679, "learning_rate": 2.8608509304916045e-06, "loss": 0.2821, "step": 18986 }, { "epoch": 0.6515785861358957, "grad_norm": 1.0292345897097728, "learning_rate": 2.860348633823657e-06, "loss": 0.2526, "step": 18987 }, { "epoch": 0.6516129032258065, "grad_norm": 0.7776138302939066, "learning_rate": 2.8598463635886043e-06, "loss": 0.2743, "step": 18988 }, { "epoch": 0.6516472203157172, "grad_norm": 0.7689715076046897, "learning_rate": 2.859344119792658e-06, "loss": 0.2626, "step": 18989 }, { "epoch": 0.651681537405628, "grad_norm": 0.7301401530686809, "learning_rate": 2.8588419024420176e-06, "loss": 0.2641, "step": 18990 }, { "epoch": 0.6517158544955388, "grad_norm": 0.6457989583987572, "learning_rate": 2.8583397115428892e-06, "loss": 0.2157, "step": 18991 }, { "epoch": 0.6517501715854496, "grad_norm": 0.9815165446671721, "learning_rate": 2.857837547101476e-06, "loss": 0.2401, "step": 18992 }, { "epoch": 0.6517844886753603, "grad_norm": 0.7350091480560158, "learning_rate": 2.8573354091239812e-06, "loss": 0.341, "step": 18993 }, { "epoch": 0.6518188057652711, "grad_norm": 0.9260493300084497, "learning_rate": 2.8568332976166136e-06, "loss": 0.2997, "step": 18994 }, { "epoch": 0.6518531228551819, "grad_norm": 0.8878763463880817, "learning_rate": 2.856331212585569e-06, "loss": 0.2841, "step": 18995 }, { "epoch": 0.6518874399450927, "grad_norm": 0.7465751124747508, "learning_rate": 2.8558291540370542e-06, "loss": 0.283, "step": 18996 }, { "epoch": 0.6519217570350034, "grad_norm": 0.7830454268462924, "learning_rate": 2.8553271219772694e-06, "loss": 0.2347, "step": 18997 }, { "epoch": 0.6519560741249142, "grad_norm": 0.7872768617635791, "learning_rate": 2.8548251164124176e-06, "loss": 0.263, "step": 18998 }, { "epoch": 0.6519903912148249, "grad_norm": 0.6891305840395026, "learning_rate": 2.8543231373487005e-06, "loss": 0.2625, "step": 18999 }, { "epoch": 0.6520247083047358, "grad_norm": 0.7606544055321027, "learning_rate": 2.8538211847923196e-06, "loss": 0.2272, "step": 19000 }, { "epoch": 0.6520590253946466, "grad_norm": 0.7090462786172843, "learning_rate": 2.853319258749478e-06, "loss": 0.2322, "step": 19001 }, { "epoch": 0.6520933424845573, "grad_norm": 0.7788462522583881, "learning_rate": 2.8528173592263696e-06, "loss": 0.2564, "step": 19002 }, { "epoch": 0.652127659574468, "grad_norm": 0.8826233663516375, "learning_rate": 2.852315486229204e-06, "loss": 0.2553, "step": 19003 }, { "epoch": 0.6521619766643788, "grad_norm": 0.77737211617235, "learning_rate": 2.8518136397641736e-06, "loss": 0.3109, "step": 19004 }, { "epoch": 0.6521962937542897, "grad_norm": 0.8124752661442372, "learning_rate": 2.851311819837482e-06, "loss": 0.3483, "step": 19005 }, { "epoch": 0.6522306108442004, "grad_norm": 0.794828904689203, "learning_rate": 2.850810026455328e-06, "loss": 0.2961, "step": 19006 }, { "epoch": 0.6522649279341112, "grad_norm": 0.8233077716492233, "learning_rate": 2.8503082596239097e-06, "loss": 0.2368, "step": 19007 }, { "epoch": 0.6522992450240219, "grad_norm": 0.6993947776395079, "learning_rate": 2.849806519349427e-06, "loss": 0.2436, "step": 19008 }, { "epoch": 0.6523335621139328, "grad_norm": 0.7392213451851701, "learning_rate": 2.849304805638078e-06, "loss": 0.2566, "step": 19009 }, { "epoch": 0.6523678792038435, "grad_norm": 0.7796233792339227, "learning_rate": 2.8488031184960617e-06, "loss": 0.2407, "step": 19010 }, { "epoch": 0.6524021962937543, "grad_norm": 0.7496783269508532, "learning_rate": 2.848301457929572e-06, "loss": 0.2656, "step": 19011 }, { "epoch": 0.652436513383665, "grad_norm": 0.7541136346631834, "learning_rate": 2.8477998239448123e-06, "loss": 0.3297, "step": 19012 }, { "epoch": 0.6524708304735758, "grad_norm": 0.7921944655247505, "learning_rate": 2.8472982165479745e-06, "loss": 0.2626, "step": 19013 }, { "epoch": 0.6525051475634867, "grad_norm": 0.6991434707192722, "learning_rate": 2.8467966357452558e-06, "loss": 0.2553, "step": 19014 }, { "epoch": 0.6525394646533974, "grad_norm": 0.8153145919254026, "learning_rate": 2.8462950815428588e-06, "loss": 0.2407, "step": 19015 }, { "epoch": 0.6525737817433082, "grad_norm": 0.7435907606455953, "learning_rate": 2.845793553946973e-06, "loss": 0.2534, "step": 19016 }, { "epoch": 0.6526080988332189, "grad_norm": 0.8863330918647773, "learning_rate": 2.8452920529637952e-06, "loss": 0.27, "step": 19017 }, { "epoch": 0.6526424159231298, "grad_norm": 0.8458869073391698, "learning_rate": 2.844790578599522e-06, "loss": 0.2336, "step": 19018 }, { "epoch": 0.6526767330130405, "grad_norm": 0.7235018471938408, "learning_rate": 2.8442891308603493e-06, "loss": 0.3059, "step": 19019 }, { "epoch": 0.6527110501029513, "grad_norm": 0.7469791439393532, "learning_rate": 2.8437877097524714e-06, "loss": 0.3168, "step": 19020 }, { "epoch": 0.652745367192862, "grad_norm": 0.7592962439759051, "learning_rate": 2.843286315282082e-06, "loss": 0.2548, "step": 19021 }, { "epoch": 0.6527796842827728, "grad_norm": 0.7491583860154861, "learning_rate": 2.842784947455377e-06, "loss": 0.2574, "step": 19022 }, { "epoch": 0.6528140013726836, "grad_norm": 0.8045780267925253, "learning_rate": 2.8422836062785454e-06, "loss": 0.2543, "step": 19023 }, { "epoch": 0.6528483184625944, "grad_norm": 0.8049269672633402, "learning_rate": 2.8417822917577886e-06, "loss": 0.2276, "step": 19024 }, { "epoch": 0.6528826355525051, "grad_norm": 0.8191595254957295, "learning_rate": 2.841281003899291e-06, "loss": 0.2327, "step": 19025 }, { "epoch": 0.6529169526424159, "grad_norm": 0.7554814206070342, "learning_rate": 2.8407797427092537e-06, "loss": 0.2706, "step": 19026 }, { "epoch": 0.6529512697323266, "grad_norm": 0.7745613833745382, "learning_rate": 2.8402785081938635e-06, "loss": 0.2406, "step": 19027 }, { "epoch": 0.6529855868222375, "grad_norm": 0.9035692681353982, "learning_rate": 2.8397773003593145e-06, "loss": 0.2613, "step": 19028 }, { "epoch": 0.6530199039121483, "grad_norm": 0.7171261653845523, "learning_rate": 2.839276119211798e-06, "loss": 0.2681, "step": 19029 }, { "epoch": 0.653054221002059, "grad_norm": 0.797456633802242, "learning_rate": 2.838774964757506e-06, "loss": 0.2504, "step": 19030 }, { "epoch": 0.6530885380919698, "grad_norm": 0.7137436898201184, "learning_rate": 2.8382738370026286e-06, "loss": 0.2698, "step": 19031 }, { "epoch": 0.6531228551818806, "grad_norm": 0.8292585824200698, "learning_rate": 2.837772735953359e-06, "loss": 0.2741, "step": 19032 }, { "epoch": 0.6531571722717914, "grad_norm": 0.7492601550355247, "learning_rate": 2.8372716616158878e-06, "loss": 0.2881, "step": 19033 }, { "epoch": 0.6531914893617021, "grad_norm": 0.7340774311009719, "learning_rate": 2.8367706139963997e-06, "loss": 0.2672, "step": 19034 }, { "epoch": 0.6532258064516129, "grad_norm": 0.8231456628687418, "learning_rate": 2.836269593101093e-06, "loss": 0.2914, "step": 19035 }, { "epoch": 0.6532601235415236, "grad_norm": 0.7791126625849721, "learning_rate": 2.8357685989361504e-06, "loss": 0.2576, "step": 19036 }, { "epoch": 0.6532944406314345, "grad_norm": 1.0045162842229738, "learning_rate": 2.8352676315077617e-06, "loss": 0.2713, "step": 19037 }, { "epoch": 0.6533287577213452, "grad_norm": 0.8381258767746572, "learning_rate": 2.834766690822121e-06, "loss": 0.2452, "step": 19038 }, { "epoch": 0.653363074811256, "grad_norm": 0.7355331104525373, "learning_rate": 2.834265776885412e-06, "loss": 0.2964, "step": 19039 }, { "epoch": 0.6533973919011667, "grad_norm": 0.7688036046275701, "learning_rate": 2.8337648897038237e-06, "loss": 0.275, "step": 19040 }, { "epoch": 0.6534317089910776, "grad_norm": 1.079641177920342, "learning_rate": 2.8332640292835454e-06, "loss": 0.3146, "step": 19041 }, { "epoch": 0.6534660260809884, "grad_norm": 0.8582225188019549, "learning_rate": 2.832763195630764e-06, "loss": 0.3099, "step": 19042 }, { "epoch": 0.6535003431708991, "grad_norm": 0.7367546392259404, "learning_rate": 2.832262388751666e-06, "loss": 0.2566, "step": 19043 }, { "epoch": 0.6535346602608099, "grad_norm": 0.6590714783331653, "learning_rate": 2.831761608652439e-06, "loss": 0.3048, "step": 19044 }, { "epoch": 0.6535689773507206, "grad_norm": 0.777304337541964, "learning_rate": 2.8312608553392717e-06, "loss": 0.3234, "step": 19045 }, { "epoch": 0.6536032944406315, "grad_norm": 0.8442210515936202, "learning_rate": 2.8307601288183434e-06, "loss": 0.2573, "step": 19046 }, { "epoch": 0.6536376115305422, "grad_norm": 0.7458145951670626, "learning_rate": 2.830259429095849e-06, "loss": 0.2207, "step": 19047 }, { "epoch": 0.653671928620453, "grad_norm": 0.8401357493969935, "learning_rate": 2.829758756177968e-06, "loss": 0.2406, "step": 19048 }, { "epoch": 0.6537062457103637, "grad_norm": 0.724586968613761, "learning_rate": 2.8292581100708865e-06, "loss": 0.2749, "step": 19049 }, { "epoch": 0.6537405628002745, "grad_norm": 0.7588767028782338, "learning_rate": 2.828757490780791e-06, "loss": 0.2557, "step": 19050 }, { "epoch": 0.6537748798901853, "grad_norm": 0.7656800260932859, "learning_rate": 2.828256898313865e-06, "loss": 0.2468, "step": 19051 }, { "epoch": 0.6538091969800961, "grad_norm": 0.7853938814168825, "learning_rate": 2.8277563326762925e-06, "loss": 0.2637, "step": 19052 }, { "epoch": 0.6538435140700068, "grad_norm": 0.6307014492180556, "learning_rate": 2.8272557938742583e-06, "loss": 0.2737, "step": 19053 }, { "epoch": 0.6538778311599176, "grad_norm": 0.6858452813231811, "learning_rate": 2.8267552819139478e-06, "loss": 0.2085, "step": 19054 }, { "epoch": 0.6539121482498285, "grad_norm": 0.7270419870833693, "learning_rate": 2.8262547968015374e-06, "loss": 0.2887, "step": 19055 }, { "epoch": 0.6539464653397392, "grad_norm": 0.8762538413940301, "learning_rate": 2.8257543385432195e-06, "loss": 0.2583, "step": 19056 }, { "epoch": 0.65398078242965, "grad_norm": 0.7469296139963867, "learning_rate": 2.8252539071451675e-06, "loss": 0.255, "step": 19057 }, { "epoch": 0.6540150995195607, "grad_norm": 0.7379404746933527, "learning_rate": 2.8247535026135696e-06, "loss": 0.2627, "step": 19058 }, { "epoch": 0.6540494166094715, "grad_norm": 0.747110021292161, "learning_rate": 2.824253124954609e-06, "loss": 0.2965, "step": 19059 }, { "epoch": 0.6540837336993823, "grad_norm": 0.7768099348601162, "learning_rate": 2.8237527741744595e-06, "loss": 0.3038, "step": 19060 }, { "epoch": 0.6541180507892931, "grad_norm": 0.7780967003667203, "learning_rate": 2.8232524502793115e-06, "loss": 0.2849, "step": 19061 }, { "epoch": 0.6541523678792038, "grad_norm": 0.72030467925087, "learning_rate": 2.8227521532753398e-06, "loss": 0.2384, "step": 19062 }, { "epoch": 0.6541866849691146, "grad_norm": 0.7076559605881685, "learning_rate": 2.822251883168726e-06, "loss": 0.2581, "step": 19063 }, { "epoch": 0.6542210020590254, "grad_norm": 0.8029161225004854, "learning_rate": 2.821751639965652e-06, "loss": 0.2581, "step": 19064 }, { "epoch": 0.6542553191489362, "grad_norm": 0.7135514775963098, "learning_rate": 2.8212514236722965e-06, "loss": 0.2862, "step": 19065 }, { "epoch": 0.654289636238847, "grad_norm": 0.7399403769147277, "learning_rate": 2.82075123429484e-06, "loss": 0.2461, "step": 19066 }, { "epoch": 0.6543239533287577, "grad_norm": 0.7506119000854915, "learning_rate": 2.820251071839461e-06, "loss": 0.3186, "step": 19067 }, { "epoch": 0.6543582704186685, "grad_norm": 0.756634429120704, "learning_rate": 2.819750936312341e-06, "loss": 0.2488, "step": 19068 }, { "epoch": 0.6543925875085793, "grad_norm": 0.7505386858127459, "learning_rate": 2.8192508277196518e-06, "loss": 0.2194, "step": 19069 }, { "epoch": 0.6544269045984901, "grad_norm": 0.7795635854740917, "learning_rate": 2.81875074606758e-06, "loss": 0.3027, "step": 19070 }, { "epoch": 0.6544612216884008, "grad_norm": 0.7440767304723379, "learning_rate": 2.8182506913622977e-06, "loss": 0.2852, "step": 19071 }, { "epoch": 0.6544955387783116, "grad_norm": 0.8229307656965327, "learning_rate": 2.817750663609985e-06, "loss": 0.2771, "step": 19072 }, { "epoch": 0.6545298558682223, "grad_norm": 0.7959628827700842, "learning_rate": 2.8172506628168177e-06, "loss": 0.292, "step": 19073 }, { "epoch": 0.6545641729581332, "grad_norm": 0.9072975811302656, "learning_rate": 2.8167506889889738e-06, "loss": 0.2579, "step": 19074 }, { "epoch": 0.6545984900480439, "grad_norm": 0.6824536110321534, "learning_rate": 2.81625074213263e-06, "loss": 0.2549, "step": 19075 }, { "epoch": 0.6546328071379547, "grad_norm": 0.8021436644306125, "learning_rate": 2.8157508222539624e-06, "loss": 0.2516, "step": 19076 }, { "epoch": 0.6546671242278654, "grad_norm": 0.7151682262828795, "learning_rate": 2.815250929359149e-06, "loss": 0.2288, "step": 19077 }, { "epoch": 0.6547014413177763, "grad_norm": 0.7327493246570645, "learning_rate": 2.8147510634543584e-06, "loss": 0.2231, "step": 19078 }, { "epoch": 0.654735758407687, "grad_norm": 0.8089911384945353, "learning_rate": 2.8142512245457755e-06, "loss": 0.291, "step": 19079 }, { "epoch": 0.6547700754975978, "grad_norm": 0.8086727830905818, "learning_rate": 2.8137514126395683e-06, "loss": 0.2651, "step": 19080 }, { "epoch": 0.6548043925875086, "grad_norm": 0.7689487256259753, "learning_rate": 2.813251627741911e-06, "loss": 0.2299, "step": 19081 }, { "epoch": 0.6548387096774193, "grad_norm": 0.8386426716362464, "learning_rate": 2.8127518698589844e-06, "loss": 0.2837, "step": 19082 }, { "epoch": 0.6548730267673302, "grad_norm": 0.7693959673165833, "learning_rate": 2.8122521389969565e-06, "loss": 0.2851, "step": 19083 }, { "epoch": 0.6549073438572409, "grad_norm": 0.762919529513333, "learning_rate": 2.811752435162003e-06, "loss": 0.2756, "step": 19084 }, { "epoch": 0.6549416609471517, "grad_norm": 0.7220835234812686, "learning_rate": 2.8112527583602965e-06, "loss": 0.2666, "step": 19085 }, { "epoch": 0.6549759780370624, "grad_norm": 0.7511446469741079, "learning_rate": 2.8107531085980103e-06, "loss": 0.2516, "step": 19086 }, { "epoch": 0.6550102951269733, "grad_norm": 0.7248023956203339, "learning_rate": 2.8102534858813176e-06, "loss": 0.3025, "step": 19087 }, { "epoch": 0.655044612216884, "grad_norm": 0.7522897391128807, "learning_rate": 2.8097538902163894e-06, "loss": 0.2884, "step": 19088 }, { "epoch": 0.6550789293067948, "grad_norm": 0.9564202947781246, "learning_rate": 2.809254321609399e-06, "loss": 0.2423, "step": 19089 }, { "epoch": 0.6551132463967055, "grad_norm": 0.7992446310627671, "learning_rate": 2.8087547800665166e-06, "loss": 0.2755, "step": 19090 }, { "epoch": 0.6551475634866163, "grad_norm": 0.8033437803058024, "learning_rate": 2.8082552655939167e-06, "loss": 0.2581, "step": 19091 }, { "epoch": 0.6551818805765272, "grad_norm": 1.1836328825550175, "learning_rate": 2.807755778197763e-06, "loss": 0.2998, "step": 19092 }, { "epoch": 0.6552161976664379, "grad_norm": 0.8167545134667006, "learning_rate": 2.8072563178842355e-06, "loss": 0.2651, "step": 19093 }, { "epoch": 0.6552505147563487, "grad_norm": 0.8335467365913822, "learning_rate": 2.806756884659497e-06, "loss": 0.2792, "step": 19094 }, { "epoch": 0.6552848318462594, "grad_norm": 0.7012747323866445, "learning_rate": 2.8062574785297207e-06, "loss": 0.2196, "step": 19095 }, { "epoch": 0.6553191489361702, "grad_norm": 0.7325400600682055, "learning_rate": 2.805758099501076e-06, "loss": 0.2502, "step": 19096 }, { "epoch": 0.655353466026081, "grad_norm": 0.7980589482549657, "learning_rate": 2.805258747579731e-06, "loss": 0.2877, "step": 19097 }, { "epoch": 0.6553877831159918, "grad_norm": 0.8019837623632248, "learning_rate": 2.8047594227718556e-06, "loss": 0.2665, "step": 19098 }, { "epoch": 0.6554221002059025, "grad_norm": 0.8573382104827535, "learning_rate": 2.8042601250836175e-06, "loss": 0.2766, "step": 19099 }, { "epoch": 0.6554564172958133, "grad_norm": 0.6885036628027982, "learning_rate": 2.803760854521188e-06, "loss": 0.2637, "step": 19100 }, { "epoch": 0.6554907343857241, "grad_norm": 0.8146557553106761, "learning_rate": 2.803261611090729e-06, "loss": 0.2996, "step": 19101 }, { "epoch": 0.6555250514756349, "grad_norm": 0.8056932249832087, "learning_rate": 2.802762394798415e-06, "loss": 0.3046, "step": 19102 }, { "epoch": 0.6555593685655456, "grad_norm": 0.7430054794438213, "learning_rate": 2.8022632056504083e-06, "loss": 0.3497, "step": 19103 }, { "epoch": 0.6555936856554564, "grad_norm": 0.7340763217807085, "learning_rate": 2.8017640436528753e-06, "loss": 0.2702, "step": 19104 }, { "epoch": 0.6556280027453671, "grad_norm": 0.697371613838242, "learning_rate": 2.801264908811989e-06, "loss": 0.3132, "step": 19105 }, { "epoch": 0.655662319835278, "grad_norm": 0.7878525343397487, "learning_rate": 2.800765801133909e-06, "loss": 0.2746, "step": 19106 }, { "epoch": 0.6556966369251888, "grad_norm": 0.8924371579285795, "learning_rate": 2.800266720624803e-06, "loss": 0.298, "step": 19107 }, { "epoch": 0.6557309540150995, "grad_norm": 0.7330831238214588, "learning_rate": 2.7997676672908376e-06, "loss": 0.2302, "step": 19108 }, { "epoch": 0.6557652711050103, "grad_norm": 0.7129464013652871, "learning_rate": 2.7992686411381774e-06, "loss": 0.2532, "step": 19109 }, { "epoch": 0.6557995881949211, "grad_norm": 0.8634538444456148, "learning_rate": 2.798769642172987e-06, "loss": 0.2893, "step": 19110 }, { "epoch": 0.6558339052848319, "grad_norm": 0.7989379366327916, "learning_rate": 2.798270670401432e-06, "loss": 0.2906, "step": 19111 }, { "epoch": 0.6558682223747426, "grad_norm": 0.758934053987319, "learning_rate": 2.7977717258296768e-06, "loss": 0.2955, "step": 19112 }, { "epoch": 0.6559025394646534, "grad_norm": 0.8209641488593438, "learning_rate": 2.7972728084638813e-06, "loss": 0.302, "step": 19113 }, { "epoch": 0.6559368565545641, "grad_norm": 0.8042186246841617, "learning_rate": 2.7967739183102153e-06, "loss": 0.2647, "step": 19114 }, { "epoch": 0.655971173644475, "grad_norm": 0.7680935159268533, "learning_rate": 2.7962750553748375e-06, "loss": 0.2767, "step": 19115 }, { "epoch": 0.6560054907343857, "grad_norm": 0.903442963088288, "learning_rate": 2.7957762196639114e-06, "loss": 0.2736, "step": 19116 }, { "epoch": 0.6560398078242965, "grad_norm": 0.8389539332448166, "learning_rate": 2.7952774111836012e-06, "loss": 0.2313, "step": 19117 }, { "epoch": 0.6560741249142072, "grad_norm": 0.6670656845754697, "learning_rate": 2.7947786299400674e-06, "loss": 0.2608, "step": 19118 }, { "epoch": 0.656108442004118, "grad_norm": 0.7950965355375457, "learning_rate": 2.794279875939473e-06, "loss": 0.2859, "step": 19119 }, { "epoch": 0.6561427590940289, "grad_norm": 0.7136289180698493, "learning_rate": 2.793781149187979e-06, "loss": 0.2421, "step": 19120 }, { "epoch": 0.6561770761839396, "grad_norm": 0.7752441257094941, "learning_rate": 2.7932824496917466e-06, "loss": 0.2428, "step": 19121 }, { "epoch": 0.6562113932738504, "grad_norm": 0.7813007411526086, "learning_rate": 2.7927837774569367e-06, "loss": 0.2555, "step": 19122 }, { "epoch": 0.6562457103637611, "grad_norm": 0.8923485939141482, "learning_rate": 2.7922851324897123e-06, "loss": 0.3408, "step": 19123 }, { "epoch": 0.656280027453672, "grad_norm": 0.789083675289884, "learning_rate": 2.7917865147962263e-06, "loss": 0.2507, "step": 19124 }, { "epoch": 0.6563143445435827, "grad_norm": 0.7859929633203216, "learning_rate": 2.7912879243826464e-06, "loss": 0.2436, "step": 19125 }, { "epoch": 0.6563486616334935, "grad_norm": 0.7190393644005523, "learning_rate": 2.790789361255131e-06, "loss": 0.213, "step": 19126 }, { "epoch": 0.6563829787234042, "grad_norm": 0.8236396795271107, "learning_rate": 2.7902908254198326e-06, "loss": 0.2912, "step": 19127 }, { "epoch": 0.656417295813315, "grad_norm": 0.8112366925471448, "learning_rate": 2.789792316882919e-06, "loss": 0.242, "step": 19128 }, { "epoch": 0.6564516129032258, "grad_norm": 0.9918903997163007, "learning_rate": 2.789293835650543e-06, "loss": 0.245, "step": 19129 }, { "epoch": 0.6564859299931366, "grad_norm": 0.7578049691937668, "learning_rate": 2.7887953817288633e-06, "loss": 0.2594, "step": 19130 }, { "epoch": 0.6565202470830473, "grad_norm": 0.7989064613062125, "learning_rate": 2.788296955124039e-06, "loss": 0.3268, "step": 19131 }, { "epoch": 0.6565545641729581, "grad_norm": 0.7439781598766589, "learning_rate": 2.787798555842227e-06, "loss": 0.3023, "step": 19132 }, { "epoch": 0.656588881262869, "grad_norm": 0.7656597215650538, "learning_rate": 2.7873001838895852e-06, "loss": 0.2871, "step": 19133 }, { "epoch": 0.6566231983527797, "grad_norm": 0.7698027906724934, "learning_rate": 2.78680183927227e-06, "loss": 0.3312, "step": 19134 }, { "epoch": 0.6566575154426905, "grad_norm": 0.949681969930683, "learning_rate": 2.7863035219964397e-06, "loss": 0.2459, "step": 19135 }, { "epoch": 0.6566918325326012, "grad_norm": 0.743246838538052, "learning_rate": 2.7858052320682437e-06, "loss": 0.2346, "step": 19136 }, { "epoch": 0.656726149622512, "grad_norm": 0.8484535068599278, "learning_rate": 2.7853069694938473e-06, "loss": 0.2598, "step": 19137 }, { "epoch": 0.6567604667124228, "grad_norm": 0.7722522860547821, "learning_rate": 2.7848087342793995e-06, "loss": 0.3245, "step": 19138 }, { "epoch": 0.6567947838023336, "grad_norm": 0.8353244515603191, "learning_rate": 2.784310526431057e-06, "loss": 0.2812, "step": 19139 }, { "epoch": 0.6568291008922443, "grad_norm": 0.8338556951068045, "learning_rate": 2.783812345954974e-06, "loss": 0.2818, "step": 19140 }, { "epoch": 0.6568634179821551, "grad_norm": 0.7338411537654264, "learning_rate": 2.7833141928573053e-06, "loss": 0.275, "step": 19141 }, { "epoch": 0.6568977350720658, "grad_norm": 0.7813653623007203, "learning_rate": 2.7828160671442063e-06, "loss": 0.2829, "step": 19142 }, { "epoch": 0.6569320521619767, "grad_norm": 0.8009629189430115, "learning_rate": 2.7823179688218295e-06, "loss": 0.2592, "step": 19143 }, { "epoch": 0.6569663692518874, "grad_norm": 0.7802417390723917, "learning_rate": 2.7818198978963305e-06, "loss": 0.3057, "step": 19144 }, { "epoch": 0.6570006863417982, "grad_norm": 0.7787394984969939, "learning_rate": 2.781321854373857e-06, "loss": 0.259, "step": 19145 }, { "epoch": 0.657035003431709, "grad_norm": 0.7877814220230765, "learning_rate": 2.780823838260569e-06, "loss": 0.2714, "step": 19146 }, { "epoch": 0.6570693205216198, "grad_norm": 0.7129967557615585, "learning_rate": 2.7803258495626128e-06, "loss": 0.2891, "step": 19147 }, { "epoch": 0.6571036376115306, "grad_norm": 1.1671569170775222, "learning_rate": 2.779827888286141e-06, "loss": 0.2893, "step": 19148 }, { "epoch": 0.6571379547014413, "grad_norm": 0.7825682551825751, "learning_rate": 2.779329954437311e-06, "loss": 0.2735, "step": 19149 }, { "epoch": 0.6571722717913521, "grad_norm": 0.7293710626304944, "learning_rate": 2.778832048022267e-06, "loss": 0.26, "step": 19150 }, { "epoch": 0.6572065888812628, "grad_norm": 0.7978377767637229, "learning_rate": 2.778334169047167e-06, "loss": 0.2623, "step": 19151 }, { "epoch": 0.6572409059711737, "grad_norm": 0.8180931731204892, "learning_rate": 2.7778363175181566e-06, "loss": 0.2694, "step": 19152 }, { "epoch": 0.6572752230610844, "grad_norm": 0.7187186667435286, "learning_rate": 2.777338493441387e-06, "loss": 0.2649, "step": 19153 }, { "epoch": 0.6573095401509952, "grad_norm": 0.793289481968349, "learning_rate": 2.776840696823009e-06, "loss": 0.2998, "step": 19154 }, { "epoch": 0.6573438572409059, "grad_norm": 0.6850253907823044, "learning_rate": 2.776342927669173e-06, "loss": 0.274, "step": 19155 }, { "epoch": 0.6573781743308168, "grad_norm": 0.8115028694833791, "learning_rate": 2.775845185986027e-06, "loss": 0.26, "step": 19156 }, { "epoch": 0.6574124914207276, "grad_norm": 0.7719750129945794, "learning_rate": 2.7753474717797204e-06, "loss": 0.2893, "step": 19157 }, { "epoch": 0.6574468085106383, "grad_norm": 0.7153327523332043, "learning_rate": 2.7748497850564045e-06, "loss": 0.2084, "step": 19158 }, { "epoch": 0.657481125600549, "grad_norm": 0.7272802374312126, "learning_rate": 2.774352125822221e-06, "loss": 0.2741, "step": 19159 }, { "epoch": 0.6575154426904598, "grad_norm": 0.77663360928967, "learning_rate": 2.773854494083326e-06, "loss": 0.2307, "step": 19160 }, { "epoch": 0.6575497597803707, "grad_norm": 0.933148490675732, "learning_rate": 2.7733568898458617e-06, "loss": 0.2591, "step": 19161 }, { "epoch": 0.6575840768702814, "grad_norm": 0.8248834568771203, "learning_rate": 2.772859313115977e-06, "loss": 0.2649, "step": 19162 }, { "epoch": 0.6576183939601922, "grad_norm": 0.7751583561883815, "learning_rate": 2.772361763899819e-06, "loss": 0.2507, "step": 19163 }, { "epoch": 0.6576527110501029, "grad_norm": 0.7808885649067432, "learning_rate": 2.7718642422035337e-06, "loss": 0.2628, "step": 19164 }, { "epoch": 0.6576870281400137, "grad_norm": 0.9129697449258523, "learning_rate": 2.7713667480332684e-06, "loss": 0.234, "step": 19165 }, { "epoch": 0.6577213452299245, "grad_norm": 0.7245609090897506, "learning_rate": 2.7708692813951683e-06, "loss": 0.2299, "step": 19166 }, { "epoch": 0.6577556623198353, "grad_norm": 0.7386220342402727, "learning_rate": 2.770371842295382e-06, "loss": 0.265, "step": 19167 }, { "epoch": 0.657789979409746, "grad_norm": 0.8469551719245484, "learning_rate": 2.7698744307400473e-06, "loss": 0.2853, "step": 19168 }, { "epoch": 0.6578242964996568, "grad_norm": 0.7594240372940694, "learning_rate": 2.7693770467353153e-06, "loss": 0.2523, "step": 19169 }, { "epoch": 0.6578586135895677, "grad_norm": 0.7499081458288147, "learning_rate": 2.7688796902873327e-06, "loss": 0.3085, "step": 19170 }, { "epoch": 0.6578929306794784, "grad_norm": 0.7536174610466145, "learning_rate": 2.7683823614022355e-06, "loss": 0.2933, "step": 19171 }, { "epoch": 0.6579272477693892, "grad_norm": 0.8675464474471911, "learning_rate": 2.767885060086177e-06, "loss": 0.2894, "step": 19172 }, { "epoch": 0.6579615648592999, "grad_norm": 0.7318566580709212, "learning_rate": 2.7673877863452937e-06, "loss": 0.2741, "step": 19173 }, { "epoch": 0.6579958819492107, "grad_norm": 0.8889742615165336, "learning_rate": 2.766890540185731e-06, "loss": 0.2567, "step": 19174 }, { "epoch": 0.6580301990391215, "grad_norm": 0.6777661269540375, "learning_rate": 2.766393321613633e-06, "loss": 0.2399, "step": 19175 }, { "epoch": 0.6580645161290323, "grad_norm": 0.8614997623995243, "learning_rate": 2.7658961306351407e-06, "loss": 0.2493, "step": 19176 }, { "epoch": 0.658098833218943, "grad_norm": 0.7440949872268118, "learning_rate": 2.7653989672563973e-06, "loss": 0.2392, "step": 19177 }, { "epoch": 0.6581331503088538, "grad_norm": 0.7291618308359764, "learning_rate": 2.764901831483544e-06, "loss": 0.2595, "step": 19178 }, { "epoch": 0.6581674673987646, "grad_norm": 0.7258150638604713, "learning_rate": 2.764404723322725e-06, "loss": 0.2746, "step": 19179 }, { "epoch": 0.6582017844886754, "grad_norm": 0.8549589374471073, "learning_rate": 2.763907642780075e-06, "loss": 0.3556, "step": 19180 }, { "epoch": 0.6582361015785861, "grad_norm": 0.8171440598702865, "learning_rate": 2.7634105898617436e-06, "loss": 0.2304, "step": 19181 }, { "epoch": 0.6582704186684969, "grad_norm": 0.8287043038529854, "learning_rate": 2.7629135645738626e-06, "loss": 0.2871, "step": 19182 }, { "epoch": 0.6583047357584076, "grad_norm": 0.8652333904998336, "learning_rate": 2.76241656692258e-06, "loss": 0.2519, "step": 19183 }, { "epoch": 0.6583390528483185, "grad_norm": 0.6832967757401468, "learning_rate": 2.76191959691403e-06, "loss": 0.2376, "step": 19184 }, { "epoch": 0.6583733699382293, "grad_norm": 0.833462050613462, "learning_rate": 2.7614226545543536e-06, "loss": 0.2391, "step": 19185 }, { "epoch": 0.65840768702814, "grad_norm": 0.7780530576613249, "learning_rate": 2.760925739849691e-06, "loss": 0.2905, "step": 19186 }, { "epoch": 0.6584420041180508, "grad_norm": 0.9151595148272204, "learning_rate": 2.76042885280618e-06, "loss": 0.2738, "step": 19187 }, { "epoch": 0.6584763212079615, "grad_norm": 0.7104727790175118, "learning_rate": 2.7599319934299594e-06, "loss": 0.2396, "step": 19188 }, { "epoch": 0.6585106382978724, "grad_norm": 0.70897578001399, "learning_rate": 2.7594351617271682e-06, "loss": 0.2558, "step": 19189 }, { "epoch": 0.6585449553877831, "grad_norm": 0.7067336710020484, "learning_rate": 2.758938357703944e-06, "loss": 0.2735, "step": 19190 }, { "epoch": 0.6585792724776939, "grad_norm": 0.7885742128762185, "learning_rate": 2.758441581366421e-06, "loss": 0.273, "step": 19191 }, { "epoch": 0.6586135895676046, "grad_norm": 0.723739549455991, "learning_rate": 2.7579448327207402e-06, "loss": 0.2297, "step": 19192 }, { "epoch": 0.6586479066575155, "grad_norm": 0.7819278287610382, "learning_rate": 2.757448111773039e-06, "loss": 0.3048, "step": 19193 }, { "epoch": 0.6586822237474262, "grad_norm": 0.8138170251058374, "learning_rate": 2.756951418529448e-06, "loss": 0.2592, "step": 19194 }, { "epoch": 0.658716540837337, "grad_norm": 0.7895876493355275, "learning_rate": 2.7564547529961107e-06, "loss": 0.3021, "step": 19195 }, { "epoch": 0.6587508579272477, "grad_norm": 0.8294293370675792, "learning_rate": 2.755958115179157e-06, "loss": 0.3018, "step": 19196 }, { "epoch": 0.6587851750171585, "grad_norm": 0.8575664989361559, "learning_rate": 2.755461505084725e-06, "loss": 0.2689, "step": 19197 }, { "epoch": 0.6588194921070694, "grad_norm": 0.7968366442381262, "learning_rate": 2.754964922718949e-06, "loss": 0.287, "step": 19198 }, { "epoch": 0.6588538091969801, "grad_norm": 0.7419870726382009, "learning_rate": 2.7544683680879636e-06, "loss": 0.2863, "step": 19199 }, { "epoch": 0.6588881262868909, "grad_norm": 0.7522830022440196, "learning_rate": 2.753971841197904e-06, "loss": 0.3022, "step": 19200 }, { "epoch": 0.6589224433768016, "grad_norm": 0.7306761029029928, "learning_rate": 2.753475342054904e-06, "loss": 0.26, "step": 19201 }, { "epoch": 0.6589567604667125, "grad_norm": 1.3862243947126627, "learning_rate": 2.7529788706650983e-06, "loss": 0.269, "step": 19202 }, { "epoch": 0.6589910775566232, "grad_norm": 0.8401854977526306, "learning_rate": 2.752482427034615e-06, "loss": 0.2655, "step": 19203 }, { "epoch": 0.659025394646534, "grad_norm": 1.096966440147193, "learning_rate": 2.751986011169595e-06, "loss": 0.2419, "step": 19204 }, { "epoch": 0.6590597117364447, "grad_norm": 0.7346244579331482, "learning_rate": 2.751489623076165e-06, "loss": 0.2624, "step": 19205 }, { "epoch": 0.6590940288263555, "grad_norm": 0.8782186093716275, "learning_rate": 2.7509932627604597e-06, "loss": 0.2635, "step": 19206 }, { "epoch": 0.6591283459162663, "grad_norm": 0.7924876678831244, "learning_rate": 2.75049693022861e-06, "loss": 0.3068, "step": 19207 }, { "epoch": 0.6591626630061771, "grad_norm": 0.7057081496244815, "learning_rate": 2.750000625486748e-06, "loss": 0.3042, "step": 19208 }, { "epoch": 0.6591969800960878, "grad_norm": 0.7835401655512491, "learning_rate": 2.749504348541005e-06, "loss": 0.2754, "step": 19209 }, { "epoch": 0.6592312971859986, "grad_norm": 0.7478244923297321, "learning_rate": 2.7490080993975123e-06, "loss": 0.2367, "step": 19210 }, { "epoch": 0.6592656142759094, "grad_norm": 0.7134996907491403, "learning_rate": 2.7485118780624015e-06, "loss": 0.2523, "step": 19211 }, { "epoch": 0.6592999313658202, "grad_norm": 0.736066941781018, "learning_rate": 2.748015684541798e-06, "loss": 0.3024, "step": 19212 }, { "epoch": 0.659334248455731, "grad_norm": 0.7981139741633816, "learning_rate": 2.747519518841839e-06, "loss": 0.2831, "step": 19213 }, { "epoch": 0.6593685655456417, "grad_norm": 0.7221265337419055, "learning_rate": 2.7470233809686464e-06, "loss": 0.2313, "step": 19214 }, { "epoch": 0.6594028826355525, "grad_norm": 0.8381239424822107, "learning_rate": 2.746527270928355e-06, "loss": 0.2239, "step": 19215 }, { "epoch": 0.6594371997254633, "grad_norm": 0.8063018126236418, "learning_rate": 2.746031188727094e-06, "loss": 0.2607, "step": 19216 }, { "epoch": 0.6594715168153741, "grad_norm": 0.7558098993291087, "learning_rate": 2.7455351343709856e-06, "loss": 0.2905, "step": 19217 }, { "epoch": 0.6595058339052848, "grad_norm": 0.7651925236733625, "learning_rate": 2.745039107866166e-06, "loss": 0.2516, "step": 19218 }, { "epoch": 0.6595401509951956, "grad_norm": 0.7846913005510403, "learning_rate": 2.7445431092187574e-06, "loss": 0.2885, "step": 19219 }, { "epoch": 0.6595744680851063, "grad_norm": 0.9290221068397632, "learning_rate": 2.744047138434889e-06, "loss": 0.2606, "step": 19220 }, { "epoch": 0.6596087851750172, "grad_norm": 0.7272211196860954, "learning_rate": 2.7435511955206885e-06, "loss": 0.2799, "step": 19221 }, { "epoch": 0.659643102264928, "grad_norm": 0.8037657982785353, "learning_rate": 2.743055280482282e-06, "loss": 0.2836, "step": 19222 }, { "epoch": 0.6596774193548387, "grad_norm": 0.7511826958572776, "learning_rate": 2.7425593933257953e-06, "loss": 0.2367, "step": 19223 }, { "epoch": 0.6597117364447495, "grad_norm": 0.755237829167029, "learning_rate": 2.7420635340573565e-06, "loss": 0.2445, "step": 19224 }, { "epoch": 0.6597460535346603, "grad_norm": 0.7583639116057511, "learning_rate": 2.7415677026830923e-06, "loss": 0.2699, "step": 19225 }, { "epoch": 0.6597803706245711, "grad_norm": 0.7600122414270825, "learning_rate": 2.741071899209121e-06, "loss": 0.2545, "step": 19226 }, { "epoch": 0.6598146877144818, "grad_norm": 0.7213967424325574, "learning_rate": 2.7405761236415774e-06, "loss": 0.2685, "step": 19227 }, { "epoch": 0.6598490048043926, "grad_norm": 0.7946228171738507, "learning_rate": 2.74008037598658e-06, "loss": 0.2588, "step": 19228 }, { "epoch": 0.6598833218943033, "grad_norm": 0.8446065062980073, "learning_rate": 2.7395846562502536e-06, "loss": 0.3201, "step": 19229 }, { "epoch": 0.6599176389842142, "grad_norm": 0.754177878144139, "learning_rate": 2.739088964438724e-06, "loss": 0.2386, "step": 19230 }, { "epoch": 0.6599519560741249, "grad_norm": 0.7348773497030037, "learning_rate": 2.7385933005581144e-06, "loss": 0.3006, "step": 19231 }, { "epoch": 0.6599862731640357, "grad_norm": 0.7570785848601731, "learning_rate": 2.7380976646145483e-06, "loss": 0.2473, "step": 19232 }, { "epoch": 0.6600205902539464, "grad_norm": 0.8016562321000221, "learning_rate": 2.737602056614148e-06, "loss": 0.2707, "step": 19233 }, { "epoch": 0.6600549073438572, "grad_norm": 0.8020357784938783, "learning_rate": 2.7371064765630383e-06, "loss": 0.2827, "step": 19234 }, { "epoch": 0.660089224433768, "grad_norm": 0.6675247050999428, "learning_rate": 2.7366109244673365e-06, "loss": 0.2768, "step": 19235 }, { "epoch": 0.6601235415236788, "grad_norm": 0.8222652245343277, "learning_rate": 2.7361154003331693e-06, "loss": 0.2889, "step": 19236 }, { "epoch": 0.6601578586135896, "grad_norm": 0.724033100656197, "learning_rate": 2.735619904166659e-06, "loss": 0.3216, "step": 19237 }, { "epoch": 0.6601921757035003, "grad_norm": 0.8234838615962914, "learning_rate": 2.7351244359739215e-06, "loss": 0.3225, "step": 19238 }, { "epoch": 0.6602264927934112, "grad_norm": 0.7605545570185892, "learning_rate": 2.734628995761084e-06, "loss": 0.2864, "step": 19239 }, { "epoch": 0.6602608098833219, "grad_norm": 0.787179399679004, "learning_rate": 2.7341335835342624e-06, "loss": 0.3007, "step": 19240 }, { "epoch": 0.6602951269732327, "grad_norm": 0.8043510085857181, "learning_rate": 2.7336381992995788e-06, "loss": 0.2301, "step": 19241 }, { "epoch": 0.6603294440631434, "grad_norm": 0.82379139074868, "learning_rate": 2.733142843063153e-06, "loss": 0.2742, "step": 19242 }, { "epoch": 0.6603637611530542, "grad_norm": 0.8722036936870805, "learning_rate": 2.732647514831104e-06, "loss": 0.2715, "step": 19243 }, { "epoch": 0.660398078242965, "grad_norm": 0.7730282588565042, "learning_rate": 2.732152214609552e-06, "loss": 0.2451, "step": 19244 }, { "epoch": 0.6604323953328758, "grad_norm": 0.848495775966452, "learning_rate": 2.7316569424046153e-06, "loss": 0.2819, "step": 19245 }, { "epoch": 0.6604667124227865, "grad_norm": 0.7769200425696327, "learning_rate": 2.731161698222412e-06, "loss": 0.2755, "step": 19246 }, { "epoch": 0.6605010295126973, "grad_norm": 0.8178176316894439, "learning_rate": 2.730666482069061e-06, "loss": 0.3157, "step": 19247 }, { "epoch": 0.6605353466026082, "grad_norm": 0.7229871362971261, "learning_rate": 2.730171293950682e-06, "loss": 0.247, "step": 19248 }, { "epoch": 0.6605696636925189, "grad_norm": 0.7491404785206546, "learning_rate": 2.729676133873386e-06, "loss": 0.2928, "step": 19249 }, { "epoch": 0.6606039807824297, "grad_norm": 1.3118688937335936, "learning_rate": 2.7291810018432984e-06, "loss": 0.2501, "step": 19250 }, { "epoch": 0.6606382978723404, "grad_norm": 0.8451006867514069, "learning_rate": 2.72868589786653e-06, "loss": 0.3176, "step": 19251 }, { "epoch": 0.6606726149622512, "grad_norm": 0.7580920179941636, "learning_rate": 2.7281908219492e-06, "loss": 0.2168, "step": 19252 }, { "epoch": 0.660706932052162, "grad_norm": 0.7767267965214416, "learning_rate": 2.727695774097423e-06, "loss": 0.3304, "step": 19253 }, { "epoch": 0.6607412491420728, "grad_norm": 0.8835867483824378, "learning_rate": 2.727200754317315e-06, "loss": 0.2613, "step": 19254 }, { "epoch": 0.6607755662319835, "grad_norm": 0.7573435633271545, "learning_rate": 2.726705762614993e-06, "loss": 0.2434, "step": 19255 }, { "epoch": 0.6608098833218943, "grad_norm": 1.0694359851849995, "learning_rate": 2.72621079899657e-06, "loss": 0.2076, "step": 19256 }, { "epoch": 0.660844200411805, "grad_norm": 0.846290378080682, "learning_rate": 2.725715863468164e-06, "loss": 0.2504, "step": 19257 }, { "epoch": 0.6608785175017159, "grad_norm": 0.7720085692939189, "learning_rate": 2.7252209560358832e-06, "loss": 0.2612, "step": 19258 }, { "epoch": 0.6609128345916266, "grad_norm": 0.7596074446557555, "learning_rate": 2.724726076705847e-06, "loss": 0.266, "step": 19259 }, { "epoch": 0.6609471516815374, "grad_norm": 0.7461242465283596, "learning_rate": 2.7242312254841697e-06, "loss": 0.2835, "step": 19260 }, { "epoch": 0.6609814687714481, "grad_norm": 0.7847777603954612, "learning_rate": 2.7237364023769576e-06, "loss": 0.3138, "step": 19261 }, { "epoch": 0.661015785861359, "grad_norm": 0.7901147135428657, "learning_rate": 2.723241607390333e-06, "loss": 0.2694, "step": 19262 }, { "epoch": 0.6610501029512698, "grad_norm": 0.7999371580820515, "learning_rate": 2.7227468405304013e-06, "loss": 0.3125, "step": 19263 }, { "epoch": 0.6610844200411805, "grad_norm": 0.9000668546526891, "learning_rate": 2.7222521018032777e-06, "loss": 0.3218, "step": 19264 }, { "epoch": 0.6611187371310913, "grad_norm": 0.7212736620287792, "learning_rate": 2.721757391215074e-06, "loss": 0.2835, "step": 19265 }, { "epoch": 0.661153054221002, "grad_norm": 0.7151891970745169, "learning_rate": 2.721262708771902e-06, "loss": 0.2442, "step": 19266 }, { "epoch": 0.6611873713109129, "grad_norm": 0.6552789507343686, "learning_rate": 2.7207680544798715e-06, "loss": 0.2445, "step": 19267 }, { "epoch": 0.6612216884008236, "grad_norm": 0.6866281089011868, "learning_rate": 2.720273428345095e-06, "loss": 0.2666, "step": 19268 }, { "epoch": 0.6612560054907344, "grad_norm": 0.7534577802063103, "learning_rate": 2.7197788303736837e-06, "loss": 0.2558, "step": 19269 }, { "epoch": 0.6612903225806451, "grad_norm": 0.9326021943972586, "learning_rate": 2.7192842605717435e-06, "loss": 0.2747, "step": 19270 }, { "epoch": 0.661324639670556, "grad_norm": 0.6986550540487726, "learning_rate": 2.7187897189453903e-06, "loss": 0.2781, "step": 19271 }, { "epoch": 0.6613589567604667, "grad_norm": 0.7565197089043407, "learning_rate": 2.7182952055007288e-06, "loss": 0.2624, "step": 19272 }, { "epoch": 0.6613932738503775, "grad_norm": 0.9044040598484284, "learning_rate": 2.71780072024387e-06, "loss": 0.2576, "step": 19273 }, { "epoch": 0.6614275909402882, "grad_norm": 0.7735082035583188, "learning_rate": 2.717306263180922e-06, "loss": 0.2181, "step": 19274 }, { "epoch": 0.661461908030199, "grad_norm": 0.9237304398005532, "learning_rate": 2.7168118343179926e-06, "loss": 0.2298, "step": 19275 }, { "epoch": 0.6614962251201099, "grad_norm": 0.9045891299599932, "learning_rate": 2.716317433661194e-06, "loss": 0.2321, "step": 19276 }, { "epoch": 0.6615305422100206, "grad_norm": 0.7793803544216098, "learning_rate": 2.7158230612166304e-06, "loss": 0.3633, "step": 19277 }, { "epoch": 0.6615648592999314, "grad_norm": 0.8052292117902056, "learning_rate": 2.7153287169904086e-06, "loss": 0.2589, "step": 19278 }, { "epoch": 0.6615991763898421, "grad_norm": 0.7410689369175725, "learning_rate": 2.7148344009886375e-06, "loss": 0.2369, "step": 19279 }, { "epoch": 0.6616334934797529, "grad_norm": 0.7184774188328128, "learning_rate": 2.714340113217423e-06, "loss": 0.2822, "step": 19280 }, { "epoch": 0.6616678105696637, "grad_norm": 0.8537154426748879, "learning_rate": 2.713845853682871e-06, "loss": 0.2779, "step": 19281 }, { "epoch": 0.6617021276595745, "grad_norm": 0.7844427496703252, "learning_rate": 2.7133516223910884e-06, "loss": 0.288, "step": 19282 }, { "epoch": 0.6617364447494852, "grad_norm": 0.7866012124560372, "learning_rate": 2.7128574193481828e-06, "loss": 0.2578, "step": 19283 }, { "epoch": 0.661770761839396, "grad_norm": 0.7381875607477172, "learning_rate": 2.712363244560253e-06, "loss": 0.2495, "step": 19284 }, { "epoch": 0.6618050789293068, "grad_norm": 0.7217747127115263, "learning_rate": 2.7118690980334122e-06, "loss": 0.3062, "step": 19285 }, { "epoch": 0.6618393960192176, "grad_norm": 0.8004303383859563, "learning_rate": 2.7113749797737586e-06, "loss": 0.2954, "step": 19286 }, { "epoch": 0.6618737131091283, "grad_norm": 0.8739546808152521, "learning_rate": 2.7108808897873995e-06, "loss": 0.2689, "step": 19287 }, { "epoch": 0.6619080301990391, "grad_norm": 0.677609585123055, "learning_rate": 2.710386828080438e-06, "loss": 0.2464, "step": 19288 }, { "epoch": 0.6619423472889499, "grad_norm": 0.8110301908438308, "learning_rate": 2.709892794658978e-06, "loss": 0.2878, "step": 19289 }, { "epoch": 0.6619766643788607, "grad_norm": 0.8045819739963082, "learning_rate": 2.709398789529122e-06, "loss": 0.2773, "step": 19290 }, { "epoch": 0.6620109814687715, "grad_norm": 0.7685230691214877, "learning_rate": 2.708904812696974e-06, "loss": 0.2529, "step": 19291 }, { "epoch": 0.6620452985586822, "grad_norm": 0.7267579399370488, "learning_rate": 2.7084108641686373e-06, "loss": 0.2779, "step": 19292 }, { "epoch": 0.662079615648593, "grad_norm": 0.8903958366029515, "learning_rate": 2.707916943950209e-06, "loss": 0.251, "step": 19293 }, { "epoch": 0.6621139327385038, "grad_norm": 0.7902847602201494, "learning_rate": 2.7074230520477984e-06, "loss": 0.259, "step": 19294 }, { "epoch": 0.6621482498284146, "grad_norm": 0.7159738926757078, "learning_rate": 2.706929188467501e-06, "loss": 0.251, "step": 19295 }, { "epoch": 0.6621825669183253, "grad_norm": 0.8039818356832524, "learning_rate": 2.70643535321542e-06, "loss": 0.2742, "step": 19296 }, { "epoch": 0.6622168840082361, "grad_norm": 0.7722902599846957, "learning_rate": 2.705941546297656e-06, "loss": 0.2249, "step": 19297 }, { "epoch": 0.6622512010981468, "grad_norm": 0.7519178540589044, "learning_rate": 2.7054477677203106e-06, "loss": 0.2357, "step": 19298 }, { "epoch": 0.6622855181880577, "grad_norm": 0.8036141766163218, "learning_rate": 2.7049540174894816e-06, "loss": 0.3087, "step": 19299 }, { "epoch": 0.6623198352779684, "grad_norm": 0.7452474627487472, "learning_rate": 2.704460295611271e-06, "loss": 0.2798, "step": 19300 }, { "epoch": 0.6623541523678792, "grad_norm": 0.7991942682746044, "learning_rate": 2.703966602091778e-06, "loss": 0.2669, "step": 19301 }, { "epoch": 0.66238846945779, "grad_norm": 0.7556099792586811, "learning_rate": 2.7034729369370977e-06, "loss": 0.2822, "step": 19302 }, { "epoch": 0.6624227865477007, "grad_norm": 0.768321336737902, "learning_rate": 2.7029793001533334e-06, "loss": 0.273, "step": 19303 }, { "epoch": 0.6624571036376116, "grad_norm": 0.8527376480602584, "learning_rate": 2.7024856917465836e-06, "loss": 0.2331, "step": 19304 }, { "epoch": 0.6624914207275223, "grad_norm": 0.9189574464694381, "learning_rate": 2.701992111722941e-06, "loss": 0.3254, "step": 19305 }, { "epoch": 0.6625257378174331, "grad_norm": 0.7844437334489165, "learning_rate": 2.7014985600885096e-06, "loss": 0.2222, "step": 19306 }, { "epoch": 0.6625600549073438, "grad_norm": 0.8727478404789967, "learning_rate": 2.7010050368493806e-06, "loss": 0.3114, "step": 19307 }, { "epoch": 0.6625943719972547, "grad_norm": 0.8067755140836369, "learning_rate": 2.7005115420116577e-06, "loss": 0.2736, "step": 19308 }, { "epoch": 0.6626286890871654, "grad_norm": 0.9094726404961531, "learning_rate": 2.7000180755814322e-06, "loss": 0.3193, "step": 19309 }, { "epoch": 0.6626630061770762, "grad_norm": 0.7859511161334628, "learning_rate": 2.6995246375648008e-06, "loss": 0.2672, "step": 19310 }, { "epoch": 0.6626973232669869, "grad_norm": 0.7467074585469401, "learning_rate": 2.699031227967861e-06, "loss": 0.2472, "step": 19311 }, { "epoch": 0.6627316403568977, "grad_norm": 0.778472112404814, "learning_rate": 2.698537846796708e-06, "loss": 0.2494, "step": 19312 }, { "epoch": 0.6627659574468086, "grad_norm": 0.7887896243159688, "learning_rate": 2.698044494057436e-06, "loss": 0.2683, "step": 19313 }, { "epoch": 0.6628002745367193, "grad_norm": 0.7538725436356256, "learning_rate": 2.6975511697561412e-06, "loss": 0.2476, "step": 19314 }, { "epoch": 0.66283459162663, "grad_norm": 0.807909006301339, "learning_rate": 2.6970578738989187e-06, "loss": 0.2826, "step": 19315 }, { "epoch": 0.6628689087165408, "grad_norm": 0.7214908170563052, "learning_rate": 2.6965646064918575e-06, "loss": 0.2187, "step": 19316 }, { "epoch": 0.6629032258064517, "grad_norm": 0.7960705804711586, "learning_rate": 2.6960713675410596e-06, "loss": 0.2992, "step": 19317 }, { "epoch": 0.6629375428963624, "grad_norm": 0.7177072525341323, "learning_rate": 2.695578157052611e-06, "loss": 0.2797, "step": 19318 }, { "epoch": 0.6629718599862732, "grad_norm": 0.7554234972539605, "learning_rate": 2.6950849750326058e-06, "loss": 0.277, "step": 19319 }, { "epoch": 0.6630061770761839, "grad_norm": 0.7484704633598138, "learning_rate": 2.6945918214871425e-06, "loss": 0.2863, "step": 19320 }, { "epoch": 0.6630404941660947, "grad_norm": 0.6976730085936608, "learning_rate": 2.694098696422307e-06, "loss": 0.245, "step": 19321 }, { "epoch": 0.6630748112560055, "grad_norm": 0.8004044709422683, "learning_rate": 2.6936055998441946e-06, "loss": 0.2736, "step": 19322 }, { "epoch": 0.6631091283459163, "grad_norm": 0.7360144098009704, "learning_rate": 2.6931125317588953e-06, "loss": 0.238, "step": 19323 }, { "epoch": 0.663143445435827, "grad_norm": 0.8542112094048686, "learning_rate": 2.6926194921725036e-06, "loss": 0.2624, "step": 19324 }, { "epoch": 0.6631777625257378, "grad_norm": 0.7471710050058783, "learning_rate": 2.692126481091103e-06, "loss": 0.2707, "step": 19325 }, { "epoch": 0.6632120796156485, "grad_norm": 1.0275347061531073, "learning_rate": 2.6916334985207922e-06, "loss": 0.2805, "step": 19326 }, { "epoch": 0.6632463967055594, "grad_norm": 0.7978125430920938, "learning_rate": 2.6911405444676597e-06, "loss": 0.2722, "step": 19327 }, { "epoch": 0.6632807137954702, "grad_norm": 0.6532332974221797, "learning_rate": 2.6906476189377894e-06, "loss": 0.1941, "step": 19328 }, { "epoch": 0.6633150308853809, "grad_norm": 0.7828344128085327, "learning_rate": 2.69015472193728e-06, "loss": 0.2655, "step": 19329 }, { "epoch": 0.6633493479752917, "grad_norm": 0.7571396112548361, "learning_rate": 2.689661853472214e-06, "loss": 0.2592, "step": 19330 }, { "epoch": 0.6633836650652025, "grad_norm": 0.7903824113162886, "learning_rate": 2.6891690135486814e-06, "loss": 0.2976, "step": 19331 }, { "epoch": 0.6634179821551133, "grad_norm": 0.7539350365013561, "learning_rate": 2.6886762021727723e-06, "loss": 0.2838, "step": 19332 }, { "epoch": 0.663452299245024, "grad_norm": 0.8161757064918387, "learning_rate": 2.688183419350574e-06, "loss": 0.2616, "step": 19333 }, { "epoch": 0.6634866163349348, "grad_norm": 0.753458040267569, "learning_rate": 2.687690665088174e-06, "loss": 0.2802, "step": 19334 }, { "epoch": 0.6635209334248455, "grad_norm": 0.8150562146076223, "learning_rate": 2.6871979393916597e-06, "loss": 0.2782, "step": 19335 }, { "epoch": 0.6635552505147564, "grad_norm": 0.7773266044126016, "learning_rate": 2.68670524226712e-06, "loss": 0.3176, "step": 19336 }, { "epoch": 0.6635895676046671, "grad_norm": 0.7797690175297852, "learning_rate": 2.6862125737206374e-06, "loss": 0.2544, "step": 19337 }, { "epoch": 0.6636238846945779, "grad_norm": 0.7185213996636798, "learning_rate": 2.685719933758304e-06, "loss": 0.2474, "step": 19338 }, { "epoch": 0.6636582017844886, "grad_norm": 0.8950675029821589, "learning_rate": 2.6852273223861987e-06, "loss": 0.2417, "step": 19339 }, { "epoch": 0.6636925188743995, "grad_norm": 0.8448107716080329, "learning_rate": 2.684734739610415e-06, "loss": 0.2836, "step": 19340 }, { "epoch": 0.6637268359643103, "grad_norm": 1.0023350056045575, "learning_rate": 2.6842421854370323e-06, "loss": 0.2698, "step": 19341 }, { "epoch": 0.663761153054221, "grad_norm": 0.8069458300022498, "learning_rate": 2.6837496598721357e-06, "loss": 0.2511, "step": 19342 }, { "epoch": 0.6637954701441318, "grad_norm": 0.8753379080238181, "learning_rate": 2.683257162921815e-06, "loss": 0.2765, "step": 19343 }, { "epoch": 0.6638297872340425, "grad_norm": 0.7034598049669277, "learning_rate": 2.6827646945921493e-06, "loss": 0.2832, "step": 19344 }, { "epoch": 0.6638641043239534, "grad_norm": 0.8030081861714304, "learning_rate": 2.6822722548892246e-06, "loss": 0.2577, "step": 19345 }, { "epoch": 0.6638984214138641, "grad_norm": 0.7347970595294457, "learning_rate": 2.6817798438191234e-06, "loss": 0.2488, "step": 19346 }, { "epoch": 0.6639327385037749, "grad_norm": 0.7171698259325112, "learning_rate": 2.681287461387929e-06, "loss": 0.2639, "step": 19347 }, { "epoch": 0.6639670555936856, "grad_norm": 0.669586056123806, "learning_rate": 2.680795107601726e-06, "loss": 0.2107, "step": 19348 }, { "epoch": 0.6640013726835964, "grad_norm": 0.8147585952720122, "learning_rate": 2.6803027824665946e-06, "loss": 0.2548, "step": 19349 }, { "epoch": 0.6640356897735072, "grad_norm": 0.7282968383066809, "learning_rate": 2.67981048598862e-06, "loss": 0.2554, "step": 19350 }, { "epoch": 0.664070006863418, "grad_norm": 0.7570633447498486, "learning_rate": 2.6793182181738774e-06, "loss": 0.2575, "step": 19351 }, { "epoch": 0.6641043239533287, "grad_norm": 0.8635989050574328, "learning_rate": 2.6788259790284578e-06, "loss": 0.3005, "step": 19352 }, { "epoch": 0.6641386410432395, "grad_norm": 0.8023439244644918, "learning_rate": 2.678333768558434e-06, "loss": 0.3255, "step": 19353 }, { "epoch": 0.6641729581331504, "grad_norm": 0.8230012307845129, "learning_rate": 2.6778415867698893e-06, "loss": 0.3036, "step": 19354 }, { "epoch": 0.6642072752230611, "grad_norm": 0.738257615771971, "learning_rate": 2.6773494336689056e-06, "loss": 0.254, "step": 19355 }, { "epoch": 0.6642415923129719, "grad_norm": 0.7985755197224156, "learning_rate": 2.6768573092615606e-06, "loss": 0.2682, "step": 19356 }, { "epoch": 0.6642759094028826, "grad_norm": 0.7861080081118224, "learning_rate": 2.6763652135539352e-06, "loss": 0.3097, "step": 19357 }, { "epoch": 0.6643102264927934, "grad_norm": 0.7548208724143852, "learning_rate": 2.6758731465521095e-06, "loss": 0.2709, "step": 19358 }, { "epoch": 0.6643445435827042, "grad_norm": 0.8482731962563175, "learning_rate": 2.675381108262162e-06, "loss": 0.2874, "step": 19359 }, { "epoch": 0.664378860672615, "grad_norm": 0.78400752612784, "learning_rate": 2.6748890986901676e-06, "loss": 0.3164, "step": 19360 }, { "epoch": 0.6644131777625257, "grad_norm": 0.8358115649819924, "learning_rate": 2.6743971178422114e-06, "loss": 0.3081, "step": 19361 }, { "epoch": 0.6644474948524365, "grad_norm": 0.7571715470653039, "learning_rate": 2.673905165724365e-06, "loss": 0.253, "step": 19362 }, { "epoch": 0.6644818119423472, "grad_norm": 0.7624382347251514, "learning_rate": 2.6734132423427088e-06, "loss": 0.2917, "step": 19363 }, { "epoch": 0.6645161290322581, "grad_norm": 0.7542251515924193, "learning_rate": 2.6729213477033193e-06, "loss": 0.3229, "step": 19364 }, { "epoch": 0.6645504461221688, "grad_norm": 0.7970012204793377, "learning_rate": 2.672429481812274e-06, "loss": 0.2778, "step": 19365 }, { "epoch": 0.6645847632120796, "grad_norm": 0.7597967552663673, "learning_rate": 2.671937644675648e-06, "loss": 0.2337, "step": 19366 }, { "epoch": 0.6646190803019904, "grad_norm": 0.8065357814991488, "learning_rate": 2.6714458362995183e-06, "loss": 0.3199, "step": 19367 }, { "epoch": 0.6646533973919012, "grad_norm": 0.7636180754778026, "learning_rate": 2.6709540566899627e-06, "loss": 0.2587, "step": 19368 }, { "epoch": 0.664687714481812, "grad_norm": 0.7243485461920889, "learning_rate": 2.67046230585305e-06, "loss": 0.2528, "step": 19369 }, { "epoch": 0.6647220315717227, "grad_norm": 0.8047999049241664, "learning_rate": 2.669970583794861e-06, "loss": 0.3052, "step": 19370 }, { "epoch": 0.6647563486616335, "grad_norm": 0.7602251593378837, "learning_rate": 2.66947889052147e-06, "loss": 0.2536, "step": 19371 }, { "epoch": 0.6647906657515442, "grad_norm": 0.7757635224205922, "learning_rate": 2.6689872260389498e-06, "loss": 0.2537, "step": 19372 }, { "epoch": 0.6648249828414551, "grad_norm": 0.7831807351740163, "learning_rate": 2.6684955903533765e-06, "loss": 0.2969, "step": 19373 }, { "epoch": 0.6648592999313658, "grad_norm": 0.7897742536198707, "learning_rate": 2.668003983470818e-06, "loss": 0.2327, "step": 19374 }, { "epoch": 0.6648936170212766, "grad_norm": 0.7192050781754907, "learning_rate": 2.6675124053973557e-06, "loss": 0.266, "step": 19375 }, { "epoch": 0.6649279341111873, "grad_norm": 0.9791714942134109, "learning_rate": 2.6670208561390564e-06, "loss": 0.2276, "step": 19376 }, { "epoch": 0.6649622512010982, "grad_norm": 0.7978837766942174, "learning_rate": 2.666529335701994e-06, "loss": 0.2151, "step": 19377 }, { "epoch": 0.664996568291009, "grad_norm": 0.6978548699161098, "learning_rate": 2.6660378440922406e-06, "loss": 0.24, "step": 19378 }, { "epoch": 0.6650308853809197, "grad_norm": 0.7290905223247455, "learning_rate": 2.6655463813158693e-06, "loss": 0.2683, "step": 19379 }, { "epoch": 0.6650652024708305, "grad_norm": 0.6933557464933053, "learning_rate": 2.6650549473789505e-06, "loss": 0.27, "step": 19380 }, { "epoch": 0.6650995195607412, "grad_norm": 0.9973732422280164, "learning_rate": 2.664563542287556e-06, "loss": 0.278, "step": 19381 }, { "epoch": 0.6651338366506521, "grad_norm": 0.7149570936192857, "learning_rate": 2.6640721660477575e-06, "loss": 0.2732, "step": 19382 }, { "epoch": 0.6651681537405628, "grad_norm": 0.7391626055846151, "learning_rate": 2.66358081866562e-06, "loss": 0.2846, "step": 19383 }, { "epoch": 0.6652024708304736, "grad_norm": 0.7548955675274941, "learning_rate": 2.663089500147221e-06, "loss": 0.2543, "step": 19384 }, { "epoch": 0.6652367879203843, "grad_norm": 0.852699919454734, "learning_rate": 2.662598210498626e-06, "loss": 0.2426, "step": 19385 }, { "epoch": 0.6652711050102951, "grad_norm": 0.7443790985033049, "learning_rate": 2.662106949725902e-06, "loss": 0.2366, "step": 19386 }, { "epoch": 0.6653054221002059, "grad_norm": 0.7562926627794415, "learning_rate": 2.6616157178351256e-06, "loss": 0.2418, "step": 19387 }, { "epoch": 0.6653397391901167, "grad_norm": 0.7887178323454458, "learning_rate": 2.6611245148323577e-06, "loss": 0.2851, "step": 19388 }, { "epoch": 0.6653740562800274, "grad_norm": 0.7745197287603427, "learning_rate": 2.66063334072367e-06, "loss": 0.2981, "step": 19389 }, { "epoch": 0.6654083733699382, "grad_norm": 0.75062944848923, "learning_rate": 2.6601421955151303e-06, "loss": 0.2551, "step": 19390 }, { "epoch": 0.665442690459849, "grad_norm": 0.826552904724474, "learning_rate": 2.659651079212805e-06, "loss": 0.301, "step": 19391 }, { "epoch": 0.6654770075497598, "grad_norm": 0.8313411165618851, "learning_rate": 2.659159991822763e-06, "loss": 0.2683, "step": 19392 }, { "epoch": 0.6655113246396706, "grad_norm": 0.8544595621947217, "learning_rate": 2.6586689333510697e-06, "loss": 0.2732, "step": 19393 }, { "epoch": 0.6655456417295813, "grad_norm": 0.8445323151106074, "learning_rate": 2.6581779038037934e-06, "loss": 0.2771, "step": 19394 }, { "epoch": 0.6655799588194921, "grad_norm": 0.8813724653123343, "learning_rate": 2.6576869031869957e-06, "loss": 0.2998, "step": 19395 }, { "epoch": 0.6656142759094029, "grad_norm": 0.778278204783246, "learning_rate": 2.6571959315067492e-06, "loss": 0.2838, "step": 19396 }, { "epoch": 0.6656485929993137, "grad_norm": 0.8215188131976439, "learning_rate": 2.6567049887691133e-06, "loss": 0.2326, "step": 19397 }, { "epoch": 0.6656829100892244, "grad_norm": 0.81492006408645, "learning_rate": 2.6562140749801557e-06, "loss": 0.3217, "step": 19398 }, { "epoch": 0.6657172271791352, "grad_norm": 0.6933711841029606, "learning_rate": 2.6557231901459403e-06, "loss": 0.2564, "step": 19399 }, { "epoch": 0.665751544269046, "grad_norm": 0.8129872939219053, "learning_rate": 2.6552323342725318e-06, "loss": 0.2579, "step": 19400 }, { "epoch": 0.6657858613589568, "grad_norm": 0.7629611739906791, "learning_rate": 2.6547415073659937e-06, "loss": 0.3221, "step": 19401 }, { "epoch": 0.6658201784488675, "grad_norm": 0.8297725764733634, "learning_rate": 2.65425070943239e-06, "loss": 0.2956, "step": 19402 }, { "epoch": 0.6658544955387783, "grad_norm": 0.9057149436825858, "learning_rate": 2.653759940477785e-06, "loss": 0.2764, "step": 19403 }, { "epoch": 0.665888812628689, "grad_norm": 0.7889127470688349, "learning_rate": 2.6532692005082394e-06, "loss": 0.2672, "step": 19404 }, { "epoch": 0.6659231297185999, "grad_norm": 0.7903039102874547, "learning_rate": 2.6527784895298187e-06, "loss": 0.2502, "step": 19405 }, { "epoch": 0.6659574468085107, "grad_norm": 0.7873990479542813, "learning_rate": 2.652287807548579e-06, "loss": 0.2458, "step": 19406 }, { "epoch": 0.6659917638984214, "grad_norm": 0.8490598361278334, "learning_rate": 2.6517971545705913e-06, "loss": 0.3068, "step": 19407 }, { "epoch": 0.6660260809883322, "grad_norm": 0.8493554730363967, "learning_rate": 2.6513065306019093e-06, "loss": 0.2827, "step": 19408 }, { "epoch": 0.6660603980782429, "grad_norm": 0.8265034713590871, "learning_rate": 2.650815935648595e-06, "loss": 0.2361, "step": 19409 }, { "epoch": 0.6660947151681538, "grad_norm": 0.8818335315380419, "learning_rate": 2.650325369716715e-06, "loss": 0.2516, "step": 19410 }, { "epoch": 0.6661290322580645, "grad_norm": 0.7585920120142001, "learning_rate": 2.6498348328123234e-06, "loss": 0.3155, "step": 19411 }, { "epoch": 0.6661633493479753, "grad_norm": 0.7551684134747526, "learning_rate": 2.649344324941483e-06, "loss": 0.2731, "step": 19412 }, { "epoch": 0.666197666437886, "grad_norm": 0.7733018109673688, "learning_rate": 2.648853846110252e-06, "loss": 0.26, "step": 19413 }, { "epoch": 0.6662319835277969, "grad_norm": 0.7560458508295711, "learning_rate": 2.648363396324691e-06, "loss": 0.2565, "step": 19414 }, { "epoch": 0.6662663006177076, "grad_norm": 0.7959821906669898, "learning_rate": 2.647872975590859e-06, "loss": 0.3072, "step": 19415 }, { "epoch": 0.6663006177076184, "grad_norm": 0.8650867281195745, "learning_rate": 2.6473825839148137e-06, "loss": 0.2683, "step": 19416 }, { "epoch": 0.6663349347975291, "grad_norm": 0.7064495297173646, "learning_rate": 2.646892221302615e-06, "loss": 0.2951, "step": 19417 }, { "epoch": 0.6663692518874399, "grad_norm": 0.793740050039854, "learning_rate": 2.6464018877603158e-06, "loss": 0.2986, "step": 19418 }, { "epoch": 0.6664035689773508, "grad_norm": 0.8162990632029812, "learning_rate": 2.645911583293982e-06, "loss": 0.3173, "step": 19419 }, { "epoch": 0.6664378860672615, "grad_norm": 0.7220802248811893, "learning_rate": 2.6454213079096634e-06, "loss": 0.2316, "step": 19420 }, { "epoch": 0.6664722031571723, "grad_norm": 0.7888085715511898, "learning_rate": 2.6449310616134193e-06, "loss": 0.3338, "step": 19421 }, { "epoch": 0.666506520247083, "grad_norm": 0.7728220208794523, "learning_rate": 2.644440844411307e-06, "loss": 0.2111, "step": 19422 }, { "epoch": 0.6665408373369939, "grad_norm": 0.8295747524462188, "learning_rate": 2.6439506563093802e-06, "loss": 0.2946, "step": 19423 }, { "epoch": 0.6665751544269046, "grad_norm": 0.69421263493656, "learning_rate": 2.643460497313697e-06, "loss": 0.3181, "step": 19424 }, { "epoch": 0.6666094715168154, "grad_norm": 0.7428120945953198, "learning_rate": 2.642970367430312e-06, "loss": 0.2652, "step": 19425 }, { "epoch": 0.6666437886067261, "grad_norm": 0.7857487602056611, "learning_rate": 2.642480266665282e-06, "loss": 0.2472, "step": 19426 }, { "epoch": 0.6666781056966369, "grad_norm": 0.7209951541161307, "learning_rate": 2.641990195024656e-06, "loss": 0.2722, "step": 19427 }, { "epoch": 0.6667124227865477, "grad_norm": 0.8291744270476658, "learning_rate": 2.6415001525144956e-06, "loss": 0.2349, "step": 19428 }, { "epoch": 0.6667467398764585, "grad_norm": 0.8672490298065008, "learning_rate": 2.6410101391408487e-06, "loss": 0.3228, "step": 19429 }, { "epoch": 0.6667810569663692, "grad_norm": 0.7866940427037834, "learning_rate": 2.64052015490977e-06, "loss": 0.3148, "step": 19430 }, { "epoch": 0.66681537405628, "grad_norm": 0.7924452541319015, "learning_rate": 2.640030199827317e-06, "loss": 0.2762, "step": 19431 }, { "epoch": 0.6668496911461907, "grad_norm": 0.755447795138833, "learning_rate": 2.639540273899537e-06, "loss": 0.2583, "step": 19432 }, { "epoch": 0.6668840082361016, "grad_norm": 0.7626584272048323, "learning_rate": 2.639050377132487e-06, "loss": 0.2855, "step": 19433 }, { "epoch": 0.6669183253260124, "grad_norm": 0.7622747007801933, "learning_rate": 2.6385605095322165e-06, "loss": 0.2634, "step": 19434 }, { "epoch": 0.6669526424159231, "grad_norm": 0.7430198667125139, "learning_rate": 2.6380706711047765e-06, "loss": 0.371, "step": 19435 }, { "epoch": 0.6669869595058339, "grad_norm": 0.8305845261427216, "learning_rate": 2.6375808618562206e-06, "loss": 0.2326, "step": 19436 }, { "epoch": 0.6670212765957447, "grad_norm": 0.7817735650571215, "learning_rate": 2.6370910817925982e-06, "loss": 0.3178, "step": 19437 }, { "epoch": 0.6670555936856555, "grad_norm": 0.8844127196368864, "learning_rate": 2.636601330919961e-06, "loss": 0.259, "step": 19438 }, { "epoch": 0.6670899107755662, "grad_norm": 0.7081294863548869, "learning_rate": 2.6361116092443585e-06, "loss": 0.2543, "step": 19439 }, { "epoch": 0.667124227865477, "grad_norm": 0.7426697876358134, "learning_rate": 2.635621916771843e-06, "loss": 0.2625, "step": 19440 }, { "epoch": 0.6671585449553877, "grad_norm": 0.7961739265331305, "learning_rate": 2.635132253508459e-06, "loss": 0.3206, "step": 19441 }, { "epoch": 0.6671928620452986, "grad_norm": 0.7197605879271842, "learning_rate": 2.634642619460262e-06, "loss": 0.2686, "step": 19442 }, { "epoch": 0.6672271791352093, "grad_norm": 0.6989627014441623, "learning_rate": 2.634153014633296e-06, "loss": 0.2689, "step": 19443 }, { "epoch": 0.6672614962251201, "grad_norm": 0.8218730475542818, "learning_rate": 2.6336634390336115e-06, "loss": 0.2777, "step": 19444 }, { "epoch": 0.6672958133150309, "grad_norm": 0.7886776265957719, "learning_rate": 2.6331738926672555e-06, "loss": 0.2886, "step": 19445 }, { "epoch": 0.6673301304049417, "grad_norm": 0.7423896431141713, "learning_rate": 2.632684375540277e-06, "loss": 0.2525, "step": 19446 }, { "epoch": 0.6673644474948525, "grad_norm": 0.8053227620583745, "learning_rate": 2.6321948876587233e-06, "loss": 0.296, "step": 19447 }, { "epoch": 0.6673987645847632, "grad_norm": 0.7955588547599692, "learning_rate": 2.6317054290286406e-06, "loss": 0.3158, "step": 19448 }, { "epoch": 0.667433081674674, "grad_norm": 0.9027746806602877, "learning_rate": 2.6312159996560783e-06, "loss": 0.3351, "step": 19449 }, { "epoch": 0.6674673987645847, "grad_norm": 0.8416840886578041, "learning_rate": 2.630726599547077e-06, "loss": 0.2821, "step": 19450 }, { "epoch": 0.6675017158544956, "grad_norm": 0.7948202595714836, "learning_rate": 2.6302372287076903e-06, "loss": 0.2731, "step": 19451 }, { "epoch": 0.6675360329444063, "grad_norm": 0.8489660588710545, "learning_rate": 2.6297478871439577e-06, "loss": 0.3735, "step": 19452 }, { "epoch": 0.6675703500343171, "grad_norm": 0.823569280963602, "learning_rate": 2.6292585748619236e-06, "loss": 0.2771, "step": 19453 }, { "epoch": 0.6676046671242278, "grad_norm": 0.7728418207329297, "learning_rate": 2.628769291867641e-06, "loss": 0.2854, "step": 19454 }, { "epoch": 0.6676389842141386, "grad_norm": 0.7172508023743168, "learning_rate": 2.628280038167147e-06, "loss": 0.295, "step": 19455 }, { "epoch": 0.6676733013040494, "grad_norm": 0.6985036236401635, "learning_rate": 2.6277908137664874e-06, "loss": 0.2835, "step": 19456 }, { "epoch": 0.6677076183939602, "grad_norm": 0.8262088863942845, "learning_rate": 2.6273016186717073e-06, "loss": 0.3271, "step": 19457 }, { "epoch": 0.667741935483871, "grad_norm": 0.7957061426527595, "learning_rate": 2.6268124528888483e-06, "loss": 0.2728, "step": 19458 }, { "epoch": 0.6677762525737817, "grad_norm": 0.8004537351207958, "learning_rate": 2.6263233164239553e-06, "loss": 0.2588, "step": 19459 }, { "epoch": 0.6678105696636926, "grad_norm": 0.8081094678976688, "learning_rate": 2.62583420928307e-06, "loss": 0.2928, "step": 19460 }, { "epoch": 0.6678448867536033, "grad_norm": 0.7940126919923283, "learning_rate": 2.625345131472238e-06, "loss": 0.278, "step": 19461 }, { "epoch": 0.6678792038435141, "grad_norm": 0.8301136781157769, "learning_rate": 2.6248560829974933e-06, "loss": 0.2909, "step": 19462 }, { "epoch": 0.6679135209334248, "grad_norm": 0.7396184635570761, "learning_rate": 2.6243670638648866e-06, "loss": 0.2435, "step": 19463 }, { "epoch": 0.6679478380233356, "grad_norm": 0.7674712811251178, "learning_rate": 2.6238780740804515e-06, "loss": 0.2322, "step": 19464 }, { "epoch": 0.6679821551132464, "grad_norm": 0.8197418511604381, "learning_rate": 2.6233891136502366e-06, "loss": 0.2902, "step": 19465 }, { "epoch": 0.6680164722031572, "grad_norm": 1.07935336239081, "learning_rate": 2.622900182580276e-06, "loss": 0.2734, "step": 19466 }, { "epoch": 0.6680507892930679, "grad_norm": 0.8647886454806086, "learning_rate": 2.622411280876612e-06, "loss": 0.3272, "step": 19467 }, { "epoch": 0.6680851063829787, "grad_norm": 0.7053480693006839, "learning_rate": 2.621922408545285e-06, "loss": 0.2884, "step": 19468 }, { "epoch": 0.6681194234728896, "grad_norm": 0.7597546761265126, "learning_rate": 2.621433565592334e-06, "loss": 0.271, "step": 19469 }, { "epoch": 0.6681537405628003, "grad_norm": 0.7157918677081837, "learning_rate": 2.6209447520237986e-06, "loss": 0.2646, "step": 19470 }, { "epoch": 0.668188057652711, "grad_norm": 0.751620004639292, "learning_rate": 2.6204559678457164e-06, "loss": 0.2685, "step": 19471 }, { "epoch": 0.6682223747426218, "grad_norm": 0.759044841713054, "learning_rate": 2.6199672130641283e-06, "loss": 0.2776, "step": 19472 }, { "epoch": 0.6682566918325326, "grad_norm": 0.7522364731962902, "learning_rate": 2.619478487685067e-06, "loss": 0.3118, "step": 19473 }, { "epoch": 0.6682910089224434, "grad_norm": 0.7736421555011974, "learning_rate": 2.6189897917145775e-06, "loss": 0.265, "step": 19474 }, { "epoch": 0.6683253260123542, "grad_norm": 0.7645636857755606, "learning_rate": 2.618501125158691e-06, "loss": 0.3189, "step": 19475 }, { "epoch": 0.6683596431022649, "grad_norm": 0.8437786431070756, "learning_rate": 2.6180124880234448e-06, "loss": 0.2702, "step": 19476 }, { "epoch": 0.6683939601921757, "grad_norm": 0.7139675546390745, "learning_rate": 2.6175238803148804e-06, "loss": 0.2564, "step": 19477 }, { "epoch": 0.6684282772820864, "grad_norm": 0.7479356410665035, "learning_rate": 2.617035302039029e-06, "loss": 0.2882, "step": 19478 }, { "epoch": 0.6684625943719973, "grad_norm": 0.717975806519468, "learning_rate": 2.6165467532019288e-06, "loss": 0.2314, "step": 19479 }, { "epoch": 0.668496911461908, "grad_norm": 0.7893943262883633, "learning_rate": 2.6160582338096142e-06, "loss": 0.2528, "step": 19480 }, { "epoch": 0.6685312285518188, "grad_norm": 0.8089772467436208, "learning_rate": 2.615569743868121e-06, "loss": 0.2904, "step": 19481 }, { "epoch": 0.6685655456417295, "grad_norm": 0.7538079313821975, "learning_rate": 2.6150812833834837e-06, "loss": 0.263, "step": 19482 }, { "epoch": 0.6685998627316404, "grad_norm": 0.7021871485124654, "learning_rate": 2.614592852361736e-06, "loss": 0.3306, "step": 19483 }, { "epoch": 0.6686341798215512, "grad_norm": 0.7477858575232287, "learning_rate": 2.614104450808915e-06, "loss": 0.2676, "step": 19484 }, { "epoch": 0.6686684969114619, "grad_norm": 0.8177320474560071, "learning_rate": 2.613616078731048e-06, "loss": 0.2767, "step": 19485 }, { "epoch": 0.6687028140013727, "grad_norm": 0.791055007315526, "learning_rate": 2.6131277361341758e-06, "loss": 0.2995, "step": 19486 }, { "epoch": 0.6687371310912834, "grad_norm": 0.9390342152906154, "learning_rate": 2.6126394230243252e-06, "loss": 0.2411, "step": 19487 }, { "epoch": 0.6687714481811943, "grad_norm": 0.7179511194197417, "learning_rate": 2.6121511394075317e-06, "loss": 0.2704, "step": 19488 }, { "epoch": 0.668805765271105, "grad_norm": 0.9278432404368658, "learning_rate": 2.611662885289826e-06, "loss": 0.2594, "step": 19489 }, { "epoch": 0.6688400823610158, "grad_norm": 0.7829482991430734, "learning_rate": 2.611174660677242e-06, "loss": 0.2777, "step": 19490 }, { "epoch": 0.6688743994509265, "grad_norm": 0.827574919005647, "learning_rate": 2.61068646557581e-06, "loss": 0.271, "step": 19491 }, { "epoch": 0.6689087165408374, "grad_norm": 0.7395636619944991, "learning_rate": 2.61019829999156e-06, "loss": 0.2726, "step": 19492 }, { "epoch": 0.6689430336307481, "grad_norm": 0.9360420833546462, "learning_rate": 2.6097101639305266e-06, "loss": 0.2609, "step": 19493 }, { "epoch": 0.6689773507206589, "grad_norm": 0.7269091328555052, "learning_rate": 2.6092220573987325e-06, "loss": 0.2528, "step": 19494 }, { "epoch": 0.6690116678105696, "grad_norm": 0.7524342111872081, "learning_rate": 2.608733980402217e-06, "loss": 0.2338, "step": 19495 }, { "epoch": 0.6690459849004804, "grad_norm": 0.7585383575018773, "learning_rate": 2.6082459329470015e-06, "loss": 0.2691, "step": 19496 }, { "epoch": 0.6690803019903913, "grad_norm": 0.8889114483336982, "learning_rate": 2.607757915039121e-06, "loss": 0.283, "step": 19497 }, { "epoch": 0.669114619080302, "grad_norm": 0.8525735514963103, "learning_rate": 2.607269926684604e-06, "loss": 0.2991, "step": 19498 }, { "epoch": 0.6691489361702128, "grad_norm": 0.7383010398438309, "learning_rate": 2.6067819678894736e-06, "loss": 0.2554, "step": 19499 }, { "epoch": 0.6691832532601235, "grad_norm": 0.7552515142723971, "learning_rate": 2.6062940386597655e-06, "loss": 0.2873, "step": 19500 }, { "epoch": 0.6692175703500343, "grad_norm": 0.7635568821631764, "learning_rate": 2.6058061390015022e-06, "loss": 0.1984, "step": 19501 }, { "epoch": 0.6692518874399451, "grad_norm": 0.8470779534274577, "learning_rate": 2.6053182689207126e-06, "loss": 0.2409, "step": 19502 }, { "epoch": 0.6692862045298559, "grad_norm": 0.7309803504552606, "learning_rate": 2.604830428423424e-06, "loss": 0.2794, "step": 19503 }, { "epoch": 0.6693205216197666, "grad_norm": 0.7119376187375251, "learning_rate": 2.6043426175156623e-06, "loss": 0.2384, "step": 19504 }, { "epoch": 0.6693548387096774, "grad_norm": 0.7351602431182181, "learning_rate": 2.603854836203454e-06, "loss": 0.2455, "step": 19505 }, { "epoch": 0.6693891557995882, "grad_norm": 0.7140802989724077, "learning_rate": 2.603367084492827e-06, "loss": 0.2227, "step": 19506 }, { "epoch": 0.669423472889499, "grad_norm": 0.799942117705294, "learning_rate": 2.6028793623898067e-06, "loss": 0.2535, "step": 19507 }, { "epoch": 0.6694577899794097, "grad_norm": 0.8316974016314035, "learning_rate": 2.602391669900414e-06, "loss": 0.2939, "step": 19508 }, { "epoch": 0.6694921070693205, "grad_norm": 0.9225122603491406, "learning_rate": 2.6019040070306802e-06, "loss": 0.2617, "step": 19509 }, { "epoch": 0.6695264241592312, "grad_norm": 0.7796654096269746, "learning_rate": 2.601416373786624e-06, "loss": 0.2898, "step": 19510 }, { "epoch": 0.6695607412491421, "grad_norm": 0.7542607967138683, "learning_rate": 2.6009287701742723e-06, "loss": 0.2645, "step": 19511 }, { "epoch": 0.6695950583390529, "grad_norm": 0.8105761169442788, "learning_rate": 2.600441196199649e-06, "loss": 0.2396, "step": 19512 }, { "epoch": 0.6696293754289636, "grad_norm": 0.909548891241384, "learning_rate": 2.599953651868777e-06, "loss": 0.2655, "step": 19513 }, { "epoch": 0.6696636925188744, "grad_norm": 0.7791499192634254, "learning_rate": 2.5994661371876794e-06, "loss": 0.252, "step": 19514 }, { "epoch": 0.6696980096087852, "grad_norm": 0.7635005687129516, "learning_rate": 2.5989786521623784e-06, "loss": 0.2719, "step": 19515 }, { "epoch": 0.669732326698696, "grad_norm": 0.7354181409581213, "learning_rate": 2.5984911967988995e-06, "loss": 0.2628, "step": 19516 }, { "epoch": 0.6697666437886067, "grad_norm": 0.8066402445309551, "learning_rate": 2.598003771103258e-06, "loss": 0.2666, "step": 19517 }, { "epoch": 0.6698009608785175, "grad_norm": 0.7785427488183709, "learning_rate": 2.597516375081483e-06, "loss": 0.2301, "step": 19518 }, { "epoch": 0.6698352779684282, "grad_norm": 0.7402951020189867, "learning_rate": 2.5970290087395907e-06, "loss": 0.2599, "step": 19519 }, { "epoch": 0.6698695950583391, "grad_norm": 0.8818322015292733, "learning_rate": 2.5965416720836013e-06, "loss": 0.274, "step": 19520 }, { "epoch": 0.6699039121482498, "grad_norm": 0.705661650283691, "learning_rate": 2.596054365119541e-06, "loss": 0.2797, "step": 19521 }, { "epoch": 0.6699382292381606, "grad_norm": 0.8097578999319499, "learning_rate": 2.5955670878534246e-06, "loss": 0.3001, "step": 19522 }, { "epoch": 0.6699725463280713, "grad_norm": 0.7183868193815316, "learning_rate": 2.595079840291273e-06, "loss": 0.2451, "step": 19523 }, { "epoch": 0.6700068634179821, "grad_norm": 0.7327638300511172, "learning_rate": 2.594592622439106e-06, "loss": 0.2393, "step": 19524 }, { "epoch": 0.670041180507893, "grad_norm": 0.7508900266889391, "learning_rate": 2.5941054343029438e-06, "loss": 0.2749, "step": 19525 }, { "epoch": 0.6700754975978037, "grad_norm": 0.9874125683781251, "learning_rate": 2.593618275888803e-06, "loss": 0.2571, "step": 19526 }, { "epoch": 0.6701098146877145, "grad_norm": 0.7099832556948237, "learning_rate": 2.5931311472027033e-06, "loss": 0.277, "step": 19527 }, { "epoch": 0.6701441317776252, "grad_norm": 0.8199871187598434, "learning_rate": 2.592644048250662e-06, "loss": 0.2768, "step": 19528 }, { "epoch": 0.6701784488675361, "grad_norm": 0.717436207484497, "learning_rate": 2.592156979038697e-06, "loss": 0.2516, "step": 19529 }, { "epoch": 0.6702127659574468, "grad_norm": 0.6851507630777867, "learning_rate": 2.5916699395728272e-06, "loss": 0.2831, "step": 19530 }, { "epoch": 0.6702470830473576, "grad_norm": 0.7962463529724549, "learning_rate": 2.591182929859063e-06, "loss": 0.2874, "step": 19531 }, { "epoch": 0.6702814001372683, "grad_norm": 0.8713954813266543, "learning_rate": 2.59069594990343e-06, "loss": 0.338, "step": 19532 }, { "epoch": 0.6703157172271791, "grad_norm": 0.7431728796665282, "learning_rate": 2.5902089997119375e-06, "loss": 0.2589, "step": 19533 }, { "epoch": 0.67035003431709, "grad_norm": 0.9720044186847449, "learning_rate": 2.5897220792906023e-06, "loss": 0.2644, "step": 19534 }, { "epoch": 0.6703843514070007, "grad_norm": 0.7722683490015056, "learning_rate": 2.5892351886454415e-06, "loss": 0.255, "step": 19535 }, { "epoch": 0.6704186684969115, "grad_norm": 0.9014517256711153, "learning_rate": 2.5887483277824695e-06, "loss": 0.275, "step": 19536 }, { "epoch": 0.6704529855868222, "grad_norm": 0.8245693774051902, "learning_rate": 2.5882614967077e-06, "loss": 0.3368, "step": 19537 }, { "epoch": 0.6704873026767331, "grad_norm": 0.7982394154689176, "learning_rate": 2.5877746954271484e-06, "loss": 0.256, "step": 19538 }, { "epoch": 0.6705216197666438, "grad_norm": 0.7909327743825371, "learning_rate": 2.5872879239468296e-06, "loss": 0.2771, "step": 19539 }, { "epoch": 0.6705559368565546, "grad_norm": 0.7877512901279566, "learning_rate": 2.5868011822727516e-06, "loss": 0.292, "step": 19540 }, { "epoch": 0.6705902539464653, "grad_norm": 0.8355051103706786, "learning_rate": 2.5863144704109337e-06, "loss": 0.2431, "step": 19541 }, { "epoch": 0.6706245710363761, "grad_norm": 0.6914699161987558, "learning_rate": 2.585827788367389e-06, "loss": 0.2705, "step": 19542 }, { "epoch": 0.6706588881262869, "grad_norm": 0.8061706994442664, "learning_rate": 2.585341136148122e-06, "loss": 0.26, "step": 19543 }, { "epoch": 0.6706932052161977, "grad_norm": 0.8471342737117937, "learning_rate": 2.584854513759155e-06, "loss": 0.267, "step": 19544 }, { "epoch": 0.6707275223061084, "grad_norm": 0.7433644715002319, "learning_rate": 2.5843679212064924e-06, "loss": 0.2228, "step": 19545 }, { "epoch": 0.6707618393960192, "grad_norm": 0.7752500872821817, "learning_rate": 2.5838813584961477e-06, "loss": 0.2536, "step": 19546 }, { "epoch": 0.6707961564859299, "grad_norm": 0.8146355448745886, "learning_rate": 2.5833948256341324e-06, "loss": 0.2706, "step": 19547 }, { "epoch": 0.6708304735758408, "grad_norm": 0.6922375798297177, "learning_rate": 2.582908322626456e-06, "loss": 0.2553, "step": 19548 }, { "epoch": 0.6708647906657516, "grad_norm": 0.7930301754754019, "learning_rate": 2.5824218494791297e-06, "loss": 0.2209, "step": 19549 }, { "epoch": 0.6708991077556623, "grad_norm": 0.7965329613388717, "learning_rate": 2.581935406198162e-06, "loss": 0.2271, "step": 19550 }, { "epoch": 0.6709334248455731, "grad_norm": 0.7879186488561872, "learning_rate": 2.581448992789566e-06, "loss": 0.2757, "step": 19551 }, { "epoch": 0.6709677419354839, "grad_norm": 0.7651548551373695, "learning_rate": 2.580962609259343e-06, "loss": 0.344, "step": 19552 }, { "epoch": 0.6710020590253947, "grad_norm": 0.8861954696056534, "learning_rate": 2.580476255613512e-06, "loss": 0.247, "step": 19553 }, { "epoch": 0.6710363761153054, "grad_norm": 0.7407824063400077, "learning_rate": 2.579989931858073e-06, "loss": 0.2774, "step": 19554 }, { "epoch": 0.6710706932052162, "grad_norm": 0.7836432055750492, "learning_rate": 2.579503637999037e-06, "loss": 0.2627, "step": 19555 }, { "epoch": 0.6711050102951269, "grad_norm": 0.7497284640339704, "learning_rate": 2.5790173740424117e-06, "loss": 0.2622, "step": 19556 }, { "epoch": 0.6711393273850378, "grad_norm": 0.7381191448076663, "learning_rate": 2.5785311399942044e-06, "loss": 0.227, "step": 19557 }, { "epoch": 0.6711736444749485, "grad_norm": 0.875974648193035, "learning_rate": 2.5780449358604213e-06, "loss": 0.2687, "step": 19558 }, { "epoch": 0.6712079615648593, "grad_norm": 0.8417011826556017, "learning_rate": 2.57755876164707e-06, "loss": 0.2961, "step": 19559 }, { "epoch": 0.67124227865477, "grad_norm": 0.7609681445529459, "learning_rate": 2.5770726173601545e-06, "loss": 0.3025, "step": 19560 }, { "epoch": 0.6712765957446809, "grad_norm": 0.7607754079881187, "learning_rate": 2.5765865030056835e-06, "loss": 0.2633, "step": 19561 }, { "epoch": 0.6713109128345917, "grad_norm": 0.8432068636236233, "learning_rate": 2.5761004185896622e-06, "loss": 0.2505, "step": 19562 }, { "epoch": 0.6713452299245024, "grad_norm": 0.778565386486646, "learning_rate": 2.57561436411809e-06, "loss": 0.2856, "step": 19563 }, { "epoch": 0.6713795470144132, "grad_norm": 0.8844546157499245, "learning_rate": 2.575128339596978e-06, "loss": 0.3076, "step": 19564 }, { "epoch": 0.6714138641043239, "grad_norm": 0.7281658782887519, "learning_rate": 2.57464234503233e-06, "loss": 0.2501, "step": 19565 }, { "epoch": 0.6714481811942348, "grad_norm": 0.7720259100235864, "learning_rate": 2.574156380430144e-06, "loss": 0.2966, "step": 19566 }, { "epoch": 0.6714824982841455, "grad_norm": 0.7740378542712664, "learning_rate": 2.5736704457964323e-06, "loss": 0.2517, "step": 19567 }, { "epoch": 0.6715168153740563, "grad_norm": 0.8223643030389934, "learning_rate": 2.5731845411371913e-06, "loss": 0.2888, "step": 19568 }, { "epoch": 0.671551132463967, "grad_norm": 0.8498861002207097, "learning_rate": 2.5726986664584252e-06, "loss": 0.253, "step": 19569 }, { "epoch": 0.6715854495538778, "grad_norm": 0.691094599143938, "learning_rate": 2.572212821766138e-06, "loss": 0.334, "step": 19570 }, { "epoch": 0.6716197666437886, "grad_norm": 0.700135499969403, "learning_rate": 2.5717270070663304e-06, "loss": 0.2325, "step": 19571 }, { "epoch": 0.6716540837336994, "grad_norm": 0.711249625856502, "learning_rate": 2.5712412223650046e-06, "loss": 0.243, "step": 19572 }, { "epoch": 0.6716884008236101, "grad_norm": 0.7549105132945244, "learning_rate": 2.5707554676681616e-06, "loss": 0.3056, "step": 19573 }, { "epoch": 0.6717227179135209, "grad_norm": 0.7201876355610777, "learning_rate": 2.5702697429818047e-06, "loss": 0.2818, "step": 19574 }, { "epoch": 0.6717570350034318, "grad_norm": 0.7872375214250621, "learning_rate": 2.5697840483119285e-06, "loss": 0.2708, "step": 19575 }, { "epoch": 0.6717913520933425, "grad_norm": 0.7707403683428786, "learning_rate": 2.5692983836645413e-06, "loss": 0.2748, "step": 19576 }, { "epoch": 0.6718256691832533, "grad_norm": 0.8449340127873135, "learning_rate": 2.568812749045636e-06, "loss": 0.2167, "step": 19577 }, { "epoch": 0.671859986273164, "grad_norm": 0.8251161272936094, "learning_rate": 2.5683271444612156e-06, "loss": 0.3169, "step": 19578 }, { "epoch": 0.6718943033630748, "grad_norm": 0.6977199255266215, "learning_rate": 2.5678415699172775e-06, "loss": 0.2823, "step": 19579 }, { "epoch": 0.6719286204529856, "grad_norm": 0.8912119597423641, "learning_rate": 2.5673560254198215e-06, "loss": 0.254, "step": 19580 }, { "epoch": 0.6719629375428964, "grad_norm": 0.7807275986031819, "learning_rate": 2.566870510974845e-06, "loss": 0.2633, "step": 19581 }, { "epoch": 0.6719972546328071, "grad_norm": 0.9695526812773257, "learning_rate": 2.566385026588347e-06, "loss": 0.2531, "step": 19582 }, { "epoch": 0.6720315717227179, "grad_norm": 0.8491780477259449, "learning_rate": 2.565899572266327e-06, "loss": 0.2888, "step": 19583 }, { "epoch": 0.6720658888126287, "grad_norm": 0.7594311366753022, "learning_rate": 2.5654141480147753e-06, "loss": 0.2319, "step": 19584 }, { "epoch": 0.6721002059025395, "grad_norm": 0.7350525218502921, "learning_rate": 2.5649287538396983e-06, "loss": 0.27, "step": 19585 }, { "epoch": 0.6721345229924502, "grad_norm": 0.7933995078825706, "learning_rate": 2.5644433897470845e-06, "loss": 0.2844, "step": 19586 }, { "epoch": 0.672168840082361, "grad_norm": 0.7380710984929092, "learning_rate": 2.563958055742932e-06, "loss": 0.2411, "step": 19587 }, { "epoch": 0.6722031571722717, "grad_norm": 0.8851877552249712, "learning_rate": 2.5634727518332408e-06, "loss": 0.3006, "step": 19588 }, { "epoch": 0.6722374742621826, "grad_norm": 0.8560924607007703, "learning_rate": 2.5629874780239994e-06, "loss": 0.3162, "step": 19589 }, { "epoch": 0.6722717913520934, "grad_norm": 0.9272751344758907, "learning_rate": 2.5625022343212104e-06, "loss": 0.2279, "step": 19590 }, { "epoch": 0.6723061084420041, "grad_norm": 0.7920197764066103, "learning_rate": 2.5620170207308625e-06, "loss": 0.2851, "step": 19591 }, { "epoch": 0.6723404255319149, "grad_norm": 0.7246561966205047, "learning_rate": 2.5615318372589517e-06, "loss": 0.241, "step": 19592 }, { "epoch": 0.6723747426218256, "grad_norm": 0.9232633258042543, "learning_rate": 2.5610466839114724e-06, "loss": 0.232, "step": 19593 }, { "epoch": 0.6724090597117365, "grad_norm": 0.767401395959524, "learning_rate": 2.5605615606944167e-06, "loss": 0.2641, "step": 19594 }, { "epoch": 0.6724433768016472, "grad_norm": 0.7826142953437383, "learning_rate": 2.56007646761378e-06, "loss": 0.2804, "step": 19595 }, { "epoch": 0.672477693891558, "grad_norm": 0.8169075742544768, "learning_rate": 2.5595914046755534e-06, "loss": 0.3062, "step": 19596 }, { "epoch": 0.6725120109814687, "grad_norm": 0.7906404635420128, "learning_rate": 2.559106371885732e-06, "loss": 0.2699, "step": 19597 }, { "epoch": 0.6725463280713796, "grad_norm": 0.7513472798934892, "learning_rate": 2.5586213692503015e-06, "loss": 0.2543, "step": 19598 }, { "epoch": 0.6725806451612903, "grad_norm": 0.7671496215085867, "learning_rate": 2.5581363967752613e-06, "loss": 0.2467, "step": 19599 }, { "epoch": 0.6726149622512011, "grad_norm": 1.081356913242408, "learning_rate": 2.557651454466597e-06, "loss": 0.2924, "step": 19600 }, { "epoch": 0.6726492793411118, "grad_norm": 0.8478417046701703, "learning_rate": 2.5571665423303016e-06, "loss": 0.2776, "step": 19601 }, { "epoch": 0.6726835964310226, "grad_norm": 0.8150558409498792, "learning_rate": 2.5566816603723654e-06, "loss": 0.2533, "step": 19602 }, { "epoch": 0.6727179135209335, "grad_norm": 0.7694541302786595, "learning_rate": 2.556196808598779e-06, "loss": 0.2424, "step": 19603 }, { "epoch": 0.6727522306108442, "grad_norm": 0.8272645371637384, "learning_rate": 2.5557119870155324e-06, "loss": 0.212, "step": 19604 }, { "epoch": 0.672786547700755, "grad_norm": 0.790974068853289, "learning_rate": 2.5552271956286134e-06, "loss": 0.2752, "step": 19605 }, { "epoch": 0.6728208647906657, "grad_norm": 0.8419639271782067, "learning_rate": 2.554742434444014e-06, "loss": 0.2383, "step": 19606 }, { "epoch": 0.6728551818805766, "grad_norm": 0.7975520321855233, "learning_rate": 2.5542577034677174e-06, "loss": 0.2605, "step": 19607 }, { "epoch": 0.6728894989704873, "grad_norm": 0.74476714067375, "learning_rate": 2.553773002705717e-06, "loss": 0.3065, "step": 19608 }, { "epoch": 0.6729238160603981, "grad_norm": 0.7498497973175703, "learning_rate": 2.553288332164002e-06, "loss": 0.3081, "step": 19609 }, { "epoch": 0.6729581331503088, "grad_norm": 0.8096798989340964, "learning_rate": 2.552803691848552e-06, "loss": 0.2871, "step": 19610 }, { "epoch": 0.6729924502402196, "grad_norm": 0.7843597232018813, "learning_rate": 2.5523190817653643e-06, "loss": 0.2644, "step": 19611 }, { "epoch": 0.6730267673301304, "grad_norm": 0.6877530000066423, "learning_rate": 2.5518345019204177e-06, "loss": 0.2493, "step": 19612 }, { "epoch": 0.6730610844200412, "grad_norm": 0.8685229763585065, "learning_rate": 2.5513499523197015e-06, "loss": 0.2849, "step": 19613 }, { "epoch": 0.673095401509952, "grad_norm": 0.641482450685847, "learning_rate": 2.550865432969203e-06, "loss": 0.222, "step": 19614 }, { "epoch": 0.6731297185998627, "grad_norm": 0.8959674259395409, "learning_rate": 2.5503809438749056e-06, "loss": 0.2424, "step": 19615 }, { "epoch": 0.6731640356897735, "grad_norm": 0.8077847602890705, "learning_rate": 2.549896485042796e-06, "loss": 0.3242, "step": 19616 }, { "epoch": 0.6731983527796843, "grad_norm": 0.787293582122319, "learning_rate": 2.5494120564788584e-06, "loss": 0.2228, "step": 19617 }, { "epoch": 0.6732326698695951, "grad_norm": 0.9336191902365795, "learning_rate": 2.54892765818908e-06, "loss": 0.2369, "step": 19618 }, { "epoch": 0.6732669869595058, "grad_norm": 0.8922737585573752, "learning_rate": 2.5484432901794386e-06, "loss": 0.2554, "step": 19619 }, { "epoch": 0.6733013040494166, "grad_norm": 0.7453767046910971, "learning_rate": 2.5479589524559267e-06, "loss": 0.2519, "step": 19620 }, { "epoch": 0.6733356211393274, "grad_norm": 0.7590541929684727, "learning_rate": 2.5474746450245186e-06, "loss": 0.2847, "step": 19621 }, { "epoch": 0.6733699382292382, "grad_norm": 0.7733505899140348, "learning_rate": 2.546990367891206e-06, "loss": 0.2804, "step": 19622 }, { "epoch": 0.6734042553191489, "grad_norm": 0.7640205650528283, "learning_rate": 2.5465061210619657e-06, "loss": 0.2507, "step": 19623 }, { "epoch": 0.6734385724090597, "grad_norm": 0.8006182348094276, "learning_rate": 2.5460219045427814e-06, "loss": 0.2663, "step": 19624 }, { "epoch": 0.6734728894989704, "grad_norm": 0.7809423398629205, "learning_rate": 2.545537718339636e-06, "loss": 0.2613, "step": 19625 }, { "epoch": 0.6735072065888813, "grad_norm": 0.7709294932532268, "learning_rate": 2.5450535624585094e-06, "loss": 0.2182, "step": 19626 }, { "epoch": 0.673541523678792, "grad_norm": 0.8644474669765316, "learning_rate": 2.544569436905385e-06, "loss": 0.3, "step": 19627 }, { "epoch": 0.6735758407687028, "grad_norm": 0.7948989789267592, "learning_rate": 2.5440853416862415e-06, "loss": 0.2904, "step": 19628 }, { "epoch": 0.6736101578586136, "grad_norm": 0.7515017993477173, "learning_rate": 2.543601276807063e-06, "loss": 0.2912, "step": 19629 }, { "epoch": 0.6736444749485244, "grad_norm": 0.805620361206098, "learning_rate": 2.543117242273823e-06, "loss": 0.2664, "step": 19630 }, { "epoch": 0.6736787920384352, "grad_norm": 0.6959428722381721, "learning_rate": 2.5426332380925054e-06, "loss": 0.2311, "step": 19631 }, { "epoch": 0.6737131091283459, "grad_norm": 0.7845731911442233, "learning_rate": 2.542149264269093e-06, "loss": 0.2241, "step": 19632 }, { "epoch": 0.6737474262182567, "grad_norm": 0.830324489062822, "learning_rate": 2.541665320809556e-06, "loss": 0.2205, "step": 19633 }, { "epoch": 0.6737817433081674, "grad_norm": 0.7293167853892483, "learning_rate": 2.5411814077198814e-06, "loss": 0.339, "step": 19634 }, { "epoch": 0.6738160603980783, "grad_norm": 0.733371528641737, "learning_rate": 2.5406975250060426e-06, "loss": 0.2336, "step": 19635 }, { "epoch": 0.673850377487989, "grad_norm": 0.6776126965152026, "learning_rate": 2.540213672674019e-06, "loss": 0.253, "step": 19636 }, { "epoch": 0.6738846945778998, "grad_norm": 0.8101167856111651, "learning_rate": 2.5397298507297867e-06, "loss": 0.2498, "step": 19637 }, { "epoch": 0.6739190116678105, "grad_norm": 0.8751754237733855, "learning_rate": 2.5392460591793246e-06, "loss": 0.2304, "step": 19638 }, { "epoch": 0.6739533287577213, "grad_norm": 0.722403045734547, "learning_rate": 2.5387622980286077e-06, "loss": 0.2847, "step": 19639 }, { "epoch": 0.6739876458476322, "grad_norm": 0.7192174329793738, "learning_rate": 2.538278567283613e-06, "loss": 0.2608, "step": 19640 }, { "epoch": 0.6740219629375429, "grad_norm": 0.6761026953538043, "learning_rate": 2.5377948669503188e-06, "loss": 0.3033, "step": 19641 }, { "epoch": 0.6740562800274537, "grad_norm": 1.0815566467749422, "learning_rate": 2.537311197034694e-06, "loss": 0.2435, "step": 19642 }, { "epoch": 0.6740905971173644, "grad_norm": 0.743467703457653, "learning_rate": 2.5368275575427227e-06, "loss": 0.2813, "step": 19643 }, { "epoch": 0.6741249142072753, "grad_norm": 0.7776166511587859, "learning_rate": 2.536343948480373e-06, "loss": 0.3096, "step": 19644 }, { "epoch": 0.674159231297186, "grad_norm": 0.7243693961469709, "learning_rate": 2.5358603698536214e-06, "loss": 0.2565, "step": 19645 }, { "epoch": 0.6741935483870968, "grad_norm": 0.7676496861971269, "learning_rate": 2.535376821668443e-06, "loss": 0.2957, "step": 19646 }, { "epoch": 0.6742278654770075, "grad_norm": 0.7815644191550082, "learning_rate": 2.5348933039308088e-06, "loss": 0.2568, "step": 19647 }, { "epoch": 0.6742621825669183, "grad_norm": 0.640137828218011, "learning_rate": 2.5344098166466947e-06, "loss": 0.2128, "step": 19648 }, { "epoch": 0.6742964996568291, "grad_norm": 0.70216117763037, "learning_rate": 2.5339263598220726e-06, "loss": 0.2326, "step": 19649 }, { "epoch": 0.6743308167467399, "grad_norm": 0.8225849197213339, "learning_rate": 2.533442933462917e-06, "loss": 0.2451, "step": 19650 }, { "epoch": 0.6743651338366506, "grad_norm": 0.8192738272112597, "learning_rate": 2.532959537575195e-06, "loss": 0.2875, "step": 19651 }, { "epoch": 0.6743994509265614, "grad_norm": 0.722365419462336, "learning_rate": 2.532476172164883e-06, "loss": 0.3081, "step": 19652 }, { "epoch": 0.6744337680164723, "grad_norm": 0.8202729726813321, "learning_rate": 2.5319928372379504e-06, "loss": 0.3213, "step": 19653 }, { "epoch": 0.674468085106383, "grad_norm": 0.7887761092902585, "learning_rate": 2.531509532800369e-06, "loss": 0.2883, "step": 19654 }, { "epoch": 0.6745024021962938, "grad_norm": 0.7435486524900703, "learning_rate": 2.5310262588581123e-06, "loss": 0.2848, "step": 19655 }, { "epoch": 0.6745367192862045, "grad_norm": 0.835973331836617, "learning_rate": 2.530543015417143e-06, "loss": 0.2703, "step": 19656 }, { "epoch": 0.6745710363761153, "grad_norm": 0.7377749580804772, "learning_rate": 2.53005980248344e-06, "loss": 0.2244, "step": 19657 }, { "epoch": 0.6746053534660261, "grad_norm": 0.6502627053085241, "learning_rate": 2.529576620062966e-06, "loss": 0.2715, "step": 19658 }, { "epoch": 0.6746396705559369, "grad_norm": 0.7509346888249069, "learning_rate": 2.529093468161693e-06, "loss": 0.2805, "step": 19659 }, { "epoch": 0.6746739876458476, "grad_norm": 0.8816294041407358, "learning_rate": 2.5286103467855895e-06, "loss": 0.2953, "step": 19660 }, { "epoch": 0.6747083047357584, "grad_norm": 0.8306506105754978, "learning_rate": 2.528127255940623e-06, "loss": 0.2799, "step": 19661 }, { "epoch": 0.6747426218256691, "grad_norm": 0.8458226367506505, "learning_rate": 2.5276441956327633e-06, "loss": 0.2519, "step": 19662 }, { "epoch": 0.67477693891558, "grad_norm": 0.7675168699068293, "learning_rate": 2.5271611658679773e-06, "loss": 0.2806, "step": 19663 }, { "epoch": 0.6748112560054907, "grad_norm": 0.8215210922087475, "learning_rate": 2.5266781666522332e-06, "loss": 0.2432, "step": 19664 }, { "epoch": 0.6748455730954015, "grad_norm": 0.733988224294122, "learning_rate": 2.5261951979914935e-06, "loss": 0.3107, "step": 19665 }, { "epoch": 0.6748798901853122, "grad_norm": 0.7544439484842207, "learning_rate": 2.5257122598917304e-06, "loss": 0.2994, "step": 19666 }, { "epoch": 0.6749142072752231, "grad_norm": 0.7837095156887859, "learning_rate": 2.525229352358907e-06, "loss": 0.2856, "step": 19667 }, { "epoch": 0.6749485243651339, "grad_norm": 1.1172742435034997, "learning_rate": 2.5247464753989892e-06, "loss": 0.3008, "step": 19668 }, { "epoch": 0.6749828414550446, "grad_norm": 0.923602252392443, "learning_rate": 2.5242636290179433e-06, "loss": 0.271, "step": 19669 }, { "epoch": 0.6750171585449554, "grad_norm": 0.8145096695393549, "learning_rate": 2.5237808132217334e-06, "loss": 0.2892, "step": 19670 }, { "epoch": 0.6750514756348661, "grad_norm": 0.7857622443080414, "learning_rate": 2.5232980280163245e-06, "loss": 0.3326, "step": 19671 }, { "epoch": 0.675085792724777, "grad_norm": 0.7039720823608521, "learning_rate": 2.5228152734076812e-06, "loss": 0.2727, "step": 19672 }, { "epoch": 0.6751201098146877, "grad_norm": 0.7991367261324983, "learning_rate": 2.5223325494017687e-06, "loss": 0.2174, "step": 19673 }, { "epoch": 0.6751544269045985, "grad_norm": 0.929847348110175, "learning_rate": 2.5218498560045453e-06, "loss": 0.3131, "step": 19674 }, { "epoch": 0.6751887439945092, "grad_norm": 0.7857611682785028, "learning_rate": 2.52136719322198e-06, "loss": 0.3126, "step": 19675 }, { "epoch": 0.6752230610844201, "grad_norm": 0.7881826623759692, "learning_rate": 2.520884561060034e-06, "loss": 0.2879, "step": 19676 }, { "epoch": 0.6752573781743308, "grad_norm": 0.8406423602746, "learning_rate": 2.5204019595246654e-06, "loss": 0.3247, "step": 19677 }, { "epoch": 0.6752916952642416, "grad_norm": 0.7914679858116234, "learning_rate": 2.5199193886218444e-06, "loss": 0.3212, "step": 19678 }, { "epoch": 0.6753260123541523, "grad_norm": 0.7400579710354558, "learning_rate": 2.5194368483575248e-06, "loss": 0.3268, "step": 19679 }, { "epoch": 0.6753603294440631, "grad_norm": 0.8938544168126807, "learning_rate": 2.5189543387376714e-06, "loss": 0.2797, "step": 19680 }, { "epoch": 0.675394646533974, "grad_norm": 0.7489042004417475, "learning_rate": 2.5184718597682447e-06, "loss": 0.2951, "step": 19681 }, { "epoch": 0.6754289636238847, "grad_norm": 0.7561302249819996, "learning_rate": 2.517989411455205e-06, "loss": 0.2859, "step": 19682 }, { "epoch": 0.6754632807137955, "grad_norm": 0.8598155969219662, "learning_rate": 2.5175069938045118e-06, "loss": 0.2754, "step": 19683 }, { "epoch": 0.6754975978037062, "grad_norm": 0.7926953495050583, "learning_rate": 2.517024606822126e-06, "loss": 0.3086, "step": 19684 }, { "epoch": 0.675531914893617, "grad_norm": 0.7259427949197331, "learning_rate": 2.516542250514006e-06, "loss": 0.2811, "step": 19685 }, { "epoch": 0.6755662319835278, "grad_norm": 0.7187686285621502, "learning_rate": 2.5160599248861106e-06, "loss": 0.2566, "step": 19686 }, { "epoch": 0.6756005490734386, "grad_norm": 0.7875073459989084, "learning_rate": 2.5155776299444017e-06, "loss": 0.2572, "step": 19687 }, { "epoch": 0.6756348661633493, "grad_norm": 0.7642272805689538, "learning_rate": 2.5150953656948307e-06, "loss": 0.2566, "step": 19688 }, { "epoch": 0.6756691832532601, "grad_norm": 0.8659754385339008, "learning_rate": 2.5146131321433627e-06, "loss": 0.2856, "step": 19689 }, { "epoch": 0.675703500343171, "grad_norm": 0.7967385596591047, "learning_rate": 2.51413092929595e-06, "loss": 0.3114, "step": 19690 }, { "epoch": 0.6757378174330817, "grad_norm": 0.7922580583966056, "learning_rate": 2.5136487571585494e-06, "loss": 0.263, "step": 19691 }, { "epoch": 0.6757721345229925, "grad_norm": 0.8352632828200138, "learning_rate": 2.5131666157371237e-06, "loss": 0.329, "step": 19692 }, { "epoch": 0.6758064516129032, "grad_norm": 0.9269085811693051, "learning_rate": 2.5126845050376224e-06, "loss": 0.2662, "step": 19693 }, { "epoch": 0.675840768702814, "grad_norm": 0.7831141611844762, "learning_rate": 2.5122024250660047e-06, "loss": 0.2625, "step": 19694 }, { "epoch": 0.6758750857927248, "grad_norm": 0.6973759107305612, "learning_rate": 2.511720375828226e-06, "loss": 0.2661, "step": 19695 }, { "epoch": 0.6759094028826356, "grad_norm": 0.780411972850868, "learning_rate": 2.5112383573302424e-06, "loss": 0.2889, "step": 19696 }, { "epoch": 0.6759437199725463, "grad_norm": 0.808158268623394, "learning_rate": 2.5107563695780036e-06, "loss": 0.3234, "step": 19697 }, { "epoch": 0.6759780370624571, "grad_norm": 0.7559029639783813, "learning_rate": 2.5102744125774696e-06, "loss": 0.2928, "step": 19698 }, { "epoch": 0.6760123541523679, "grad_norm": 0.8463309141400849, "learning_rate": 2.5097924863345947e-06, "loss": 0.2942, "step": 19699 }, { "epoch": 0.6760466712422787, "grad_norm": 0.6641198964719681, "learning_rate": 2.509310590855326e-06, "loss": 0.2498, "step": 19700 }, { "epoch": 0.6760809883321894, "grad_norm": 0.7045653602558074, "learning_rate": 2.508828726145625e-06, "loss": 0.2674, "step": 19701 }, { "epoch": 0.6761153054221002, "grad_norm": 0.7834591473763447, "learning_rate": 2.5083468922114385e-06, "loss": 0.2531, "step": 19702 }, { "epoch": 0.6761496225120109, "grad_norm": 0.7637251890742217, "learning_rate": 2.507865089058722e-06, "loss": 0.2255, "step": 19703 }, { "epoch": 0.6761839396019218, "grad_norm": 0.7418612672708549, "learning_rate": 2.507383316693426e-06, "loss": 0.2706, "step": 19704 }, { "epoch": 0.6762182566918326, "grad_norm": 0.6917859659397838, "learning_rate": 2.506901575121503e-06, "loss": 0.2505, "step": 19705 }, { "epoch": 0.6762525737817433, "grad_norm": 0.7657015788130505, "learning_rate": 2.506419864348905e-06, "loss": 0.2398, "step": 19706 }, { "epoch": 0.6762868908716541, "grad_norm": 0.767199543583704, "learning_rate": 2.505938184381582e-06, "loss": 0.2567, "step": 19707 }, { "epoch": 0.6763212079615648, "grad_norm": 0.7322986785072885, "learning_rate": 2.505456535225487e-06, "loss": 0.2317, "step": 19708 }, { "epoch": 0.6763555250514757, "grad_norm": 0.8378136782000365, "learning_rate": 2.5049749168865646e-06, "loss": 0.2723, "step": 19709 }, { "epoch": 0.6763898421413864, "grad_norm": 0.7499019609707849, "learning_rate": 2.5044933293707706e-06, "loss": 0.2522, "step": 19710 }, { "epoch": 0.6764241592312972, "grad_norm": 0.818477709629926, "learning_rate": 2.504011772684052e-06, "loss": 0.2904, "step": 19711 }, { "epoch": 0.6764584763212079, "grad_norm": 0.8650498403723923, "learning_rate": 2.5035302468323565e-06, "loss": 0.3235, "step": 19712 }, { "epoch": 0.6764927934111188, "grad_norm": 0.701539509785898, "learning_rate": 2.503048751821635e-06, "loss": 0.2319, "step": 19713 }, { "epoch": 0.6765271105010295, "grad_norm": 0.9013182743372403, "learning_rate": 2.5025672876578346e-06, "loss": 0.3062, "step": 19714 }, { "epoch": 0.6765614275909403, "grad_norm": 0.7862724257216112, "learning_rate": 2.502085854346904e-06, "loss": 0.2433, "step": 19715 }, { "epoch": 0.676595744680851, "grad_norm": 0.7570935578763759, "learning_rate": 2.50160445189479e-06, "loss": 0.2454, "step": 19716 }, { "epoch": 0.6766300617707618, "grad_norm": 0.7961879840852213, "learning_rate": 2.5011230803074403e-06, "loss": 0.2732, "step": 19717 }, { "epoch": 0.6766643788606727, "grad_norm": 0.8286743998478818, "learning_rate": 2.500641739590801e-06, "loss": 0.2695, "step": 19718 }, { "epoch": 0.6766986959505834, "grad_norm": 0.9013347030032016, "learning_rate": 2.50016042975082e-06, "loss": 0.2516, "step": 19719 }, { "epoch": 0.6767330130404942, "grad_norm": 0.7815129411889261, "learning_rate": 2.499679150793442e-06, "loss": 0.2824, "step": 19720 }, { "epoch": 0.6767673301304049, "grad_norm": 0.7941437323588187, "learning_rate": 2.499197902724613e-06, "loss": 0.2853, "step": 19721 }, { "epoch": 0.6768016472203158, "grad_norm": 0.7555047692951027, "learning_rate": 2.4987166855502798e-06, "loss": 0.284, "step": 19722 }, { "epoch": 0.6768359643102265, "grad_norm": 0.7946933581696485, "learning_rate": 2.4982354992763824e-06, "loss": 0.3289, "step": 19723 }, { "epoch": 0.6768702814001373, "grad_norm": 0.826030717461148, "learning_rate": 2.497754343908872e-06, "loss": 0.2868, "step": 19724 }, { "epoch": 0.676904598490048, "grad_norm": 0.8112609571496531, "learning_rate": 2.497273219453688e-06, "loss": 0.2907, "step": 19725 }, { "epoch": 0.6769389155799588, "grad_norm": 0.7093548893594787, "learning_rate": 2.4967921259167748e-06, "loss": 0.2724, "step": 19726 }, { "epoch": 0.6769732326698696, "grad_norm": 0.7714417696290707, "learning_rate": 2.496311063304077e-06, "loss": 0.2623, "step": 19727 }, { "epoch": 0.6770075497597804, "grad_norm": 0.7479075538232635, "learning_rate": 2.4958300316215368e-06, "loss": 0.2474, "step": 19728 }, { "epoch": 0.6770418668496911, "grad_norm": 0.7608304828252009, "learning_rate": 2.495349030875097e-06, "loss": 0.2756, "step": 19729 }, { "epoch": 0.6770761839396019, "grad_norm": 0.843093730507613, "learning_rate": 2.4948680610707e-06, "loss": 0.2894, "step": 19730 }, { "epoch": 0.6771105010295126, "grad_norm": 0.7566090324929235, "learning_rate": 2.494387122214289e-06, "loss": 0.2435, "step": 19731 }, { "epoch": 0.6771448181194235, "grad_norm": 0.7599319292994843, "learning_rate": 2.4939062143118e-06, "loss": 0.2855, "step": 19732 }, { "epoch": 0.6771791352093343, "grad_norm": 0.7981504686368146, "learning_rate": 2.493425337369182e-06, "loss": 0.2677, "step": 19733 }, { "epoch": 0.677213452299245, "grad_norm": 0.8356064298520332, "learning_rate": 2.4929444913923694e-06, "loss": 0.2842, "step": 19734 }, { "epoch": 0.6772477693891558, "grad_norm": 0.8432898430992551, "learning_rate": 2.4924636763873043e-06, "loss": 0.286, "step": 19735 }, { "epoch": 0.6772820864790666, "grad_norm": 0.8161826330648254, "learning_rate": 2.491982892359928e-06, "loss": 0.3261, "step": 19736 }, { "epoch": 0.6773164035689774, "grad_norm": 0.7102315933679845, "learning_rate": 2.491502139316178e-06, "loss": 0.2947, "step": 19737 }, { "epoch": 0.6773507206588881, "grad_norm": 0.7892416671304978, "learning_rate": 2.491021417261995e-06, "loss": 0.282, "step": 19738 }, { "epoch": 0.6773850377487989, "grad_norm": 0.7111597497699969, "learning_rate": 2.490540726203317e-06, "loss": 0.2021, "step": 19739 }, { "epoch": 0.6774193548387096, "grad_norm": 0.7648906235871288, "learning_rate": 2.490060066146085e-06, "loss": 0.3004, "step": 19740 }, { "epoch": 0.6774536719286205, "grad_norm": 0.7298151693071193, "learning_rate": 2.4895794370962303e-06, "loss": 0.2209, "step": 19741 }, { "epoch": 0.6774879890185312, "grad_norm": 0.803236277557398, "learning_rate": 2.4890988390596965e-06, "loss": 0.2577, "step": 19742 }, { "epoch": 0.677522306108442, "grad_norm": 0.7806659419500522, "learning_rate": 2.488618272042421e-06, "loss": 0.2544, "step": 19743 }, { "epoch": 0.6775566231983527, "grad_norm": 0.73241311243644, "learning_rate": 2.4881377360503354e-06, "loss": 0.2442, "step": 19744 }, { "epoch": 0.6775909402882636, "grad_norm": 0.7629381773490939, "learning_rate": 2.487657231089383e-06, "loss": 0.2364, "step": 19745 }, { "epoch": 0.6776252573781744, "grad_norm": 0.8383022229945525, "learning_rate": 2.4871767571654932e-06, "loss": 0.2796, "step": 19746 }, { "epoch": 0.6776595744680851, "grad_norm": 0.9066070061289836, "learning_rate": 2.486696314284608e-06, "loss": 0.2843, "step": 19747 }, { "epoch": 0.6776938915579959, "grad_norm": 0.8663582238486401, "learning_rate": 2.4862159024526577e-06, "loss": 0.2733, "step": 19748 }, { "epoch": 0.6777282086479066, "grad_norm": 0.971305929167258, "learning_rate": 2.4857355216755797e-06, "loss": 0.3009, "step": 19749 }, { "epoch": 0.6777625257378175, "grad_norm": 0.8523934948866195, "learning_rate": 2.485255171959308e-06, "loss": 0.2843, "step": 19750 }, { "epoch": 0.6777968428277282, "grad_norm": 0.7914989196526397, "learning_rate": 2.484774853309776e-06, "loss": 0.2631, "step": 19751 }, { "epoch": 0.677831159917639, "grad_norm": 0.8677114487468169, "learning_rate": 2.4842945657329187e-06, "loss": 0.2505, "step": 19752 }, { "epoch": 0.6778654770075497, "grad_norm": 0.8220591843322405, "learning_rate": 2.4838143092346687e-06, "loss": 0.2952, "step": 19753 }, { "epoch": 0.6778997940974605, "grad_norm": 0.8340551679729565, "learning_rate": 2.4833340838209618e-06, "loss": 0.2637, "step": 19754 }, { "epoch": 0.6779341111873713, "grad_norm": 0.7240949518863564, "learning_rate": 2.482853889497724e-06, "loss": 0.2486, "step": 19755 }, { "epoch": 0.6779684282772821, "grad_norm": 0.8167278566465118, "learning_rate": 2.482373726270895e-06, "loss": 0.2561, "step": 19756 }, { "epoch": 0.6780027453671928, "grad_norm": 0.7704982144544418, "learning_rate": 2.481893594146402e-06, "loss": 0.2697, "step": 19757 }, { "epoch": 0.6780370624571036, "grad_norm": 0.7899693441066972, "learning_rate": 2.481413493130176e-06, "loss": 0.272, "step": 19758 }, { "epoch": 0.6780713795470145, "grad_norm": 0.7322313396385759, "learning_rate": 2.4809334232281535e-06, "loss": 0.2932, "step": 19759 }, { "epoch": 0.6781056966369252, "grad_norm": 0.778660644117231, "learning_rate": 2.48045338444626e-06, "loss": 0.3294, "step": 19760 }, { "epoch": 0.678140013726836, "grad_norm": 0.8249859449111646, "learning_rate": 2.479973376790427e-06, "loss": 0.3307, "step": 19761 }, { "epoch": 0.6781743308167467, "grad_norm": 0.7277762990392811, "learning_rate": 2.4794934002665853e-06, "loss": 0.2753, "step": 19762 }, { "epoch": 0.6782086479066575, "grad_norm": 0.9318030472523459, "learning_rate": 2.4790134548806638e-06, "loss": 0.3136, "step": 19763 }, { "epoch": 0.6782429649965683, "grad_norm": 0.7219647137409467, "learning_rate": 2.478533540638592e-06, "loss": 0.3091, "step": 19764 }, { "epoch": 0.6782772820864791, "grad_norm": 0.8305951645195995, "learning_rate": 2.478053657546298e-06, "loss": 0.2548, "step": 19765 }, { "epoch": 0.6783115991763898, "grad_norm": 0.7547041457227027, "learning_rate": 2.4775738056097133e-06, "loss": 0.3096, "step": 19766 }, { "epoch": 0.6783459162663006, "grad_norm": 0.7746891745965727, "learning_rate": 2.477093984834759e-06, "loss": 0.2707, "step": 19767 }, { "epoch": 0.6783802333562114, "grad_norm": 0.7206726846022634, "learning_rate": 2.4766141952273717e-06, "loss": 0.2899, "step": 19768 }, { "epoch": 0.6784145504461222, "grad_norm": 0.7161278339207134, "learning_rate": 2.4761344367934715e-06, "loss": 0.217, "step": 19769 }, { "epoch": 0.678448867536033, "grad_norm": 0.7839091542533259, "learning_rate": 2.4756547095389876e-06, "loss": 0.2955, "step": 19770 }, { "epoch": 0.6784831846259437, "grad_norm": 0.7149012607210256, "learning_rate": 2.475175013469847e-06, "loss": 0.2452, "step": 19771 }, { "epoch": 0.6785175017158545, "grad_norm": 0.9261690146594797, "learning_rate": 2.4746953485919755e-06, "loss": 0.2563, "step": 19772 }, { "epoch": 0.6785518188057653, "grad_norm": 0.8034950043974682, "learning_rate": 2.474215714911299e-06, "loss": 0.2841, "step": 19773 }, { "epoch": 0.6785861358956761, "grad_norm": 0.7283263135309909, "learning_rate": 2.473736112433742e-06, "loss": 0.2475, "step": 19774 }, { "epoch": 0.6786204529855868, "grad_norm": 0.8255825061790896, "learning_rate": 2.4732565411652327e-06, "loss": 0.2915, "step": 19775 }, { "epoch": 0.6786547700754976, "grad_norm": 0.7777362675111212, "learning_rate": 2.4727770011116887e-06, "loss": 0.2701, "step": 19776 }, { "epoch": 0.6786890871654083, "grad_norm": 0.8044825845340395, "learning_rate": 2.4722974922790417e-06, "loss": 0.2666, "step": 19777 }, { "epoch": 0.6787234042553192, "grad_norm": 0.8682025176532199, "learning_rate": 2.471818014673209e-06, "loss": 0.2852, "step": 19778 }, { "epoch": 0.6787577213452299, "grad_norm": 0.7590774251769912, "learning_rate": 2.471338568300121e-06, "loss": 0.257, "step": 19779 }, { "epoch": 0.6787920384351407, "grad_norm": 0.8459758398755619, "learning_rate": 2.4708591531656943e-06, "loss": 0.2682, "step": 19780 }, { "epoch": 0.6788263555250514, "grad_norm": 1.022540170243092, "learning_rate": 2.4703797692758522e-06, "loss": 0.24, "step": 19781 }, { "epoch": 0.6788606726149623, "grad_norm": 0.7436384553810317, "learning_rate": 2.4699004166365233e-06, "loss": 0.3048, "step": 19782 }, { "epoch": 0.678894989704873, "grad_norm": 0.7763330130152788, "learning_rate": 2.469421095253623e-06, "loss": 0.2679, "step": 19783 }, { "epoch": 0.6789293067947838, "grad_norm": 0.7690950766795651, "learning_rate": 2.4689418051330745e-06, "loss": 0.2993, "step": 19784 }, { "epoch": 0.6789636238846946, "grad_norm": 0.7022059001861123, "learning_rate": 2.4684625462807987e-06, "loss": 0.266, "step": 19785 }, { "epoch": 0.6789979409746053, "grad_norm": 0.7660416403956625, "learning_rate": 2.467983318702717e-06, "loss": 0.2952, "step": 19786 }, { "epoch": 0.6790322580645162, "grad_norm": 0.8392171363580728, "learning_rate": 2.467504122404749e-06, "loss": 0.2509, "step": 19787 }, { "epoch": 0.6790665751544269, "grad_norm": 0.7729498630001518, "learning_rate": 2.467024957392815e-06, "loss": 0.2767, "step": 19788 }, { "epoch": 0.6791008922443377, "grad_norm": 0.7224412867817913, "learning_rate": 2.4665458236728367e-06, "loss": 0.3251, "step": 19789 }, { "epoch": 0.6791352093342484, "grad_norm": 0.7158378460888783, "learning_rate": 2.466066721250727e-06, "loss": 0.2628, "step": 19790 }, { "epoch": 0.6791695264241593, "grad_norm": 0.7672306806432553, "learning_rate": 2.4655876501324133e-06, "loss": 0.2543, "step": 19791 }, { "epoch": 0.67920384351407, "grad_norm": 0.7914205166785911, "learning_rate": 2.4651086103238063e-06, "loss": 0.2944, "step": 19792 }, { "epoch": 0.6792381606039808, "grad_norm": 0.8173730847922107, "learning_rate": 2.464629601830828e-06, "loss": 0.2946, "step": 19793 }, { "epoch": 0.6792724776938915, "grad_norm": 0.6836730219123366, "learning_rate": 2.4641506246593945e-06, "loss": 0.206, "step": 19794 }, { "epoch": 0.6793067947838023, "grad_norm": 0.7635305160461588, "learning_rate": 2.463671678815423e-06, "loss": 0.2922, "step": 19795 }, { "epoch": 0.6793411118737132, "grad_norm": 0.8070462578833768, "learning_rate": 2.4631927643048315e-06, "loss": 0.2567, "step": 19796 }, { "epoch": 0.6793754289636239, "grad_norm": 0.9244107567605951, "learning_rate": 2.462713881133536e-06, "loss": 0.2486, "step": 19797 }, { "epoch": 0.6794097460535347, "grad_norm": 0.858121229759485, "learning_rate": 2.4622350293074537e-06, "loss": 0.2609, "step": 19798 }, { "epoch": 0.6794440631434454, "grad_norm": 0.8320832164924451, "learning_rate": 2.461756208832495e-06, "loss": 0.2544, "step": 19799 }, { "epoch": 0.6794783802333562, "grad_norm": 0.8475420772260442, "learning_rate": 2.4612774197145832e-06, "loss": 0.3176, "step": 19800 }, { "epoch": 0.679512697323267, "grad_norm": 1.1724624720062051, "learning_rate": 2.460798661959627e-06, "loss": 0.2942, "step": 19801 }, { "epoch": 0.6795470144131778, "grad_norm": 0.7605497101889374, "learning_rate": 2.460319935573541e-06, "loss": 0.3065, "step": 19802 }, { "epoch": 0.6795813315030885, "grad_norm": 0.7428294520615573, "learning_rate": 2.459841240562245e-06, "loss": 0.2388, "step": 19803 }, { "epoch": 0.6796156485929993, "grad_norm": 0.6762275829565869, "learning_rate": 2.459362576931647e-06, "loss": 0.2525, "step": 19804 }, { "epoch": 0.6796499656829101, "grad_norm": 0.8503263726667097, "learning_rate": 2.458883944687662e-06, "loss": 0.2644, "step": 19805 }, { "epoch": 0.6796842827728209, "grad_norm": 0.817409958088766, "learning_rate": 2.4584053438362037e-06, "loss": 0.2939, "step": 19806 }, { "epoch": 0.6797185998627316, "grad_norm": 0.8874305376475368, "learning_rate": 2.457926774383186e-06, "loss": 0.2534, "step": 19807 }, { "epoch": 0.6797529169526424, "grad_norm": 0.7768247206775523, "learning_rate": 2.4574482363345154e-06, "loss": 0.2816, "step": 19808 }, { "epoch": 0.6797872340425531, "grad_norm": 0.7670670274042286, "learning_rate": 2.456969729696109e-06, "loss": 0.2833, "step": 19809 }, { "epoch": 0.679821551132464, "grad_norm": 0.7812852274183978, "learning_rate": 2.456491254473877e-06, "loss": 0.2209, "step": 19810 }, { "epoch": 0.6798558682223748, "grad_norm": 0.7621760312538497, "learning_rate": 2.4560128106737304e-06, "loss": 0.3326, "step": 19811 }, { "epoch": 0.6798901853122855, "grad_norm": 0.7198286127480917, "learning_rate": 2.455534398301581e-06, "loss": 0.2889, "step": 19812 }, { "epoch": 0.6799245024021963, "grad_norm": 0.723073746965857, "learning_rate": 2.4550560173633344e-06, "loss": 0.2893, "step": 19813 }, { "epoch": 0.679958819492107, "grad_norm": 0.7423089362815287, "learning_rate": 2.4545776678649064e-06, "loss": 0.2966, "step": 19814 }, { "epoch": 0.6799931365820179, "grad_norm": 0.7057553500157879, "learning_rate": 2.4540993498122025e-06, "loss": 0.2661, "step": 19815 }, { "epoch": 0.6800274536719286, "grad_norm": 0.7253966914264803, "learning_rate": 2.4536210632111322e-06, "loss": 0.2744, "step": 19816 }, { "epoch": 0.6800617707618394, "grad_norm": 0.8262542118752041, "learning_rate": 2.4531428080676056e-06, "loss": 0.2857, "step": 19817 }, { "epoch": 0.6800960878517501, "grad_norm": 0.8285328733541603, "learning_rate": 2.4526645843875295e-06, "loss": 0.2756, "step": 19818 }, { "epoch": 0.680130404941661, "grad_norm": 0.7802725303716534, "learning_rate": 2.4521863921768137e-06, "loss": 0.2549, "step": 19819 }, { "epoch": 0.6801647220315717, "grad_norm": 0.8030399301948878, "learning_rate": 2.4517082314413635e-06, "loss": 0.2841, "step": 19820 }, { "epoch": 0.6801990391214825, "grad_norm": 0.848696733610172, "learning_rate": 2.4512301021870893e-06, "loss": 0.3113, "step": 19821 }, { "epoch": 0.6802333562113932, "grad_norm": 0.7235862573591114, "learning_rate": 2.450752004419892e-06, "loss": 0.2363, "step": 19822 }, { "epoch": 0.680267673301304, "grad_norm": 0.7511980340330884, "learning_rate": 2.450273938145685e-06, "loss": 0.2765, "step": 19823 }, { "epoch": 0.6803019903912149, "grad_norm": 0.6910276326973018, "learning_rate": 2.4497959033703693e-06, "loss": 0.28, "step": 19824 }, { "epoch": 0.6803363074811256, "grad_norm": 0.7962725901353271, "learning_rate": 2.44931790009985e-06, "loss": 0.2821, "step": 19825 }, { "epoch": 0.6803706245710364, "grad_norm": 0.8406322383992298, "learning_rate": 2.4488399283400377e-06, "loss": 0.2924, "step": 19826 }, { "epoch": 0.6804049416609471, "grad_norm": 0.6507729350349077, "learning_rate": 2.4483619880968323e-06, "loss": 0.2457, "step": 19827 }, { "epoch": 0.680439258750858, "grad_norm": 0.8002260119152708, "learning_rate": 2.4478840793761392e-06, "loss": 0.2502, "step": 19828 }, { "epoch": 0.6804735758407687, "grad_norm": 0.9949358417024916, "learning_rate": 2.447406202183863e-06, "loss": 0.2799, "step": 19829 }, { "epoch": 0.6805078929306795, "grad_norm": 0.8217446625634937, "learning_rate": 2.4469283565259073e-06, "loss": 0.2354, "step": 19830 }, { "epoch": 0.6805422100205902, "grad_norm": 0.6781619087861732, "learning_rate": 2.4464505424081747e-06, "loss": 0.2435, "step": 19831 }, { "epoch": 0.680576527110501, "grad_norm": 0.8069836658946543, "learning_rate": 2.445972759836569e-06, "loss": 0.3461, "step": 19832 }, { "epoch": 0.6806108442004118, "grad_norm": 0.7657232878192044, "learning_rate": 2.4454950088169936e-06, "loss": 0.2577, "step": 19833 }, { "epoch": 0.6806451612903226, "grad_norm": 1.1445914982707466, "learning_rate": 2.445017289355346e-06, "loss": 0.281, "step": 19834 }, { "epoch": 0.6806794783802333, "grad_norm": 0.7477522674683987, "learning_rate": 2.444539601457534e-06, "loss": 0.2101, "step": 19835 }, { "epoch": 0.6807137954701441, "grad_norm": 0.7474655491112141, "learning_rate": 2.444061945129454e-06, "loss": 0.28, "step": 19836 }, { "epoch": 0.6807481125600549, "grad_norm": 0.802961460195613, "learning_rate": 2.4435843203770094e-06, "loss": 0.2722, "step": 19837 }, { "epoch": 0.6807824296499657, "grad_norm": 0.7853594892045828, "learning_rate": 2.443106727206099e-06, "loss": 0.2468, "step": 19838 }, { "epoch": 0.6808167467398765, "grad_norm": 0.8123664577702023, "learning_rate": 2.4426291656226246e-06, "loss": 0.2786, "step": 19839 }, { "epoch": 0.6808510638297872, "grad_norm": 0.721768727058369, "learning_rate": 2.4421516356324855e-06, "loss": 0.2684, "step": 19840 }, { "epoch": 0.680885380919698, "grad_norm": 0.7591746123233688, "learning_rate": 2.4416741372415805e-06, "loss": 0.2874, "step": 19841 }, { "epoch": 0.6809196980096088, "grad_norm": 0.7938923828880717, "learning_rate": 2.4411966704558088e-06, "loss": 0.3035, "step": 19842 }, { "epoch": 0.6809540150995196, "grad_norm": 0.7295970358643066, "learning_rate": 2.4407192352810686e-06, "loss": 0.2471, "step": 19843 }, { "epoch": 0.6809883321894303, "grad_norm": 0.7712835662072742, "learning_rate": 2.440241831723261e-06, "loss": 0.298, "step": 19844 }, { "epoch": 0.6810226492793411, "grad_norm": 0.7263805156011085, "learning_rate": 2.439764459788277e-06, "loss": 0.2577, "step": 19845 }, { "epoch": 0.6810569663692518, "grad_norm": 0.7528052308743245, "learning_rate": 2.4392871194820224e-06, "loss": 0.2413, "step": 19846 }, { "epoch": 0.6810912834591627, "grad_norm": 0.7755755243094097, "learning_rate": 2.4388098108103874e-06, "loss": 0.2735, "step": 19847 }, { "epoch": 0.6811256005490735, "grad_norm": 0.6799897937020023, "learning_rate": 2.4383325337792692e-06, "loss": 0.2509, "step": 19848 }, { "epoch": 0.6811599176389842, "grad_norm": 0.7802162213206698, "learning_rate": 2.43785528839457e-06, "loss": 0.2406, "step": 19849 }, { "epoch": 0.681194234728895, "grad_norm": 0.7550906423243177, "learning_rate": 2.43737807466218e-06, "loss": 0.2425, "step": 19850 }, { "epoch": 0.6812285518188058, "grad_norm": 0.7189317935940553, "learning_rate": 2.4369008925879956e-06, "loss": 0.2782, "step": 19851 }, { "epoch": 0.6812628689087166, "grad_norm": 0.7813594579936637, "learning_rate": 2.4364237421779125e-06, "loss": 0.3217, "step": 19852 }, { "epoch": 0.6812971859986273, "grad_norm": 0.7832868684769894, "learning_rate": 2.4359466234378255e-06, "loss": 0.2436, "step": 19853 }, { "epoch": 0.6813315030885381, "grad_norm": 0.8144696820157544, "learning_rate": 2.435469536373628e-06, "loss": 0.2369, "step": 19854 }, { "epoch": 0.6813658201784488, "grad_norm": 0.7944655938127525, "learning_rate": 2.434992480991215e-06, "loss": 0.276, "step": 19855 }, { "epoch": 0.6814001372683597, "grad_norm": 0.8088257039185672, "learning_rate": 2.4345154572964812e-06, "loss": 0.2491, "step": 19856 }, { "epoch": 0.6814344543582704, "grad_norm": 0.8301121141675373, "learning_rate": 2.4340384652953142e-06, "loss": 0.2349, "step": 19857 }, { "epoch": 0.6814687714481812, "grad_norm": 0.7477515577862992, "learning_rate": 2.4335615049936133e-06, "loss": 0.2264, "step": 19858 }, { "epoch": 0.6815030885380919, "grad_norm": 0.7417095597055341, "learning_rate": 2.4330845763972665e-06, "loss": 0.2413, "step": 19859 }, { "epoch": 0.6815374056280027, "grad_norm": 0.6830526871337781, "learning_rate": 2.432607679512167e-06, "loss": 0.2482, "step": 19860 }, { "epoch": 0.6815717227179136, "grad_norm": 0.7136773946185889, "learning_rate": 2.432130814344207e-06, "loss": 0.2235, "step": 19861 }, { "epoch": 0.6816060398078243, "grad_norm": 0.8451031552983357, "learning_rate": 2.431653980899276e-06, "loss": 0.2653, "step": 19862 }, { "epoch": 0.6816403568977351, "grad_norm": 0.8318131999114919, "learning_rate": 2.431177179183266e-06, "loss": 0.2795, "step": 19863 }, { "epoch": 0.6816746739876458, "grad_norm": 0.7721510426203608, "learning_rate": 2.430700409202067e-06, "loss": 0.2432, "step": 19864 }, { "epoch": 0.6817089910775567, "grad_norm": 0.7627446915459187, "learning_rate": 2.430223670961571e-06, "loss": 0.2719, "step": 19865 }, { "epoch": 0.6817433081674674, "grad_norm": 0.7719719648815422, "learning_rate": 2.429746964467661e-06, "loss": 0.2448, "step": 19866 }, { "epoch": 0.6817776252573782, "grad_norm": 0.7442198799604621, "learning_rate": 2.4292702897262353e-06, "loss": 0.2521, "step": 19867 }, { "epoch": 0.6818119423472889, "grad_norm": 0.8445946019371992, "learning_rate": 2.4287936467431757e-06, "loss": 0.3061, "step": 19868 }, { "epoch": 0.6818462594371997, "grad_norm": 0.8617923034170447, "learning_rate": 2.428317035524371e-06, "loss": 0.3031, "step": 19869 }, { "epoch": 0.6818805765271105, "grad_norm": 0.753018934576526, "learning_rate": 2.4278404560757147e-06, "loss": 0.2809, "step": 19870 }, { "epoch": 0.6819148936170213, "grad_norm": 0.7048510248927687, "learning_rate": 2.427363908403087e-06, "loss": 0.2785, "step": 19871 }, { "epoch": 0.681949210706932, "grad_norm": 0.6523973464176792, "learning_rate": 2.4268873925123825e-06, "loss": 0.2606, "step": 19872 }, { "epoch": 0.6819835277968428, "grad_norm": 0.7726775051529221, "learning_rate": 2.4264109084094828e-06, "loss": 0.2616, "step": 19873 }, { "epoch": 0.6820178448867537, "grad_norm": 0.8355678417743981, "learning_rate": 2.4259344561002756e-06, "loss": 0.2848, "step": 19874 }, { "epoch": 0.6820521619766644, "grad_norm": 0.7041052948019154, "learning_rate": 2.425458035590647e-06, "loss": 0.2439, "step": 19875 }, { "epoch": 0.6820864790665752, "grad_norm": 0.7571037419025691, "learning_rate": 2.424981646886483e-06, "loss": 0.3079, "step": 19876 }, { "epoch": 0.6821207961564859, "grad_norm": 0.9156985208475091, "learning_rate": 2.424505289993668e-06, "loss": 0.3372, "step": 19877 }, { "epoch": 0.6821551132463967, "grad_norm": 0.861397481220697, "learning_rate": 2.424028964918088e-06, "loss": 0.2263, "step": 19878 }, { "epoch": 0.6821894303363075, "grad_norm": 0.7355549728344367, "learning_rate": 2.423552671665629e-06, "loss": 0.2268, "step": 19879 }, { "epoch": 0.6822237474262183, "grad_norm": 0.6890743271871403, "learning_rate": 2.4230764102421683e-06, "loss": 0.2245, "step": 19880 }, { "epoch": 0.682258064516129, "grad_norm": 0.7498939345891094, "learning_rate": 2.4226001806535983e-06, "loss": 0.2884, "step": 19881 }, { "epoch": 0.6822923816060398, "grad_norm": 0.7750723141201368, "learning_rate": 2.4221239829057963e-06, "loss": 0.2912, "step": 19882 }, { "epoch": 0.6823266986959505, "grad_norm": 0.8683635790011792, "learning_rate": 2.421647817004647e-06, "loss": 0.2561, "step": 19883 }, { "epoch": 0.6823610157858614, "grad_norm": 0.7172433292975599, "learning_rate": 2.4211716829560327e-06, "loss": 0.2294, "step": 19884 }, { "epoch": 0.6823953328757721, "grad_norm": 0.8153305579615094, "learning_rate": 2.4206955807658355e-06, "loss": 0.2567, "step": 19885 }, { "epoch": 0.6824296499656829, "grad_norm": 0.8516548499053769, "learning_rate": 2.4202195104399374e-06, "loss": 0.2909, "step": 19886 }, { "epoch": 0.6824639670555936, "grad_norm": 0.9237685615639022, "learning_rate": 2.4197434719842194e-06, "loss": 0.2979, "step": 19887 }, { "epoch": 0.6824982841455045, "grad_norm": 1.0190113568445576, "learning_rate": 2.419267465404565e-06, "loss": 0.2907, "step": 19888 }, { "epoch": 0.6825326012354153, "grad_norm": 0.788407054797974, "learning_rate": 2.4187914907068476e-06, "loss": 0.3199, "step": 19889 }, { "epoch": 0.682566918325326, "grad_norm": 0.8418929748152841, "learning_rate": 2.418315547896956e-06, "loss": 0.3172, "step": 19890 }, { "epoch": 0.6826012354152368, "grad_norm": 0.7881230391268184, "learning_rate": 2.4178396369807644e-06, "loss": 0.2406, "step": 19891 }, { "epoch": 0.6826355525051475, "grad_norm": 0.8471237028681938, "learning_rate": 2.4173637579641514e-06, "loss": 0.2241, "step": 19892 }, { "epoch": 0.6826698695950584, "grad_norm": 0.8940133636548055, "learning_rate": 2.416887910853002e-06, "loss": 0.3126, "step": 19893 }, { "epoch": 0.6827041866849691, "grad_norm": 0.7795623793701031, "learning_rate": 2.4164120956531884e-06, "loss": 0.2838, "step": 19894 }, { "epoch": 0.6827385037748799, "grad_norm": 0.7267125138813278, "learning_rate": 2.4159363123705913e-06, "loss": 0.2383, "step": 19895 }, { "epoch": 0.6827728208647906, "grad_norm": 0.684473803351045, "learning_rate": 2.415460561011089e-06, "loss": 0.2327, "step": 19896 }, { "epoch": 0.6828071379547015, "grad_norm": 0.7798108137993431, "learning_rate": 2.4149848415805578e-06, "loss": 0.2393, "step": 19897 }, { "epoch": 0.6828414550446122, "grad_norm": 0.772346941562135, "learning_rate": 2.4145091540848747e-06, "loss": 0.322, "step": 19898 }, { "epoch": 0.682875772134523, "grad_norm": 0.7699515516990705, "learning_rate": 2.4140334985299172e-06, "loss": 0.2592, "step": 19899 }, { "epoch": 0.6829100892244337, "grad_norm": 0.7917527633698904, "learning_rate": 2.413557874921563e-06, "loss": 0.3016, "step": 19900 }, { "epoch": 0.6829444063143445, "grad_norm": 0.8132912540237431, "learning_rate": 2.413082283265681e-06, "loss": 0.3018, "step": 19901 }, { "epoch": 0.6829787234042554, "grad_norm": 0.7689887345034273, "learning_rate": 2.4126067235681553e-06, "loss": 0.2942, "step": 19902 }, { "epoch": 0.6830130404941661, "grad_norm": 0.8150742905986205, "learning_rate": 2.412131195834854e-06, "loss": 0.2789, "step": 19903 }, { "epoch": 0.6830473575840769, "grad_norm": 0.8159157916153625, "learning_rate": 2.4116557000716577e-06, "loss": 0.2631, "step": 19904 }, { "epoch": 0.6830816746739876, "grad_norm": 0.8636540836482988, "learning_rate": 2.4111802362844355e-06, "loss": 0.2998, "step": 19905 }, { "epoch": 0.6831159917638984, "grad_norm": 0.7229697658256811, "learning_rate": 2.4107048044790637e-06, "loss": 0.2733, "step": 19906 }, { "epoch": 0.6831503088538092, "grad_norm": 0.722127923105183, "learning_rate": 2.410229404661415e-06, "loss": 0.2715, "step": 19907 }, { "epoch": 0.68318462594372, "grad_norm": 0.7735418782317596, "learning_rate": 2.4097540368373627e-06, "loss": 0.2586, "step": 19908 }, { "epoch": 0.6832189430336307, "grad_norm": 0.9058392536129098, "learning_rate": 2.4092787010127793e-06, "loss": 0.2762, "step": 19909 }, { "epoch": 0.6832532601235415, "grad_norm": 0.790632228775643, "learning_rate": 2.408803397193537e-06, "loss": 0.2764, "step": 19910 }, { "epoch": 0.6832875772134523, "grad_norm": 0.7615583429652047, "learning_rate": 2.4083281253855094e-06, "loss": 0.2793, "step": 19911 }, { "epoch": 0.6833218943033631, "grad_norm": 0.7992568931838271, "learning_rate": 2.407852885594563e-06, "loss": 0.2821, "step": 19912 }, { "epoch": 0.6833562113932738, "grad_norm": 0.7766759996389417, "learning_rate": 2.4073776778265735e-06, "loss": 0.327, "step": 19913 }, { "epoch": 0.6833905284831846, "grad_norm": 0.8088751626614852, "learning_rate": 2.406902502087412e-06, "loss": 0.2731, "step": 19914 }, { "epoch": 0.6834248455730954, "grad_norm": 0.778556164320592, "learning_rate": 2.406427358382943e-06, "loss": 0.264, "step": 19915 }, { "epoch": 0.6834591626630062, "grad_norm": 0.7729236537868255, "learning_rate": 2.405952246719044e-06, "loss": 0.2542, "step": 19916 }, { "epoch": 0.683493479752917, "grad_norm": 0.6643956026826061, "learning_rate": 2.4054771671015787e-06, "loss": 0.2529, "step": 19917 }, { "epoch": 0.6835277968428277, "grad_norm": 0.6880397583942767, "learning_rate": 2.4050021195364184e-06, "loss": 0.2625, "step": 19918 }, { "epoch": 0.6835621139327385, "grad_norm": 0.7745070022411784, "learning_rate": 2.404527104029431e-06, "loss": 0.29, "step": 19919 }, { "epoch": 0.6835964310226493, "grad_norm": 0.8262957127995512, "learning_rate": 2.404052120586485e-06, "loss": 0.264, "step": 19920 }, { "epoch": 0.6836307481125601, "grad_norm": 0.8189630305584061, "learning_rate": 2.403577169213449e-06, "loss": 0.2321, "step": 19921 }, { "epoch": 0.6836650652024708, "grad_norm": 0.8538954656784622, "learning_rate": 2.4031022499161897e-06, "loss": 0.267, "step": 19922 }, { "epoch": 0.6836993822923816, "grad_norm": 0.7671417809805195, "learning_rate": 2.4026273627005767e-06, "loss": 0.2854, "step": 19923 }, { "epoch": 0.6837336993822923, "grad_norm": 0.836956898318347, "learning_rate": 2.4021525075724704e-06, "loss": 0.2718, "step": 19924 }, { "epoch": 0.6837680164722032, "grad_norm": 0.7516652249662079, "learning_rate": 2.401677684537745e-06, "loss": 0.2844, "step": 19925 }, { "epoch": 0.683802333562114, "grad_norm": 0.7022525957206274, "learning_rate": 2.40120289360226e-06, "loss": 0.2536, "step": 19926 }, { "epoch": 0.6838366506520247, "grad_norm": 0.7556105162882736, "learning_rate": 2.400728134771884e-06, "loss": 0.2572, "step": 19927 }, { "epoch": 0.6838709677419355, "grad_norm": 0.7681509576946781, "learning_rate": 2.400253408052481e-06, "loss": 0.2918, "step": 19928 }, { "epoch": 0.6839052848318462, "grad_norm": 0.734211639905699, "learning_rate": 2.399778713449916e-06, "loss": 0.2545, "step": 19929 }, { "epoch": 0.6839396019217571, "grad_norm": 0.8485142127273594, "learning_rate": 2.3993040509700536e-06, "loss": 0.2636, "step": 19930 }, { "epoch": 0.6839739190116678, "grad_norm": 0.7782691225086289, "learning_rate": 2.3988294206187578e-06, "loss": 0.3146, "step": 19931 }, { "epoch": 0.6840082361015786, "grad_norm": 0.7563104800224286, "learning_rate": 2.398354822401894e-06, "loss": 0.2595, "step": 19932 }, { "epoch": 0.6840425531914893, "grad_norm": 0.850830348198207, "learning_rate": 2.397880256325319e-06, "loss": 0.2731, "step": 19933 }, { "epoch": 0.6840768702814002, "grad_norm": 0.797206188980269, "learning_rate": 2.397405722394903e-06, "loss": 0.2621, "step": 19934 }, { "epoch": 0.6841111873713109, "grad_norm": 0.7659565868680893, "learning_rate": 2.396931220616502e-06, "loss": 0.2407, "step": 19935 }, { "epoch": 0.6841455044612217, "grad_norm": 0.7950762495783246, "learning_rate": 2.396456750995982e-06, "loss": 0.2594, "step": 19936 }, { "epoch": 0.6841798215511324, "grad_norm": 0.6984404627286962, "learning_rate": 2.3959823135392056e-06, "loss": 0.2737, "step": 19937 }, { "epoch": 0.6842141386410432, "grad_norm": 0.7575709890604095, "learning_rate": 2.3955079082520275e-06, "loss": 0.262, "step": 19938 }, { "epoch": 0.684248455730954, "grad_norm": 0.9269517670672827, "learning_rate": 2.3950335351403175e-06, "loss": 0.2665, "step": 19939 }, { "epoch": 0.6842827728208648, "grad_norm": 0.7762539431930353, "learning_rate": 2.394559194209928e-06, "loss": 0.2435, "step": 19940 }, { "epoch": 0.6843170899107756, "grad_norm": 0.7533918353573229, "learning_rate": 2.394084885466723e-06, "loss": 0.2403, "step": 19941 }, { "epoch": 0.6843514070006863, "grad_norm": 0.8134636441137026, "learning_rate": 2.3936106089165606e-06, "loss": 0.2729, "step": 19942 }, { "epoch": 0.6843857240905972, "grad_norm": 0.8428699437728653, "learning_rate": 2.3931363645653007e-06, "loss": 0.2692, "step": 19943 }, { "epoch": 0.6844200411805079, "grad_norm": 0.7891035931745769, "learning_rate": 2.392662152418801e-06, "loss": 0.29, "step": 19944 }, { "epoch": 0.6844543582704187, "grad_norm": 0.922677074595975, "learning_rate": 2.392187972482921e-06, "loss": 0.3077, "step": 19945 }, { "epoch": 0.6844886753603294, "grad_norm": 0.7543302659523502, "learning_rate": 2.3917138247635202e-06, "loss": 0.2876, "step": 19946 }, { "epoch": 0.6845229924502402, "grad_norm": 0.843171131674726, "learning_rate": 2.3912397092664503e-06, "loss": 0.231, "step": 19947 }, { "epoch": 0.684557309540151, "grad_norm": 0.8437748110337898, "learning_rate": 2.390765625997576e-06, "loss": 0.2883, "step": 19948 }, { "epoch": 0.6845916266300618, "grad_norm": 0.8229685495288361, "learning_rate": 2.390291574962748e-06, "loss": 0.2543, "step": 19949 }, { "epoch": 0.6846259437199725, "grad_norm": 0.806993205518873, "learning_rate": 2.3898175561678253e-06, "loss": 0.3092, "step": 19950 }, { "epoch": 0.6846602608098833, "grad_norm": 0.8419897067943705, "learning_rate": 2.3893435696186632e-06, "loss": 0.2648, "step": 19951 }, { "epoch": 0.684694577899794, "grad_norm": 0.6801457376015182, "learning_rate": 2.3888696153211184e-06, "loss": 0.2221, "step": 19952 }, { "epoch": 0.6847288949897049, "grad_norm": 0.9487839756650734, "learning_rate": 2.388395693281045e-06, "loss": 0.2917, "step": 19953 }, { "epoch": 0.6847632120796157, "grad_norm": 0.7742763339933523, "learning_rate": 2.387921803504297e-06, "loss": 0.3082, "step": 19954 }, { "epoch": 0.6847975291695264, "grad_norm": 0.7643409867556851, "learning_rate": 2.3874479459967324e-06, "loss": 0.2644, "step": 19955 }, { "epoch": 0.6848318462594372, "grad_norm": 0.7280688089644821, "learning_rate": 2.386974120764199e-06, "loss": 0.2389, "step": 19956 }, { "epoch": 0.684866163349348, "grad_norm": 0.7370919533784804, "learning_rate": 2.386500327812557e-06, "loss": 0.2853, "step": 19957 }, { "epoch": 0.6849004804392588, "grad_norm": 0.7687574586701211, "learning_rate": 2.3860265671476552e-06, "loss": 0.3108, "step": 19958 }, { "epoch": 0.6849347975291695, "grad_norm": 0.734278513195234, "learning_rate": 2.385552838775345e-06, "loss": 0.2628, "step": 19959 }, { "epoch": 0.6849691146190803, "grad_norm": 0.7238086093754992, "learning_rate": 2.3850791427014853e-06, "loss": 0.2807, "step": 19960 }, { "epoch": 0.685003431708991, "grad_norm": 0.7595884210108329, "learning_rate": 2.3846054789319223e-06, "loss": 0.2671, "step": 19961 }, { "epoch": 0.6850377487989019, "grad_norm": 0.840882894140842, "learning_rate": 2.3841318474725095e-06, "loss": 0.2737, "step": 19962 }, { "epoch": 0.6850720658888126, "grad_norm": 0.7655028631629647, "learning_rate": 2.3836582483290976e-06, "loss": 0.2426, "step": 19963 }, { "epoch": 0.6851063829787234, "grad_norm": 0.741725326541644, "learning_rate": 2.3831846815075376e-06, "loss": 0.2764, "step": 19964 }, { "epoch": 0.6851407000686341, "grad_norm": 0.8636305744603282, "learning_rate": 2.3827111470136804e-06, "loss": 0.3133, "step": 19965 }, { "epoch": 0.685175017158545, "grad_norm": 0.7354052402246282, "learning_rate": 2.382237644853375e-06, "loss": 0.3051, "step": 19966 }, { "epoch": 0.6852093342484558, "grad_norm": 0.8293229342523789, "learning_rate": 2.3817641750324722e-06, "loss": 0.2724, "step": 19967 }, { "epoch": 0.6852436513383665, "grad_norm": 0.7185976748008389, "learning_rate": 2.3812907375568197e-06, "loss": 0.2535, "step": 19968 }, { "epoch": 0.6852779684282773, "grad_norm": 0.7480818681745636, "learning_rate": 2.380817332432269e-06, "loss": 0.2433, "step": 19969 }, { "epoch": 0.685312285518188, "grad_norm": 1.0023138442849455, "learning_rate": 2.380343959664663e-06, "loss": 0.2725, "step": 19970 }, { "epoch": 0.6853466026080989, "grad_norm": 0.793305858291476, "learning_rate": 2.379870619259857e-06, "loss": 0.2484, "step": 19971 }, { "epoch": 0.6853809196980096, "grad_norm": 0.7948768991340299, "learning_rate": 2.379397311223693e-06, "loss": 0.2485, "step": 19972 }, { "epoch": 0.6854152367879204, "grad_norm": 0.7344186362825733, "learning_rate": 2.3789240355620193e-06, "loss": 0.2921, "step": 19973 }, { "epoch": 0.6854495538778311, "grad_norm": 0.8032075378012147, "learning_rate": 2.378450792280684e-06, "loss": 0.289, "step": 19974 }, { "epoch": 0.6854838709677419, "grad_norm": 0.7627122817737858, "learning_rate": 2.377977581385532e-06, "loss": 0.3349, "step": 19975 }, { "epoch": 0.6855181880576527, "grad_norm": 0.7719215193740648, "learning_rate": 2.3775044028824103e-06, "loss": 0.2888, "step": 19976 }, { "epoch": 0.6855525051475635, "grad_norm": 0.8479035870454807, "learning_rate": 2.3770312567771643e-06, "loss": 0.2652, "step": 19977 }, { "epoch": 0.6855868222374742, "grad_norm": 0.8048478688054732, "learning_rate": 2.3765581430756406e-06, "loss": 0.3103, "step": 19978 }, { "epoch": 0.685621139327385, "grad_norm": 0.8382024603825005, "learning_rate": 2.376085061783679e-06, "loss": 0.2827, "step": 19979 }, { "epoch": 0.6856554564172959, "grad_norm": 0.7825035184221476, "learning_rate": 2.375612012907129e-06, "loss": 0.2979, "step": 19980 }, { "epoch": 0.6856897735072066, "grad_norm": 0.788439250514901, "learning_rate": 2.3751389964518344e-06, "loss": 0.2798, "step": 19981 }, { "epoch": 0.6857240905971174, "grad_norm": 0.7446661152818314, "learning_rate": 2.3746660124236336e-06, "loss": 0.2469, "step": 19982 }, { "epoch": 0.6857584076870281, "grad_norm": 0.8616922360729593, "learning_rate": 2.3741930608283774e-06, "loss": 0.3081, "step": 19983 }, { "epoch": 0.6857927247769389, "grad_norm": 0.7562496605565728, "learning_rate": 2.3737201416719025e-06, "loss": 0.2941, "step": 19984 }, { "epoch": 0.6858270418668497, "grad_norm": 0.7392433093453289, "learning_rate": 2.373247254960053e-06, "loss": 0.2868, "step": 19985 }, { "epoch": 0.6858613589567605, "grad_norm": 0.7755440897904803, "learning_rate": 2.372774400698672e-06, "loss": 0.3238, "step": 19986 }, { "epoch": 0.6858956760466712, "grad_norm": 0.8034518095200115, "learning_rate": 2.372301578893599e-06, "loss": 0.2488, "step": 19987 }, { "epoch": 0.685929993136582, "grad_norm": 0.7637917510148912, "learning_rate": 2.3718287895506773e-06, "loss": 0.2371, "step": 19988 }, { "epoch": 0.6859643102264928, "grad_norm": 0.7006008253352117, "learning_rate": 2.371356032675747e-06, "loss": 0.2461, "step": 19989 }, { "epoch": 0.6859986273164036, "grad_norm": 0.7937384677832234, "learning_rate": 2.3708833082746496e-06, "loss": 0.3018, "step": 19990 }, { "epoch": 0.6860329444063143, "grad_norm": 0.7585439434512306, "learning_rate": 2.3704106163532197e-06, "loss": 0.2493, "step": 19991 }, { "epoch": 0.6860672614962251, "grad_norm": 0.6723209540215258, "learning_rate": 2.3699379569173053e-06, "loss": 0.268, "step": 19992 }, { "epoch": 0.6861015785861359, "grad_norm": 0.7047116776930274, "learning_rate": 2.369465329972739e-06, "loss": 0.2427, "step": 19993 }, { "epoch": 0.6861358956760467, "grad_norm": 0.7791065360058986, "learning_rate": 2.3689927355253613e-06, "loss": 0.2608, "step": 19994 }, { "epoch": 0.6861702127659575, "grad_norm": 0.7203805774849181, "learning_rate": 2.368520173581011e-06, "loss": 0.272, "step": 19995 }, { "epoch": 0.6862045298558682, "grad_norm": 0.9010594006864564, "learning_rate": 2.368047644145526e-06, "loss": 0.3067, "step": 19996 }, { "epoch": 0.686238846945779, "grad_norm": 0.8585287047717498, "learning_rate": 2.367575147224744e-06, "loss": 0.2821, "step": 19997 }, { "epoch": 0.6862731640356897, "grad_norm": 0.8048391451880129, "learning_rate": 2.3671026828245014e-06, "loss": 0.2349, "step": 19998 }, { "epoch": 0.6863074811256006, "grad_norm": 0.7997562408243226, "learning_rate": 2.3666302509506354e-06, "loss": 0.3024, "step": 19999 }, { "epoch": 0.6863417982155113, "grad_norm": 0.7887871010943797, "learning_rate": 2.3661578516089832e-06, "loss": 0.2876, "step": 20000 }, { "epoch": 0.6863761153054221, "grad_norm": 0.808424685654999, "learning_rate": 2.3656854848053817e-06, "loss": 0.2959, "step": 20001 }, { "epoch": 0.6864104323953328, "grad_norm": 0.7171743857430002, "learning_rate": 2.3652131505456606e-06, "loss": 0.2455, "step": 20002 }, { "epoch": 0.6864447494852437, "grad_norm": 0.7976725006434031, "learning_rate": 2.364740848835661e-06, "loss": 0.3048, "step": 20003 }, { "epoch": 0.6864790665751545, "grad_norm": 0.7626794030247145, "learning_rate": 2.3642685796812182e-06, "loss": 0.296, "step": 20004 }, { "epoch": 0.6865133836650652, "grad_norm": 0.8569375623596243, "learning_rate": 2.36379634308816e-06, "loss": 0.2971, "step": 20005 }, { "epoch": 0.686547700754976, "grad_norm": 0.7409183598616844, "learning_rate": 2.3633241390623284e-06, "loss": 0.2608, "step": 20006 }, { "epoch": 0.6865820178448867, "grad_norm": 0.6816734111407194, "learning_rate": 2.3628519676095505e-06, "loss": 0.2305, "step": 20007 }, { "epoch": 0.6866163349347976, "grad_norm": 0.8736654546712109, "learning_rate": 2.362379828735663e-06, "loss": 0.2742, "step": 20008 }, { "epoch": 0.6866506520247083, "grad_norm": 0.7701564342216468, "learning_rate": 2.361907722446497e-06, "loss": 0.276, "step": 20009 }, { "epoch": 0.6866849691146191, "grad_norm": 0.7559314312917337, "learning_rate": 2.3614356487478853e-06, "loss": 0.2277, "step": 20010 }, { "epoch": 0.6867192862045298, "grad_norm": 0.6471814268695109, "learning_rate": 2.36096360764566e-06, "loss": 0.2863, "step": 20011 }, { "epoch": 0.6867536032944407, "grad_norm": 0.8239799598594327, "learning_rate": 2.360491599145652e-06, "loss": 0.2862, "step": 20012 }, { "epoch": 0.6867879203843514, "grad_norm": 0.7031707993739453, "learning_rate": 2.3600196232536952e-06, "loss": 0.2062, "step": 20013 }, { "epoch": 0.6868222374742622, "grad_norm": 0.8593711211684287, "learning_rate": 2.3595476799756145e-06, "loss": 0.2638, "step": 20014 }, { "epoch": 0.6868565545641729, "grad_norm": 0.9839014556661599, "learning_rate": 2.359075769317248e-06, "loss": 0.3251, "step": 20015 }, { "epoch": 0.6868908716540837, "grad_norm": 0.8376727993528903, "learning_rate": 2.3586038912844184e-06, "loss": 0.3147, "step": 20016 }, { "epoch": 0.6869251887439946, "grad_norm": 0.7535282362903868, "learning_rate": 2.3581320458829586e-06, "loss": 0.2514, "step": 20017 }, { "epoch": 0.6869595058339053, "grad_norm": 0.7512542253364898, "learning_rate": 2.3576602331186972e-06, "loss": 0.2319, "step": 20018 }, { "epoch": 0.686993822923816, "grad_norm": 0.7111943866770423, "learning_rate": 2.357188452997463e-06, "loss": 0.2623, "step": 20019 }, { "epoch": 0.6870281400137268, "grad_norm": 0.710061206233298, "learning_rate": 2.356716705525084e-06, "loss": 0.2714, "step": 20020 }, { "epoch": 0.6870624571036376, "grad_norm": 0.827547708473018, "learning_rate": 2.3562449907073885e-06, "loss": 0.2449, "step": 20021 }, { "epoch": 0.6870967741935484, "grad_norm": 0.8644210092640691, "learning_rate": 2.355773308550206e-06, "loss": 0.3085, "step": 20022 }, { "epoch": 0.6871310912834592, "grad_norm": 0.7817140270297396, "learning_rate": 2.3553016590593574e-06, "loss": 0.2455, "step": 20023 }, { "epoch": 0.6871654083733699, "grad_norm": 0.6923618660317914, "learning_rate": 2.354830042240675e-06, "loss": 0.2725, "step": 20024 }, { "epoch": 0.6871997254632807, "grad_norm": 0.7059527626362903, "learning_rate": 2.3543584580999856e-06, "loss": 0.2512, "step": 20025 }, { "epoch": 0.6872340425531915, "grad_norm": 0.8176963729498808, "learning_rate": 2.353886906643109e-06, "loss": 0.2457, "step": 20026 }, { "epoch": 0.6872683596431023, "grad_norm": 0.7096616797030229, "learning_rate": 2.3534153878758793e-06, "loss": 0.2788, "step": 20027 }, { "epoch": 0.687302676733013, "grad_norm": 0.8253654698190429, "learning_rate": 2.3529439018041115e-06, "loss": 0.2921, "step": 20028 }, { "epoch": 0.6873369938229238, "grad_norm": 0.7238796364093791, "learning_rate": 2.352472448433641e-06, "loss": 0.2803, "step": 20029 }, { "epoch": 0.6873713109128345, "grad_norm": 0.8806162820490654, "learning_rate": 2.352001027770284e-06, "loss": 0.2438, "step": 20030 }, { "epoch": 0.6874056280027454, "grad_norm": 0.680134527933835, "learning_rate": 2.351529639819867e-06, "loss": 0.2379, "step": 20031 }, { "epoch": 0.6874399450926562, "grad_norm": 0.8043243903251945, "learning_rate": 2.3510582845882137e-06, "loss": 0.2676, "step": 20032 }, { "epoch": 0.6874742621825669, "grad_norm": 0.7827411147321576, "learning_rate": 2.350586962081146e-06, "loss": 0.2563, "step": 20033 }, { "epoch": 0.6875085792724777, "grad_norm": 0.7768261455034896, "learning_rate": 2.3501156723044887e-06, "loss": 0.2383, "step": 20034 }, { "epoch": 0.6875428963623885, "grad_norm": 0.7625318675036296, "learning_rate": 2.3496444152640623e-06, "loss": 0.2377, "step": 20035 }, { "epoch": 0.6875772134522993, "grad_norm": 0.9412125019301294, "learning_rate": 2.349173190965691e-06, "loss": 0.2489, "step": 20036 }, { "epoch": 0.68761153054221, "grad_norm": 0.7956132371804651, "learning_rate": 2.3487019994151904e-06, "loss": 0.265, "step": 20037 }, { "epoch": 0.6876458476321208, "grad_norm": 0.7532506673439838, "learning_rate": 2.3482308406183894e-06, "loss": 0.2582, "step": 20038 }, { "epoch": 0.6876801647220315, "grad_norm": 0.7781110024942381, "learning_rate": 2.347759714581103e-06, "loss": 0.2945, "step": 20039 }, { "epoch": 0.6877144818119424, "grad_norm": 0.7724368992897642, "learning_rate": 2.3472886213091536e-06, "loss": 0.2635, "step": 20040 }, { "epoch": 0.6877487989018531, "grad_norm": 0.7117505918554489, "learning_rate": 2.34681756080836e-06, "loss": 0.2365, "step": 20041 }, { "epoch": 0.6877831159917639, "grad_norm": 0.8150910807203929, "learning_rate": 2.346346533084542e-06, "loss": 0.2657, "step": 20042 }, { "epoch": 0.6878174330816746, "grad_norm": 0.7572306407336085, "learning_rate": 2.345875538143518e-06, "loss": 0.283, "step": 20043 }, { "epoch": 0.6878517501715854, "grad_norm": 0.8773979176498936, "learning_rate": 2.345404575991109e-06, "loss": 0.2306, "step": 20044 }, { "epoch": 0.6878860672614963, "grad_norm": 0.7877211810505366, "learning_rate": 2.344933646633132e-06, "loss": 0.2644, "step": 20045 }, { "epoch": 0.687920384351407, "grad_norm": 0.8261718619687592, "learning_rate": 2.3444627500754015e-06, "loss": 0.3364, "step": 20046 }, { "epoch": 0.6879547014413178, "grad_norm": 0.7445206676996513, "learning_rate": 2.343991886323739e-06, "loss": 0.2496, "step": 20047 }, { "epoch": 0.6879890185312285, "grad_norm": 0.8411745331724342, "learning_rate": 2.343521055383962e-06, "loss": 0.3069, "step": 20048 }, { "epoch": 0.6880233356211394, "grad_norm": 0.7732616644970685, "learning_rate": 2.3430502572618813e-06, "loss": 0.2714, "step": 20049 }, { "epoch": 0.6880576527110501, "grad_norm": 0.7755576081143878, "learning_rate": 2.342579491963321e-06, "loss": 0.2781, "step": 20050 }, { "epoch": 0.6880919698009609, "grad_norm": 0.9483148476623162, "learning_rate": 2.342108759494091e-06, "loss": 0.2929, "step": 20051 }, { "epoch": 0.6881262868908716, "grad_norm": 0.7545135438902567, "learning_rate": 2.3416380598600085e-06, "loss": 0.2789, "step": 20052 }, { "epoch": 0.6881606039807824, "grad_norm": 0.7798125814262991, "learning_rate": 2.3411673930668884e-06, "loss": 0.2715, "step": 20053 }, { "epoch": 0.6881949210706932, "grad_norm": 0.785379991220132, "learning_rate": 2.3406967591205447e-06, "loss": 0.2436, "step": 20054 }, { "epoch": 0.688229238160604, "grad_norm": 0.8883876234890395, "learning_rate": 2.3402261580267932e-06, "loss": 0.2443, "step": 20055 }, { "epoch": 0.6882635552505147, "grad_norm": 0.8150733808033803, "learning_rate": 2.3397555897914457e-06, "loss": 0.2792, "step": 20056 }, { "epoch": 0.6882978723404255, "grad_norm": 0.7349055316750414, "learning_rate": 2.3392850544203185e-06, "loss": 0.2929, "step": 20057 }, { "epoch": 0.6883321894303364, "grad_norm": 0.8324806589114642, "learning_rate": 2.3388145519192183e-06, "loss": 0.3179, "step": 20058 }, { "epoch": 0.6883665065202471, "grad_norm": 0.8294235506253476, "learning_rate": 2.338344082293966e-06, "loss": 0.2709, "step": 20059 }, { "epoch": 0.6884008236101579, "grad_norm": 0.7866307126740986, "learning_rate": 2.337873645550365e-06, "loss": 0.2223, "step": 20060 }, { "epoch": 0.6884351407000686, "grad_norm": 0.758726012361509, "learning_rate": 2.3374032416942355e-06, "loss": 0.2327, "step": 20061 }, { "epoch": 0.6884694577899794, "grad_norm": 0.7096579146789729, "learning_rate": 2.3369328707313825e-06, "loss": 0.2497, "step": 20062 }, { "epoch": 0.6885037748798902, "grad_norm": 0.8587055865751494, "learning_rate": 2.336462532667618e-06, "loss": 0.311, "step": 20063 }, { "epoch": 0.688538091969801, "grad_norm": 0.7245823087635329, "learning_rate": 2.335992227508756e-06, "loss": 0.3212, "step": 20064 }, { "epoch": 0.6885724090597117, "grad_norm": 0.732812692972873, "learning_rate": 2.335521955260603e-06, "loss": 0.2838, "step": 20065 }, { "epoch": 0.6886067261496225, "grad_norm": 0.7683182507581322, "learning_rate": 2.335051715928969e-06, "loss": 0.2584, "step": 20066 }, { "epoch": 0.6886410432395332, "grad_norm": 0.7671423790642543, "learning_rate": 2.334581509519665e-06, "loss": 0.2713, "step": 20067 }, { "epoch": 0.6886753603294441, "grad_norm": 0.7835119753221389, "learning_rate": 2.3341113360385005e-06, "loss": 0.2565, "step": 20068 }, { "epoch": 0.6887096774193548, "grad_norm": 0.7655909400606153, "learning_rate": 2.3336411954912787e-06, "loss": 0.283, "step": 20069 }, { "epoch": 0.6887439945092656, "grad_norm": 0.7146465186902217, "learning_rate": 2.3331710878838126e-06, "loss": 0.2649, "step": 20070 }, { "epoch": 0.6887783115991764, "grad_norm": 0.8279797012927615, "learning_rate": 2.3327010132219107e-06, "loss": 0.2666, "step": 20071 }, { "epoch": 0.6888126286890872, "grad_norm": 0.7593292068981985, "learning_rate": 2.3322309715113746e-06, "loss": 0.2522, "step": 20072 }, { "epoch": 0.688846945778998, "grad_norm": 0.7525985461319687, "learning_rate": 2.3317609627580173e-06, "loss": 0.2574, "step": 20073 }, { "epoch": 0.6888812628689087, "grad_norm": 0.7285289171179001, "learning_rate": 2.3312909869676413e-06, "loss": 0.298, "step": 20074 }, { "epoch": 0.6889155799588195, "grad_norm": 0.6838298272971073, "learning_rate": 2.3308210441460538e-06, "loss": 0.2732, "step": 20075 }, { "epoch": 0.6889498970487302, "grad_norm": 0.8118522032148904, "learning_rate": 2.33035113429906e-06, "loss": 0.2768, "step": 20076 }, { "epoch": 0.6889842141386411, "grad_norm": 0.7487719288415683, "learning_rate": 2.329881257432466e-06, "loss": 0.2229, "step": 20077 }, { "epoch": 0.6890185312285518, "grad_norm": 0.9435108397646341, "learning_rate": 2.329411413552075e-06, "loss": 0.2701, "step": 20078 }, { "epoch": 0.6890528483184626, "grad_norm": 0.8629923864976238, "learning_rate": 2.328941602663693e-06, "loss": 0.2756, "step": 20079 }, { "epoch": 0.6890871654083733, "grad_norm": 0.871849491208662, "learning_rate": 2.328471824773125e-06, "loss": 0.2356, "step": 20080 }, { "epoch": 0.6891214824982842, "grad_norm": 0.7855726414757372, "learning_rate": 2.3280020798861697e-06, "loss": 0.2676, "step": 20081 }, { "epoch": 0.689155799588195, "grad_norm": 0.7781287073895732, "learning_rate": 2.327532368008637e-06, "loss": 0.2975, "step": 20082 }, { "epoch": 0.6891901166781057, "grad_norm": 0.7408158813708897, "learning_rate": 2.327062689146324e-06, "loss": 0.2547, "step": 20083 }, { "epoch": 0.6892244337680165, "grad_norm": 0.9304962048748131, "learning_rate": 2.326593043305035e-06, "loss": 0.226, "step": 20084 }, { "epoch": 0.6892587508579272, "grad_norm": 0.8122242520212616, "learning_rate": 2.3261234304905724e-06, "loss": 0.3245, "step": 20085 }, { "epoch": 0.6892930679478381, "grad_norm": 0.7483769498432452, "learning_rate": 2.325653850708737e-06, "loss": 0.2889, "step": 20086 }, { "epoch": 0.6893273850377488, "grad_norm": 0.6998388335617856, "learning_rate": 2.3251843039653305e-06, "loss": 0.2809, "step": 20087 }, { "epoch": 0.6893617021276596, "grad_norm": 0.787871294184651, "learning_rate": 2.324714790266154e-06, "loss": 0.2448, "step": 20088 }, { "epoch": 0.6893960192175703, "grad_norm": 0.720491019704149, "learning_rate": 2.324245309617008e-06, "loss": 0.2702, "step": 20089 }, { "epoch": 0.6894303363074811, "grad_norm": 0.7411155977965183, "learning_rate": 2.323775862023688e-06, "loss": 0.2268, "step": 20090 }, { "epoch": 0.6894646533973919, "grad_norm": 0.8676144283546621, "learning_rate": 2.3233064474919996e-06, "loss": 0.2597, "step": 20091 }, { "epoch": 0.6894989704873027, "grad_norm": 0.7956324710375757, "learning_rate": 2.3228370660277386e-06, "loss": 0.2441, "step": 20092 }, { "epoch": 0.6895332875772134, "grad_norm": 0.6779813015879724, "learning_rate": 2.322367717636705e-06, "loss": 0.3021, "step": 20093 }, { "epoch": 0.6895676046671242, "grad_norm": 0.800775062084612, "learning_rate": 2.3218984023246983e-06, "loss": 0.2687, "step": 20094 }, { "epoch": 0.689601921757035, "grad_norm": 0.9104835526173203, "learning_rate": 2.32142912009751e-06, "loss": 0.2219, "step": 20095 }, { "epoch": 0.6896362388469458, "grad_norm": 0.8203979519467371, "learning_rate": 2.3209598709609464e-06, "loss": 0.2411, "step": 20096 }, { "epoch": 0.6896705559368566, "grad_norm": 0.777521265548712, "learning_rate": 2.320490654920798e-06, "loss": 0.2729, "step": 20097 }, { "epoch": 0.6897048730267673, "grad_norm": 0.9827966933264567, "learning_rate": 2.3200214719828634e-06, "loss": 0.2679, "step": 20098 }, { "epoch": 0.6897391901166781, "grad_norm": 0.7889430882586352, "learning_rate": 2.3195523221529393e-06, "loss": 0.2843, "step": 20099 }, { "epoch": 0.6897735072065889, "grad_norm": 0.8331886664779787, "learning_rate": 2.319083205436821e-06, "loss": 0.2795, "step": 20100 }, { "epoch": 0.6898078242964997, "grad_norm": 0.6226017143607502, "learning_rate": 2.318614121840304e-06, "loss": 0.2201, "step": 20101 }, { "epoch": 0.6898421413864104, "grad_norm": 0.805615017472114, "learning_rate": 2.318145071369184e-06, "loss": 0.2466, "step": 20102 }, { "epoch": 0.6898764584763212, "grad_norm": 0.8187885360877762, "learning_rate": 2.3176760540292557e-06, "loss": 0.2781, "step": 20103 }, { "epoch": 0.689910775566232, "grad_norm": 0.7710263975108717, "learning_rate": 2.3172070698263096e-06, "loss": 0.2516, "step": 20104 }, { "epoch": 0.6899450926561428, "grad_norm": 0.790082803126008, "learning_rate": 2.3167381187661457e-06, "loss": 0.3252, "step": 20105 }, { "epoch": 0.6899794097460535, "grad_norm": 0.6636323585805294, "learning_rate": 2.316269200854552e-06, "loss": 0.2449, "step": 20106 }, { "epoch": 0.6900137268359643, "grad_norm": 0.8624068284153661, "learning_rate": 2.3158003160973235e-06, "loss": 0.2965, "step": 20107 }, { "epoch": 0.690048043925875, "grad_norm": 0.6161820741039166, "learning_rate": 2.315331464500253e-06, "loss": 0.2426, "step": 20108 }, { "epoch": 0.6900823610157859, "grad_norm": 0.8194894002979493, "learning_rate": 2.3148626460691313e-06, "loss": 0.2868, "step": 20109 }, { "epoch": 0.6901166781056967, "grad_norm": 0.8483146886457874, "learning_rate": 2.3143938608097516e-06, "loss": 0.2453, "step": 20110 }, { "epoch": 0.6901509951956074, "grad_norm": 0.8579463867229955, "learning_rate": 2.3139251087279045e-06, "loss": 0.3028, "step": 20111 }, { "epoch": 0.6901853122855182, "grad_norm": 0.78135549631348, "learning_rate": 2.3134563898293834e-06, "loss": 0.2356, "step": 20112 }, { "epoch": 0.6902196293754289, "grad_norm": 0.7368078956126946, "learning_rate": 2.312987704119972e-06, "loss": 0.2711, "step": 20113 }, { "epoch": 0.6902539464653398, "grad_norm": 0.8201709161112962, "learning_rate": 2.312519051605467e-06, "loss": 0.2964, "step": 20114 }, { "epoch": 0.6902882635552505, "grad_norm": 0.7353891285546088, "learning_rate": 2.3120504322916576e-06, "loss": 0.2756, "step": 20115 }, { "epoch": 0.6903225806451613, "grad_norm": 0.8664977363806683, "learning_rate": 2.3115818461843275e-06, "loss": 0.2935, "step": 20116 }, { "epoch": 0.690356897735072, "grad_norm": 0.7983516378475012, "learning_rate": 2.3111132932892732e-06, "loss": 0.2715, "step": 20117 }, { "epoch": 0.6903912148249829, "grad_norm": 0.8582158666116319, "learning_rate": 2.3106447736122774e-06, "loss": 0.3054, "step": 20118 }, { "epoch": 0.6904255319148936, "grad_norm": 0.7688672383496723, "learning_rate": 2.3101762871591295e-06, "loss": 0.2118, "step": 20119 }, { "epoch": 0.6904598490048044, "grad_norm": 0.7738750777900884, "learning_rate": 2.309707833935618e-06, "loss": 0.2547, "step": 20120 }, { "epoch": 0.6904941660947151, "grad_norm": 0.7555690418710087, "learning_rate": 2.3092394139475296e-06, "loss": 0.2872, "step": 20121 }, { "epoch": 0.6905284831846259, "grad_norm": 0.695176149037122, "learning_rate": 2.30877102720065e-06, "loss": 0.2407, "step": 20122 }, { "epoch": 0.6905628002745368, "grad_norm": 0.8809139838251641, "learning_rate": 2.308302673700768e-06, "loss": 0.2945, "step": 20123 }, { "epoch": 0.6905971173644475, "grad_norm": 0.7501479572390342, "learning_rate": 2.307834353453668e-06, "loss": 0.2848, "step": 20124 }, { "epoch": 0.6906314344543583, "grad_norm": 0.7457493732611649, "learning_rate": 2.307366066465136e-06, "loss": 0.2736, "step": 20125 }, { "epoch": 0.690665751544269, "grad_norm": 0.6909365352184299, "learning_rate": 2.306897812740958e-06, "loss": 0.2399, "step": 20126 }, { "epoch": 0.6907000686341799, "grad_norm": 0.7616867556769066, "learning_rate": 2.3064295922869145e-06, "loss": 0.2439, "step": 20127 }, { "epoch": 0.6907343857240906, "grad_norm": 0.7251117994421009, "learning_rate": 2.305961405108797e-06, "loss": 0.2313, "step": 20128 }, { "epoch": 0.6907687028140014, "grad_norm": 0.816462047599742, "learning_rate": 2.305493251212383e-06, "loss": 0.2705, "step": 20129 }, { "epoch": 0.6908030199039121, "grad_norm": 0.7470796026438369, "learning_rate": 2.3050251306034566e-06, "loss": 0.2425, "step": 20130 }, { "epoch": 0.6908373369938229, "grad_norm": 0.9309606498824899, "learning_rate": 2.3045570432878066e-06, "loss": 0.3162, "step": 20131 }, { "epoch": 0.6908716540837337, "grad_norm": 0.8420282202380147, "learning_rate": 2.3040889892712096e-06, "loss": 0.3066, "step": 20132 }, { "epoch": 0.6909059711736445, "grad_norm": 1.0471560235606872, "learning_rate": 2.30362096855945e-06, "loss": 0.327, "step": 20133 }, { "epoch": 0.6909402882635552, "grad_norm": 0.7024677472611015, "learning_rate": 2.3031529811583103e-06, "loss": 0.2279, "step": 20134 }, { "epoch": 0.690974605353466, "grad_norm": 0.7887436895109924, "learning_rate": 2.3026850270735707e-06, "loss": 0.2639, "step": 20135 }, { "epoch": 0.6910089224433767, "grad_norm": 0.8437198212122665, "learning_rate": 2.3022171063110133e-06, "loss": 0.2784, "step": 20136 }, { "epoch": 0.6910432395332876, "grad_norm": 0.85849560241958, "learning_rate": 2.3017492188764177e-06, "loss": 0.2772, "step": 20137 }, { "epoch": 0.6910775566231984, "grad_norm": 0.8637526954241174, "learning_rate": 2.3012813647755667e-06, "loss": 0.2347, "step": 20138 }, { "epoch": 0.6911118737131091, "grad_norm": 0.7893292595698805, "learning_rate": 2.3008135440142345e-06, "loss": 0.266, "step": 20139 }, { "epoch": 0.6911461908030199, "grad_norm": 0.7401436191702133, "learning_rate": 2.300345756598208e-06, "loss": 0.2824, "step": 20140 }, { "epoch": 0.6911805078929307, "grad_norm": 0.7722246791833918, "learning_rate": 2.2998780025332596e-06, "loss": 0.2585, "step": 20141 }, { "epoch": 0.6912148249828415, "grad_norm": 0.8062124689891029, "learning_rate": 2.2994102818251705e-06, "loss": 0.2536, "step": 20142 }, { "epoch": 0.6912491420727522, "grad_norm": 1.0662859351121219, "learning_rate": 2.2989425944797195e-06, "loss": 0.2802, "step": 20143 }, { "epoch": 0.691283459162663, "grad_norm": 0.8420911583978988, "learning_rate": 2.298474940502683e-06, "loss": 0.2612, "step": 20144 }, { "epoch": 0.6913177762525737, "grad_norm": 0.8540161530335599, "learning_rate": 2.2980073198998393e-06, "loss": 0.2837, "step": 20145 }, { "epoch": 0.6913520933424846, "grad_norm": 0.7460328018440388, "learning_rate": 2.2975397326769646e-06, "loss": 0.2905, "step": 20146 }, { "epoch": 0.6913864104323953, "grad_norm": 0.8609862661349876, "learning_rate": 2.2970721788398375e-06, "loss": 0.2322, "step": 20147 }, { "epoch": 0.6914207275223061, "grad_norm": 0.7925586376528785, "learning_rate": 2.2966046583942294e-06, "loss": 0.2706, "step": 20148 }, { "epoch": 0.6914550446122169, "grad_norm": 0.8092460935308995, "learning_rate": 2.2961371713459217e-06, "loss": 0.317, "step": 20149 }, { "epoch": 0.6914893617021277, "grad_norm": 0.6982815196479142, "learning_rate": 2.295669717700685e-06, "loss": 0.231, "step": 20150 }, { "epoch": 0.6915236787920385, "grad_norm": 0.7838813059169161, "learning_rate": 2.2952022974642967e-06, "loss": 0.2676, "step": 20151 }, { "epoch": 0.6915579958819492, "grad_norm": 0.8516649796546346, "learning_rate": 2.2947349106425304e-06, "loss": 0.3022, "step": 20152 }, { "epoch": 0.69159231297186, "grad_norm": 0.7418329203212195, "learning_rate": 2.2942675572411598e-06, "loss": 0.2407, "step": 20153 }, { "epoch": 0.6916266300617707, "grad_norm": 0.7899122417073566, "learning_rate": 2.293800237265959e-06, "loss": 0.1966, "step": 20154 }, { "epoch": 0.6916609471516816, "grad_norm": 0.8294773767109392, "learning_rate": 2.2933329507227017e-06, "loss": 0.2422, "step": 20155 }, { "epoch": 0.6916952642415923, "grad_norm": 0.8126723329462717, "learning_rate": 2.2928656976171597e-06, "loss": 0.2489, "step": 20156 }, { "epoch": 0.6917295813315031, "grad_norm": 0.7551902206563029, "learning_rate": 2.2923984779551056e-06, "loss": 0.2868, "step": 20157 }, { "epoch": 0.6917638984214138, "grad_norm": 0.7480016290522803, "learning_rate": 2.291931291742312e-06, "loss": 0.2923, "step": 20158 }, { "epoch": 0.6917982155113246, "grad_norm": 0.8011979253556427, "learning_rate": 2.2914641389845497e-06, "loss": 0.2774, "step": 20159 }, { "epoch": 0.6918325326012355, "grad_norm": 0.7785229641138038, "learning_rate": 2.29099701968759e-06, "loss": 0.244, "step": 20160 }, { "epoch": 0.6918668496911462, "grad_norm": 0.8325602694737467, "learning_rate": 2.2905299338572062e-06, "loss": 0.353, "step": 20161 }, { "epoch": 0.691901166781057, "grad_norm": 0.761504779449508, "learning_rate": 2.2900628814991614e-06, "loss": 0.2226, "step": 20162 }, { "epoch": 0.6919354838709677, "grad_norm": 0.8169966775064113, "learning_rate": 2.289595862619235e-06, "loss": 0.2791, "step": 20163 }, { "epoch": 0.6919698009608786, "grad_norm": 0.7594232653554682, "learning_rate": 2.2891288772231896e-06, "loss": 0.2292, "step": 20164 }, { "epoch": 0.6920041180507893, "grad_norm": 1.0460644720045507, "learning_rate": 2.288661925316796e-06, "loss": 0.2599, "step": 20165 }, { "epoch": 0.6920384351407001, "grad_norm": 0.8592255098800973, "learning_rate": 2.2881950069058232e-06, "loss": 0.2758, "step": 20166 }, { "epoch": 0.6920727522306108, "grad_norm": 0.6724656251531841, "learning_rate": 2.2877281219960396e-06, "loss": 0.2442, "step": 20167 }, { "epoch": 0.6921070693205216, "grad_norm": 0.7381285174736321, "learning_rate": 2.2872612705932128e-06, "loss": 0.3055, "step": 20168 }, { "epoch": 0.6921413864104324, "grad_norm": 0.8745058889625358, "learning_rate": 2.28679445270311e-06, "loss": 0.2888, "step": 20169 }, { "epoch": 0.6921757035003432, "grad_norm": 0.7563768267896795, "learning_rate": 2.2863276683315003e-06, "loss": 0.2824, "step": 20170 }, { "epoch": 0.6922100205902539, "grad_norm": 0.6964459002103487, "learning_rate": 2.285860917484146e-06, "loss": 0.2431, "step": 20171 }, { "epoch": 0.6922443376801647, "grad_norm": 0.7893451046750923, "learning_rate": 2.285394200166818e-06, "loss": 0.2645, "step": 20172 }, { "epoch": 0.6922786547700756, "grad_norm": 0.7257344730843607, "learning_rate": 2.284927516385278e-06, "loss": 0.2345, "step": 20173 }, { "epoch": 0.6923129718599863, "grad_norm": 0.8208613521124535, "learning_rate": 2.2844608661452916e-06, "loss": 0.3432, "step": 20174 }, { "epoch": 0.692347288949897, "grad_norm": 0.7261743671307334, "learning_rate": 2.2839942494526284e-06, "loss": 0.2826, "step": 20175 }, { "epoch": 0.6923816060398078, "grad_norm": 0.7447895333470179, "learning_rate": 2.2835276663130486e-06, "loss": 0.2673, "step": 20176 }, { "epoch": 0.6924159231297186, "grad_norm": 0.8362815017803725, "learning_rate": 2.283061116732317e-06, "loss": 0.3108, "step": 20177 }, { "epoch": 0.6924502402196294, "grad_norm": 0.8196739217858581, "learning_rate": 2.2825946007161976e-06, "loss": 0.2636, "step": 20178 }, { "epoch": 0.6924845573095402, "grad_norm": 0.7713657051855786, "learning_rate": 2.2821281182704537e-06, "loss": 0.2867, "step": 20179 }, { "epoch": 0.6925188743994509, "grad_norm": 0.9288464491843614, "learning_rate": 2.281661669400848e-06, "loss": 0.2175, "step": 20180 }, { "epoch": 0.6925531914893617, "grad_norm": 0.7319899734515675, "learning_rate": 2.281195254113143e-06, "loss": 0.2791, "step": 20181 }, { "epoch": 0.6925875085792724, "grad_norm": 0.7275921913996333, "learning_rate": 2.280728872413103e-06, "loss": 0.2366, "step": 20182 }, { "epoch": 0.6926218256691833, "grad_norm": 0.9616893186245398, "learning_rate": 2.280262524306484e-06, "loss": 0.2826, "step": 20183 }, { "epoch": 0.692656142759094, "grad_norm": 0.8261029363644103, "learning_rate": 2.2797962097990543e-06, "loss": 0.2471, "step": 20184 }, { "epoch": 0.6926904598490048, "grad_norm": 0.6485371645974128, "learning_rate": 2.279329928896567e-06, "loss": 0.2191, "step": 20185 }, { "epoch": 0.6927247769389155, "grad_norm": 0.8096119926999756, "learning_rate": 2.2788636816047898e-06, "loss": 0.2561, "step": 20186 }, { "epoch": 0.6927590940288264, "grad_norm": 0.8744119560167041, "learning_rate": 2.278397467929478e-06, "loss": 0.2521, "step": 20187 }, { "epoch": 0.6927934111187372, "grad_norm": 0.714756248121916, "learning_rate": 2.277931287876392e-06, "loss": 0.2558, "step": 20188 }, { "epoch": 0.6928277282086479, "grad_norm": 0.8300160910895678, "learning_rate": 2.2774651414512914e-06, "loss": 0.2777, "step": 20189 }, { "epoch": 0.6928620452985587, "grad_norm": 0.7820900388395261, "learning_rate": 2.2769990286599355e-06, "loss": 0.2608, "step": 20190 }, { "epoch": 0.6928963623884694, "grad_norm": 0.755822963111672, "learning_rate": 2.2765329495080806e-06, "loss": 0.2458, "step": 20191 }, { "epoch": 0.6929306794783803, "grad_norm": 0.8536728199649782, "learning_rate": 2.2760669040014873e-06, "loss": 0.2602, "step": 20192 }, { "epoch": 0.692964996568291, "grad_norm": 0.7804994990505839, "learning_rate": 2.275600892145912e-06, "loss": 0.2469, "step": 20193 }, { "epoch": 0.6929993136582018, "grad_norm": 0.7693411826450681, "learning_rate": 2.2751349139471086e-06, "loss": 0.3218, "step": 20194 }, { "epoch": 0.6930336307481125, "grad_norm": 0.785886479820518, "learning_rate": 2.2746689694108398e-06, "loss": 0.2442, "step": 20195 }, { "epoch": 0.6930679478380234, "grad_norm": 0.7611712562498095, "learning_rate": 2.2742030585428567e-06, "loss": 0.2318, "step": 20196 }, { "epoch": 0.6931022649279341, "grad_norm": 0.75121831293424, "learning_rate": 2.273737181348915e-06, "loss": 0.2774, "step": 20197 }, { "epoch": 0.6931365820178449, "grad_norm": 2.037215910886854, "learning_rate": 2.2732713378347755e-06, "loss": 0.3819, "step": 20198 }, { "epoch": 0.6931708991077556, "grad_norm": 0.7356955105554961, "learning_rate": 2.272805528006188e-06, "loss": 0.2611, "step": 20199 }, { "epoch": 0.6932052161976664, "grad_norm": 0.8512826235190702, "learning_rate": 2.272339751868908e-06, "loss": 0.2777, "step": 20200 }, { "epoch": 0.6932395332875773, "grad_norm": 0.8154071060642958, "learning_rate": 2.2718740094286904e-06, "loss": 0.2959, "step": 20201 }, { "epoch": 0.693273850377488, "grad_norm": 0.7933243662191137, "learning_rate": 2.2714083006912894e-06, "loss": 0.2515, "step": 20202 }, { "epoch": 0.6933081674673988, "grad_norm": 0.8422804074057104, "learning_rate": 2.2709426256624573e-06, "loss": 0.2944, "step": 20203 }, { "epoch": 0.6933424845573095, "grad_norm": 0.7586864938616262, "learning_rate": 2.270476984347947e-06, "loss": 0.3258, "step": 20204 }, { "epoch": 0.6933768016472203, "grad_norm": 0.8110108769964385, "learning_rate": 2.270011376753513e-06, "loss": 0.3071, "step": 20205 }, { "epoch": 0.6934111187371311, "grad_norm": 0.7252424863079575, "learning_rate": 2.269545802884902e-06, "loss": 0.2567, "step": 20206 }, { "epoch": 0.6934454358270419, "grad_norm": 0.7956490463038016, "learning_rate": 2.2690802627478735e-06, "loss": 0.2642, "step": 20207 }, { "epoch": 0.6934797529169526, "grad_norm": 0.8545893530884351, "learning_rate": 2.268614756348172e-06, "loss": 0.3279, "step": 20208 }, { "epoch": 0.6935140700068634, "grad_norm": 0.7271844158944413, "learning_rate": 2.268149283691551e-06, "loss": 0.2684, "step": 20209 }, { "epoch": 0.6935483870967742, "grad_norm": 0.81254732863182, "learning_rate": 2.26768384478376e-06, "loss": 0.3045, "step": 20210 }, { "epoch": 0.693582704186685, "grad_norm": 0.8065819423306797, "learning_rate": 2.26721843963055e-06, "loss": 0.2652, "step": 20211 }, { "epoch": 0.6936170212765957, "grad_norm": 0.7808010604616796, "learning_rate": 2.26675306823767e-06, "loss": 0.2762, "step": 20212 }, { "epoch": 0.6936513383665065, "grad_norm": 0.7843907266184, "learning_rate": 2.2662877306108695e-06, "loss": 0.2783, "step": 20213 }, { "epoch": 0.6936856554564172, "grad_norm": 0.811138362974232, "learning_rate": 2.2658224267558984e-06, "loss": 0.2756, "step": 20214 }, { "epoch": 0.6937199725463281, "grad_norm": 0.6790523203086437, "learning_rate": 2.265357156678499e-06, "loss": 0.2463, "step": 20215 }, { "epoch": 0.6937542896362389, "grad_norm": 0.744051611846457, "learning_rate": 2.2648919203844285e-06, "loss": 0.2411, "step": 20216 }, { "epoch": 0.6937886067261496, "grad_norm": 0.7451777207307473, "learning_rate": 2.2644267178794253e-06, "loss": 0.2474, "step": 20217 }, { "epoch": 0.6938229238160604, "grad_norm": 0.7716181419867877, "learning_rate": 2.2639615491692444e-06, "loss": 0.2663, "step": 20218 }, { "epoch": 0.6938572409059712, "grad_norm": 0.793801809934678, "learning_rate": 2.2634964142596265e-06, "loss": 0.2937, "step": 20219 }, { "epoch": 0.693891557995882, "grad_norm": 0.7561162862876719, "learning_rate": 2.263031313156319e-06, "loss": 0.3067, "step": 20220 }, { "epoch": 0.6939258750857927, "grad_norm": 0.8269317738495187, "learning_rate": 2.2625662458650714e-06, "loss": 0.3188, "step": 20221 }, { "epoch": 0.6939601921757035, "grad_norm": 0.7732703714945751, "learning_rate": 2.262101212391625e-06, "loss": 0.2944, "step": 20222 }, { "epoch": 0.6939945092656142, "grad_norm": 0.8221039923120941, "learning_rate": 2.2616362127417257e-06, "loss": 0.235, "step": 20223 }, { "epoch": 0.6940288263555251, "grad_norm": 0.795949976632328, "learning_rate": 2.2611712469211184e-06, "loss": 0.2985, "step": 20224 }, { "epoch": 0.6940631434454358, "grad_norm": 0.82152741392054, "learning_rate": 2.2607063149355475e-06, "loss": 0.2559, "step": 20225 }, { "epoch": 0.6940974605353466, "grad_norm": 0.817434143930237, "learning_rate": 2.260241416790756e-06, "loss": 0.2523, "step": 20226 }, { "epoch": 0.6941317776252574, "grad_norm": 0.8378396674608112, "learning_rate": 2.2597765524924883e-06, "loss": 0.2725, "step": 20227 }, { "epoch": 0.6941660947151681, "grad_norm": 0.8201017798199296, "learning_rate": 2.2593117220464874e-06, "loss": 0.2322, "step": 20228 }, { "epoch": 0.694200411805079, "grad_norm": 0.7813955006340717, "learning_rate": 2.2588469254584915e-06, "loss": 0.2358, "step": 20229 }, { "epoch": 0.6942347288949897, "grad_norm": 0.8543458525102773, "learning_rate": 2.2583821627342494e-06, "loss": 0.2692, "step": 20230 }, { "epoch": 0.6942690459849005, "grad_norm": 0.756983538261004, "learning_rate": 2.257917433879498e-06, "loss": 0.2615, "step": 20231 }, { "epoch": 0.6943033630748112, "grad_norm": 0.7964888060063573, "learning_rate": 2.2574527388999796e-06, "loss": 0.2671, "step": 20232 }, { "epoch": 0.6943376801647221, "grad_norm": 0.6608452670028656, "learning_rate": 2.2569880778014346e-06, "loss": 0.2585, "step": 20233 }, { "epoch": 0.6943719972546328, "grad_norm": 0.7617190718424077, "learning_rate": 2.2565234505896044e-06, "loss": 0.3249, "step": 20234 }, { "epoch": 0.6944063143445436, "grad_norm": 0.8460418708920454, "learning_rate": 2.256058857270228e-06, "loss": 0.2372, "step": 20235 }, { "epoch": 0.6944406314344543, "grad_norm": 0.7007907006547989, "learning_rate": 2.255594297849045e-06, "loss": 0.23, "step": 20236 }, { "epoch": 0.6944749485243651, "grad_norm": 0.9024672099474398, "learning_rate": 2.255129772331798e-06, "loss": 0.2603, "step": 20237 }, { "epoch": 0.694509265614276, "grad_norm": 0.718954308396857, "learning_rate": 2.2546652807242174e-06, "loss": 0.2674, "step": 20238 }, { "epoch": 0.6945435827041867, "grad_norm": 0.7874615032753017, "learning_rate": 2.2542008230320505e-06, "loss": 0.2347, "step": 20239 }, { "epoch": 0.6945778997940975, "grad_norm": 0.8875250042846056, "learning_rate": 2.2537363992610294e-06, "loss": 0.29, "step": 20240 }, { "epoch": 0.6946122168840082, "grad_norm": 0.8976309154915635, "learning_rate": 2.2532720094168915e-06, "loss": 0.2617, "step": 20241 }, { "epoch": 0.6946465339739191, "grad_norm": 0.8081789621909338, "learning_rate": 2.2528076535053796e-06, "loss": 0.2951, "step": 20242 }, { "epoch": 0.6946808510638298, "grad_norm": 0.7489367455501108, "learning_rate": 2.2523433315322236e-06, "loss": 0.2313, "step": 20243 }, { "epoch": 0.6947151681537406, "grad_norm": 0.7972007865057937, "learning_rate": 2.251879043503163e-06, "loss": 0.2746, "step": 20244 }, { "epoch": 0.6947494852436513, "grad_norm": 0.7570366350949185, "learning_rate": 2.2514147894239323e-06, "loss": 0.2983, "step": 20245 }, { "epoch": 0.6947838023335621, "grad_norm": 0.758596064320596, "learning_rate": 2.2509505693002675e-06, "loss": 0.2995, "step": 20246 }, { "epoch": 0.6948181194234729, "grad_norm": 0.683617298107964, "learning_rate": 2.250486383137903e-06, "loss": 0.2598, "step": 20247 }, { "epoch": 0.6948524365133837, "grad_norm": 0.7426522252318043, "learning_rate": 2.2500222309425745e-06, "loss": 0.2583, "step": 20248 }, { "epoch": 0.6948867536032944, "grad_norm": 0.715103098679635, "learning_rate": 2.2495581127200145e-06, "loss": 0.2535, "step": 20249 }, { "epoch": 0.6949210706932052, "grad_norm": 0.7847595462608835, "learning_rate": 2.2490940284759573e-06, "loss": 0.266, "step": 20250 }, { "epoch": 0.6949553877831159, "grad_norm": 0.6855471412499823, "learning_rate": 2.248629978216138e-06, "loss": 0.2349, "step": 20251 }, { "epoch": 0.6949897048730268, "grad_norm": 0.8523347122375975, "learning_rate": 2.248165961946284e-06, "loss": 0.284, "step": 20252 }, { "epoch": 0.6950240219629376, "grad_norm": 0.8347132491380644, "learning_rate": 2.247701979672135e-06, "loss": 0.3122, "step": 20253 }, { "epoch": 0.6950583390528483, "grad_norm": 0.8989645170195685, "learning_rate": 2.2472380313994174e-06, "loss": 0.2452, "step": 20254 }, { "epoch": 0.6950926561427591, "grad_norm": 0.8023153074039155, "learning_rate": 2.246774117133864e-06, "loss": 0.3123, "step": 20255 }, { "epoch": 0.6951269732326699, "grad_norm": 0.7354918813882612, "learning_rate": 2.2463102368812068e-06, "loss": 0.2373, "step": 20256 }, { "epoch": 0.6951612903225807, "grad_norm": 0.7737293322526749, "learning_rate": 2.245846390647176e-06, "loss": 0.3044, "step": 20257 }, { "epoch": 0.6951956074124914, "grad_norm": 0.7791408004564699, "learning_rate": 2.245382578437502e-06, "loss": 0.2573, "step": 20258 }, { "epoch": 0.6952299245024022, "grad_norm": 0.7767540655275607, "learning_rate": 2.2449188002579155e-06, "loss": 0.2716, "step": 20259 }, { "epoch": 0.6952642415923129, "grad_norm": 0.7705209759087908, "learning_rate": 2.244455056114146e-06, "loss": 0.2483, "step": 20260 }, { "epoch": 0.6952985586822238, "grad_norm": 0.8716303708086558, "learning_rate": 2.243991346011918e-06, "loss": 0.3318, "step": 20261 }, { "epoch": 0.6953328757721345, "grad_norm": 0.7587439084421438, "learning_rate": 2.2435276699569673e-06, "loss": 0.2347, "step": 20262 }, { "epoch": 0.6953671928620453, "grad_norm": 0.7179738369941946, "learning_rate": 2.243064027955017e-06, "loss": 0.277, "step": 20263 }, { "epoch": 0.695401509951956, "grad_norm": 0.7111310721717146, "learning_rate": 2.2426004200117947e-06, "loss": 0.2389, "step": 20264 }, { "epoch": 0.6954358270418668, "grad_norm": 0.9105593766683859, "learning_rate": 2.242136846133033e-06, "loss": 0.2053, "step": 20265 }, { "epoch": 0.6954701441317777, "grad_norm": 0.7898268155633802, "learning_rate": 2.2416733063244526e-06, "loss": 0.2121, "step": 20266 }, { "epoch": 0.6955044612216884, "grad_norm": 0.7007239601855948, "learning_rate": 2.2412098005917836e-06, "loss": 0.2736, "step": 20267 }, { "epoch": 0.6955387783115992, "grad_norm": 0.7349309449721221, "learning_rate": 2.2407463289407507e-06, "loss": 0.2573, "step": 20268 }, { "epoch": 0.6955730954015099, "grad_norm": 0.7724314218804315, "learning_rate": 2.24028289137708e-06, "loss": 0.2659, "step": 20269 }, { "epoch": 0.6956074124914208, "grad_norm": 0.8215942177205985, "learning_rate": 2.2398194879064966e-06, "loss": 0.3006, "step": 20270 }, { "epoch": 0.6956417295813315, "grad_norm": 0.7784608015326271, "learning_rate": 2.239356118534725e-06, "loss": 0.2813, "step": 20271 }, { "epoch": 0.6956760466712423, "grad_norm": 0.8173519515273832, "learning_rate": 2.238892783267492e-06, "loss": 0.3146, "step": 20272 }, { "epoch": 0.695710363761153, "grad_norm": 0.9839246631931129, "learning_rate": 2.2384294821105162e-06, "loss": 0.3342, "step": 20273 }, { "epoch": 0.6957446808510638, "grad_norm": 0.7609488804985223, "learning_rate": 2.2379662150695282e-06, "loss": 0.323, "step": 20274 }, { "epoch": 0.6957789979409746, "grad_norm": 0.6714365572854953, "learning_rate": 2.237502982150245e-06, "loss": 0.2546, "step": 20275 }, { "epoch": 0.6958133150308854, "grad_norm": 0.842727722082408, "learning_rate": 2.2370397833583923e-06, "loss": 0.2895, "step": 20276 }, { "epoch": 0.6958476321207961, "grad_norm": 0.774607084828002, "learning_rate": 2.236576618699691e-06, "loss": 0.3391, "step": 20277 }, { "epoch": 0.6958819492107069, "grad_norm": 0.7517175959252445, "learning_rate": 2.2361134881798648e-06, "loss": 0.2706, "step": 20278 }, { "epoch": 0.6959162663006178, "grad_norm": 0.7616045643832198, "learning_rate": 2.2356503918046328e-06, "loss": 0.2548, "step": 20279 }, { "epoch": 0.6959505833905285, "grad_norm": 0.7779540818617932, "learning_rate": 2.235187329579718e-06, "loss": 0.2343, "step": 20280 }, { "epoch": 0.6959849004804393, "grad_norm": 0.7543256785212118, "learning_rate": 2.2347243015108404e-06, "loss": 0.2747, "step": 20281 }, { "epoch": 0.69601921757035, "grad_norm": 0.7412654890874059, "learning_rate": 2.23426130760372e-06, "loss": 0.2729, "step": 20282 }, { "epoch": 0.6960535346602608, "grad_norm": 0.7044840751753564, "learning_rate": 2.2337983478640784e-06, "loss": 0.2516, "step": 20283 }, { "epoch": 0.6960878517501716, "grad_norm": 0.7646604495222682, "learning_rate": 2.2333354222976294e-06, "loss": 0.2457, "step": 20284 }, { "epoch": 0.6961221688400824, "grad_norm": 0.7156091027875886, "learning_rate": 2.2328725309100974e-06, "loss": 0.2582, "step": 20285 }, { "epoch": 0.6961564859299931, "grad_norm": 0.8235536649682086, "learning_rate": 2.2324096737072008e-06, "loss": 0.2824, "step": 20286 }, { "epoch": 0.6961908030199039, "grad_norm": 0.8021610178886324, "learning_rate": 2.231946850694653e-06, "loss": 0.2828, "step": 20287 }, { "epoch": 0.6962251201098146, "grad_norm": 0.7607689124550289, "learning_rate": 2.2314840618781773e-06, "loss": 0.2461, "step": 20288 }, { "epoch": 0.6962594371997255, "grad_norm": 0.7567586443491623, "learning_rate": 2.231021307263487e-06, "loss": 0.2579, "step": 20289 }, { "epoch": 0.6962937542896362, "grad_norm": 1.039929299768115, "learning_rate": 2.2305585868563005e-06, "loss": 0.3071, "step": 20290 }, { "epoch": 0.696328071379547, "grad_norm": 0.6961248441282692, "learning_rate": 2.2300959006623336e-06, "loss": 0.2444, "step": 20291 }, { "epoch": 0.6963623884694577, "grad_norm": 0.813694360766952, "learning_rate": 2.2296332486873017e-06, "loss": 0.2716, "step": 20292 }, { "epoch": 0.6963967055593686, "grad_norm": 0.7551485382999943, "learning_rate": 2.229170630936922e-06, "loss": 0.2983, "step": 20293 }, { "epoch": 0.6964310226492794, "grad_norm": 0.9379706823033332, "learning_rate": 2.2287080474169084e-06, "loss": 0.2501, "step": 20294 }, { "epoch": 0.6964653397391901, "grad_norm": 0.6903994314292157, "learning_rate": 2.228245498132977e-06, "loss": 0.2549, "step": 20295 }, { "epoch": 0.6964996568291009, "grad_norm": 1.0400030311044643, "learning_rate": 2.2277829830908382e-06, "loss": 0.2981, "step": 20296 }, { "epoch": 0.6965339739190116, "grad_norm": 0.9542639526397957, "learning_rate": 2.227320502296212e-06, "loss": 0.2612, "step": 20297 }, { "epoch": 0.6965682910089225, "grad_norm": 0.7616374583871093, "learning_rate": 2.2268580557548065e-06, "loss": 0.247, "step": 20298 }, { "epoch": 0.6966026080988332, "grad_norm": 0.7977670243933407, "learning_rate": 2.226395643472336e-06, "loss": 0.3137, "step": 20299 }, { "epoch": 0.696636925188744, "grad_norm": 0.8057667772176274, "learning_rate": 2.225933265454514e-06, "loss": 0.2479, "step": 20300 }, { "epoch": 0.6966712422786547, "grad_norm": 0.918599063094428, "learning_rate": 2.2254709217070523e-06, "loss": 0.335, "step": 20301 }, { "epoch": 0.6967055593685656, "grad_norm": 0.7946670780469042, "learning_rate": 2.225008612235663e-06, "loss": 0.2489, "step": 20302 }, { "epoch": 0.6967398764584763, "grad_norm": 0.7854881035914033, "learning_rate": 2.2245463370460565e-06, "loss": 0.342, "step": 20303 }, { "epoch": 0.6967741935483871, "grad_norm": 0.9414373142696718, "learning_rate": 2.224084096143946e-06, "loss": 0.2675, "step": 20304 }, { "epoch": 0.6968085106382979, "grad_norm": 0.7373157838326762, "learning_rate": 2.2236218895350363e-06, "loss": 0.2717, "step": 20305 }, { "epoch": 0.6968428277282086, "grad_norm": 0.7710719494485068, "learning_rate": 2.2231597172250453e-06, "loss": 0.2624, "step": 20306 }, { "epoch": 0.6968771448181195, "grad_norm": 0.7863450432200062, "learning_rate": 2.2226975792196764e-06, "loss": 0.2434, "step": 20307 }, { "epoch": 0.6969114619080302, "grad_norm": 0.6475415813619129, "learning_rate": 2.2222354755246388e-06, "loss": 0.1987, "step": 20308 }, { "epoch": 0.696945778997941, "grad_norm": 0.7397861399161956, "learning_rate": 2.2217734061456477e-06, "loss": 0.2374, "step": 20309 }, { "epoch": 0.6969800960878517, "grad_norm": 0.7722385213649989, "learning_rate": 2.2213113710884045e-06, "loss": 0.315, "step": 20310 }, { "epoch": 0.6970144131777625, "grad_norm": 0.8076555967355559, "learning_rate": 2.22084937035862e-06, "loss": 0.2548, "step": 20311 }, { "epoch": 0.6970487302676733, "grad_norm": 0.8762845231952365, "learning_rate": 2.2203874039620017e-06, "loss": 0.3378, "step": 20312 }, { "epoch": 0.6970830473575841, "grad_norm": 0.7140667264218258, "learning_rate": 2.219925471904256e-06, "loss": 0.308, "step": 20313 }, { "epoch": 0.6971173644474948, "grad_norm": 1.014593625190488, "learning_rate": 2.2194635741910892e-06, "loss": 0.2814, "step": 20314 }, { "epoch": 0.6971516815374056, "grad_norm": 0.7355661397883069, "learning_rate": 2.2190017108282092e-06, "loss": 0.2758, "step": 20315 }, { "epoch": 0.6971859986273165, "grad_norm": 0.8743624211034384, "learning_rate": 2.21853988182132e-06, "loss": 0.2974, "step": 20316 }, { "epoch": 0.6972203157172272, "grad_norm": 0.7077947242194389, "learning_rate": 2.218078087176128e-06, "loss": 0.2543, "step": 20317 }, { "epoch": 0.697254632807138, "grad_norm": 0.6895525216320781, "learning_rate": 2.2176163268983397e-06, "loss": 0.264, "step": 20318 }, { "epoch": 0.6972889498970487, "grad_norm": 0.7926901676609555, "learning_rate": 2.217154600993654e-06, "loss": 0.2792, "step": 20319 }, { "epoch": 0.6973232669869595, "grad_norm": 0.71364690347568, "learning_rate": 2.2166929094677823e-06, "loss": 0.2446, "step": 20320 }, { "epoch": 0.6973575840768703, "grad_norm": 0.7799036040230125, "learning_rate": 2.2162312523264227e-06, "loss": 0.3349, "step": 20321 }, { "epoch": 0.6973919011667811, "grad_norm": 0.7604168040275485, "learning_rate": 2.2157696295752805e-06, "loss": 0.2793, "step": 20322 }, { "epoch": 0.6974262182566918, "grad_norm": 0.7607138605917836, "learning_rate": 2.2153080412200588e-06, "loss": 0.2469, "step": 20323 }, { "epoch": 0.6974605353466026, "grad_norm": 0.7183401600159508, "learning_rate": 2.2148464872664593e-06, "loss": 0.2108, "step": 20324 }, { "epoch": 0.6974948524365134, "grad_norm": 0.7996665416058413, "learning_rate": 2.2143849677201844e-06, "loss": 0.2565, "step": 20325 }, { "epoch": 0.6975291695264242, "grad_norm": 0.7454478790790194, "learning_rate": 2.2139234825869353e-06, "loss": 0.2512, "step": 20326 }, { "epoch": 0.6975634866163349, "grad_norm": 0.7905537400601599, "learning_rate": 2.213462031872416e-06, "loss": 0.2407, "step": 20327 }, { "epoch": 0.6975978037062457, "grad_norm": 0.6935832489965161, "learning_rate": 2.2130006155823195e-06, "loss": 0.2636, "step": 20328 }, { "epoch": 0.6976321207961564, "grad_norm": 0.7531055583761511, "learning_rate": 2.212539233722354e-06, "loss": 0.2975, "step": 20329 }, { "epoch": 0.6976664378860673, "grad_norm": 0.8976088165634188, "learning_rate": 2.212077886298218e-06, "loss": 0.2796, "step": 20330 }, { "epoch": 0.697700754975978, "grad_norm": 0.7942353764366366, "learning_rate": 2.2116165733156053e-06, "loss": 0.275, "step": 20331 }, { "epoch": 0.6977350720658888, "grad_norm": 0.8137677442760956, "learning_rate": 2.2111552947802227e-06, "loss": 0.3231, "step": 20332 }, { "epoch": 0.6977693891557996, "grad_norm": 0.7259503649347403, "learning_rate": 2.210694050697763e-06, "loss": 0.2738, "step": 20333 }, { "epoch": 0.6978037062457103, "grad_norm": 0.7363805098039709, "learning_rate": 2.2102328410739265e-06, "loss": 0.2486, "step": 20334 }, { "epoch": 0.6978380233356212, "grad_norm": 0.8146046213557916, "learning_rate": 2.2097716659144106e-06, "loss": 0.2911, "step": 20335 }, { "epoch": 0.6978723404255319, "grad_norm": 0.7791551631615782, "learning_rate": 2.2093105252249132e-06, "loss": 0.2261, "step": 20336 }, { "epoch": 0.6979066575154427, "grad_norm": 0.8174056775186906, "learning_rate": 2.2088494190111297e-06, "loss": 0.2586, "step": 20337 }, { "epoch": 0.6979409746053534, "grad_norm": 0.6888902889230208, "learning_rate": 2.208388347278758e-06, "loss": 0.2915, "step": 20338 }, { "epoch": 0.6979752916952643, "grad_norm": 0.7830644130798399, "learning_rate": 2.2079273100334953e-06, "loss": 0.2409, "step": 20339 }, { "epoch": 0.698009608785175, "grad_norm": 0.813625223638682, "learning_rate": 2.207466307281032e-06, "loss": 0.2585, "step": 20340 }, { "epoch": 0.6980439258750858, "grad_norm": 0.720840526244432, "learning_rate": 2.20700533902707e-06, "loss": 0.2876, "step": 20341 }, { "epoch": 0.6980782429649965, "grad_norm": 0.816370989061348, "learning_rate": 2.206544405277297e-06, "loss": 0.3299, "step": 20342 }, { "epoch": 0.6981125600549073, "grad_norm": 0.7557941231048599, "learning_rate": 2.206083506037415e-06, "loss": 0.2962, "step": 20343 }, { "epoch": 0.6981468771448182, "grad_norm": 0.73765115035178, "learning_rate": 2.205622641313112e-06, "loss": 0.2693, "step": 20344 }, { "epoch": 0.6981811942347289, "grad_norm": 0.8128827914098996, "learning_rate": 2.205161811110083e-06, "loss": 0.3302, "step": 20345 }, { "epoch": 0.6982155113246397, "grad_norm": 0.8385467471644328, "learning_rate": 2.204701015434022e-06, "loss": 0.3165, "step": 20346 }, { "epoch": 0.6982498284145504, "grad_norm": 0.8646440528433207, "learning_rate": 2.2042402542906205e-06, "loss": 0.2553, "step": 20347 }, { "epoch": 0.6982841455044613, "grad_norm": 0.7464079161682831, "learning_rate": 2.203779527685571e-06, "loss": 0.2527, "step": 20348 }, { "epoch": 0.698318462594372, "grad_norm": 0.7431448874274331, "learning_rate": 2.203318835624566e-06, "loss": 0.2872, "step": 20349 }, { "epoch": 0.6983527796842828, "grad_norm": 0.7899842787957325, "learning_rate": 2.2028581781132972e-06, "loss": 0.2308, "step": 20350 }, { "epoch": 0.6983870967741935, "grad_norm": 0.8049506441307632, "learning_rate": 2.202397555157451e-06, "loss": 0.2821, "step": 20351 }, { "epoch": 0.6984214138641043, "grad_norm": 0.8208894277913944, "learning_rate": 2.201936966762724e-06, "loss": 0.241, "step": 20352 }, { "epoch": 0.6984557309540151, "grad_norm": 0.8009478077772293, "learning_rate": 2.201476412934804e-06, "loss": 0.2352, "step": 20353 }, { "epoch": 0.6984900480439259, "grad_norm": 0.7237005090738718, "learning_rate": 2.2010158936793773e-06, "loss": 0.2274, "step": 20354 }, { "epoch": 0.6985243651338366, "grad_norm": 0.7997664301569851, "learning_rate": 2.2005554090021397e-06, "loss": 0.2445, "step": 20355 }, { "epoch": 0.6985586822237474, "grad_norm": 0.6880008298483801, "learning_rate": 2.2000949589087745e-06, "loss": 0.2441, "step": 20356 }, { "epoch": 0.6985929993136581, "grad_norm": 0.7766825806409136, "learning_rate": 2.199634543404971e-06, "loss": 0.2983, "step": 20357 }, { "epoch": 0.698627316403569, "grad_norm": 0.672068313022345, "learning_rate": 2.199174162496418e-06, "loss": 0.2143, "step": 20358 }, { "epoch": 0.6986616334934798, "grad_norm": 0.7165560466632106, "learning_rate": 2.198713816188803e-06, "loss": 0.2812, "step": 20359 }, { "epoch": 0.6986959505833905, "grad_norm": 0.7155103938296234, "learning_rate": 2.1982535044878127e-06, "loss": 0.265, "step": 20360 }, { "epoch": 0.6987302676733013, "grad_norm": 0.7821396599710645, "learning_rate": 2.1977932273991343e-06, "loss": 0.2694, "step": 20361 }, { "epoch": 0.6987645847632121, "grad_norm": 0.8555706498748702, "learning_rate": 2.1973329849284548e-06, "loss": 0.2995, "step": 20362 }, { "epoch": 0.6987989018531229, "grad_norm": 0.808477154019906, "learning_rate": 2.1968727770814553e-06, "loss": 0.2135, "step": 20363 }, { "epoch": 0.6988332189430336, "grad_norm": 0.8489340208123739, "learning_rate": 2.196412603863828e-06, "loss": 0.2673, "step": 20364 }, { "epoch": 0.6988675360329444, "grad_norm": 0.7291372876995694, "learning_rate": 2.1959524652812527e-06, "loss": 0.2703, "step": 20365 }, { "epoch": 0.6989018531228551, "grad_norm": 0.8113048472772593, "learning_rate": 2.195492361339415e-06, "loss": 0.2474, "step": 20366 }, { "epoch": 0.698936170212766, "grad_norm": 0.9207542490086197, "learning_rate": 2.195032292044e-06, "loss": 0.2669, "step": 20367 }, { "epoch": 0.6989704873026767, "grad_norm": 0.8252685195984638, "learning_rate": 2.19457225740069e-06, "loss": 0.3197, "step": 20368 }, { "epoch": 0.6990048043925875, "grad_norm": 0.7578987707181204, "learning_rate": 2.1941122574151696e-06, "loss": 0.2392, "step": 20369 }, { "epoch": 0.6990391214824982, "grad_norm": 0.8040025247441588, "learning_rate": 2.1936522920931207e-06, "loss": 0.2787, "step": 20370 }, { "epoch": 0.6990734385724091, "grad_norm": 0.8250800185510007, "learning_rate": 2.1931923614402277e-06, "loss": 0.284, "step": 20371 }, { "epoch": 0.6991077556623199, "grad_norm": 0.7349203582740474, "learning_rate": 2.192732465462167e-06, "loss": 0.2716, "step": 20372 }, { "epoch": 0.6991420727522306, "grad_norm": 0.635671372827393, "learning_rate": 2.1922726041646275e-06, "loss": 0.2078, "step": 20373 }, { "epoch": 0.6991763898421414, "grad_norm": 0.8644819549822594, "learning_rate": 2.1918127775532828e-06, "loss": 0.2252, "step": 20374 }, { "epoch": 0.6992107069320521, "grad_norm": 0.796174873028692, "learning_rate": 2.191352985633819e-06, "loss": 0.2688, "step": 20375 }, { "epoch": 0.699245024021963, "grad_norm": 0.8021258068423366, "learning_rate": 2.190893228411916e-06, "loss": 0.3119, "step": 20376 }, { "epoch": 0.6992793411118737, "grad_norm": 0.7702801456647627, "learning_rate": 2.1904335058932493e-06, "loss": 0.2712, "step": 20377 }, { "epoch": 0.6993136582017845, "grad_norm": 0.7370388568005768, "learning_rate": 2.1899738180835038e-06, "loss": 0.2853, "step": 20378 }, { "epoch": 0.6993479752916952, "grad_norm": 0.7966985152575166, "learning_rate": 2.1895141649883537e-06, "loss": 0.2617, "step": 20379 }, { "epoch": 0.699382292381606, "grad_norm": 0.773699559243596, "learning_rate": 2.18905454661348e-06, "loss": 0.2437, "step": 20380 }, { "epoch": 0.6994166094715168, "grad_norm": 0.9182280510493234, "learning_rate": 2.18859496296456e-06, "loss": 0.281, "step": 20381 }, { "epoch": 0.6994509265614276, "grad_norm": 0.748999232812161, "learning_rate": 2.188135414047271e-06, "loss": 0.2701, "step": 20382 }, { "epoch": 0.6994852436513384, "grad_norm": 0.778819009526446, "learning_rate": 2.1876758998672916e-06, "loss": 0.2402, "step": 20383 }, { "epoch": 0.6995195607412491, "grad_norm": 0.7802827734861276, "learning_rate": 2.1872164204302964e-06, "loss": 0.3202, "step": 20384 }, { "epoch": 0.69955387783116, "grad_norm": 0.7230915640973778, "learning_rate": 2.1867569757419656e-06, "loss": 0.2213, "step": 20385 }, { "epoch": 0.6995881949210707, "grad_norm": 0.7464000976395494, "learning_rate": 2.1862975658079683e-06, "loss": 0.2901, "step": 20386 }, { "epoch": 0.6996225120109815, "grad_norm": 0.7236797573471754, "learning_rate": 2.1858381906339882e-06, "loss": 0.2846, "step": 20387 }, { "epoch": 0.6996568291008922, "grad_norm": 0.8289679728639401, "learning_rate": 2.185378850225694e-06, "loss": 0.2671, "step": 20388 }, { "epoch": 0.699691146190803, "grad_norm": 0.8442272232080977, "learning_rate": 2.1849195445887626e-06, "loss": 0.2467, "step": 20389 }, { "epoch": 0.6997254632807138, "grad_norm": 0.7532654631245682, "learning_rate": 2.184460273728869e-06, "loss": 0.2802, "step": 20390 }, { "epoch": 0.6997597803706246, "grad_norm": 0.808259396726655, "learning_rate": 2.184001037651685e-06, "loss": 0.2942, "step": 20391 }, { "epoch": 0.6997940974605353, "grad_norm": 0.7154315143369692, "learning_rate": 2.183541836362886e-06, "loss": 0.2537, "step": 20392 }, { "epoch": 0.6998284145504461, "grad_norm": 0.713675945813059, "learning_rate": 2.1830826698681427e-06, "loss": 0.2566, "step": 20393 }, { "epoch": 0.699862731640357, "grad_norm": 0.7653416805782642, "learning_rate": 2.182623538173132e-06, "loss": 0.2934, "step": 20394 }, { "epoch": 0.6998970487302677, "grad_norm": 0.779876977358923, "learning_rate": 2.182164441283518e-06, "loss": 0.2408, "step": 20395 }, { "epoch": 0.6999313658201785, "grad_norm": 0.760844796940136, "learning_rate": 2.1817053792049798e-06, "loss": 0.2433, "step": 20396 }, { "epoch": 0.6999656829100892, "grad_norm": 0.8678916173170814, "learning_rate": 2.181246351943187e-06, "loss": 0.2968, "step": 20397 }, { "epoch": 0.7, "grad_norm": 0.7644394176761096, "learning_rate": 2.180787359503806e-06, "loss": 0.2919, "step": 20398 }, { "epoch": 0.7000343170899108, "grad_norm": 0.8207870152595892, "learning_rate": 2.1803284018925137e-06, "loss": 0.2701, "step": 20399 }, { "epoch": 0.7000686341798216, "grad_norm": 0.8790429561499888, "learning_rate": 2.1798694791149746e-06, "loss": 0.2271, "step": 20400 }, { "epoch": 0.7001029512697323, "grad_norm": 0.8411308016187369, "learning_rate": 2.17941059117686e-06, "loss": 0.2474, "step": 20401 }, { "epoch": 0.7001372683596431, "grad_norm": 0.7468155360034473, "learning_rate": 2.178951738083839e-06, "loss": 0.3118, "step": 20402 }, { "epoch": 0.7001715854495538, "grad_norm": 0.7428759801489798, "learning_rate": 2.1784929198415813e-06, "loss": 0.2197, "step": 20403 }, { "epoch": 0.7002059025394647, "grad_norm": 0.8296210998144837, "learning_rate": 2.1780341364557527e-06, "loss": 0.329, "step": 20404 }, { "epoch": 0.7002402196293754, "grad_norm": 0.7535816750353285, "learning_rate": 2.1775753879320233e-06, "loss": 0.2535, "step": 20405 }, { "epoch": 0.7002745367192862, "grad_norm": 0.7237849757866601, "learning_rate": 2.1771166742760586e-06, "loss": 0.2336, "step": 20406 }, { "epoch": 0.7003088538091969, "grad_norm": 0.7672742854894331, "learning_rate": 2.1766579954935268e-06, "loss": 0.242, "step": 20407 }, { "epoch": 0.7003431708991078, "grad_norm": 0.7779115517317701, "learning_rate": 2.176199351590095e-06, "loss": 0.2609, "step": 20408 }, { "epoch": 0.7003774879890186, "grad_norm": 0.7600130882489485, "learning_rate": 2.175740742571425e-06, "loss": 0.242, "step": 20409 }, { "epoch": 0.7004118050789293, "grad_norm": 1.0214154483258242, "learning_rate": 2.1752821684431887e-06, "loss": 0.2882, "step": 20410 }, { "epoch": 0.7004461221688401, "grad_norm": 0.825193722824024, "learning_rate": 2.174823629211046e-06, "loss": 0.2448, "step": 20411 }, { "epoch": 0.7004804392587508, "grad_norm": 0.8537339692947439, "learning_rate": 2.1743651248806635e-06, "loss": 0.2962, "step": 20412 }, { "epoch": 0.7005147563486617, "grad_norm": 0.8511247544370961, "learning_rate": 2.1739066554577054e-06, "loss": 0.2756, "step": 20413 }, { "epoch": 0.7005490734385724, "grad_norm": 0.7014896769213762, "learning_rate": 2.1734482209478356e-06, "loss": 0.2695, "step": 20414 }, { "epoch": 0.7005833905284832, "grad_norm": 0.7478252276725472, "learning_rate": 2.1729898213567178e-06, "loss": 0.2643, "step": 20415 }, { "epoch": 0.7006177076183939, "grad_norm": 0.8484916600411032, "learning_rate": 2.172531456690015e-06, "loss": 0.2948, "step": 20416 }, { "epoch": 0.7006520247083048, "grad_norm": 0.7026802901981918, "learning_rate": 2.172073126953391e-06, "loss": 0.2817, "step": 20417 }, { "epoch": 0.7006863417982155, "grad_norm": 0.8199550757585566, "learning_rate": 2.1716148321525033e-06, "loss": 0.2688, "step": 20418 }, { "epoch": 0.7007206588881263, "grad_norm": 0.7673437716888749, "learning_rate": 2.1711565722930184e-06, "loss": 0.2618, "step": 20419 }, { "epoch": 0.700754975978037, "grad_norm": 0.6997918675511993, "learning_rate": 2.1706983473805976e-06, "loss": 0.2702, "step": 20420 }, { "epoch": 0.7007892930679478, "grad_norm": 0.8006825117171333, "learning_rate": 2.1702401574208965e-06, "loss": 0.3099, "step": 20421 }, { "epoch": 0.7008236101578587, "grad_norm": 0.8042456569494677, "learning_rate": 2.1697820024195825e-06, "loss": 0.2722, "step": 20422 }, { "epoch": 0.7008579272477694, "grad_norm": 0.7349968977113542, "learning_rate": 2.1693238823823106e-06, "loss": 0.2753, "step": 20423 }, { "epoch": 0.7008922443376802, "grad_norm": 0.757693027443735, "learning_rate": 2.1688657973147414e-06, "loss": 0.265, "step": 20424 }, { "epoch": 0.7009265614275909, "grad_norm": 0.739603579051059, "learning_rate": 2.168407747222535e-06, "loss": 0.2561, "step": 20425 }, { "epoch": 0.7009608785175017, "grad_norm": 0.7973368944630709, "learning_rate": 2.1679497321113497e-06, "loss": 0.2554, "step": 20426 }, { "epoch": 0.7009951956074125, "grad_norm": 0.7944921040847706, "learning_rate": 2.167491751986843e-06, "loss": 0.3187, "step": 20427 }, { "epoch": 0.7010295126973233, "grad_norm": 0.7865071607359678, "learning_rate": 2.1670338068546742e-06, "loss": 0.2676, "step": 20428 }, { "epoch": 0.701063829787234, "grad_norm": 0.7851318966159224, "learning_rate": 2.166575896720501e-06, "loss": 0.2807, "step": 20429 }, { "epoch": 0.7010981468771448, "grad_norm": 0.7325955414976805, "learning_rate": 2.1661180215899758e-06, "loss": 0.2569, "step": 20430 }, { "epoch": 0.7011324639670556, "grad_norm": 0.7819851390382858, "learning_rate": 2.1656601814687617e-06, "loss": 0.2335, "step": 20431 }, { "epoch": 0.7011667810569664, "grad_norm": 0.795934532006111, "learning_rate": 2.1652023763625104e-06, "loss": 0.2307, "step": 20432 }, { "epoch": 0.7012010981468771, "grad_norm": 0.6862093444494942, "learning_rate": 2.1647446062768777e-06, "loss": 0.2644, "step": 20433 }, { "epoch": 0.7012354152367879, "grad_norm": 0.7671145460964499, "learning_rate": 2.1642868712175203e-06, "loss": 0.2544, "step": 20434 }, { "epoch": 0.7012697323266986, "grad_norm": 0.8517220144222893, "learning_rate": 2.163829171190093e-06, "loss": 0.3216, "step": 20435 }, { "epoch": 0.7013040494166095, "grad_norm": 0.7758673015346866, "learning_rate": 2.1633715062002484e-06, "loss": 0.2522, "step": 20436 }, { "epoch": 0.7013383665065203, "grad_norm": 0.9096590112451178, "learning_rate": 2.1629138762536428e-06, "loss": 0.2879, "step": 20437 }, { "epoch": 0.701372683596431, "grad_norm": 0.7488548542992782, "learning_rate": 2.162456281355928e-06, "loss": 0.2931, "step": 20438 }, { "epoch": 0.7014070006863418, "grad_norm": 0.870408099104588, "learning_rate": 2.161998721512758e-06, "loss": 0.2431, "step": 20439 }, { "epoch": 0.7014413177762526, "grad_norm": 0.7900318729753307, "learning_rate": 2.1615411967297844e-06, "loss": 0.2729, "step": 20440 }, { "epoch": 0.7014756348661634, "grad_norm": 0.781104577253782, "learning_rate": 2.1610837070126604e-06, "loss": 0.2745, "step": 20441 }, { "epoch": 0.7015099519560741, "grad_norm": 0.8300572226590769, "learning_rate": 2.1606262523670372e-06, "loss": 0.259, "step": 20442 }, { "epoch": 0.7015442690459849, "grad_norm": 0.8014408320529325, "learning_rate": 2.160168832798568e-06, "loss": 0.2514, "step": 20443 }, { "epoch": 0.7015785861358956, "grad_norm": 0.7609121440837346, "learning_rate": 2.159711448312899e-06, "loss": 0.2881, "step": 20444 }, { "epoch": 0.7016129032258065, "grad_norm": 0.7827487009083143, "learning_rate": 2.159254098915686e-06, "loss": 0.2436, "step": 20445 }, { "epoch": 0.7016472203157172, "grad_norm": 0.7454942045058113, "learning_rate": 2.158796784612576e-06, "loss": 0.2325, "step": 20446 }, { "epoch": 0.701681537405628, "grad_norm": 0.794112575478707, "learning_rate": 2.1583395054092188e-06, "loss": 0.2895, "step": 20447 }, { "epoch": 0.7017158544955387, "grad_norm": 0.78812091128463, "learning_rate": 2.157882261311264e-06, "loss": 0.276, "step": 20448 }, { "epoch": 0.7017501715854495, "grad_norm": 0.7415683949727622, "learning_rate": 2.15742505232436e-06, "loss": 0.2464, "step": 20449 }, { "epoch": 0.7017844886753604, "grad_norm": 0.8432203355538649, "learning_rate": 2.1569678784541554e-06, "loss": 0.2913, "step": 20450 }, { "epoch": 0.7018188057652711, "grad_norm": 0.7614506361855127, "learning_rate": 2.1565107397062977e-06, "loss": 0.2539, "step": 20451 }, { "epoch": 0.7018531228551819, "grad_norm": 0.7536737340279897, "learning_rate": 2.156053636086437e-06, "loss": 0.2866, "step": 20452 }, { "epoch": 0.7018874399450926, "grad_norm": 0.7225346395336203, "learning_rate": 2.1555965676002144e-06, "loss": 0.2271, "step": 20453 }, { "epoch": 0.7019217570350035, "grad_norm": 0.813219804249499, "learning_rate": 2.155139534253283e-06, "loss": 0.3414, "step": 20454 }, { "epoch": 0.7019560741249142, "grad_norm": 0.8393458365516036, "learning_rate": 2.1546825360512845e-06, "loss": 0.3042, "step": 20455 }, { "epoch": 0.701990391214825, "grad_norm": 0.7924649813572612, "learning_rate": 2.154225572999866e-06, "loss": 0.2849, "step": 20456 }, { "epoch": 0.7020247083047357, "grad_norm": 0.7630183968033141, "learning_rate": 2.1537686451046726e-06, "loss": 0.236, "step": 20457 }, { "epoch": 0.7020590253946465, "grad_norm": 0.756998961960356, "learning_rate": 2.1533117523713486e-06, "loss": 0.3092, "step": 20458 }, { "epoch": 0.7020933424845573, "grad_norm": 0.7050754803309355, "learning_rate": 2.15285489480554e-06, "loss": 0.258, "step": 20459 }, { "epoch": 0.7021276595744681, "grad_norm": 0.7514566713019794, "learning_rate": 2.152398072412889e-06, "loss": 0.2887, "step": 20460 }, { "epoch": 0.7021619766643789, "grad_norm": 0.8245912025378974, "learning_rate": 2.151941285199042e-06, "loss": 0.2625, "step": 20461 }, { "epoch": 0.7021962937542896, "grad_norm": 0.7218865987340564, "learning_rate": 2.151484533169636e-06, "loss": 0.2466, "step": 20462 }, { "epoch": 0.7022306108442005, "grad_norm": 0.7756447871145273, "learning_rate": 2.1510278163303195e-06, "loss": 0.2609, "step": 20463 }, { "epoch": 0.7022649279341112, "grad_norm": 0.7222702717255933, "learning_rate": 2.150571134686735e-06, "loss": 0.2382, "step": 20464 }, { "epoch": 0.702299245024022, "grad_norm": 0.8193449966245199, "learning_rate": 2.1501144882445172e-06, "loss": 0.2404, "step": 20465 }, { "epoch": 0.7023335621139327, "grad_norm": 0.9170353216269024, "learning_rate": 2.149657877009317e-06, "loss": 0.2564, "step": 20466 }, { "epoch": 0.7023678792038435, "grad_norm": 0.8190431656287147, "learning_rate": 2.1492013009867686e-06, "loss": 0.2705, "step": 20467 }, { "epoch": 0.7024021962937543, "grad_norm": 0.9150278576118178, "learning_rate": 2.148744760182514e-06, "loss": 0.2745, "step": 20468 }, { "epoch": 0.7024365133836651, "grad_norm": 0.8204952922626882, "learning_rate": 2.1482882546021936e-06, "loss": 0.2822, "step": 20469 }, { "epoch": 0.7024708304735758, "grad_norm": 0.7996954597281976, "learning_rate": 2.1478317842514473e-06, "loss": 0.2866, "step": 20470 }, { "epoch": 0.7025051475634866, "grad_norm": 0.7406038638675024, "learning_rate": 2.1473753491359143e-06, "loss": 0.3073, "step": 20471 }, { "epoch": 0.7025394646533973, "grad_norm": 0.885533444560669, "learning_rate": 2.1469189492612323e-06, "loss": 0.2363, "step": 20472 }, { "epoch": 0.7025737817433082, "grad_norm": 0.8071700102452402, "learning_rate": 2.146462584633041e-06, "loss": 0.2759, "step": 20473 }, { "epoch": 0.702608098833219, "grad_norm": 0.7867614257420816, "learning_rate": 2.1460062552569773e-06, "loss": 0.2801, "step": 20474 }, { "epoch": 0.7026424159231297, "grad_norm": 0.7360933137739483, "learning_rate": 2.145549961138681e-06, "loss": 0.2904, "step": 20475 }, { "epoch": 0.7026767330130405, "grad_norm": 0.8105491282381213, "learning_rate": 2.1450937022837825e-06, "loss": 0.2899, "step": 20476 }, { "epoch": 0.7027110501029513, "grad_norm": 0.8472180924125747, "learning_rate": 2.144637478697927e-06, "loss": 0.2729, "step": 20477 }, { "epoch": 0.7027453671928621, "grad_norm": 0.7296790795979139, "learning_rate": 2.144181290386745e-06, "loss": 0.2633, "step": 20478 }, { "epoch": 0.7027796842827728, "grad_norm": 0.7591771394201411, "learning_rate": 2.143725137355873e-06, "loss": 0.2676, "step": 20479 }, { "epoch": 0.7028140013726836, "grad_norm": 0.7220638355813639, "learning_rate": 2.143269019610947e-06, "loss": 0.2663, "step": 20480 }, { "epoch": 0.7028483184625943, "grad_norm": 0.9171254181861573, "learning_rate": 2.1428129371576018e-06, "loss": 0.3067, "step": 20481 }, { "epoch": 0.7028826355525052, "grad_norm": 0.738138759461101, "learning_rate": 2.1423568900014714e-06, "loss": 0.2657, "step": 20482 }, { "epoch": 0.7029169526424159, "grad_norm": 0.7455860886163087, "learning_rate": 2.14190087814819e-06, "loss": 0.3684, "step": 20483 }, { "epoch": 0.7029512697323267, "grad_norm": 1.0349595272681262, "learning_rate": 2.141444901603393e-06, "loss": 0.2135, "step": 20484 }, { "epoch": 0.7029855868222374, "grad_norm": 0.6847409469327909, "learning_rate": 2.1409889603727073e-06, "loss": 0.2592, "step": 20485 }, { "epoch": 0.7030199039121483, "grad_norm": 0.8611364075053176, "learning_rate": 2.140533054461772e-06, "loss": 0.2733, "step": 20486 }, { "epoch": 0.703054221002059, "grad_norm": 0.7258169507671752, "learning_rate": 2.1400771838762187e-06, "loss": 0.3026, "step": 20487 }, { "epoch": 0.7030885380919698, "grad_norm": 0.9207752631798897, "learning_rate": 2.1396213486216736e-06, "loss": 0.3443, "step": 20488 }, { "epoch": 0.7031228551818806, "grad_norm": 0.877128845080094, "learning_rate": 2.139165548703776e-06, "loss": 0.3094, "step": 20489 }, { "epoch": 0.7031571722717913, "grad_norm": 0.7144780604385488, "learning_rate": 2.13870978412815e-06, "loss": 0.2972, "step": 20490 }, { "epoch": 0.7031914893617022, "grad_norm": 0.7285900877096356, "learning_rate": 2.1382540549004294e-06, "loss": 0.2246, "step": 20491 }, { "epoch": 0.7032258064516129, "grad_norm": 0.7720422736720669, "learning_rate": 2.1377983610262436e-06, "loss": 0.2923, "step": 20492 }, { "epoch": 0.7032601235415237, "grad_norm": 0.7836591152885448, "learning_rate": 2.1373427025112216e-06, "loss": 0.2847, "step": 20493 }, { "epoch": 0.7032944406314344, "grad_norm": 0.7305391222793165, "learning_rate": 2.1368870793609932e-06, "loss": 0.2428, "step": 20494 }, { "epoch": 0.7033287577213452, "grad_norm": 0.746900299809125, "learning_rate": 2.136431491581187e-06, "loss": 0.2869, "step": 20495 }, { "epoch": 0.703363074811256, "grad_norm": 0.771747460252994, "learning_rate": 2.1359759391774327e-06, "loss": 0.244, "step": 20496 }, { "epoch": 0.7033973919011668, "grad_norm": 0.751413983689799, "learning_rate": 2.1355204221553525e-06, "loss": 0.2812, "step": 20497 }, { "epoch": 0.7034317089910775, "grad_norm": 0.7549327255509404, "learning_rate": 2.135064940520582e-06, "loss": 0.2052, "step": 20498 }, { "epoch": 0.7034660260809883, "grad_norm": 0.7277141360575011, "learning_rate": 2.1346094942787405e-06, "loss": 0.2512, "step": 20499 }, { "epoch": 0.7035003431708992, "grad_norm": 0.7142238695220525, "learning_rate": 2.134154083435461e-06, "loss": 0.2157, "step": 20500 }, { "epoch": 0.7035346602608099, "grad_norm": 0.7929693568049763, "learning_rate": 2.133698707996365e-06, "loss": 0.2426, "step": 20501 }, { "epoch": 0.7035689773507207, "grad_norm": 0.7062459535577783, "learning_rate": 2.1332433679670783e-06, "loss": 0.2073, "step": 20502 }, { "epoch": 0.7036032944406314, "grad_norm": 0.7710438219306762, "learning_rate": 2.1327880633532307e-06, "loss": 0.2153, "step": 20503 }, { "epoch": 0.7036376115305422, "grad_norm": 0.8338913150180047, "learning_rate": 2.132332794160442e-06, "loss": 0.272, "step": 20504 }, { "epoch": 0.703671928620453, "grad_norm": 0.7459257577787172, "learning_rate": 2.131877560394338e-06, "loss": 0.2479, "step": 20505 }, { "epoch": 0.7037062457103638, "grad_norm": 0.8722381961732071, "learning_rate": 2.1314223620605434e-06, "loss": 0.3241, "step": 20506 }, { "epoch": 0.7037405628002745, "grad_norm": 0.8655435238424449, "learning_rate": 2.1309671991646807e-06, "loss": 0.2595, "step": 20507 }, { "epoch": 0.7037748798901853, "grad_norm": 0.8637966460689525, "learning_rate": 2.130512071712373e-06, "loss": 0.2189, "step": 20508 }, { "epoch": 0.7038091969800961, "grad_norm": 0.7707133232652589, "learning_rate": 2.1300569797092436e-06, "loss": 0.259, "step": 20509 }, { "epoch": 0.7038435140700069, "grad_norm": 0.7752213922074134, "learning_rate": 2.129601923160916e-06, "loss": 0.3137, "step": 20510 }, { "epoch": 0.7038778311599176, "grad_norm": 0.829713157871021, "learning_rate": 2.129146902073006e-06, "loss": 0.2302, "step": 20511 }, { "epoch": 0.7039121482498284, "grad_norm": 0.7974346892997872, "learning_rate": 2.1286919164511432e-06, "loss": 0.2646, "step": 20512 }, { "epoch": 0.7039464653397391, "grad_norm": 0.724658367482721, "learning_rate": 2.1282369663009418e-06, "loss": 0.3123, "step": 20513 }, { "epoch": 0.70398078242965, "grad_norm": 0.7307170367708324, "learning_rate": 2.127782051628025e-06, "loss": 0.2973, "step": 20514 }, { "epoch": 0.7040150995195608, "grad_norm": 1.0853591378985419, "learning_rate": 2.1273271724380116e-06, "loss": 0.2754, "step": 20515 }, { "epoch": 0.7040494166094715, "grad_norm": 0.7503343035376628, "learning_rate": 2.126872328736522e-06, "loss": 0.204, "step": 20516 }, { "epoch": 0.7040837336993823, "grad_norm": 0.7119576753240506, "learning_rate": 2.126417520529176e-06, "loss": 0.2673, "step": 20517 }, { "epoch": 0.704118050789293, "grad_norm": 0.864100032100892, "learning_rate": 2.12596274782159e-06, "loss": 0.269, "step": 20518 }, { "epoch": 0.7041523678792039, "grad_norm": 0.8256127382203743, "learning_rate": 2.125508010619386e-06, "loss": 0.2142, "step": 20519 }, { "epoch": 0.7041866849691146, "grad_norm": 0.6988752163464991, "learning_rate": 2.1250533089281755e-06, "loss": 0.2402, "step": 20520 }, { "epoch": 0.7042210020590254, "grad_norm": 0.8753876037390868, "learning_rate": 2.1245986427535827e-06, "loss": 0.3397, "step": 20521 }, { "epoch": 0.7042553191489361, "grad_norm": 0.724446223306705, "learning_rate": 2.1241440121012197e-06, "loss": 0.3185, "step": 20522 }, { "epoch": 0.704289636238847, "grad_norm": 0.732594567887639, "learning_rate": 2.1236894169767046e-06, "loss": 0.2848, "step": 20523 }, { "epoch": 0.7043239533287577, "grad_norm": 0.8035799843298141, "learning_rate": 2.1232348573856532e-06, "loss": 0.2847, "step": 20524 }, { "epoch": 0.7043582704186685, "grad_norm": 0.8019470499475722, "learning_rate": 2.122780333333681e-06, "loss": 0.3123, "step": 20525 }, { "epoch": 0.7043925875085792, "grad_norm": 0.7970762021693351, "learning_rate": 2.122325844826404e-06, "loss": 0.2248, "step": 20526 }, { "epoch": 0.70442690459849, "grad_norm": 0.7928905646249516, "learning_rate": 2.1218713918694357e-06, "loss": 0.2828, "step": 20527 }, { "epoch": 0.7044612216884009, "grad_norm": 0.7731363574984321, "learning_rate": 2.121416974468392e-06, "loss": 0.2667, "step": 20528 }, { "epoch": 0.7044955387783116, "grad_norm": 0.8458180512354722, "learning_rate": 2.1209625926288825e-06, "loss": 0.311, "step": 20529 }, { "epoch": 0.7045298558682224, "grad_norm": 0.8060370024756962, "learning_rate": 2.1205082463565254e-06, "loss": 0.2147, "step": 20530 }, { "epoch": 0.7045641729581331, "grad_norm": 1.0421710699692168, "learning_rate": 2.120053935656931e-06, "loss": 0.2809, "step": 20531 }, { "epoch": 0.704598490048044, "grad_norm": 0.822285016936512, "learning_rate": 2.1195996605357132e-06, "loss": 0.2174, "step": 20532 }, { "epoch": 0.7046328071379547, "grad_norm": 0.8333161684106558, "learning_rate": 2.119145420998485e-06, "loss": 0.2975, "step": 20533 }, { "epoch": 0.7046671242278655, "grad_norm": 0.7161053542544978, "learning_rate": 2.1186912170508533e-06, "loss": 0.2462, "step": 20534 }, { "epoch": 0.7047014413177762, "grad_norm": 0.86622112518762, "learning_rate": 2.1182370486984354e-06, "loss": 0.2869, "step": 20535 }, { "epoch": 0.704735758407687, "grad_norm": 0.7418941980894141, "learning_rate": 2.117782915946837e-06, "loss": 0.3102, "step": 20536 }, { "epoch": 0.7047700754975978, "grad_norm": 0.8863524918033151, "learning_rate": 2.1173288188016707e-06, "loss": 0.2922, "step": 20537 }, { "epoch": 0.7048043925875086, "grad_norm": 0.8153457089550271, "learning_rate": 2.1168747572685457e-06, "loss": 0.2507, "step": 20538 }, { "epoch": 0.7048387096774194, "grad_norm": 0.7671520555084355, "learning_rate": 2.1164207313530714e-06, "loss": 0.2947, "step": 20539 }, { "epoch": 0.7048730267673301, "grad_norm": 0.8282348453306704, "learning_rate": 2.115966741060858e-06, "loss": 0.3068, "step": 20540 }, { "epoch": 0.7049073438572409, "grad_norm": 0.7734929196228494, "learning_rate": 2.115512786397512e-06, "loss": 0.3214, "step": 20541 }, { "epoch": 0.7049416609471517, "grad_norm": 1.042729148211403, "learning_rate": 2.115058867368645e-06, "loss": 0.2593, "step": 20542 }, { "epoch": 0.7049759780370625, "grad_norm": 0.8515843055399881, "learning_rate": 2.1146049839798587e-06, "loss": 0.2764, "step": 20543 }, { "epoch": 0.7050102951269732, "grad_norm": 0.710781934925739, "learning_rate": 2.114151136236767e-06, "loss": 0.2523, "step": 20544 }, { "epoch": 0.705044612216884, "grad_norm": 0.8337336567191101, "learning_rate": 2.1136973241449716e-06, "loss": 0.259, "step": 20545 }, { "epoch": 0.7050789293067948, "grad_norm": 0.8047390962410623, "learning_rate": 2.113243547710079e-06, "loss": 0.2622, "step": 20546 }, { "epoch": 0.7051132463967056, "grad_norm": 0.7292660042016217, "learning_rate": 2.1127898069377006e-06, "loss": 0.2837, "step": 20547 }, { "epoch": 0.7051475634866163, "grad_norm": 0.7347759453595738, "learning_rate": 2.112336101833436e-06, "loss": 0.2723, "step": 20548 }, { "epoch": 0.7051818805765271, "grad_norm": 0.7468501808667516, "learning_rate": 2.111882432402892e-06, "loss": 0.2463, "step": 20549 }, { "epoch": 0.7052161976664378, "grad_norm": 0.8073784098384782, "learning_rate": 2.1114287986516736e-06, "loss": 0.2339, "step": 20550 }, { "epoch": 0.7052505147563487, "grad_norm": 0.7709850745575031, "learning_rate": 2.110975200585384e-06, "loss": 0.2904, "step": 20551 }, { "epoch": 0.7052848318462595, "grad_norm": 0.7740690384655355, "learning_rate": 2.1105216382096283e-06, "loss": 0.2177, "step": 20552 }, { "epoch": 0.7053191489361702, "grad_norm": 0.7594813826859425, "learning_rate": 2.110068111530009e-06, "loss": 0.2322, "step": 20553 }, { "epoch": 0.705353466026081, "grad_norm": 0.7845571684917775, "learning_rate": 2.1096146205521293e-06, "loss": 0.2839, "step": 20554 }, { "epoch": 0.7053877831159918, "grad_norm": 0.7612487777006203, "learning_rate": 2.109161165281589e-06, "loss": 0.2724, "step": 20555 }, { "epoch": 0.7054221002059026, "grad_norm": 0.6965768750143345, "learning_rate": 2.1087077457239953e-06, "loss": 0.2662, "step": 20556 }, { "epoch": 0.7054564172958133, "grad_norm": 0.7306224871214505, "learning_rate": 2.108254361884944e-06, "loss": 0.2695, "step": 20557 }, { "epoch": 0.7054907343857241, "grad_norm": 0.7263535614670704, "learning_rate": 2.1078010137700395e-06, "loss": 0.2987, "step": 20558 }, { "epoch": 0.7055250514756348, "grad_norm": 0.7536803918156512, "learning_rate": 2.1073477013848815e-06, "loss": 0.2981, "step": 20559 }, { "epoch": 0.7055593685655457, "grad_norm": 0.7515090025307677, "learning_rate": 2.10689442473507e-06, "loss": 0.3056, "step": 20560 }, { "epoch": 0.7055936856554564, "grad_norm": 0.7831366391910551, "learning_rate": 2.1064411838262043e-06, "loss": 0.2785, "step": 20561 }, { "epoch": 0.7056280027453672, "grad_norm": 0.7492750334451318, "learning_rate": 2.105987978663884e-06, "loss": 0.2233, "step": 20562 }, { "epoch": 0.7056623198352779, "grad_norm": 0.7677426535983403, "learning_rate": 2.105534809253709e-06, "loss": 0.285, "step": 20563 }, { "epoch": 0.7056966369251887, "grad_norm": 0.729742309906559, "learning_rate": 2.1050816756012766e-06, "loss": 0.2291, "step": 20564 }, { "epoch": 0.7057309540150996, "grad_norm": 0.6914231145376517, "learning_rate": 2.1046285777121865e-06, "loss": 0.2431, "step": 20565 }, { "epoch": 0.7057652711050103, "grad_norm": 0.9613870030232841, "learning_rate": 2.104175515592031e-06, "loss": 0.2624, "step": 20566 }, { "epoch": 0.7057995881949211, "grad_norm": 0.7156225077130738, "learning_rate": 2.1037224892464155e-06, "loss": 0.2513, "step": 20567 }, { "epoch": 0.7058339052848318, "grad_norm": 1.3772298657413733, "learning_rate": 2.103269498680929e-06, "loss": 0.236, "step": 20568 }, { "epoch": 0.7058682223747427, "grad_norm": 0.7334324326866918, "learning_rate": 2.102816543901169e-06, "loss": 0.2664, "step": 20569 }, { "epoch": 0.7059025394646534, "grad_norm": 0.7228923329155482, "learning_rate": 2.102363624912736e-06, "loss": 0.3076, "step": 20570 }, { "epoch": 0.7059368565545642, "grad_norm": 0.8050783463477691, "learning_rate": 2.101910741721221e-06, "loss": 0.2933, "step": 20571 }, { "epoch": 0.7059711736444749, "grad_norm": 0.6943299228935486, "learning_rate": 2.1014578943322195e-06, "loss": 0.2574, "step": 20572 }, { "epoch": 0.7060054907343857, "grad_norm": 0.9258043586533501, "learning_rate": 2.1010050827513266e-06, "loss": 0.2178, "step": 20573 }, { "epoch": 0.7060398078242965, "grad_norm": 0.7726035599734593, "learning_rate": 2.100552306984136e-06, "loss": 0.2949, "step": 20574 }, { "epoch": 0.7060741249142073, "grad_norm": 0.7520236701037333, "learning_rate": 2.100099567036241e-06, "loss": 0.2435, "step": 20575 }, { "epoch": 0.706108442004118, "grad_norm": 0.8800821811564356, "learning_rate": 2.0996468629132355e-06, "loss": 0.2798, "step": 20576 }, { "epoch": 0.7061427590940288, "grad_norm": 0.7899448618282686, "learning_rate": 2.099194194620713e-06, "loss": 0.2814, "step": 20577 }, { "epoch": 0.7061770761839397, "grad_norm": 0.7404178230878646, "learning_rate": 2.0987415621642597e-06, "loss": 0.253, "step": 20578 }, { "epoch": 0.7062113932738504, "grad_norm": 0.8388402518793104, "learning_rate": 2.0982889655494766e-06, "loss": 0.3137, "step": 20579 }, { "epoch": 0.7062457103637612, "grad_norm": 0.7674032830342227, "learning_rate": 2.097836404781948e-06, "loss": 0.2646, "step": 20580 }, { "epoch": 0.7062800274536719, "grad_norm": 0.7519706528261344, "learning_rate": 2.0973838798672674e-06, "loss": 0.2369, "step": 20581 }, { "epoch": 0.7063143445435827, "grad_norm": 0.831473246290848, "learning_rate": 2.0969313908110247e-06, "loss": 0.2726, "step": 20582 }, { "epoch": 0.7063486616334935, "grad_norm": 0.6951141717356814, "learning_rate": 2.0964789376188096e-06, "loss": 0.2529, "step": 20583 }, { "epoch": 0.7063829787234043, "grad_norm": 0.7470729513692996, "learning_rate": 2.096026520296212e-06, "loss": 0.2349, "step": 20584 }, { "epoch": 0.706417295813315, "grad_norm": 0.787839480737038, "learning_rate": 2.0955741388488213e-06, "loss": 0.3071, "step": 20585 }, { "epoch": 0.7064516129032258, "grad_norm": 0.9482641209321955, "learning_rate": 2.0951217932822282e-06, "loss": 0.3367, "step": 20586 }, { "epoch": 0.7064859299931365, "grad_norm": 0.8306635956557691, "learning_rate": 2.0946694836020142e-06, "loss": 0.2622, "step": 20587 }, { "epoch": 0.7065202470830474, "grad_norm": 0.7822552236366257, "learning_rate": 2.0942172098137757e-06, "loss": 0.2539, "step": 20588 }, { "epoch": 0.7065545641729581, "grad_norm": 0.7894850056874092, "learning_rate": 2.0937649719230934e-06, "loss": 0.2794, "step": 20589 }, { "epoch": 0.7065888812628689, "grad_norm": 0.6866263954889936, "learning_rate": 2.093312769935555e-06, "loss": 0.3124, "step": 20590 }, { "epoch": 0.7066231983527796, "grad_norm": 0.7660116277451904, "learning_rate": 2.0928606038567525e-06, "loss": 0.2413, "step": 20591 }, { "epoch": 0.7066575154426905, "grad_norm": 0.8255126463755238, "learning_rate": 2.092408473692265e-06, "loss": 0.2803, "step": 20592 }, { "epoch": 0.7066918325326013, "grad_norm": 0.7221547365370051, "learning_rate": 2.0919563794476818e-06, "loss": 0.2746, "step": 20593 }, { "epoch": 0.706726149622512, "grad_norm": 0.8719869759083522, "learning_rate": 2.091504321128587e-06, "loss": 0.3283, "step": 20594 }, { "epoch": 0.7067604667124228, "grad_norm": 0.8437520581236079, "learning_rate": 2.0910522987405645e-06, "loss": 0.2446, "step": 20595 }, { "epoch": 0.7067947838023335, "grad_norm": 0.829495663741052, "learning_rate": 2.0906003122891997e-06, "loss": 0.3, "step": 20596 }, { "epoch": 0.7068291008922444, "grad_norm": 0.8131345962335643, "learning_rate": 2.090148361780077e-06, "loss": 0.2475, "step": 20597 }, { "epoch": 0.7068634179821551, "grad_norm": 0.7440769570970436, "learning_rate": 2.0896964472187776e-06, "loss": 0.2972, "step": 20598 }, { "epoch": 0.7068977350720659, "grad_norm": 0.803278403397358, "learning_rate": 2.089244568610886e-06, "loss": 0.2225, "step": 20599 }, { "epoch": 0.7069320521619766, "grad_norm": 0.7896876124361224, "learning_rate": 2.0887927259619855e-06, "loss": 0.2826, "step": 20600 }, { "epoch": 0.7069663692518875, "grad_norm": 0.7849283544891158, "learning_rate": 2.088340919277654e-06, "loss": 0.3142, "step": 20601 }, { "epoch": 0.7070006863417982, "grad_norm": 0.7232851354402078, "learning_rate": 2.0878891485634785e-06, "loss": 0.2472, "step": 20602 }, { "epoch": 0.707035003431709, "grad_norm": 0.7464778120949768, "learning_rate": 2.0874374138250357e-06, "loss": 0.2908, "step": 20603 }, { "epoch": 0.7070693205216197, "grad_norm": 0.7651600513805346, "learning_rate": 2.0869857150679084e-06, "loss": 0.2775, "step": 20604 }, { "epoch": 0.7071036376115305, "grad_norm": 0.7499705676717595, "learning_rate": 2.086534052297676e-06, "loss": 0.2176, "step": 20605 }, { "epoch": 0.7071379547014414, "grad_norm": 0.7854889772326478, "learning_rate": 2.0860824255199185e-06, "loss": 0.2578, "step": 20606 }, { "epoch": 0.7071722717913521, "grad_norm": 0.7639788845903552, "learning_rate": 2.0856308347402156e-06, "loss": 0.3056, "step": 20607 }, { "epoch": 0.7072065888812629, "grad_norm": 0.8433400534413942, "learning_rate": 2.0851792799641457e-06, "loss": 0.3184, "step": 20608 }, { "epoch": 0.7072409059711736, "grad_norm": 0.9589070807554937, "learning_rate": 2.0847277611972896e-06, "loss": 0.2708, "step": 20609 }, { "epoch": 0.7072752230610844, "grad_norm": 0.8121858014767889, "learning_rate": 2.0842762784452194e-06, "loss": 0.2613, "step": 20610 }, { "epoch": 0.7073095401509952, "grad_norm": 0.8379228836920151, "learning_rate": 2.0838248317135196e-06, "loss": 0.2757, "step": 20611 }, { "epoch": 0.707343857240906, "grad_norm": 0.8188946202358116, "learning_rate": 2.083373421007763e-06, "loss": 0.225, "step": 20612 }, { "epoch": 0.7073781743308167, "grad_norm": 0.9695011726624947, "learning_rate": 2.082922046333525e-06, "loss": 0.2589, "step": 20613 }, { "epoch": 0.7074124914207275, "grad_norm": 0.9589342726130777, "learning_rate": 2.0824707076963874e-06, "loss": 0.2127, "step": 20614 }, { "epoch": 0.7074468085106383, "grad_norm": 0.7288193992821999, "learning_rate": 2.0820194051019215e-06, "loss": 0.2387, "step": 20615 }, { "epoch": 0.7074811256005491, "grad_norm": 0.8350222046827034, "learning_rate": 2.0815681385557035e-06, "loss": 0.266, "step": 20616 }, { "epoch": 0.7075154426904599, "grad_norm": 1.0988288349592947, "learning_rate": 2.0811169080633087e-06, "loss": 0.2408, "step": 20617 }, { "epoch": 0.7075497597803706, "grad_norm": 0.8510458874616145, "learning_rate": 2.0806657136303114e-06, "loss": 0.3196, "step": 20618 }, { "epoch": 0.7075840768702814, "grad_norm": 0.993347060364798, "learning_rate": 2.080214555262286e-06, "loss": 0.2194, "step": 20619 }, { "epoch": 0.7076183939601922, "grad_norm": 0.7546271012520874, "learning_rate": 2.079763432964805e-06, "loss": 0.2565, "step": 20620 }, { "epoch": 0.707652711050103, "grad_norm": 0.7812481197792454, "learning_rate": 2.0793123467434445e-06, "loss": 0.2605, "step": 20621 }, { "epoch": 0.7076870281400137, "grad_norm": 0.8458324711579425, "learning_rate": 2.078861296603771e-06, "loss": 0.2815, "step": 20622 }, { "epoch": 0.7077213452299245, "grad_norm": 0.7715300094421166, "learning_rate": 2.0784102825513635e-06, "loss": 0.2647, "step": 20623 }, { "epoch": 0.7077556623198353, "grad_norm": 0.8637473701328963, "learning_rate": 2.077959304591788e-06, "loss": 0.2363, "step": 20624 }, { "epoch": 0.7077899794097461, "grad_norm": 0.7299160273230961, "learning_rate": 2.0775083627306215e-06, "loss": 0.3072, "step": 20625 }, { "epoch": 0.7078242964996568, "grad_norm": 0.9542326497646777, "learning_rate": 2.0770574569734296e-06, "loss": 0.2705, "step": 20626 }, { "epoch": 0.7078586135895676, "grad_norm": 0.7976717571768629, "learning_rate": 2.0766065873257863e-06, "loss": 0.3212, "step": 20627 }, { "epoch": 0.7078929306794783, "grad_norm": 0.7526287165785281, "learning_rate": 2.076155753793259e-06, "loss": 0.3064, "step": 20628 }, { "epoch": 0.7079272477693892, "grad_norm": 0.8291858214867467, "learning_rate": 2.07570495638142e-06, "loss": 0.2762, "step": 20629 }, { "epoch": 0.7079615648593, "grad_norm": 0.756444301871286, "learning_rate": 2.0752541950958357e-06, "loss": 0.3084, "step": 20630 }, { "epoch": 0.7079958819492107, "grad_norm": 0.6791753330455255, "learning_rate": 2.074803469942076e-06, "loss": 0.2219, "step": 20631 }, { "epoch": 0.7080301990391215, "grad_norm": 0.7182318986656513, "learning_rate": 2.0743527809257107e-06, "loss": 0.2075, "step": 20632 }, { "epoch": 0.7080645161290322, "grad_norm": 0.8323472279241418, "learning_rate": 2.0739021280523024e-06, "loss": 0.2838, "step": 20633 }, { "epoch": 0.7080988332189431, "grad_norm": 0.7403081787920787, "learning_rate": 2.0734515113274258e-06, "loss": 0.4018, "step": 20634 }, { "epoch": 0.7081331503088538, "grad_norm": 0.7285404347163082, "learning_rate": 2.073000930756641e-06, "loss": 0.2628, "step": 20635 }, { "epoch": 0.7081674673987646, "grad_norm": 0.7384567135668066, "learning_rate": 2.0725503863455167e-06, "loss": 0.2499, "step": 20636 }, { "epoch": 0.7082017844886753, "grad_norm": 0.866349468327179, "learning_rate": 2.0720998780996215e-06, "loss": 0.3283, "step": 20637 }, { "epoch": 0.7082361015785862, "grad_norm": 0.8013386932886407, "learning_rate": 2.071649406024518e-06, "loss": 0.2641, "step": 20638 }, { "epoch": 0.7082704186684969, "grad_norm": 0.9033181458946241, "learning_rate": 2.0711989701257713e-06, "loss": 0.2317, "step": 20639 }, { "epoch": 0.7083047357584077, "grad_norm": 0.7559421862892245, "learning_rate": 2.070748570408947e-06, "loss": 0.2584, "step": 20640 }, { "epoch": 0.7083390528483184, "grad_norm": 0.7766570950846473, "learning_rate": 2.070298206879609e-06, "loss": 0.2499, "step": 20641 }, { "epoch": 0.7083733699382292, "grad_norm": 0.7482080231650683, "learning_rate": 2.0698478795433206e-06, "loss": 0.2285, "step": 20642 }, { "epoch": 0.70840768702814, "grad_norm": 0.9358769586863944, "learning_rate": 2.0693975884056455e-06, "loss": 0.232, "step": 20643 }, { "epoch": 0.7084420041180508, "grad_norm": 0.7209732543015149, "learning_rate": 2.0689473334721483e-06, "loss": 0.2447, "step": 20644 }, { "epoch": 0.7084763212079616, "grad_norm": 0.7321034851283095, "learning_rate": 2.0684971147483863e-06, "loss": 0.212, "step": 20645 }, { "epoch": 0.7085106382978723, "grad_norm": 0.7384810575892548, "learning_rate": 2.0680469322399275e-06, "loss": 0.2314, "step": 20646 }, { "epoch": 0.7085449553877832, "grad_norm": 0.8149590828319644, "learning_rate": 2.0675967859523293e-06, "loss": 0.2657, "step": 20647 }, { "epoch": 0.7085792724776939, "grad_norm": 0.7041899464090361, "learning_rate": 2.067146675891154e-06, "loss": 0.2664, "step": 20648 }, { "epoch": 0.7086135895676047, "grad_norm": 0.7188395287281264, "learning_rate": 2.0666966020619624e-06, "loss": 0.2837, "step": 20649 }, { "epoch": 0.7086479066575154, "grad_norm": 0.8044556096923526, "learning_rate": 2.0662465644703135e-06, "loss": 0.2547, "step": 20650 }, { "epoch": 0.7086822237474262, "grad_norm": 0.748604035922655, "learning_rate": 2.0657965631217687e-06, "loss": 0.2311, "step": 20651 }, { "epoch": 0.708716540837337, "grad_norm": 0.7692191367876189, "learning_rate": 2.065346598021886e-06, "loss": 0.1944, "step": 20652 }, { "epoch": 0.7087508579272478, "grad_norm": 0.8461966421022392, "learning_rate": 2.064896669176226e-06, "loss": 0.2581, "step": 20653 }, { "epoch": 0.7087851750171585, "grad_norm": 0.7638777683659186, "learning_rate": 2.0644467765903425e-06, "loss": 0.2806, "step": 20654 }, { "epoch": 0.7088194921070693, "grad_norm": 0.7795373043783468, "learning_rate": 2.0639969202698002e-06, "loss": 0.2538, "step": 20655 }, { "epoch": 0.70885380919698, "grad_norm": 0.7819087186354128, "learning_rate": 2.063547100220149e-06, "loss": 0.2541, "step": 20656 }, { "epoch": 0.7088881262868909, "grad_norm": 0.9063192198001697, "learning_rate": 2.063097316446952e-06, "loss": 0.2515, "step": 20657 }, { "epoch": 0.7089224433768017, "grad_norm": 1.0320641193532083, "learning_rate": 2.062647568955765e-06, "loss": 0.2993, "step": 20658 }, { "epoch": 0.7089567604667124, "grad_norm": 0.9649821165084358, "learning_rate": 2.0621978577521395e-06, "loss": 0.2792, "step": 20659 }, { "epoch": 0.7089910775566232, "grad_norm": 0.7175644537639463, "learning_rate": 2.061748182841638e-06, "loss": 0.2416, "step": 20660 }, { "epoch": 0.709025394646534, "grad_norm": 0.7519848828688012, "learning_rate": 2.0612985442298096e-06, "loss": 0.2477, "step": 20661 }, { "epoch": 0.7090597117364448, "grad_norm": 0.8467655838198449, "learning_rate": 2.060848941922212e-06, "loss": 0.2294, "step": 20662 }, { "epoch": 0.7090940288263555, "grad_norm": 0.7156117983036491, "learning_rate": 2.060399375924399e-06, "loss": 0.2161, "step": 20663 }, { "epoch": 0.7091283459162663, "grad_norm": 0.7179408875700193, "learning_rate": 2.059949846241924e-06, "loss": 0.2736, "step": 20664 }, { "epoch": 0.709162663006177, "grad_norm": 0.8264228796733389, "learning_rate": 2.0595003528803416e-06, "loss": 0.3284, "step": 20665 }, { "epoch": 0.7091969800960879, "grad_norm": 0.7078501066183119, "learning_rate": 2.059050895845203e-06, "loss": 0.2926, "step": 20666 }, { "epoch": 0.7092312971859986, "grad_norm": 0.8855047666973135, "learning_rate": 2.058601475142065e-06, "loss": 0.2769, "step": 20667 }, { "epoch": 0.7092656142759094, "grad_norm": 0.6927370864708879, "learning_rate": 2.0581520907764714e-06, "loss": 0.2775, "step": 20668 }, { "epoch": 0.7092999313658201, "grad_norm": 0.8139260560266846, "learning_rate": 2.057702742753983e-06, "loss": 0.2713, "step": 20669 }, { "epoch": 0.709334248455731, "grad_norm": 0.790469201413969, "learning_rate": 2.057253431080145e-06, "loss": 0.2471, "step": 20670 }, { "epoch": 0.7093685655456418, "grad_norm": 0.7989089575580907, "learning_rate": 2.05680415576051e-06, "loss": 0.2382, "step": 20671 }, { "epoch": 0.7094028826355525, "grad_norm": 0.7567863037730399, "learning_rate": 2.0563549168006285e-06, "loss": 0.2736, "step": 20672 }, { "epoch": 0.7094371997254633, "grad_norm": 0.927435369319964, "learning_rate": 2.05590571420605e-06, "loss": 0.2961, "step": 20673 }, { "epoch": 0.709471516815374, "grad_norm": 0.7309294076840814, "learning_rate": 2.0554565479823237e-06, "loss": 0.2721, "step": 20674 }, { "epoch": 0.7095058339052849, "grad_norm": 0.7591166067161096, "learning_rate": 2.0550074181349987e-06, "loss": 0.2948, "step": 20675 }, { "epoch": 0.7095401509951956, "grad_norm": 0.7554799467103774, "learning_rate": 2.0545583246696255e-06, "loss": 0.2797, "step": 20676 }, { "epoch": 0.7095744680851064, "grad_norm": 0.8522416611655307, "learning_rate": 2.0541092675917463e-06, "loss": 0.2587, "step": 20677 }, { "epoch": 0.7096087851750171, "grad_norm": 0.772578253206053, "learning_rate": 2.053660246906917e-06, "loss": 0.3107, "step": 20678 }, { "epoch": 0.7096431022649279, "grad_norm": 0.7229041552334353, "learning_rate": 2.0532112626206774e-06, "loss": 0.2644, "step": 20679 }, { "epoch": 0.7096774193548387, "grad_norm": 0.8067799352496876, "learning_rate": 2.0527623147385754e-06, "loss": 0.2131, "step": 20680 }, { "epoch": 0.7097117364447495, "grad_norm": 0.8068832076948158, "learning_rate": 2.052313403266162e-06, "loss": 0.3041, "step": 20681 }, { "epoch": 0.7097460535346602, "grad_norm": 0.7704418225297802, "learning_rate": 2.0518645282089794e-06, "loss": 0.2401, "step": 20682 }, { "epoch": 0.709780370624571, "grad_norm": 0.7571091317122642, "learning_rate": 2.0514156895725723e-06, "loss": 0.2647, "step": 20683 }, { "epoch": 0.7098146877144819, "grad_norm": 0.7522890045376761, "learning_rate": 2.050966887362487e-06, "loss": 0.2468, "step": 20684 }, { "epoch": 0.7098490048043926, "grad_norm": 0.7928361349241468, "learning_rate": 2.0505181215842677e-06, "loss": 0.3082, "step": 20685 }, { "epoch": 0.7098833218943034, "grad_norm": 0.7184268511069369, "learning_rate": 2.0500693922434576e-06, "loss": 0.2548, "step": 20686 }, { "epoch": 0.7099176389842141, "grad_norm": 0.7916890507631811, "learning_rate": 2.0496206993456018e-06, "loss": 0.2902, "step": 20687 }, { "epoch": 0.7099519560741249, "grad_norm": 0.7592810236944699, "learning_rate": 2.049172042896242e-06, "loss": 0.3004, "step": 20688 }, { "epoch": 0.7099862731640357, "grad_norm": 0.7764056292724267, "learning_rate": 2.048723422900921e-06, "loss": 0.2741, "step": 20689 }, { "epoch": 0.7100205902539465, "grad_norm": 0.7567457842943466, "learning_rate": 2.0482748393651837e-06, "loss": 0.2722, "step": 20690 }, { "epoch": 0.7100549073438572, "grad_norm": 0.7387002573701367, "learning_rate": 2.0478262922945652e-06, "loss": 0.2863, "step": 20691 }, { "epoch": 0.710089224433768, "grad_norm": 0.7693135849659805, "learning_rate": 2.047377781694615e-06, "loss": 0.2993, "step": 20692 }, { "epoch": 0.7101235415236788, "grad_norm": 0.7737839629344057, "learning_rate": 2.046929307570868e-06, "loss": 0.291, "step": 20693 }, { "epoch": 0.7101578586135896, "grad_norm": 0.7309823785896167, "learning_rate": 2.046480869928866e-06, "loss": 0.2175, "step": 20694 }, { "epoch": 0.7101921757035004, "grad_norm": 0.7254649348291337, "learning_rate": 2.0460324687741496e-06, "loss": 0.2182, "step": 20695 }, { "epoch": 0.7102264927934111, "grad_norm": 0.7495920661729355, "learning_rate": 2.045584104112258e-06, "loss": 0.2216, "step": 20696 }, { "epoch": 0.7102608098833219, "grad_norm": 0.6716036285544279, "learning_rate": 2.0451357759487307e-06, "loss": 0.2777, "step": 20697 }, { "epoch": 0.7102951269732327, "grad_norm": 0.7470928142422586, "learning_rate": 2.0446874842891055e-06, "loss": 0.2465, "step": 20698 }, { "epoch": 0.7103294440631435, "grad_norm": 0.8342602047342308, "learning_rate": 2.044239229138923e-06, "loss": 0.2914, "step": 20699 }, { "epoch": 0.7103637611530542, "grad_norm": 0.8004800226509279, "learning_rate": 2.0437910105037144e-06, "loss": 0.2991, "step": 20700 }, { "epoch": 0.710398078242965, "grad_norm": 0.7784541092648843, "learning_rate": 2.043342828389024e-06, "loss": 0.2789, "step": 20701 }, { "epoch": 0.7104323953328757, "grad_norm": 0.7408752237677116, "learning_rate": 2.042894682800387e-06, "loss": 0.2665, "step": 20702 }, { "epoch": 0.7104667124227866, "grad_norm": 0.797484455644053, "learning_rate": 2.0424465737433353e-06, "loss": 0.2678, "step": 20703 }, { "epoch": 0.7105010295126973, "grad_norm": 0.8578080309002089, "learning_rate": 2.0419985012234114e-06, "loss": 0.2903, "step": 20704 }, { "epoch": 0.7105353466026081, "grad_norm": 0.752916779068443, "learning_rate": 2.0415504652461455e-06, "loss": 0.2367, "step": 20705 }, { "epoch": 0.7105696636925188, "grad_norm": 0.7648439935802547, "learning_rate": 2.041102465817074e-06, "loss": 0.2757, "step": 20706 }, { "epoch": 0.7106039807824297, "grad_norm": 0.8045763759656256, "learning_rate": 2.040654502941732e-06, "loss": 0.2698, "step": 20707 }, { "epoch": 0.7106382978723405, "grad_norm": 0.8048847104000739, "learning_rate": 2.0402065766256542e-06, "loss": 0.2494, "step": 20708 }, { "epoch": 0.7106726149622512, "grad_norm": 0.7621529364543032, "learning_rate": 2.039758686874373e-06, "loss": 0.2777, "step": 20709 }, { "epoch": 0.710706932052162, "grad_norm": 0.6772281075578178, "learning_rate": 2.0393108336934213e-06, "loss": 0.2099, "step": 20710 }, { "epoch": 0.7107412491420727, "grad_norm": 0.8028145662656193, "learning_rate": 2.0388630170883346e-06, "loss": 0.2717, "step": 20711 }, { "epoch": 0.7107755662319836, "grad_norm": 0.8130309096198882, "learning_rate": 2.038415237064639e-06, "loss": 0.2861, "step": 20712 }, { "epoch": 0.7108098833218943, "grad_norm": 0.773756737716357, "learning_rate": 2.037967493627875e-06, "loss": 0.2864, "step": 20713 }, { "epoch": 0.7108442004118051, "grad_norm": 0.9246511317829984, "learning_rate": 2.037519786783566e-06, "loss": 0.2265, "step": 20714 }, { "epoch": 0.7108785175017158, "grad_norm": 0.7885736333386344, "learning_rate": 2.037072116537247e-06, "loss": 0.3272, "step": 20715 }, { "epoch": 0.7109128345916267, "grad_norm": 0.6768476110926774, "learning_rate": 2.0366244828944475e-06, "loss": 0.2402, "step": 20716 }, { "epoch": 0.7109471516815374, "grad_norm": 0.7404557125289634, "learning_rate": 2.0361768858606973e-06, "loss": 0.2122, "step": 20717 }, { "epoch": 0.7109814687714482, "grad_norm": 0.7912740013026937, "learning_rate": 2.0357293254415263e-06, "loss": 0.2629, "step": 20718 }, { "epoch": 0.7110157858613589, "grad_norm": 0.7881424300203889, "learning_rate": 2.0352818016424637e-06, "loss": 0.2364, "step": 20719 }, { "epoch": 0.7110501029512697, "grad_norm": 0.8338094108967488, "learning_rate": 2.0348343144690374e-06, "loss": 0.2989, "step": 20720 }, { "epoch": 0.7110844200411806, "grad_norm": 0.7702065533085302, "learning_rate": 2.0343868639267767e-06, "loss": 0.2844, "step": 20721 }, { "epoch": 0.7111187371310913, "grad_norm": 0.8120232697929415, "learning_rate": 2.03393945002121e-06, "loss": 0.2618, "step": 20722 }, { "epoch": 0.7111530542210021, "grad_norm": 0.7445864860401654, "learning_rate": 2.03349207275786e-06, "loss": 0.292, "step": 20723 }, { "epoch": 0.7111873713109128, "grad_norm": 0.919623973500075, "learning_rate": 2.0330447321422587e-06, "loss": 0.2861, "step": 20724 }, { "epoch": 0.7112216884008236, "grad_norm": 0.8127054768587852, "learning_rate": 2.032597428179932e-06, "loss": 0.2299, "step": 20725 }, { "epoch": 0.7112560054907344, "grad_norm": 0.7383714995820043, "learning_rate": 2.032150160876401e-06, "loss": 0.2679, "step": 20726 }, { "epoch": 0.7112903225806452, "grad_norm": 0.891591972470718, "learning_rate": 2.0317029302371982e-06, "loss": 0.2622, "step": 20727 }, { "epoch": 0.7113246396705559, "grad_norm": 0.728053146425188, "learning_rate": 2.031255736267843e-06, "loss": 0.2521, "step": 20728 }, { "epoch": 0.7113589567604667, "grad_norm": 0.753691460392287, "learning_rate": 2.0308085789738622e-06, "loss": 0.2254, "step": 20729 }, { "epoch": 0.7113932738503775, "grad_norm": 0.7874681629814143, "learning_rate": 2.03036145836078e-06, "loss": 0.2683, "step": 20730 }, { "epoch": 0.7114275909402883, "grad_norm": 0.8051276146630765, "learning_rate": 2.0299143744341193e-06, "loss": 0.2688, "step": 20731 }, { "epoch": 0.711461908030199, "grad_norm": 0.7990615009016954, "learning_rate": 2.029467327199404e-06, "loss": 0.2671, "step": 20732 }, { "epoch": 0.7114962251201098, "grad_norm": 0.7608218817247857, "learning_rate": 2.0290203166621565e-06, "loss": 0.2839, "step": 20733 }, { "epoch": 0.7115305422100205, "grad_norm": 0.8667406217894633, "learning_rate": 2.0285733428279e-06, "loss": 0.2735, "step": 20734 }, { "epoch": 0.7115648592999314, "grad_norm": 0.8389555718410235, "learning_rate": 2.028126405702153e-06, "loss": 0.2932, "step": 20735 }, { "epoch": 0.7115991763898422, "grad_norm": 0.7846935313306146, "learning_rate": 2.027679505290443e-06, "loss": 0.2904, "step": 20736 }, { "epoch": 0.7116334934797529, "grad_norm": 0.7062142304600283, "learning_rate": 2.027232641598285e-06, "loss": 0.2683, "step": 20737 }, { "epoch": 0.7116678105696637, "grad_norm": 0.7616844736956366, "learning_rate": 2.026785814631202e-06, "loss": 0.2591, "step": 20738 }, { "epoch": 0.7117021276595744, "grad_norm": 0.9276576937641676, "learning_rate": 2.026339024394714e-06, "loss": 0.2597, "step": 20739 }, { "epoch": 0.7117364447494853, "grad_norm": 0.8387792896527599, "learning_rate": 2.0258922708943402e-06, "loss": 0.2542, "step": 20740 }, { "epoch": 0.711770761839396, "grad_norm": 0.6854494029310076, "learning_rate": 2.0254455541356e-06, "loss": 0.2435, "step": 20741 }, { "epoch": 0.7118050789293068, "grad_norm": 0.8621365053512449, "learning_rate": 2.0249988741240124e-06, "loss": 0.2871, "step": 20742 }, { "epoch": 0.7118393960192175, "grad_norm": 0.7525933719827729, "learning_rate": 2.024552230865096e-06, "loss": 0.2717, "step": 20743 }, { "epoch": 0.7118737131091284, "grad_norm": 0.6743940243596183, "learning_rate": 2.0241056243643647e-06, "loss": 0.2407, "step": 20744 }, { "epoch": 0.7119080301990391, "grad_norm": 0.7855077701568892, "learning_rate": 2.023659054627343e-06, "loss": 0.2767, "step": 20745 }, { "epoch": 0.7119423472889499, "grad_norm": 0.7966512261833307, "learning_rate": 2.023212521659541e-06, "loss": 0.2907, "step": 20746 }, { "epoch": 0.7119766643788606, "grad_norm": 1.0477036060673925, "learning_rate": 2.022766025466476e-06, "loss": 0.2745, "step": 20747 }, { "epoch": 0.7120109814687714, "grad_norm": 0.6543672812358429, "learning_rate": 2.02231956605367e-06, "loss": 0.2327, "step": 20748 }, { "epoch": 0.7120452985586823, "grad_norm": 0.9269039538683573, "learning_rate": 2.0218731434266316e-06, "loss": 0.2308, "step": 20749 }, { "epoch": 0.712079615648593, "grad_norm": 0.7141638449908184, "learning_rate": 2.021426757590878e-06, "loss": 0.2002, "step": 20750 }, { "epoch": 0.7121139327385038, "grad_norm": 0.8312950975626555, "learning_rate": 2.020980408551925e-06, "loss": 0.2994, "step": 20751 }, { "epoch": 0.7121482498284145, "grad_norm": 0.7872528711654266, "learning_rate": 2.020534096315285e-06, "loss": 0.3176, "step": 20752 }, { "epoch": 0.7121825669183254, "grad_norm": 0.8277597491978558, "learning_rate": 2.020087820886473e-06, "loss": 0.2433, "step": 20753 }, { "epoch": 0.7122168840082361, "grad_norm": 0.7211457425172039, "learning_rate": 2.019641582271002e-06, "loss": 0.2892, "step": 20754 }, { "epoch": 0.7122512010981469, "grad_norm": 0.6895522752848253, "learning_rate": 2.019195380474384e-06, "loss": 0.2397, "step": 20755 }, { "epoch": 0.7122855181880576, "grad_norm": 0.7107275698467875, "learning_rate": 2.0187492155021323e-06, "loss": 0.261, "step": 20756 }, { "epoch": 0.7123198352779684, "grad_norm": 0.7454917465657291, "learning_rate": 2.01830308735976e-06, "loss": 0.2603, "step": 20757 }, { "epoch": 0.7123541523678792, "grad_norm": 0.808581090954974, "learning_rate": 2.0178569960527727e-06, "loss": 0.2557, "step": 20758 }, { "epoch": 0.71238846945779, "grad_norm": 0.9617770851797035, "learning_rate": 2.0174109415866895e-06, "loss": 0.2406, "step": 20759 }, { "epoch": 0.7124227865477007, "grad_norm": 0.733055362609742, "learning_rate": 2.016964923967014e-06, "loss": 0.2611, "step": 20760 }, { "epoch": 0.7124571036376115, "grad_norm": 0.8009605866532905, "learning_rate": 2.0165189431992597e-06, "loss": 0.2768, "step": 20761 }, { "epoch": 0.7124914207275223, "grad_norm": 0.7702523977228085, "learning_rate": 2.0160729992889357e-06, "loss": 0.2878, "step": 20762 }, { "epoch": 0.7125257378174331, "grad_norm": 0.7634910332348622, "learning_rate": 2.0156270922415503e-06, "loss": 0.2721, "step": 20763 }, { "epoch": 0.7125600549073439, "grad_norm": 0.7299491753986082, "learning_rate": 2.015181222062613e-06, "loss": 0.2506, "step": 20764 }, { "epoch": 0.7125943719972546, "grad_norm": 0.8012217744345962, "learning_rate": 2.0147353887576316e-06, "loss": 0.2656, "step": 20765 }, { "epoch": 0.7126286890871654, "grad_norm": 0.7329085828787653, "learning_rate": 2.014289592332116e-06, "loss": 0.2366, "step": 20766 }, { "epoch": 0.7126630061770762, "grad_norm": 0.8348225037451265, "learning_rate": 2.013843832791568e-06, "loss": 0.2436, "step": 20767 }, { "epoch": 0.712697323266987, "grad_norm": 0.6573226865609026, "learning_rate": 2.0133981101414996e-06, "loss": 0.2095, "step": 20768 }, { "epoch": 0.7127316403568977, "grad_norm": 0.7869319563936428, "learning_rate": 2.012952424387417e-06, "loss": 0.2791, "step": 20769 }, { "epoch": 0.7127659574468085, "grad_norm": 0.8218451756077606, "learning_rate": 2.012506775534821e-06, "loss": 0.2435, "step": 20770 }, { "epoch": 0.7128002745367192, "grad_norm": 0.8245196730858875, "learning_rate": 2.0120611635892245e-06, "loss": 0.2653, "step": 20771 }, { "epoch": 0.7128345916266301, "grad_norm": 0.7831812325962672, "learning_rate": 2.0116155885561273e-06, "loss": 0.2583, "step": 20772 }, { "epoch": 0.7128689087165409, "grad_norm": 0.8592814919644085, "learning_rate": 2.0111700504410347e-06, "loss": 0.2823, "step": 20773 }, { "epoch": 0.7129032258064516, "grad_norm": 0.8881680428163693, "learning_rate": 2.010724549249452e-06, "loss": 0.2525, "step": 20774 }, { "epoch": 0.7129375428963624, "grad_norm": 0.7344445988993233, "learning_rate": 2.0102790849868823e-06, "loss": 0.2789, "step": 20775 }, { "epoch": 0.7129718599862732, "grad_norm": 0.7364754771622364, "learning_rate": 2.009833657658829e-06, "loss": 0.2487, "step": 20776 }, { "epoch": 0.713006177076184, "grad_norm": 0.729358387231742, "learning_rate": 2.0093882672707944e-06, "loss": 0.2535, "step": 20777 }, { "epoch": 0.7130404941660947, "grad_norm": 0.8921023551554363, "learning_rate": 2.0089429138282823e-06, "loss": 0.2435, "step": 20778 }, { "epoch": 0.7130748112560055, "grad_norm": 0.7456319014353987, "learning_rate": 2.0084975973367904e-06, "loss": 0.2614, "step": 20779 }, { "epoch": 0.7131091283459162, "grad_norm": 0.7881189504058697, "learning_rate": 2.0080523178018264e-06, "loss": 0.2847, "step": 20780 }, { "epoch": 0.7131434454358271, "grad_norm": 0.7027881628050935, "learning_rate": 2.007607075228884e-06, "loss": 0.2396, "step": 20781 }, { "epoch": 0.7131777625257378, "grad_norm": 0.9293719135844233, "learning_rate": 2.0071618696234714e-06, "loss": 0.3009, "step": 20782 }, { "epoch": 0.7132120796156486, "grad_norm": 0.6956344225818851, "learning_rate": 2.006716700991082e-06, "loss": 0.2952, "step": 20783 }, { "epoch": 0.7132463967055593, "grad_norm": 0.725334193295542, "learning_rate": 2.0062715693372185e-06, "loss": 0.2342, "step": 20784 }, { "epoch": 0.7132807137954701, "grad_norm": 0.8028652123436574, "learning_rate": 2.0058264746673794e-06, "loss": 0.2555, "step": 20785 }, { "epoch": 0.713315030885381, "grad_norm": 0.7972660376496281, "learning_rate": 2.0053814169870634e-06, "loss": 0.2583, "step": 20786 }, { "epoch": 0.7133493479752917, "grad_norm": 0.8511168931700962, "learning_rate": 2.004936396301768e-06, "loss": 0.3122, "step": 20787 }, { "epoch": 0.7133836650652025, "grad_norm": 0.8372327298902742, "learning_rate": 2.004491412616992e-06, "loss": 0.3091, "step": 20788 }, { "epoch": 0.7134179821551132, "grad_norm": 0.7493836035255571, "learning_rate": 2.0040464659382344e-06, "loss": 0.2386, "step": 20789 }, { "epoch": 0.7134522992450241, "grad_norm": 0.7334429370791674, "learning_rate": 2.0036015562709852e-06, "loss": 0.2793, "step": 20790 }, { "epoch": 0.7134866163349348, "grad_norm": 0.8784871740173924, "learning_rate": 2.003156683620748e-06, "loss": 0.3169, "step": 20791 }, { "epoch": 0.7135209334248456, "grad_norm": 0.8761498024295313, "learning_rate": 2.0027118479930176e-06, "loss": 0.2572, "step": 20792 }, { "epoch": 0.7135552505147563, "grad_norm": 0.7615751692646601, "learning_rate": 2.002267049393284e-06, "loss": 0.2235, "step": 20793 }, { "epoch": 0.7135895676046671, "grad_norm": 0.8368108110655339, "learning_rate": 2.0018222878270494e-06, "loss": 0.2636, "step": 20794 }, { "epoch": 0.7136238846945779, "grad_norm": 0.733383127778187, "learning_rate": 2.0013775632998035e-06, "loss": 0.302, "step": 20795 }, { "epoch": 0.7136582017844887, "grad_norm": 0.8102005969860402, "learning_rate": 2.000932875817041e-06, "loss": 0.2979, "step": 20796 }, { "epoch": 0.7136925188743994, "grad_norm": 0.8080710688588513, "learning_rate": 2.000488225384257e-06, "loss": 0.2557, "step": 20797 }, { "epoch": 0.7137268359643102, "grad_norm": 0.7991791180920984, "learning_rate": 2.0000436120069437e-06, "loss": 0.2279, "step": 20798 }, { "epoch": 0.713761153054221, "grad_norm": 0.7104438179327642, "learning_rate": 1.9995990356905943e-06, "loss": 0.2402, "step": 20799 }, { "epoch": 0.7137954701441318, "grad_norm": 0.841552053947198, "learning_rate": 1.9991544964407e-06, "loss": 0.2282, "step": 20800 }, { "epoch": 0.7138297872340426, "grad_norm": 0.8792368396517969, "learning_rate": 1.998709994262755e-06, "loss": 0.2969, "step": 20801 }, { "epoch": 0.7138641043239533, "grad_norm": 0.8019029311136713, "learning_rate": 1.998265529162246e-06, "loss": 0.3247, "step": 20802 }, { "epoch": 0.7138984214138641, "grad_norm": 0.7499509861156487, "learning_rate": 1.9978211011446702e-06, "loss": 0.2633, "step": 20803 }, { "epoch": 0.7139327385037749, "grad_norm": 0.7856665858537759, "learning_rate": 1.9973767102155124e-06, "loss": 0.3004, "step": 20804 }, { "epoch": 0.7139670555936857, "grad_norm": 0.7933937286211404, "learning_rate": 1.9969323563802644e-06, "loss": 0.2662, "step": 20805 }, { "epoch": 0.7140013726835964, "grad_norm": 0.7284989018119099, "learning_rate": 1.9964880396444165e-06, "loss": 0.2811, "step": 20806 }, { "epoch": 0.7140356897735072, "grad_norm": 0.7826555225789115, "learning_rate": 1.9960437600134568e-06, "loss": 0.2696, "step": 20807 }, { "epoch": 0.7140700068634179, "grad_norm": 0.7561268211716957, "learning_rate": 1.995599517492874e-06, "loss": 0.197, "step": 20808 }, { "epoch": 0.7141043239533288, "grad_norm": 0.786300238430602, "learning_rate": 1.9951553120881567e-06, "loss": 0.2548, "step": 20809 }, { "epoch": 0.7141386410432395, "grad_norm": 0.7622509481721512, "learning_rate": 1.9947111438047935e-06, "loss": 0.2653, "step": 20810 }, { "epoch": 0.7141729581331503, "grad_norm": 0.7653896327657739, "learning_rate": 1.9942670126482672e-06, "loss": 0.2554, "step": 20811 }, { "epoch": 0.714207275223061, "grad_norm": 0.7582622403086414, "learning_rate": 1.993822918624069e-06, "loss": 0.2899, "step": 20812 }, { "epoch": 0.7142415923129719, "grad_norm": 0.8118875873411276, "learning_rate": 1.993378861737684e-06, "loss": 0.3098, "step": 20813 }, { "epoch": 0.7142759094028827, "grad_norm": 0.7990109244713091, "learning_rate": 1.992934841994598e-06, "loss": 0.2726, "step": 20814 }, { "epoch": 0.7143102264927934, "grad_norm": 0.8216946818699079, "learning_rate": 1.992490859400298e-06, "loss": 0.2885, "step": 20815 }, { "epoch": 0.7143445435827042, "grad_norm": 0.861975252193098, "learning_rate": 1.9920469139602626e-06, "loss": 0.3301, "step": 20816 }, { "epoch": 0.7143788606726149, "grad_norm": 0.7628911162039738, "learning_rate": 1.9916030056799853e-06, "loss": 0.2715, "step": 20817 }, { "epoch": 0.7144131777625258, "grad_norm": 0.9394755161821904, "learning_rate": 1.991159134564943e-06, "loss": 0.2205, "step": 20818 }, { "epoch": 0.7144474948524365, "grad_norm": 0.9108052979143996, "learning_rate": 1.9907153006206213e-06, "loss": 0.2984, "step": 20819 }, { "epoch": 0.7144818119423473, "grad_norm": 0.750462233102967, "learning_rate": 1.9902715038525042e-06, "loss": 0.2423, "step": 20820 }, { "epoch": 0.714516129032258, "grad_norm": 0.7272911138983988, "learning_rate": 1.9898277442660736e-06, "loss": 0.2948, "step": 20821 }, { "epoch": 0.7145504461221689, "grad_norm": 0.7340598618658688, "learning_rate": 1.9893840218668115e-06, "loss": 0.2999, "step": 20822 }, { "epoch": 0.7145847632120796, "grad_norm": 0.7225179792711329, "learning_rate": 1.9889403366602e-06, "loss": 0.2929, "step": 20823 }, { "epoch": 0.7146190803019904, "grad_norm": 0.6749202522122285, "learning_rate": 1.9884966886517215e-06, "loss": 0.251, "step": 20824 }, { "epoch": 0.7146533973919011, "grad_norm": 0.8775783402059852, "learning_rate": 1.988053077846852e-06, "loss": 0.2746, "step": 20825 }, { "epoch": 0.7146877144818119, "grad_norm": 0.8033070075812445, "learning_rate": 1.987609504251079e-06, "loss": 0.2675, "step": 20826 }, { "epoch": 0.7147220315717228, "grad_norm": 1.005418086066478, "learning_rate": 1.9871659678698763e-06, "loss": 0.2541, "step": 20827 }, { "epoch": 0.7147563486616335, "grad_norm": 0.6891719610442812, "learning_rate": 1.9867224687087253e-06, "loss": 0.2572, "step": 20828 }, { "epoch": 0.7147906657515443, "grad_norm": 0.6853106012220265, "learning_rate": 1.9862790067731054e-06, "loss": 0.2535, "step": 20829 }, { "epoch": 0.714824982841455, "grad_norm": 0.7736162794106146, "learning_rate": 1.985835582068495e-06, "loss": 0.2289, "step": 20830 }, { "epoch": 0.7148592999313658, "grad_norm": 0.7773616057297109, "learning_rate": 1.9853921946003714e-06, "loss": 0.3075, "step": 20831 }, { "epoch": 0.7148936170212766, "grad_norm": 0.7343822278057839, "learning_rate": 1.984948844374212e-06, "loss": 0.2366, "step": 20832 }, { "epoch": 0.7149279341111874, "grad_norm": 0.7448244958586431, "learning_rate": 1.984505531395497e-06, "loss": 0.2229, "step": 20833 }, { "epoch": 0.7149622512010981, "grad_norm": 0.793752211453422, "learning_rate": 1.9840622556696966e-06, "loss": 0.2674, "step": 20834 }, { "epoch": 0.7149965682910089, "grad_norm": 0.7673973304413841, "learning_rate": 1.983619017202293e-06, "loss": 0.2721, "step": 20835 }, { "epoch": 0.7150308853809197, "grad_norm": 0.8614274716895675, "learning_rate": 1.9831758159987606e-06, "loss": 0.3223, "step": 20836 }, { "epoch": 0.7150652024708305, "grad_norm": 0.76356155564429, "learning_rate": 1.9827326520645705e-06, "loss": 0.251, "step": 20837 }, { "epoch": 0.7150995195607412, "grad_norm": 0.8537076030791719, "learning_rate": 1.9822895254052045e-06, "loss": 0.3033, "step": 20838 }, { "epoch": 0.715133836650652, "grad_norm": 0.8358072302630375, "learning_rate": 1.9818464360261315e-06, "loss": 0.3291, "step": 20839 }, { "epoch": 0.7151681537405628, "grad_norm": 0.7752888246513163, "learning_rate": 1.9814033839328263e-06, "loss": 0.2771, "step": 20840 }, { "epoch": 0.7152024708304736, "grad_norm": 0.8114766076172851, "learning_rate": 1.980960369130763e-06, "loss": 0.2422, "step": 20841 }, { "epoch": 0.7152367879203844, "grad_norm": 0.7096832217607556, "learning_rate": 1.980517391625415e-06, "loss": 0.2756, "step": 20842 }, { "epoch": 0.7152711050102951, "grad_norm": 0.6658232048262848, "learning_rate": 1.980074451422254e-06, "loss": 0.266, "step": 20843 }, { "epoch": 0.7153054221002059, "grad_norm": 0.7266753308186898, "learning_rate": 1.9796315485267526e-06, "loss": 0.2556, "step": 20844 }, { "epoch": 0.7153397391901167, "grad_norm": 0.813923277779451, "learning_rate": 1.9791886829443815e-06, "loss": 0.2716, "step": 20845 }, { "epoch": 0.7153740562800275, "grad_norm": 0.9118339306011829, "learning_rate": 1.9787458546806125e-06, "loss": 0.2103, "step": 20846 }, { "epoch": 0.7154083733699382, "grad_norm": 0.9149735708808692, "learning_rate": 1.978303063740918e-06, "loss": 0.2513, "step": 20847 }, { "epoch": 0.715442690459849, "grad_norm": 0.7229527975852528, "learning_rate": 1.9778603101307624e-06, "loss": 0.2721, "step": 20848 }, { "epoch": 0.7154770075497597, "grad_norm": 0.7952536887449672, "learning_rate": 1.977417593855623e-06, "loss": 0.2809, "step": 20849 }, { "epoch": 0.7155113246396706, "grad_norm": 0.7854071286958648, "learning_rate": 1.9769749149209626e-06, "loss": 0.2447, "step": 20850 }, { "epoch": 0.7155456417295813, "grad_norm": 0.7876997539871774, "learning_rate": 1.9765322733322518e-06, "loss": 0.2151, "step": 20851 }, { "epoch": 0.7155799588194921, "grad_norm": 0.780451937643889, "learning_rate": 1.9760896690949627e-06, "loss": 0.2636, "step": 20852 }, { "epoch": 0.7156142759094029, "grad_norm": 0.7732338293823287, "learning_rate": 1.975647102214559e-06, "loss": 0.2377, "step": 20853 }, { "epoch": 0.7156485929993136, "grad_norm": 0.9493457707346178, "learning_rate": 1.975204572696509e-06, "loss": 0.2216, "step": 20854 }, { "epoch": 0.7156829100892245, "grad_norm": 0.777228538725491, "learning_rate": 1.9747620805462797e-06, "loss": 0.2559, "step": 20855 }, { "epoch": 0.7157172271791352, "grad_norm": 0.797689391491787, "learning_rate": 1.97431962576934e-06, "loss": 0.3222, "step": 20856 }, { "epoch": 0.715751544269046, "grad_norm": 0.9297664029707308, "learning_rate": 1.97387720837115e-06, "loss": 0.252, "step": 20857 }, { "epoch": 0.7157858613589567, "grad_norm": 0.8058970373429606, "learning_rate": 1.973434828357181e-06, "loss": 0.2564, "step": 20858 }, { "epoch": 0.7158201784488676, "grad_norm": 0.7823299369796152, "learning_rate": 1.9729924857328975e-06, "loss": 0.2815, "step": 20859 }, { "epoch": 0.7158544955387783, "grad_norm": 0.7672582727513148, "learning_rate": 1.972550180503759e-06, "loss": 0.2759, "step": 20860 }, { "epoch": 0.7158888126286891, "grad_norm": 0.9317538890309122, "learning_rate": 1.9721079126752373e-06, "loss": 0.2801, "step": 20861 }, { "epoch": 0.7159231297185998, "grad_norm": 0.7800977818299741, "learning_rate": 1.97166568225279e-06, "loss": 0.2696, "step": 20862 }, { "epoch": 0.7159574468085106, "grad_norm": 0.7317830008819552, "learning_rate": 1.971223489241883e-06, "loss": 0.2649, "step": 20863 }, { "epoch": 0.7159917638984215, "grad_norm": 0.702080995579787, "learning_rate": 1.9707813336479783e-06, "loss": 0.2164, "step": 20864 }, { "epoch": 0.7160260809883322, "grad_norm": 0.8021871621866797, "learning_rate": 1.9703392154765383e-06, "loss": 0.2783, "step": 20865 }, { "epoch": 0.716060398078243, "grad_norm": 0.7689212961397447, "learning_rate": 1.9698971347330252e-06, "loss": 0.2742, "step": 20866 }, { "epoch": 0.7160947151681537, "grad_norm": 0.7047163352750806, "learning_rate": 1.9694550914229005e-06, "loss": 0.2475, "step": 20867 }, { "epoch": 0.7161290322580646, "grad_norm": 0.7497986470263345, "learning_rate": 1.9690130855516264e-06, "loss": 0.2817, "step": 20868 }, { "epoch": 0.7161633493479753, "grad_norm": 0.8765076332643503, "learning_rate": 1.968571117124658e-06, "loss": 0.2337, "step": 20869 }, { "epoch": 0.7161976664378861, "grad_norm": 0.746681966310979, "learning_rate": 1.9681291861474634e-06, "loss": 0.2456, "step": 20870 }, { "epoch": 0.7162319835277968, "grad_norm": 0.7919338510615594, "learning_rate": 1.967687292625496e-06, "loss": 0.3201, "step": 20871 }, { "epoch": 0.7162663006177076, "grad_norm": 0.8173537223828727, "learning_rate": 1.9672454365642163e-06, "loss": 0.317, "step": 20872 }, { "epoch": 0.7163006177076184, "grad_norm": 0.7440450251081573, "learning_rate": 1.9668036179690835e-06, "loss": 0.2641, "step": 20873 }, { "epoch": 0.7163349347975292, "grad_norm": 0.7793396815135047, "learning_rate": 1.9663618368455557e-06, "loss": 0.2844, "step": 20874 }, { "epoch": 0.7163692518874399, "grad_norm": 0.6904069723370083, "learning_rate": 1.965920093199091e-06, "loss": 0.2526, "step": 20875 }, { "epoch": 0.7164035689773507, "grad_norm": 0.7171023171907218, "learning_rate": 1.9654783870351452e-06, "loss": 0.2521, "step": 20876 }, { "epoch": 0.7164378860672614, "grad_norm": 0.7175326172389264, "learning_rate": 1.965036718359176e-06, "loss": 0.2463, "step": 20877 }, { "epoch": 0.7164722031571723, "grad_norm": 0.8526723174706147, "learning_rate": 1.9645950871766405e-06, "loss": 0.3068, "step": 20878 }, { "epoch": 0.7165065202470831, "grad_norm": 0.7798128032898584, "learning_rate": 1.9641534934929934e-06, "loss": 0.2522, "step": 20879 }, { "epoch": 0.7165408373369938, "grad_norm": 0.9043512035103266, "learning_rate": 1.963711937313691e-06, "loss": 0.3043, "step": 20880 }, { "epoch": 0.7165751544269046, "grad_norm": 0.8406975850102396, "learning_rate": 1.963270418644187e-06, "loss": 0.2709, "step": 20881 }, { "epoch": 0.7166094715168154, "grad_norm": 0.7127494882035574, "learning_rate": 1.9628289374899386e-06, "loss": 0.3001, "step": 20882 }, { "epoch": 0.7166437886067262, "grad_norm": 0.7718265210487593, "learning_rate": 1.9623874938563934e-06, "loss": 0.2704, "step": 20883 }, { "epoch": 0.7166781056966369, "grad_norm": 0.68808804570055, "learning_rate": 1.9619460877490137e-06, "loss": 0.2269, "step": 20884 }, { "epoch": 0.7167124227865477, "grad_norm": 0.7621594069746186, "learning_rate": 1.9615047191732462e-06, "loss": 0.2112, "step": 20885 }, { "epoch": 0.7167467398764584, "grad_norm": 0.8240652268036938, "learning_rate": 1.9610633881345447e-06, "loss": 0.2927, "step": 20886 }, { "epoch": 0.7167810569663693, "grad_norm": 0.6969910688031049, "learning_rate": 1.9606220946383624e-06, "loss": 0.2278, "step": 20887 }, { "epoch": 0.71681537405628, "grad_norm": 0.7361695225373922, "learning_rate": 1.9601808386901506e-06, "loss": 0.2488, "step": 20888 }, { "epoch": 0.7168496911461908, "grad_norm": 0.7965153263991945, "learning_rate": 1.9597396202953606e-06, "loss": 0.3212, "step": 20889 }, { "epoch": 0.7168840082361015, "grad_norm": 0.7878152892383687, "learning_rate": 1.959298439459442e-06, "loss": 0.2593, "step": 20890 }, { "epoch": 0.7169183253260124, "grad_norm": 0.7841208796020285, "learning_rate": 1.9588572961878487e-06, "loss": 0.2361, "step": 20891 }, { "epoch": 0.7169526424159232, "grad_norm": 0.7482196485218059, "learning_rate": 1.9584161904860244e-06, "loss": 0.3116, "step": 20892 }, { "epoch": 0.7169869595058339, "grad_norm": 0.8507880782578668, "learning_rate": 1.9579751223594256e-06, "loss": 0.3542, "step": 20893 }, { "epoch": 0.7170212765957447, "grad_norm": 0.790952648239315, "learning_rate": 1.9575340918134955e-06, "loss": 0.2738, "step": 20894 }, { "epoch": 0.7170555936856554, "grad_norm": 0.8239704995820754, "learning_rate": 1.957093098853684e-06, "loss": 0.2711, "step": 20895 }, { "epoch": 0.7170899107755663, "grad_norm": 0.7982379894330115, "learning_rate": 1.95665214348544e-06, "loss": 0.2412, "step": 20896 }, { "epoch": 0.717124227865477, "grad_norm": 0.9521215767227922, "learning_rate": 1.956211225714211e-06, "loss": 0.2648, "step": 20897 }, { "epoch": 0.7171585449553878, "grad_norm": 0.8100540090808926, "learning_rate": 1.955770345545443e-06, "loss": 0.2692, "step": 20898 }, { "epoch": 0.7171928620452985, "grad_norm": 0.8539932718252635, "learning_rate": 1.9553295029845833e-06, "loss": 0.2576, "step": 20899 }, { "epoch": 0.7172271791352093, "grad_norm": 0.723843708326957, "learning_rate": 1.9548886980370797e-06, "loss": 0.2169, "step": 20900 }, { "epoch": 0.7172614962251201, "grad_norm": 0.8272472036291765, "learning_rate": 1.9544479307083723e-06, "loss": 0.2328, "step": 20901 }, { "epoch": 0.7172958133150309, "grad_norm": 0.8436092054526573, "learning_rate": 1.954007201003912e-06, "loss": 0.3046, "step": 20902 }, { "epoch": 0.7173301304049416, "grad_norm": 0.6732142016085358, "learning_rate": 1.953566508929143e-06, "loss": 0.2538, "step": 20903 }, { "epoch": 0.7173644474948524, "grad_norm": 0.777336338999731, "learning_rate": 1.953125854489504e-06, "loss": 0.3345, "step": 20904 }, { "epoch": 0.7173987645847633, "grad_norm": 1.0186996952558593, "learning_rate": 1.952685237690447e-06, "loss": 0.3136, "step": 20905 }, { "epoch": 0.717433081674674, "grad_norm": 0.7484350993480652, "learning_rate": 1.9522446585374095e-06, "loss": 0.278, "step": 20906 }, { "epoch": 0.7174673987645848, "grad_norm": 0.7487359028404478, "learning_rate": 1.9518041170358355e-06, "loss": 0.2618, "step": 20907 }, { "epoch": 0.7175017158544955, "grad_norm": 0.8003383875512369, "learning_rate": 1.9513636131911674e-06, "loss": 0.3027, "step": 20908 }, { "epoch": 0.7175360329444063, "grad_norm": 0.817628549317405, "learning_rate": 1.950923147008848e-06, "loss": 0.287, "step": 20909 }, { "epoch": 0.7175703500343171, "grad_norm": 0.7355436331662067, "learning_rate": 1.9504827184943183e-06, "loss": 0.2611, "step": 20910 }, { "epoch": 0.7176046671242279, "grad_norm": 0.7755022639301373, "learning_rate": 1.950042327653019e-06, "loss": 0.2607, "step": 20911 }, { "epoch": 0.7176389842141386, "grad_norm": 0.8212132652946786, "learning_rate": 1.9496019744903903e-06, "loss": 0.3027, "step": 20912 }, { "epoch": 0.7176733013040494, "grad_norm": 0.8291930926404057, "learning_rate": 1.9491616590118735e-06, "loss": 0.2145, "step": 20913 }, { "epoch": 0.7177076183939602, "grad_norm": 0.7567219507583902, "learning_rate": 1.948721381222909e-06, "loss": 0.3063, "step": 20914 }, { "epoch": 0.717741935483871, "grad_norm": 0.8113456957743934, "learning_rate": 1.948281141128931e-06, "loss": 0.2726, "step": 20915 }, { "epoch": 0.7177762525737817, "grad_norm": 0.8237254075337986, "learning_rate": 1.9478409387353854e-06, "loss": 0.2494, "step": 20916 }, { "epoch": 0.7178105696636925, "grad_norm": 0.7061032922796796, "learning_rate": 1.9474007740477043e-06, "loss": 0.2298, "step": 20917 }, { "epoch": 0.7178448867536033, "grad_norm": 0.690625185866092, "learning_rate": 1.9469606470713255e-06, "loss": 0.2714, "step": 20918 }, { "epoch": 0.7178792038435141, "grad_norm": 0.7729597127525086, "learning_rate": 1.9465205578116925e-06, "loss": 0.3168, "step": 20919 }, { "epoch": 0.7179135209334249, "grad_norm": 0.7715958869305535, "learning_rate": 1.946080506274236e-06, "loss": 0.3181, "step": 20920 }, { "epoch": 0.7179478380233356, "grad_norm": 0.7840457397598382, "learning_rate": 1.9456404924643947e-06, "loss": 0.2948, "step": 20921 }, { "epoch": 0.7179821551132464, "grad_norm": 0.7973090618203332, "learning_rate": 1.9452005163876038e-06, "loss": 0.2467, "step": 20922 }, { "epoch": 0.7180164722031571, "grad_norm": 0.8803031310589683, "learning_rate": 1.944760578049299e-06, "loss": 0.2575, "step": 20923 }, { "epoch": 0.718050789293068, "grad_norm": 0.7364573614585721, "learning_rate": 1.9443206774549152e-06, "loss": 0.2856, "step": 20924 }, { "epoch": 0.7180851063829787, "grad_norm": 0.8922558199662143, "learning_rate": 1.9438808146098864e-06, "loss": 0.2419, "step": 20925 }, { "epoch": 0.7181194234728895, "grad_norm": 0.7815218141221635, "learning_rate": 1.9434409895196495e-06, "loss": 0.2697, "step": 20926 }, { "epoch": 0.7181537405628002, "grad_norm": 0.7178236656576074, "learning_rate": 1.943001202189632e-06, "loss": 0.2362, "step": 20927 }, { "epoch": 0.7181880576527111, "grad_norm": 0.7168992420314001, "learning_rate": 1.9425614526252736e-06, "loss": 0.2816, "step": 20928 }, { "epoch": 0.7182223747426218, "grad_norm": 0.7550869516339375, "learning_rate": 1.9421217408320024e-06, "loss": 0.2706, "step": 20929 }, { "epoch": 0.7182566918325326, "grad_norm": 0.7673289021344721, "learning_rate": 1.9416820668152518e-06, "loss": 0.3009, "step": 20930 }, { "epoch": 0.7182910089224434, "grad_norm": 0.7931783937990315, "learning_rate": 1.941242430580454e-06, "loss": 0.3254, "step": 20931 }, { "epoch": 0.7183253260123541, "grad_norm": 0.7479722182041173, "learning_rate": 1.940802832133039e-06, "loss": 0.2452, "step": 20932 }, { "epoch": 0.718359643102265, "grad_norm": 0.714360114179364, "learning_rate": 1.9403632714784393e-06, "loss": 0.2433, "step": 20933 }, { "epoch": 0.7183939601921757, "grad_norm": 0.7987791847513394, "learning_rate": 1.9399237486220835e-06, "loss": 0.2573, "step": 20934 }, { "epoch": 0.7184282772820865, "grad_norm": 0.7754164362901261, "learning_rate": 1.939484263569404e-06, "loss": 0.249, "step": 20935 }, { "epoch": 0.7184625943719972, "grad_norm": 0.7063611284152532, "learning_rate": 1.9390448163258247e-06, "loss": 0.2313, "step": 20936 }, { "epoch": 0.7184969114619081, "grad_norm": 0.8801632745468048, "learning_rate": 1.938605406896781e-06, "loss": 0.2375, "step": 20937 }, { "epoch": 0.7185312285518188, "grad_norm": 0.8079209125666745, "learning_rate": 1.9381660352876957e-06, "loss": 0.2403, "step": 20938 }, { "epoch": 0.7185655456417296, "grad_norm": 0.7257577388808604, "learning_rate": 1.9377267015040023e-06, "loss": 0.2599, "step": 20939 }, { "epoch": 0.7185998627316403, "grad_norm": 0.7462309074788703, "learning_rate": 1.9372874055511237e-06, "loss": 0.2608, "step": 20940 }, { "epoch": 0.7186341798215511, "grad_norm": 0.7610256774911323, "learning_rate": 1.9368481474344865e-06, "loss": 0.2876, "step": 20941 }, { "epoch": 0.718668496911462, "grad_norm": 0.7648709314477592, "learning_rate": 1.9364089271595227e-06, "loss": 0.3202, "step": 20942 }, { "epoch": 0.7187028140013727, "grad_norm": 0.8198282225825818, "learning_rate": 1.9359697447316528e-06, "loss": 0.2843, "step": 20943 }, { "epoch": 0.7187371310912835, "grad_norm": 0.7655872042668282, "learning_rate": 1.9355306001563047e-06, "loss": 0.259, "step": 20944 }, { "epoch": 0.7187714481811942, "grad_norm": 0.8039803248986612, "learning_rate": 1.935091493438903e-06, "loss": 0.2549, "step": 20945 }, { "epoch": 0.718805765271105, "grad_norm": 0.8312914364361254, "learning_rate": 1.934652424584873e-06, "loss": 0.268, "step": 20946 }, { "epoch": 0.7188400823610158, "grad_norm": 0.7552265677003459, "learning_rate": 1.934213393599638e-06, "loss": 0.2403, "step": 20947 }, { "epoch": 0.7188743994509266, "grad_norm": 0.863975348560832, "learning_rate": 1.9337744004886223e-06, "loss": 0.2784, "step": 20948 }, { "epoch": 0.7189087165408373, "grad_norm": 0.742724327905946, "learning_rate": 1.9333354452572506e-06, "loss": 0.2558, "step": 20949 }, { "epoch": 0.7189430336307481, "grad_norm": 0.876220121611227, "learning_rate": 1.9328965279109407e-06, "loss": 0.2396, "step": 20950 }, { "epoch": 0.7189773507206589, "grad_norm": 0.76954562941804, "learning_rate": 1.932457648455122e-06, "loss": 0.2823, "step": 20951 }, { "epoch": 0.7190116678105697, "grad_norm": 0.7988869615460388, "learning_rate": 1.9320188068952106e-06, "loss": 0.2715, "step": 20952 }, { "epoch": 0.7190459849004804, "grad_norm": 0.8004704924461118, "learning_rate": 1.93158000323663e-06, "loss": 0.2476, "step": 20953 }, { "epoch": 0.7190803019903912, "grad_norm": 0.7527525385838149, "learning_rate": 1.9311412374848014e-06, "loss": 0.2263, "step": 20954 }, { "epoch": 0.7191146190803019, "grad_norm": 0.7602656719469583, "learning_rate": 1.9307025096451444e-06, "loss": 0.2708, "step": 20955 }, { "epoch": 0.7191489361702128, "grad_norm": 0.7944543458018714, "learning_rate": 1.9302638197230795e-06, "loss": 0.2672, "step": 20956 }, { "epoch": 0.7191832532601236, "grad_norm": 0.811563893559235, "learning_rate": 1.9298251677240265e-06, "loss": 0.2646, "step": 20957 }, { "epoch": 0.7192175703500343, "grad_norm": 0.7604061367266578, "learning_rate": 1.929386553653406e-06, "loss": 0.2675, "step": 20958 }, { "epoch": 0.7192518874399451, "grad_norm": 0.7377001682512587, "learning_rate": 1.9289479775166305e-06, "loss": 0.2529, "step": 20959 }, { "epoch": 0.7192862045298559, "grad_norm": 0.833037737790839, "learning_rate": 1.9285094393191255e-06, "loss": 0.2451, "step": 20960 }, { "epoch": 0.7193205216197667, "grad_norm": 0.8728340574592865, "learning_rate": 1.9280709390663044e-06, "loss": 0.3484, "step": 20961 }, { "epoch": 0.7193548387096774, "grad_norm": 0.7663228927195662, "learning_rate": 1.927632476763583e-06, "loss": 0.2092, "step": 20962 }, { "epoch": 0.7193891557995882, "grad_norm": 0.8397281616619677, "learning_rate": 1.927194052416384e-06, "loss": 0.2792, "step": 20963 }, { "epoch": 0.7194234728894989, "grad_norm": 0.7520150910912387, "learning_rate": 1.9267556660301186e-06, "loss": 0.2587, "step": 20964 }, { "epoch": 0.7194577899794098, "grad_norm": 0.8128176401286505, "learning_rate": 1.9263173176102035e-06, "loss": 0.2746, "step": 20965 }, { "epoch": 0.7194921070693205, "grad_norm": 0.7537680533203273, "learning_rate": 1.925879007162054e-06, "loss": 0.2911, "step": 20966 }, { "epoch": 0.7195264241592313, "grad_norm": 0.9140866347275703, "learning_rate": 1.9254407346910874e-06, "loss": 0.275, "step": 20967 }, { "epoch": 0.719560741249142, "grad_norm": 0.7651928392288425, "learning_rate": 1.9250025002027127e-06, "loss": 0.3042, "step": 20968 }, { "epoch": 0.7195950583390528, "grad_norm": 0.7816048276435487, "learning_rate": 1.9245643037023487e-06, "loss": 0.2345, "step": 20969 }, { "epoch": 0.7196293754289637, "grad_norm": 0.7547085823693345, "learning_rate": 1.924126145195407e-06, "loss": 0.2843, "step": 20970 }, { "epoch": 0.7196636925188744, "grad_norm": 0.7730130674543766, "learning_rate": 1.9236880246873004e-06, "loss": 0.2546, "step": 20971 }, { "epoch": 0.7196980096087852, "grad_norm": 0.8551382084656942, "learning_rate": 1.923249942183444e-06, "loss": 0.2802, "step": 20972 }, { "epoch": 0.7197323266986959, "grad_norm": 0.7315989556666552, "learning_rate": 1.922811897689243e-06, "loss": 0.2575, "step": 20973 }, { "epoch": 0.7197666437886068, "grad_norm": 0.8315641526620824, "learning_rate": 1.9223738912101174e-06, "loss": 0.2323, "step": 20974 }, { "epoch": 0.7198009608785175, "grad_norm": 0.8056537506730861, "learning_rate": 1.9219359227514726e-06, "loss": 0.2216, "step": 20975 }, { "epoch": 0.7198352779684283, "grad_norm": 0.7228972937563183, "learning_rate": 1.9214979923187204e-06, "loss": 0.2396, "step": 20976 }, { "epoch": 0.719869595058339, "grad_norm": 0.7289435511156088, "learning_rate": 1.921060099917272e-06, "loss": 0.3042, "step": 20977 }, { "epoch": 0.7199039121482498, "grad_norm": 0.8241694323548111, "learning_rate": 1.9206222455525357e-06, "loss": 0.3151, "step": 20978 }, { "epoch": 0.7199382292381606, "grad_norm": 0.7097136637335519, "learning_rate": 1.9201844292299216e-06, "loss": 0.2263, "step": 20979 }, { "epoch": 0.7199725463280714, "grad_norm": 0.894745440018714, "learning_rate": 1.919746650954838e-06, "loss": 0.2503, "step": 20980 }, { "epoch": 0.7200068634179821, "grad_norm": 0.8021097053502682, "learning_rate": 1.9193089107326953e-06, "loss": 0.2685, "step": 20981 }, { "epoch": 0.7200411805078929, "grad_norm": 0.7882412896080468, "learning_rate": 1.9188712085688954e-06, "loss": 0.2315, "step": 20982 }, { "epoch": 0.7200754975978038, "grad_norm": 0.8785429770888458, "learning_rate": 1.9184335444688533e-06, "loss": 0.2795, "step": 20983 }, { "epoch": 0.7201098146877145, "grad_norm": 0.8149017530809332, "learning_rate": 1.9179959184379706e-06, "loss": 0.2382, "step": 20984 }, { "epoch": 0.7201441317776253, "grad_norm": 0.8341616868911341, "learning_rate": 1.9175583304816525e-06, "loss": 0.3416, "step": 20985 }, { "epoch": 0.720178448867536, "grad_norm": 0.8626233074271579, "learning_rate": 1.917120780605311e-06, "loss": 0.3033, "step": 20986 }, { "epoch": 0.7202127659574468, "grad_norm": 0.6908893284469322, "learning_rate": 1.9166832688143465e-06, "loss": 0.2824, "step": 20987 }, { "epoch": 0.7202470830473576, "grad_norm": 0.7739046208812863, "learning_rate": 1.9162457951141654e-06, "loss": 0.2524, "step": 20988 }, { "epoch": 0.7202814001372684, "grad_norm": 0.8194859389182932, "learning_rate": 1.915808359510173e-06, "loss": 0.2822, "step": 20989 }, { "epoch": 0.7203157172271791, "grad_norm": 0.7708712483476252, "learning_rate": 1.9153709620077713e-06, "loss": 0.2655, "step": 20990 }, { "epoch": 0.7203500343170899, "grad_norm": 0.7032594491024743, "learning_rate": 1.9149336026123656e-06, "loss": 0.2562, "step": 20991 }, { "epoch": 0.7203843514070006, "grad_norm": 0.8559398575156968, "learning_rate": 1.914496281329358e-06, "loss": 0.2741, "step": 20992 }, { "epoch": 0.7204186684969115, "grad_norm": 0.8969493083738248, "learning_rate": 1.914058998164154e-06, "loss": 0.2998, "step": 20993 }, { "epoch": 0.7204529855868222, "grad_norm": 0.7647783045490765, "learning_rate": 1.913621753122149e-06, "loss": 0.2153, "step": 20994 }, { "epoch": 0.720487302676733, "grad_norm": 0.7090122922446419, "learning_rate": 1.9131845462087528e-06, "loss": 0.2534, "step": 20995 }, { "epoch": 0.7205216197666438, "grad_norm": 0.8229669285593992, "learning_rate": 1.9127473774293603e-06, "loss": 0.2541, "step": 20996 }, { "epoch": 0.7205559368565546, "grad_norm": 0.7348929098631327, "learning_rate": 1.912310246789375e-06, "loss": 0.2537, "step": 20997 }, { "epoch": 0.7205902539464654, "grad_norm": 0.7112268244184095, "learning_rate": 1.911873154294197e-06, "loss": 0.2467, "step": 20998 }, { "epoch": 0.7206245710363761, "grad_norm": 1.048464445138642, "learning_rate": 1.9114360999492256e-06, "loss": 0.2821, "step": 20999 }, { "epoch": 0.7206588881262869, "grad_norm": 0.7112970056682586, "learning_rate": 1.9109990837598603e-06, "loss": 0.2668, "step": 21000 }, { "epoch": 0.7206932052161976, "grad_norm": 0.7607826910901516, "learning_rate": 1.9105621057314994e-06, "loss": 0.2905, "step": 21001 }, { "epoch": 0.7207275223061085, "grad_norm": 0.7267051402738234, "learning_rate": 1.910125165869542e-06, "loss": 0.2638, "step": 21002 }, { "epoch": 0.7207618393960192, "grad_norm": 0.8397113610602673, "learning_rate": 1.909688264179386e-06, "loss": 0.2294, "step": 21003 }, { "epoch": 0.72079615648593, "grad_norm": 0.8635592384027925, "learning_rate": 1.90925140066643e-06, "loss": 0.322, "step": 21004 }, { "epoch": 0.7208304735758407, "grad_norm": 0.8128692674946825, "learning_rate": 1.908814575336066e-06, "loss": 0.2552, "step": 21005 }, { "epoch": 0.7208647906657516, "grad_norm": 0.7002917856613277, "learning_rate": 1.9083777881936975e-06, "loss": 0.258, "step": 21006 }, { "epoch": 0.7208991077556623, "grad_norm": 0.827630337020108, "learning_rate": 1.9079410392447157e-06, "loss": 0.2844, "step": 21007 }, { "epoch": 0.7209334248455731, "grad_norm": 0.7837452702003692, "learning_rate": 1.9075043284945148e-06, "loss": 0.2926, "step": 21008 }, { "epoch": 0.7209677419354839, "grad_norm": 0.8263974887856124, "learning_rate": 1.9070676559484963e-06, "loss": 0.2473, "step": 21009 }, { "epoch": 0.7210020590253946, "grad_norm": 0.7211963894374586, "learning_rate": 1.9066310216120494e-06, "loss": 0.2585, "step": 21010 }, { "epoch": 0.7210363761153055, "grad_norm": 0.7161909639229882, "learning_rate": 1.906194425490569e-06, "loss": 0.2646, "step": 21011 }, { "epoch": 0.7210706932052162, "grad_norm": 0.8460428410485457, "learning_rate": 1.90575786758945e-06, "loss": 0.2112, "step": 21012 }, { "epoch": 0.721105010295127, "grad_norm": 0.6728397255119802, "learning_rate": 1.9053213479140847e-06, "loss": 0.2338, "step": 21013 }, { "epoch": 0.7211393273850377, "grad_norm": 0.9231808249011586, "learning_rate": 1.9048848664698667e-06, "loss": 0.3138, "step": 21014 }, { "epoch": 0.7211736444749485, "grad_norm": 0.8339359527766533, "learning_rate": 1.904448423262187e-06, "loss": 0.2555, "step": 21015 }, { "epoch": 0.7212079615648593, "grad_norm": 0.8963656716979491, "learning_rate": 1.90401201829644e-06, "loss": 0.2734, "step": 21016 }, { "epoch": 0.7212422786547701, "grad_norm": 0.8798055107243007, "learning_rate": 1.9035756515780112e-06, "loss": 0.3064, "step": 21017 }, { "epoch": 0.7212765957446808, "grad_norm": 0.8362571535685703, "learning_rate": 1.903139323112299e-06, "loss": 0.2627, "step": 21018 }, { "epoch": 0.7213109128345916, "grad_norm": 0.6459083760830024, "learning_rate": 1.9027030329046881e-06, "loss": 0.2459, "step": 21019 }, { "epoch": 0.7213452299245025, "grad_norm": 0.7641241863374497, "learning_rate": 1.9022667809605698e-06, "loss": 0.2946, "step": 21020 }, { "epoch": 0.7213795470144132, "grad_norm": 0.7878032484130256, "learning_rate": 1.9018305672853338e-06, "loss": 0.2045, "step": 21021 }, { "epoch": 0.721413864104324, "grad_norm": 0.7610926944701205, "learning_rate": 1.901394391884369e-06, "loss": 0.2375, "step": 21022 }, { "epoch": 0.7214481811942347, "grad_norm": 0.8775560922177147, "learning_rate": 1.9009582547630645e-06, "loss": 0.3169, "step": 21023 }, { "epoch": 0.7214824982841455, "grad_norm": 0.8337792245543881, "learning_rate": 1.9005221559268072e-06, "loss": 0.2332, "step": 21024 }, { "epoch": 0.7215168153740563, "grad_norm": 0.7429483165865801, "learning_rate": 1.9000860953809868e-06, "loss": 0.2409, "step": 21025 }, { "epoch": 0.7215511324639671, "grad_norm": 0.6510063997455044, "learning_rate": 1.8996500731309848e-06, "loss": 0.2164, "step": 21026 }, { "epoch": 0.7215854495538778, "grad_norm": 0.6947141072606644, "learning_rate": 1.8992140891821952e-06, "loss": 0.2281, "step": 21027 }, { "epoch": 0.7216197666437886, "grad_norm": 0.7848262623206032, "learning_rate": 1.8987781435399987e-06, "loss": 0.2466, "step": 21028 }, { "epoch": 0.7216540837336994, "grad_norm": 0.8147370203311878, "learning_rate": 1.8983422362097808e-06, "loss": 0.2879, "step": 21029 }, { "epoch": 0.7216884008236102, "grad_norm": 0.9889928092810996, "learning_rate": 1.8979063671969318e-06, "loss": 0.2764, "step": 21030 }, { "epoch": 0.7217227179135209, "grad_norm": 0.7914158994074961, "learning_rate": 1.8974705365068314e-06, "loss": 0.2687, "step": 21031 }, { "epoch": 0.7217570350034317, "grad_norm": 0.7138043128199306, "learning_rate": 1.897034744144865e-06, "loss": 0.2599, "step": 21032 }, { "epoch": 0.7217913520933424, "grad_norm": 0.8044619134695205, "learning_rate": 1.8965989901164168e-06, "loss": 0.2727, "step": 21033 }, { "epoch": 0.7218256691832533, "grad_norm": 0.8897861956808265, "learning_rate": 1.8961632744268698e-06, "loss": 0.2748, "step": 21034 }, { "epoch": 0.7218599862731641, "grad_norm": 0.8596472111858081, "learning_rate": 1.895727597081607e-06, "loss": 0.2878, "step": 21035 }, { "epoch": 0.7218943033630748, "grad_norm": 0.753512409439497, "learning_rate": 1.8952919580860102e-06, "loss": 0.2412, "step": 21036 }, { "epoch": 0.7219286204529856, "grad_norm": 0.9273800093682835, "learning_rate": 1.8948563574454614e-06, "loss": 0.266, "step": 21037 }, { "epoch": 0.7219629375428963, "grad_norm": 0.7680612786212135, "learning_rate": 1.8944207951653427e-06, "loss": 0.3278, "step": 21038 }, { "epoch": 0.7219972546328072, "grad_norm": 0.7756542673190393, "learning_rate": 1.8939852712510354e-06, "loss": 0.2708, "step": 21039 }, { "epoch": 0.7220315717227179, "grad_norm": 0.8337331535936625, "learning_rate": 1.893549785707916e-06, "loss": 0.2996, "step": 21040 }, { "epoch": 0.7220658888126287, "grad_norm": 0.8232399770716411, "learning_rate": 1.8931143385413704e-06, "loss": 0.2264, "step": 21041 }, { "epoch": 0.7221002059025394, "grad_norm": 0.826883527859266, "learning_rate": 1.8926789297567727e-06, "loss": 0.2804, "step": 21042 }, { "epoch": 0.7221345229924503, "grad_norm": 0.7308899285883148, "learning_rate": 1.8922435593595045e-06, "loss": 0.2585, "step": 21043 }, { "epoch": 0.722168840082361, "grad_norm": 0.7162649729986542, "learning_rate": 1.8918082273549438e-06, "loss": 0.2781, "step": 21044 }, { "epoch": 0.7222031571722718, "grad_norm": 0.7210633964029233, "learning_rate": 1.8913729337484688e-06, "loss": 0.2636, "step": 21045 }, { "epoch": 0.7222374742621825, "grad_norm": 0.8514356582488523, "learning_rate": 1.8909376785454565e-06, "loss": 0.2688, "step": 21046 }, { "epoch": 0.7222717913520933, "grad_norm": 0.7000003223749701, "learning_rate": 1.8905024617512846e-06, "loss": 0.2343, "step": 21047 }, { "epoch": 0.7223061084420042, "grad_norm": 0.6949673699418024, "learning_rate": 1.8900672833713313e-06, "loss": 0.2093, "step": 21048 }, { "epoch": 0.7223404255319149, "grad_norm": 0.8062415265840692, "learning_rate": 1.8896321434109672e-06, "loss": 0.2329, "step": 21049 }, { "epoch": 0.7223747426218257, "grad_norm": 0.8332036539568525, "learning_rate": 1.889197041875575e-06, "loss": 0.2953, "step": 21050 }, { "epoch": 0.7224090597117364, "grad_norm": 0.7506425363022267, "learning_rate": 1.8887619787705247e-06, "loss": 0.2626, "step": 21051 }, { "epoch": 0.7224433768016473, "grad_norm": 0.8443484376859166, "learning_rate": 1.8883269541011918e-06, "loss": 0.2787, "step": 21052 }, { "epoch": 0.722477693891558, "grad_norm": 0.7720634126097916, "learning_rate": 1.887891967872954e-06, "loss": 0.236, "step": 21053 }, { "epoch": 0.7225120109814688, "grad_norm": 0.7924083830438939, "learning_rate": 1.8874570200911813e-06, "loss": 0.2315, "step": 21054 }, { "epoch": 0.7225463280713795, "grad_norm": 0.7553699811171706, "learning_rate": 1.8870221107612485e-06, "loss": 0.2736, "step": 21055 }, { "epoch": 0.7225806451612903, "grad_norm": 0.7219024443651955, "learning_rate": 1.8865872398885277e-06, "loss": 0.2734, "step": 21056 }, { "epoch": 0.7226149622512011, "grad_norm": 0.7431420460203383, "learning_rate": 1.8861524074783922e-06, "loss": 0.2808, "step": 21057 }, { "epoch": 0.7226492793411119, "grad_norm": 0.7372516408170705, "learning_rate": 1.8857176135362126e-06, "loss": 0.2517, "step": 21058 }, { "epoch": 0.7226835964310226, "grad_norm": 0.7604204943919197, "learning_rate": 1.8852828580673616e-06, "loss": 0.2568, "step": 21059 }, { "epoch": 0.7227179135209334, "grad_norm": 0.7882475782443314, "learning_rate": 1.8848481410772113e-06, "loss": 0.2872, "step": 21060 }, { "epoch": 0.7227522306108441, "grad_norm": 0.7730285670111143, "learning_rate": 1.8844134625711264e-06, "loss": 0.2606, "step": 21061 }, { "epoch": 0.722786547700755, "grad_norm": 0.8367909486636742, "learning_rate": 1.8839788225544842e-06, "loss": 0.2654, "step": 21062 }, { "epoch": 0.7228208647906658, "grad_norm": 0.6821991137269969, "learning_rate": 1.8835442210326494e-06, "loss": 0.2403, "step": 21063 }, { "epoch": 0.7228551818805765, "grad_norm": 0.8176369625414058, "learning_rate": 1.883109658010992e-06, "loss": 0.2097, "step": 21064 }, { "epoch": 0.7228894989704873, "grad_norm": 0.8171919811599234, "learning_rate": 1.8826751334948806e-06, "loss": 0.2736, "step": 21065 }, { "epoch": 0.7229238160603981, "grad_norm": 0.9131086432732779, "learning_rate": 1.8822406474896836e-06, "loss": 0.2806, "step": 21066 }, { "epoch": 0.7229581331503089, "grad_norm": 0.7473350317916664, "learning_rate": 1.8818062000007686e-06, "loss": 0.2487, "step": 21067 }, { "epoch": 0.7229924502402196, "grad_norm": 0.8188181154520257, "learning_rate": 1.8813717910335029e-06, "loss": 0.2754, "step": 21068 }, { "epoch": 0.7230267673301304, "grad_norm": 0.7955331690158525, "learning_rate": 1.8809374205932524e-06, "loss": 0.3066, "step": 21069 }, { "epoch": 0.7230610844200411, "grad_norm": 0.8471200923883797, "learning_rate": 1.880503088685383e-06, "loss": 0.2804, "step": 21070 }, { "epoch": 0.723095401509952, "grad_norm": 0.9421009611918999, "learning_rate": 1.8800687953152635e-06, "loss": 0.2993, "step": 21071 }, { "epoch": 0.7231297185998627, "grad_norm": 0.7290166969632011, "learning_rate": 1.8796345404882532e-06, "loss": 0.2578, "step": 21072 }, { "epoch": 0.7231640356897735, "grad_norm": 0.8459295446136949, "learning_rate": 1.8792003242097212e-06, "loss": 0.2717, "step": 21073 }, { "epoch": 0.7231983527796843, "grad_norm": 2.5436743467878573, "learning_rate": 1.8787661464850332e-06, "loss": 0.3025, "step": 21074 }, { "epoch": 0.7232326698695951, "grad_norm": 0.8141355474642931, "learning_rate": 1.8783320073195466e-06, "loss": 0.2903, "step": 21075 }, { "epoch": 0.7232669869595059, "grad_norm": 0.8836796441921041, "learning_rate": 1.8778979067186315e-06, "loss": 0.2596, "step": 21076 }, { "epoch": 0.7233013040494166, "grad_norm": 0.8716639281963204, "learning_rate": 1.8774638446876465e-06, "loss": 0.2442, "step": 21077 }, { "epoch": 0.7233356211393274, "grad_norm": 0.8056131118519108, "learning_rate": 1.8770298212319554e-06, "loss": 0.3169, "step": 21078 }, { "epoch": 0.7233699382292381, "grad_norm": 0.8779933709346031, "learning_rate": 1.8765958363569192e-06, "loss": 0.242, "step": 21079 }, { "epoch": 0.723404255319149, "grad_norm": 0.7105353404727126, "learning_rate": 1.8761618900679007e-06, "loss": 0.2964, "step": 21080 }, { "epoch": 0.7234385724090597, "grad_norm": 0.7526664335805258, "learning_rate": 1.8757279823702594e-06, "loss": 0.2778, "step": 21081 }, { "epoch": 0.7234728894989705, "grad_norm": 0.8645507705228684, "learning_rate": 1.8752941132693565e-06, "loss": 0.3283, "step": 21082 }, { "epoch": 0.7235072065888812, "grad_norm": 0.9346270934655618, "learning_rate": 1.8748602827705537e-06, "loss": 0.2797, "step": 21083 }, { "epoch": 0.723541523678792, "grad_norm": 0.7812174228866537, "learning_rate": 1.874426490879205e-06, "loss": 0.269, "step": 21084 }, { "epoch": 0.7235758407687028, "grad_norm": 0.7553929725189763, "learning_rate": 1.8739927376006762e-06, "loss": 0.2639, "step": 21085 }, { "epoch": 0.7236101578586136, "grad_norm": 0.9277241615941844, "learning_rate": 1.873559022940321e-06, "loss": 0.2869, "step": 21086 }, { "epoch": 0.7236444749485244, "grad_norm": 0.8596362891054593, "learning_rate": 1.8731253469034989e-06, "loss": 0.2402, "step": 21087 }, { "epoch": 0.7236787920384351, "grad_norm": 0.8912243026360664, "learning_rate": 1.8726917094955676e-06, "loss": 0.2701, "step": 21088 }, { "epoch": 0.723713109128346, "grad_norm": 0.8168918708829076, "learning_rate": 1.872258110721884e-06, "loss": 0.2753, "step": 21089 }, { "epoch": 0.7237474262182567, "grad_norm": 0.8849295302814345, "learning_rate": 1.8718245505878052e-06, "loss": 0.2073, "step": 21090 }, { "epoch": 0.7237817433081675, "grad_norm": 0.7281214389467812, "learning_rate": 1.8713910290986865e-06, "loss": 0.2076, "step": 21091 }, { "epoch": 0.7238160603980782, "grad_norm": 0.8016214757780986, "learning_rate": 1.8709575462598861e-06, "loss": 0.2502, "step": 21092 }, { "epoch": 0.723850377487989, "grad_norm": 0.7650944459042188, "learning_rate": 1.8705241020767534e-06, "loss": 0.224, "step": 21093 }, { "epoch": 0.7238846945778998, "grad_norm": 0.7252514723923159, "learning_rate": 1.8700906965546505e-06, "loss": 0.2323, "step": 21094 }, { "epoch": 0.7239190116678106, "grad_norm": 0.8450591887460006, "learning_rate": 1.8696573296989246e-06, "loss": 0.2701, "step": 21095 }, { "epoch": 0.7239533287577213, "grad_norm": 0.8111238038761891, "learning_rate": 1.8692240015149344e-06, "loss": 0.2481, "step": 21096 }, { "epoch": 0.7239876458476321, "grad_norm": 0.9084181116532385, "learning_rate": 1.8687907120080328e-06, "loss": 0.3138, "step": 21097 }, { "epoch": 0.724021962937543, "grad_norm": 0.7346598494590184, "learning_rate": 1.8683574611835687e-06, "loss": 0.2369, "step": 21098 }, { "epoch": 0.7240562800274537, "grad_norm": 0.9061060181204245, "learning_rate": 1.8679242490468997e-06, "loss": 0.2355, "step": 21099 }, { "epoch": 0.7240905971173645, "grad_norm": 0.7841944649304103, "learning_rate": 1.8674910756033738e-06, "loss": 0.2572, "step": 21100 }, { "epoch": 0.7241249142072752, "grad_norm": 0.7264837687165037, "learning_rate": 1.8670579408583427e-06, "loss": 0.2451, "step": 21101 }, { "epoch": 0.724159231297186, "grad_norm": 0.8099542824648683, "learning_rate": 1.8666248448171591e-06, "loss": 0.2815, "step": 21102 }, { "epoch": 0.7241935483870968, "grad_norm": 0.8295262796865891, "learning_rate": 1.866191787485172e-06, "loss": 0.269, "step": 21103 }, { "epoch": 0.7242278654770076, "grad_norm": 0.848128273608263, "learning_rate": 1.8657587688677314e-06, "loss": 0.2632, "step": 21104 }, { "epoch": 0.7242621825669183, "grad_norm": 0.7367841930041155, "learning_rate": 1.865325788970187e-06, "loss": 0.2594, "step": 21105 }, { "epoch": 0.7242964996568291, "grad_norm": 0.8729443413497378, "learning_rate": 1.8648928477978901e-06, "loss": 0.2638, "step": 21106 }, { "epoch": 0.7243308167467398, "grad_norm": 0.7977497057484092, "learning_rate": 1.8644599453561828e-06, "loss": 0.2486, "step": 21107 }, { "epoch": 0.7243651338366507, "grad_norm": 0.7820699981488889, "learning_rate": 1.8640270816504213e-06, "loss": 0.281, "step": 21108 }, { "epoch": 0.7243994509265614, "grad_norm": 0.7205601958858854, "learning_rate": 1.8635942566859467e-06, "loss": 0.2491, "step": 21109 }, { "epoch": 0.7244337680164722, "grad_norm": 0.8767614278190449, "learning_rate": 1.863161470468109e-06, "loss": 0.2354, "step": 21110 }, { "epoch": 0.7244680851063829, "grad_norm": 0.8375244149667501, "learning_rate": 1.8627287230022534e-06, "loss": 0.2529, "step": 21111 }, { "epoch": 0.7245024021962938, "grad_norm": 0.9140066951182981, "learning_rate": 1.8622960142937268e-06, "loss": 0.2656, "step": 21112 }, { "epoch": 0.7245367192862046, "grad_norm": 0.7301701730799812, "learning_rate": 1.861863344347875e-06, "loss": 0.2786, "step": 21113 }, { "epoch": 0.7245710363761153, "grad_norm": 0.6941836548741674, "learning_rate": 1.8614307131700431e-06, "loss": 0.1996, "step": 21114 }, { "epoch": 0.7246053534660261, "grad_norm": 0.7661786842034859, "learning_rate": 1.860998120765577e-06, "loss": 0.2258, "step": 21115 }, { "epoch": 0.7246396705559368, "grad_norm": 0.90339701266339, "learning_rate": 1.8605655671398159e-06, "loss": 0.2539, "step": 21116 }, { "epoch": 0.7246739876458477, "grad_norm": 0.8077263096902144, "learning_rate": 1.860133052298111e-06, "loss": 0.2472, "step": 21117 }, { "epoch": 0.7247083047357584, "grad_norm": 0.8390706564828242, "learning_rate": 1.8597005762457987e-06, "loss": 0.3177, "step": 21118 }, { "epoch": 0.7247426218256692, "grad_norm": 0.7403272131878494, "learning_rate": 1.859268138988224e-06, "loss": 0.2344, "step": 21119 }, { "epoch": 0.7247769389155799, "grad_norm": 0.8153454451801108, "learning_rate": 1.8588357405307321e-06, "loss": 0.2577, "step": 21120 }, { "epoch": 0.7248112560054908, "grad_norm": 0.793738392100821, "learning_rate": 1.8584033808786612e-06, "loss": 0.2908, "step": 21121 }, { "epoch": 0.7248455730954015, "grad_norm": 0.7431225108849907, "learning_rate": 1.8579710600373534e-06, "loss": 0.2965, "step": 21122 }, { "epoch": 0.7248798901853123, "grad_norm": 0.7936900041703963, "learning_rate": 1.8575387780121508e-06, "loss": 0.292, "step": 21123 }, { "epoch": 0.724914207275223, "grad_norm": 0.8352393425164186, "learning_rate": 1.8571065348083921e-06, "loss": 0.2556, "step": 21124 }, { "epoch": 0.7249485243651338, "grad_norm": 0.7976514750272584, "learning_rate": 1.8566743304314178e-06, "loss": 0.2309, "step": 21125 }, { "epoch": 0.7249828414550447, "grad_norm": 0.7880531012965091, "learning_rate": 1.856242164886568e-06, "loss": 0.278, "step": 21126 }, { "epoch": 0.7250171585449554, "grad_norm": 0.8395975650476423, "learning_rate": 1.8558100381791806e-06, "loss": 0.2567, "step": 21127 }, { "epoch": 0.7250514756348662, "grad_norm": 0.950967320124558, "learning_rate": 1.855377950314594e-06, "loss": 0.2297, "step": 21128 }, { "epoch": 0.7250857927247769, "grad_norm": 0.7789863451860779, "learning_rate": 1.854945901298149e-06, "loss": 0.2667, "step": 21129 }, { "epoch": 0.7251201098146877, "grad_norm": 0.8470702714805901, "learning_rate": 1.8545138911351768e-06, "loss": 0.2664, "step": 21130 }, { "epoch": 0.7251544269045985, "grad_norm": 0.7647647140845749, "learning_rate": 1.8540819198310216e-06, "loss": 0.2449, "step": 21131 }, { "epoch": 0.7251887439945093, "grad_norm": 0.8113587468813399, "learning_rate": 1.8536499873910153e-06, "loss": 0.2992, "step": 21132 }, { "epoch": 0.72522306108442, "grad_norm": 0.7721048938150122, "learning_rate": 1.8532180938204948e-06, "loss": 0.238, "step": 21133 }, { "epoch": 0.7252573781743308, "grad_norm": 0.8030506963733737, "learning_rate": 1.8527862391247953e-06, "loss": 0.2645, "step": 21134 }, { "epoch": 0.7252916952642416, "grad_norm": 0.7195300212389649, "learning_rate": 1.8523544233092532e-06, "loss": 0.2819, "step": 21135 }, { "epoch": 0.7253260123541524, "grad_norm": 0.8012067188767552, "learning_rate": 1.8519226463792022e-06, "loss": 0.2685, "step": 21136 }, { "epoch": 0.7253603294440631, "grad_norm": 0.9265729253528286, "learning_rate": 1.8514909083399768e-06, "loss": 0.3052, "step": 21137 }, { "epoch": 0.7253946465339739, "grad_norm": 0.7287907879894289, "learning_rate": 1.851059209196912e-06, "loss": 0.2526, "step": 21138 }, { "epoch": 0.7254289636238846, "grad_norm": 0.7714108211971491, "learning_rate": 1.850627548955336e-06, "loss": 0.3013, "step": 21139 }, { "epoch": 0.7254632807137955, "grad_norm": 0.985624479198393, "learning_rate": 1.8501959276205866e-06, "loss": 0.2374, "step": 21140 }, { "epoch": 0.7254975978037063, "grad_norm": 0.9769206065897692, "learning_rate": 1.8497643451979956e-06, "loss": 0.2553, "step": 21141 }, { "epoch": 0.725531914893617, "grad_norm": 0.8414106484596048, "learning_rate": 1.8493328016928902e-06, "loss": 0.3058, "step": 21142 }, { "epoch": 0.7255662319835278, "grad_norm": 0.8220133471962472, "learning_rate": 1.848901297110608e-06, "loss": 0.2367, "step": 21143 }, { "epoch": 0.7256005490734386, "grad_norm": 0.821277379260031, "learning_rate": 1.8484698314564746e-06, "loss": 0.2837, "step": 21144 }, { "epoch": 0.7256348661633494, "grad_norm": 0.8050148083149558, "learning_rate": 1.8480384047358225e-06, "loss": 0.2996, "step": 21145 }, { "epoch": 0.7256691832532601, "grad_norm": 0.8469739002775551, "learning_rate": 1.8476070169539805e-06, "loss": 0.3178, "step": 21146 }, { "epoch": 0.7257035003431709, "grad_norm": 0.8564602963786684, "learning_rate": 1.8471756681162789e-06, "loss": 0.3326, "step": 21147 }, { "epoch": 0.7257378174330816, "grad_norm": 0.7820206376693014, "learning_rate": 1.8467443582280465e-06, "loss": 0.2726, "step": 21148 }, { "epoch": 0.7257721345229925, "grad_norm": 0.6972548405872497, "learning_rate": 1.8463130872946105e-06, "loss": 0.225, "step": 21149 }, { "epoch": 0.7258064516129032, "grad_norm": 0.7370828961895018, "learning_rate": 1.8458818553213015e-06, "loss": 0.2723, "step": 21150 }, { "epoch": 0.725840768702814, "grad_norm": 0.8318136706483539, "learning_rate": 1.845450662313441e-06, "loss": 0.2893, "step": 21151 }, { "epoch": 0.7258750857927248, "grad_norm": 0.7224326867570485, "learning_rate": 1.8450195082763633e-06, "loss": 0.222, "step": 21152 }, { "epoch": 0.7259094028826355, "grad_norm": 0.7702276395571683, "learning_rate": 1.8445883932153897e-06, "loss": 0.231, "step": 21153 }, { "epoch": 0.7259437199725464, "grad_norm": 0.7712652046720074, "learning_rate": 1.844157317135848e-06, "loss": 0.2894, "step": 21154 }, { "epoch": 0.7259780370624571, "grad_norm": 0.8392079004351187, "learning_rate": 1.8437262800430628e-06, "loss": 0.2399, "step": 21155 }, { "epoch": 0.7260123541523679, "grad_norm": 0.9364218013090843, "learning_rate": 1.8432952819423594e-06, "loss": 0.3228, "step": 21156 }, { "epoch": 0.7260466712422786, "grad_norm": 0.7908121148357261, "learning_rate": 1.8428643228390625e-06, "loss": 0.3101, "step": 21157 }, { "epoch": 0.7260809883321895, "grad_norm": 0.6867936033242054, "learning_rate": 1.8424334027384967e-06, "loss": 0.2678, "step": 21158 }, { "epoch": 0.7261153054221002, "grad_norm": 0.8097366508676874, "learning_rate": 1.8420025216459842e-06, "loss": 0.3033, "step": 21159 }, { "epoch": 0.726149622512011, "grad_norm": 0.810622231617828, "learning_rate": 1.8415716795668486e-06, "loss": 0.2814, "step": 21160 }, { "epoch": 0.7261839396019217, "grad_norm": 0.8484973851797868, "learning_rate": 1.8411408765064148e-06, "loss": 0.2465, "step": 21161 }, { "epoch": 0.7262182566918325, "grad_norm": 0.8355107331387486, "learning_rate": 1.8407101124699983e-06, "loss": 0.2688, "step": 21162 }, { "epoch": 0.7262525737817433, "grad_norm": 0.8190717378209649, "learning_rate": 1.8402793874629277e-06, "loss": 0.2696, "step": 21163 }, { "epoch": 0.7262868908716541, "grad_norm": 0.8039628402242475, "learning_rate": 1.8398487014905226e-06, "loss": 0.2984, "step": 21164 }, { "epoch": 0.7263212079615649, "grad_norm": 0.7908578703415713, "learning_rate": 1.8394180545580985e-06, "loss": 0.3328, "step": 21165 }, { "epoch": 0.7263555250514756, "grad_norm": 0.7981641426992516, "learning_rate": 1.838987446670984e-06, "loss": 0.2581, "step": 21166 }, { "epoch": 0.7263898421413865, "grad_norm": 0.6256990954883042, "learning_rate": 1.8385568778344914e-06, "loss": 0.2357, "step": 21167 }, { "epoch": 0.7264241592312972, "grad_norm": 0.7575158192919372, "learning_rate": 1.8381263480539436e-06, "loss": 0.2296, "step": 21168 }, { "epoch": 0.726458476321208, "grad_norm": 0.8196363370162838, "learning_rate": 1.8376958573346575e-06, "loss": 0.2805, "step": 21169 }, { "epoch": 0.7264927934111187, "grad_norm": 0.7275318430415223, "learning_rate": 1.8372654056819527e-06, "loss": 0.2284, "step": 21170 }, { "epoch": 0.7265271105010295, "grad_norm": 0.8665433103487252, "learning_rate": 1.836834993101147e-06, "loss": 0.2289, "step": 21171 }, { "epoch": 0.7265614275909403, "grad_norm": 0.9299649006022509, "learning_rate": 1.836404619597556e-06, "loss": 0.3115, "step": 21172 }, { "epoch": 0.7265957446808511, "grad_norm": 0.7316164659893054, "learning_rate": 1.8359742851765e-06, "loss": 0.2548, "step": 21173 }, { "epoch": 0.7266300617707618, "grad_norm": 0.6706695461680324, "learning_rate": 1.8355439898432891e-06, "loss": 0.2681, "step": 21174 }, { "epoch": 0.7266643788606726, "grad_norm": 0.7675654814409897, "learning_rate": 1.835113733603246e-06, "loss": 0.2273, "step": 21175 }, { "epoch": 0.7266986959505833, "grad_norm": 0.7062950152469093, "learning_rate": 1.8346835164616816e-06, "loss": 0.2764, "step": 21176 }, { "epoch": 0.7267330130404942, "grad_norm": 0.8300383169526256, "learning_rate": 1.8342533384239115e-06, "loss": 0.2309, "step": 21177 }, { "epoch": 0.726767330130405, "grad_norm": 0.746116647098102, "learning_rate": 1.8338231994952505e-06, "loss": 0.3068, "step": 21178 }, { "epoch": 0.7268016472203157, "grad_norm": 0.7754571024593864, "learning_rate": 1.833393099681013e-06, "loss": 0.2863, "step": 21179 }, { "epoch": 0.7268359643102265, "grad_norm": 0.8274376511123087, "learning_rate": 1.8329630389865116e-06, "loss": 0.2405, "step": 21180 }, { "epoch": 0.7268702814001373, "grad_norm": 0.8451391085447695, "learning_rate": 1.8325330174170592e-06, "loss": 0.2677, "step": 21181 }, { "epoch": 0.7269045984900481, "grad_norm": 0.8685023647185921, "learning_rate": 1.83210303497797e-06, "loss": 0.2521, "step": 21182 }, { "epoch": 0.7269389155799588, "grad_norm": 0.7223367150397472, "learning_rate": 1.8316730916745517e-06, "loss": 0.2213, "step": 21183 }, { "epoch": 0.7269732326698696, "grad_norm": 0.7710500612504811, "learning_rate": 1.8312431875121195e-06, "loss": 0.2209, "step": 21184 }, { "epoch": 0.7270075497597803, "grad_norm": 0.7051400044816918, "learning_rate": 1.8308133224959852e-06, "loss": 0.2629, "step": 21185 }, { "epoch": 0.7270418668496912, "grad_norm": 0.7422428272933761, "learning_rate": 1.8303834966314543e-06, "loss": 0.2798, "step": 21186 }, { "epoch": 0.7270761839396019, "grad_norm": 0.7922075508825267, "learning_rate": 1.829953709923843e-06, "loss": 0.2971, "step": 21187 }, { "epoch": 0.7271105010295127, "grad_norm": 0.856074385259175, "learning_rate": 1.8295239623784561e-06, "loss": 0.296, "step": 21188 }, { "epoch": 0.7271448181194234, "grad_norm": 0.8289602648439217, "learning_rate": 1.829094254000604e-06, "loss": 0.326, "step": 21189 }, { "epoch": 0.7271791352093342, "grad_norm": 0.9144287844841948, "learning_rate": 1.8286645847955958e-06, "loss": 0.27, "step": 21190 }, { "epoch": 0.7272134522992451, "grad_norm": 0.7382919551038737, "learning_rate": 1.8282349547687388e-06, "loss": 0.27, "step": 21191 }, { "epoch": 0.7272477693891558, "grad_norm": 0.8109261762337048, "learning_rate": 1.8278053639253413e-06, "loss": 0.2718, "step": 21192 }, { "epoch": 0.7272820864790666, "grad_norm": 0.7164618862936183, "learning_rate": 1.8273758122707102e-06, "loss": 0.2793, "step": 21193 }, { "epoch": 0.7273164035689773, "grad_norm": 0.8896211139064496, "learning_rate": 1.8269462998101516e-06, "loss": 0.2872, "step": 21194 }, { "epoch": 0.7273507206588882, "grad_norm": 0.7917928630511357, "learning_rate": 1.8265168265489725e-06, "loss": 0.2963, "step": 21195 }, { "epoch": 0.7273850377487989, "grad_norm": 0.7038107279839692, "learning_rate": 1.82608739249248e-06, "loss": 0.2498, "step": 21196 }, { "epoch": 0.7274193548387097, "grad_norm": 0.982710886787291, "learning_rate": 1.825657997645973e-06, "loss": 0.3241, "step": 21197 }, { "epoch": 0.7274536719286204, "grad_norm": 0.7909480288255704, "learning_rate": 1.8252286420147647e-06, "loss": 0.2797, "step": 21198 }, { "epoch": 0.7274879890185312, "grad_norm": 0.6957247492541768, "learning_rate": 1.8247993256041535e-06, "loss": 0.2608, "step": 21199 }, { "epoch": 0.727522306108442, "grad_norm": 0.790435931211314, "learning_rate": 1.8243700484194444e-06, "loss": 0.2643, "step": 21200 }, { "epoch": 0.7275566231983528, "grad_norm": 0.7675229282086561, "learning_rate": 1.8239408104659406e-06, "loss": 0.2914, "step": 21201 }, { "epoch": 0.7275909402882635, "grad_norm": 0.7833939103254678, "learning_rate": 1.8235116117489453e-06, "loss": 0.2911, "step": 21202 }, { "epoch": 0.7276252573781743, "grad_norm": 0.8399622207644867, "learning_rate": 1.823082452273761e-06, "loss": 0.3032, "step": 21203 }, { "epoch": 0.7276595744680852, "grad_norm": 0.8634383093019371, "learning_rate": 1.822653332045689e-06, "loss": 0.2635, "step": 21204 }, { "epoch": 0.7276938915579959, "grad_norm": 0.7519412788640724, "learning_rate": 1.8222242510700321e-06, "loss": 0.2369, "step": 21205 }, { "epoch": 0.7277282086479067, "grad_norm": 0.7587011450004617, "learning_rate": 1.8217952093520863e-06, "loss": 0.2597, "step": 21206 }, { "epoch": 0.7277625257378174, "grad_norm": 0.8437194611297105, "learning_rate": 1.8213662068971572e-06, "loss": 0.3145, "step": 21207 }, { "epoch": 0.7277968428277282, "grad_norm": 0.9127406260699422, "learning_rate": 1.8209372437105443e-06, "loss": 0.2649, "step": 21208 }, { "epoch": 0.727831159917639, "grad_norm": 0.7539519861908156, "learning_rate": 1.820508319797542e-06, "loss": 0.2877, "step": 21209 }, { "epoch": 0.7278654770075498, "grad_norm": 0.8221726844185232, "learning_rate": 1.8200794351634553e-06, "loss": 0.2877, "step": 21210 }, { "epoch": 0.7278997940974605, "grad_norm": 0.733143241316601, "learning_rate": 1.8196505898135785e-06, "loss": 0.2404, "step": 21211 }, { "epoch": 0.7279341111873713, "grad_norm": 0.8146683096584129, "learning_rate": 1.8192217837532111e-06, "loss": 0.2718, "step": 21212 }, { "epoch": 0.727968428277282, "grad_norm": 0.8149045310485202, "learning_rate": 1.8187930169876494e-06, "loss": 0.335, "step": 21213 }, { "epoch": 0.7280027453671929, "grad_norm": 0.872387264509535, "learning_rate": 1.8183642895221916e-06, "loss": 0.3226, "step": 21214 }, { "epoch": 0.7280370624571036, "grad_norm": 0.868864969103784, "learning_rate": 1.8179356013621335e-06, "loss": 0.3033, "step": 21215 }, { "epoch": 0.7280713795470144, "grad_norm": 1.0011636570734068, "learning_rate": 1.8175069525127709e-06, "loss": 0.2653, "step": 21216 }, { "epoch": 0.7281056966369251, "grad_norm": 0.9409537363360014, "learning_rate": 1.8170783429794015e-06, "loss": 0.2677, "step": 21217 }, { "epoch": 0.728140013726836, "grad_norm": 0.827542814387347, "learning_rate": 1.8166497727673143e-06, "loss": 0.3172, "step": 21218 }, { "epoch": 0.7281743308167468, "grad_norm": 0.7691960505309569, "learning_rate": 1.8162212418818114e-06, "loss": 0.2897, "step": 21219 }, { "epoch": 0.7282086479066575, "grad_norm": 0.898020982109736, "learning_rate": 1.8157927503281797e-06, "loss": 0.277, "step": 21220 }, { "epoch": 0.7282429649965683, "grad_norm": 0.7688882074922792, "learning_rate": 1.8153642981117197e-06, "loss": 0.2998, "step": 21221 }, { "epoch": 0.728277282086479, "grad_norm": 0.8188723565312392, "learning_rate": 1.8149358852377192e-06, "loss": 0.2805, "step": 21222 }, { "epoch": 0.7283115991763899, "grad_norm": 0.8349849583093971, "learning_rate": 1.8145075117114708e-06, "loss": 0.2759, "step": 21223 }, { "epoch": 0.7283459162663006, "grad_norm": 0.906557886162174, "learning_rate": 1.814079177538271e-06, "loss": 0.246, "step": 21224 }, { "epoch": 0.7283802333562114, "grad_norm": 0.7586406026172996, "learning_rate": 1.8136508827234072e-06, "loss": 0.2927, "step": 21225 }, { "epoch": 0.7284145504461221, "grad_norm": 0.7332032312385133, "learning_rate": 1.813222627272172e-06, "loss": 0.2774, "step": 21226 }, { "epoch": 0.728448867536033, "grad_norm": 0.7287349445807655, "learning_rate": 1.812794411189856e-06, "loss": 0.2444, "step": 21227 }, { "epoch": 0.7284831846259437, "grad_norm": 0.7455680976680588, "learning_rate": 1.8123662344817505e-06, "loss": 0.2072, "step": 21228 }, { "epoch": 0.7285175017158545, "grad_norm": 0.8250653786258265, "learning_rate": 1.8119380971531404e-06, "loss": 0.2856, "step": 21229 }, { "epoch": 0.7285518188057652, "grad_norm": 0.6749786352550776, "learning_rate": 1.8115099992093193e-06, "loss": 0.2484, "step": 21230 }, { "epoch": 0.728586135895676, "grad_norm": 0.7057098091173012, "learning_rate": 1.8110819406555775e-06, "loss": 0.2778, "step": 21231 }, { "epoch": 0.7286204529855869, "grad_norm": 0.6703159196528851, "learning_rate": 1.8106539214971964e-06, "loss": 0.2459, "step": 21232 }, { "epoch": 0.7286547700754976, "grad_norm": 0.8144549155220057, "learning_rate": 1.8102259417394708e-06, "loss": 0.263, "step": 21233 }, { "epoch": 0.7286890871654084, "grad_norm": 0.7542351689717155, "learning_rate": 1.8097980013876837e-06, "loss": 0.274, "step": 21234 }, { "epoch": 0.7287234042553191, "grad_norm": 0.8732602465806288, "learning_rate": 1.8093701004471226e-06, "loss": 0.3285, "step": 21235 }, { "epoch": 0.7287577213452299, "grad_norm": 0.8321712839819025, "learning_rate": 1.8089422389230737e-06, "loss": 0.272, "step": 21236 }, { "epoch": 0.7287920384351407, "grad_norm": 0.8033520775820993, "learning_rate": 1.8085144168208235e-06, "loss": 0.2739, "step": 21237 }, { "epoch": 0.7288263555250515, "grad_norm": 0.723219133497856, "learning_rate": 1.8080866341456565e-06, "loss": 0.1944, "step": 21238 }, { "epoch": 0.7288606726149622, "grad_norm": 0.7731874850565167, "learning_rate": 1.8076588909028582e-06, "loss": 0.3042, "step": 21239 }, { "epoch": 0.728894989704873, "grad_norm": 0.8765059579594883, "learning_rate": 1.8072311870977133e-06, "loss": 0.2957, "step": 21240 }, { "epoch": 0.7289293067947838, "grad_norm": 0.7750003580102585, "learning_rate": 1.8068035227355019e-06, "loss": 0.2703, "step": 21241 }, { "epoch": 0.7289636238846946, "grad_norm": 0.6798098906991795, "learning_rate": 1.8063758978215134e-06, "loss": 0.2391, "step": 21242 }, { "epoch": 0.7289979409746054, "grad_norm": 1.6608134227337212, "learning_rate": 1.8059483123610256e-06, "loss": 0.2475, "step": 21243 }, { "epoch": 0.7290322580645161, "grad_norm": 0.8247625350039732, "learning_rate": 1.8055207663593222e-06, "loss": 0.2718, "step": 21244 }, { "epoch": 0.7290665751544269, "grad_norm": 0.799455407544775, "learning_rate": 1.8050932598216852e-06, "loss": 0.2987, "step": 21245 }, { "epoch": 0.7291008922443377, "grad_norm": 0.7652225907631653, "learning_rate": 1.8046657927533967e-06, "loss": 0.282, "step": 21246 }, { "epoch": 0.7291352093342485, "grad_norm": 0.8360451647726321, "learning_rate": 1.8042383651597362e-06, "loss": 0.25, "step": 21247 }, { "epoch": 0.7291695264241592, "grad_norm": 0.798616018719763, "learning_rate": 1.8038109770459855e-06, "loss": 0.2627, "step": 21248 }, { "epoch": 0.72920384351407, "grad_norm": 0.7500608329441254, "learning_rate": 1.803383628417425e-06, "loss": 0.252, "step": 21249 }, { "epoch": 0.7292381606039808, "grad_norm": 0.6771650681897923, "learning_rate": 1.8029563192793298e-06, "loss": 0.2337, "step": 21250 }, { "epoch": 0.7292724776938916, "grad_norm": 0.7966223961219618, "learning_rate": 1.8025290496369835e-06, "loss": 0.2855, "step": 21251 }, { "epoch": 0.7293067947838023, "grad_norm": 0.6986950509848793, "learning_rate": 1.8021018194956625e-06, "loss": 0.2265, "step": 21252 }, { "epoch": 0.7293411118737131, "grad_norm": 0.8641857326124762, "learning_rate": 1.8016746288606457e-06, "loss": 0.2926, "step": 21253 }, { "epoch": 0.7293754289636238, "grad_norm": 0.8347019560445892, "learning_rate": 1.8012474777372113e-06, "loss": 0.2509, "step": 21254 }, { "epoch": 0.7294097460535347, "grad_norm": 0.6448521023429998, "learning_rate": 1.8008203661306317e-06, "loss": 0.236, "step": 21255 }, { "epoch": 0.7294440631434455, "grad_norm": 0.7289742363977662, "learning_rate": 1.80039329404619e-06, "loss": 0.2622, "step": 21256 }, { "epoch": 0.7294783802333562, "grad_norm": 0.814772746438488, "learning_rate": 1.7999662614891571e-06, "loss": 0.2874, "step": 21257 }, { "epoch": 0.729512697323267, "grad_norm": 0.7060759703592051, "learning_rate": 1.79953926846481e-06, "loss": 0.2272, "step": 21258 }, { "epoch": 0.7295470144131777, "grad_norm": 0.7938489455378381, "learning_rate": 1.799112314978424e-06, "loss": 0.2896, "step": 21259 }, { "epoch": 0.7295813315030886, "grad_norm": 0.846927222009018, "learning_rate": 1.7986854010352733e-06, "loss": 0.3412, "step": 21260 }, { "epoch": 0.7296156485929993, "grad_norm": 0.8814380815073317, "learning_rate": 1.798258526640632e-06, "loss": 0.2849, "step": 21261 }, { "epoch": 0.7296499656829101, "grad_norm": 0.7914800320272694, "learning_rate": 1.797831691799774e-06, "loss": 0.2356, "step": 21262 }, { "epoch": 0.7296842827728208, "grad_norm": 0.7503362142032634, "learning_rate": 1.7974048965179741e-06, "loss": 0.2661, "step": 21263 }, { "epoch": 0.7297185998627317, "grad_norm": 0.8057044259948141, "learning_rate": 1.7969781408004989e-06, "loss": 0.3046, "step": 21264 }, { "epoch": 0.7297529169526424, "grad_norm": 0.7892216953003808, "learning_rate": 1.7965514246526284e-06, "loss": 0.2776, "step": 21265 }, { "epoch": 0.7297872340425532, "grad_norm": 0.7373709960181805, "learning_rate": 1.796124748079628e-06, "loss": 0.222, "step": 21266 }, { "epoch": 0.7298215511324639, "grad_norm": 0.7453012229567482, "learning_rate": 1.795698111086771e-06, "loss": 0.3255, "step": 21267 }, { "epoch": 0.7298558682223747, "grad_norm": 0.8305081573583859, "learning_rate": 1.7952715136793281e-06, "loss": 0.3062, "step": 21268 }, { "epoch": 0.7298901853122856, "grad_norm": 0.8446231537565353, "learning_rate": 1.794844955862569e-06, "loss": 0.2598, "step": 21269 }, { "epoch": 0.7299245024021963, "grad_norm": 0.7691075225556702, "learning_rate": 1.794418437641764e-06, "loss": 0.3111, "step": 21270 }, { "epoch": 0.7299588194921071, "grad_norm": 0.7639915874627518, "learning_rate": 1.7939919590221815e-06, "loss": 0.2609, "step": 21271 }, { "epoch": 0.7299931365820178, "grad_norm": 0.7407928730771538, "learning_rate": 1.7935655200090928e-06, "loss": 0.2857, "step": 21272 }, { "epoch": 0.7300274536719287, "grad_norm": 0.7479806232916977, "learning_rate": 1.7931391206077598e-06, "loss": 0.2117, "step": 21273 }, { "epoch": 0.7300617707618394, "grad_norm": 0.7633587674349257, "learning_rate": 1.7927127608234563e-06, "loss": 0.2693, "step": 21274 }, { "epoch": 0.7300960878517502, "grad_norm": 0.8312431244038327, "learning_rate": 1.7922864406614488e-06, "loss": 0.234, "step": 21275 }, { "epoch": 0.7301304049416609, "grad_norm": 0.7362118029781037, "learning_rate": 1.7918601601269987e-06, "loss": 0.2619, "step": 21276 }, { "epoch": 0.7301647220315717, "grad_norm": 0.8431032418962517, "learning_rate": 1.7914339192253798e-06, "loss": 0.282, "step": 21277 }, { "epoch": 0.7301990391214825, "grad_norm": 0.8196533497076423, "learning_rate": 1.7910077179618523e-06, "loss": 0.2562, "step": 21278 }, { "epoch": 0.7302333562113933, "grad_norm": 0.6906763650473687, "learning_rate": 1.7905815563416828e-06, "loss": 0.2714, "step": 21279 }, { "epoch": 0.730267673301304, "grad_norm": 0.7639333830302103, "learning_rate": 1.7901554343701366e-06, "loss": 0.2567, "step": 21280 }, { "epoch": 0.7303019903912148, "grad_norm": 0.7894127332501638, "learning_rate": 1.7897293520524779e-06, "loss": 0.3066, "step": 21281 }, { "epoch": 0.7303363074811255, "grad_norm": 0.8754667588904261, "learning_rate": 1.7893033093939698e-06, "loss": 0.2864, "step": 21282 }, { "epoch": 0.7303706245710364, "grad_norm": 0.8303679457791792, "learning_rate": 1.7888773063998766e-06, "loss": 0.2915, "step": 21283 }, { "epoch": 0.7304049416609472, "grad_norm": 0.8799242284391469, "learning_rate": 1.7884513430754597e-06, "loss": 0.2873, "step": 21284 }, { "epoch": 0.7304392587508579, "grad_norm": 0.8053484335692694, "learning_rate": 1.7880254194259827e-06, "loss": 0.2571, "step": 21285 }, { "epoch": 0.7304735758407687, "grad_norm": 0.8422803542786915, "learning_rate": 1.7875995354567083e-06, "loss": 0.2651, "step": 21286 }, { "epoch": 0.7305078929306795, "grad_norm": 0.8079694551821881, "learning_rate": 1.7871736911728932e-06, "loss": 0.2693, "step": 21287 }, { "epoch": 0.7305422100205903, "grad_norm": 0.7252276662902621, "learning_rate": 1.786747886579805e-06, "loss": 0.2614, "step": 21288 }, { "epoch": 0.730576527110501, "grad_norm": 0.8024022225875773, "learning_rate": 1.7863221216826982e-06, "loss": 0.2997, "step": 21289 }, { "epoch": 0.7306108442004118, "grad_norm": 0.8725181957637355, "learning_rate": 1.785896396486833e-06, "loss": 0.2745, "step": 21290 }, { "epoch": 0.7306451612903225, "grad_norm": 0.7549355932157761, "learning_rate": 1.785470710997474e-06, "loss": 0.2505, "step": 21291 }, { "epoch": 0.7306794783802334, "grad_norm": 0.7584861073557352, "learning_rate": 1.7850450652198747e-06, "loss": 0.2276, "step": 21292 }, { "epoch": 0.7307137954701441, "grad_norm": 0.712600232807678, "learning_rate": 1.7846194591592958e-06, "loss": 0.2641, "step": 21293 }, { "epoch": 0.7307481125600549, "grad_norm": 0.7472215050701436, "learning_rate": 1.7841938928209946e-06, "loss": 0.3104, "step": 21294 }, { "epoch": 0.7307824296499656, "grad_norm": 0.7990802036356073, "learning_rate": 1.783768366210229e-06, "loss": 0.2655, "step": 21295 }, { "epoch": 0.7308167467398765, "grad_norm": 0.7946343993144238, "learning_rate": 1.783342879332255e-06, "loss": 0.2491, "step": 21296 }, { "epoch": 0.7308510638297873, "grad_norm": 0.794988900346543, "learning_rate": 1.7829174321923292e-06, "loss": 0.2815, "step": 21297 }, { "epoch": 0.730885380919698, "grad_norm": 0.7911315346142018, "learning_rate": 1.7824920247957101e-06, "loss": 0.2457, "step": 21298 }, { "epoch": 0.7309196980096088, "grad_norm": 0.7611333482065474, "learning_rate": 1.7820666571476469e-06, "loss": 0.2357, "step": 21299 }, { "epoch": 0.7309540150995195, "grad_norm": 0.8434741323295295, "learning_rate": 1.7816413292534019e-06, "loss": 0.2646, "step": 21300 }, { "epoch": 0.7309883321894304, "grad_norm": 0.7953925166150319, "learning_rate": 1.7812160411182245e-06, "loss": 0.2508, "step": 21301 }, { "epoch": 0.7310226492793411, "grad_norm": 0.7005344060660511, "learning_rate": 1.78079079274737e-06, "loss": 0.2845, "step": 21302 }, { "epoch": 0.7310569663692519, "grad_norm": 0.8171683924263553, "learning_rate": 1.780365584146092e-06, "loss": 0.2629, "step": 21303 }, { "epoch": 0.7310912834591626, "grad_norm": 0.7640144472891054, "learning_rate": 1.7799404153196436e-06, "loss": 0.234, "step": 21304 }, { "epoch": 0.7311256005490734, "grad_norm": 0.8281216401879821, "learning_rate": 1.7795152862732768e-06, "loss": 0.2391, "step": 21305 }, { "epoch": 0.7311599176389842, "grad_norm": 0.8067394317900515, "learning_rate": 1.7790901970122442e-06, "loss": 0.2518, "step": 21306 }, { "epoch": 0.731194234728895, "grad_norm": 0.7593266090886663, "learning_rate": 1.7786651475417988e-06, "loss": 0.2098, "step": 21307 }, { "epoch": 0.7312285518188057, "grad_norm": 0.7851430888278786, "learning_rate": 1.7782401378671866e-06, "loss": 0.251, "step": 21308 }, { "epoch": 0.7312628689087165, "grad_norm": 0.8070413038527274, "learning_rate": 1.7778151679936645e-06, "loss": 0.2833, "step": 21309 }, { "epoch": 0.7312971859986274, "grad_norm": 0.7437791949011073, "learning_rate": 1.7773902379264774e-06, "loss": 0.2511, "step": 21310 }, { "epoch": 0.7313315030885381, "grad_norm": 0.7334599928762346, "learning_rate": 1.7769653476708764e-06, "loss": 0.2851, "step": 21311 }, { "epoch": 0.7313658201784489, "grad_norm": 0.7810979223744505, "learning_rate": 1.7765404972321115e-06, "loss": 0.274, "step": 21312 }, { "epoch": 0.7314001372683596, "grad_norm": 0.8067610210463965, "learning_rate": 1.7761156866154306e-06, "loss": 0.2213, "step": 21313 }, { "epoch": 0.7314344543582704, "grad_norm": 0.8085303445676364, "learning_rate": 1.775690915826081e-06, "loss": 0.2734, "step": 21314 }, { "epoch": 0.7314687714481812, "grad_norm": 0.705514653140077, "learning_rate": 1.7752661848693115e-06, "loss": 0.2389, "step": 21315 }, { "epoch": 0.731503088538092, "grad_norm": 0.8541338781020652, "learning_rate": 1.7748414937503683e-06, "loss": 0.3356, "step": 21316 }, { "epoch": 0.7315374056280027, "grad_norm": 0.8401113264764307, "learning_rate": 1.7744168424744983e-06, "loss": 0.2571, "step": 21317 }, { "epoch": 0.7315717227179135, "grad_norm": 0.7318825681748673, "learning_rate": 1.7739922310469482e-06, "loss": 0.2669, "step": 21318 }, { "epoch": 0.7316060398078243, "grad_norm": 0.8370195076221548, "learning_rate": 1.7735676594729622e-06, "loss": 0.2689, "step": 21319 }, { "epoch": 0.7316403568977351, "grad_norm": 0.7476899537386291, "learning_rate": 1.7731431277577865e-06, "loss": 0.2715, "step": 21320 }, { "epoch": 0.7316746739876459, "grad_norm": 0.7530292253292581, "learning_rate": 1.7727186359066667e-06, "loss": 0.2428, "step": 21321 }, { "epoch": 0.7317089910775566, "grad_norm": 0.7387661279963179, "learning_rate": 1.7722941839248426e-06, "loss": 0.2124, "step": 21322 }, { "epoch": 0.7317433081674674, "grad_norm": 0.8685786755020247, "learning_rate": 1.7718697718175637e-06, "loss": 0.297, "step": 21323 }, { "epoch": 0.7317776252573782, "grad_norm": 0.7764841061351908, "learning_rate": 1.7714453995900683e-06, "loss": 0.259, "step": 21324 }, { "epoch": 0.731811942347289, "grad_norm": 0.783187683160606, "learning_rate": 1.7710210672476014e-06, "loss": 0.2176, "step": 21325 }, { "epoch": 0.7318462594371997, "grad_norm": 0.7612909896337312, "learning_rate": 1.770596774795404e-06, "loss": 0.2713, "step": 21326 }, { "epoch": 0.7318805765271105, "grad_norm": 0.7814266174009349, "learning_rate": 1.7701725222387183e-06, "loss": 0.2359, "step": 21327 }, { "epoch": 0.7319148936170212, "grad_norm": 0.7268222025398834, "learning_rate": 1.7697483095827862e-06, "loss": 0.2951, "step": 21328 }, { "epoch": 0.7319492107069321, "grad_norm": 0.8067203853390945, "learning_rate": 1.7693241368328473e-06, "loss": 0.2707, "step": 21329 }, { "epoch": 0.7319835277968428, "grad_norm": 0.7799921015127379, "learning_rate": 1.7689000039941433e-06, "loss": 0.2835, "step": 21330 }, { "epoch": 0.7320178448867536, "grad_norm": 0.657006827080245, "learning_rate": 1.7684759110719096e-06, "loss": 0.2401, "step": 21331 }, { "epoch": 0.7320521619766643, "grad_norm": 0.842504365173669, "learning_rate": 1.7680518580713919e-06, "loss": 0.26, "step": 21332 }, { "epoch": 0.7320864790665752, "grad_norm": 0.7127337901980433, "learning_rate": 1.767627844997824e-06, "loss": 0.2932, "step": 21333 }, { "epoch": 0.732120796156486, "grad_norm": 0.7914725698315095, "learning_rate": 1.7672038718564432e-06, "loss": 0.2223, "step": 21334 }, { "epoch": 0.7321551132463967, "grad_norm": 0.7804986643162626, "learning_rate": 1.766779938652493e-06, "loss": 0.2489, "step": 21335 }, { "epoch": 0.7321894303363075, "grad_norm": 0.6949686444438627, "learning_rate": 1.766356045391206e-06, "loss": 0.3422, "step": 21336 }, { "epoch": 0.7322237474262182, "grad_norm": 0.9178695413291184, "learning_rate": 1.7659321920778195e-06, "loss": 0.2724, "step": 21337 }, { "epoch": 0.7322580645161291, "grad_norm": 0.84275854173241, "learning_rate": 1.7655083787175703e-06, "loss": 0.3073, "step": 21338 }, { "epoch": 0.7322923816060398, "grad_norm": 0.7032710594059584, "learning_rate": 1.7650846053156955e-06, "loss": 0.2783, "step": 21339 }, { "epoch": 0.7323266986959506, "grad_norm": 0.778283264098568, "learning_rate": 1.7646608718774256e-06, "loss": 0.2838, "step": 21340 }, { "epoch": 0.7323610157858613, "grad_norm": 1.1785186815734807, "learning_rate": 1.7642371784080003e-06, "loss": 0.2215, "step": 21341 }, { "epoch": 0.7323953328757722, "grad_norm": 0.788178151177185, "learning_rate": 1.7638135249126537e-06, "loss": 0.2634, "step": 21342 }, { "epoch": 0.7324296499656829, "grad_norm": 0.764468929113431, "learning_rate": 1.7633899113966146e-06, "loss": 0.2949, "step": 21343 }, { "epoch": 0.7324639670555937, "grad_norm": 0.7405525364648974, "learning_rate": 1.7629663378651235e-06, "loss": 0.2555, "step": 21344 }, { "epoch": 0.7324982841455044, "grad_norm": 0.7476119419344852, "learning_rate": 1.7625428043234073e-06, "loss": 0.2562, "step": 21345 }, { "epoch": 0.7325326012354152, "grad_norm": 0.7267143003792869, "learning_rate": 1.7621193107766998e-06, "loss": 0.2717, "step": 21346 }, { "epoch": 0.732566918325326, "grad_norm": 0.8664035817762612, "learning_rate": 1.761695857230234e-06, "loss": 0.3316, "step": 21347 }, { "epoch": 0.7326012354152368, "grad_norm": 1.0107363911962293, "learning_rate": 1.7612724436892397e-06, "loss": 0.32, "step": 21348 }, { "epoch": 0.7326355525051476, "grad_norm": 0.7949743077550524, "learning_rate": 1.7608490701589488e-06, "loss": 0.3265, "step": 21349 }, { "epoch": 0.7326698695950583, "grad_norm": 0.7367781343224246, "learning_rate": 1.7604257366445909e-06, "loss": 0.2489, "step": 21350 }, { "epoch": 0.7327041866849691, "grad_norm": 0.8343653190111955, "learning_rate": 1.7600024431513963e-06, "loss": 0.2176, "step": 21351 }, { "epoch": 0.7327385037748799, "grad_norm": 0.7190168595504974, "learning_rate": 1.7595791896845937e-06, "loss": 0.2403, "step": 21352 }, { "epoch": 0.7327728208647907, "grad_norm": 0.7688426476745407, "learning_rate": 1.7591559762494144e-06, "loss": 0.2557, "step": 21353 }, { "epoch": 0.7328071379547014, "grad_norm": 0.7983577207089797, "learning_rate": 1.7587328028510809e-06, "loss": 0.2771, "step": 21354 }, { "epoch": 0.7328414550446122, "grad_norm": 0.8039725425608107, "learning_rate": 1.7583096694948275e-06, "loss": 0.2383, "step": 21355 }, { "epoch": 0.732875772134523, "grad_norm": 0.8901360885967396, "learning_rate": 1.7578865761858777e-06, "loss": 0.2418, "step": 21356 }, { "epoch": 0.7329100892244338, "grad_norm": 0.6942677770909613, "learning_rate": 1.7574635229294572e-06, "loss": 0.2725, "step": 21357 }, { "epoch": 0.7329444063143445, "grad_norm": 0.7609851652722102, "learning_rate": 1.7570405097307986e-06, "loss": 0.2124, "step": 21358 }, { "epoch": 0.7329787234042553, "grad_norm": 0.9055908495793042, "learning_rate": 1.756617536595121e-06, "loss": 0.3062, "step": 21359 }, { "epoch": 0.733013040494166, "grad_norm": 0.8096514967670181, "learning_rate": 1.7561946035276528e-06, "loss": 0.2292, "step": 21360 }, { "epoch": 0.7330473575840769, "grad_norm": 0.8325396338694638, "learning_rate": 1.7557717105336185e-06, "loss": 0.2361, "step": 21361 }, { "epoch": 0.7330816746739877, "grad_norm": 0.6707246802308927, "learning_rate": 1.755348857618242e-06, "loss": 0.2383, "step": 21362 }, { "epoch": 0.7331159917638984, "grad_norm": 0.8373856227063514, "learning_rate": 1.7549260447867479e-06, "loss": 0.3248, "step": 21363 }, { "epoch": 0.7331503088538092, "grad_norm": 0.8374550813912712, "learning_rate": 1.7545032720443584e-06, "loss": 0.2343, "step": 21364 }, { "epoch": 0.73318462594372, "grad_norm": 0.7933528705648109, "learning_rate": 1.7540805393962989e-06, "loss": 0.3018, "step": 21365 }, { "epoch": 0.7332189430336308, "grad_norm": 0.7379592253452587, "learning_rate": 1.7536578468477872e-06, "loss": 0.2702, "step": 21366 }, { "epoch": 0.7332532601235415, "grad_norm": 0.9283176484343388, "learning_rate": 1.753235194404051e-06, "loss": 0.277, "step": 21367 }, { "epoch": 0.7332875772134523, "grad_norm": 0.7665704722695306, "learning_rate": 1.7528125820703068e-06, "loss": 0.225, "step": 21368 }, { "epoch": 0.733321894303363, "grad_norm": 0.7659141316077372, "learning_rate": 1.7523900098517772e-06, "loss": 0.2125, "step": 21369 }, { "epoch": 0.7333562113932739, "grad_norm": 0.7320035290562121, "learning_rate": 1.7519674777536827e-06, "loss": 0.2871, "step": 21370 }, { "epoch": 0.7333905284831846, "grad_norm": 0.7417581781034032, "learning_rate": 1.7515449857812434e-06, "loss": 0.2318, "step": 21371 }, { "epoch": 0.7334248455730954, "grad_norm": 0.8091361403660919, "learning_rate": 1.7511225339396786e-06, "loss": 0.2969, "step": 21372 }, { "epoch": 0.7334591626630061, "grad_norm": 0.7491249427366807, "learning_rate": 1.7507001222342068e-06, "loss": 0.2235, "step": 21373 }, { "epoch": 0.7334934797529169, "grad_norm": 0.7798250219087188, "learning_rate": 1.7502777506700486e-06, "loss": 0.2727, "step": 21374 }, { "epoch": 0.7335277968428278, "grad_norm": 0.9919362858679766, "learning_rate": 1.7498554192524164e-06, "loss": 0.2999, "step": 21375 }, { "epoch": 0.7335621139327385, "grad_norm": 0.8808585390022106, "learning_rate": 1.7494331279865345e-06, "loss": 0.2686, "step": 21376 }, { "epoch": 0.7335964310226493, "grad_norm": 0.8103149368139546, "learning_rate": 1.7490108768776131e-06, "loss": 0.2791, "step": 21377 }, { "epoch": 0.73363074811256, "grad_norm": 0.7090824904657874, "learning_rate": 1.7485886659308754e-06, "loss": 0.2706, "step": 21378 }, { "epoch": 0.7336650652024709, "grad_norm": 0.8225863842517703, "learning_rate": 1.7481664951515327e-06, "loss": 0.2241, "step": 21379 }, { "epoch": 0.7336993822923816, "grad_norm": 0.6852088380902799, "learning_rate": 1.7477443645447995e-06, "loss": 0.2584, "step": 21380 }, { "epoch": 0.7337336993822924, "grad_norm": 0.9190258313287932, "learning_rate": 1.7473222741158969e-06, "loss": 0.2903, "step": 21381 }, { "epoch": 0.7337680164722031, "grad_norm": 0.6865232516889929, "learning_rate": 1.7469002238700329e-06, "loss": 0.2649, "step": 21382 }, { "epoch": 0.7338023335621139, "grad_norm": 0.7807773690640306, "learning_rate": 1.7464782138124242e-06, "loss": 0.3015, "step": 21383 }, { "epoch": 0.7338366506520247, "grad_norm": 0.7161259590783727, "learning_rate": 1.7460562439482842e-06, "loss": 0.2405, "step": 21384 }, { "epoch": 0.7338709677419355, "grad_norm": 0.6604329548424114, "learning_rate": 1.7456343142828252e-06, "loss": 0.2406, "step": 21385 }, { "epoch": 0.7339052848318462, "grad_norm": 0.7708309219866403, "learning_rate": 1.7452124248212605e-06, "loss": 0.247, "step": 21386 }, { "epoch": 0.733939601921757, "grad_norm": 0.925006939028782, "learning_rate": 1.744790575568801e-06, "loss": 0.3115, "step": 21387 }, { "epoch": 0.7339739190116679, "grad_norm": 0.8919715381553448, "learning_rate": 1.7443687665306608e-06, "loss": 0.3026, "step": 21388 }, { "epoch": 0.7340082361015786, "grad_norm": 0.8552057946737472, "learning_rate": 1.743946997712045e-06, "loss": 0.2837, "step": 21389 }, { "epoch": 0.7340425531914894, "grad_norm": 0.791413043263855, "learning_rate": 1.7435252691181715e-06, "loss": 0.3582, "step": 21390 }, { "epoch": 0.7340768702814001, "grad_norm": 0.7597426113504642, "learning_rate": 1.743103580754245e-06, "loss": 0.2256, "step": 21391 }, { "epoch": 0.7341111873713109, "grad_norm": 0.7658985982326352, "learning_rate": 1.7426819326254768e-06, "loss": 0.2628, "step": 21392 }, { "epoch": 0.7341455044612217, "grad_norm": 0.8775003028376567, "learning_rate": 1.742260324737075e-06, "loss": 0.2229, "step": 21393 }, { "epoch": 0.7341798215511325, "grad_norm": 0.7371757579006046, "learning_rate": 1.741838757094249e-06, "loss": 0.254, "step": 21394 }, { "epoch": 0.7342141386410432, "grad_norm": 0.8470514720986994, "learning_rate": 1.741417229702207e-06, "loss": 0.3138, "step": 21395 }, { "epoch": 0.734248455730954, "grad_norm": 0.7864730663486683, "learning_rate": 1.7409957425661555e-06, "loss": 0.274, "step": 21396 }, { "epoch": 0.7342827728208647, "grad_norm": 0.7302496174049119, "learning_rate": 1.7405742956913035e-06, "loss": 0.3045, "step": 21397 }, { "epoch": 0.7343170899107756, "grad_norm": 0.9222890097248901, "learning_rate": 1.7401528890828533e-06, "loss": 0.3101, "step": 21398 }, { "epoch": 0.7343514070006864, "grad_norm": 0.7283664710932378, "learning_rate": 1.7397315227460165e-06, "loss": 0.2605, "step": 21399 }, { "epoch": 0.7343857240905971, "grad_norm": 0.7584756612735275, "learning_rate": 1.7393101966859937e-06, "loss": 0.2697, "step": 21400 }, { "epoch": 0.7344200411805079, "grad_norm": 0.9892166410151266, "learning_rate": 1.7388889109079903e-06, "loss": 0.2235, "step": 21401 }, { "epoch": 0.7344543582704187, "grad_norm": 0.793286194671625, "learning_rate": 1.7384676654172161e-06, "loss": 0.2929, "step": 21402 }, { "epoch": 0.7344886753603295, "grad_norm": 0.9199521698700545, "learning_rate": 1.738046460218869e-06, "loss": 0.3273, "step": 21403 }, { "epoch": 0.7345229924502402, "grad_norm": 0.8026452746103803, "learning_rate": 1.737625295318155e-06, "loss": 0.2659, "step": 21404 }, { "epoch": 0.734557309540151, "grad_norm": 1.2052297832476437, "learning_rate": 1.737204170720277e-06, "loss": 0.2445, "step": 21405 }, { "epoch": 0.7345916266300617, "grad_norm": 0.7915108472842671, "learning_rate": 1.7367830864304375e-06, "loss": 0.2839, "step": 21406 }, { "epoch": 0.7346259437199726, "grad_norm": 0.7534097626086553, "learning_rate": 1.7363620424538385e-06, "loss": 0.2224, "step": 21407 }, { "epoch": 0.7346602608098833, "grad_norm": 0.8223491932421061, "learning_rate": 1.735941038795681e-06, "loss": 0.248, "step": 21408 }, { "epoch": 0.7346945778997941, "grad_norm": 0.824939664505644, "learning_rate": 1.7355200754611667e-06, "loss": 0.3105, "step": 21409 }, { "epoch": 0.7347288949897048, "grad_norm": 0.7844405060003278, "learning_rate": 1.735099152455496e-06, "loss": 0.2789, "step": 21410 }, { "epoch": 0.7347632120796157, "grad_norm": 0.7565823062410086, "learning_rate": 1.7346782697838705e-06, "loss": 0.2326, "step": 21411 }, { "epoch": 0.7347975291695265, "grad_norm": 0.720631978331064, "learning_rate": 1.7342574274514846e-06, "loss": 0.2733, "step": 21412 }, { "epoch": 0.7348318462594372, "grad_norm": 0.8393924881458951, "learning_rate": 1.7338366254635437e-06, "loss": 0.2595, "step": 21413 }, { "epoch": 0.734866163349348, "grad_norm": 0.7826138884176624, "learning_rate": 1.7334158638252418e-06, "loss": 0.2531, "step": 21414 }, { "epoch": 0.7349004804392587, "grad_norm": 0.7705062467170891, "learning_rate": 1.7329951425417785e-06, "loss": 0.256, "step": 21415 }, { "epoch": 0.7349347975291696, "grad_norm": 0.7090799974686285, "learning_rate": 1.7325744616183516e-06, "loss": 0.3022, "step": 21416 }, { "epoch": 0.7349691146190803, "grad_norm": 0.8741310042783028, "learning_rate": 1.732153821060157e-06, "loss": 0.2423, "step": 21417 }, { "epoch": 0.7350034317089911, "grad_norm": 0.7995991040836284, "learning_rate": 1.7317332208723926e-06, "loss": 0.2204, "step": 21418 }, { "epoch": 0.7350377487989018, "grad_norm": 0.6851091261726999, "learning_rate": 1.7313126610602538e-06, "loss": 0.2509, "step": 21419 }, { "epoch": 0.7350720658888126, "grad_norm": 0.7074855065158668, "learning_rate": 1.7308921416289376e-06, "loss": 0.2699, "step": 21420 }, { "epoch": 0.7351063829787234, "grad_norm": 0.7882682920195404, "learning_rate": 1.7304716625836344e-06, "loss": 0.2653, "step": 21421 }, { "epoch": 0.7351407000686342, "grad_norm": 0.7547841429309148, "learning_rate": 1.7300512239295447e-06, "loss": 0.225, "step": 21422 }, { "epoch": 0.7351750171585449, "grad_norm": 0.7505509762498995, "learning_rate": 1.7296308256718585e-06, "loss": 0.2924, "step": 21423 }, { "epoch": 0.7352093342484557, "grad_norm": 0.7142216153527498, "learning_rate": 1.729210467815769e-06, "loss": 0.2384, "step": 21424 }, { "epoch": 0.7352436513383666, "grad_norm": 0.7361937280711126, "learning_rate": 1.7287901503664744e-06, "loss": 0.284, "step": 21425 }, { "epoch": 0.7352779684282773, "grad_norm": 0.7745638454173275, "learning_rate": 1.7283698733291616e-06, "loss": 0.2625, "step": 21426 }, { "epoch": 0.7353122855181881, "grad_norm": 0.7899705970330572, "learning_rate": 1.7279496367090242e-06, "loss": 0.2533, "step": 21427 }, { "epoch": 0.7353466026080988, "grad_norm": 0.7633141657885182, "learning_rate": 1.7275294405112554e-06, "loss": 0.2525, "step": 21428 }, { "epoch": 0.7353809196980096, "grad_norm": 0.8856539619381139, "learning_rate": 1.7271092847410437e-06, "loss": 0.2698, "step": 21429 }, { "epoch": 0.7354152367879204, "grad_norm": 0.6888069452298059, "learning_rate": 1.726689169403582e-06, "loss": 0.2431, "step": 21430 }, { "epoch": 0.7354495538778312, "grad_norm": 0.8298803669701513, "learning_rate": 1.726269094504059e-06, "loss": 0.2611, "step": 21431 }, { "epoch": 0.7354838709677419, "grad_norm": 0.8168502467014717, "learning_rate": 1.7258490600476657e-06, "loss": 0.2865, "step": 21432 }, { "epoch": 0.7355181880576527, "grad_norm": 0.7392121151769644, "learning_rate": 1.7254290660395874e-06, "loss": 0.2781, "step": 21433 }, { "epoch": 0.7355525051475635, "grad_norm": 0.6643307890871412, "learning_rate": 1.7250091124850177e-06, "loss": 0.2432, "step": 21434 }, { "epoch": 0.7355868222374743, "grad_norm": 0.7661172261375926, "learning_rate": 1.7245891993891412e-06, "loss": 0.3151, "step": 21435 }, { "epoch": 0.735621139327385, "grad_norm": 0.7203695147330403, "learning_rate": 1.7241693267571463e-06, "loss": 0.2732, "step": 21436 }, { "epoch": 0.7356554564172958, "grad_norm": 0.8236295876313321, "learning_rate": 1.7237494945942195e-06, "loss": 0.285, "step": 21437 }, { "epoch": 0.7356897735072065, "grad_norm": 0.7072750201059694, "learning_rate": 1.7233297029055486e-06, "loss": 0.2265, "step": 21438 }, { "epoch": 0.7357240905971174, "grad_norm": 0.7330844978494443, "learning_rate": 1.7229099516963187e-06, "loss": 0.2473, "step": 21439 }, { "epoch": 0.7357584076870282, "grad_norm": 0.7336969436441904, "learning_rate": 1.722490240971716e-06, "loss": 0.2673, "step": 21440 }, { "epoch": 0.7357927247769389, "grad_norm": 0.910623188159266, "learning_rate": 1.7220705707369246e-06, "loss": 0.3217, "step": 21441 }, { "epoch": 0.7358270418668497, "grad_norm": 0.7622653091433453, "learning_rate": 1.72165094099713e-06, "loss": 0.2325, "step": 21442 }, { "epoch": 0.7358613589567604, "grad_norm": 0.7816718398012212, "learning_rate": 1.7212313517575175e-06, "loss": 0.2567, "step": 21443 }, { "epoch": 0.7358956760466713, "grad_norm": 0.7678322935076217, "learning_rate": 1.7208118030232657e-06, "loss": 0.2317, "step": 21444 }, { "epoch": 0.735929993136582, "grad_norm": 0.7597525478223439, "learning_rate": 1.7203922947995621e-06, "loss": 0.3137, "step": 21445 }, { "epoch": 0.7359643102264928, "grad_norm": 0.7625153168078689, "learning_rate": 1.71997282709159e-06, "loss": 0.2901, "step": 21446 }, { "epoch": 0.7359986273164035, "grad_norm": 0.7374779327508382, "learning_rate": 1.7195533999045266e-06, "loss": 0.2289, "step": 21447 }, { "epoch": 0.7360329444063144, "grad_norm": 0.7781770629589565, "learning_rate": 1.719134013243559e-06, "loss": 0.2546, "step": 21448 }, { "epoch": 0.7360672614962251, "grad_norm": 0.7919981485251295, "learning_rate": 1.7187146671138644e-06, "loss": 0.2639, "step": 21449 }, { "epoch": 0.7361015785861359, "grad_norm": 0.7701513297620588, "learning_rate": 1.7182953615206239e-06, "loss": 0.2404, "step": 21450 }, { "epoch": 0.7361358956760466, "grad_norm": 0.6789773903828576, "learning_rate": 1.7178760964690182e-06, "loss": 0.233, "step": 21451 }, { "epoch": 0.7361702127659574, "grad_norm": 0.7877146963234628, "learning_rate": 1.7174568719642272e-06, "loss": 0.2933, "step": 21452 }, { "epoch": 0.7362045298558683, "grad_norm": 0.7861291277450008, "learning_rate": 1.7170376880114292e-06, "loss": 0.2994, "step": 21453 }, { "epoch": 0.736238846945779, "grad_norm": 0.7561769506607132, "learning_rate": 1.7166185446158028e-06, "loss": 0.3156, "step": 21454 }, { "epoch": 0.7362731640356898, "grad_norm": 0.8134373007889121, "learning_rate": 1.7161994417825284e-06, "loss": 0.3119, "step": 21455 }, { "epoch": 0.7363074811256005, "grad_norm": 0.7252586806554551, "learning_rate": 1.7157803795167776e-06, "loss": 0.2254, "step": 21456 }, { "epoch": 0.7363417982155114, "grad_norm": 0.780693631499545, "learning_rate": 1.715361357823735e-06, "loss": 0.242, "step": 21457 }, { "epoch": 0.7363761153054221, "grad_norm": 0.8850131037439412, "learning_rate": 1.714942376708571e-06, "loss": 0.2706, "step": 21458 }, { "epoch": 0.7364104323953329, "grad_norm": 0.8118912871997205, "learning_rate": 1.7145234361764646e-06, "loss": 0.2429, "step": 21459 }, { "epoch": 0.7364447494852436, "grad_norm": 0.6686779586332545, "learning_rate": 1.7141045362325903e-06, "loss": 0.2795, "step": 21460 }, { "epoch": 0.7364790665751544, "grad_norm": 0.7939802483847739, "learning_rate": 1.7136856768821231e-06, "loss": 0.2623, "step": 21461 }, { "epoch": 0.7365133836650652, "grad_norm": 0.7559937738421771, "learning_rate": 1.7132668581302385e-06, "loss": 0.2389, "step": 21462 }, { "epoch": 0.736547700754976, "grad_norm": 0.7717792936670849, "learning_rate": 1.7128480799821095e-06, "loss": 0.2932, "step": 21463 }, { "epoch": 0.7365820178448867, "grad_norm": 0.8258763339373869, "learning_rate": 1.7124293424429123e-06, "loss": 0.2996, "step": 21464 }, { "epoch": 0.7366163349347975, "grad_norm": 0.7963083580671972, "learning_rate": 1.7120106455178137e-06, "loss": 0.2593, "step": 21465 }, { "epoch": 0.7366506520247083, "grad_norm": 0.7367932272249409, "learning_rate": 1.7115919892119936e-06, "loss": 0.2423, "step": 21466 }, { "epoch": 0.7366849691146191, "grad_norm": 0.8863301862347571, "learning_rate": 1.711173373530619e-06, "loss": 0.2838, "step": 21467 }, { "epoch": 0.7367192862045299, "grad_norm": 0.8783777566866796, "learning_rate": 1.710754798478861e-06, "loss": 0.2515, "step": 21468 }, { "epoch": 0.7367536032944406, "grad_norm": 0.843323394496752, "learning_rate": 1.7103362640618965e-06, "loss": 0.2351, "step": 21469 }, { "epoch": 0.7367879203843514, "grad_norm": 0.8060956184978249, "learning_rate": 1.7099177702848896e-06, "loss": 0.2098, "step": 21470 }, { "epoch": 0.7368222374742622, "grad_norm": 0.804329731883821, "learning_rate": 1.7094993171530134e-06, "loss": 0.2278, "step": 21471 }, { "epoch": 0.736856554564173, "grad_norm": 0.7290784910054903, "learning_rate": 1.709080904671437e-06, "loss": 0.2741, "step": 21472 }, { "epoch": 0.7368908716540837, "grad_norm": 0.836782060218166, "learning_rate": 1.7086625328453282e-06, "loss": 0.2514, "step": 21473 }, { "epoch": 0.7369251887439945, "grad_norm": 0.7944884789017304, "learning_rate": 1.7082442016798572e-06, "loss": 0.2784, "step": 21474 }, { "epoch": 0.7369595058339052, "grad_norm": 0.7968958786039173, "learning_rate": 1.7078259111801915e-06, "loss": 0.2694, "step": 21475 }, { "epoch": 0.7369938229238161, "grad_norm": 0.794563667881425, "learning_rate": 1.7074076613514978e-06, "loss": 0.245, "step": 21476 }, { "epoch": 0.7370281400137269, "grad_norm": 0.8592788891716212, "learning_rate": 1.7069894521989439e-06, "loss": 0.2709, "step": 21477 }, { "epoch": 0.7370624571036376, "grad_norm": 0.8549538905635891, "learning_rate": 1.7065712837276977e-06, "loss": 0.2338, "step": 21478 }, { "epoch": 0.7370967741935484, "grad_norm": 0.7986880819912402, "learning_rate": 1.7061531559429201e-06, "loss": 0.2762, "step": 21479 }, { "epoch": 0.7371310912834592, "grad_norm": 0.8532112924431203, "learning_rate": 1.7057350688497837e-06, "loss": 0.245, "step": 21480 }, { "epoch": 0.73716540837337, "grad_norm": 0.7023147691640258, "learning_rate": 1.7053170224534476e-06, "loss": 0.2067, "step": 21481 }, { "epoch": 0.7371997254632807, "grad_norm": 0.7387415241733141, "learning_rate": 1.704899016759079e-06, "loss": 0.2214, "step": 21482 }, { "epoch": 0.7372340425531915, "grad_norm": 0.8285669760366083, "learning_rate": 1.7044810517718407e-06, "loss": 0.2324, "step": 21483 }, { "epoch": 0.7372683596431022, "grad_norm": 0.8308180414468344, "learning_rate": 1.7040631274968971e-06, "loss": 0.3276, "step": 21484 }, { "epoch": 0.7373026767330131, "grad_norm": 0.7158446690696229, "learning_rate": 1.7036452439394114e-06, "loss": 0.2492, "step": 21485 }, { "epoch": 0.7373369938229238, "grad_norm": 0.7630822590515285, "learning_rate": 1.7032274011045447e-06, "loss": 0.2204, "step": 21486 }, { "epoch": 0.7373713109128346, "grad_norm": 0.763996853612398, "learning_rate": 1.7028095989974619e-06, "loss": 0.3018, "step": 21487 }, { "epoch": 0.7374056280027453, "grad_norm": 0.8928486752030008, "learning_rate": 1.702391837623319e-06, "loss": 0.2716, "step": 21488 }, { "epoch": 0.7374399450926561, "grad_norm": 0.7585489119927067, "learning_rate": 1.7019741169872833e-06, "loss": 0.2517, "step": 21489 }, { "epoch": 0.737474262182567, "grad_norm": 0.7499378911621806, "learning_rate": 1.7015564370945109e-06, "loss": 0.2859, "step": 21490 }, { "epoch": 0.7375085792724777, "grad_norm": 0.7614975250591965, "learning_rate": 1.7011387979501615e-06, "loss": 0.3022, "step": 21491 }, { "epoch": 0.7375428963623885, "grad_norm": 0.7366677939053163, "learning_rate": 1.7007211995593998e-06, "loss": 0.2791, "step": 21492 }, { "epoch": 0.7375772134522992, "grad_norm": 0.9051314010570164, "learning_rate": 1.700303641927379e-06, "loss": 0.3163, "step": 21493 }, { "epoch": 0.7376115305422101, "grad_norm": 0.7439884894360571, "learning_rate": 1.6998861250592597e-06, "loss": 0.2551, "step": 21494 }, { "epoch": 0.7376458476321208, "grad_norm": 0.8472591507355778, "learning_rate": 1.699468648960199e-06, "loss": 0.3497, "step": 21495 }, { "epoch": 0.7376801647220316, "grad_norm": 0.6810468065805588, "learning_rate": 1.699051213635356e-06, "loss": 0.2859, "step": 21496 }, { "epoch": 0.7377144818119423, "grad_norm": 0.6553264824869028, "learning_rate": 1.6986338190898866e-06, "loss": 0.2278, "step": 21497 }, { "epoch": 0.7377487989018531, "grad_norm": 0.7946158954423131, "learning_rate": 1.6982164653289474e-06, "loss": 0.2598, "step": 21498 }, { "epoch": 0.7377831159917639, "grad_norm": 0.7725200441609598, "learning_rate": 1.6977991523576958e-06, "loss": 0.221, "step": 21499 }, { "epoch": 0.7378174330816747, "grad_norm": 0.7536106262289458, "learning_rate": 1.6973818801812824e-06, "loss": 0.2691, "step": 21500 }, { "epoch": 0.7378517501715854, "grad_norm": 0.9611910175453642, "learning_rate": 1.6969646488048686e-06, "loss": 0.2896, "step": 21501 }, { "epoch": 0.7378860672614962, "grad_norm": 0.9047271576990165, "learning_rate": 1.6965474582336038e-06, "loss": 0.2786, "step": 21502 }, { "epoch": 0.737920384351407, "grad_norm": 0.7504234221570703, "learning_rate": 1.6961303084726444e-06, "loss": 0.2896, "step": 21503 }, { "epoch": 0.7379547014413178, "grad_norm": 0.7400717032492377, "learning_rate": 1.6957131995271425e-06, "loss": 0.2779, "step": 21504 }, { "epoch": 0.7379890185312286, "grad_norm": 0.7158042909812053, "learning_rate": 1.6952961314022514e-06, "loss": 0.2567, "step": 21505 }, { "epoch": 0.7380233356211393, "grad_norm": 0.697358795431933, "learning_rate": 1.6948791041031242e-06, "loss": 0.3757, "step": 21506 }, { "epoch": 0.7380576527110501, "grad_norm": 0.6990888749193196, "learning_rate": 1.694462117634912e-06, "loss": 0.2565, "step": 21507 }, { "epoch": 0.7380919698009609, "grad_norm": 0.8284927626207681, "learning_rate": 1.6940451720027668e-06, "loss": 0.333, "step": 21508 }, { "epoch": 0.7381262868908717, "grad_norm": 0.7976144608375089, "learning_rate": 1.6936282672118386e-06, "loss": 0.2878, "step": 21509 }, { "epoch": 0.7381606039807824, "grad_norm": 0.7765395862007602, "learning_rate": 1.6932114032672803e-06, "loss": 0.2487, "step": 21510 }, { "epoch": 0.7381949210706932, "grad_norm": 0.7053285183139997, "learning_rate": 1.6927945801742363e-06, "loss": 0.2502, "step": 21511 }, { "epoch": 0.7382292381606039, "grad_norm": 0.7271031401938467, "learning_rate": 1.6923777979378615e-06, "loss": 0.2565, "step": 21512 }, { "epoch": 0.7382635552505148, "grad_norm": 0.679262281562798, "learning_rate": 1.6919610565633038e-06, "loss": 0.2473, "step": 21513 }, { "epoch": 0.7382978723404255, "grad_norm": 0.7630697100297418, "learning_rate": 1.691544356055707e-06, "loss": 0.2004, "step": 21514 }, { "epoch": 0.7383321894303363, "grad_norm": 0.8681093691902777, "learning_rate": 1.691127696420226e-06, "loss": 0.2847, "step": 21515 }, { "epoch": 0.738366506520247, "grad_norm": 0.7661113274615512, "learning_rate": 1.6907110776620033e-06, "loss": 0.2387, "step": 21516 }, { "epoch": 0.7384008236101579, "grad_norm": 0.772100857872014, "learning_rate": 1.6902944997861863e-06, "loss": 0.2368, "step": 21517 }, { "epoch": 0.7384351407000687, "grad_norm": 0.7755735157537635, "learning_rate": 1.6898779627979228e-06, "loss": 0.2453, "step": 21518 }, { "epoch": 0.7384694577899794, "grad_norm": 0.7907504191754767, "learning_rate": 1.6894614667023572e-06, "loss": 0.2866, "step": 21519 }, { "epoch": 0.7385037748798902, "grad_norm": 0.8352221507143366, "learning_rate": 1.689045011504636e-06, "loss": 0.3374, "step": 21520 }, { "epoch": 0.7385380919698009, "grad_norm": 0.7499093161168203, "learning_rate": 1.6886285972099025e-06, "loss": 0.2588, "step": 21521 }, { "epoch": 0.7385724090597118, "grad_norm": 0.7961826878127575, "learning_rate": 1.6882122238233046e-06, "loss": 0.2302, "step": 21522 }, { "epoch": 0.7386067261496225, "grad_norm": 0.802726023344591, "learning_rate": 1.6877958913499798e-06, "loss": 0.2459, "step": 21523 }, { "epoch": 0.7386410432395333, "grad_norm": 0.7872480200834715, "learning_rate": 1.6873795997950786e-06, "loss": 0.2918, "step": 21524 }, { "epoch": 0.738675360329444, "grad_norm": 0.7650160929598306, "learning_rate": 1.6869633491637383e-06, "loss": 0.2466, "step": 21525 }, { "epoch": 0.7387096774193549, "grad_norm": 0.7894525963700028, "learning_rate": 1.6865471394611038e-06, "loss": 0.2368, "step": 21526 }, { "epoch": 0.7387439945092656, "grad_norm": 0.7375422023686168, "learning_rate": 1.6861309706923158e-06, "loss": 0.2097, "step": 21527 }, { "epoch": 0.7387783115991764, "grad_norm": 0.7867297288936212, "learning_rate": 1.685714842862516e-06, "loss": 0.2341, "step": 21528 }, { "epoch": 0.7388126286890871, "grad_norm": 0.7966636402277978, "learning_rate": 1.6852987559768452e-06, "loss": 0.2584, "step": 21529 }, { "epoch": 0.7388469457789979, "grad_norm": 0.6929741787116611, "learning_rate": 1.684882710040444e-06, "loss": 0.2415, "step": 21530 }, { "epoch": 0.7388812628689088, "grad_norm": 0.8109097523053996, "learning_rate": 1.6844667050584535e-06, "loss": 0.2451, "step": 21531 }, { "epoch": 0.7389155799588195, "grad_norm": 0.8338247301428675, "learning_rate": 1.684050741036008e-06, "loss": 0.2823, "step": 21532 }, { "epoch": 0.7389498970487303, "grad_norm": 0.7379518635621695, "learning_rate": 1.683634817978253e-06, "loss": 0.2488, "step": 21533 }, { "epoch": 0.738984214138641, "grad_norm": 0.7661644635199288, "learning_rate": 1.6832189358903195e-06, "loss": 0.2338, "step": 21534 }, { "epoch": 0.7390185312285518, "grad_norm": 0.7462800449410192, "learning_rate": 1.682803094777351e-06, "loss": 0.2532, "step": 21535 }, { "epoch": 0.7390528483184626, "grad_norm": 0.7621315323843195, "learning_rate": 1.6823872946444848e-06, "loss": 0.2533, "step": 21536 }, { "epoch": 0.7390871654083734, "grad_norm": 0.8006302252096841, "learning_rate": 1.6819715354968519e-06, "loss": 0.2806, "step": 21537 }, { "epoch": 0.7391214824982841, "grad_norm": 0.7832856373578202, "learning_rate": 1.6815558173395957e-06, "loss": 0.3528, "step": 21538 }, { "epoch": 0.7391557995881949, "grad_norm": 0.7399872485348378, "learning_rate": 1.6811401401778466e-06, "loss": 0.2924, "step": 21539 }, { "epoch": 0.7391901166781057, "grad_norm": 0.7759859941239963, "learning_rate": 1.6807245040167418e-06, "loss": 0.2483, "step": 21540 }, { "epoch": 0.7392244337680165, "grad_norm": 0.7576134376502174, "learning_rate": 1.6803089088614161e-06, "loss": 0.2372, "step": 21541 }, { "epoch": 0.7392587508579272, "grad_norm": 0.7695684593919456, "learning_rate": 1.6798933547170026e-06, "loss": 0.2746, "step": 21542 }, { "epoch": 0.739293067947838, "grad_norm": 0.8295897520894001, "learning_rate": 1.6794778415886364e-06, "loss": 0.3171, "step": 21543 }, { "epoch": 0.7393273850377488, "grad_norm": 0.8347502164851256, "learning_rate": 1.6790623694814507e-06, "loss": 0.2893, "step": 21544 }, { "epoch": 0.7393617021276596, "grad_norm": 0.8273785167258961, "learning_rate": 1.6786469384005783e-06, "loss": 0.2667, "step": 21545 }, { "epoch": 0.7393960192175704, "grad_norm": 0.81471832596669, "learning_rate": 1.678231548351148e-06, "loss": 0.2848, "step": 21546 }, { "epoch": 0.7394303363074811, "grad_norm": 0.8286606099060531, "learning_rate": 1.677816199338297e-06, "loss": 0.2752, "step": 21547 }, { "epoch": 0.7394646533973919, "grad_norm": 0.725486171001436, "learning_rate": 1.677400891367152e-06, "loss": 0.2465, "step": 21548 }, { "epoch": 0.7394989704873027, "grad_norm": 0.7598478500323763, "learning_rate": 1.676985624442845e-06, "loss": 0.2292, "step": 21549 }, { "epoch": 0.7395332875772135, "grad_norm": 0.7507349490066195, "learning_rate": 1.6765703985705067e-06, "loss": 0.2404, "step": 21550 }, { "epoch": 0.7395676046671242, "grad_norm": 0.7808112234458735, "learning_rate": 1.6761552137552667e-06, "loss": 0.3007, "step": 21551 }, { "epoch": 0.739601921757035, "grad_norm": 0.7087949318630364, "learning_rate": 1.675740070002253e-06, "loss": 0.2421, "step": 21552 }, { "epoch": 0.7396362388469457, "grad_norm": 0.8065828232658062, "learning_rate": 1.6753249673165956e-06, "loss": 0.2922, "step": 21553 }, { "epoch": 0.7396705559368566, "grad_norm": 0.8022950355116489, "learning_rate": 1.6749099057034235e-06, "loss": 0.2458, "step": 21554 }, { "epoch": 0.7397048730267674, "grad_norm": 0.7290113459222382, "learning_rate": 1.6744948851678595e-06, "loss": 0.2747, "step": 21555 }, { "epoch": 0.7397391901166781, "grad_norm": 0.7352300899973334, "learning_rate": 1.6740799057150358e-06, "loss": 0.2361, "step": 21556 }, { "epoch": 0.7397735072065889, "grad_norm": 0.7895611674426587, "learning_rate": 1.6736649673500788e-06, "loss": 0.3145, "step": 21557 }, { "epoch": 0.7398078242964996, "grad_norm": 0.6887422597185134, "learning_rate": 1.6732500700781096e-06, "loss": 0.2466, "step": 21558 }, { "epoch": 0.7398421413864105, "grad_norm": 0.8709553897007873, "learning_rate": 1.6728352139042609e-06, "loss": 0.2626, "step": 21559 }, { "epoch": 0.7398764584763212, "grad_norm": 0.9639585306204416, "learning_rate": 1.672420398833652e-06, "loss": 0.2975, "step": 21560 }, { "epoch": 0.739910775566232, "grad_norm": 0.9812740228182163, "learning_rate": 1.6720056248714094e-06, "loss": 0.2641, "step": 21561 }, { "epoch": 0.7399450926561427, "grad_norm": 0.7597724766832755, "learning_rate": 1.671590892022657e-06, "loss": 0.2407, "step": 21562 }, { "epoch": 0.7399794097460536, "grad_norm": 0.7639647575628183, "learning_rate": 1.671176200292519e-06, "loss": 0.3009, "step": 21563 }, { "epoch": 0.7400137268359643, "grad_norm": 0.6722542974820217, "learning_rate": 1.6707615496861184e-06, "loss": 0.2632, "step": 21564 }, { "epoch": 0.7400480439258751, "grad_norm": 0.8519624673539639, "learning_rate": 1.6703469402085765e-06, "loss": 0.3261, "step": 21565 }, { "epoch": 0.7400823610157858, "grad_norm": 0.827093944763479, "learning_rate": 1.6699323718650168e-06, "loss": 0.229, "step": 21566 }, { "epoch": 0.7401166781056966, "grad_norm": 0.8163932223113185, "learning_rate": 1.6695178446605597e-06, "loss": 0.251, "step": 21567 }, { "epoch": 0.7401509951956075, "grad_norm": 0.7201316579483468, "learning_rate": 1.6691033586003286e-06, "loss": 0.2745, "step": 21568 }, { "epoch": 0.7401853122855182, "grad_norm": 0.7672467832342068, "learning_rate": 1.668688913689439e-06, "loss": 0.2675, "step": 21569 }, { "epoch": 0.740219629375429, "grad_norm": 0.8108443009083196, "learning_rate": 1.6682745099330167e-06, "loss": 0.2582, "step": 21570 }, { "epoch": 0.7402539464653397, "grad_norm": 0.7490672609785641, "learning_rate": 1.6678601473361771e-06, "loss": 0.3158, "step": 21571 }, { "epoch": 0.7402882635552506, "grad_norm": 0.8629018091308249, "learning_rate": 1.6674458259040404e-06, "loss": 0.2883, "step": 21572 }, { "epoch": 0.7403225806451613, "grad_norm": 0.8137983083212366, "learning_rate": 1.6670315456417251e-06, "loss": 0.2576, "step": 21573 }, { "epoch": 0.7403568977350721, "grad_norm": 0.7464625329519364, "learning_rate": 1.666617306554349e-06, "loss": 0.2914, "step": 21574 }, { "epoch": 0.7403912148249828, "grad_norm": 0.7415815451835593, "learning_rate": 1.6662031086470303e-06, "loss": 0.2852, "step": 21575 }, { "epoch": 0.7404255319148936, "grad_norm": 0.7341910106748298, "learning_rate": 1.6657889519248849e-06, "loss": 0.2275, "step": 21576 }, { "epoch": 0.7404598490048044, "grad_norm": 0.7545324766489887, "learning_rate": 1.6653748363930317e-06, "loss": 0.2553, "step": 21577 }, { "epoch": 0.7404941660947152, "grad_norm": 0.8204294748035653, "learning_rate": 1.6649607620565804e-06, "loss": 0.3303, "step": 21578 }, { "epoch": 0.7405284831846259, "grad_norm": 0.7849773011110712, "learning_rate": 1.664546728920653e-06, "loss": 0.2801, "step": 21579 }, { "epoch": 0.7405628002745367, "grad_norm": 0.7770197224340274, "learning_rate": 1.6641327369903637e-06, "loss": 0.2366, "step": 21580 }, { "epoch": 0.7405971173644474, "grad_norm": 0.7932422410838282, "learning_rate": 1.663718786270822e-06, "loss": 0.2337, "step": 21581 }, { "epoch": 0.7406314344543583, "grad_norm": 0.8353706254276324, "learning_rate": 1.6633048767671477e-06, "loss": 0.2448, "step": 21582 }, { "epoch": 0.7406657515442691, "grad_norm": 0.7552094547439359, "learning_rate": 1.66289100848445e-06, "loss": 0.2447, "step": 21583 }, { "epoch": 0.7407000686341798, "grad_norm": 0.7089865735066936, "learning_rate": 1.6624771814278434e-06, "loss": 0.2742, "step": 21584 }, { "epoch": 0.7407343857240906, "grad_norm": 0.8932840928347022, "learning_rate": 1.6620633956024401e-06, "loss": 0.2918, "step": 21585 }, { "epoch": 0.7407687028140014, "grad_norm": 0.8324040034798246, "learning_rate": 1.6616496510133517e-06, "loss": 0.3199, "step": 21586 }, { "epoch": 0.7408030199039122, "grad_norm": 0.6931025192889523, "learning_rate": 1.6612359476656897e-06, "loss": 0.2228, "step": 21587 }, { "epoch": 0.7408373369938229, "grad_norm": 0.8730875308361745, "learning_rate": 1.6608222855645651e-06, "loss": 0.2877, "step": 21588 }, { "epoch": 0.7408716540837337, "grad_norm": 0.8400560665417619, "learning_rate": 1.6604086647150897e-06, "loss": 0.2588, "step": 21589 }, { "epoch": 0.7409059711736444, "grad_norm": 0.8230625548123409, "learning_rate": 1.6599950851223684e-06, "loss": 0.2478, "step": 21590 }, { "epoch": 0.7409402882635553, "grad_norm": 0.727119922941968, "learning_rate": 1.6595815467915166e-06, "loss": 0.2747, "step": 21591 }, { "epoch": 0.740974605353466, "grad_norm": 0.7096115166260754, "learning_rate": 1.6591680497276392e-06, "loss": 0.2816, "step": 21592 }, { "epoch": 0.7410089224433768, "grad_norm": 0.8015188392538845, "learning_rate": 1.6587545939358452e-06, "loss": 0.2575, "step": 21593 }, { "epoch": 0.7410432395332875, "grad_norm": 0.797476081044279, "learning_rate": 1.6583411794212422e-06, "loss": 0.2744, "step": 21594 }, { "epoch": 0.7410775566231984, "grad_norm": 0.7259627455163905, "learning_rate": 1.6579278061889386e-06, "loss": 0.2464, "step": 21595 }, { "epoch": 0.7411118737131092, "grad_norm": 0.7307847400830123, "learning_rate": 1.6575144742440408e-06, "loss": 0.2754, "step": 21596 }, { "epoch": 0.7411461908030199, "grad_norm": 0.8064527460000431, "learning_rate": 1.657101183591654e-06, "loss": 0.2594, "step": 21597 }, { "epoch": 0.7411805078929307, "grad_norm": 0.7729201703316255, "learning_rate": 1.6566879342368846e-06, "loss": 0.2589, "step": 21598 }, { "epoch": 0.7412148249828414, "grad_norm": 0.814123568655077, "learning_rate": 1.6562747261848384e-06, "loss": 0.3158, "step": 21599 }, { "epoch": 0.7412491420727523, "grad_norm": 0.7674631529976438, "learning_rate": 1.6558615594406207e-06, "loss": 0.2381, "step": 21600 }, { "epoch": 0.741283459162663, "grad_norm": 0.7525313456106403, "learning_rate": 1.6554484340093318e-06, "loss": 0.2415, "step": 21601 }, { "epoch": 0.7413177762525738, "grad_norm": 0.6855711557653069, "learning_rate": 1.6550353498960792e-06, "loss": 0.2497, "step": 21602 }, { "epoch": 0.7413520933424845, "grad_norm": 1.2418460230820858, "learning_rate": 1.654622307105967e-06, "loss": 0.2579, "step": 21603 }, { "epoch": 0.7413864104323953, "grad_norm": 0.7403403370633637, "learning_rate": 1.6542093056440923e-06, "loss": 0.2316, "step": 21604 }, { "epoch": 0.7414207275223061, "grad_norm": 0.8126645444491636, "learning_rate": 1.6537963455155643e-06, "loss": 0.2925, "step": 21605 }, { "epoch": 0.7414550446122169, "grad_norm": 0.8320347019426743, "learning_rate": 1.6533834267254794e-06, "loss": 0.2572, "step": 21606 }, { "epoch": 0.7414893617021276, "grad_norm": 0.8149299097809756, "learning_rate": 1.6529705492789405e-06, "loss": 0.3188, "step": 21607 }, { "epoch": 0.7415236787920384, "grad_norm": 0.8032339074296216, "learning_rate": 1.6525577131810483e-06, "loss": 0.2366, "step": 21608 }, { "epoch": 0.7415579958819493, "grad_norm": 0.7823030670939369, "learning_rate": 1.6521449184369031e-06, "loss": 0.2811, "step": 21609 }, { "epoch": 0.74159231297186, "grad_norm": 0.8269413605731996, "learning_rate": 1.6517321650516039e-06, "loss": 0.324, "step": 21610 }, { "epoch": 0.7416266300617708, "grad_norm": 0.7820042355389399, "learning_rate": 1.65131945303025e-06, "loss": 0.2708, "step": 21611 }, { "epoch": 0.7416609471516815, "grad_norm": 0.8614660790828343, "learning_rate": 1.6509067823779418e-06, "loss": 0.2882, "step": 21612 }, { "epoch": 0.7416952642415923, "grad_norm": 0.7687636599505046, "learning_rate": 1.6504941530997726e-06, "loss": 0.2659, "step": 21613 }, { "epoch": 0.7417295813315031, "grad_norm": 0.7573626072996884, "learning_rate": 1.6500815652008455e-06, "loss": 0.3304, "step": 21614 }, { "epoch": 0.7417638984214139, "grad_norm": 0.7860788595369226, "learning_rate": 1.6496690186862541e-06, "loss": 0.2802, "step": 21615 }, { "epoch": 0.7417982155113246, "grad_norm": 0.7984319370544425, "learning_rate": 1.6492565135610962e-06, "loss": 0.2405, "step": 21616 }, { "epoch": 0.7418325326012354, "grad_norm": 0.7982862530711593, "learning_rate": 1.6488440498304665e-06, "loss": 0.2508, "step": 21617 }, { "epoch": 0.7418668496911462, "grad_norm": 0.8222119401282808, "learning_rate": 1.6484316274994621e-06, "loss": 0.2748, "step": 21618 }, { "epoch": 0.741901166781057, "grad_norm": 0.8492331161122153, "learning_rate": 1.6480192465731775e-06, "loss": 0.305, "step": 21619 }, { "epoch": 0.7419354838709677, "grad_norm": 0.7752704781420644, "learning_rate": 1.6476069070567068e-06, "loss": 0.2903, "step": 21620 }, { "epoch": 0.7419698009608785, "grad_norm": 0.8651252227046484, "learning_rate": 1.6471946089551461e-06, "loss": 0.2769, "step": 21621 }, { "epoch": 0.7420041180507893, "grad_norm": 0.7543319360283529, "learning_rate": 1.6467823522735832e-06, "loss": 0.2541, "step": 21622 }, { "epoch": 0.7420384351407001, "grad_norm": 0.8332974619213044, "learning_rate": 1.646370137017117e-06, "loss": 0.254, "step": 21623 }, { "epoch": 0.7420727522306109, "grad_norm": 0.7684828630960195, "learning_rate": 1.6459579631908396e-06, "loss": 0.3015, "step": 21624 }, { "epoch": 0.7421070693205216, "grad_norm": 0.8354102554514091, "learning_rate": 1.6455458307998373e-06, "loss": 0.2828, "step": 21625 }, { "epoch": 0.7421413864104324, "grad_norm": 0.7557532166613751, "learning_rate": 1.645133739849209e-06, "loss": 0.27, "step": 21626 }, { "epoch": 0.7421757035003431, "grad_norm": 0.7845563067379692, "learning_rate": 1.6447216903440404e-06, "loss": 0.2793, "step": 21627 }, { "epoch": 0.742210020590254, "grad_norm": 0.7689551132650799, "learning_rate": 1.6443096822894234e-06, "loss": 0.2477, "step": 21628 }, { "epoch": 0.7422443376801647, "grad_norm": 0.7813495904636988, "learning_rate": 1.6438977156904474e-06, "loss": 0.281, "step": 21629 }, { "epoch": 0.7422786547700755, "grad_norm": 0.7144897734163044, "learning_rate": 1.6434857905522027e-06, "loss": 0.2408, "step": 21630 }, { "epoch": 0.7423129718599862, "grad_norm": 0.7552842222357404, "learning_rate": 1.643073906879778e-06, "loss": 0.3308, "step": 21631 }, { "epoch": 0.7423472889498971, "grad_norm": 0.7251368991257353, "learning_rate": 1.642662064678261e-06, "loss": 0.2768, "step": 21632 }, { "epoch": 0.7423816060398079, "grad_norm": 0.8337413870899497, "learning_rate": 1.64225026395274e-06, "loss": 0.2794, "step": 21633 }, { "epoch": 0.7424159231297186, "grad_norm": 0.7258934187954905, "learning_rate": 1.6418385047083025e-06, "loss": 0.3088, "step": 21634 }, { "epoch": 0.7424502402196294, "grad_norm": 0.8732538710158854, "learning_rate": 1.641426786950036e-06, "loss": 0.3153, "step": 21635 }, { "epoch": 0.7424845573095401, "grad_norm": 0.8804511494583717, "learning_rate": 1.641015110683023e-06, "loss": 0.2904, "step": 21636 }, { "epoch": 0.742518874399451, "grad_norm": 0.7360081593087403, "learning_rate": 1.6406034759123557e-06, "loss": 0.2695, "step": 21637 }, { "epoch": 0.7425531914893617, "grad_norm": 0.7828432523092692, "learning_rate": 1.6401918826431135e-06, "loss": 0.2862, "step": 21638 }, { "epoch": 0.7425875085792725, "grad_norm": 0.7179133951386466, "learning_rate": 1.639780330880384e-06, "loss": 0.2496, "step": 21639 }, { "epoch": 0.7426218256691832, "grad_norm": 0.7342163862513749, "learning_rate": 1.63936882062925e-06, "loss": 0.2486, "step": 21640 }, { "epoch": 0.742656142759094, "grad_norm": 0.7485942682114168, "learning_rate": 1.638957351894797e-06, "loss": 0.2297, "step": 21641 }, { "epoch": 0.7426904598490048, "grad_norm": 0.7492365673521446, "learning_rate": 1.6385459246821072e-06, "loss": 0.2482, "step": 21642 }, { "epoch": 0.7427247769389156, "grad_norm": 0.8079018434023324, "learning_rate": 1.6381345389962634e-06, "loss": 0.2983, "step": 21643 }, { "epoch": 0.7427590940288263, "grad_norm": 0.7471386347993754, "learning_rate": 1.6377231948423494e-06, "loss": 0.259, "step": 21644 }, { "epoch": 0.7427934111187371, "grad_norm": 0.7284551822794634, "learning_rate": 1.6373118922254415e-06, "loss": 0.2414, "step": 21645 }, { "epoch": 0.742827728208648, "grad_norm": 0.8941047896206445, "learning_rate": 1.6369006311506268e-06, "loss": 0.2523, "step": 21646 }, { "epoch": 0.7428620452985587, "grad_norm": 0.7636208618862789, "learning_rate": 1.6364894116229857e-06, "loss": 0.296, "step": 21647 }, { "epoch": 0.7428963623884695, "grad_norm": 0.7599075031391068, "learning_rate": 1.6360782336475927e-06, "loss": 0.2574, "step": 21648 }, { "epoch": 0.7429306794783802, "grad_norm": 0.887313734212165, "learning_rate": 1.6356670972295347e-06, "loss": 0.3269, "step": 21649 }, { "epoch": 0.742964996568291, "grad_norm": 0.7697390246434274, "learning_rate": 1.6352560023738855e-06, "loss": 0.317, "step": 21650 }, { "epoch": 0.7429993136582018, "grad_norm": 0.8206026367718481, "learning_rate": 1.6348449490857253e-06, "loss": 0.2506, "step": 21651 }, { "epoch": 0.7430336307481126, "grad_norm": 0.7749895439887071, "learning_rate": 1.6344339373701328e-06, "loss": 0.3377, "step": 21652 }, { "epoch": 0.7430679478380233, "grad_norm": 0.7770629805374841, "learning_rate": 1.634022967232185e-06, "loss": 0.2475, "step": 21653 }, { "epoch": 0.7431022649279341, "grad_norm": 0.8713073631598717, "learning_rate": 1.6336120386769589e-06, "loss": 0.2545, "step": 21654 }, { "epoch": 0.7431365820178449, "grad_norm": 0.8817914941264037, "learning_rate": 1.6332011517095315e-06, "loss": 0.2489, "step": 21655 }, { "epoch": 0.7431708991077557, "grad_norm": 0.7655399491819438, "learning_rate": 1.6327903063349798e-06, "loss": 0.2738, "step": 21656 }, { "epoch": 0.7432052161976664, "grad_norm": 0.8799351744618851, "learning_rate": 1.632379502558375e-06, "loss": 0.2661, "step": 21657 }, { "epoch": 0.7432395332875772, "grad_norm": 0.7897538741692198, "learning_rate": 1.631968740384799e-06, "loss": 0.3377, "step": 21658 }, { "epoch": 0.7432738503774879, "grad_norm": 0.742091597014404, "learning_rate": 1.6315580198193198e-06, "loss": 0.2379, "step": 21659 }, { "epoch": 0.7433081674673988, "grad_norm": 0.7742905484734021, "learning_rate": 1.6311473408670148e-06, "loss": 0.2965, "step": 21660 }, { "epoch": 0.7433424845573096, "grad_norm": 0.8219531513939349, "learning_rate": 1.6307367035329563e-06, "loss": 0.3081, "step": 21661 }, { "epoch": 0.7433768016472203, "grad_norm": 0.8481719234322742, "learning_rate": 1.6303261078222166e-06, "loss": 0.2778, "step": 21662 }, { "epoch": 0.7434111187371311, "grad_norm": 0.7041027994282039, "learning_rate": 1.6299155537398719e-06, "loss": 0.2794, "step": 21663 }, { "epoch": 0.7434454358270418, "grad_norm": 0.8167019652380223, "learning_rate": 1.6295050412909897e-06, "loss": 0.3033, "step": 21664 }, { "epoch": 0.7434797529169527, "grad_norm": 0.9219631904913042, "learning_rate": 1.6290945704806433e-06, "loss": 0.3203, "step": 21665 }, { "epoch": 0.7435140700068634, "grad_norm": 0.8143764215438606, "learning_rate": 1.6286841413139038e-06, "loss": 0.2884, "step": 21666 }, { "epoch": 0.7435483870967742, "grad_norm": 0.7888949907354587, "learning_rate": 1.6282737537958403e-06, "loss": 0.2498, "step": 21667 }, { "epoch": 0.7435827041866849, "grad_norm": 0.8327178407493094, "learning_rate": 1.627863407931524e-06, "loss": 0.2544, "step": 21668 }, { "epoch": 0.7436170212765958, "grad_norm": 0.7989628731816677, "learning_rate": 1.6274531037260238e-06, "loss": 0.33, "step": 21669 }, { "epoch": 0.7436513383665065, "grad_norm": 0.724937213565293, "learning_rate": 1.6270428411844102e-06, "loss": 0.254, "step": 21670 }, { "epoch": 0.7436856554564173, "grad_norm": 0.75675993743953, "learning_rate": 1.6266326203117466e-06, "loss": 0.2787, "step": 21671 }, { "epoch": 0.743719972546328, "grad_norm": 0.7593160791818336, "learning_rate": 1.6262224411131066e-06, "loss": 0.2657, "step": 21672 }, { "epoch": 0.7437542896362388, "grad_norm": 0.841521687632769, "learning_rate": 1.6258123035935541e-06, "loss": 0.2779, "step": 21673 }, { "epoch": 0.7437886067261497, "grad_norm": 0.7015086962174727, "learning_rate": 1.625402207758156e-06, "loss": 0.2449, "step": 21674 }, { "epoch": 0.7438229238160604, "grad_norm": 0.7545667365475721, "learning_rate": 1.62499215361198e-06, "loss": 0.2812, "step": 21675 }, { "epoch": 0.7438572409059712, "grad_norm": 0.7472495688238844, "learning_rate": 1.6245821411600905e-06, "loss": 0.2559, "step": 21676 }, { "epoch": 0.7438915579958819, "grad_norm": 0.6910893714099768, "learning_rate": 1.6241721704075536e-06, "loss": 0.3228, "step": 21677 }, { "epoch": 0.7439258750857928, "grad_norm": 0.7989779564177053, "learning_rate": 1.6237622413594333e-06, "loss": 0.2541, "step": 21678 }, { "epoch": 0.7439601921757035, "grad_norm": 0.8018062231253718, "learning_rate": 1.6233523540207963e-06, "loss": 0.2259, "step": 21679 }, { "epoch": 0.7439945092656143, "grad_norm": 0.8152968335771993, "learning_rate": 1.6229425083967005e-06, "loss": 0.2306, "step": 21680 }, { "epoch": 0.744028826355525, "grad_norm": 0.751312139484449, "learning_rate": 1.6225327044922163e-06, "loss": 0.2536, "step": 21681 }, { "epoch": 0.7440631434454358, "grad_norm": 0.7740474426460552, "learning_rate": 1.6221229423124007e-06, "loss": 0.2509, "step": 21682 }, { "epoch": 0.7440974605353466, "grad_norm": 0.8777929413512111, "learning_rate": 1.6217132218623184e-06, "loss": 0.2655, "step": 21683 }, { "epoch": 0.7441317776252574, "grad_norm": 0.8171042082426471, "learning_rate": 1.6213035431470304e-06, "loss": 0.284, "step": 21684 }, { "epoch": 0.7441660947151681, "grad_norm": 0.8081859610279273, "learning_rate": 1.6208939061715978e-06, "loss": 0.3397, "step": 21685 }, { "epoch": 0.7442004118050789, "grad_norm": 0.6838451249570593, "learning_rate": 1.6204843109410818e-06, "loss": 0.2385, "step": 21686 }, { "epoch": 0.7442347288949896, "grad_norm": 0.7750436512109846, "learning_rate": 1.6200747574605413e-06, "loss": 0.2535, "step": 21687 }, { "epoch": 0.7442690459849005, "grad_norm": 0.8446105758412414, "learning_rate": 1.6196652457350386e-06, "loss": 0.2605, "step": 21688 }, { "epoch": 0.7443033630748113, "grad_norm": 0.8069920075790199, "learning_rate": 1.6192557757696276e-06, "loss": 0.2649, "step": 21689 }, { "epoch": 0.744337680164722, "grad_norm": 0.8463494200208295, "learning_rate": 1.618846347569371e-06, "loss": 0.3095, "step": 21690 }, { "epoch": 0.7443719972546328, "grad_norm": 0.7815202898399051, "learning_rate": 1.6184369611393263e-06, "loss": 0.2699, "step": 21691 }, { "epoch": 0.7444063143445436, "grad_norm": 0.8119281431993891, "learning_rate": 1.6180276164845505e-06, "loss": 0.2726, "step": 21692 }, { "epoch": 0.7444406314344544, "grad_norm": 0.7570690171996467, "learning_rate": 1.6176183136101015e-06, "loss": 0.1933, "step": 21693 }, { "epoch": 0.7444749485243651, "grad_norm": 0.8360773722133302, "learning_rate": 1.6172090525210322e-06, "loss": 0.3223, "step": 21694 }, { "epoch": 0.7445092656142759, "grad_norm": 0.7541651399801149, "learning_rate": 1.6167998332224038e-06, "loss": 0.2283, "step": 21695 }, { "epoch": 0.7445435827041866, "grad_norm": 0.8758693218761103, "learning_rate": 1.6163906557192677e-06, "loss": 0.2832, "step": 21696 }, { "epoch": 0.7445778997940975, "grad_norm": 0.6924541881232303, "learning_rate": 1.6159815200166801e-06, "loss": 0.3112, "step": 21697 }, { "epoch": 0.7446122168840082, "grad_norm": 0.8111379406967733, "learning_rate": 1.6155724261196953e-06, "loss": 0.2147, "step": 21698 }, { "epoch": 0.744646533973919, "grad_norm": 0.8278122556234625, "learning_rate": 1.6151633740333673e-06, "loss": 0.2993, "step": 21699 }, { "epoch": 0.7446808510638298, "grad_norm": 0.8446660830533191, "learning_rate": 1.6147543637627495e-06, "loss": 0.254, "step": 21700 }, { "epoch": 0.7447151681537406, "grad_norm": 0.7766256122328098, "learning_rate": 1.6143453953128952e-06, "loss": 0.2336, "step": 21701 }, { "epoch": 0.7447494852436514, "grad_norm": 0.8341102413326266, "learning_rate": 1.613936468688857e-06, "loss": 0.244, "step": 21702 }, { "epoch": 0.7447838023335621, "grad_norm": 0.8062959987643088, "learning_rate": 1.6135275838956832e-06, "loss": 0.3156, "step": 21703 }, { "epoch": 0.7448181194234729, "grad_norm": 0.8510320235242151, "learning_rate": 1.6131187409384313e-06, "loss": 0.2811, "step": 21704 }, { "epoch": 0.7448524365133836, "grad_norm": 0.817537852028199, "learning_rate": 1.6127099398221469e-06, "loss": 0.2931, "step": 21705 }, { "epoch": 0.7448867536032945, "grad_norm": 0.6730540701528486, "learning_rate": 1.6123011805518801e-06, "loss": 0.2108, "step": 21706 }, { "epoch": 0.7449210706932052, "grad_norm": 0.7442005074663516, "learning_rate": 1.611892463132686e-06, "loss": 0.2358, "step": 21707 }, { "epoch": 0.744955387783116, "grad_norm": 0.746437806185776, "learning_rate": 1.6114837875696088e-06, "loss": 0.2122, "step": 21708 }, { "epoch": 0.7449897048730267, "grad_norm": 0.8739377061454919, "learning_rate": 1.611075153867699e-06, "loss": 0.2694, "step": 21709 }, { "epoch": 0.7450240219629375, "grad_norm": 0.7745317468368518, "learning_rate": 1.6106665620320038e-06, "loss": 0.2287, "step": 21710 }, { "epoch": 0.7450583390528484, "grad_norm": 0.7379466486749743, "learning_rate": 1.6102580120675738e-06, "loss": 0.2688, "step": 21711 }, { "epoch": 0.7450926561427591, "grad_norm": 0.6735225162283353, "learning_rate": 1.609849503979451e-06, "loss": 0.2702, "step": 21712 }, { "epoch": 0.7451269732326699, "grad_norm": 0.8147505966676131, "learning_rate": 1.609441037772686e-06, "loss": 0.2763, "step": 21713 }, { "epoch": 0.7451612903225806, "grad_norm": 0.7607034378708126, "learning_rate": 1.609032613452326e-06, "loss": 0.2982, "step": 21714 }, { "epoch": 0.7451956074124915, "grad_norm": 0.7856748635134633, "learning_rate": 1.6086242310234112e-06, "loss": 0.2562, "step": 21715 }, { "epoch": 0.7452299245024022, "grad_norm": 0.7632571220561905, "learning_rate": 1.6082158904909934e-06, "loss": 0.287, "step": 21716 }, { "epoch": 0.745264241592313, "grad_norm": 0.7737834332456491, "learning_rate": 1.6078075918601122e-06, "loss": 0.2374, "step": 21717 }, { "epoch": 0.7452985586822237, "grad_norm": 0.7527009366778514, "learning_rate": 1.6073993351358136e-06, "loss": 0.3043, "step": 21718 }, { "epoch": 0.7453328757721345, "grad_norm": 0.8133021121795629, "learning_rate": 1.6069911203231403e-06, "loss": 0.312, "step": 21719 }, { "epoch": 0.7453671928620453, "grad_norm": 0.7733637521092761, "learning_rate": 1.6065829474271361e-06, "loss": 0.2685, "step": 21720 }, { "epoch": 0.7454015099519561, "grad_norm": 0.7751259930950865, "learning_rate": 1.6061748164528434e-06, "loss": 0.2996, "step": 21721 }, { "epoch": 0.7454358270418668, "grad_norm": 0.6723657580342975, "learning_rate": 1.605766727405304e-06, "loss": 0.2756, "step": 21722 }, { "epoch": 0.7454701441317776, "grad_norm": 0.8247212669995734, "learning_rate": 1.6053586802895588e-06, "loss": 0.2611, "step": 21723 }, { "epoch": 0.7455044612216885, "grad_norm": 0.7639944825015165, "learning_rate": 1.6049506751106498e-06, "loss": 0.3091, "step": 21724 }, { "epoch": 0.7455387783115992, "grad_norm": 0.7000277930337846, "learning_rate": 1.604542711873619e-06, "loss": 0.2303, "step": 21725 }, { "epoch": 0.74557309540151, "grad_norm": 0.8929878544888769, "learning_rate": 1.6041347905835003e-06, "loss": 0.2801, "step": 21726 }, { "epoch": 0.7456074124914207, "grad_norm": 0.8697327630558259, "learning_rate": 1.6037269112453407e-06, "loss": 0.2435, "step": 21727 }, { "epoch": 0.7456417295813315, "grad_norm": 0.7984723342636381, "learning_rate": 1.6033190738641735e-06, "loss": 0.2601, "step": 21728 }, { "epoch": 0.7456760466712423, "grad_norm": 0.730906350687729, "learning_rate": 1.6029112784450379e-06, "loss": 0.2292, "step": 21729 }, { "epoch": 0.7457103637611531, "grad_norm": 0.8239916393841755, "learning_rate": 1.6025035249929754e-06, "loss": 0.2724, "step": 21730 }, { "epoch": 0.7457446808510638, "grad_norm": 0.6954581951210785, "learning_rate": 1.60209581351302e-06, "loss": 0.298, "step": 21731 }, { "epoch": 0.7457789979409746, "grad_norm": 0.8658468807542893, "learning_rate": 1.6016881440102083e-06, "loss": 0.258, "step": 21732 }, { "epoch": 0.7458133150308853, "grad_norm": 0.773547959969154, "learning_rate": 1.6012805164895783e-06, "loss": 0.2303, "step": 21733 }, { "epoch": 0.7458476321207962, "grad_norm": 0.7351400505908458, "learning_rate": 1.6008729309561643e-06, "loss": 0.3009, "step": 21734 }, { "epoch": 0.7458819492107069, "grad_norm": 0.9157915623694721, "learning_rate": 1.6004653874150028e-06, "loss": 0.2317, "step": 21735 }, { "epoch": 0.7459162663006177, "grad_norm": 0.7314798306369298, "learning_rate": 1.600057885871128e-06, "loss": 0.235, "step": 21736 }, { "epoch": 0.7459505833905284, "grad_norm": 0.7548344965338623, "learning_rate": 1.5996504263295753e-06, "loss": 0.2575, "step": 21737 }, { "epoch": 0.7459849004804393, "grad_norm": 0.7271041269759215, "learning_rate": 1.5992430087953741e-06, "loss": 0.3347, "step": 21738 }, { "epoch": 0.7460192175703501, "grad_norm": 0.7642936796229827, "learning_rate": 1.598835633273564e-06, "loss": 0.3096, "step": 21739 }, { "epoch": 0.7460535346602608, "grad_norm": 0.7953216318819143, "learning_rate": 1.5984282997691725e-06, "loss": 0.3199, "step": 21740 }, { "epoch": 0.7460878517501716, "grad_norm": 0.9259146697609124, "learning_rate": 1.598021008287234e-06, "loss": 0.2606, "step": 21741 }, { "epoch": 0.7461221688400823, "grad_norm": 0.7558393767986498, "learning_rate": 1.597613758832779e-06, "loss": 0.2794, "step": 21742 }, { "epoch": 0.7461564859299932, "grad_norm": 0.9196059750212654, "learning_rate": 1.5972065514108392e-06, "loss": 0.304, "step": 21743 }, { "epoch": 0.7461908030199039, "grad_norm": 0.8108612392870896, "learning_rate": 1.5967993860264452e-06, "loss": 0.2746, "step": 21744 }, { "epoch": 0.7462251201098147, "grad_norm": 0.6727907450969738, "learning_rate": 1.5963922626846273e-06, "loss": 0.2859, "step": 21745 }, { "epoch": 0.7462594371997254, "grad_norm": 0.8575580645690281, "learning_rate": 1.5959851813904165e-06, "loss": 0.2905, "step": 21746 }, { "epoch": 0.7462937542896363, "grad_norm": 0.7765070279351197, "learning_rate": 1.5955781421488365e-06, "loss": 0.318, "step": 21747 }, { "epoch": 0.746328071379547, "grad_norm": 0.7890444071021031, "learning_rate": 1.5951711449649227e-06, "loss": 0.239, "step": 21748 }, { "epoch": 0.7463623884694578, "grad_norm": 0.7540706343157608, "learning_rate": 1.5947641898436984e-06, "loss": 0.2737, "step": 21749 }, { "epoch": 0.7463967055593685, "grad_norm": 0.8281010888467895, "learning_rate": 1.5943572767901921e-06, "loss": 0.2528, "step": 21750 }, { "epoch": 0.7464310226492793, "grad_norm": 0.7876088241027256, "learning_rate": 1.5939504058094313e-06, "loss": 0.3059, "step": 21751 }, { "epoch": 0.7464653397391902, "grad_norm": 0.719786554664156, "learning_rate": 1.5935435769064417e-06, "loss": 0.2868, "step": 21752 }, { "epoch": 0.7464996568291009, "grad_norm": 0.7556522714432184, "learning_rate": 1.5931367900862499e-06, "loss": 0.2714, "step": 21753 }, { "epoch": 0.7465339739190117, "grad_norm": 0.8567111853851404, "learning_rate": 1.5927300453538813e-06, "loss": 0.2465, "step": 21754 }, { "epoch": 0.7465682910089224, "grad_norm": 1.0024466342361127, "learning_rate": 1.5923233427143602e-06, "loss": 0.2808, "step": 21755 }, { "epoch": 0.7466026080988332, "grad_norm": 1.0964839755514417, "learning_rate": 1.5919166821727116e-06, "loss": 0.2688, "step": 21756 }, { "epoch": 0.746636925188744, "grad_norm": 0.9427548852202748, "learning_rate": 1.5915100637339586e-06, "loss": 0.2396, "step": 21757 }, { "epoch": 0.7466712422786548, "grad_norm": 0.7489170446216681, "learning_rate": 1.591103487403125e-06, "loss": 0.2908, "step": 21758 }, { "epoch": 0.7467055593685655, "grad_norm": 0.7267961577020977, "learning_rate": 1.5906969531852333e-06, "loss": 0.2345, "step": 21759 }, { "epoch": 0.7467398764584763, "grad_norm": 0.8419478401630934, "learning_rate": 1.5902904610853075e-06, "loss": 0.295, "step": 21760 }, { "epoch": 0.7467741935483871, "grad_norm": 0.7063422841356116, "learning_rate": 1.5898840111083647e-06, "loss": 0.2405, "step": 21761 }, { "epoch": 0.7468085106382979, "grad_norm": 0.8177478601660161, "learning_rate": 1.5894776032594322e-06, "loss": 0.2396, "step": 21762 }, { "epoch": 0.7468428277282086, "grad_norm": 0.85021256035324, "learning_rate": 1.5890712375435263e-06, "loss": 0.2635, "step": 21763 }, { "epoch": 0.7468771448181194, "grad_norm": 0.8412397756265556, "learning_rate": 1.5886649139656684e-06, "loss": 0.2822, "step": 21764 }, { "epoch": 0.7469114619080301, "grad_norm": 0.7888255299699424, "learning_rate": 1.5882586325308779e-06, "loss": 0.2302, "step": 21765 }, { "epoch": 0.746945778997941, "grad_norm": 0.7661628978352746, "learning_rate": 1.5878523932441747e-06, "loss": 0.281, "step": 21766 }, { "epoch": 0.7469800960878518, "grad_norm": 0.794134914541612, "learning_rate": 1.5874461961105769e-06, "loss": 0.2652, "step": 21767 }, { "epoch": 0.7470144131777625, "grad_norm": 0.8327494683387388, "learning_rate": 1.5870400411351023e-06, "loss": 0.3261, "step": 21768 }, { "epoch": 0.7470487302676733, "grad_norm": 0.8367253244815649, "learning_rate": 1.586633928322771e-06, "loss": 0.3391, "step": 21769 }, { "epoch": 0.7470830473575841, "grad_norm": 0.858804331222664, "learning_rate": 1.5862278576785944e-06, "loss": 0.2348, "step": 21770 }, { "epoch": 0.7471173644474949, "grad_norm": 0.7881558476064001, "learning_rate": 1.5858218292075961e-06, "loss": 0.2564, "step": 21771 }, { "epoch": 0.7471516815374056, "grad_norm": 0.7563712196445604, "learning_rate": 1.5854158429147864e-06, "loss": 0.2282, "step": 21772 }, { "epoch": 0.7471859986273164, "grad_norm": 0.7569816736998267, "learning_rate": 1.5850098988051815e-06, "loss": 0.267, "step": 21773 }, { "epoch": 0.7472203157172271, "grad_norm": 0.7748112710990156, "learning_rate": 1.584603996883801e-06, "loss": 0.289, "step": 21774 }, { "epoch": 0.747254632807138, "grad_norm": 0.7210577422246575, "learning_rate": 1.5841981371556547e-06, "loss": 0.2752, "step": 21775 }, { "epoch": 0.7472889498970487, "grad_norm": 0.7721344221171418, "learning_rate": 1.5837923196257583e-06, "loss": 0.2931, "step": 21776 }, { "epoch": 0.7473232669869595, "grad_norm": 0.6697756132507512, "learning_rate": 1.5833865442991247e-06, "loss": 0.2626, "step": 21777 }, { "epoch": 0.7473575840768703, "grad_norm": 0.7249885382778514, "learning_rate": 1.5829808111807664e-06, "loss": 0.2604, "step": 21778 }, { "epoch": 0.747391901166781, "grad_norm": 0.754171444179714, "learning_rate": 1.5825751202756968e-06, "loss": 0.2471, "step": 21779 }, { "epoch": 0.7474262182566919, "grad_norm": 0.7567396994338944, "learning_rate": 1.5821694715889274e-06, "loss": 0.2635, "step": 21780 }, { "epoch": 0.7474605353466026, "grad_norm": 0.756428175895949, "learning_rate": 1.5817638651254707e-06, "loss": 0.2855, "step": 21781 }, { "epoch": 0.7474948524365134, "grad_norm": 0.7652122965470789, "learning_rate": 1.5813583008903332e-06, "loss": 0.2321, "step": 21782 }, { "epoch": 0.7475291695264241, "grad_norm": 0.7259783003249155, "learning_rate": 1.580952778888531e-06, "loss": 0.2532, "step": 21783 }, { "epoch": 0.747563486616335, "grad_norm": 0.7425264486310799, "learning_rate": 1.5805472991250698e-06, "loss": 0.2118, "step": 21784 }, { "epoch": 0.7475978037062457, "grad_norm": 0.6942536872404709, "learning_rate": 1.5801418616049597e-06, "loss": 0.2305, "step": 21785 }, { "epoch": 0.7476321207961565, "grad_norm": 0.7565155538868463, "learning_rate": 1.5797364663332099e-06, "loss": 0.2368, "step": 21786 }, { "epoch": 0.7476664378860672, "grad_norm": 0.7980222507197988, "learning_rate": 1.5793311133148281e-06, "loss": 0.2516, "step": 21787 }, { "epoch": 0.747700754975978, "grad_norm": 0.7937154069389585, "learning_rate": 1.5789258025548227e-06, "loss": 0.2209, "step": 21788 }, { "epoch": 0.7477350720658889, "grad_norm": 0.8640746837506375, "learning_rate": 1.5785205340582e-06, "loss": 0.2807, "step": 21789 }, { "epoch": 0.7477693891557996, "grad_norm": 0.7854377897449754, "learning_rate": 1.5781153078299677e-06, "loss": 0.2768, "step": 21790 }, { "epoch": 0.7478037062457104, "grad_norm": 0.7219384752366171, "learning_rate": 1.5777101238751309e-06, "loss": 0.2677, "step": 21791 }, { "epoch": 0.7478380233356211, "grad_norm": 0.7106354337276425, "learning_rate": 1.577304982198697e-06, "loss": 0.2585, "step": 21792 }, { "epoch": 0.747872340425532, "grad_norm": 0.791860836903054, "learning_rate": 1.5768998828056665e-06, "loss": 0.2606, "step": 21793 }, { "epoch": 0.7479066575154427, "grad_norm": 0.8392666255337718, "learning_rate": 1.5764948257010498e-06, "loss": 0.2876, "step": 21794 }, { "epoch": 0.7479409746053535, "grad_norm": 0.8142842340123463, "learning_rate": 1.576089810889847e-06, "loss": 0.2923, "step": 21795 }, { "epoch": 0.7479752916952642, "grad_norm": 0.7627059964513278, "learning_rate": 1.5756848383770606e-06, "loss": 0.2175, "step": 21796 }, { "epoch": 0.748009608785175, "grad_norm": 0.919069013687368, "learning_rate": 1.5752799081676995e-06, "loss": 0.2864, "step": 21797 }, { "epoch": 0.7480439258750858, "grad_norm": 0.8001512870278011, "learning_rate": 1.57487502026676e-06, "loss": 0.292, "step": 21798 }, { "epoch": 0.7480782429649966, "grad_norm": 0.8856440669937055, "learning_rate": 1.5744701746792469e-06, "loss": 0.2785, "step": 21799 }, { "epoch": 0.7481125600549073, "grad_norm": 0.7519965631730858, "learning_rate": 1.574065371410161e-06, "loss": 0.2691, "step": 21800 }, { "epoch": 0.7481468771448181, "grad_norm": 0.7784481850085241, "learning_rate": 1.573660610464503e-06, "loss": 0.3101, "step": 21801 }, { "epoch": 0.7481811942347288, "grad_norm": 0.7238850981852464, "learning_rate": 1.5732558918472735e-06, "loss": 0.2842, "step": 21802 }, { "epoch": 0.7482155113246397, "grad_norm": 0.7519260494297386, "learning_rate": 1.5728512155634723e-06, "loss": 0.2143, "step": 21803 }, { "epoch": 0.7482498284145505, "grad_norm": 0.7735287574374802, "learning_rate": 1.5724465816181001e-06, "loss": 0.233, "step": 21804 }, { "epoch": 0.7482841455044612, "grad_norm": 0.8646328373588607, "learning_rate": 1.572041990016151e-06, "loss": 0.29, "step": 21805 }, { "epoch": 0.748318462594372, "grad_norm": 0.8723840145073604, "learning_rate": 1.5716374407626301e-06, "loss": 0.225, "step": 21806 }, { "epoch": 0.7483527796842828, "grad_norm": 0.897151165558516, "learning_rate": 1.5712329338625292e-06, "loss": 0.3114, "step": 21807 }, { "epoch": 0.7483870967741936, "grad_norm": 0.914485552520908, "learning_rate": 1.5708284693208486e-06, "loss": 0.2837, "step": 21808 }, { "epoch": 0.7484214138641043, "grad_norm": 0.7966232615066825, "learning_rate": 1.5704240471425835e-06, "loss": 0.2457, "step": 21809 }, { "epoch": 0.7484557309540151, "grad_norm": 0.7403872228179452, "learning_rate": 1.5700196673327318e-06, "loss": 0.2314, "step": 21810 }, { "epoch": 0.7484900480439258, "grad_norm": 0.7804282785320611, "learning_rate": 1.5696153298962875e-06, "loss": 0.2917, "step": 21811 }, { "epoch": 0.7485243651338367, "grad_norm": 0.856641792821368, "learning_rate": 1.5692110348382462e-06, "loss": 0.2589, "step": 21812 }, { "epoch": 0.7485586822237474, "grad_norm": 1.0345246862954378, "learning_rate": 1.5688067821636044e-06, "loss": 0.3293, "step": 21813 }, { "epoch": 0.7485929993136582, "grad_norm": 0.8346463469429354, "learning_rate": 1.5684025718773516e-06, "loss": 0.2422, "step": 21814 }, { "epoch": 0.7486273164035689, "grad_norm": 0.7347588281096817, "learning_rate": 1.567998403984487e-06, "loss": 0.2466, "step": 21815 }, { "epoch": 0.7486616334934798, "grad_norm": 2.3141714639837616, "learning_rate": 1.5675942784899972e-06, "loss": 0.2461, "step": 21816 }, { "epoch": 0.7486959505833906, "grad_norm": 0.6960388413404587, "learning_rate": 1.5671901953988804e-06, "loss": 0.2449, "step": 21817 }, { "epoch": 0.7487302676733013, "grad_norm": 0.7914196098287438, "learning_rate": 1.5667861547161284e-06, "loss": 0.2467, "step": 21818 }, { "epoch": 0.7487645847632121, "grad_norm": 0.8073054227239276, "learning_rate": 1.5663821564467269e-06, "loss": 0.2992, "step": 21819 }, { "epoch": 0.7487989018531228, "grad_norm": 0.6807633586545655, "learning_rate": 1.5659782005956743e-06, "loss": 0.2178, "step": 21820 }, { "epoch": 0.7488332189430337, "grad_norm": 0.7506932478085141, "learning_rate": 1.5655742871679553e-06, "loss": 0.2425, "step": 21821 }, { "epoch": 0.7488675360329444, "grad_norm": 0.739465932851273, "learning_rate": 1.5651704161685622e-06, "loss": 0.2592, "step": 21822 }, { "epoch": 0.7489018531228552, "grad_norm": 0.732087691992708, "learning_rate": 1.5647665876024836e-06, "loss": 0.2684, "step": 21823 }, { "epoch": 0.7489361702127659, "grad_norm": 0.7484213578235703, "learning_rate": 1.5643628014747093e-06, "loss": 0.2444, "step": 21824 }, { "epoch": 0.7489704873026767, "grad_norm": 0.7345820458690869, "learning_rate": 1.5639590577902265e-06, "loss": 0.253, "step": 21825 }, { "epoch": 0.7490048043925875, "grad_norm": 0.9232440826622453, "learning_rate": 1.563555356554024e-06, "loss": 0.2586, "step": 21826 }, { "epoch": 0.7490391214824983, "grad_norm": 0.7782886544850022, "learning_rate": 1.5631516977710898e-06, "loss": 0.2679, "step": 21827 }, { "epoch": 0.749073438572409, "grad_norm": 0.7958930547898847, "learning_rate": 1.5627480814464064e-06, "loss": 0.2739, "step": 21828 }, { "epoch": 0.7491077556623198, "grad_norm": 0.7831634833411332, "learning_rate": 1.5623445075849659e-06, "loss": 0.268, "step": 21829 }, { "epoch": 0.7491420727522307, "grad_norm": 0.812822779098682, "learning_rate": 1.5619409761917499e-06, "loss": 0.2909, "step": 21830 }, { "epoch": 0.7491763898421414, "grad_norm": 0.8161422211517746, "learning_rate": 1.5615374872717442e-06, "loss": 0.3068, "step": 21831 }, { "epoch": 0.7492107069320522, "grad_norm": 0.8669841011196723, "learning_rate": 1.561134040829934e-06, "loss": 0.3216, "step": 21832 }, { "epoch": 0.7492450240219629, "grad_norm": 0.8515766464181536, "learning_rate": 1.5607306368713037e-06, "loss": 0.303, "step": 21833 }, { "epoch": 0.7492793411118737, "grad_norm": 0.814709649784033, "learning_rate": 1.560327275400837e-06, "loss": 0.2645, "step": 21834 }, { "epoch": 0.7493136582017845, "grad_norm": 0.7306966292828454, "learning_rate": 1.5599239564235158e-06, "loss": 0.2549, "step": 21835 }, { "epoch": 0.7493479752916953, "grad_norm": 0.8255430739930857, "learning_rate": 1.5595206799443253e-06, "loss": 0.2722, "step": 21836 }, { "epoch": 0.749382292381606, "grad_norm": 0.8799551296804026, "learning_rate": 1.5591174459682424e-06, "loss": 0.2773, "step": 21837 }, { "epoch": 0.7494166094715168, "grad_norm": 0.8462347439644974, "learning_rate": 1.5587142545002554e-06, "loss": 0.2537, "step": 21838 }, { "epoch": 0.7494509265614276, "grad_norm": 0.7356016231699346, "learning_rate": 1.5583111055453392e-06, "loss": 0.2688, "step": 21839 }, { "epoch": 0.7494852436513384, "grad_norm": 0.8404885938027729, "learning_rate": 1.5579079991084755e-06, "loss": 0.3056, "step": 21840 }, { "epoch": 0.7495195607412491, "grad_norm": 0.7773172640111355, "learning_rate": 1.5575049351946487e-06, "loss": 0.2695, "step": 21841 }, { "epoch": 0.7495538778311599, "grad_norm": 0.794359377075833, "learning_rate": 1.5571019138088334e-06, "loss": 0.2431, "step": 21842 }, { "epoch": 0.7495881949210706, "grad_norm": 0.7785436631055008, "learning_rate": 1.556698934956009e-06, "loss": 0.2472, "step": 21843 }, { "epoch": 0.7496225120109815, "grad_norm": 0.9050267194319626, "learning_rate": 1.5562959986411558e-06, "loss": 0.3039, "step": 21844 }, { "epoch": 0.7496568291008923, "grad_norm": 0.7295449472948022, "learning_rate": 1.55589310486925e-06, "loss": 0.2461, "step": 21845 }, { "epoch": 0.749691146190803, "grad_norm": 0.8079155638011106, "learning_rate": 1.5554902536452698e-06, "loss": 0.2415, "step": 21846 }, { "epoch": 0.7497254632807138, "grad_norm": 0.8191696278107923, "learning_rate": 1.5550874449741916e-06, "loss": 0.2716, "step": 21847 }, { "epoch": 0.7497597803706245, "grad_norm": 0.647389179633327, "learning_rate": 1.5546846788609914e-06, "loss": 0.2088, "step": 21848 }, { "epoch": 0.7497940974605354, "grad_norm": 0.7996575771854362, "learning_rate": 1.5542819553106453e-06, "loss": 0.3155, "step": 21849 }, { "epoch": 0.7498284145504461, "grad_norm": 0.7399308681448138, "learning_rate": 1.5538792743281294e-06, "loss": 0.2356, "step": 21850 }, { "epoch": 0.7498627316403569, "grad_norm": 0.7116924531317139, "learning_rate": 1.5534766359184144e-06, "loss": 0.2436, "step": 21851 }, { "epoch": 0.7498970487302676, "grad_norm": 0.7791382999176172, "learning_rate": 1.5530740400864809e-06, "loss": 0.2728, "step": 21852 }, { "epoch": 0.7499313658201785, "grad_norm": 0.6972792586441702, "learning_rate": 1.5526714868372972e-06, "loss": 0.2984, "step": 21853 }, { "epoch": 0.7499656829100892, "grad_norm": 0.7195665455760301, "learning_rate": 1.552268976175838e-06, "loss": 0.2494, "step": 21854 }, { "epoch": 0.75, "grad_norm": 0.8026957970524471, "learning_rate": 1.5518665081070755e-06, "loss": 0.2751, "step": 21855 }, { "epoch": 0.7500343170899108, "grad_norm": 0.7445072945598363, "learning_rate": 1.5514640826359829e-06, "loss": 0.2666, "step": 21856 }, { "epoch": 0.7500686341798215, "grad_norm": 0.7651529310284864, "learning_rate": 1.5510616997675304e-06, "loss": 0.22, "step": 21857 }, { "epoch": 0.7501029512697324, "grad_norm": 0.8552172974930775, "learning_rate": 1.5506593595066893e-06, "loss": 0.2805, "step": 21858 }, { "epoch": 0.7501372683596431, "grad_norm": 0.6518508335031744, "learning_rate": 1.5502570618584327e-06, "loss": 0.1892, "step": 21859 }, { "epoch": 0.7501715854495539, "grad_norm": 0.7211553811336524, "learning_rate": 1.5498548068277241e-06, "loss": 0.2222, "step": 21860 }, { "epoch": 0.7502059025394646, "grad_norm": 0.7577140422753478, "learning_rate": 1.5494525944195404e-06, "loss": 0.2621, "step": 21861 }, { "epoch": 0.7502402196293755, "grad_norm": 0.7872486939510961, "learning_rate": 1.5490504246388455e-06, "loss": 0.2555, "step": 21862 }, { "epoch": 0.7502745367192862, "grad_norm": 0.9200196198936565, "learning_rate": 1.5486482974906075e-06, "loss": 0.3028, "step": 21863 }, { "epoch": 0.750308853809197, "grad_norm": 0.9040617233801148, "learning_rate": 1.5482462129797992e-06, "loss": 0.2419, "step": 21864 }, { "epoch": 0.7503431708991077, "grad_norm": 0.7425819312287429, "learning_rate": 1.5478441711113828e-06, "loss": 0.232, "step": 21865 }, { "epoch": 0.7503774879890185, "grad_norm": 0.7864579031177305, "learning_rate": 1.5474421718903271e-06, "loss": 0.266, "step": 21866 }, { "epoch": 0.7504118050789294, "grad_norm": 0.7285192559277069, "learning_rate": 1.547040215321598e-06, "loss": 0.2717, "step": 21867 }, { "epoch": 0.7504461221688401, "grad_norm": 0.7661907122766652, "learning_rate": 1.546638301410161e-06, "loss": 0.2531, "step": 21868 }, { "epoch": 0.7504804392587509, "grad_norm": 0.8053039833991281, "learning_rate": 1.5462364301609817e-06, "loss": 0.2286, "step": 21869 }, { "epoch": 0.7505147563486616, "grad_norm": 0.7685138822443521, "learning_rate": 1.5458346015790242e-06, "loss": 0.3485, "step": 21870 }, { "epoch": 0.7505490734385724, "grad_norm": 0.7141054984327284, "learning_rate": 1.5454328156692555e-06, "loss": 0.2376, "step": 21871 }, { "epoch": 0.7505833905284832, "grad_norm": 0.8660182147368414, "learning_rate": 1.5450310724366329e-06, "loss": 0.2535, "step": 21872 }, { "epoch": 0.750617707618394, "grad_norm": 0.8001483785146224, "learning_rate": 1.5446293718861266e-06, "loss": 0.2815, "step": 21873 }, { "epoch": 0.7506520247083047, "grad_norm": 0.6916737403918155, "learning_rate": 1.5442277140226947e-06, "loss": 0.2025, "step": 21874 }, { "epoch": 0.7506863417982155, "grad_norm": 0.654400488038289, "learning_rate": 1.5438260988512998e-06, "loss": 0.2799, "step": 21875 }, { "epoch": 0.7507206588881263, "grad_norm": 0.7643302316112224, "learning_rate": 1.5434245263769044e-06, "loss": 0.2518, "step": 21876 }, { "epoch": 0.7507549759780371, "grad_norm": 0.7795679596429194, "learning_rate": 1.5430229966044686e-06, "loss": 0.2363, "step": 21877 }, { "epoch": 0.7507892930679478, "grad_norm": 0.7678878999012778, "learning_rate": 1.542621509538953e-06, "loss": 0.243, "step": 21878 }, { "epoch": 0.7508236101578586, "grad_norm": 0.9123167266933485, "learning_rate": 1.5422200651853182e-06, "loss": 0.2295, "step": 21879 }, { "epoch": 0.7508579272477693, "grad_norm": 0.7278527690324489, "learning_rate": 1.5418186635485227e-06, "loss": 0.2322, "step": 21880 }, { "epoch": 0.7508922443376802, "grad_norm": 0.7896256318939332, "learning_rate": 1.5414173046335263e-06, "loss": 0.2854, "step": 21881 }, { "epoch": 0.750926561427591, "grad_norm": 0.8594161192339341, "learning_rate": 1.5410159884452879e-06, "loss": 0.245, "step": 21882 }, { "epoch": 0.7509608785175017, "grad_norm": 0.7972136625908266, "learning_rate": 1.5406147149887612e-06, "loss": 0.2667, "step": 21883 }, { "epoch": 0.7509951956074125, "grad_norm": 0.8659596068326509, "learning_rate": 1.540213484268908e-06, "loss": 0.2256, "step": 21884 }, { "epoch": 0.7510295126973233, "grad_norm": 0.7538945029901561, "learning_rate": 1.5398122962906854e-06, "loss": 0.2241, "step": 21885 }, { "epoch": 0.7510638297872341, "grad_norm": 0.8128110315169406, "learning_rate": 1.5394111510590443e-06, "loss": 0.2789, "step": 21886 }, { "epoch": 0.7510981468771448, "grad_norm": 0.7150970793657364, "learning_rate": 1.5390100485789473e-06, "loss": 0.2417, "step": 21887 }, { "epoch": 0.7511324639670556, "grad_norm": 0.7149915151266453, "learning_rate": 1.5386089888553436e-06, "loss": 0.2257, "step": 21888 }, { "epoch": 0.7511667810569663, "grad_norm": 0.7551561675484623, "learning_rate": 1.538207971893191e-06, "loss": 0.2593, "step": 21889 }, { "epoch": 0.7512010981468772, "grad_norm": 0.8540324462509775, "learning_rate": 1.5378069976974425e-06, "loss": 0.3056, "step": 21890 }, { "epoch": 0.7512354152367879, "grad_norm": 0.7413599193843377, "learning_rate": 1.5374060662730517e-06, "loss": 0.2374, "step": 21891 }, { "epoch": 0.7512697323266987, "grad_norm": 0.7964242323873103, "learning_rate": 1.5370051776249728e-06, "loss": 0.2522, "step": 21892 }, { "epoch": 0.7513040494166094, "grad_norm": 0.7495159853037223, "learning_rate": 1.536604331758157e-06, "loss": 0.2562, "step": 21893 }, { "epoch": 0.7513383665065202, "grad_norm": 0.7475997769877077, "learning_rate": 1.5362035286775578e-06, "loss": 0.2259, "step": 21894 }, { "epoch": 0.7513726835964311, "grad_norm": 0.8074087948392606, "learning_rate": 1.5358027683881233e-06, "loss": 0.32, "step": 21895 }, { "epoch": 0.7514070006863418, "grad_norm": 0.8060653468674054, "learning_rate": 1.5354020508948098e-06, "loss": 0.2688, "step": 21896 }, { "epoch": 0.7514413177762526, "grad_norm": 0.7956870697482138, "learning_rate": 1.5350013762025622e-06, "loss": 0.2688, "step": 21897 }, { "epoch": 0.7514756348661633, "grad_norm": 0.7909248981205186, "learning_rate": 1.5346007443163336e-06, "loss": 0.2946, "step": 21898 }, { "epoch": 0.7515099519560742, "grad_norm": 0.7427536676943518, "learning_rate": 1.5342001552410725e-06, "loss": 0.2921, "step": 21899 }, { "epoch": 0.7515442690459849, "grad_norm": 0.7948727686727575, "learning_rate": 1.5337996089817276e-06, "loss": 0.2785, "step": 21900 }, { "epoch": 0.7515785861358957, "grad_norm": 0.8571001631026107, "learning_rate": 1.5333991055432474e-06, "loss": 0.2571, "step": 21901 }, { "epoch": 0.7516129032258064, "grad_norm": 0.7008939495366612, "learning_rate": 1.5329986449305795e-06, "loss": 0.2516, "step": 21902 }, { "epoch": 0.7516472203157172, "grad_norm": 0.7352725193747586, "learning_rate": 1.5325982271486729e-06, "loss": 0.2654, "step": 21903 }, { "epoch": 0.751681537405628, "grad_norm": 0.6833609474275687, "learning_rate": 1.5321978522024694e-06, "loss": 0.2752, "step": 21904 }, { "epoch": 0.7517158544955388, "grad_norm": 0.8146078797932277, "learning_rate": 1.5317975200969215e-06, "loss": 0.271, "step": 21905 }, { "epoch": 0.7517501715854495, "grad_norm": 0.8364818193832024, "learning_rate": 1.5313972308369702e-06, "loss": 0.3072, "step": 21906 }, { "epoch": 0.7517844886753603, "grad_norm": 0.8171152519042489, "learning_rate": 1.53099698442756e-06, "loss": 0.2515, "step": 21907 }, { "epoch": 0.7518188057652712, "grad_norm": 0.7899141625831623, "learning_rate": 1.5305967808736416e-06, "loss": 0.3117, "step": 21908 }, { "epoch": 0.7518531228551819, "grad_norm": 0.8361349519073494, "learning_rate": 1.5301966201801533e-06, "loss": 0.2619, "step": 21909 }, { "epoch": 0.7518874399450927, "grad_norm": 0.7939597322557506, "learning_rate": 1.52979650235204e-06, "loss": 0.2581, "step": 21910 }, { "epoch": 0.7519217570350034, "grad_norm": 0.7659726445732925, "learning_rate": 1.5293964273942451e-06, "loss": 0.2469, "step": 21911 }, { "epoch": 0.7519560741249142, "grad_norm": 0.8351476685829938, "learning_rate": 1.528996395311711e-06, "loss": 0.2981, "step": 21912 }, { "epoch": 0.751990391214825, "grad_norm": 0.7234206777430219, "learning_rate": 1.52859640610938e-06, "loss": 0.2592, "step": 21913 }, { "epoch": 0.7520247083047358, "grad_norm": 0.8020303342759141, "learning_rate": 1.5281964597921929e-06, "loss": 0.285, "step": 21914 }, { "epoch": 0.7520590253946465, "grad_norm": 0.8623771665554505, "learning_rate": 1.5277965563650905e-06, "loss": 0.2583, "step": 21915 }, { "epoch": 0.7520933424845573, "grad_norm": 0.7882331650010855, "learning_rate": 1.5273966958330139e-06, "loss": 0.2507, "step": 21916 }, { "epoch": 0.752127659574468, "grad_norm": 0.7717677740346117, "learning_rate": 1.5269968782009038e-06, "loss": 0.2301, "step": 21917 }, { "epoch": 0.7521619766643789, "grad_norm": 0.8032097642633828, "learning_rate": 1.5265971034736948e-06, "loss": 0.2374, "step": 21918 }, { "epoch": 0.7521962937542896, "grad_norm": 0.832504648237397, "learning_rate": 1.5261973716563328e-06, "loss": 0.2581, "step": 21919 }, { "epoch": 0.7522306108442004, "grad_norm": 0.8553465727555031, "learning_rate": 1.5257976827537502e-06, "loss": 0.2802, "step": 21920 }, { "epoch": 0.7522649279341111, "grad_norm": 1.030830652596115, "learning_rate": 1.5253980367708865e-06, "loss": 0.3, "step": 21921 }, { "epoch": 0.752299245024022, "grad_norm": 0.751703978024787, "learning_rate": 1.5249984337126795e-06, "loss": 0.3039, "step": 21922 }, { "epoch": 0.7523335621139328, "grad_norm": 0.789013155706994, "learning_rate": 1.524598873584065e-06, "loss": 0.2413, "step": 21923 }, { "epoch": 0.7523678792038435, "grad_norm": 0.6838642835202435, "learning_rate": 1.5241993563899798e-06, "loss": 0.1911, "step": 21924 }, { "epoch": 0.7524021962937543, "grad_norm": 0.7129986378964214, "learning_rate": 1.523799882135359e-06, "loss": 0.2664, "step": 21925 }, { "epoch": 0.752436513383665, "grad_norm": 0.8581414189804133, "learning_rate": 1.5234004508251393e-06, "loss": 0.357, "step": 21926 }, { "epoch": 0.7524708304735759, "grad_norm": 0.9166714738591994, "learning_rate": 1.5230010624642504e-06, "loss": 0.2821, "step": 21927 }, { "epoch": 0.7525051475634866, "grad_norm": 0.8442352521087767, "learning_rate": 1.5226017170576317e-06, "loss": 0.2668, "step": 21928 }, { "epoch": 0.7525394646533974, "grad_norm": 0.8858465323316544, "learning_rate": 1.522202414610216e-06, "loss": 0.2623, "step": 21929 }, { "epoch": 0.7525737817433081, "grad_norm": 0.7205242073673918, "learning_rate": 1.5218031551269314e-06, "loss": 0.2252, "step": 21930 }, { "epoch": 0.752608098833219, "grad_norm": 0.7597318397714448, "learning_rate": 1.5214039386127166e-06, "loss": 0.251, "step": 21931 }, { "epoch": 0.7526424159231297, "grad_norm": 0.7541540549660675, "learning_rate": 1.521004765072499e-06, "loss": 0.2404, "step": 21932 }, { "epoch": 0.7526767330130405, "grad_norm": 0.834363427049907, "learning_rate": 1.520605634511212e-06, "loss": 0.2696, "step": 21933 }, { "epoch": 0.7527110501029513, "grad_norm": 0.8026875238857252, "learning_rate": 1.520206546933785e-06, "loss": 0.2588, "step": 21934 }, { "epoch": 0.752745367192862, "grad_norm": 0.7775581996966685, "learning_rate": 1.5198075023451498e-06, "loss": 0.2331, "step": 21935 }, { "epoch": 0.7527796842827729, "grad_norm": 0.74881450073838, "learning_rate": 1.5194085007502352e-06, "loss": 0.2413, "step": 21936 }, { "epoch": 0.7528140013726836, "grad_norm": 0.7467376752996447, "learning_rate": 1.5190095421539701e-06, "loss": 0.2625, "step": 21937 }, { "epoch": 0.7528483184625944, "grad_norm": 0.7405861422853923, "learning_rate": 1.5186106265612855e-06, "loss": 0.2652, "step": 21938 }, { "epoch": 0.7528826355525051, "grad_norm": 0.8261376734394735, "learning_rate": 1.5182117539771047e-06, "loss": 0.2435, "step": 21939 }, { "epoch": 0.7529169526424159, "grad_norm": 0.7903238764661807, "learning_rate": 1.5178129244063617e-06, "loss": 0.2631, "step": 21940 }, { "epoch": 0.7529512697323267, "grad_norm": 0.7863700134713788, "learning_rate": 1.517414137853978e-06, "loss": 0.2463, "step": 21941 }, { "epoch": 0.7529855868222375, "grad_norm": 0.8092210660174725, "learning_rate": 1.5170153943248829e-06, "loss": 0.3281, "step": 21942 }, { "epoch": 0.7530199039121482, "grad_norm": 0.8362297205406045, "learning_rate": 1.5166166938240012e-06, "loss": 0.3306, "step": 21943 }, { "epoch": 0.753054221002059, "grad_norm": 0.7728992296601662, "learning_rate": 1.5162180363562595e-06, "loss": 0.2956, "step": 21944 }, { "epoch": 0.7530885380919699, "grad_norm": 0.8073794571864875, "learning_rate": 1.515819421926582e-06, "loss": 0.2476, "step": 21945 }, { "epoch": 0.7531228551818806, "grad_norm": 1.0562780005904717, "learning_rate": 1.5154208505398937e-06, "loss": 0.2393, "step": 21946 }, { "epoch": 0.7531571722717914, "grad_norm": 0.7461770411873929, "learning_rate": 1.5150223222011178e-06, "loss": 0.2508, "step": 21947 }, { "epoch": 0.7531914893617021, "grad_norm": 0.7754078129812915, "learning_rate": 1.5146238369151784e-06, "loss": 0.226, "step": 21948 }, { "epoch": 0.7532258064516129, "grad_norm": 0.7033345601064404, "learning_rate": 1.5142253946869988e-06, "loss": 0.2833, "step": 21949 }, { "epoch": 0.7532601235415237, "grad_norm": 0.7035597496796785, "learning_rate": 1.5138269955214979e-06, "loss": 0.2338, "step": 21950 }, { "epoch": 0.7532944406314345, "grad_norm": 0.8314181503687373, "learning_rate": 1.5134286394236014e-06, "loss": 0.2533, "step": 21951 }, { "epoch": 0.7533287577213452, "grad_norm": 0.786610486287099, "learning_rate": 1.513030326398231e-06, "loss": 0.2605, "step": 21952 }, { "epoch": 0.753363074811256, "grad_norm": 0.8883794762533489, "learning_rate": 1.5126320564503022e-06, "loss": 0.2447, "step": 21953 }, { "epoch": 0.7533973919011668, "grad_norm": 0.7071254790010837, "learning_rate": 1.5122338295847417e-06, "loss": 0.2344, "step": 21954 }, { "epoch": 0.7534317089910776, "grad_norm": 0.7840635442759804, "learning_rate": 1.5118356458064638e-06, "loss": 0.2368, "step": 21955 }, { "epoch": 0.7534660260809883, "grad_norm": 0.8077884052001569, "learning_rate": 1.51143750512039e-06, "loss": 0.2654, "step": 21956 }, { "epoch": 0.7535003431708991, "grad_norm": 0.784644554290788, "learning_rate": 1.5110394075314388e-06, "loss": 0.2797, "step": 21957 }, { "epoch": 0.7535346602608098, "grad_norm": 0.7358329830718398, "learning_rate": 1.5106413530445274e-06, "loss": 0.2555, "step": 21958 }, { "epoch": 0.7535689773507207, "grad_norm": 0.9040146722456301, "learning_rate": 1.5102433416645746e-06, "loss": 0.3231, "step": 21959 }, { "epoch": 0.7536032944406315, "grad_norm": 0.806153241504705, "learning_rate": 1.5098453733964962e-06, "loss": 0.2939, "step": 21960 }, { "epoch": 0.7536376115305422, "grad_norm": 0.8812922373070208, "learning_rate": 1.509447448245211e-06, "loss": 0.2877, "step": 21961 }, { "epoch": 0.753671928620453, "grad_norm": 0.81262293151251, "learning_rate": 1.5090495662156295e-06, "loss": 0.2927, "step": 21962 }, { "epoch": 0.7537062457103637, "grad_norm": 0.720851266668938, "learning_rate": 1.508651727312674e-06, "loss": 0.2749, "step": 21963 }, { "epoch": 0.7537405628002746, "grad_norm": 0.7975746193178926, "learning_rate": 1.5082539315412537e-06, "loss": 0.2675, "step": 21964 }, { "epoch": 0.7537748798901853, "grad_norm": 0.843641055138819, "learning_rate": 1.5078561789062857e-06, "loss": 0.2169, "step": 21965 }, { "epoch": 0.7538091969800961, "grad_norm": 0.7719026218837355, "learning_rate": 1.5074584694126827e-06, "loss": 0.322, "step": 21966 }, { "epoch": 0.7538435140700068, "grad_norm": 0.7879594899818775, "learning_rate": 1.507060803065359e-06, "loss": 0.3213, "step": 21967 }, { "epoch": 0.7538778311599177, "grad_norm": 0.8505229772345654, "learning_rate": 1.5066631798692261e-06, "loss": 0.2896, "step": 21968 }, { "epoch": 0.7539121482498284, "grad_norm": 0.8578860551895419, "learning_rate": 1.5062655998291969e-06, "loss": 0.3082, "step": 21969 }, { "epoch": 0.7539464653397392, "grad_norm": 0.8789220046248868, "learning_rate": 1.5058680629501849e-06, "loss": 0.2382, "step": 21970 }, { "epoch": 0.7539807824296499, "grad_norm": 0.7997140632032498, "learning_rate": 1.5054705692370958e-06, "loss": 0.2515, "step": 21971 }, { "epoch": 0.7540150995195607, "grad_norm": 0.7714416696114491, "learning_rate": 1.5050731186948454e-06, "loss": 0.2756, "step": 21972 }, { "epoch": 0.7540494166094716, "grad_norm": 0.6753621877259725, "learning_rate": 1.5046757113283416e-06, "loss": 0.2452, "step": 21973 }, { "epoch": 0.7540837336993823, "grad_norm": 0.7838683615993779, "learning_rate": 1.5042783471424943e-06, "loss": 0.313, "step": 21974 }, { "epoch": 0.7541180507892931, "grad_norm": 0.7307671171320754, "learning_rate": 1.5038810261422144e-06, "loss": 0.3002, "step": 21975 }, { "epoch": 0.7541523678792038, "grad_norm": 0.7248349472964788, "learning_rate": 1.5034837483324043e-06, "loss": 0.2295, "step": 21976 }, { "epoch": 0.7541866849691147, "grad_norm": 0.7135299377175361, "learning_rate": 1.50308651371798e-06, "loss": 0.2948, "step": 21977 }, { "epoch": 0.7542210020590254, "grad_norm": 0.7781557928765637, "learning_rate": 1.502689322303843e-06, "loss": 0.2901, "step": 21978 }, { "epoch": 0.7542553191489362, "grad_norm": 0.7664902069174324, "learning_rate": 1.5022921740949016e-06, "loss": 0.2948, "step": 21979 }, { "epoch": 0.7542896362388469, "grad_norm": 0.783133279900643, "learning_rate": 1.5018950690960626e-06, "loss": 0.3246, "step": 21980 }, { "epoch": 0.7543239533287577, "grad_norm": 0.8133404167485901, "learning_rate": 1.5014980073122314e-06, "loss": 0.2637, "step": 21981 }, { "epoch": 0.7543582704186685, "grad_norm": 0.8198932974925621, "learning_rate": 1.5011009887483141e-06, "loss": 0.2679, "step": 21982 }, { "epoch": 0.7543925875085793, "grad_norm": 0.6841752182551204, "learning_rate": 1.5007040134092143e-06, "loss": 0.2598, "step": 21983 }, { "epoch": 0.75442690459849, "grad_norm": 0.7877634718263369, "learning_rate": 1.5003070812998382e-06, "loss": 0.351, "step": 21984 }, { "epoch": 0.7544612216884008, "grad_norm": 0.7302590785447627, "learning_rate": 1.4999101924250853e-06, "loss": 0.2849, "step": 21985 }, { "epoch": 0.7544955387783115, "grad_norm": 0.9571483870189034, "learning_rate": 1.4995133467898636e-06, "loss": 0.2326, "step": 21986 }, { "epoch": 0.7545298558682224, "grad_norm": 0.848364102788641, "learning_rate": 1.4991165443990724e-06, "loss": 0.2537, "step": 21987 }, { "epoch": 0.7545641729581332, "grad_norm": 0.7550345424724423, "learning_rate": 1.498719785257614e-06, "loss": 0.2275, "step": 21988 }, { "epoch": 0.7545984900480439, "grad_norm": 0.7900898189451523, "learning_rate": 1.4983230693703915e-06, "loss": 0.2715, "step": 21989 }, { "epoch": 0.7546328071379547, "grad_norm": 0.8341912257014881, "learning_rate": 1.497926396742304e-06, "loss": 0.2934, "step": 21990 }, { "epoch": 0.7546671242278655, "grad_norm": 0.7635631521393808, "learning_rate": 1.4975297673782535e-06, "loss": 0.2697, "step": 21991 }, { "epoch": 0.7547014413177763, "grad_norm": 0.6810226147836661, "learning_rate": 1.4971331812831386e-06, "loss": 0.2941, "step": 21992 }, { "epoch": 0.754735758407687, "grad_norm": 0.7435181420327592, "learning_rate": 1.4967366384618614e-06, "loss": 0.2949, "step": 21993 }, { "epoch": 0.7547700754975978, "grad_norm": 0.8408959827249266, "learning_rate": 1.4963401389193154e-06, "loss": 0.2653, "step": 21994 }, { "epoch": 0.7548043925875085, "grad_norm": 0.7099429842051671, "learning_rate": 1.4959436826604034e-06, "loss": 0.2533, "step": 21995 }, { "epoch": 0.7548387096774194, "grad_norm": 0.8974406736353127, "learning_rate": 1.4955472696900237e-06, "loss": 0.2799, "step": 21996 }, { "epoch": 0.7548730267673301, "grad_norm": 0.7402279125865852, "learning_rate": 1.4951509000130687e-06, "loss": 0.3163, "step": 21997 }, { "epoch": 0.7549073438572409, "grad_norm": 0.8806641265290882, "learning_rate": 1.494754573634441e-06, "loss": 0.3079, "step": 21998 }, { "epoch": 0.7549416609471516, "grad_norm": 0.7588443590658086, "learning_rate": 1.494358290559032e-06, "loss": 0.2685, "step": 21999 }, { "epoch": 0.7549759780370625, "grad_norm": 0.7904776253060827, "learning_rate": 1.4939620507917397e-06, "loss": 0.3091, "step": 22000 }, { "epoch": 0.7550102951269733, "grad_norm": 0.7157452456637828, "learning_rate": 1.4935658543374581e-06, "loss": 0.2819, "step": 22001 }, { "epoch": 0.755044612216884, "grad_norm": 0.7857683575843438, "learning_rate": 1.493169701201082e-06, "loss": 0.2809, "step": 22002 }, { "epoch": 0.7550789293067948, "grad_norm": 0.8489469387080104, "learning_rate": 1.4927735913875063e-06, "loss": 0.309, "step": 22003 }, { "epoch": 0.7551132463967055, "grad_norm": 0.7681521584340274, "learning_rate": 1.4923775249016237e-06, "loss": 0.2611, "step": 22004 }, { "epoch": 0.7551475634866164, "grad_norm": 0.7867567338245443, "learning_rate": 1.4919815017483276e-06, "loss": 0.2431, "step": 22005 }, { "epoch": 0.7551818805765271, "grad_norm": 0.7752479299554242, "learning_rate": 1.4915855219325093e-06, "loss": 0.2501, "step": 22006 }, { "epoch": 0.7552161976664379, "grad_norm": 0.7277248467579628, "learning_rate": 1.4911895854590635e-06, "loss": 0.3013, "step": 22007 }, { "epoch": 0.7552505147563486, "grad_norm": 0.9517064352375245, "learning_rate": 1.4907936923328765e-06, "loss": 0.2616, "step": 22008 }, { "epoch": 0.7552848318462594, "grad_norm": 0.8903329278862961, "learning_rate": 1.490397842558845e-06, "loss": 0.2447, "step": 22009 }, { "epoch": 0.7553191489361702, "grad_norm": 0.7545822109888468, "learning_rate": 1.4900020361418548e-06, "loss": 0.2347, "step": 22010 }, { "epoch": 0.755353466026081, "grad_norm": 0.7626036670111283, "learning_rate": 1.4896062730867973e-06, "loss": 0.2398, "step": 22011 }, { "epoch": 0.7553877831159918, "grad_norm": 0.8051889163815121, "learning_rate": 1.4892105533985617e-06, "loss": 0.2498, "step": 22012 }, { "epoch": 0.7554221002059025, "grad_norm": 0.8882031993409575, "learning_rate": 1.4888148770820365e-06, "loss": 0.2194, "step": 22013 }, { "epoch": 0.7554564172958134, "grad_norm": 0.8249848487155458, "learning_rate": 1.4884192441421096e-06, "loss": 0.2769, "step": 22014 }, { "epoch": 0.7554907343857241, "grad_norm": 0.8925580120516327, "learning_rate": 1.488023654583669e-06, "loss": 0.2908, "step": 22015 }, { "epoch": 0.7555250514756349, "grad_norm": 0.8293809310259344, "learning_rate": 1.4876281084116028e-06, "loss": 0.2261, "step": 22016 }, { "epoch": 0.7555593685655456, "grad_norm": 0.8862845608854643, "learning_rate": 1.4872326056307934e-06, "loss": 0.2784, "step": 22017 }, { "epoch": 0.7555936856554564, "grad_norm": 0.7188533273363764, "learning_rate": 1.4868371462461312e-06, "loss": 0.2782, "step": 22018 }, { "epoch": 0.7556280027453672, "grad_norm": 0.8628114925112335, "learning_rate": 1.4864417302625017e-06, "loss": 0.2905, "step": 22019 }, { "epoch": 0.755662319835278, "grad_norm": 0.7919911745637901, "learning_rate": 1.4860463576847857e-06, "loss": 0.2757, "step": 22020 }, { "epoch": 0.7556966369251887, "grad_norm": 0.8467460239329722, "learning_rate": 1.4856510285178722e-06, "loss": 0.2369, "step": 22021 }, { "epoch": 0.7557309540150995, "grad_norm": 0.8147324112533197, "learning_rate": 1.485255742766642e-06, "loss": 0.2995, "step": 22022 }, { "epoch": 0.7557652711050104, "grad_norm": 0.6575221516865858, "learning_rate": 1.484860500435979e-06, "loss": 0.2211, "step": 22023 }, { "epoch": 0.7557995881949211, "grad_norm": 0.7847023172494735, "learning_rate": 1.4844653015307665e-06, "loss": 0.2654, "step": 22024 }, { "epoch": 0.7558339052848319, "grad_norm": 0.8210059427733306, "learning_rate": 1.484070146055886e-06, "loss": 0.3295, "step": 22025 }, { "epoch": 0.7558682223747426, "grad_norm": 0.7177849564280594, "learning_rate": 1.4836750340162204e-06, "loss": 0.2287, "step": 22026 }, { "epoch": 0.7559025394646534, "grad_norm": 0.710556235731648, "learning_rate": 1.4832799654166497e-06, "loss": 0.2239, "step": 22027 }, { "epoch": 0.7559368565545642, "grad_norm": 0.7426255282710985, "learning_rate": 1.4828849402620566e-06, "loss": 0.2579, "step": 22028 }, { "epoch": 0.755971173644475, "grad_norm": 0.7496553842931674, "learning_rate": 1.4824899585573161e-06, "loss": 0.2484, "step": 22029 }, { "epoch": 0.7560054907343857, "grad_norm": 0.7402244842936969, "learning_rate": 1.4820950203073143e-06, "loss": 0.2891, "step": 22030 }, { "epoch": 0.7560398078242965, "grad_norm": 0.7940371317503722, "learning_rate": 1.4817001255169256e-06, "loss": 0.3175, "step": 22031 }, { "epoch": 0.7560741249142072, "grad_norm": 0.7080113269290087, "learning_rate": 1.48130527419103e-06, "loss": 0.2913, "step": 22032 }, { "epoch": 0.7561084420041181, "grad_norm": 0.797362982629118, "learning_rate": 1.4809104663345053e-06, "loss": 0.3039, "step": 22033 }, { "epoch": 0.7561427590940288, "grad_norm": 0.7503735893198605, "learning_rate": 1.4805157019522287e-06, "loss": 0.239, "step": 22034 }, { "epoch": 0.7561770761839396, "grad_norm": 0.8502047938348071, "learning_rate": 1.480120981049078e-06, "loss": 0.2659, "step": 22035 }, { "epoch": 0.7562113932738503, "grad_norm": 0.8150680949481403, "learning_rate": 1.479726303629928e-06, "loss": 0.3026, "step": 22036 }, { "epoch": 0.7562457103637612, "grad_norm": 0.7315378786601955, "learning_rate": 1.4793316696996557e-06, "loss": 0.2488, "step": 22037 }, { "epoch": 0.756280027453672, "grad_norm": 0.7586100404325371, "learning_rate": 1.4789370792631357e-06, "loss": 0.2996, "step": 22038 }, { "epoch": 0.7563143445435827, "grad_norm": 0.836039500701556, "learning_rate": 1.4785425323252429e-06, "loss": 0.2822, "step": 22039 }, { "epoch": 0.7563486616334935, "grad_norm": 0.7666752311990759, "learning_rate": 1.4781480288908517e-06, "loss": 0.3204, "step": 22040 }, { "epoch": 0.7563829787234042, "grad_norm": 0.6897861984220612, "learning_rate": 1.4777535689648353e-06, "loss": 0.2753, "step": 22041 }, { "epoch": 0.7564172958133151, "grad_norm": 0.7071896526155523, "learning_rate": 1.4773591525520686e-06, "loss": 0.2344, "step": 22042 }, { "epoch": 0.7564516129032258, "grad_norm": 0.8531020699414081, "learning_rate": 1.47696477965742e-06, "loss": 0.2597, "step": 22043 }, { "epoch": 0.7564859299931366, "grad_norm": 0.7423139746186396, "learning_rate": 1.4765704502857664e-06, "loss": 0.2498, "step": 22044 }, { "epoch": 0.7565202470830473, "grad_norm": 0.815935872817222, "learning_rate": 1.476176164441976e-06, "loss": 0.262, "step": 22045 }, { "epoch": 0.7565545641729582, "grad_norm": 0.7483122908249131, "learning_rate": 1.4757819221309206e-06, "loss": 0.2701, "step": 22046 }, { "epoch": 0.7565888812628689, "grad_norm": 0.8108974383510366, "learning_rate": 1.4753877233574705e-06, "loss": 0.2848, "step": 22047 }, { "epoch": 0.7566231983527797, "grad_norm": 0.8287547162487151, "learning_rate": 1.474993568126496e-06, "loss": 0.2871, "step": 22048 }, { "epoch": 0.7566575154426904, "grad_norm": 0.7479135590667845, "learning_rate": 1.4745994564428656e-06, "loss": 0.2671, "step": 22049 }, { "epoch": 0.7566918325326012, "grad_norm": 0.7508459603944571, "learning_rate": 1.4742053883114494e-06, "loss": 0.2426, "step": 22050 }, { "epoch": 0.7567261496225121, "grad_norm": 0.8399176325688397, "learning_rate": 1.4738113637371165e-06, "loss": 0.266, "step": 22051 }, { "epoch": 0.7567604667124228, "grad_norm": 0.8931799386079351, "learning_rate": 1.4734173827247295e-06, "loss": 0.3091, "step": 22052 }, { "epoch": 0.7567947838023336, "grad_norm": 0.8593457523180447, "learning_rate": 1.4730234452791625e-06, "loss": 0.2476, "step": 22053 }, { "epoch": 0.7568291008922443, "grad_norm": 0.7074255006102738, "learning_rate": 1.4726295514052774e-06, "loss": 0.2615, "step": 22054 }, { "epoch": 0.7568634179821551, "grad_norm": 0.8889039982639518, "learning_rate": 1.4722357011079413e-06, "loss": 0.292, "step": 22055 }, { "epoch": 0.7568977350720659, "grad_norm": 0.8045161865546981, "learning_rate": 1.4718418943920204e-06, "loss": 0.2401, "step": 22056 }, { "epoch": 0.7569320521619767, "grad_norm": 0.7615515224562106, "learning_rate": 1.471448131262379e-06, "loss": 0.2439, "step": 22057 }, { "epoch": 0.7569663692518874, "grad_norm": 0.8083717118088003, "learning_rate": 1.471054411723883e-06, "loss": 0.3018, "step": 22058 }, { "epoch": 0.7570006863417982, "grad_norm": 0.8946030885234253, "learning_rate": 1.4706607357813946e-06, "loss": 0.2599, "step": 22059 }, { "epoch": 0.757035003431709, "grad_norm": 0.8090229116923287, "learning_rate": 1.4702671034397798e-06, "loss": 0.2533, "step": 22060 }, { "epoch": 0.7570693205216198, "grad_norm": 0.7758932257255704, "learning_rate": 1.4698735147038966e-06, "loss": 0.2602, "step": 22061 }, { "epoch": 0.7571036376115305, "grad_norm": 0.7262573090101586, "learning_rate": 1.4694799695786115e-06, "loss": 0.2763, "step": 22062 }, { "epoch": 0.7571379547014413, "grad_norm": 0.6868636698010935, "learning_rate": 1.4690864680687872e-06, "loss": 0.2426, "step": 22063 }, { "epoch": 0.757172271791352, "grad_norm": 0.6792599557844365, "learning_rate": 1.46869301017928e-06, "loss": 0.2957, "step": 22064 }, { "epoch": 0.7572065888812629, "grad_norm": 0.8398234673242831, "learning_rate": 1.4682995959149566e-06, "loss": 0.2143, "step": 22065 }, { "epoch": 0.7572409059711737, "grad_norm": 0.717540236314453, "learning_rate": 1.467906225280672e-06, "loss": 0.2344, "step": 22066 }, { "epoch": 0.7572752230610844, "grad_norm": 0.7988520228023981, "learning_rate": 1.4675128982812885e-06, "loss": 0.2414, "step": 22067 }, { "epoch": 0.7573095401509952, "grad_norm": 0.7821607150471424, "learning_rate": 1.4671196149216643e-06, "loss": 0.2309, "step": 22068 }, { "epoch": 0.757343857240906, "grad_norm": 0.7490568786029929, "learning_rate": 1.466726375206659e-06, "loss": 0.253, "step": 22069 }, { "epoch": 0.7573781743308168, "grad_norm": 0.8158882118555855, "learning_rate": 1.4663331791411295e-06, "loss": 0.2416, "step": 22070 }, { "epoch": 0.7574124914207275, "grad_norm": 0.7762244461304717, "learning_rate": 1.4659400267299333e-06, "loss": 0.1985, "step": 22071 }, { "epoch": 0.7574468085106383, "grad_norm": 1.1374333596569193, "learning_rate": 1.4655469179779286e-06, "loss": 0.2491, "step": 22072 }, { "epoch": 0.757481125600549, "grad_norm": 0.9070274225295911, "learning_rate": 1.46515385288997e-06, "loss": 0.2431, "step": 22073 }, { "epoch": 0.7575154426904599, "grad_norm": 0.7906082433464162, "learning_rate": 1.4647608314709167e-06, "loss": 0.2463, "step": 22074 }, { "epoch": 0.7575497597803706, "grad_norm": 0.750866213656311, "learning_rate": 1.4643678537256185e-06, "loss": 0.2555, "step": 22075 }, { "epoch": 0.7575840768702814, "grad_norm": 0.7523759608911701, "learning_rate": 1.463974919658936e-06, "loss": 0.2506, "step": 22076 }, { "epoch": 0.7576183939601921, "grad_norm": 0.8025616107292884, "learning_rate": 1.4635820292757192e-06, "loss": 0.2874, "step": 22077 }, { "epoch": 0.7576527110501029, "grad_norm": 0.6990591136100119, "learning_rate": 1.4631891825808219e-06, "loss": 0.2551, "step": 22078 }, { "epoch": 0.7576870281400138, "grad_norm": 0.8844440377299038, "learning_rate": 1.4627963795791017e-06, "loss": 0.2554, "step": 22079 }, { "epoch": 0.7577213452299245, "grad_norm": 0.7210642085352124, "learning_rate": 1.4624036202754072e-06, "loss": 0.2223, "step": 22080 }, { "epoch": 0.7577556623198353, "grad_norm": 0.6944291704589306, "learning_rate": 1.4620109046745911e-06, "loss": 0.239, "step": 22081 }, { "epoch": 0.757789979409746, "grad_norm": 0.767392293805028, "learning_rate": 1.4616182327815054e-06, "loss": 0.2965, "step": 22082 }, { "epoch": 0.7578242964996569, "grad_norm": 0.8475926870596732, "learning_rate": 1.4612256046010014e-06, "loss": 0.2508, "step": 22083 }, { "epoch": 0.7578586135895676, "grad_norm": 0.7950757088604686, "learning_rate": 1.460833020137929e-06, "loss": 0.2337, "step": 22084 }, { "epoch": 0.7578929306794784, "grad_norm": 0.8164031188590474, "learning_rate": 1.4604404793971382e-06, "loss": 0.2997, "step": 22085 }, { "epoch": 0.7579272477693891, "grad_norm": 0.8268684462126348, "learning_rate": 1.4600479823834806e-06, "loss": 0.2748, "step": 22086 }, { "epoch": 0.7579615648592999, "grad_norm": 0.7391267560000284, "learning_rate": 1.4596555291017995e-06, "loss": 0.2606, "step": 22087 }, { "epoch": 0.7579958819492107, "grad_norm": 0.7592466458809877, "learning_rate": 1.4592631195569495e-06, "loss": 0.2793, "step": 22088 }, { "epoch": 0.7580301990391215, "grad_norm": 0.7379831202924255, "learning_rate": 1.458870753753775e-06, "loss": 0.2699, "step": 22089 }, { "epoch": 0.7580645161290323, "grad_norm": 0.9681941944346283, "learning_rate": 1.4584784316971228e-06, "loss": 0.27, "step": 22090 }, { "epoch": 0.758098833218943, "grad_norm": 0.8275748838496765, "learning_rate": 1.4580861533918412e-06, "loss": 0.2738, "step": 22091 }, { "epoch": 0.7581331503088538, "grad_norm": 0.7963851344822876, "learning_rate": 1.4576939188427751e-06, "loss": 0.2161, "step": 22092 }, { "epoch": 0.7581674673987646, "grad_norm": 0.7896090222420239, "learning_rate": 1.4573017280547714e-06, "loss": 0.2441, "step": 22093 }, { "epoch": 0.7582017844886754, "grad_norm": 0.7983010736353509, "learning_rate": 1.456909581032674e-06, "loss": 0.2862, "step": 22094 }, { "epoch": 0.7582361015785861, "grad_norm": 0.7881058727417676, "learning_rate": 1.4565174777813296e-06, "loss": 0.2584, "step": 22095 }, { "epoch": 0.7582704186684969, "grad_norm": 0.9162166633543275, "learning_rate": 1.4561254183055773e-06, "loss": 0.3305, "step": 22096 }, { "epoch": 0.7583047357584077, "grad_norm": 0.8497784744432338, "learning_rate": 1.4557334026102666e-06, "loss": 0.2338, "step": 22097 }, { "epoch": 0.7583390528483185, "grad_norm": 0.9096276362921448, "learning_rate": 1.4553414307002362e-06, "loss": 0.2881, "step": 22098 }, { "epoch": 0.7583733699382292, "grad_norm": 0.7754957398914618, "learning_rate": 1.4549495025803296e-06, "loss": 0.2499, "step": 22099 }, { "epoch": 0.75840768702814, "grad_norm": 0.7148404016633314, "learning_rate": 1.454557618255389e-06, "loss": 0.2466, "step": 22100 }, { "epoch": 0.7584420041180507, "grad_norm": 0.7232406034731514, "learning_rate": 1.4541657777302536e-06, "loss": 0.2682, "step": 22101 }, { "epoch": 0.7584763212079616, "grad_norm": 0.8195933555167915, "learning_rate": 1.4537739810097696e-06, "loss": 0.2687, "step": 22102 }, { "epoch": 0.7585106382978724, "grad_norm": 0.7711642113145661, "learning_rate": 1.4533822280987713e-06, "loss": 0.2771, "step": 22103 }, { "epoch": 0.7585449553877831, "grad_norm": 0.7723983949858806, "learning_rate": 1.452990519002101e-06, "loss": 0.2448, "step": 22104 }, { "epoch": 0.7585792724776939, "grad_norm": 0.9229172757318704, "learning_rate": 1.4525988537245978e-06, "loss": 0.3072, "step": 22105 }, { "epoch": 0.7586135895676047, "grad_norm": 0.7457349759750056, "learning_rate": 1.4522072322710995e-06, "loss": 0.2359, "step": 22106 }, { "epoch": 0.7586479066575155, "grad_norm": 0.7580834071937403, "learning_rate": 1.451815654646444e-06, "loss": 0.265, "step": 22107 }, { "epoch": 0.7586822237474262, "grad_norm": 0.8546618376575164, "learning_rate": 1.4514241208554703e-06, "loss": 0.2106, "step": 22108 }, { "epoch": 0.758716540837337, "grad_norm": 0.8064235978650361, "learning_rate": 1.4510326309030154e-06, "loss": 0.2559, "step": 22109 }, { "epoch": 0.7587508579272477, "grad_norm": 0.9176696203526902, "learning_rate": 1.450641184793911e-06, "loss": 0.2974, "step": 22110 }, { "epoch": 0.7587851750171586, "grad_norm": 0.8454497763073597, "learning_rate": 1.4502497825330008e-06, "loss": 0.3204, "step": 22111 }, { "epoch": 0.7588194921070693, "grad_norm": 0.9966884372645186, "learning_rate": 1.449858424125114e-06, "loss": 0.2394, "step": 22112 }, { "epoch": 0.7588538091969801, "grad_norm": 0.7487457833737476, "learning_rate": 1.4494671095750867e-06, "loss": 0.2418, "step": 22113 }, { "epoch": 0.7588881262868908, "grad_norm": 0.7208804117315002, "learning_rate": 1.4490758388877545e-06, "loss": 0.2271, "step": 22114 }, { "epoch": 0.7589224433768016, "grad_norm": 0.788840588458108, "learning_rate": 1.44868461206795e-06, "loss": 0.2821, "step": 22115 }, { "epoch": 0.7589567604667125, "grad_norm": 0.6913339247031347, "learning_rate": 1.448293429120507e-06, "loss": 0.2356, "step": 22116 }, { "epoch": 0.7589910775566232, "grad_norm": 0.7441385578026395, "learning_rate": 1.447902290050257e-06, "loss": 0.3217, "step": 22117 }, { "epoch": 0.759025394646534, "grad_norm": 0.7296528232111911, "learning_rate": 1.4475111948620352e-06, "loss": 0.266, "step": 22118 }, { "epoch": 0.7590597117364447, "grad_norm": 0.7308932008725983, "learning_rate": 1.4471201435606674e-06, "loss": 0.3009, "step": 22119 }, { "epoch": 0.7590940288263556, "grad_norm": 0.7628079323158922, "learning_rate": 1.4467291361509911e-06, "loss": 0.2811, "step": 22120 }, { "epoch": 0.7591283459162663, "grad_norm": 0.7426794618640696, "learning_rate": 1.4463381726378323e-06, "loss": 0.2172, "step": 22121 }, { "epoch": 0.7591626630061771, "grad_norm": 0.7678551706857584, "learning_rate": 1.4459472530260215e-06, "loss": 0.239, "step": 22122 }, { "epoch": 0.7591969800960878, "grad_norm": 0.8175533562091519, "learning_rate": 1.4455563773203896e-06, "loss": 0.2619, "step": 22123 }, { "epoch": 0.7592312971859986, "grad_norm": 0.6762554722580443, "learning_rate": 1.4451655455257642e-06, "loss": 0.277, "step": 22124 }, { "epoch": 0.7592656142759094, "grad_norm": 0.7189959701019979, "learning_rate": 1.4447747576469739e-06, "loss": 0.2393, "step": 22125 }, { "epoch": 0.7592999313658202, "grad_norm": 0.697033935400244, "learning_rate": 1.4443840136888466e-06, "loss": 0.2532, "step": 22126 }, { "epoch": 0.7593342484557309, "grad_norm": 0.760215648936181, "learning_rate": 1.4439933136562107e-06, "loss": 0.3038, "step": 22127 }, { "epoch": 0.7593685655456417, "grad_norm": 0.7809684336211367, "learning_rate": 1.4436026575538886e-06, "loss": 0.2816, "step": 22128 }, { "epoch": 0.7594028826355526, "grad_norm": 0.8793526702549677, "learning_rate": 1.443212045386711e-06, "loss": 0.2628, "step": 22129 }, { "epoch": 0.7594371997254633, "grad_norm": 0.8266608357202494, "learning_rate": 1.4428214771595012e-06, "loss": 0.2542, "step": 22130 }, { "epoch": 0.7594715168153741, "grad_norm": 0.7426374418842429, "learning_rate": 1.4424309528770852e-06, "loss": 0.2804, "step": 22131 }, { "epoch": 0.7595058339052848, "grad_norm": 0.7188866246369656, "learning_rate": 1.4420404725442887e-06, "loss": 0.233, "step": 22132 }, { "epoch": 0.7595401509951956, "grad_norm": 0.8757196920151179, "learning_rate": 1.4416500361659307e-06, "loss": 0.2554, "step": 22133 }, { "epoch": 0.7595744680851064, "grad_norm": 0.7279945771071685, "learning_rate": 1.4412596437468413e-06, "loss": 0.2634, "step": 22134 }, { "epoch": 0.7596087851750172, "grad_norm": 0.8087127639565423, "learning_rate": 1.440869295291838e-06, "loss": 0.2314, "step": 22135 }, { "epoch": 0.7596431022649279, "grad_norm": 0.7663948045455998, "learning_rate": 1.4404789908057448e-06, "loss": 0.2213, "step": 22136 }, { "epoch": 0.7596774193548387, "grad_norm": 0.7411443296071759, "learning_rate": 1.4400887302933842e-06, "loss": 0.2663, "step": 22137 }, { "epoch": 0.7597117364447494, "grad_norm": 0.8113104629854094, "learning_rate": 1.4396985137595765e-06, "loss": 0.3103, "step": 22138 }, { "epoch": 0.7597460535346603, "grad_norm": 0.7047728664678569, "learning_rate": 1.439308341209143e-06, "loss": 0.3186, "step": 22139 }, { "epoch": 0.759780370624571, "grad_norm": 0.7087903274191939, "learning_rate": 1.4389182126469037e-06, "loss": 0.272, "step": 22140 }, { "epoch": 0.7598146877144818, "grad_norm": 0.7254823332095643, "learning_rate": 1.438528128077679e-06, "loss": 0.2558, "step": 22141 }, { "epoch": 0.7598490048043925, "grad_norm": 0.8401396704060374, "learning_rate": 1.4381380875062845e-06, "loss": 0.2007, "step": 22142 }, { "epoch": 0.7598833218943034, "grad_norm": 0.6904621359614479, "learning_rate": 1.4377480909375441e-06, "loss": 0.2347, "step": 22143 }, { "epoch": 0.7599176389842142, "grad_norm": 0.769297801608524, "learning_rate": 1.4373581383762713e-06, "loss": 0.2781, "step": 22144 }, { "epoch": 0.7599519560741249, "grad_norm": 0.7529150009256915, "learning_rate": 1.4369682298272835e-06, "loss": 0.2195, "step": 22145 }, { "epoch": 0.7599862731640357, "grad_norm": 0.7578865747292665, "learning_rate": 1.4365783652954024e-06, "loss": 0.2962, "step": 22146 }, { "epoch": 0.7600205902539464, "grad_norm": 0.8759351795484313, "learning_rate": 1.4361885447854395e-06, "loss": 0.3164, "step": 22147 }, { "epoch": 0.7600549073438573, "grad_norm": 0.7246912934982098, "learning_rate": 1.4357987683022117e-06, "loss": 0.2263, "step": 22148 }, { "epoch": 0.760089224433768, "grad_norm": 0.9178201891288434, "learning_rate": 1.4354090358505351e-06, "loss": 0.2821, "step": 22149 }, { "epoch": 0.7601235415236788, "grad_norm": 0.7367438833303189, "learning_rate": 1.4350193474352235e-06, "loss": 0.2099, "step": 22150 }, { "epoch": 0.7601578586135895, "grad_norm": 0.8665756151856059, "learning_rate": 1.4346297030610923e-06, "loss": 0.2609, "step": 22151 }, { "epoch": 0.7601921757035004, "grad_norm": 0.8386942937727817, "learning_rate": 1.4342401027329532e-06, "loss": 0.2294, "step": 22152 }, { "epoch": 0.7602264927934111, "grad_norm": 0.7710344808152064, "learning_rate": 1.433850546455623e-06, "loss": 0.2519, "step": 22153 }, { "epoch": 0.7602608098833219, "grad_norm": 0.9206385958546748, "learning_rate": 1.4334610342339077e-06, "loss": 0.2966, "step": 22154 }, { "epoch": 0.7602951269732326, "grad_norm": 0.8091807604157457, "learning_rate": 1.4330715660726268e-06, "loss": 0.2802, "step": 22155 }, { "epoch": 0.7603294440631434, "grad_norm": 0.8141657158860292, "learning_rate": 1.4326821419765862e-06, "loss": 0.2972, "step": 22156 }, { "epoch": 0.7603637611530543, "grad_norm": 0.8041112221439259, "learning_rate": 1.432292761950599e-06, "loss": 0.2662, "step": 22157 }, { "epoch": 0.760398078242965, "grad_norm": 0.7981272621828235, "learning_rate": 1.4319034259994746e-06, "loss": 0.2489, "step": 22158 }, { "epoch": 0.7604323953328758, "grad_norm": 0.7371720813367606, "learning_rate": 1.4315141341280237e-06, "loss": 0.276, "step": 22159 }, { "epoch": 0.7604667124227865, "grad_norm": 0.7456716894560254, "learning_rate": 1.4311248863410549e-06, "loss": 0.2099, "step": 22160 }, { "epoch": 0.7605010295126973, "grad_norm": 0.7918379531212484, "learning_rate": 1.4307356826433776e-06, "loss": 0.2768, "step": 22161 }, { "epoch": 0.7605353466026081, "grad_norm": 0.7070940318875674, "learning_rate": 1.4303465230397995e-06, "loss": 0.2262, "step": 22162 }, { "epoch": 0.7605696636925189, "grad_norm": 0.7758450816842973, "learning_rate": 1.429957407535128e-06, "loss": 0.2575, "step": 22163 }, { "epoch": 0.7606039807824296, "grad_norm": 0.6856305661633956, "learning_rate": 1.4295683361341723e-06, "loss": 0.2918, "step": 22164 }, { "epoch": 0.7606382978723404, "grad_norm": 0.8090187660970695, "learning_rate": 1.4291793088417338e-06, "loss": 0.2872, "step": 22165 }, { "epoch": 0.7606726149622512, "grad_norm": 0.7313842861388627, "learning_rate": 1.4287903256626251e-06, "loss": 0.3141, "step": 22166 }, { "epoch": 0.760706932052162, "grad_norm": 0.7801227228060462, "learning_rate": 1.4284013866016472e-06, "loss": 0.2497, "step": 22167 }, { "epoch": 0.7607412491420728, "grad_norm": 0.8563887016825776, "learning_rate": 1.428012491663604e-06, "loss": 0.2655, "step": 22168 }, { "epoch": 0.7607755662319835, "grad_norm": 0.766836389694178, "learning_rate": 1.4276236408533045e-06, "loss": 0.2352, "step": 22169 }, { "epoch": 0.7608098833218943, "grad_norm": 0.6990483875965012, "learning_rate": 1.4272348341755493e-06, "loss": 0.249, "step": 22170 }, { "epoch": 0.7608442004118051, "grad_norm": 0.6551074732614007, "learning_rate": 1.4268460716351417e-06, "loss": 0.2537, "step": 22171 }, { "epoch": 0.7608785175017159, "grad_norm": 0.7112490257708877, "learning_rate": 1.4264573532368853e-06, "loss": 0.2934, "step": 22172 }, { "epoch": 0.7609128345916266, "grad_norm": 0.7101871741416319, "learning_rate": 1.426068678985582e-06, "loss": 0.2565, "step": 22173 }, { "epoch": 0.7609471516815374, "grad_norm": 0.7209509446433554, "learning_rate": 1.4256800488860333e-06, "loss": 0.2464, "step": 22174 }, { "epoch": 0.7609814687714482, "grad_norm": 0.7089370798223329, "learning_rate": 1.4252914629430404e-06, "loss": 0.2354, "step": 22175 }, { "epoch": 0.761015785861359, "grad_norm": 1.086182713189966, "learning_rate": 1.4249029211614051e-06, "loss": 0.221, "step": 22176 }, { "epoch": 0.7610501029512697, "grad_norm": 0.8120013778918579, "learning_rate": 1.4245144235459229e-06, "loss": 0.2534, "step": 22177 }, { "epoch": 0.7610844200411805, "grad_norm": 0.9259384461065902, "learning_rate": 1.4241259701013998e-06, "loss": 0.2273, "step": 22178 }, { "epoch": 0.7611187371310912, "grad_norm": 0.7928720432017893, "learning_rate": 1.4237375608326293e-06, "loss": 0.2342, "step": 22179 }, { "epoch": 0.7611530542210021, "grad_norm": 0.8209850746160522, "learning_rate": 1.4233491957444118e-06, "loss": 0.2819, "step": 22180 }, { "epoch": 0.7611873713109129, "grad_norm": 0.7922876616535951, "learning_rate": 1.4229608748415452e-06, "loss": 0.2423, "step": 22181 }, { "epoch": 0.7612216884008236, "grad_norm": 0.7964483904867488, "learning_rate": 1.4225725981288268e-06, "loss": 0.2548, "step": 22182 }, { "epoch": 0.7612560054907344, "grad_norm": 0.6701478089900276, "learning_rate": 1.4221843656110528e-06, "loss": 0.2261, "step": 22183 }, { "epoch": 0.7612903225806451, "grad_norm": 0.7507354485458073, "learning_rate": 1.4217961772930195e-06, "loss": 0.2328, "step": 22184 }, { "epoch": 0.761324639670556, "grad_norm": 0.7535332144846615, "learning_rate": 1.4214080331795243e-06, "loss": 0.2838, "step": 22185 }, { "epoch": 0.7613589567604667, "grad_norm": 0.7942513557743456, "learning_rate": 1.4210199332753577e-06, "loss": 0.272, "step": 22186 }, { "epoch": 0.7613932738503775, "grad_norm": 0.7899039235839543, "learning_rate": 1.42063187758532e-06, "loss": 0.2577, "step": 22187 }, { "epoch": 0.7614275909402882, "grad_norm": 0.816791388271646, "learning_rate": 1.4202438661142009e-06, "loss": 0.2661, "step": 22188 }, { "epoch": 0.7614619080301991, "grad_norm": 0.8567621553867851, "learning_rate": 1.4198558988667939e-06, "loss": 0.2892, "step": 22189 }, { "epoch": 0.7614962251201098, "grad_norm": 0.8790686057133896, "learning_rate": 1.4194679758478964e-06, "loss": 0.314, "step": 22190 }, { "epoch": 0.7615305422100206, "grad_norm": 0.7864808019712074, "learning_rate": 1.4190800970622953e-06, "loss": 0.2687, "step": 22191 }, { "epoch": 0.7615648592999313, "grad_norm": 0.7965790706937059, "learning_rate": 1.4186922625147853e-06, "loss": 0.2559, "step": 22192 }, { "epoch": 0.7615991763898421, "grad_norm": 0.7190881744200385, "learning_rate": 1.4183044722101569e-06, "loss": 0.2755, "step": 22193 }, { "epoch": 0.761633493479753, "grad_norm": 0.7547609210810865, "learning_rate": 1.4179167261532006e-06, "loss": 0.2647, "step": 22194 }, { "epoch": 0.7616678105696637, "grad_norm": 0.7832192084930856, "learning_rate": 1.4175290243487072e-06, "loss": 0.2483, "step": 22195 }, { "epoch": 0.7617021276595745, "grad_norm": 0.774838824652828, "learning_rate": 1.4171413668014656e-06, "loss": 0.2479, "step": 22196 }, { "epoch": 0.7617364447494852, "grad_norm": 0.6731966993785252, "learning_rate": 1.416753753516265e-06, "loss": 0.2659, "step": 22197 }, { "epoch": 0.7617707618393961, "grad_norm": 0.8277148920044948, "learning_rate": 1.416366184497895e-06, "loss": 0.2293, "step": 22198 }, { "epoch": 0.7618050789293068, "grad_norm": 0.8219900682528798, "learning_rate": 1.4159786597511432e-06, "loss": 0.3014, "step": 22199 }, { "epoch": 0.7618393960192176, "grad_norm": 0.7566936192079157, "learning_rate": 1.415591179280794e-06, "loss": 0.2859, "step": 22200 }, { "epoch": 0.7618737131091283, "grad_norm": 0.7890578188644434, "learning_rate": 1.4152037430916398e-06, "loss": 0.2749, "step": 22201 }, { "epoch": 0.7619080301990391, "grad_norm": 0.7222700337206766, "learning_rate": 1.4148163511884626e-06, "loss": 0.2488, "step": 22202 }, { "epoch": 0.7619423472889499, "grad_norm": 0.9746186949971772, "learning_rate": 1.414429003576049e-06, "loss": 0.2973, "step": 22203 }, { "epoch": 0.7619766643788607, "grad_norm": 0.7995985110799504, "learning_rate": 1.4140417002591849e-06, "loss": 0.2464, "step": 22204 }, { "epoch": 0.7620109814687714, "grad_norm": 0.8554618361606017, "learning_rate": 1.4136544412426545e-06, "loss": 0.3026, "step": 22205 }, { "epoch": 0.7620452985586822, "grad_norm": 0.796444018978109, "learning_rate": 1.4132672265312425e-06, "loss": 0.2623, "step": 22206 }, { "epoch": 0.762079615648593, "grad_norm": 0.8585538689224085, "learning_rate": 1.4128800561297323e-06, "loss": 0.3071, "step": 22207 }, { "epoch": 0.7621139327385038, "grad_norm": 0.9138698158217581, "learning_rate": 1.412492930042908e-06, "loss": 0.2265, "step": 22208 }, { "epoch": 0.7621482498284146, "grad_norm": 0.778306393678473, "learning_rate": 1.4121058482755483e-06, "loss": 0.2775, "step": 22209 }, { "epoch": 0.7621825669183253, "grad_norm": 0.7605071833034458, "learning_rate": 1.4117188108324403e-06, "loss": 0.2492, "step": 22210 }, { "epoch": 0.7622168840082361, "grad_norm": 0.7050053981437852, "learning_rate": 1.4113318177183615e-06, "loss": 0.2727, "step": 22211 }, { "epoch": 0.7622512010981469, "grad_norm": 0.7994783371238523, "learning_rate": 1.4109448689380928e-06, "loss": 0.2457, "step": 22212 }, { "epoch": 0.7622855181880577, "grad_norm": 0.6970206945664192, "learning_rate": 1.410557964496419e-06, "loss": 0.2198, "step": 22213 }, { "epoch": 0.7623198352779684, "grad_norm": 0.7828908491224191, "learning_rate": 1.4101711043981149e-06, "loss": 0.2468, "step": 22214 }, { "epoch": 0.7623541523678792, "grad_norm": 0.7545254337873241, "learning_rate": 1.4097842886479612e-06, "loss": 0.2478, "step": 22215 }, { "epoch": 0.7623884694577899, "grad_norm": 0.7056788423322045, "learning_rate": 1.4093975172507374e-06, "loss": 0.2543, "step": 22216 }, { "epoch": 0.7624227865477008, "grad_norm": 0.8437875238919581, "learning_rate": 1.4090107902112205e-06, "loss": 0.2837, "step": 22217 }, { "epoch": 0.7624571036376115, "grad_norm": 0.8143542528132137, "learning_rate": 1.4086241075341888e-06, "loss": 0.315, "step": 22218 }, { "epoch": 0.7624914207275223, "grad_norm": 0.8129758616692486, "learning_rate": 1.4082374692244195e-06, "loss": 0.2676, "step": 22219 }, { "epoch": 0.762525737817433, "grad_norm": 0.7990454948549713, "learning_rate": 1.4078508752866898e-06, "loss": 0.3206, "step": 22220 }, { "epoch": 0.7625600549073439, "grad_norm": 0.8011305013355837, "learning_rate": 1.4074643257257714e-06, "loss": 0.282, "step": 22221 }, { "epoch": 0.7625943719972547, "grad_norm": 0.7022277904833649, "learning_rate": 1.4070778205464458e-06, "loss": 0.2557, "step": 22222 }, { "epoch": 0.7626286890871654, "grad_norm": 0.768645782350083, "learning_rate": 1.4066913597534838e-06, "loss": 0.2602, "step": 22223 }, { "epoch": 0.7626630061770762, "grad_norm": 0.7705173958324324, "learning_rate": 1.4063049433516602e-06, "loss": 0.3114, "step": 22224 }, { "epoch": 0.7626973232669869, "grad_norm": 0.7071564813603619, "learning_rate": 1.405918571345749e-06, "loss": 0.2035, "step": 22225 }, { "epoch": 0.7627316403568978, "grad_norm": 0.7638633649583095, "learning_rate": 1.405532243740524e-06, "loss": 0.2412, "step": 22226 }, { "epoch": 0.7627659574468085, "grad_norm": 0.7182915954876239, "learning_rate": 1.405145960540757e-06, "loss": 0.2831, "step": 22227 }, { "epoch": 0.7628002745367193, "grad_norm": 0.8179082632723136, "learning_rate": 1.4047597217512204e-06, "loss": 0.2758, "step": 22228 }, { "epoch": 0.76283459162663, "grad_norm": 0.7512252760620861, "learning_rate": 1.4043735273766862e-06, "loss": 0.2442, "step": 22229 }, { "epoch": 0.7628689087165408, "grad_norm": 0.7068267877256275, "learning_rate": 1.4039873774219248e-06, "loss": 0.2588, "step": 22230 }, { "epoch": 0.7629032258064516, "grad_norm": 0.8277799012407341, "learning_rate": 1.403601271891708e-06, "loss": 0.239, "step": 22231 }, { "epoch": 0.7629375428963624, "grad_norm": 0.90484504197812, "learning_rate": 1.4032152107908021e-06, "loss": 0.2532, "step": 22232 }, { "epoch": 0.7629718599862731, "grad_norm": 0.8001363198051139, "learning_rate": 1.4028291941239797e-06, "loss": 0.2991, "step": 22233 }, { "epoch": 0.7630061770761839, "grad_norm": 0.7403393334116125, "learning_rate": 1.402443221896011e-06, "loss": 0.2621, "step": 22234 }, { "epoch": 0.7630404941660948, "grad_norm": 0.7472371981860493, "learning_rate": 1.4020572941116584e-06, "loss": 0.2609, "step": 22235 }, { "epoch": 0.7630748112560055, "grad_norm": 0.763470038097012, "learning_rate": 1.4016714107756962e-06, "loss": 0.274, "step": 22236 }, { "epoch": 0.7631091283459163, "grad_norm": 0.9632257916637695, "learning_rate": 1.401285571892887e-06, "loss": 0.2535, "step": 22237 }, { "epoch": 0.763143445435827, "grad_norm": 0.8646092901509812, "learning_rate": 1.400899777467999e-06, "loss": 0.2947, "step": 22238 }, { "epoch": 0.7631777625257378, "grad_norm": 0.8439693232902825, "learning_rate": 1.4005140275057976e-06, "loss": 0.2913, "step": 22239 }, { "epoch": 0.7632120796156486, "grad_norm": 0.8272401073037978, "learning_rate": 1.4001283220110495e-06, "loss": 0.3136, "step": 22240 }, { "epoch": 0.7632463967055594, "grad_norm": 0.9295416969335619, "learning_rate": 1.3997426609885184e-06, "loss": 0.2312, "step": 22241 }, { "epoch": 0.7632807137954701, "grad_norm": 0.7212807031918913, "learning_rate": 1.3993570444429694e-06, "loss": 0.274, "step": 22242 }, { "epoch": 0.7633150308853809, "grad_norm": 0.7905994493254767, "learning_rate": 1.3989714723791674e-06, "loss": 0.2974, "step": 22243 }, { "epoch": 0.7633493479752917, "grad_norm": 0.6582171102895668, "learning_rate": 1.3985859448018718e-06, "loss": 0.196, "step": 22244 }, { "epoch": 0.7633836650652025, "grad_norm": 0.8255153214082878, "learning_rate": 1.3982004617158511e-06, "loss": 0.2305, "step": 22245 }, { "epoch": 0.7634179821551133, "grad_norm": 0.72351051261559, "learning_rate": 1.3978150231258631e-06, "loss": 0.2089, "step": 22246 }, { "epoch": 0.763452299245024, "grad_norm": 0.8101174366339304, "learning_rate": 1.3974296290366701e-06, "loss": 0.3072, "step": 22247 }, { "epoch": 0.7634866163349348, "grad_norm": 0.7426201823634767, "learning_rate": 1.3970442794530342e-06, "loss": 0.2532, "step": 22248 }, { "epoch": 0.7635209334248456, "grad_norm": 0.7808180085281604, "learning_rate": 1.396658974379716e-06, "loss": 0.3086, "step": 22249 }, { "epoch": 0.7635552505147564, "grad_norm": 0.7863486229526593, "learning_rate": 1.3962737138214743e-06, "loss": 0.251, "step": 22250 }, { "epoch": 0.7635895676046671, "grad_norm": 0.8032327077622906, "learning_rate": 1.39588849778307e-06, "loss": 0.2974, "step": 22251 }, { "epoch": 0.7636238846945779, "grad_norm": 0.8247241694013068, "learning_rate": 1.3955033262692625e-06, "loss": 0.252, "step": 22252 }, { "epoch": 0.7636582017844886, "grad_norm": 0.7743163517540471, "learning_rate": 1.3951181992848056e-06, "loss": 0.269, "step": 22253 }, { "epoch": 0.7636925188743995, "grad_norm": 0.7838425334652186, "learning_rate": 1.394733116834464e-06, "loss": 0.2768, "step": 22254 }, { "epoch": 0.7637268359643102, "grad_norm": 0.7958151299905806, "learning_rate": 1.3943480789229896e-06, "loss": 0.2234, "step": 22255 }, { "epoch": 0.763761153054221, "grad_norm": 0.804341465759595, "learning_rate": 1.3939630855551394e-06, "loss": 0.2653, "step": 22256 }, { "epoch": 0.7637954701441317, "grad_norm": 0.8018123131126216, "learning_rate": 1.3935781367356743e-06, "loss": 0.2936, "step": 22257 }, { "epoch": 0.7638297872340426, "grad_norm": 0.7148198064611023, "learning_rate": 1.3931932324693438e-06, "loss": 0.2805, "step": 22258 }, { "epoch": 0.7638641043239534, "grad_norm": 0.7622634162261918, "learning_rate": 1.3928083727609087e-06, "loss": 0.2358, "step": 22259 }, { "epoch": 0.7638984214138641, "grad_norm": 0.8037360098127779, "learning_rate": 1.3924235576151185e-06, "loss": 0.2958, "step": 22260 }, { "epoch": 0.7639327385037749, "grad_norm": 0.7906087860865665, "learning_rate": 1.3920387870367296e-06, "loss": 0.3168, "step": 22261 }, { "epoch": 0.7639670555936856, "grad_norm": 0.767551595777886, "learning_rate": 1.3916540610304952e-06, "loss": 0.2254, "step": 22262 }, { "epoch": 0.7640013726835965, "grad_norm": 0.9445496527846216, "learning_rate": 1.3912693796011678e-06, "loss": 0.3024, "step": 22263 }, { "epoch": 0.7640356897735072, "grad_norm": 0.7183465396679738, "learning_rate": 1.3908847427535e-06, "loss": 0.2725, "step": 22264 }, { "epoch": 0.764070006863418, "grad_norm": 0.7594197169770706, "learning_rate": 1.3905001504922432e-06, "loss": 0.2808, "step": 22265 }, { "epoch": 0.7641043239533287, "grad_norm": 0.7466397163902756, "learning_rate": 1.3901156028221502e-06, "loss": 0.2603, "step": 22266 }, { "epoch": 0.7641386410432396, "grad_norm": 1.0953927262913785, "learning_rate": 1.3897310997479674e-06, "loss": 0.2352, "step": 22267 }, { "epoch": 0.7641729581331503, "grad_norm": 0.9623018760906741, "learning_rate": 1.3893466412744506e-06, "loss": 0.243, "step": 22268 }, { "epoch": 0.7642072752230611, "grad_norm": 0.72389376133052, "learning_rate": 1.3889622274063446e-06, "loss": 0.2416, "step": 22269 }, { "epoch": 0.7642415923129718, "grad_norm": 0.8397190768385597, "learning_rate": 1.3885778581484005e-06, "loss": 0.286, "step": 22270 }, { "epoch": 0.7642759094028826, "grad_norm": 0.745533646327456, "learning_rate": 1.3881935335053664e-06, "loss": 0.2419, "step": 22271 }, { "epoch": 0.7643102264927935, "grad_norm": 0.8165456673458721, "learning_rate": 1.3878092534819904e-06, "loss": 0.262, "step": 22272 }, { "epoch": 0.7643445435827042, "grad_norm": 0.7313972863690139, "learning_rate": 1.3874250180830196e-06, "loss": 0.2221, "step": 22273 }, { "epoch": 0.764378860672615, "grad_norm": 0.7406099105103919, "learning_rate": 1.3870408273132008e-06, "loss": 0.2474, "step": 22274 }, { "epoch": 0.7644131777625257, "grad_norm": 0.7593799569495664, "learning_rate": 1.386656681177282e-06, "loss": 0.2303, "step": 22275 }, { "epoch": 0.7644474948524365, "grad_norm": 0.727241902504236, "learning_rate": 1.3862725796800041e-06, "loss": 0.1932, "step": 22276 }, { "epoch": 0.7644818119423473, "grad_norm": 0.7166156502684442, "learning_rate": 1.3858885228261182e-06, "loss": 0.3012, "step": 22277 }, { "epoch": 0.7645161290322581, "grad_norm": 0.7710329645539725, "learning_rate": 1.3855045106203646e-06, "loss": 0.2084, "step": 22278 }, { "epoch": 0.7645504461221688, "grad_norm": 0.7661138695541178, "learning_rate": 1.3851205430674869e-06, "loss": 0.2578, "step": 22279 }, { "epoch": 0.7645847632120796, "grad_norm": 0.8426521462064546, "learning_rate": 1.3847366201722335e-06, "loss": 0.32, "step": 22280 }, { "epoch": 0.7646190803019904, "grad_norm": 0.9358999265893577, "learning_rate": 1.3843527419393422e-06, "loss": 0.2735, "step": 22281 }, { "epoch": 0.7646533973919012, "grad_norm": 0.8907895278080808, "learning_rate": 1.383968908373558e-06, "loss": 0.2628, "step": 22282 }, { "epoch": 0.7646877144818119, "grad_norm": 0.9387716191332929, "learning_rate": 1.383585119479622e-06, "loss": 0.3021, "step": 22283 }, { "epoch": 0.7647220315717227, "grad_norm": 0.7367709564046466, "learning_rate": 1.3832013752622752e-06, "loss": 0.3287, "step": 22284 }, { "epoch": 0.7647563486616334, "grad_norm": 0.7469147710168128, "learning_rate": 1.3828176757262585e-06, "loss": 0.2247, "step": 22285 }, { "epoch": 0.7647906657515443, "grad_norm": 0.6691110270234153, "learning_rate": 1.3824340208763126e-06, "loss": 0.2306, "step": 22286 }, { "epoch": 0.7648249828414551, "grad_norm": 0.8121379849315328, "learning_rate": 1.3820504107171766e-06, "loss": 0.3058, "step": 22287 }, { "epoch": 0.7648592999313658, "grad_norm": 0.796944220421503, "learning_rate": 1.3816668452535898e-06, "loss": 0.2609, "step": 22288 }, { "epoch": 0.7648936170212766, "grad_norm": 0.6967585765755664, "learning_rate": 1.3812833244902918e-06, "loss": 0.2508, "step": 22289 }, { "epoch": 0.7649279341111874, "grad_norm": 0.8406877587878238, "learning_rate": 1.3808998484320163e-06, "loss": 0.2737, "step": 22290 }, { "epoch": 0.7649622512010982, "grad_norm": 0.7745077230701434, "learning_rate": 1.3805164170835068e-06, "loss": 0.2954, "step": 22291 }, { "epoch": 0.7649965682910089, "grad_norm": 0.8365621112253241, "learning_rate": 1.3801330304494953e-06, "loss": 0.2137, "step": 22292 }, { "epoch": 0.7650308853809197, "grad_norm": 0.7752121921670488, "learning_rate": 1.37974968853472e-06, "loss": 0.284, "step": 22293 }, { "epoch": 0.7650652024708304, "grad_norm": 0.8229509888680875, "learning_rate": 1.3793663913439164e-06, "loss": 0.2411, "step": 22294 }, { "epoch": 0.7650995195607413, "grad_norm": 0.7812505481944941, "learning_rate": 1.3789831388818203e-06, "loss": 0.2307, "step": 22295 }, { "epoch": 0.765133836650652, "grad_norm": 0.8032304263272431, "learning_rate": 1.3785999311531657e-06, "loss": 0.2779, "step": 22296 }, { "epoch": 0.7651681537405628, "grad_norm": 0.8281997876902555, "learning_rate": 1.3782167681626863e-06, "loss": 0.2661, "step": 22297 }, { "epoch": 0.7652024708304735, "grad_norm": 0.7878016212452381, "learning_rate": 1.3778336499151178e-06, "loss": 0.2295, "step": 22298 }, { "epoch": 0.7652367879203843, "grad_norm": 0.826819437790983, "learning_rate": 1.377450576415189e-06, "loss": 0.2976, "step": 22299 }, { "epoch": 0.7652711050102952, "grad_norm": 0.8178121393667543, "learning_rate": 1.3770675476676358e-06, "loss": 0.2706, "step": 22300 }, { "epoch": 0.7653054221002059, "grad_norm": 0.7684264380123744, "learning_rate": 1.3766845636771909e-06, "loss": 0.2734, "step": 22301 }, { "epoch": 0.7653397391901167, "grad_norm": 0.8433066163680109, "learning_rate": 1.3763016244485804e-06, "loss": 0.2612, "step": 22302 }, { "epoch": 0.7653740562800274, "grad_norm": 0.7297050819061847, "learning_rate": 1.3759187299865423e-06, "loss": 0.2381, "step": 22303 }, { "epoch": 0.7654083733699383, "grad_norm": 0.794364371856865, "learning_rate": 1.3755358802958014e-06, "loss": 0.2489, "step": 22304 }, { "epoch": 0.765442690459849, "grad_norm": 0.7187407156562682, "learning_rate": 1.3751530753810883e-06, "loss": 0.244, "step": 22305 }, { "epoch": 0.7654770075497598, "grad_norm": 0.8092846339843173, "learning_rate": 1.374770315247133e-06, "loss": 0.2327, "step": 22306 }, { "epoch": 0.7655113246396705, "grad_norm": 0.7883493184622675, "learning_rate": 1.3743875998986644e-06, "loss": 0.2623, "step": 22307 }, { "epoch": 0.7655456417295813, "grad_norm": 0.8050410445563645, "learning_rate": 1.3740049293404095e-06, "loss": 0.2653, "step": 22308 }, { "epoch": 0.7655799588194921, "grad_norm": 0.7930705984945886, "learning_rate": 1.3736223035770962e-06, "loss": 0.2299, "step": 22309 }, { "epoch": 0.7656142759094029, "grad_norm": 0.6466809688960525, "learning_rate": 1.3732397226134532e-06, "loss": 0.2618, "step": 22310 }, { "epoch": 0.7656485929993136, "grad_norm": 0.8730175827898431, "learning_rate": 1.3728571864542017e-06, "loss": 0.3404, "step": 22311 }, { "epoch": 0.7656829100892244, "grad_norm": 0.8673855012494428, "learning_rate": 1.372474695104074e-06, "loss": 0.3371, "step": 22312 }, { "epoch": 0.7657172271791353, "grad_norm": 0.7776929526471641, "learning_rate": 1.3720922485677901e-06, "loss": 0.2669, "step": 22313 }, { "epoch": 0.765751544269046, "grad_norm": 0.8003575407838845, "learning_rate": 1.3717098468500773e-06, "loss": 0.2327, "step": 22314 }, { "epoch": 0.7657858613589568, "grad_norm": 0.8013198993679513, "learning_rate": 1.3713274899556594e-06, "loss": 0.2587, "step": 22315 }, { "epoch": 0.7658201784488675, "grad_norm": 0.7996739485932647, "learning_rate": 1.370945177889259e-06, "loss": 0.2817, "step": 22316 }, { "epoch": 0.7658544955387783, "grad_norm": 0.8642354021766893, "learning_rate": 1.3705629106556007e-06, "loss": 0.2776, "step": 22317 }, { "epoch": 0.7658888126286891, "grad_norm": 0.8024932081139176, "learning_rate": 1.3701806882594054e-06, "loss": 0.2433, "step": 22318 }, { "epoch": 0.7659231297185999, "grad_norm": 0.8743379550558271, "learning_rate": 1.3697985107053962e-06, "loss": 0.2177, "step": 22319 }, { "epoch": 0.7659574468085106, "grad_norm": 0.8502365251553757, "learning_rate": 1.369416377998294e-06, "loss": 0.2952, "step": 22320 }, { "epoch": 0.7659917638984214, "grad_norm": 0.8170270810982635, "learning_rate": 1.369034290142821e-06, "loss": 0.2844, "step": 22321 }, { "epoch": 0.7660260809883321, "grad_norm": 0.7791011343796242, "learning_rate": 1.368652247143693e-06, "loss": 0.2047, "step": 22322 }, { "epoch": 0.766060398078243, "grad_norm": 0.8317379133089389, "learning_rate": 1.3682702490056337e-06, "loss": 0.3243, "step": 22323 }, { "epoch": 0.7660947151681538, "grad_norm": 0.788966349613, "learning_rate": 1.367888295733364e-06, "loss": 0.2327, "step": 22324 }, { "epoch": 0.7661290322580645, "grad_norm": 1.048028430029871, "learning_rate": 1.3675063873315958e-06, "loss": 0.2326, "step": 22325 }, { "epoch": 0.7661633493479753, "grad_norm": 0.7930668948319137, "learning_rate": 1.3671245238050545e-06, "loss": 0.2835, "step": 22326 }, { "epoch": 0.7661976664378861, "grad_norm": 0.8064973727133795, "learning_rate": 1.3667427051584525e-06, "loss": 0.3176, "step": 22327 }, { "epoch": 0.7662319835277969, "grad_norm": 0.784495812405717, "learning_rate": 1.3663609313965086e-06, "loss": 0.2654, "step": 22328 }, { "epoch": 0.7662663006177076, "grad_norm": 0.6923141410525123, "learning_rate": 1.3659792025239389e-06, "loss": 0.2632, "step": 22329 }, { "epoch": 0.7663006177076184, "grad_norm": 0.7736081107678636, "learning_rate": 1.365597518545459e-06, "loss": 0.2544, "step": 22330 }, { "epoch": 0.7663349347975291, "grad_norm": 0.8159193112933751, "learning_rate": 1.3652158794657849e-06, "loss": 0.2762, "step": 22331 }, { "epoch": 0.76636925188744, "grad_norm": 0.82835570965042, "learning_rate": 1.3648342852896306e-06, "loss": 0.3005, "step": 22332 }, { "epoch": 0.7664035689773507, "grad_norm": 0.710289276665302, "learning_rate": 1.3644527360217125e-06, "loss": 0.2647, "step": 22333 }, { "epoch": 0.7664378860672615, "grad_norm": 0.7482810480126948, "learning_rate": 1.3640712316667387e-06, "loss": 0.26, "step": 22334 }, { "epoch": 0.7664722031571722, "grad_norm": 0.7491012381189736, "learning_rate": 1.3636897722294285e-06, "loss": 0.2494, "step": 22335 }, { "epoch": 0.7665065202470831, "grad_norm": 0.787054156988385, "learning_rate": 1.3633083577144906e-06, "loss": 0.287, "step": 22336 }, { "epoch": 0.7665408373369939, "grad_norm": 0.6719603119653091, "learning_rate": 1.3629269881266377e-06, "loss": 0.2672, "step": 22337 }, { "epoch": 0.7665751544269046, "grad_norm": 0.6950492385154386, "learning_rate": 1.3625456634705819e-06, "loss": 0.2744, "step": 22338 }, { "epoch": 0.7666094715168154, "grad_norm": 0.7427085291634772, "learning_rate": 1.3621643837510333e-06, "loss": 0.2767, "step": 22339 }, { "epoch": 0.7666437886067261, "grad_norm": 0.8760275985208956, "learning_rate": 1.3617831489727025e-06, "loss": 0.2535, "step": 22340 }, { "epoch": 0.766678105696637, "grad_norm": 0.8568475725706781, "learning_rate": 1.3614019591402987e-06, "loss": 0.3423, "step": 22341 }, { "epoch": 0.7667124227865477, "grad_norm": 0.9095180766774504, "learning_rate": 1.3610208142585336e-06, "loss": 0.286, "step": 22342 }, { "epoch": 0.7667467398764585, "grad_norm": 0.6837033885916428, "learning_rate": 1.3606397143321104e-06, "loss": 0.2554, "step": 22343 }, { "epoch": 0.7667810569663692, "grad_norm": 0.8058803463636917, "learning_rate": 1.3602586593657425e-06, "loss": 0.2412, "step": 22344 }, { "epoch": 0.76681537405628, "grad_norm": 0.7543658719338743, "learning_rate": 1.3598776493641359e-06, "loss": 0.2492, "step": 22345 }, { "epoch": 0.7668496911461908, "grad_norm": 0.7496120494320598, "learning_rate": 1.3594966843319945e-06, "loss": 0.3067, "step": 22346 }, { "epoch": 0.7668840082361016, "grad_norm": 0.7223370637431354, "learning_rate": 1.3591157642740298e-06, "loss": 0.2832, "step": 22347 }, { "epoch": 0.7669183253260123, "grad_norm": 0.90434428189876, "learning_rate": 1.3587348891949443e-06, "loss": 0.2916, "step": 22348 }, { "epoch": 0.7669526424159231, "grad_norm": 0.963190606566563, "learning_rate": 1.3583540590994432e-06, "loss": 0.271, "step": 22349 }, { "epoch": 0.766986959505834, "grad_norm": 0.6974432666810729, "learning_rate": 1.3579732739922319e-06, "loss": 0.2459, "step": 22350 }, { "epoch": 0.7670212765957447, "grad_norm": 0.8015570705841726, "learning_rate": 1.357592533878015e-06, "loss": 0.2624, "step": 22351 }, { "epoch": 0.7670555936856555, "grad_norm": 0.7572541762281331, "learning_rate": 1.3572118387614958e-06, "loss": 0.2405, "step": 22352 }, { "epoch": 0.7670899107755662, "grad_norm": 0.8195910958387106, "learning_rate": 1.3568311886473768e-06, "loss": 0.2672, "step": 22353 }, { "epoch": 0.767124227865477, "grad_norm": 0.7441339216601253, "learning_rate": 1.3564505835403613e-06, "loss": 0.2462, "step": 22354 }, { "epoch": 0.7671585449553878, "grad_norm": 0.7515835612841006, "learning_rate": 1.3560700234451508e-06, "loss": 0.2802, "step": 22355 }, { "epoch": 0.7671928620452986, "grad_norm": 0.775857594265195, "learning_rate": 1.3556895083664479e-06, "loss": 0.2499, "step": 22356 }, { "epoch": 0.7672271791352093, "grad_norm": 0.7424854359490612, "learning_rate": 1.3553090383089495e-06, "loss": 0.243, "step": 22357 }, { "epoch": 0.7672614962251201, "grad_norm": 2.3921133263129852, "learning_rate": 1.354928613277362e-06, "loss": 0.2466, "step": 22358 }, { "epoch": 0.7672958133150309, "grad_norm": 0.7154936572240604, "learning_rate": 1.35454823327638e-06, "loss": 0.2036, "step": 22359 }, { "epoch": 0.7673301304049417, "grad_norm": 0.7477521351750271, "learning_rate": 1.3541678983107043e-06, "loss": 0.2053, "step": 22360 }, { "epoch": 0.7673644474948524, "grad_norm": 0.7709889282416126, "learning_rate": 1.3537876083850337e-06, "loss": 0.2556, "step": 22361 }, { "epoch": 0.7673987645847632, "grad_norm": 0.8573848772567043, "learning_rate": 1.3534073635040657e-06, "loss": 0.2829, "step": 22362 }, { "epoch": 0.767433081674674, "grad_norm": 0.8769927967157026, "learning_rate": 1.3530271636724984e-06, "loss": 0.3409, "step": 22363 }, { "epoch": 0.7674673987645848, "grad_norm": 0.7544539577261412, "learning_rate": 1.3526470088950288e-06, "loss": 0.2573, "step": 22364 }, { "epoch": 0.7675017158544956, "grad_norm": 0.8417655728902049, "learning_rate": 1.3522668991763544e-06, "loss": 0.2257, "step": 22365 }, { "epoch": 0.7675360329444063, "grad_norm": 0.8001834074205026, "learning_rate": 1.3518868345211667e-06, "loss": 0.2469, "step": 22366 }, { "epoch": 0.7675703500343171, "grad_norm": 0.7533539017403018, "learning_rate": 1.3515068149341654e-06, "loss": 0.2462, "step": 22367 }, { "epoch": 0.7676046671242278, "grad_norm": 0.7529634652602173, "learning_rate": 1.3511268404200451e-06, "loss": 0.2975, "step": 22368 }, { "epoch": 0.7676389842141387, "grad_norm": 0.7702759478441683, "learning_rate": 1.3507469109834952e-06, "loss": 0.2563, "step": 22369 }, { "epoch": 0.7676733013040494, "grad_norm": 0.7777665119542173, "learning_rate": 1.3503670266292164e-06, "loss": 0.2608, "step": 22370 }, { "epoch": 0.7677076183939602, "grad_norm": 0.7561723284617733, "learning_rate": 1.3499871873618952e-06, "loss": 0.2607, "step": 22371 }, { "epoch": 0.7677419354838709, "grad_norm": 0.8054532849418373, "learning_rate": 1.349607393186228e-06, "loss": 0.2196, "step": 22372 }, { "epoch": 0.7677762525737818, "grad_norm": 0.7527050514548704, "learning_rate": 1.3492276441069047e-06, "loss": 0.2871, "step": 22373 }, { "epoch": 0.7678105696636925, "grad_norm": 0.7547725447441341, "learning_rate": 1.348847940128618e-06, "loss": 0.265, "step": 22374 }, { "epoch": 0.7678448867536033, "grad_norm": 0.8377345822086004, "learning_rate": 1.348468281256058e-06, "loss": 0.3264, "step": 22375 }, { "epoch": 0.767879203843514, "grad_norm": 0.8878598067724218, "learning_rate": 1.3480886674939148e-06, "loss": 0.2424, "step": 22376 }, { "epoch": 0.7679135209334248, "grad_norm": 0.7035032175335235, "learning_rate": 1.3477090988468798e-06, "loss": 0.2387, "step": 22377 }, { "epoch": 0.7679478380233357, "grad_norm": 0.8080639254550175, "learning_rate": 1.3473295753196385e-06, "loss": 0.2566, "step": 22378 }, { "epoch": 0.7679821551132464, "grad_norm": 0.7521820209567661, "learning_rate": 1.3469500969168842e-06, "loss": 0.2339, "step": 22379 }, { "epoch": 0.7680164722031572, "grad_norm": 0.8884458811095457, "learning_rate": 1.3465706636433013e-06, "loss": 0.2681, "step": 22380 }, { "epoch": 0.7680507892930679, "grad_norm": 0.877238744204078, "learning_rate": 1.3461912755035777e-06, "loss": 0.2747, "step": 22381 }, { "epoch": 0.7680851063829788, "grad_norm": 0.8104552534609445, "learning_rate": 1.3458119325024017e-06, "loss": 0.3027, "step": 22382 }, { "epoch": 0.7681194234728895, "grad_norm": 0.7408430741241974, "learning_rate": 1.345432634644458e-06, "loss": 0.2598, "step": 22383 }, { "epoch": 0.7681537405628003, "grad_norm": 0.8374814562064212, "learning_rate": 1.345053381934434e-06, "loss": 0.246, "step": 22384 }, { "epoch": 0.768188057652711, "grad_norm": 0.7289253130593779, "learning_rate": 1.3446741743770143e-06, "loss": 0.2444, "step": 22385 }, { "epoch": 0.7682223747426218, "grad_norm": 0.7989047283016882, "learning_rate": 1.3442950119768827e-06, "loss": 0.2968, "step": 22386 }, { "epoch": 0.7682566918325326, "grad_norm": 0.8051836794759437, "learning_rate": 1.3439158947387248e-06, "loss": 0.2565, "step": 22387 }, { "epoch": 0.7682910089224434, "grad_norm": 0.7627301887211034, "learning_rate": 1.3435368226672247e-06, "loss": 0.2367, "step": 22388 }, { "epoch": 0.7683253260123541, "grad_norm": 0.9085411860756449, "learning_rate": 1.343157795767061e-06, "loss": 0.2719, "step": 22389 }, { "epoch": 0.7683596431022649, "grad_norm": 0.803673743514158, "learning_rate": 1.342778814042921e-06, "loss": 0.2566, "step": 22390 }, { "epoch": 0.7683939601921757, "grad_norm": 0.7659689783888667, "learning_rate": 1.3423998774994862e-06, "loss": 0.2447, "step": 22391 }, { "epoch": 0.7684282772820865, "grad_norm": 0.7196613844028128, "learning_rate": 1.3420209861414335e-06, "loss": 0.243, "step": 22392 }, { "epoch": 0.7684625943719973, "grad_norm": 0.7233322548915304, "learning_rate": 1.3416421399734504e-06, "loss": 0.2224, "step": 22393 }, { "epoch": 0.768496911461908, "grad_norm": 0.7430165691190833, "learning_rate": 1.3412633390002112e-06, "loss": 0.2683, "step": 22394 }, { "epoch": 0.7685312285518188, "grad_norm": 0.7853037845207795, "learning_rate": 1.3408845832263977e-06, "loss": 0.2795, "step": 22395 }, { "epoch": 0.7685655456417296, "grad_norm": 0.8943005742347798, "learning_rate": 1.3405058726566894e-06, "loss": 0.3589, "step": 22396 }, { "epoch": 0.7685998627316404, "grad_norm": 0.7730035119969516, "learning_rate": 1.3401272072957644e-06, "loss": 0.2449, "step": 22397 }, { "epoch": 0.7686341798215511, "grad_norm": 0.7487537805504101, "learning_rate": 1.3397485871483007e-06, "loss": 0.2651, "step": 22398 }, { "epoch": 0.7686684969114619, "grad_norm": 0.8432197175126801, "learning_rate": 1.3393700122189761e-06, "loss": 0.2619, "step": 22399 }, { "epoch": 0.7687028140013726, "grad_norm": 0.7517219823129505, "learning_rate": 1.3389914825124683e-06, "loss": 0.2546, "step": 22400 }, { "epoch": 0.7687371310912835, "grad_norm": 0.8209308553507142, "learning_rate": 1.3386129980334495e-06, "loss": 0.2454, "step": 22401 }, { "epoch": 0.7687714481811943, "grad_norm": 0.7861044837804033, "learning_rate": 1.3382345587866013e-06, "loss": 0.2863, "step": 22402 }, { "epoch": 0.768805765271105, "grad_norm": 0.736032859226102, "learning_rate": 1.3378561647765943e-06, "loss": 0.301, "step": 22403 }, { "epoch": 0.7688400823610158, "grad_norm": 0.7327041840574702, "learning_rate": 1.3374778160081047e-06, "loss": 0.2711, "step": 22404 }, { "epoch": 0.7688743994509266, "grad_norm": 0.6942436212784563, "learning_rate": 1.337099512485807e-06, "loss": 0.2338, "step": 22405 }, { "epoch": 0.7689087165408374, "grad_norm": 0.7973929287725171, "learning_rate": 1.3367212542143744e-06, "loss": 0.2224, "step": 22406 }, { "epoch": 0.7689430336307481, "grad_norm": 0.7747781999787402, "learning_rate": 1.336343041198479e-06, "loss": 0.2633, "step": 22407 }, { "epoch": 0.7689773507206589, "grad_norm": 0.7737052194722325, "learning_rate": 1.3359648734427943e-06, "loss": 0.2385, "step": 22408 }, { "epoch": 0.7690116678105696, "grad_norm": 0.821002336907205, "learning_rate": 1.3355867509519932e-06, "loss": 0.2854, "step": 22409 }, { "epoch": 0.7690459849004805, "grad_norm": 0.7981914744219704, "learning_rate": 1.335208673730743e-06, "loss": 0.2836, "step": 22410 }, { "epoch": 0.7690803019903912, "grad_norm": 0.7547088713814034, "learning_rate": 1.3348306417837176e-06, "loss": 0.2335, "step": 22411 }, { "epoch": 0.769114619080302, "grad_norm": 0.8513561898975652, "learning_rate": 1.3344526551155867e-06, "loss": 0.2671, "step": 22412 }, { "epoch": 0.7691489361702127, "grad_norm": 0.8578637017542325, "learning_rate": 1.33407471373102e-06, "loss": 0.2772, "step": 22413 }, { "epoch": 0.7691832532601235, "grad_norm": 0.7339530089703069, "learning_rate": 1.3336968176346871e-06, "loss": 0.2604, "step": 22414 }, { "epoch": 0.7692175703500344, "grad_norm": 0.7853155038476965, "learning_rate": 1.333318966831253e-06, "loss": 0.2906, "step": 22415 }, { "epoch": 0.7692518874399451, "grad_norm": 0.7083673114120125, "learning_rate": 1.3329411613253912e-06, "loss": 0.2745, "step": 22416 }, { "epoch": 0.7692862045298559, "grad_norm": 0.732169066149319, "learning_rate": 1.3325634011217642e-06, "loss": 0.2433, "step": 22417 }, { "epoch": 0.7693205216197666, "grad_norm": 0.8053236668094299, "learning_rate": 1.3321856862250404e-06, "loss": 0.2824, "step": 22418 }, { "epoch": 0.7693548387096775, "grad_norm": 0.7324774185985817, "learning_rate": 1.3318080166398861e-06, "loss": 0.2226, "step": 22419 }, { "epoch": 0.7693891557995882, "grad_norm": 0.7307337943588278, "learning_rate": 1.3314303923709676e-06, "loss": 0.2525, "step": 22420 }, { "epoch": 0.769423472889499, "grad_norm": 0.6980429497694074, "learning_rate": 1.3310528134229488e-06, "loss": 0.2047, "step": 22421 }, { "epoch": 0.7694577899794097, "grad_norm": 0.8262558508698515, "learning_rate": 1.3306752798004957e-06, "loss": 0.2527, "step": 22422 }, { "epoch": 0.7694921070693205, "grad_norm": 0.8667588455937065, "learning_rate": 1.3302977915082726e-06, "loss": 0.3359, "step": 22423 }, { "epoch": 0.7695264241592313, "grad_norm": 0.7573727355992497, "learning_rate": 1.3299203485509387e-06, "loss": 0.2415, "step": 22424 }, { "epoch": 0.7695607412491421, "grad_norm": 0.7472202107662809, "learning_rate": 1.3295429509331632e-06, "loss": 0.2037, "step": 22425 }, { "epoch": 0.7695950583390528, "grad_norm": 0.7711756455108717, "learning_rate": 1.3291655986596037e-06, "loss": 0.2319, "step": 22426 }, { "epoch": 0.7696293754289636, "grad_norm": 0.8172438099154076, "learning_rate": 1.3287882917349231e-06, "loss": 0.2601, "step": 22427 }, { "epoch": 0.7696636925188745, "grad_norm": 0.7342438120722944, "learning_rate": 1.3284110301637836e-06, "loss": 0.2479, "step": 22428 }, { "epoch": 0.7696980096087852, "grad_norm": 0.8522907117058497, "learning_rate": 1.3280338139508446e-06, "loss": 0.3312, "step": 22429 }, { "epoch": 0.769732326698696, "grad_norm": 0.8608017068220601, "learning_rate": 1.327656643100767e-06, "loss": 0.2368, "step": 22430 }, { "epoch": 0.7697666437886067, "grad_norm": 0.7636399619685911, "learning_rate": 1.32727951761821e-06, "loss": 0.2889, "step": 22431 }, { "epoch": 0.7698009608785175, "grad_norm": 0.8008008250837693, "learning_rate": 1.3269024375078344e-06, "loss": 0.2867, "step": 22432 }, { "epoch": 0.7698352779684283, "grad_norm": 0.8565789170722927, "learning_rate": 1.3265254027742929e-06, "loss": 0.2694, "step": 22433 }, { "epoch": 0.7698695950583391, "grad_norm": 0.6904162343623401, "learning_rate": 1.3261484134222492e-06, "loss": 0.2027, "step": 22434 }, { "epoch": 0.7699039121482498, "grad_norm": 0.7411672509461891, "learning_rate": 1.32577146945636e-06, "loss": 0.2735, "step": 22435 }, { "epoch": 0.7699382292381606, "grad_norm": 0.7847640054446546, "learning_rate": 1.3253945708812777e-06, "loss": 0.2919, "step": 22436 }, { "epoch": 0.7699725463280713, "grad_norm": 0.7187288228053517, "learning_rate": 1.3250177177016643e-06, "loss": 0.3058, "step": 22437 }, { "epoch": 0.7700068634179822, "grad_norm": 0.7726680027664778, "learning_rate": 1.3246409099221707e-06, "loss": 0.2997, "step": 22438 }, { "epoch": 0.7700411805078929, "grad_norm": 0.7036864073807535, "learning_rate": 1.3242641475474537e-06, "loss": 0.2283, "step": 22439 }, { "epoch": 0.7700754975978037, "grad_norm": 0.6561309955796476, "learning_rate": 1.3238874305821682e-06, "loss": 0.2177, "step": 22440 }, { "epoch": 0.7701098146877144, "grad_norm": 0.7896172137290195, "learning_rate": 1.3235107590309665e-06, "loss": 0.2808, "step": 22441 }, { "epoch": 0.7701441317776253, "grad_norm": 0.7567038468097889, "learning_rate": 1.3231341328985038e-06, "loss": 0.2239, "step": 22442 }, { "epoch": 0.7701784488675361, "grad_norm": 0.7977937450940904, "learning_rate": 1.322757552189432e-06, "loss": 0.2692, "step": 22443 }, { "epoch": 0.7702127659574468, "grad_norm": 0.822609448648907, "learning_rate": 1.3223810169084028e-06, "loss": 0.2267, "step": 22444 }, { "epoch": 0.7702470830473576, "grad_norm": 0.7496045885908185, "learning_rate": 1.322004527060069e-06, "loss": 0.2702, "step": 22445 }, { "epoch": 0.7702814001372683, "grad_norm": 0.7570978190656237, "learning_rate": 1.3216280826490824e-06, "loss": 0.2344, "step": 22446 }, { "epoch": 0.7703157172271792, "grad_norm": 0.8183690221997801, "learning_rate": 1.3212516836800899e-06, "loss": 0.2212, "step": 22447 }, { "epoch": 0.7703500343170899, "grad_norm": 0.7599922449815288, "learning_rate": 1.320875330157746e-06, "loss": 0.2807, "step": 22448 }, { "epoch": 0.7703843514070007, "grad_norm": 0.7416165409352101, "learning_rate": 1.320499022086697e-06, "loss": 0.3083, "step": 22449 }, { "epoch": 0.7704186684969114, "grad_norm": 0.7719949323882193, "learning_rate": 1.320122759471591e-06, "loss": 0.2654, "step": 22450 }, { "epoch": 0.7704529855868223, "grad_norm": 0.8856571886959349, "learning_rate": 1.3197465423170807e-06, "loss": 0.2394, "step": 22451 }, { "epoch": 0.770487302676733, "grad_norm": 0.7852474836408595, "learning_rate": 1.31937037062781e-06, "loss": 0.2859, "step": 22452 }, { "epoch": 0.7705216197666438, "grad_norm": 0.8598563044491615, "learning_rate": 1.3189942444084269e-06, "loss": 0.2491, "step": 22453 }, { "epoch": 0.7705559368565545, "grad_norm": 0.8682137818108222, "learning_rate": 1.3186181636635786e-06, "loss": 0.2629, "step": 22454 }, { "epoch": 0.7705902539464653, "grad_norm": 0.7135956431872991, "learning_rate": 1.3182421283979107e-06, "loss": 0.2339, "step": 22455 }, { "epoch": 0.7706245710363762, "grad_norm": 0.7390567331296585, "learning_rate": 1.3178661386160686e-06, "loss": 0.234, "step": 22456 }, { "epoch": 0.7706588881262869, "grad_norm": 0.7674038969674402, "learning_rate": 1.3174901943226976e-06, "loss": 0.3128, "step": 22457 }, { "epoch": 0.7706932052161977, "grad_norm": 0.7537560603615192, "learning_rate": 1.317114295522443e-06, "loss": 0.2346, "step": 22458 }, { "epoch": 0.7707275223061084, "grad_norm": 0.9337811114026707, "learning_rate": 1.3167384422199447e-06, "loss": 0.2485, "step": 22459 }, { "epoch": 0.7707618393960192, "grad_norm": 0.7861259724441637, "learning_rate": 1.3163626344198522e-06, "loss": 0.2046, "step": 22460 }, { "epoch": 0.77079615648593, "grad_norm": 0.7990846472488213, "learning_rate": 1.3159868721268027e-06, "loss": 0.2767, "step": 22461 }, { "epoch": 0.7708304735758408, "grad_norm": 0.8633176049642713, "learning_rate": 1.3156111553454398e-06, "loss": 0.2802, "step": 22462 }, { "epoch": 0.7708647906657515, "grad_norm": 0.7312862684651855, "learning_rate": 1.315235484080406e-06, "loss": 0.2975, "step": 22463 }, { "epoch": 0.7708991077556623, "grad_norm": 0.8803702410346232, "learning_rate": 1.314859858336342e-06, "loss": 0.2507, "step": 22464 }, { "epoch": 0.7709334248455731, "grad_norm": 0.7809788814671869, "learning_rate": 1.3144842781178873e-06, "loss": 0.2336, "step": 22465 }, { "epoch": 0.7709677419354839, "grad_norm": 0.7980925335905658, "learning_rate": 1.3141087434296828e-06, "loss": 0.2702, "step": 22466 }, { "epoch": 0.7710020590253946, "grad_norm": 0.7528740844299919, "learning_rate": 1.3137332542763692e-06, "loss": 0.2648, "step": 22467 }, { "epoch": 0.7710363761153054, "grad_norm": 0.8741465061009728, "learning_rate": 1.31335781066258e-06, "loss": 0.2452, "step": 22468 }, { "epoch": 0.7710706932052162, "grad_norm": 0.7833263836811054, "learning_rate": 1.3129824125929602e-06, "loss": 0.256, "step": 22469 }, { "epoch": 0.771105010295127, "grad_norm": 0.7981589072189021, "learning_rate": 1.3126070600721424e-06, "loss": 0.2728, "step": 22470 }, { "epoch": 0.7711393273850378, "grad_norm": 0.7626501685116601, "learning_rate": 1.3122317531047657e-06, "loss": 0.2996, "step": 22471 }, { "epoch": 0.7711736444749485, "grad_norm": 0.7303450096202828, "learning_rate": 1.3118564916954656e-06, "loss": 0.3001, "step": 22472 }, { "epoch": 0.7712079615648593, "grad_norm": 0.8444961052247759, "learning_rate": 1.3114812758488786e-06, "loss": 0.2309, "step": 22473 }, { "epoch": 0.7712422786547701, "grad_norm": 0.7866349701101283, "learning_rate": 1.3111061055696405e-06, "loss": 0.2459, "step": 22474 }, { "epoch": 0.7712765957446809, "grad_norm": 0.7160166460826729, "learning_rate": 1.3107309808623854e-06, "loss": 0.2682, "step": 22475 }, { "epoch": 0.7713109128345916, "grad_norm": 0.7779737602554251, "learning_rate": 1.3103559017317475e-06, "loss": 0.3076, "step": 22476 }, { "epoch": 0.7713452299245024, "grad_norm": 0.8531497231631208, "learning_rate": 1.3099808681823611e-06, "loss": 0.2378, "step": 22477 }, { "epoch": 0.7713795470144131, "grad_norm": 0.7115423482829435, "learning_rate": 1.3096058802188588e-06, "loss": 0.2224, "step": 22478 }, { "epoch": 0.771413864104324, "grad_norm": 0.8088935754516685, "learning_rate": 1.309230937845873e-06, "loss": 0.2741, "step": 22479 }, { "epoch": 0.7714481811942348, "grad_norm": 0.7049033048110974, "learning_rate": 1.3088560410680362e-06, "loss": 0.2609, "step": 22480 }, { "epoch": 0.7714824982841455, "grad_norm": 0.8324842752342694, "learning_rate": 1.3084811898899812e-06, "loss": 0.3214, "step": 22481 }, { "epoch": 0.7715168153740563, "grad_norm": 0.7067560853484351, "learning_rate": 1.3081063843163339e-06, "loss": 0.2787, "step": 22482 }, { "epoch": 0.771551132463967, "grad_norm": 0.8101905087464, "learning_rate": 1.3077316243517312e-06, "loss": 0.2759, "step": 22483 }, { "epoch": 0.7715854495538779, "grad_norm": 0.7443492276063641, "learning_rate": 1.3073569100007976e-06, "loss": 0.2306, "step": 22484 }, { "epoch": 0.7716197666437886, "grad_norm": 0.8062444538204961, "learning_rate": 1.306982241268165e-06, "loss": 0.3055, "step": 22485 }, { "epoch": 0.7716540837336994, "grad_norm": 0.8094009082292871, "learning_rate": 1.3066076181584602e-06, "loss": 0.2989, "step": 22486 }, { "epoch": 0.7716884008236101, "grad_norm": 0.7370425182078706, "learning_rate": 1.3062330406763125e-06, "loss": 0.2743, "step": 22487 }, { "epoch": 0.771722717913521, "grad_norm": 0.7094537566895274, "learning_rate": 1.3058585088263492e-06, "loss": 0.2413, "step": 22488 }, { "epoch": 0.7717570350034317, "grad_norm": 0.8695865843241953, "learning_rate": 1.3054840226131971e-06, "loss": 0.3279, "step": 22489 }, { "epoch": 0.7717913520933425, "grad_norm": 0.8218884001838325, "learning_rate": 1.305109582041484e-06, "loss": 0.2567, "step": 22490 }, { "epoch": 0.7718256691832532, "grad_norm": 0.8970992135964976, "learning_rate": 1.304735187115831e-06, "loss": 0.2854, "step": 22491 }, { "epoch": 0.771859986273164, "grad_norm": 0.9063685571551142, "learning_rate": 1.3043608378408696e-06, "loss": 0.2679, "step": 22492 }, { "epoch": 0.7718943033630749, "grad_norm": 0.7725366156667779, "learning_rate": 1.3039865342212198e-06, "loss": 0.2684, "step": 22493 }, { "epoch": 0.7719286204529856, "grad_norm": 0.7757717348984887, "learning_rate": 1.3036122762615061e-06, "loss": 0.3097, "step": 22494 }, { "epoch": 0.7719629375428964, "grad_norm": 0.7871089736113362, "learning_rate": 1.3032380639663561e-06, "loss": 0.2815, "step": 22495 }, { "epoch": 0.7719972546328071, "grad_norm": 0.7664826430674936, "learning_rate": 1.3028638973403879e-06, "loss": 0.2509, "step": 22496 }, { "epoch": 0.772031571722718, "grad_norm": 0.7408425597255699, "learning_rate": 1.3024897763882261e-06, "loss": 0.2125, "step": 22497 }, { "epoch": 0.7720658888126287, "grad_norm": 0.8168054677123846, "learning_rate": 1.3021157011144918e-06, "loss": 0.3029, "step": 22498 }, { "epoch": 0.7721002059025395, "grad_norm": 0.7715063485646019, "learning_rate": 1.3017416715238084e-06, "loss": 0.2308, "step": 22499 }, { "epoch": 0.7721345229924502, "grad_norm": 0.7508837938715517, "learning_rate": 1.3013676876207925e-06, "loss": 0.282, "step": 22500 }, { "epoch": 0.772168840082361, "grad_norm": 0.8051735027905095, "learning_rate": 1.3009937494100677e-06, "loss": 0.3103, "step": 22501 }, { "epoch": 0.7722031571722718, "grad_norm": 0.7922665597081342, "learning_rate": 1.3006198568962535e-06, "loss": 0.2925, "step": 22502 }, { "epoch": 0.7722374742621826, "grad_norm": 0.6912006537793406, "learning_rate": 1.3002460100839658e-06, "loss": 0.2712, "step": 22503 }, { "epoch": 0.7722717913520933, "grad_norm": 0.7487363466176413, "learning_rate": 1.2998722089778276e-06, "loss": 0.2384, "step": 22504 }, { "epoch": 0.7723061084420041, "grad_norm": 0.7975930584602479, "learning_rate": 1.299498453582453e-06, "loss": 0.252, "step": 22505 }, { "epoch": 0.7723404255319148, "grad_norm": 0.7771946681794861, "learning_rate": 1.2991247439024607e-06, "loss": 0.2934, "step": 22506 }, { "epoch": 0.7723747426218257, "grad_norm": 0.8327632247245692, "learning_rate": 1.2987510799424674e-06, "loss": 0.3059, "step": 22507 }, { "epoch": 0.7724090597117365, "grad_norm": 0.7624583189532627, "learning_rate": 1.2983774617070887e-06, "loss": 0.2353, "step": 22508 }, { "epoch": 0.7724433768016472, "grad_norm": 0.8825420467044308, "learning_rate": 1.2980038892009418e-06, "loss": 0.2928, "step": 22509 }, { "epoch": 0.772477693891558, "grad_norm": 0.7630329229020761, "learning_rate": 1.2976303624286396e-06, "loss": 0.22, "step": 22510 }, { "epoch": 0.7725120109814688, "grad_norm": 0.8970833541428819, "learning_rate": 1.2972568813947983e-06, "loss": 0.2631, "step": 22511 }, { "epoch": 0.7725463280713796, "grad_norm": 0.7731587556525296, "learning_rate": 1.2968834461040314e-06, "loss": 0.2301, "step": 22512 }, { "epoch": 0.7725806451612903, "grad_norm": 1.199877981933662, "learning_rate": 1.2965100565609533e-06, "loss": 0.2482, "step": 22513 }, { "epoch": 0.7726149622512011, "grad_norm": 0.80447468758661, "learning_rate": 1.296136712770173e-06, "loss": 0.2413, "step": 22514 }, { "epoch": 0.7726492793411118, "grad_norm": 0.770566509889498, "learning_rate": 1.295763414736308e-06, "loss": 0.2765, "step": 22515 }, { "epoch": 0.7726835964310227, "grad_norm": 0.859887060857384, "learning_rate": 1.295390162463966e-06, "loss": 0.2483, "step": 22516 }, { "epoch": 0.7727179135209334, "grad_norm": 0.7680000197244298, "learning_rate": 1.295016955957758e-06, "loss": 0.2298, "step": 22517 }, { "epoch": 0.7727522306108442, "grad_norm": 0.7495508470261892, "learning_rate": 1.2946437952222985e-06, "loss": 0.2466, "step": 22518 }, { "epoch": 0.7727865477007549, "grad_norm": 0.7429201171952731, "learning_rate": 1.2942706802621935e-06, "loss": 0.2439, "step": 22519 }, { "epoch": 0.7728208647906658, "grad_norm": 0.7987284264599277, "learning_rate": 1.293897611082054e-06, "loss": 0.2193, "step": 22520 }, { "epoch": 0.7728551818805766, "grad_norm": 0.6876537172862146, "learning_rate": 1.2935245876864884e-06, "loss": 0.242, "step": 22521 }, { "epoch": 0.7728894989704873, "grad_norm": 0.7530241900772661, "learning_rate": 1.2931516100801056e-06, "loss": 0.2457, "step": 22522 }, { "epoch": 0.7729238160603981, "grad_norm": 0.7890128012518435, "learning_rate": 1.2927786782675122e-06, "loss": 0.2518, "step": 22523 }, { "epoch": 0.7729581331503088, "grad_norm": 0.7745375048629444, "learning_rate": 1.2924057922533162e-06, "loss": 0.2706, "step": 22524 }, { "epoch": 0.7729924502402197, "grad_norm": 0.7622834879357059, "learning_rate": 1.2920329520421259e-06, "loss": 0.2763, "step": 22525 }, { "epoch": 0.7730267673301304, "grad_norm": 0.7829024829253219, "learning_rate": 1.2916601576385424e-06, "loss": 0.2499, "step": 22526 }, { "epoch": 0.7730610844200412, "grad_norm": 0.7537376870720436, "learning_rate": 1.2912874090471765e-06, "loss": 0.3331, "step": 22527 }, { "epoch": 0.7730954015099519, "grad_norm": 0.8623026109323926, "learning_rate": 1.2909147062726302e-06, "loss": 0.2715, "step": 22528 }, { "epoch": 0.7731297185998627, "grad_norm": 0.8397059717942533, "learning_rate": 1.2905420493195075e-06, "loss": 0.2438, "step": 22529 }, { "epoch": 0.7731640356897735, "grad_norm": 0.7439555967105312, "learning_rate": 1.2901694381924135e-06, "loss": 0.2413, "step": 22530 }, { "epoch": 0.7731983527796843, "grad_norm": 0.6757543529606713, "learning_rate": 1.2897968728959504e-06, "loss": 0.2415, "step": 22531 }, { "epoch": 0.773232669869595, "grad_norm": 0.9259981194461248, "learning_rate": 1.2894243534347217e-06, "loss": 0.2969, "step": 22532 }, { "epoch": 0.7732669869595058, "grad_norm": 0.7623673014914526, "learning_rate": 1.2890518798133283e-06, "loss": 0.2189, "step": 22533 }, { "epoch": 0.7733013040494167, "grad_norm": 0.7989585935606216, "learning_rate": 1.2886794520363748e-06, "loss": 0.2253, "step": 22534 }, { "epoch": 0.7733356211393274, "grad_norm": 0.7986126544031232, "learning_rate": 1.288307070108456e-06, "loss": 0.2582, "step": 22535 }, { "epoch": 0.7733699382292382, "grad_norm": 0.8281490494828085, "learning_rate": 1.287934734034179e-06, "loss": 0.3104, "step": 22536 }, { "epoch": 0.7734042553191489, "grad_norm": 0.7638410283634837, "learning_rate": 1.2875624438181394e-06, "loss": 0.2179, "step": 22537 }, { "epoch": 0.7734385724090597, "grad_norm": 0.7320145784741455, "learning_rate": 1.287190199464937e-06, "loss": 0.2684, "step": 22538 }, { "epoch": 0.7734728894989705, "grad_norm": 0.7180880866938987, "learning_rate": 1.286818000979172e-06, "loss": 0.228, "step": 22539 }, { "epoch": 0.7735072065888813, "grad_norm": 0.7729856635000627, "learning_rate": 1.286445848365439e-06, "loss": 0.2451, "step": 22540 }, { "epoch": 0.773541523678792, "grad_norm": 0.8042764204072895, "learning_rate": 1.2860737416283415e-06, "loss": 0.2762, "step": 22541 }, { "epoch": 0.7735758407687028, "grad_norm": 0.8027392736782704, "learning_rate": 1.285701680772471e-06, "loss": 0.2954, "step": 22542 }, { "epoch": 0.7736101578586135, "grad_norm": 0.7778964408782204, "learning_rate": 1.2853296658024256e-06, "loss": 0.2585, "step": 22543 }, { "epoch": 0.7736444749485244, "grad_norm": 0.7661713254558106, "learning_rate": 1.2849576967228016e-06, "loss": 0.2393, "step": 22544 }, { "epoch": 0.7736787920384351, "grad_norm": 0.885394932397416, "learning_rate": 1.2845857735381938e-06, "loss": 0.276, "step": 22545 }, { "epoch": 0.7737131091283459, "grad_norm": 0.7532368932309107, "learning_rate": 1.2842138962531976e-06, "loss": 0.2357, "step": 22546 }, { "epoch": 0.7737474262182567, "grad_norm": 0.7498460573817949, "learning_rate": 1.2838420648724058e-06, "loss": 0.2619, "step": 22547 }, { "epoch": 0.7737817433081675, "grad_norm": 0.7083140466215672, "learning_rate": 1.2834702794004144e-06, "loss": 0.2607, "step": 22548 }, { "epoch": 0.7738160603980783, "grad_norm": 0.7570828650241596, "learning_rate": 1.2830985398418116e-06, "loss": 0.2282, "step": 22549 }, { "epoch": 0.773850377487989, "grad_norm": 0.7392879921882592, "learning_rate": 1.282726846201196e-06, "loss": 0.2732, "step": 22550 }, { "epoch": 0.7738846945778998, "grad_norm": 0.7776217086096435, "learning_rate": 1.2823551984831545e-06, "loss": 0.3026, "step": 22551 }, { "epoch": 0.7739190116678105, "grad_norm": 0.7629859767573001, "learning_rate": 1.2819835966922806e-06, "loss": 0.2431, "step": 22552 }, { "epoch": 0.7739533287577214, "grad_norm": 0.8143478999359054, "learning_rate": 1.2816120408331645e-06, "loss": 0.2636, "step": 22553 }, { "epoch": 0.7739876458476321, "grad_norm": 0.9174135046565364, "learning_rate": 1.2812405309103958e-06, "loss": 0.2952, "step": 22554 }, { "epoch": 0.7740219629375429, "grad_norm": 0.8240524495089294, "learning_rate": 1.2808690669285657e-06, "loss": 0.3023, "step": 22555 }, { "epoch": 0.7740562800274536, "grad_norm": 0.8009655752935224, "learning_rate": 1.2804976488922615e-06, "loss": 0.2482, "step": 22556 }, { "epoch": 0.7740905971173645, "grad_norm": 1.1112080966885107, "learning_rate": 1.280126276806074e-06, "loss": 0.2405, "step": 22557 }, { "epoch": 0.7741249142072752, "grad_norm": 0.7824129524861135, "learning_rate": 1.2797549506745866e-06, "loss": 0.2383, "step": 22558 }, { "epoch": 0.774159231297186, "grad_norm": 0.7951878349837043, "learning_rate": 1.279383670502392e-06, "loss": 0.2294, "step": 22559 }, { "epoch": 0.7741935483870968, "grad_norm": 0.7391163652978404, "learning_rate": 1.279012436294073e-06, "loss": 0.2616, "step": 22560 }, { "epoch": 0.7742278654770075, "grad_norm": 0.9133541717596044, "learning_rate": 1.2786412480542154e-06, "loss": 0.259, "step": 22561 }, { "epoch": 0.7742621825669184, "grad_norm": 0.6945524391432771, "learning_rate": 1.2782701057874096e-06, "loss": 0.2361, "step": 22562 }, { "epoch": 0.7742964996568291, "grad_norm": 0.7887874335415812, "learning_rate": 1.2778990094982364e-06, "loss": 0.2398, "step": 22563 }, { "epoch": 0.7743308167467399, "grad_norm": 0.771241799761764, "learning_rate": 1.277527959191281e-06, "loss": 0.2867, "step": 22564 }, { "epoch": 0.7743651338366506, "grad_norm": 0.8160776994157364, "learning_rate": 1.2771569548711276e-06, "loss": 0.3113, "step": 22565 }, { "epoch": 0.7743994509265614, "grad_norm": 0.7716779151579811, "learning_rate": 1.27678599654236e-06, "loss": 0.2657, "step": 22566 }, { "epoch": 0.7744337680164722, "grad_norm": 0.8562353258822112, "learning_rate": 1.2764150842095607e-06, "loss": 0.2594, "step": 22567 }, { "epoch": 0.774468085106383, "grad_norm": 0.7418648477279004, "learning_rate": 1.2760442178773113e-06, "loss": 0.2404, "step": 22568 }, { "epoch": 0.7745024021962937, "grad_norm": 0.8005546703605212, "learning_rate": 1.275673397550194e-06, "loss": 0.2774, "step": 22569 }, { "epoch": 0.7745367192862045, "grad_norm": 0.8763263408822124, "learning_rate": 1.2753026232327898e-06, "loss": 0.2574, "step": 22570 }, { "epoch": 0.7745710363761154, "grad_norm": 0.6654884732499565, "learning_rate": 1.2749318949296806e-06, "loss": 0.2341, "step": 22571 }, { "epoch": 0.7746053534660261, "grad_norm": 0.7940652145117161, "learning_rate": 1.2745612126454416e-06, "loss": 0.2808, "step": 22572 }, { "epoch": 0.7746396705559369, "grad_norm": 0.834607551449913, "learning_rate": 1.2741905763846585e-06, "loss": 0.2753, "step": 22573 }, { "epoch": 0.7746739876458476, "grad_norm": 0.7840506533894317, "learning_rate": 1.2738199861519057e-06, "loss": 0.2584, "step": 22574 }, { "epoch": 0.7747083047357584, "grad_norm": 0.8795153530227947, "learning_rate": 1.2734494419517623e-06, "loss": 0.2599, "step": 22575 }, { "epoch": 0.7747426218256692, "grad_norm": 0.7841033292804839, "learning_rate": 1.273078943788807e-06, "loss": 0.2497, "step": 22576 }, { "epoch": 0.77477693891558, "grad_norm": 0.7436616095639633, "learning_rate": 1.2727084916676152e-06, "loss": 0.2601, "step": 22577 }, { "epoch": 0.7748112560054907, "grad_norm": 0.8825547929287444, "learning_rate": 1.2723380855927654e-06, "loss": 0.2349, "step": 22578 }, { "epoch": 0.7748455730954015, "grad_norm": 0.8209296395290092, "learning_rate": 1.2719677255688317e-06, "loss": 0.2741, "step": 22579 }, { "epoch": 0.7748798901853123, "grad_norm": 0.7387795459738294, "learning_rate": 1.271597411600392e-06, "loss": 0.2508, "step": 22580 }, { "epoch": 0.7749142072752231, "grad_norm": 0.7465175758274966, "learning_rate": 1.2712271436920165e-06, "loss": 0.3109, "step": 22581 }, { "epoch": 0.7749485243651338, "grad_norm": 0.7531773386820912, "learning_rate": 1.2708569218482852e-06, "loss": 0.2552, "step": 22582 }, { "epoch": 0.7749828414550446, "grad_norm": 0.8920781424226518, "learning_rate": 1.2704867460737674e-06, "loss": 0.2456, "step": 22583 }, { "epoch": 0.7750171585449553, "grad_norm": 0.7257966024347651, "learning_rate": 1.2701166163730365e-06, "loss": 0.2829, "step": 22584 }, { "epoch": 0.7750514756348662, "grad_norm": 0.8089090019270948, "learning_rate": 1.2697465327506687e-06, "loss": 0.2402, "step": 22585 }, { "epoch": 0.775085792724777, "grad_norm": 0.7340833957338546, "learning_rate": 1.269376495211232e-06, "loss": 0.2709, "step": 22586 }, { "epoch": 0.7751201098146877, "grad_norm": 0.7686041535152253, "learning_rate": 1.2690065037592996e-06, "loss": 0.2483, "step": 22587 }, { "epoch": 0.7751544269045985, "grad_norm": 0.741453125168047, "learning_rate": 1.2686365583994415e-06, "loss": 0.2302, "step": 22588 }, { "epoch": 0.7751887439945092, "grad_norm": 0.8455348006148148, "learning_rate": 1.2682666591362286e-06, "loss": 0.281, "step": 22589 }, { "epoch": 0.7752230610844201, "grad_norm": 0.8231590440325643, "learning_rate": 1.2678968059742303e-06, "loss": 0.328, "step": 22590 }, { "epoch": 0.7752573781743308, "grad_norm": 0.8946321019298403, "learning_rate": 1.2675269989180156e-06, "loss": 0.2625, "step": 22591 }, { "epoch": 0.7752916952642416, "grad_norm": 0.7904934498393503, "learning_rate": 1.2671572379721547e-06, "loss": 0.2821, "step": 22592 }, { "epoch": 0.7753260123541523, "grad_norm": 0.8746363501584783, "learning_rate": 1.2667875231412112e-06, "loss": 0.283, "step": 22593 }, { "epoch": 0.7753603294440632, "grad_norm": 0.8189659212632224, "learning_rate": 1.2664178544297585e-06, "loss": 0.2462, "step": 22594 }, { "epoch": 0.7753946465339739, "grad_norm": 0.720579737757817, "learning_rate": 1.266048231842359e-06, "loss": 0.2944, "step": 22595 }, { "epoch": 0.7754289636238847, "grad_norm": 0.796614534843375, "learning_rate": 1.26567865538358e-06, "loss": 0.2178, "step": 22596 }, { "epoch": 0.7754632807137954, "grad_norm": 0.8461770547758241, "learning_rate": 1.2653091250579868e-06, "loss": 0.2638, "step": 22597 }, { "epoch": 0.7754975978037062, "grad_norm": 0.9079471348335765, "learning_rate": 1.264939640870146e-06, "loss": 0.3048, "step": 22598 }, { "epoch": 0.7755319148936171, "grad_norm": 0.7494807172688066, "learning_rate": 1.2645702028246205e-06, "loss": 0.2093, "step": 22599 }, { "epoch": 0.7755662319835278, "grad_norm": 0.9868225389448491, "learning_rate": 1.2642008109259752e-06, "loss": 0.2654, "step": 22600 }, { "epoch": 0.7756005490734386, "grad_norm": 0.6829028235861738, "learning_rate": 1.2638314651787737e-06, "loss": 0.2223, "step": 22601 }, { "epoch": 0.7756348661633493, "grad_norm": 0.7873376160058521, "learning_rate": 1.2634621655875784e-06, "loss": 0.2909, "step": 22602 }, { "epoch": 0.7756691832532602, "grad_norm": 0.8272795905381868, "learning_rate": 1.263092912156953e-06, "loss": 0.273, "step": 22603 }, { "epoch": 0.7757035003431709, "grad_norm": 0.75193461803004, "learning_rate": 1.2627237048914554e-06, "loss": 0.2762, "step": 22604 }, { "epoch": 0.7757378174330817, "grad_norm": 0.8330838519051573, "learning_rate": 1.2623545437956502e-06, "loss": 0.2311, "step": 22605 }, { "epoch": 0.7757721345229924, "grad_norm": 0.851712457700923, "learning_rate": 1.2619854288740984e-06, "loss": 0.2291, "step": 22606 }, { "epoch": 0.7758064516129032, "grad_norm": 0.798954810330301, "learning_rate": 1.2616163601313557e-06, "loss": 0.2561, "step": 22607 }, { "epoch": 0.775840768702814, "grad_norm": 0.7582687448778681, "learning_rate": 1.2612473375719876e-06, "loss": 0.2379, "step": 22608 }, { "epoch": 0.7758750857927248, "grad_norm": 0.7902663158623355, "learning_rate": 1.2608783612005476e-06, "loss": 0.2817, "step": 22609 }, { "epoch": 0.7759094028826355, "grad_norm": 0.7988865554352925, "learning_rate": 1.2605094310215964e-06, "loss": 0.2539, "step": 22610 }, { "epoch": 0.7759437199725463, "grad_norm": 0.8462078062954965, "learning_rate": 1.260140547039691e-06, "loss": 0.3195, "step": 22611 }, { "epoch": 0.775978037062457, "grad_norm": 0.6860063994145298, "learning_rate": 1.2597717092593892e-06, "loss": 0.2683, "step": 22612 }, { "epoch": 0.7760123541523679, "grad_norm": 0.7716475085041441, "learning_rate": 1.2594029176852474e-06, "loss": 0.2633, "step": 22613 }, { "epoch": 0.7760466712422787, "grad_norm": 0.8220050919435763, "learning_rate": 1.2590341723218213e-06, "loss": 0.2865, "step": 22614 }, { "epoch": 0.7760809883321894, "grad_norm": 0.83125125903268, "learning_rate": 1.2586654731736681e-06, "loss": 0.2604, "step": 22615 }, { "epoch": 0.7761153054221002, "grad_norm": 0.8482543309499343, "learning_rate": 1.2582968202453378e-06, "loss": 0.2619, "step": 22616 }, { "epoch": 0.776149622512011, "grad_norm": 0.810331670474948, "learning_rate": 1.2579282135413907e-06, "loss": 0.3056, "step": 22617 }, { "epoch": 0.7761839396019218, "grad_norm": 0.8441425433735571, "learning_rate": 1.2575596530663764e-06, "loss": 0.2672, "step": 22618 }, { "epoch": 0.7762182566918325, "grad_norm": 0.8427026755021952, "learning_rate": 1.2571911388248492e-06, "loss": 0.23, "step": 22619 }, { "epoch": 0.7762525737817433, "grad_norm": 0.8166438725001016, "learning_rate": 1.2568226708213622e-06, "loss": 0.3031, "step": 22620 }, { "epoch": 0.776286890871654, "grad_norm": 0.8193972297461292, "learning_rate": 1.2564542490604664e-06, "loss": 0.3082, "step": 22621 }, { "epoch": 0.7763212079615649, "grad_norm": 0.7713162368136601, "learning_rate": 1.256085873546714e-06, "loss": 0.2884, "step": 22622 }, { "epoch": 0.7763555250514756, "grad_norm": 0.8113614878550189, "learning_rate": 1.2557175442846558e-06, "loss": 0.2704, "step": 22623 }, { "epoch": 0.7763898421413864, "grad_norm": 0.7898570407022815, "learning_rate": 1.2553492612788436e-06, "loss": 0.2578, "step": 22624 }, { "epoch": 0.7764241592312972, "grad_norm": 0.7353486875465463, "learning_rate": 1.2549810245338222e-06, "loss": 0.2383, "step": 22625 }, { "epoch": 0.776458476321208, "grad_norm": 0.9198817730442587, "learning_rate": 1.254612834054147e-06, "loss": 0.2675, "step": 22626 }, { "epoch": 0.7764927934111188, "grad_norm": 0.8779219116746294, "learning_rate": 1.2542446898443616e-06, "loss": 0.2666, "step": 22627 }, { "epoch": 0.7765271105010295, "grad_norm": 0.775350937004628, "learning_rate": 1.253876591909015e-06, "loss": 0.2695, "step": 22628 }, { "epoch": 0.7765614275909403, "grad_norm": 0.7573729628553355, "learning_rate": 1.2535085402526587e-06, "loss": 0.2741, "step": 22629 }, { "epoch": 0.776595744680851, "grad_norm": 0.7561662117410588, "learning_rate": 1.2531405348798343e-06, "loss": 0.2433, "step": 22630 }, { "epoch": 0.7766300617707619, "grad_norm": 0.8095222455166987, "learning_rate": 1.2527725757950904e-06, "loss": 0.2791, "step": 22631 }, { "epoch": 0.7766643788606726, "grad_norm": 0.7542670648903104, "learning_rate": 1.252404663002973e-06, "loss": 0.2434, "step": 22632 }, { "epoch": 0.7766986959505834, "grad_norm": 0.6634359095402808, "learning_rate": 1.252036796508026e-06, "loss": 0.2417, "step": 22633 }, { "epoch": 0.7767330130404941, "grad_norm": 0.934520645613593, "learning_rate": 1.2516689763147948e-06, "loss": 0.2572, "step": 22634 }, { "epoch": 0.7767673301304049, "grad_norm": 0.7058838685995888, "learning_rate": 1.2513012024278237e-06, "loss": 0.2412, "step": 22635 }, { "epoch": 0.7768016472203157, "grad_norm": 0.7397945151615462, "learning_rate": 1.2509334748516555e-06, "loss": 0.2237, "step": 22636 }, { "epoch": 0.7768359643102265, "grad_norm": 0.7577832228330413, "learning_rate": 1.2505657935908328e-06, "loss": 0.2447, "step": 22637 }, { "epoch": 0.7768702814001373, "grad_norm": 0.7935286033508958, "learning_rate": 1.2501981586499002e-06, "loss": 0.2467, "step": 22638 }, { "epoch": 0.776904598490048, "grad_norm": 0.7336506467692312, "learning_rate": 1.2498305700333945e-06, "loss": 0.2463, "step": 22639 }, { "epoch": 0.7769389155799589, "grad_norm": 0.9304591194647757, "learning_rate": 1.249463027745863e-06, "loss": 0.261, "step": 22640 }, { "epoch": 0.7769732326698696, "grad_norm": 0.8509430702918243, "learning_rate": 1.2490955317918414e-06, "loss": 0.2796, "step": 22641 }, { "epoch": 0.7770075497597804, "grad_norm": 0.7966107632414905, "learning_rate": 1.2487280821758714e-06, "loss": 0.2518, "step": 22642 }, { "epoch": 0.7770418668496911, "grad_norm": 0.7476014583984554, "learning_rate": 1.248360678902492e-06, "loss": 0.2964, "step": 22643 }, { "epoch": 0.7770761839396019, "grad_norm": 0.8726263799823925, "learning_rate": 1.247993321976243e-06, "loss": 0.2495, "step": 22644 }, { "epoch": 0.7771105010295127, "grad_norm": 0.9390146447371411, "learning_rate": 1.247626011401662e-06, "loss": 0.2499, "step": 22645 }, { "epoch": 0.7771448181194235, "grad_norm": 0.6841979572214747, "learning_rate": 1.2472587471832864e-06, "loss": 0.2574, "step": 22646 }, { "epoch": 0.7771791352093342, "grad_norm": 0.7965394225387267, "learning_rate": 1.2468915293256555e-06, "loss": 0.2498, "step": 22647 }, { "epoch": 0.777213452299245, "grad_norm": 0.7915721292482357, "learning_rate": 1.2465243578333008e-06, "loss": 0.2963, "step": 22648 }, { "epoch": 0.7772477693891559, "grad_norm": 0.8685028748530043, "learning_rate": 1.2461572327107645e-06, "loss": 0.3106, "step": 22649 }, { "epoch": 0.7772820864790666, "grad_norm": 0.8098572943788672, "learning_rate": 1.2457901539625784e-06, "loss": 0.2627, "step": 22650 }, { "epoch": 0.7773164035689774, "grad_norm": 0.8264920973649788, "learning_rate": 1.2454231215932755e-06, "loss": 0.2871, "step": 22651 }, { "epoch": 0.7773507206588881, "grad_norm": 0.7339900743118833, "learning_rate": 1.2450561356073959e-06, "loss": 0.2527, "step": 22652 }, { "epoch": 0.7773850377487989, "grad_norm": 0.7230850679517126, "learning_rate": 1.244689196009468e-06, "loss": 0.2642, "step": 22653 }, { "epoch": 0.7774193548387097, "grad_norm": 0.8198363707059333, "learning_rate": 1.2443223028040275e-06, "loss": 0.2966, "step": 22654 }, { "epoch": 0.7774536719286205, "grad_norm": 0.8518350874898907, "learning_rate": 1.2439554559956057e-06, "loss": 0.2728, "step": 22655 }, { "epoch": 0.7774879890185312, "grad_norm": 0.6967860469971897, "learning_rate": 1.2435886555887356e-06, "loss": 0.2513, "step": 22656 }, { "epoch": 0.777522306108442, "grad_norm": 0.807497550195791, "learning_rate": 1.2432219015879477e-06, "loss": 0.2619, "step": 22657 }, { "epoch": 0.7775566231983527, "grad_norm": 0.7360001271560723, "learning_rate": 1.2428551939977735e-06, "loss": 0.2252, "step": 22658 }, { "epoch": 0.7775909402882636, "grad_norm": 0.7507798377852667, "learning_rate": 1.2424885328227444e-06, "loss": 0.2144, "step": 22659 }, { "epoch": 0.7776252573781743, "grad_norm": 0.728010494497691, "learning_rate": 1.2421219180673855e-06, "loss": 0.2221, "step": 22660 }, { "epoch": 0.7776595744680851, "grad_norm": 0.83963950731228, "learning_rate": 1.2417553497362322e-06, "loss": 0.3249, "step": 22661 }, { "epoch": 0.7776938915579958, "grad_norm": 0.7576347210680615, "learning_rate": 1.2413888278338081e-06, "loss": 0.2736, "step": 22662 }, { "epoch": 0.7777282086479067, "grad_norm": 0.7573466632327266, "learning_rate": 1.241022352364643e-06, "loss": 0.2598, "step": 22663 }, { "epoch": 0.7777625257378175, "grad_norm": 0.7719810427658119, "learning_rate": 1.2406559233332648e-06, "loss": 0.2616, "step": 22664 }, { "epoch": 0.7777968428277282, "grad_norm": 0.7712358816602969, "learning_rate": 1.2402895407441989e-06, "loss": 0.2836, "step": 22665 }, { "epoch": 0.777831159917639, "grad_norm": 0.7679821373687732, "learning_rate": 1.2399232046019726e-06, "loss": 0.2329, "step": 22666 }, { "epoch": 0.7778654770075497, "grad_norm": 0.8577917719537032, "learning_rate": 1.2395569149111114e-06, "loss": 0.3156, "step": 22667 }, { "epoch": 0.7778997940974606, "grad_norm": 0.7926544079874782, "learning_rate": 1.2391906716761403e-06, "loss": 0.2445, "step": 22668 }, { "epoch": 0.7779341111873713, "grad_norm": 0.7310132498667715, "learning_rate": 1.2388244749015837e-06, "loss": 0.2548, "step": 22669 }, { "epoch": 0.7779684282772821, "grad_norm": 0.9219731661081274, "learning_rate": 1.2384583245919669e-06, "loss": 0.2894, "step": 22670 }, { "epoch": 0.7780027453671928, "grad_norm": 0.718096570083295, "learning_rate": 1.2380922207518093e-06, "loss": 0.2592, "step": 22671 }, { "epoch": 0.7780370624571037, "grad_norm": 0.8274117701753921, "learning_rate": 1.2377261633856375e-06, "loss": 0.2483, "step": 22672 }, { "epoch": 0.7780713795470144, "grad_norm": 0.7991769644556612, "learning_rate": 1.2373601524979746e-06, "loss": 0.3122, "step": 22673 }, { "epoch": 0.7781056966369252, "grad_norm": 0.7547397991015639, "learning_rate": 1.2369941880933367e-06, "loss": 0.3175, "step": 22674 }, { "epoch": 0.7781400137268359, "grad_norm": 0.7349344836608133, "learning_rate": 1.2366282701762517e-06, "loss": 0.2845, "step": 22675 }, { "epoch": 0.7781743308167467, "grad_norm": 0.7940169152522264, "learning_rate": 1.2362623987512351e-06, "loss": 0.3117, "step": 22676 }, { "epoch": 0.7782086479066576, "grad_norm": 0.7947524593064687, "learning_rate": 1.2358965738228079e-06, "loss": 0.2575, "step": 22677 }, { "epoch": 0.7782429649965683, "grad_norm": 0.765198073399478, "learning_rate": 1.235530795395491e-06, "loss": 0.2417, "step": 22678 }, { "epoch": 0.7782772820864791, "grad_norm": 0.7684446701659292, "learning_rate": 1.2351650634738017e-06, "loss": 0.2928, "step": 22679 }, { "epoch": 0.7783115991763898, "grad_norm": 0.9143001866413878, "learning_rate": 1.2347993780622585e-06, "loss": 0.2842, "step": 22680 }, { "epoch": 0.7783459162663006, "grad_norm": 0.8472183009820076, "learning_rate": 1.2344337391653792e-06, "loss": 0.2457, "step": 22681 }, { "epoch": 0.7783802333562114, "grad_norm": 0.7494550043500032, "learning_rate": 1.234068146787683e-06, "loss": 0.2363, "step": 22682 }, { "epoch": 0.7784145504461222, "grad_norm": 0.8441349789310602, "learning_rate": 1.2337026009336804e-06, "loss": 0.2495, "step": 22683 }, { "epoch": 0.7784488675360329, "grad_norm": 0.6747919860631144, "learning_rate": 1.2333371016078943e-06, "loss": 0.2196, "step": 22684 }, { "epoch": 0.7784831846259437, "grad_norm": 0.7010954991084061, "learning_rate": 1.2329716488148357e-06, "loss": 0.2201, "step": 22685 }, { "epoch": 0.7785175017158545, "grad_norm": 0.7232185766836512, "learning_rate": 1.2326062425590202e-06, "loss": 0.2811, "step": 22686 }, { "epoch": 0.7785518188057653, "grad_norm": 0.6780661855714463, "learning_rate": 1.2322408828449623e-06, "loss": 0.2187, "step": 22687 }, { "epoch": 0.778586135895676, "grad_norm": 0.7402760524681012, "learning_rate": 1.2318755696771751e-06, "loss": 0.2382, "step": 22688 }, { "epoch": 0.7786204529855868, "grad_norm": 0.772375275015623, "learning_rate": 1.2315103030601721e-06, "loss": 0.2417, "step": 22689 }, { "epoch": 0.7786547700754975, "grad_norm": 0.7283234483013931, "learning_rate": 1.2311450829984661e-06, "loss": 0.2434, "step": 22690 }, { "epoch": 0.7786890871654084, "grad_norm": 0.7321343315283074, "learning_rate": 1.23077990949657e-06, "loss": 0.2609, "step": 22691 }, { "epoch": 0.7787234042553192, "grad_norm": 0.8045655432237291, "learning_rate": 1.2304147825589907e-06, "loss": 0.2165, "step": 22692 }, { "epoch": 0.7787577213452299, "grad_norm": 0.7785473425345637, "learning_rate": 1.2300497021902442e-06, "loss": 0.3067, "step": 22693 }, { "epoch": 0.7787920384351407, "grad_norm": 0.7745803714885128, "learning_rate": 1.2296846683948371e-06, "loss": 0.236, "step": 22694 }, { "epoch": 0.7788263555250515, "grad_norm": 0.7434485058842242, "learning_rate": 1.2293196811772789e-06, "loss": 0.2266, "step": 22695 }, { "epoch": 0.7788606726149623, "grad_norm": 0.794865161070182, "learning_rate": 1.2289547405420826e-06, "loss": 0.2525, "step": 22696 }, { "epoch": 0.778894989704873, "grad_norm": 0.7192626181299392, "learning_rate": 1.2285898464937512e-06, "loss": 0.2466, "step": 22697 }, { "epoch": 0.7789293067947838, "grad_norm": 0.7619835320440905, "learning_rate": 1.2282249990367978e-06, "loss": 0.2808, "step": 22698 }, { "epoch": 0.7789636238846945, "grad_norm": 0.7166989338383618, "learning_rate": 1.227860198175726e-06, "loss": 0.2339, "step": 22699 }, { "epoch": 0.7789979409746054, "grad_norm": 0.9089062173008596, "learning_rate": 1.2274954439150433e-06, "loss": 0.3116, "step": 22700 }, { "epoch": 0.7790322580645161, "grad_norm": 0.8628113534084717, "learning_rate": 1.227130736259256e-06, "loss": 0.2778, "step": 22701 }, { "epoch": 0.7790665751544269, "grad_norm": 0.756108833161734, "learning_rate": 1.22676607521287e-06, "loss": 0.2379, "step": 22702 }, { "epoch": 0.7791008922443377, "grad_norm": 0.7816983023857706, "learning_rate": 1.2264014607803897e-06, "loss": 0.2462, "step": 22703 }, { "epoch": 0.7791352093342484, "grad_norm": 0.7709354925913294, "learning_rate": 1.2260368929663202e-06, "loss": 0.2859, "step": 22704 }, { "epoch": 0.7791695264241593, "grad_norm": 0.7727693732270541, "learning_rate": 1.2256723717751656e-06, "loss": 0.3129, "step": 22705 }, { "epoch": 0.77920384351407, "grad_norm": 0.8064136641474557, "learning_rate": 1.2253078972114263e-06, "loss": 0.3148, "step": 22706 }, { "epoch": 0.7792381606039808, "grad_norm": 0.7541145669092241, "learning_rate": 1.2249434692796098e-06, "loss": 0.2745, "step": 22707 }, { "epoch": 0.7792724776938915, "grad_norm": 0.746097984195074, "learning_rate": 1.2245790879842135e-06, "loss": 0.2328, "step": 22708 }, { "epoch": 0.7793067947838024, "grad_norm": 0.7857299325462965, "learning_rate": 1.2242147533297411e-06, "loss": 0.2416, "step": 22709 }, { "epoch": 0.7793411118737131, "grad_norm": 0.756579708997338, "learning_rate": 1.2238504653206935e-06, "loss": 0.2813, "step": 22710 }, { "epoch": 0.7793754289636239, "grad_norm": 0.7532309054379742, "learning_rate": 1.2234862239615701e-06, "loss": 0.2341, "step": 22711 }, { "epoch": 0.7794097460535346, "grad_norm": 0.8137435651703179, "learning_rate": 1.2231220292568724e-06, "loss": 0.2623, "step": 22712 }, { "epoch": 0.7794440631434454, "grad_norm": 0.7110435140513283, "learning_rate": 1.222757881211098e-06, "loss": 0.2476, "step": 22713 }, { "epoch": 0.7794783802333562, "grad_norm": 0.7896567552312588, "learning_rate": 1.2223937798287478e-06, "loss": 0.2344, "step": 22714 }, { "epoch": 0.779512697323267, "grad_norm": 0.7527087272275566, "learning_rate": 1.2220297251143154e-06, "loss": 0.2557, "step": 22715 }, { "epoch": 0.7795470144131778, "grad_norm": 0.860079609800985, "learning_rate": 1.2216657170723022e-06, "loss": 0.2827, "step": 22716 }, { "epoch": 0.7795813315030885, "grad_norm": 0.9024385781700033, "learning_rate": 1.2213017557072054e-06, "loss": 0.2663, "step": 22717 }, { "epoch": 0.7796156485929994, "grad_norm": 0.8147610769888687, "learning_rate": 1.2209378410235172e-06, "loss": 0.2365, "step": 22718 }, { "epoch": 0.7796499656829101, "grad_norm": 0.8089956452798673, "learning_rate": 1.2205739730257394e-06, "loss": 0.2402, "step": 22719 }, { "epoch": 0.7796842827728209, "grad_norm": 0.7956076395346195, "learning_rate": 1.2202101517183617e-06, "loss": 0.2767, "step": 22720 }, { "epoch": 0.7797185998627316, "grad_norm": 0.7686888374521011, "learning_rate": 1.2198463771058805e-06, "loss": 0.2602, "step": 22721 }, { "epoch": 0.7797529169526424, "grad_norm": 1.72397174930323, "learning_rate": 1.2194826491927908e-06, "loss": 0.2617, "step": 22722 }, { "epoch": 0.7797872340425532, "grad_norm": 0.8719110946379548, "learning_rate": 1.2191189679835846e-06, "loss": 0.2578, "step": 22723 }, { "epoch": 0.779821551132464, "grad_norm": 0.7467039548801531, "learning_rate": 1.2187553334827562e-06, "loss": 0.2486, "step": 22724 }, { "epoch": 0.7798558682223747, "grad_norm": 0.751882585591169, "learning_rate": 1.2183917456947964e-06, "loss": 0.3005, "step": 22725 }, { "epoch": 0.7798901853122855, "grad_norm": 0.8756984172516815, "learning_rate": 1.2180282046241981e-06, "loss": 0.2385, "step": 22726 }, { "epoch": 0.7799245024021962, "grad_norm": 0.7799395688290762, "learning_rate": 1.2176647102754518e-06, "loss": 0.2401, "step": 22727 }, { "epoch": 0.7799588194921071, "grad_norm": 0.7330088221045382, "learning_rate": 1.21730126265305e-06, "loss": 0.2775, "step": 22728 }, { "epoch": 0.7799931365820179, "grad_norm": 0.7734196802768188, "learning_rate": 1.216937861761478e-06, "loss": 0.263, "step": 22729 }, { "epoch": 0.7800274536719286, "grad_norm": 0.8295835674697016, "learning_rate": 1.2165745076052305e-06, "loss": 0.233, "step": 22730 }, { "epoch": 0.7800617707618394, "grad_norm": 0.7529335274232408, "learning_rate": 1.2162112001887927e-06, "loss": 0.2484, "step": 22731 }, { "epoch": 0.7800960878517502, "grad_norm": 0.7998075354276347, "learning_rate": 1.2158479395166545e-06, "loss": 0.2828, "step": 22732 }, { "epoch": 0.780130404941661, "grad_norm": 0.8788139198072497, "learning_rate": 1.2154847255933028e-06, "loss": 0.2808, "step": 22733 }, { "epoch": 0.7801647220315717, "grad_norm": 0.7844553422937236, "learning_rate": 1.2151215584232245e-06, "loss": 0.2858, "step": 22734 }, { "epoch": 0.7801990391214825, "grad_norm": 0.7381249796414986, "learning_rate": 1.2147584380109072e-06, "loss": 0.2342, "step": 22735 }, { "epoch": 0.7802333562113932, "grad_norm": 0.7504891818571783, "learning_rate": 1.214395364360836e-06, "loss": 0.2714, "step": 22736 }, { "epoch": 0.7802676733013041, "grad_norm": 0.8349326149978713, "learning_rate": 1.2140323374774981e-06, "loss": 0.2646, "step": 22737 }, { "epoch": 0.7803019903912148, "grad_norm": 0.7505381198941949, "learning_rate": 1.2136693573653734e-06, "loss": 0.2761, "step": 22738 }, { "epoch": 0.7803363074811256, "grad_norm": 0.8359739673865767, "learning_rate": 1.213306424028951e-06, "loss": 0.2746, "step": 22739 }, { "epoch": 0.7803706245710363, "grad_norm": 0.7927129697901347, "learning_rate": 1.2129435374727144e-06, "loss": 0.2452, "step": 22740 }, { "epoch": 0.7804049416609472, "grad_norm": 0.7883128902230232, "learning_rate": 1.2125806977011423e-06, "loss": 0.3318, "step": 22741 }, { "epoch": 0.780439258750858, "grad_norm": 0.8483030833289336, "learning_rate": 1.2122179047187227e-06, "loss": 0.2715, "step": 22742 }, { "epoch": 0.7804735758407687, "grad_norm": 0.7628607405992845, "learning_rate": 1.2118551585299338e-06, "loss": 0.2981, "step": 22743 }, { "epoch": 0.7805078929306795, "grad_norm": 0.8608208880861639, "learning_rate": 1.2114924591392573e-06, "loss": 0.2901, "step": 22744 }, { "epoch": 0.7805422100205902, "grad_norm": 0.7086703306365573, "learning_rate": 1.2111298065511746e-06, "loss": 0.2677, "step": 22745 }, { "epoch": 0.7805765271105011, "grad_norm": 0.8111094477521943, "learning_rate": 1.210767200770166e-06, "loss": 0.2255, "step": 22746 }, { "epoch": 0.7806108442004118, "grad_norm": 0.7740174662375123, "learning_rate": 1.2104046418007103e-06, "loss": 0.302, "step": 22747 }, { "epoch": 0.7806451612903226, "grad_norm": 0.7964757243275308, "learning_rate": 1.2100421296472874e-06, "loss": 0.2892, "step": 22748 }, { "epoch": 0.7806794783802333, "grad_norm": 0.8243529414545492, "learning_rate": 1.2096796643143765e-06, "loss": 0.2687, "step": 22749 }, { "epoch": 0.7807137954701441, "grad_norm": 0.7795924635971802, "learning_rate": 1.2093172458064518e-06, "loss": 0.2669, "step": 22750 }, { "epoch": 0.7807481125600549, "grad_norm": 0.7753618143313233, "learning_rate": 1.2089548741279956e-06, "loss": 0.3038, "step": 22751 }, { "epoch": 0.7807824296499657, "grad_norm": 0.7608338152359115, "learning_rate": 1.208592549283481e-06, "loss": 0.2483, "step": 22752 }, { "epoch": 0.7808167467398764, "grad_norm": 0.7697204891254565, "learning_rate": 1.2082302712773846e-06, "loss": 0.2339, "step": 22753 }, { "epoch": 0.7808510638297872, "grad_norm": 0.7982187630717223, "learning_rate": 1.2078680401141824e-06, "loss": 0.2398, "step": 22754 }, { "epoch": 0.7808853809196981, "grad_norm": 0.8099375286551108, "learning_rate": 1.2075058557983493e-06, "loss": 0.33, "step": 22755 }, { "epoch": 0.7809196980096088, "grad_norm": 0.75432126532303, "learning_rate": 1.2071437183343604e-06, "loss": 0.2773, "step": 22756 }, { "epoch": 0.7809540150995196, "grad_norm": 0.9134008985587293, "learning_rate": 1.206781627726688e-06, "loss": 0.2617, "step": 22757 }, { "epoch": 0.7809883321894303, "grad_norm": 0.7893443658868161, "learning_rate": 1.2064195839798064e-06, "loss": 0.2157, "step": 22758 }, { "epoch": 0.7810226492793411, "grad_norm": 0.8077544112131336, "learning_rate": 1.2060575870981882e-06, "loss": 0.228, "step": 22759 }, { "epoch": 0.7810569663692519, "grad_norm": 0.7806428364225342, "learning_rate": 1.2056956370863066e-06, "loss": 0.2786, "step": 22760 }, { "epoch": 0.7810912834591627, "grad_norm": 0.7296177572372677, "learning_rate": 1.2053337339486288e-06, "loss": 0.2479, "step": 22761 }, { "epoch": 0.7811256005490734, "grad_norm": 0.8110869830622394, "learning_rate": 1.2049718776896301e-06, "loss": 0.2334, "step": 22762 }, { "epoch": 0.7811599176389842, "grad_norm": 0.8000245544598187, "learning_rate": 1.2046100683137801e-06, "loss": 0.2827, "step": 22763 }, { "epoch": 0.781194234728895, "grad_norm": 0.8322520533781061, "learning_rate": 1.2042483058255455e-06, "loss": 0.2871, "step": 22764 }, { "epoch": 0.7812285518188058, "grad_norm": 0.8106760159755781, "learning_rate": 1.2038865902294005e-06, "loss": 0.2703, "step": 22765 }, { "epoch": 0.7812628689087165, "grad_norm": 0.7004011159235434, "learning_rate": 1.203524921529809e-06, "loss": 0.2296, "step": 22766 }, { "epoch": 0.7812971859986273, "grad_norm": 0.752459107032603, "learning_rate": 1.203163299731241e-06, "loss": 0.2993, "step": 22767 }, { "epoch": 0.781331503088538, "grad_norm": 0.725960347256133, "learning_rate": 1.2028017248381641e-06, "loss": 0.2724, "step": 22768 }, { "epoch": 0.7813658201784489, "grad_norm": 0.7621823855602514, "learning_rate": 1.2024401968550442e-06, "loss": 0.2583, "step": 22769 }, { "epoch": 0.7814001372683597, "grad_norm": 0.785919800409765, "learning_rate": 1.2020787157863484e-06, "loss": 0.2713, "step": 22770 }, { "epoch": 0.7814344543582704, "grad_norm": 0.7821897517099264, "learning_rate": 1.2017172816365425e-06, "loss": 0.2577, "step": 22771 }, { "epoch": 0.7814687714481812, "grad_norm": 0.7472566858526039, "learning_rate": 1.2013558944100918e-06, "loss": 0.2537, "step": 22772 }, { "epoch": 0.7815030885380919, "grad_norm": 0.7376462941456201, "learning_rate": 1.200994554111458e-06, "loss": 0.2244, "step": 22773 }, { "epoch": 0.7815374056280028, "grad_norm": 0.8437148431762753, "learning_rate": 1.2006332607451098e-06, "loss": 0.269, "step": 22774 }, { "epoch": 0.7815717227179135, "grad_norm": 0.7438373866641728, "learning_rate": 1.2002720143155072e-06, "loss": 0.2754, "step": 22775 }, { "epoch": 0.7816060398078243, "grad_norm": 0.8036579287769801, "learning_rate": 1.1999108148271133e-06, "loss": 0.2236, "step": 22776 }, { "epoch": 0.781640356897735, "grad_norm": 0.7306284853019196, "learning_rate": 1.1995496622843912e-06, "loss": 0.2348, "step": 22777 }, { "epoch": 0.7816746739876459, "grad_norm": 1.0317106149584805, "learning_rate": 1.1991885566918015e-06, "loss": 0.2704, "step": 22778 }, { "epoch": 0.7817089910775566, "grad_norm": 0.773079553447908, "learning_rate": 1.1988274980538062e-06, "loss": 0.262, "step": 22779 }, { "epoch": 0.7817433081674674, "grad_norm": 0.8404121673041438, "learning_rate": 1.1984664863748653e-06, "loss": 0.2784, "step": 22780 }, { "epoch": 0.7817776252573782, "grad_norm": 0.7943021790826009, "learning_rate": 1.1981055216594406e-06, "loss": 0.267, "step": 22781 }, { "epoch": 0.7818119423472889, "grad_norm": 0.7969336285056998, "learning_rate": 1.197744603911986e-06, "loss": 0.2863, "step": 22782 }, { "epoch": 0.7818462594371998, "grad_norm": 0.7353117217766619, "learning_rate": 1.1973837331369658e-06, "loss": 0.3143, "step": 22783 }, { "epoch": 0.7818805765271105, "grad_norm": 0.7821186112711412, "learning_rate": 1.197022909338838e-06, "loss": 0.2387, "step": 22784 }, { "epoch": 0.7819148936170213, "grad_norm": 0.7118546194664902, "learning_rate": 1.1966621325220552e-06, "loss": 0.2791, "step": 22785 }, { "epoch": 0.781949210706932, "grad_norm": 0.8303274141462533, "learning_rate": 1.1963014026910797e-06, "loss": 0.2799, "step": 22786 }, { "epoch": 0.7819835277968429, "grad_norm": 0.7402110302856924, "learning_rate": 1.1959407198503648e-06, "loss": 0.2369, "step": 22787 }, { "epoch": 0.7820178448867536, "grad_norm": 0.9391472158154064, "learning_rate": 1.1955800840043669e-06, "loss": 0.3527, "step": 22788 }, { "epoch": 0.7820521619766644, "grad_norm": 0.8804353084049368, "learning_rate": 1.1952194951575419e-06, "loss": 0.2366, "step": 22789 }, { "epoch": 0.7820864790665751, "grad_norm": 0.7197531157541562, "learning_rate": 1.194858953314344e-06, "loss": 0.2318, "step": 22790 }, { "epoch": 0.7821207961564859, "grad_norm": 0.7328896246902548, "learning_rate": 1.1944984584792274e-06, "loss": 0.2116, "step": 22791 }, { "epoch": 0.7821551132463967, "grad_norm": 0.8053338411994626, "learning_rate": 1.1941380106566453e-06, "loss": 0.263, "step": 22792 }, { "epoch": 0.7821894303363075, "grad_norm": 0.7880410152747344, "learning_rate": 1.1937776098510512e-06, "loss": 0.2591, "step": 22793 }, { "epoch": 0.7822237474262183, "grad_norm": 0.7791382429468721, "learning_rate": 1.1934172560668965e-06, "loss": 0.3267, "step": 22794 }, { "epoch": 0.782258064516129, "grad_norm": 1.0544283142044375, "learning_rate": 1.1930569493086358e-06, "loss": 0.3072, "step": 22795 }, { "epoch": 0.7822923816060398, "grad_norm": 0.7647708436089067, "learning_rate": 1.1926966895807146e-06, "loss": 0.2439, "step": 22796 }, { "epoch": 0.7823266986959506, "grad_norm": 0.7615649362820803, "learning_rate": 1.1923364768875901e-06, "loss": 0.2344, "step": 22797 }, { "epoch": 0.7823610157858614, "grad_norm": 0.8065246699784857, "learning_rate": 1.1919763112337073e-06, "loss": 0.2172, "step": 22798 }, { "epoch": 0.7823953328757721, "grad_norm": 1.0963158101994994, "learning_rate": 1.1916161926235181e-06, "loss": 0.2457, "step": 22799 }, { "epoch": 0.7824296499656829, "grad_norm": 0.9224282878485761, "learning_rate": 1.1912561210614703e-06, "loss": 0.2679, "step": 22800 }, { "epoch": 0.7824639670555937, "grad_norm": 0.7969508237513636, "learning_rate": 1.1908960965520121e-06, "loss": 0.2365, "step": 22801 }, { "epoch": 0.7824982841455045, "grad_norm": 0.8318399524571564, "learning_rate": 1.1905361190995924e-06, "loss": 0.3004, "step": 22802 }, { "epoch": 0.7825326012354152, "grad_norm": 0.8448393313128633, "learning_rate": 1.190176188708657e-06, "loss": 0.2626, "step": 22803 }, { "epoch": 0.782566918325326, "grad_norm": 0.8578834898003928, "learning_rate": 1.1898163053836549e-06, "loss": 0.2674, "step": 22804 }, { "epoch": 0.7826012354152367, "grad_norm": 0.7522860075631674, "learning_rate": 1.1894564691290272e-06, "loss": 0.2221, "step": 22805 }, { "epoch": 0.7826355525051476, "grad_norm": 0.8753671941306721, "learning_rate": 1.1890966799492231e-06, "loss": 0.2337, "step": 22806 }, { "epoch": 0.7826698695950584, "grad_norm": 0.7186259871584519, "learning_rate": 1.1887369378486885e-06, "loss": 0.2821, "step": 22807 }, { "epoch": 0.7827041866849691, "grad_norm": 0.8384779593048024, "learning_rate": 1.1883772428318623e-06, "loss": 0.2418, "step": 22808 }, { "epoch": 0.7827385037748799, "grad_norm": 0.744243955735379, "learning_rate": 1.1880175949031942e-06, "loss": 0.2437, "step": 22809 }, { "epoch": 0.7827728208647907, "grad_norm": 0.8353420320866078, "learning_rate": 1.1876579940671229e-06, "loss": 0.2808, "step": 22810 }, { "epoch": 0.7828071379547015, "grad_norm": 0.9983451457236059, "learning_rate": 1.1872984403280924e-06, "loss": 0.2554, "step": 22811 }, { "epoch": 0.7828414550446122, "grad_norm": 0.7150533806823791, "learning_rate": 1.1869389336905446e-06, "loss": 0.2446, "step": 22812 }, { "epoch": 0.782875772134523, "grad_norm": 0.8272212491853403, "learning_rate": 1.1865794741589204e-06, "loss": 0.2192, "step": 22813 }, { "epoch": 0.7829100892244337, "grad_norm": 0.8799103141847454, "learning_rate": 1.1862200617376613e-06, "loss": 0.2393, "step": 22814 }, { "epoch": 0.7829444063143446, "grad_norm": 0.8294275168456262, "learning_rate": 1.1858606964312064e-06, "loss": 0.3029, "step": 22815 }, { "epoch": 0.7829787234042553, "grad_norm": 0.6964529638318524, "learning_rate": 1.1855013782439972e-06, "loss": 0.2188, "step": 22816 }, { "epoch": 0.7830130404941661, "grad_norm": 0.7806707003492763, "learning_rate": 1.1851421071804687e-06, "loss": 0.316, "step": 22817 }, { "epoch": 0.7830473575840768, "grad_norm": 0.7267040918346203, "learning_rate": 1.1847828832450641e-06, "loss": 0.3022, "step": 22818 }, { "epoch": 0.7830816746739876, "grad_norm": 0.7689604529764804, "learning_rate": 1.1844237064422175e-06, "loss": 0.2143, "step": 22819 }, { "epoch": 0.7831159917638985, "grad_norm": 0.8888437333287171, "learning_rate": 1.184064576776368e-06, "loss": 0.2873, "step": 22820 }, { "epoch": 0.7831503088538092, "grad_norm": 0.8462708274135367, "learning_rate": 1.1837054942519517e-06, "loss": 0.2779, "step": 22821 }, { "epoch": 0.78318462594372, "grad_norm": 0.7846930387676676, "learning_rate": 1.1833464588734023e-06, "loss": 0.2708, "step": 22822 }, { "epoch": 0.7832189430336307, "grad_norm": 0.752567073641952, "learning_rate": 1.1829874706451617e-06, "loss": 0.2763, "step": 22823 }, { "epoch": 0.7832532601235416, "grad_norm": 0.7931858566869427, "learning_rate": 1.1826285295716583e-06, "loss": 0.3308, "step": 22824 }, { "epoch": 0.7832875772134523, "grad_norm": 0.8594920184422052, "learning_rate": 1.1822696356573293e-06, "loss": 0.2453, "step": 22825 }, { "epoch": 0.7833218943033631, "grad_norm": 0.6880976637275152, "learning_rate": 1.1819107889066078e-06, "loss": 0.2125, "step": 22826 }, { "epoch": 0.7833562113932738, "grad_norm": 0.8792480769178251, "learning_rate": 1.1815519893239268e-06, "loss": 0.232, "step": 22827 }, { "epoch": 0.7833905284831846, "grad_norm": 0.7548890040618781, "learning_rate": 1.181193236913719e-06, "loss": 0.2828, "step": 22828 }, { "epoch": 0.7834248455730954, "grad_norm": 0.7042046942954445, "learning_rate": 1.1808345316804171e-06, "loss": 0.2374, "step": 22829 }, { "epoch": 0.7834591626630062, "grad_norm": 0.840397581714706, "learning_rate": 1.1804758736284527e-06, "loss": 0.2982, "step": 22830 }, { "epoch": 0.7834934797529169, "grad_norm": 0.7551960115534199, "learning_rate": 1.1801172627622537e-06, "loss": 0.2621, "step": 22831 }, { "epoch": 0.7835277968428277, "grad_norm": 0.8433411264983214, "learning_rate": 1.1797586990862541e-06, "loss": 0.2402, "step": 22832 }, { "epoch": 0.7835621139327386, "grad_norm": 1.083359567894052, "learning_rate": 1.1794001826048812e-06, "loss": 0.2833, "step": 22833 }, { "epoch": 0.7835964310226493, "grad_norm": 0.8157873148567015, "learning_rate": 1.1790417133225646e-06, "loss": 0.2965, "step": 22834 }, { "epoch": 0.7836307481125601, "grad_norm": 0.7684542052247203, "learning_rate": 1.1786832912437323e-06, "loss": 0.2652, "step": 22835 }, { "epoch": 0.7836650652024708, "grad_norm": 0.7097104610963421, "learning_rate": 1.1783249163728133e-06, "loss": 0.255, "step": 22836 }, { "epoch": 0.7836993822923816, "grad_norm": 0.6854381941197705, "learning_rate": 1.1779665887142338e-06, "loss": 0.2612, "step": 22837 }, { "epoch": 0.7837336993822924, "grad_norm": 0.7430513616496205, "learning_rate": 1.1776083082724215e-06, "loss": 0.2343, "step": 22838 }, { "epoch": 0.7837680164722032, "grad_norm": 0.8150909042408352, "learning_rate": 1.1772500750518035e-06, "loss": 0.2629, "step": 22839 }, { "epoch": 0.7838023335621139, "grad_norm": 0.7627323161418478, "learning_rate": 1.1768918890568004e-06, "loss": 0.2692, "step": 22840 }, { "epoch": 0.7838366506520247, "grad_norm": 0.7089038184358452, "learning_rate": 1.1765337502918438e-06, "loss": 0.2206, "step": 22841 }, { "epoch": 0.7838709677419354, "grad_norm": 0.7124649991645164, "learning_rate": 1.1761756587613544e-06, "loss": 0.2234, "step": 22842 }, { "epoch": 0.7839052848318463, "grad_norm": 0.8661102150419666, "learning_rate": 1.1758176144697558e-06, "loss": 0.2713, "step": 22843 }, { "epoch": 0.783939601921757, "grad_norm": 0.8029258289057839, "learning_rate": 1.1754596174214721e-06, "loss": 0.2929, "step": 22844 }, { "epoch": 0.7839739190116678, "grad_norm": 0.6846698172331669, "learning_rate": 1.175101667620926e-06, "loss": 0.2473, "step": 22845 }, { "epoch": 0.7840082361015785, "grad_norm": 0.8798644229170584, "learning_rate": 1.174743765072539e-06, "loss": 0.319, "step": 22846 }, { "epoch": 0.7840425531914894, "grad_norm": 0.8729164966239229, "learning_rate": 1.1743859097807337e-06, "loss": 0.2796, "step": 22847 }, { "epoch": 0.7840768702814002, "grad_norm": 0.9559684463387246, "learning_rate": 1.174028101749931e-06, "loss": 0.2363, "step": 22848 }, { "epoch": 0.7841111873713109, "grad_norm": 0.7919380592810216, "learning_rate": 1.173670340984548e-06, "loss": 0.2382, "step": 22849 }, { "epoch": 0.7841455044612217, "grad_norm": 0.8305007813390716, "learning_rate": 1.1733126274890082e-06, "loss": 0.2607, "step": 22850 }, { "epoch": 0.7841798215511324, "grad_norm": 0.8027336151070734, "learning_rate": 1.1729549612677304e-06, "loss": 0.2374, "step": 22851 }, { "epoch": 0.7842141386410433, "grad_norm": 0.9150451846516809, "learning_rate": 1.1725973423251297e-06, "loss": 0.2661, "step": 22852 }, { "epoch": 0.784248455730954, "grad_norm": 0.8489695402077518, "learning_rate": 1.1722397706656297e-06, "loss": 0.3024, "step": 22853 }, { "epoch": 0.7842827728208648, "grad_norm": 0.9020340425922493, "learning_rate": 1.1718822462936413e-06, "loss": 0.2514, "step": 22854 }, { "epoch": 0.7843170899107755, "grad_norm": 0.745502342964332, "learning_rate": 1.1715247692135873e-06, "loss": 0.2521, "step": 22855 }, { "epoch": 0.7843514070006864, "grad_norm": 0.6829252275588025, "learning_rate": 1.1711673394298801e-06, "loss": 0.309, "step": 22856 }, { "epoch": 0.7843857240905971, "grad_norm": 0.7936425868331592, "learning_rate": 1.1708099569469367e-06, "loss": 0.2834, "step": 22857 }, { "epoch": 0.7844200411805079, "grad_norm": 0.7529516348856399, "learning_rate": 1.1704526217691714e-06, "loss": 0.2557, "step": 22858 }, { "epoch": 0.7844543582704187, "grad_norm": 0.8143781074538727, "learning_rate": 1.170095333900999e-06, "loss": 0.2431, "step": 22859 }, { "epoch": 0.7844886753603294, "grad_norm": 0.7354900062189299, "learning_rate": 1.169738093346834e-06, "loss": 0.1989, "step": 22860 }, { "epoch": 0.7845229924502403, "grad_norm": 0.8821781255608988, "learning_rate": 1.169380900111089e-06, "loss": 0.2761, "step": 22861 }, { "epoch": 0.784557309540151, "grad_norm": 0.8194703058777216, "learning_rate": 1.1690237541981785e-06, "loss": 0.2485, "step": 22862 }, { "epoch": 0.7845916266300618, "grad_norm": 0.720392475200275, "learning_rate": 1.1686666556125098e-06, "loss": 0.2232, "step": 22863 }, { "epoch": 0.7846259437199725, "grad_norm": 0.7802014430152105, "learning_rate": 1.1683096043585008e-06, "loss": 0.2718, "step": 22864 }, { "epoch": 0.7846602608098833, "grad_norm": 0.8871122375980375, "learning_rate": 1.1679526004405578e-06, "loss": 0.2922, "step": 22865 }, { "epoch": 0.7846945778997941, "grad_norm": 0.8092775680931706, "learning_rate": 1.1675956438630914e-06, "loss": 0.2742, "step": 22866 }, { "epoch": 0.7847288949897049, "grad_norm": 0.7601181749676186, "learning_rate": 1.1672387346305154e-06, "loss": 0.3112, "step": 22867 }, { "epoch": 0.7847632120796156, "grad_norm": 0.7682200669710099, "learning_rate": 1.1668818727472348e-06, "loss": 0.2726, "step": 22868 }, { "epoch": 0.7847975291695264, "grad_norm": 0.7345328878357649, "learning_rate": 1.1665250582176595e-06, "loss": 0.2681, "step": 22869 }, { "epoch": 0.7848318462594372, "grad_norm": 0.7416339158918642, "learning_rate": 1.166168291046198e-06, "loss": 0.2632, "step": 22870 }, { "epoch": 0.784866163349348, "grad_norm": 0.8387086509446071, "learning_rate": 1.1658115712372593e-06, "loss": 0.2671, "step": 22871 }, { "epoch": 0.7849004804392588, "grad_norm": 0.6701634025569395, "learning_rate": 1.1654548987952447e-06, "loss": 0.2279, "step": 22872 }, { "epoch": 0.7849347975291695, "grad_norm": 0.90877427942021, "learning_rate": 1.1650982737245664e-06, "loss": 0.2542, "step": 22873 }, { "epoch": 0.7849691146190803, "grad_norm": 0.8388760615503155, "learning_rate": 1.1647416960296287e-06, "loss": 0.3388, "step": 22874 }, { "epoch": 0.7850034317089911, "grad_norm": 0.7686097782487639, "learning_rate": 1.164385165714833e-06, "loss": 0.2701, "step": 22875 }, { "epoch": 0.7850377487989019, "grad_norm": 0.8005476237383524, "learning_rate": 1.1640286827845897e-06, "loss": 0.2284, "step": 22876 }, { "epoch": 0.7850720658888126, "grad_norm": 0.8314842176114274, "learning_rate": 1.1636722472432981e-06, "loss": 0.2939, "step": 22877 }, { "epoch": 0.7851063829787234, "grad_norm": 0.8245760537291569, "learning_rate": 1.1633158590953636e-06, "loss": 0.244, "step": 22878 }, { "epoch": 0.7851407000686342, "grad_norm": 0.944014475677258, "learning_rate": 1.1629595183451886e-06, "loss": 0.2776, "step": 22879 }, { "epoch": 0.785175017158545, "grad_norm": 0.7997900823206741, "learning_rate": 1.1626032249971748e-06, "loss": 0.2585, "step": 22880 }, { "epoch": 0.7852093342484557, "grad_norm": 0.9754043607409881, "learning_rate": 1.1622469790557245e-06, "loss": 0.3081, "step": 22881 }, { "epoch": 0.7852436513383665, "grad_norm": 0.7738062894521432, "learning_rate": 1.1618907805252378e-06, "loss": 0.2466, "step": 22882 }, { "epoch": 0.7852779684282772, "grad_norm": 0.7918151128058178, "learning_rate": 1.161534629410116e-06, "loss": 0.2436, "step": 22883 }, { "epoch": 0.7853122855181881, "grad_norm": 0.8026569234861444, "learning_rate": 1.161178525714759e-06, "loss": 0.276, "step": 22884 }, { "epoch": 0.7853466026080989, "grad_norm": 0.7475036461921506, "learning_rate": 1.1608224694435666e-06, "loss": 0.2311, "step": 22885 }, { "epoch": 0.7853809196980096, "grad_norm": 0.8371901537014789, "learning_rate": 1.1604664606009341e-06, "loss": 0.2729, "step": 22886 }, { "epoch": 0.7854152367879204, "grad_norm": 0.8396599842233728, "learning_rate": 1.160110499191265e-06, "loss": 0.2511, "step": 22887 }, { "epoch": 0.7854495538778311, "grad_norm": 0.7430116423755688, "learning_rate": 1.1597545852189519e-06, "loss": 0.2118, "step": 22888 }, { "epoch": 0.785483870967742, "grad_norm": 0.8051729158478381, "learning_rate": 1.159398718688392e-06, "loss": 0.2398, "step": 22889 }, { "epoch": 0.7855181880576527, "grad_norm": 0.8571170024035275, "learning_rate": 1.1590428996039865e-06, "loss": 0.2793, "step": 22890 }, { "epoch": 0.7855525051475635, "grad_norm": 0.7403015852693974, "learning_rate": 1.1586871279701261e-06, "loss": 0.2808, "step": 22891 }, { "epoch": 0.7855868222374742, "grad_norm": 0.8347808297136747, "learning_rate": 1.158331403791208e-06, "loss": 0.2519, "step": 22892 }, { "epoch": 0.7856211393273851, "grad_norm": 0.8733616383017919, "learning_rate": 1.157975727071627e-06, "loss": 0.2625, "step": 22893 }, { "epoch": 0.7856554564172958, "grad_norm": 0.8231031033305342, "learning_rate": 1.1576200978157754e-06, "loss": 0.2388, "step": 22894 }, { "epoch": 0.7856897735072066, "grad_norm": 0.9714978878255739, "learning_rate": 1.1572645160280482e-06, "loss": 0.3176, "step": 22895 }, { "epoch": 0.7857240905971173, "grad_norm": 0.9118177385668513, "learning_rate": 1.1569089817128381e-06, "loss": 0.2657, "step": 22896 }, { "epoch": 0.7857584076870281, "grad_norm": 0.7866757012116785, "learning_rate": 1.1565534948745377e-06, "loss": 0.2781, "step": 22897 }, { "epoch": 0.785792724776939, "grad_norm": 0.7963264855589207, "learning_rate": 1.1561980555175355e-06, "loss": 0.2492, "step": 22898 }, { "epoch": 0.7858270418668497, "grad_norm": 0.7334150409271395, "learning_rate": 1.1558426636462271e-06, "loss": 0.2325, "step": 22899 }, { "epoch": 0.7858613589567605, "grad_norm": 0.7819775007765102, "learning_rate": 1.1554873192650002e-06, "loss": 0.2628, "step": 22900 }, { "epoch": 0.7858956760466712, "grad_norm": 0.8092014141095656, "learning_rate": 1.1551320223782448e-06, "loss": 0.2952, "step": 22901 }, { "epoch": 0.7859299931365821, "grad_norm": 0.7390964990005813, "learning_rate": 1.1547767729903502e-06, "loss": 0.2384, "step": 22902 }, { "epoch": 0.7859643102264928, "grad_norm": 0.746292186785933, "learning_rate": 1.154421571105706e-06, "loss": 0.2546, "step": 22903 }, { "epoch": 0.7859986273164036, "grad_norm": 0.9343572477539737, "learning_rate": 1.1540664167286997e-06, "loss": 0.2286, "step": 22904 }, { "epoch": 0.7860329444063143, "grad_norm": 0.6950584111514793, "learning_rate": 1.1537113098637188e-06, "loss": 0.2843, "step": 22905 }, { "epoch": 0.7860672614962251, "grad_norm": 0.8339020877255159, "learning_rate": 1.1533562505151514e-06, "loss": 0.3074, "step": 22906 }, { "epoch": 0.7861015785861359, "grad_norm": 0.7125662906424528, "learning_rate": 1.1530012386873802e-06, "loss": 0.3059, "step": 22907 }, { "epoch": 0.7861358956760467, "grad_norm": 0.8381565408805702, "learning_rate": 1.1526462743847966e-06, "loss": 0.2879, "step": 22908 }, { "epoch": 0.7861702127659574, "grad_norm": 0.8208626041993301, "learning_rate": 1.152291357611781e-06, "loss": 0.2665, "step": 22909 }, { "epoch": 0.7862045298558682, "grad_norm": 0.7114115822951632, "learning_rate": 1.1519364883727196e-06, "loss": 0.2802, "step": 22910 }, { "epoch": 0.786238846945779, "grad_norm": 0.7845821324636436, "learning_rate": 1.151581666671997e-06, "loss": 0.257, "step": 22911 }, { "epoch": 0.7862731640356898, "grad_norm": 0.8020635886163019, "learning_rate": 1.1512268925139957e-06, "loss": 0.2711, "step": 22912 }, { "epoch": 0.7863074811256006, "grad_norm": 0.8377201797985898, "learning_rate": 1.1508721659030991e-06, "loss": 0.2641, "step": 22913 }, { "epoch": 0.7863417982155113, "grad_norm": 0.8283169186904018, "learning_rate": 1.1505174868436892e-06, "loss": 0.2722, "step": 22914 }, { "epoch": 0.7863761153054221, "grad_norm": 0.7794198518697752, "learning_rate": 1.1501628553401473e-06, "loss": 0.296, "step": 22915 }, { "epoch": 0.7864104323953329, "grad_norm": 0.826743659776958, "learning_rate": 1.1498082713968555e-06, "loss": 0.261, "step": 22916 }, { "epoch": 0.7864447494852437, "grad_norm": 0.7486938602178537, "learning_rate": 1.1494537350181934e-06, "loss": 0.2512, "step": 22917 }, { "epoch": 0.7864790665751544, "grad_norm": 0.7983197194595729, "learning_rate": 1.1490992462085403e-06, "loss": 0.3125, "step": 22918 }, { "epoch": 0.7865133836650652, "grad_norm": 0.7793195078740195, "learning_rate": 1.1487448049722772e-06, "loss": 0.328, "step": 22919 }, { "epoch": 0.7865477007549759, "grad_norm": 0.7046418840990104, "learning_rate": 1.1483904113137833e-06, "loss": 0.2347, "step": 22920 }, { "epoch": 0.7865820178448868, "grad_norm": 0.7080120323857939, "learning_rate": 1.1480360652374324e-06, "loss": 0.2627, "step": 22921 }, { "epoch": 0.7866163349347975, "grad_norm": 0.6980685495368887, "learning_rate": 1.1476817667476075e-06, "loss": 0.2245, "step": 22922 }, { "epoch": 0.7866506520247083, "grad_norm": 0.7636209129226271, "learning_rate": 1.1473275158486819e-06, "loss": 0.2384, "step": 22923 }, { "epoch": 0.786684969114619, "grad_norm": 0.7396637432260756, "learning_rate": 1.1469733125450328e-06, "loss": 0.2761, "step": 22924 }, { "epoch": 0.7867192862045299, "grad_norm": 0.9425870345243715, "learning_rate": 1.1466191568410362e-06, "loss": 0.2465, "step": 22925 }, { "epoch": 0.7867536032944407, "grad_norm": 0.7884261519499919, "learning_rate": 1.1462650487410676e-06, "loss": 0.2801, "step": 22926 }, { "epoch": 0.7867879203843514, "grad_norm": 0.777139929539023, "learning_rate": 1.1459109882495011e-06, "loss": 0.2322, "step": 22927 }, { "epoch": 0.7868222374742622, "grad_norm": 0.884776038379134, "learning_rate": 1.1455569753707114e-06, "loss": 0.2693, "step": 22928 }, { "epoch": 0.7868565545641729, "grad_norm": 0.7748724171618159, "learning_rate": 1.1452030101090722e-06, "loss": 0.2319, "step": 22929 }, { "epoch": 0.7868908716540838, "grad_norm": 0.7857041566160142, "learning_rate": 1.1448490924689526e-06, "loss": 0.2595, "step": 22930 }, { "epoch": 0.7869251887439945, "grad_norm": 0.790831881917469, "learning_rate": 1.1444952224547312e-06, "loss": 0.3256, "step": 22931 }, { "epoch": 0.7869595058339053, "grad_norm": 0.7687787152392643, "learning_rate": 1.1441414000707745e-06, "loss": 0.2601, "step": 22932 }, { "epoch": 0.786993822923816, "grad_norm": 0.7455790015495537, "learning_rate": 1.1437876253214542e-06, "loss": 0.2123, "step": 22933 }, { "epoch": 0.7870281400137268, "grad_norm": 0.8440138462827916, "learning_rate": 1.1434338982111442e-06, "loss": 0.2385, "step": 22934 }, { "epoch": 0.7870624571036376, "grad_norm": 0.8123945012791222, "learning_rate": 1.1430802187442113e-06, "loss": 0.3154, "step": 22935 }, { "epoch": 0.7870967741935484, "grad_norm": 0.853502450359604, "learning_rate": 1.1427265869250254e-06, "loss": 0.2785, "step": 22936 }, { "epoch": 0.7871310912834591, "grad_norm": 0.7153401482913417, "learning_rate": 1.142373002757955e-06, "loss": 0.2574, "step": 22937 }, { "epoch": 0.7871654083733699, "grad_norm": 0.7837834364394185, "learning_rate": 1.142019466247369e-06, "loss": 0.2409, "step": 22938 }, { "epoch": 0.7871997254632808, "grad_norm": 0.7752283490311769, "learning_rate": 1.141665977397634e-06, "loss": 0.2439, "step": 22939 }, { "epoch": 0.7872340425531915, "grad_norm": 0.8049464522867822, "learning_rate": 1.1413125362131179e-06, "loss": 0.3002, "step": 22940 }, { "epoch": 0.7872683596431023, "grad_norm": 0.742308566861088, "learning_rate": 1.1409591426981874e-06, "loss": 0.2872, "step": 22941 }, { "epoch": 0.787302676733013, "grad_norm": 0.7137157271820382, "learning_rate": 1.1406057968572054e-06, "loss": 0.2591, "step": 22942 }, { "epoch": 0.7873369938229238, "grad_norm": 0.8043984743261404, "learning_rate": 1.1402524986945418e-06, "loss": 0.2774, "step": 22943 }, { "epoch": 0.7873713109128346, "grad_norm": 0.8071679508550016, "learning_rate": 1.1398992482145572e-06, "loss": 0.2514, "step": 22944 }, { "epoch": 0.7874056280027454, "grad_norm": 0.859627009858341, "learning_rate": 1.1395460454216167e-06, "loss": 0.2848, "step": 22945 }, { "epoch": 0.7874399450926561, "grad_norm": 0.7845996025772074, "learning_rate": 1.1391928903200843e-06, "loss": 0.2801, "step": 22946 }, { "epoch": 0.7874742621825669, "grad_norm": 0.8333546031006551, "learning_rate": 1.1388397829143222e-06, "loss": 0.2886, "step": 22947 }, { "epoch": 0.7875085792724777, "grad_norm": 0.7972643485453588, "learning_rate": 1.1384867232086932e-06, "loss": 0.2552, "step": 22948 }, { "epoch": 0.7875428963623885, "grad_norm": 0.8216400111820595, "learning_rate": 1.1381337112075586e-06, "loss": 0.2354, "step": 22949 }, { "epoch": 0.7875772134522993, "grad_norm": 0.6781817755584849, "learning_rate": 1.137780746915279e-06, "loss": 0.2413, "step": 22950 }, { "epoch": 0.78761153054221, "grad_norm": 0.825961651904048, "learning_rate": 1.1374278303362158e-06, "loss": 0.2712, "step": 22951 }, { "epoch": 0.7876458476321208, "grad_norm": 0.6814911090276446, "learning_rate": 1.1370749614747296e-06, "loss": 0.2651, "step": 22952 }, { "epoch": 0.7876801647220316, "grad_norm": 0.8086802064618601, "learning_rate": 1.1367221403351757e-06, "loss": 0.2343, "step": 22953 }, { "epoch": 0.7877144818119424, "grad_norm": 0.7674092865631744, "learning_rate": 1.136369366921919e-06, "loss": 0.3308, "step": 22954 }, { "epoch": 0.7877487989018531, "grad_norm": 0.816522425253314, "learning_rate": 1.136016641239312e-06, "loss": 0.2563, "step": 22955 }, { "epoch": 0.7877831159917639, "grad_norm": 0.7915448520314271, "learning_rate": 1.1356639632917137e-06, "loss": 0.2363, "step": 22956 }, { "epoch": 0.7878174330816746, "grad_norm": 0.736283431302761, "learning_rate": 1.135311333083484e-06, "loss": 0.2199, "step": 22957 }, { "epoch": 0.7878517501715855, "grad_norm": 0.7565498892496682, "learning_rate": 1.1349587506189757e-06, "loss": 0.2601, "step": 22958 }, { "epoch": 0.7878860672614962, "grad_norm": 0.6586754106400023, "learning_rate": 1.1346062159025461e-06, "loss": 0.2031, "step": 22959 }, { "epoch": 0.787920384351407, "grad_norm": 0.7976586188108163, "learning_rate": 1.1342537289385502e-06, "loss": 0.2748, "step": 22960 }, { "epoch": 0.7879547014413177, "grad_norm": 0.8011990118685726, "learning_rate": 1.1339012897313422e-06, "loss": 0.2568, "step": 22961 }, { "epoch": 0.7879890185312286, "grad_norm": 0.7962088058636786, "learning_rate": 1.1335488982852766e-06, "loss": 0.2381, "step": 22962 }, { "epoch": 0.7880233356211394, "grad_norm": 0.8423458440690772, "learning_rate": 1.1331965546047057e-06, "loss": 0.2604, "step": 22963 }, { "epoch": 0.7880576527110501, "grad_norm": 0.8349669286237761, "learning_rate": 1.1328442586939853e-06, "loss": 0.2415, "step": 22964 }, { "epoch": 0.7880919698009609, "grad_norm": 0.8094882148502178, "learning_rate": 1.1324920105574626e-06, "loss": 0.2309, "step": 22965 }, { "epoch": 0.7881262868908716, "grad_norm": 0.8611150017811072, "learning_rate": 1.1321398101994946e-06, "loss": 0.2941, "step": 22966 }, { "epoch": 0.7881606039807825, "grad_norm": 0.782963084396778, "learning_rate": 1.1317876576244286e-06, "loss": 0.3043, "step": 22967 }, { "epoch": 0.7881949210706932, "grad_norm": 0.809465092026919, "learning_rate": 1.1314355528366156e-06, "loss": 0.2341, "step": 22968 }, { "epoch": 0.788229238160604, "grad_norm": 0.8861199020788533, "learning_rate": 1.1310834958404066e-06, "loss": 0.227, "step": 22969 }, { "epoch": 0.7882635552505147, "grad_norm": 0.8465340808325685, "learning_rate": 1.1307314866401503e-06, "loss": 0.3507, "step": 22970 }, { "epoch": 0.7882978723404256, "grad_norm": 0.9088253039250452, "learning_rate": 1.130379525240195e-06, "loss": 0.2567, "step": 22971 }, { "epoch": 0.7883321894303363, "grad_norm": 0.7977697944330033, "learning_rate": 1.1300276116448895e-06, "loss": 0.2536, "step": 22972 }, { "epoch": 0.7883665065202471, "grad_norm": 0.7866499897735593, "learning_rate": 1.1296757458585822e-06, "loss": 0.3498, "step": 22973 }, { "epoch": 0.7884008236101578, "grad_norm": 0.7932540480335826, "learning_rate": 1.1293239278856155e-06, "loss": 0.2275, "step": 22974 }, { "epoch": 0.7884351407000686, "grad_norm": 0.7764475314817587, "learning_rate": 1.128972157730342e-06, "loss": 0.2803, "step": 22975 }, { "epoch": 0.7884694577899795, "grad_norm": 0.7811181125932467, "learning_rate": 1.128620435397103e-06, "loss": 0.2872, "step": 22976 }, { "epoch": 0.7885037748798902, "grad_norm": 0.8189306741032948, "learning_rate": 1.128268760890243e-06, "loss": 0.3718, "step": 22977 }, { "epoch": 0.788538091969801, "grad_norm": 0.73210911023668, "learning_rate": 1.127917134214112e-06, "loss": 0.2458, "step": 22978 }, { "epoch": 0.7885724090597117, "grad_norm": 0.7852901295786271, "learning_rate": 1.1275655553730474e-06, "loss": 0.2654, "step": 22979 }, { "epoch": 0.7886067261496225, "grad_norm": 0.7360038483748936, "learning_rate": 1.1272140243713981e-06, "loss": 0.2417, "step": 22980 }, { "epoch": 0.7886410432395333, "grad_norm": 0.8553227602339605, "learning_rate": 1.126862541213503e-06, "loss": 0.287, "step": 22981 }, { "epoch": 0.7886753603294441, "grad_norm": 0.7054336274505679, "learning_rate": 1.1265111059037059e-06, "loss": 0.2225, "step": 22982 }, { "epoch": 0.7887096774193548, "grad_norm": 0.7568855870864041, "learning_rate": 1.1261597184463475e-06, "loss": 0.293, "step": 22983 }, { "epoch": 0.7887439945092656, "grad_norm": 0.785476629002216, "learning_rate": 1.1258083788457696e-06, "loss": 0.2672, "step": 22984 }, { "epoch": 0.7887783115991764, "grad_norm": 0.7497077227795648, "learning_rate": 1.1254570871063125e-06, "loss": 0.2395, "step": 22985 }, { "epoch": 0.7888126286890872, "grad_norm": 0.7319770668983564, "learning_rate": 1.1251058432323153e-06, "loss": 0.2558, "step": 22986 }, { "epoch": 0.7888469457789979, "grad_norm": 0.9504073653036884, "learning_rate": 1.1247546472281195e-06, "loss": 0.248, "step": 22987 }, { "epoch": 0.7888812628689087, "grad_norm": 0.907646598577308, "learning_rate": 1.124403499098059e-06, "loss": 0.2918, "step": 22988 }, { "epoch": 0.7889155799588194, "grad_norm": 0.7491726815117479, "learning_rate": 1.124052398846478e-06, "loss": 0.246, "step": 22989 }, { "epoch": 0.7889498970487303, "grad_norm": 0.9246611381671194, "learning_rate": 1.1237013464777085e-06, "loss": 0.2741, "step": 22990 }, { "epoch": 0.7889842141386411, "grad_norm": 0.8928159596277127, "learning_rate": 1.12335034199609e-06, "loss": 0.3333, "step": 22991 }, { "epoch": 0.7890185312285518, "grad_norm": 0.7967277550532966, "learning_rate": 1.1229993854059584e-06, "loss": 0.2742, "step": 22992 }, { "epoch": 0.7890528483184626, "grad_norm": 0.7686390740134006, "learning_rate": 1.122648476711649e-06, "loss": 0.2323, "step": 22993 }, { "epoch": 0.7890871654083733, "grad_norm": 0.7320192523058952, "learning_rate": 1.122297615917497e-06, "loss": 0.2294, "step": 22994 }, { "epoch": 0.7891214824982842, "grad_norm": 0.8765953085318313, "learning_rate": 1.1219468030278374e-06, "loss": 0.2616, "step": 22995 }, { "epoch": 0.7891557995881949, "grad_norm": 0.7312618364631884, "learning_rate": 1.1215960380470048e-06, "loss": 0.2367, "step": 22996 }, { "epoch": 0.7891901166781057, "grad_norm": 0.7535892777724897, "learning_rate": 1.1212453209793283e-06, "loss": 0.3086, "step": 22997 }, { "epoch": 0.7892244337680164, "grad_norm": 0.7568403442865672, "learning_rate": 1.120894651829147e-06, "loss": 0.2754, "step": 22998 }, { "epoch": 0.7892587508579273, "grad_norm": 0.7028509147542041, "learning_rate": 1.1205440306007881e-06, "loss": 0.2682, "step": 22999 }, { "epoch": 0.789293067947838, "grad_norm": 0.7249622918152242, "learning_rate": 1.1201934572985833e-06, "loss": 0.2567, "step": 23000 }, { "epoch": 0.7893273850377488, "grad_norm": 0.8752769927915598, "learning_rate": 1.1198429319268678e-06, "loss": 0.2835, "step": 23001 }, { "epoch": 0.7893617021276595, "grad_norm": 0.7415172433131597, "learning_rate": 1.1194924544899677e-06, "loss": 0.2335, "step": 23002 }, { "epoch": 0.7893960192175703, "grad_norm": 0.7525413331631904, "learning_rate": 1.1191420249922135e-06, "loss": 0.2245, "step": 23003 }, { "epoch": 0.7894303363074812, "grad_norm": 0.7232844038696963, "learning_rate": 1.118791643437936e-06, "loss": 0.2381, "step": 23004 }, { "epoch": 0.7894646533973919, "grad_norm": 0.8854917227947644, "learning_rate": 1.1184413098314617e-06, "loss": 0.2385, "step": 23005 }, { "epoch": 0.7894989704873027, "grad_norm": 0.7005591919276393, "learning_rate": 1.1180910241771199e-06, "loss": 0.2827, "step": 23006 }, { "epoch": 0.7895332875772134, "grad_norm": 0.7757840791151636, "learning_rate": 1.1177407864792377e-06, "loss": 0.2686, "step": 23007 }, { "epoch": 0.7895676046671243, "grad_norm": 0.7484132100126389, "learning_rate": 1.1173905967421432e-06, "loss": 0.2814, "step": 23008 }, { "epoch": 0.789601921757035, "grad_norm": 0.941043378849344, "learning_rate": 1.1170404549701586e-06, "loss": 0.234, "step": 23009 }, { "epoch": 0.7896362388469458, "grad_norm": 0.7050270129812276, "learning_rate": 1.1166903611676144e-06, "loss": 0.2522, "step": 23010 }, { "epoch": 0.7896705559368565, "grad_norm": 0.772341450114082, "learning_rate": 1.1163403153388308e-06, "loss": 0.2825, "step": 23011 }, { "epoch": 0.7897048730267673, "grad_norm": 0.7070810564331358, "learning_rate": 1.1159903174881376e-06, "loss": 0.2399, "step": 23012 }, { "epoch": 0.7897391901166781, "grad_norm": 0.78753100060145, "learning_rate": 1.115640367619854e-06, "loss": 0.2911, "step": 23013 }, { "epoch": 0.7897735072065889, "grad_norm": 0.7894289263348768, "learning_rate": 1.1152904657383052e-06, "loss": 0.2562, "step": 23014 }, { "epoch": 0.7898078242964996, "grad_norm": 0.8316866940445198, "learning_rate": 1.1149406118478134e-06, "loss": 0.3294, "step": 23015 }, { "epoch": 0.7898421413864104, "grad_norm": 0.8753033495127833, "learning_rate": 1.1145908059527006e-06, "loss": 0.2381, "step": 23016 }, { "epoch": 0.7898764584763212, "grad_norm": 0.855408201194358, "learning_rate": 1.1142410480572885e-06, "loss": 0.2587, "step": 23017 }, { "epoch": 0.789910775566232, "grad_norm": 0.7962037341806928, "learning_rate": 1.1138913381658984e-06, "loss": 0.2725, "step": 23018 }, { "epoch": 0.7899450926561428, "grad_norm": 0.7463834913977551, "learning_rate": 1.113541676282851e-06, "loss": 0.3166, "step": 23019 }, { "epoch": 0.7899794097460535, "grad_norm": 0.7687931061744543, "learning_rate": 1.1131920624124627e-06, "loss": 0.2335, "step": 23020 }, { "epoch": 0.7900137268359643, "grad_norm": 0.6730834204381267, "learning_rate": 1.1128424965590574e-06, "loss": 0.2901, "step": 23021 }, { "epoch": 0.7900480439258751, "grad_norm": 0.8621217322492922, "learning_rate": 1.1124929787269496e-06, "loss": 0.2584, "step": 23022 }, { "epoch": 0.7900823610157859, "grad_norm": 0.7527364517526243, "learning_rate": 1.112143508920457e-06, "loss": 0.2248, "step": 23023 }, { "epoch": 0.7901166781056966, "grad_norm": 0.7881204788596348, "learning_rate": 1.1117940871439015e-06, "loss": 0.2342, "step": 23024 }, { "epoch": 0.7901509951956074, "grad_norm": 0.8823111118255651, "learning_rate": 1.1114447134015954e-06, "loss": 0.3068, "step": 23025 }, { "epoch": 0.7901853122855181, "grad_norm": 0.7336523565164165, "learning_rate": 1.111095387697857e-06, "loss": 0.2737, "step": 23026 }, { "epoch": 0.790219629375429, "grad_norm": 0.8943578402972931, "learning_rate": 1.1107461100370003e-06, "loss": 0.262, "step": 23027 }, { "epoch": 0.7902539464653398, "grad_norm": 0.7618709927329158, "learning_rate": 1.1103968804233412e-06, "loss": 0.2754, "step": 23028 }, { "epoch": 0.7902882635552505, "grad_norm": 0.8313173806605514, "learning_rate": 1.1100476988611936e-06, "loss": 0.2445, "step": 23029 }, { "epoch": 0.7903225806451613, "grad_norm": 0.7705001511955386, "learning_rate": 1.1096985653548714e-06, "loss": 0.2497, "step": 23030 }, { "epoch": 0.7903568977350721, "grad_norm": 0.7489481920027148, "learning_rate": 1.1093494799086897e-06, "loss": 0.2588, "step": 23031 }, { "epoch": 0.7903912148249829, "grad_norm": 0.8394021107162472, "learning_rate": 1.1090004425269557e-06, "loss": 0.2655, "step": 23032 }, { "epoch": 0.7904255319148936, "grad_norm": 0.697920296322791, "learning_rate": 1.108651453213988e-06, "loss": 0.2728, "step": 23033 }, { "epoch": 0.7904598490048044, "grad_norm": 0.667574815815741, "learning_rate": 1.1083025119740931e-06, "loss": 0.1853, "step": 23034 }, { "epoch": 0.7904941660947151, "grad_norm": 0.8478799561223176, "learning_rate": 1.1079536188115837e-06, "loss": 0.2638, "step": 23035 }, { "epoch": 0.790528483184626, "grad_norm": 0.8235443893920595, "learning_rate": 1.1076047737307693e-06, "loss": 0.2756, "step": 23036 }, { "epoch": 0.7905628002745367, "grad_norm": 0.7905814312611441, "learning_rate": 1.1072559767359598e-06, "loss": 0.2103, "step": 23037 }, { "epoch": 0.7905971173644475, "grad_norm": 0.789530048606414, "learning_rate": 1.1069072278314647e-06, "loss": 0.2701, "step": 23038 }, { "epoch": 0.7906314344543582, "grad_norm": 0.798893202844432, "learning_rate": 1.1065585270215912e-06, "loss": 0.2846, "step": 23039 }, { "epoch": 0.790665751544269, "grad_norm": 0.8316971393382547, "learning_rate": 1.106209874310648e-06, "loss": 0.3076, "step": 23040 }, { "epoch": 0.7907000686341799, "grad_norm": 0.7929467745925253, "learning_rate": 1.1058612697029426e-06, "loss": 0.3133, "step": 23041 }, { "epoch": 0.7907343857240906, "grad_norm": 0.891628111886446, "learning_rate": 1.1055127132027821e-06, "loss": 0.2406, "step": 23042 }, { "epoch": 0.7907687028140014, "grad_norm": 0.7514182116206941, "learning_rate": 1.1051642048144683e-06, "loss": 0.3193, "step": 23043 }, { "epoch": 0.7908030199039121, "grad_norm": 0.8044800770093409, "learning_rate": 1.104815744542312e-06, "loss": 0.3119, "step": 23044 }, { "epoch": 0.790837336993823, "grad_norm": 0.8409870632487102, "learning_rate": 1.104467332390617e-06, "loss": 0.2557, "step": 23045 }, { "epoch": 0.7908716540837337, "grad_norm": 0.7089664059537611, "learning_rate": 1.1041189683636833e-06, "loss": 0.2664, "step": 23046 }, { "epoch": 0.7909059711736445, "grad_norm": 0.7642781847065239, "learning_rate": 1.1037706524658204e-06, "loss": 0.2436, "step": 23047 }, { "epoch": 0.7909402882635552, "grad_norm": 0.711134098574348, "learning_rate": 1.103422384701327e-06, "loss": 0.2897, "step": 23048 }, { "epoch": 0.790974605353466, "grad_norm": 0.7728566685724924, "learning_rate": 1.1030741650745074e-06, "loss": 0.2894, "step": 23049 }, { "epoch": 0.7910089224433768, "grad_norm": 0.7836792835570355, "learning_rate": 1.1027259935896628e-06, "loss": 0.2993, "step": 23050 }, { "epoch": 0.7910432395332876, "grad_norm": 0.747017107128952, "learning_rate": 1.102377870251095e-06, "loss": 0.2306, "step": 23051 }, { "epoch": 0.7910775566231983, "grad_norm": 0.6888564670884048, "learning_rate": 1.1020297950631048e-06, "loss": 0.2506, "step": 23052 }, { "epoch": 0.7911118737131091, "grad_norm": 0.8053102547056873, "learning_rate": 1.1016817680299912e-06, "loss": 0.2423, "step": 23053 }, { "epoch": 0.79114619080302, "grad_norm": 0.7817261710714973, "learning_rate": 1.101333789156056e-06, "loss": 0.2506, "step": 23054 }, { "epoch": 0.7911805078929307, "grad_norm": 0.8436222919635532, "learning_rate": 1.100985858445594e-06, "loss": 0.2494, "step": 23055 }, { "epoch": 0.7912148249828415, "grad_norm": 0.6923499156450308, "learning_rate": 1.1006379759029084e-06, "loss": 0.2631, "step": 23056 }, { "epoch": 0.7912491420727522, "grad_norm": 0.67613875912804, "learning_rate": 1.100290141532293e-06, "loss": 0.2769, "step": 23057 }, { "epoch": 0.791283459162663, "grad_norm": 0.7156454641568737, "learning_rate": 1.0999423553380463e-06, "loss": 0.2177, "step": 23058 }, { "epoch": 0.7913177762525738, "grad_norm": 0.7413908015693664, "learning_rate": 1.0995946173244647e-06, "loss": 0.2392, "step": 23059 }, { "epoch": 0.7913520933424846, "grad_norm": 0.7387510718494957, "learning_rate": 1.0992469274958438e-06, "loss": 0.1959, "step": 23060 }, { "epoch": 0.7913864104323953, "grad_norm": 0.754707587270127, "learning_rate": 1.0988992858564801e-06, "loss": 0.2322, "step": 23061 }, { "epoch": 0.7914207275223061, "grad_norm": 0.7694529791304636, "learning_rate": 1.098551692410667e-06, "loss": 0.2502, "step": 23062 }, { "epoch": 0.7914550446122168, "grad_norm": 0.8640994740227337, "learning_rate": 1.0982041471627002e-06, "loss": 0.2954, "step": 23063 }, { "epoch": 0.7914893617021277, "grad_norm": 0.8043068313641375, "learning_rate": 1.0978566501168698e-06, "loss": 0.3055, "step": 23064 }, { "epoch": 0.7915236787920384, "grad_norm": 0.8466649981920545, "learning_rate": 1.0975092012774741e-06, "loss": 0.231, "step": 23065 }, { "epoch": 0.7915579958819492, "grad_norm": 0.937079462810716, "learning_rate": 1.0971618006488005e-06, "loss": 0.2934, "step": 23066 }, { "epoch": 0.79159231297186, "grad_norm": 0.9547620629966223, "learning_rate": 1.0968144482351411e-06, "loss": 0.2985, "step": 23067 }, { "epoch": 0.7916266300617708, "grad_norm": 0.823759124043122, "learning_rate": 1.0964671440407915e-06, "loss": 0.2585, "step": 23068 }, { "epoch": 0.7916609471516816, "grad_norm": 0.8170880150366586, "learning_rate": 1.0961198880700375e-06, "loss": 0.2518, "step": 23069 }, { "epoch": 0.7916952642415923, "grad_norm": 0.726394910751798, "learning_rate": 1.0957726803271713e-06, "loss": 0.202, "step": 23070 }, { "epoch": 0.7917295813315031, "grad_norm": 0.7527559150157378, "learning_rate": 1.0954255208164815e-06, "loss": 0.2266, "step": 23071 }, { "epoch": 0.7917638984214138, "grad_norm": 0.7350796875037973, "learning_rate": 1.0950784095422574e-06, "loss": 0.2398, "step": 23072 }, { "epoch": 0.7917982155113247, "grad_norm": 0.8325437003939304, "learning_rate": 1.0947313465087867e-06, "loss": 0.2283, "step": 23073 }, { "epoch": 0.7918325326012354, "grad_norm": 0.9054584369717904, "learning_rate": 1.0943843317203567e-06, "loss": 0.2626, "step": 23074 }, { "epoch": 0.7918668496911462, "grad_norm": 0.8224150804389493, "learning_rate": 1.0940373651812552e-06, "loss": 0.2539, "step": 23075 }, { "epoch": 0.7919011667810569, "grad_norm": 0.7308780089114301, "learning_rate": 1.0936904468957681e-06, "loss": 0.2256, "step": 23076 }, { "epoch": 0.7919354838709678, "grad_norm": 0.721378074393911, "learning_rate": 1.0933435768681822e-06, "loss": 0.2567, "step": 23077 }, { "epoch": 0.7919698009608785, "grad_norm": 0.7680676889140753, "learning_rate": 1.0929967551027793e-06, "loss": 0.3178, "step": 23078 }, { "epoch": 0.7920041180507893, "grad_norm": 0.7611564900225293, "learning_rate": 1.0926499816038494e-06, "loss": 0.2748, "step": 23079 }, { "epoch": 0.7920384351407, "grad_norm": 0.8253253731649971, "learning_rate": 1.0923032563756719e-06, "loss": 0.266, "step": 23080 }, { "epoch": 0.7920727522306108, "grad_norm": 0.8931558735302157, "learning_rate": 1.0919565794225312e-06, "loss": 0.3299, "step": 23081 }, { "epoch": 0.7921070693205217, "grad_norm": 0.7103274259635753, "learning_rate": 1.091609950748711e-06, "loss": 0.2509, "step": 23082 }, { "epoch": 0.7921413864104324, "grad_norm": 0.7980801108166125, "learning_rate": 1.0912633703584935e-06, "loss": 0.2896, "step": 23083 }, { "epoch": 0.7921757035003432, "grad_norm": 0.7686635893558681, "learning_rate": 1.0909168382561592e-06, "loss": 0.2784, "step": 23084 }, { "epoch": 0.7922100205902539, "grad_norm": 0.8658807092034886, "learning_rate": 1.0905703544459906e-06, "loss": 0.247, "step": 23085 }, { "epoch": 0.7922443376801647, "grad_norm": 0.6993192711678174, "learning_rate": 1.0902239189322677e-06, "loss": 0.2665, "step": 23086 }, { "epoch": 0.7922786547700755, "grad_norm": 0.705101722557089, "learning_rate": 1.0898775317192678e-06, "loss": 0.2501, "step": 23087 }, { "epoch": 0.7923129718599863, "grad_norm": 0.8430961119400106, "learning_rate": 1.089531192811274e-06, "loss": 0.2719, "step": 23088 }, { "epoch": 0.792347288949897, "grad_norm": 0.7574272379058989, "learning_rate": 1.089184902212564e-06, "loss": 0.2248, "step": 23089 }, { "epoch": 0.7923816060398078, "grad_norm": 0.7539224677129159, "learning_rate": 1.088838659927412e-06, "loss": 0.2469, "step": 23090 }, { "epoch": 0.7924159231297186, "grad_norm": 0.68568994427829, "learning_rate": 1.0884924659601016e-06, "loss": 0.2149, "step": 23091 }, { "epoch": 0.7924502402196294, "grad_norm": 0.783544093400669, "learning_rate": 1.0881463203149051e-06, "loss": 0.2622, "step": 23092 }, { "epoch": 0.7924845573095401, "grad_norm": 0.7263118738281887, "learning_rate": 1.0878002229960993e-06, "loss": 0.23, "step": 23093 }, { "epoch": 0.7925188743994509, "grad_norm": 0.7335407936218332, "learning_rate": 1.0874541740079614e-06, "loss": 0.2218, "step": 23094 }, { "epoch": 0.7925531914893617, "grad_norm": 0.718277422040712, "learning_rate": 1.087108173354765e-06, "loss": 0.2569, "step": 23095 }, { "epoch": 0.7925875085792725, "grad_norm": 0.7269522207334738, "learning_rate": 1.0867622210407853e-06, "loss": 0.2636, "step": 23096 }, { "epoch": 0.7926218256691833, "grad_norm": 0.7955309875981814, "learning_rate": 1.0864163170702962e-06, "loss": 0.281, "step": 23097 }, { "epoch": 0.792656142759094, "grad_norm": 0.7738029497467512, "learning_rate": 1.086070461447572e-06, "loss": 0.2223, "step": 23098 }, { "epoch": 0.7926904598490048, "grad_norm": 0.8247518037362022, "learning_rate": 1.0857246541768811e-06, "loss": 0.2317, "step": 23099 }, { "epoch": 0.7927247769389156, "grad_norm": 0.7455431683026428, "learning_rate": 1.0853788952625017e-06, "loss": 0.2735, "step": 23100 }, { "epoch": 0.7927590940288264, "grad_norm": 0.7599800232662309, "learning_rate": 1.0850331847086998e-06, "loss": 0.2725, "step": 23101 }, { "epoch": 0.7927934111187371, "grad_norm": 0.8977134834148203, "learning_rate": 1.0846875225197496e-06, "loss": 0.282, "step": 23102 }, { "epoch": 0.7928277282086479, "grad_norm": 0.7766727698794765, "learning_rate": 1.0843419086999195e-06, "loss": 0.2393, "step": 23103 }, { "epoch": 0.7928620452985586, "grad_norm": 0.8091574023664782, "learning_rate": 1.08399634325348e-06, "loss": 0.2989, "step": 23104 }, { "epoch": 0.7928963623884695, "grad_norm": 0.7348592851684542, "learning_rate": 1.0836508261847e-06, "loss": 0.251, "step": 23105 }, { "epoch": 0.7929306794783803, "grad_norm": 0.8081723086655332, "learning_rate": 1.083305357497848e-06, "loss": 0.2436, "step": 23106 }, { "epoch": 0.792964996568291, "grad_norm": 0.8137724849651807, "learning_rate": 1.0829599371971921e-06, "loss": 0.241, "step": 23107 }, { "epoch": 0.7929993136582018, "grad_norm": 0.7994355749685699, "learning_rate": 1.0826145652869996e-06, "loss": 0.2607, "step": 23108 }, { "epoch": 0.7930336307481125, "grad_norm": 0.7911396972426669, "learning_rate": 1.082269241771538e-06, "loss": 0.2474, "step": 23109 }, { "epoch": 0.7930679478380234, "grad_norm": 0.7136746699846943, "learning_rate": 1.0819239666550695e-06, "loss": 0.2295, "step": 23110 }, { "epoch": 0.7931022649279341, "grad_norm": 1.0610953915971164, "learning_rate": 1.0815787399418636e-06, "loss": 0.2597, "step": 23111 }, { "epoch": 0.7931365820178449, "grad_norm": 0.8109762876875932, "learning_rate": 1.0812335616361857e-06, "loss": 0.3073, "step": 23112 }, { "epoch": 0.7931708991077556, "grad_norm": 1.0211344315509947, "learning_rate": 1.0808884317422952e-06, "loss": 0.2694, "step": 23113 }, { "epoch": 0.7932052161976665, "grad_norm": 0.7098569479640701, "learning_rate": 1.0805433502644619e-06, "loss": 0.2687, "step": 23114 }, { "epoch": 0.7932395332875772, "grad_norm": 0.7262442949300417, "learning_rate": 1.0801983172069441e-06, "loss": 0.2462, "step": 23115 }, { "epoch": 0.793273850377488, "grad_norm": 0.7472758532238114, "learning_rate": 1.0798533325740062e-06, "loss": 0.2423, "step": 23116 }, { "epoch": 0.7933081674673987, "grad_norm": 0.7770376316618306, "learning_rate": 1.0795083963699105e-06, "loss": 0.2772, "step": 23117 }, { "epoch": 0.7933424845573095, "grad_norm": 0.7757210525979588, "learning_rate": 1.0791635085989173e-06, "loss": 0.2453, "step": 23118 }, { "epoch": 0.7933768016472204, "grad_norm": 0.760248096620263, "learning_rate": 1.0788186692652873e-06, "loss": 0.2736, "step": 23119 }, { "epoch": 0.7934111187371311, "grad_norm": 0.7499158560183591, "learning_rate": 1.0784738783732813e-06, "loss": 0.261, "step": 23120 }, { "epoch": 0.7934454358270419, "grad_norm": 0.9211509568974741, "learning_rate": 1.0781291359271602e-06, "loss": 0.3059, "step": 23121 }, { "epoch": 0.7934797529169526, "grad_norm": 0.7294698421513295, "learning_rate": 1.077784441931178e-06, "loss": 0.2762, "step": 23122 }, { "epoch": 0.7935140700068635, "grad_norm": 0.7215678088166148, "learning_rate": 1.077439796389599e-06, "loss": 0.2394, "step": 23123 }, { "epoch": 0.7935483870967742, "grad_norm": 0.8582275123725299, "learning_rate": 1.0770951993066764e-06, "loss": 0.2241, "step": 23124 }, { "epoch": 0.793582704186685, "grad_norm": 0.8216608914138778, "learning_rate": 1.0767506506866692e-06, "loss": 0.2774, "step": 23125 }, { "epoch": 0.7936170212765957, "grad_norm": 0.711369306619707, "learning_rate": 1.0764061505338337e-06, "loss": 0.2281, "step": 23126 }, { "epoch": 0.7936513383665065, "grad_norm": 0.7428169837394325, "learning_rate": 1.076061698852426e-06, "loss": 0.2534, "step": 23127 }, { "epoch": 0.7936856554564173, "grad_norm": 0.7921852935636247, "learning_rate": 1.0757172956467004e-06, "loss": 0.2365, "step": 23128 }, { "epoch": 0.7937199725463281, "grad_norm": 0.8243459650057463, "learning_rate": 1.0753729409209129e-06, "loss": 0.2338, "step": 23129 }, { "epoch": 0.7937542896362388, "grad_norm": 0.8157195814360741, "learning_rate": 1.0750286346793187e-06, "loss": 0.2444, "step": 23130 }, { "epoch": 0.7937886067261496, "grad_norm": 0.7693190632119846, "learning_rate": 1.074684376926166e-06, "loss": 0.2685, "step": 23131 }, { "epoch": 0.7938229238160603, "grad_norm": 0.7770308079779084, "learning_rate": 1.0743401676657151e-06, "loss": 0.2783, "step": 23132 }, { "epoch": 0.7938572409059712, "grad_norm": 0.8611436865825958, "learning_rate": 1.0739960069022126e-06, "loss": 0.3583, "step": 23133 }, { "epoch": 0.793891557995882, "grad_norm": 0.7262238655554286, "learning_rate": 1.0736518946399111e-06, "loss": 0.2616, "step": 23134 }, { "epoch": 0.7939258750857927, "grad_norm": 0.7271502797537737, "learning_rate": 1.0733078308830653e-06, "loss": 0.2328, "step": 23135 }, { "epoch": 0.7939601921757035, "grad_norm": 0.8058748579663508, "learning_rate": 1.0729638156359202e-06, "loss": 0.252, "step": 23136 }, { "epoch": 0.7939945092656143, "grad_norm": 0.8065913323766791, "learning_rate": 1.0726198489027318e-06, "loss": 0.252, "step": 23137 }, { "epoch": 0.7940288263555251, "grad_norm": 0.8686354683987615, "learning_rate": 1.0722759306877444e-06, "loss": 0.258, "step": 23138 }, { "epoch": 0.7940631434454358, "grad_norm": 0.9076006328044324, "learning_rate": 1.0719320609952094e-06, "loss": 0.299, "step": 23139 }, { "epoch": 0.7940974605353466, "grad_norm": 0.8226232322246682, "learning_rate": 1.0715882398293736e-06, "loss": 0.274, "step": 23140 }, { "epoch": 0.7941317776252573, "grad_norm": 0.7399343828181004, "learning_rate": 1.071244467194485e-06, "loss": 0.2694, "step": 23141 }, { "epoch": 0.7941660947151682, "grad_norm": 0.819728966756122, "learning_rate": 1.0709007430947904e-06, "loss": 0.2965, "step": 23142 }, { "epoch": 0.7942004118050789, "grad_norm": 0.7789433151516522, "learning_rate": 1.0705570675345366e-06, "loss": 0.2353, "step": 23143 }, { "epoch": 0.7942347288949897, "grad_norm": 0.7733286728067496, "learning_rate": 1.0702134405179697e-06, "loss": 0.2747, "step": 23144 }, { "epoch": 0.7942690459849004, "grad_norm": 0.7224001932842437, "learning_rate": 1.069869862049332e-06, "loss": 0.2679, "step": 23145 }, { "epoch": 0.7943033630748113, "grad_norm": 0.7993375944240638, "learning_rate": 1.0695263321328725e-06, "loss": 0.3308, "step": 23146 }, { "epoch": 0.7943376801647221, "grad_norm": 0.7010542499900492, "learning_rate": 1.0691828507728313e-06, "loss": 0.2356, "step": 23147 }, { "epoch": 0.7943719972546328, "grad_norm": 0.6891076750099431, "learning_rate": 1.0688394179734534e-06, "loss": 0.2918, "step": 23148 }, { "epoch": 0.7944063143445436, "grad_norm": 0.8141078248740521, "learning_rate": 1.0684960337389811e-06, "loss": 0.2731, "step": 23149 }, { "epoch": 0.7944406314344543, "grad_norm": 0.8620335788455151, "learning_rate": 1.0681526980736562e-06, "loss": 0.2372, "step": 23150 }, { "epoch": 0.7944749485243652, "grad_norm": 0.6975819728671612, "learning_rate": 1.0678094109817211e-06, "loss": 0.2769, "step": 23151 }, { "epoch": 0.7945092656142759, "grad_norm": 0.9008702466589606, "learning_rate": 1.0674661724674163e-06, "loss": 0.3204, "step": 23152 }, { "epoch": 0.7945435827041867, "grad_norm": 0.7613657191274305, "learning_rate": 1.0671229825349827e-06, "loss": 0.2854, "step": 23153 }, { "epoch": 0.7945778997940974, "grad_norm": 0.7275684194116971, "learning_rate": 1.066779841188657e-06, "loss": 0.2706, "step": 23154 }, { "epoch": 0.7946122168840082, "grad_norm": 0.8113709059125432, "learning_rate": 1.0664367484326826e-06, "loss": 0.2456, "step": 23155 }, { "epoch": 0.794646533973919, "grad_norm": 0.8409617983955885, "learning_rate": 1.0660937042712965e-06, "loss": 0.2862, "step": 23156 }, { "epoch": 0.7946808510638298, "grad_norm": 0.722520001739146, "learning_rate": 1.0657507087087342e-06, "loss": 0.2396, "step": 23157 }, { "epoch": 0.7947151681537405, "grad_norm": 0.989963355997229, "learning_rate": 1.0654077617492376e-06, "loss": 0.156, "step": 23158 }, { "epoch": 0.7947494852436513, "grad_norm": 0.9582315525525632, "learning_rate": 1.065064863397039e-06, "loss": 0.2422, "step": 23159 }, { "epoch": 0.7947838023335622, "grad_norm": 0.8045678320591384, "learning_rate": 1.064722013656377e-06, "loss": 0.2624, "step": 23160 }, { "epoch": 0.7948181194234729, "grad_norm": 0.6447075466152818, "learning_rate": 1.0643792125314862e-06, "loss": 0.2543, "step": 23161 }, { "epoch": 0.7948524365133837, "grad_norm": 0.7416187466554359, "learning_rate": 1.0640364600266013e-06, "loss": 0.2844, "step": 23162 }, { "epoch": 0.7948867536032944, "grad_norm": 0.7228486583418255, "learning_rate": 1.0636937561459576e-06, "loss": 0.2351, "step": 23163 }, { "epoch": 0.7949210706932052, "grad_norm": 0.7746490790583181, "learning_rate": 1.063351100893788e-06, "loss": 0.2669, "step": 23164 }, { "epoch": 0.794955387783116, "grad_norm": 0.7737048335101353, "learning_rate": 1.0630084942743257e-06, "loss": 0.23, "step": 23165 }, { "epoch": 0.7949897048730268, "grad_norm": 0.8111008764653794, "learning_rate": 1.0626659362918034e-06, "loss": 0.2904, "step": 23166 }, { "epoch": 0.7950240219629375, "grad_norm": 0.8814717863979606, "learning_rate": 1.0623234269504544e-06, "loss": 0.2259, "step": 23167 }, { "epoch": 0.7950583390528483, "grad_norm": 0.8050767051448068, "learning_rate": 1.061980966254506e-06, "loss": 0.2581, "step": 23168 }, { "epoch": 0.7950926561427591, "grad_norm": 0.8119498453867673, "learning_rate": 1.0616385542081942e-06, "loss": 0.2651, "step": 23169 }, { "epoch": 0.7951269732326699, "grad_norm": 0.7553425852589, "learning_rate": 1.0612961908157449e-06, "loss": 0.251, "step": 23170 }, { "epoch": 0.7951612903225806, "grad_norm": 0.7538902793058079, "learning_rate": 1.0609538760813891e-06, "loss": 0.2059, "step": 23171 }, { "epoch": 0.7951956074124914, "grad_norm": 0.778068107297054, "learning_rate": 1.0606116100093562e-06, "loss": 0.2994, "step": 23172 }, { "epoch": 0.7952299245024022, "grad_norm": 0.8393493791251476, "learning_rate": 1.0602693926038733e-06, "loss": 0.2808, "step": 23173 }, { "epoch": 0.795264241592313, "grad_norm": 0.8459877711049232, "learning_rate": 1.0599272238691689e-06, "loss": 0.2998, "step": 23174 }, { "epoch": 0.7952985586822238, "grad_norm": 0.7517224116624583, "learning_rate": 1.05958510380947e-06, "loss": 0.3097, "step": 23175 }, { "epoch": 0.7953328757721345, "grad_norm": 0.7132821818768408, "learning_rate": 1.0592430324290042e-06, "loss": 0.2298, "step": 23176 }, { "epoch": 0.7953671928620453, "grad_norm": 0.7504412708625883, "learning_rate": 1.0589010097319941e-06, "loss": 0.2753, "step": 23177 }, { "epoch": 0.795401509951956, "grad_norm": 0.8252786920532275, "learning_rate": 1.0585590357226683e-06, "loss": 0.2619, "step": 23178 }, { "epoch": 0.7954358270418669, "grad_norm": 0.7402343458495195, "learning_rate": 1.0582171104052514e-06, "loss": 0.2492, "step": 23179 }, { "epoch": 0.7954701441317776, "grad_norm": 0.9507342382619066, "learning_rate": 1.0578752337839643e-06, "loss": 0.2663, "step": 23180 }, { "epoch": 0.7955044612216884, "grad_norm": 0.7754846226448714, "learning_rate": 1.0575334058630349e-06, "loss": 0.2548, "step": 23181 }, { "epoch": 0.7955387783115991, "grad_norm": 0.7407974619194453, "learning_rate": 1.0571916266466825e-06, "loss": 0.2096, "step": 23182 }, { "epoch": 0.79557309540151, "grad_norm": 0.7862119807701371, "learning_rate": 1.0568498961391304e-06, "loss": 0.2373, "step": 23183 }, { "epoch": 0.7956074124914208, "grad_norm": 0.8984977159613826, "learning_rate": 1.0565082143446009e-06, "loss": 0.2589, "step": 23184 }, { "epoch": 0.7956417295813315, "grad_norm": 0.8001145514273067, "learning_rate": 1.0561665812673144e-06, "loss": 0.248, "step": 23185 }, { "epoch": 0.7956760466712423, "grad_norm": 0.7189801123343141, "learning_rate": 1.0558249969114925e-06, "loss": 0.2454, "step": 23186 }, { "epoch": 0.795710363761153, "grad_norm": 0.7923940031183259, "learning_rate": 1.0554834612813536e-06, "loss": 0.3177, "step": 23187 }, { "epoch": 0.7957446808510639, "grad_norm": 0.7699593293025762, "learning_rate": 1.0551419743811192e-06, "loss": 0.279, "step": 23188 }, { "epoch": 0.7957789979409746, "grad_norm": 0.8649536117938609, "learning_rate": 1.0548005362150037e-06, "loss": 0.2702, "step": 23189 }, { "epoch": 0.7958133150308854, "grad_norm": 0.810504112240408, "learning_rate": 1.0544591467872306e-06, "loss": 0.2155, "step": 23190 }, { "epoch": 0.7958476321207961, "grad_norm": 0.6810368577277168, "learning_rate": 1.0541178061020135e-06, "loss": 0.2732, "step": 23191 }, { "epoch": 0.795881949210707, "grad_norm": 0.7646554889624887, "learning_rate": 1.0537765141635702e-06, "loss": 0.2418, "step": 23192 }, { "epoch": 0.7959162663006177, "grad_norm": 0.8719313945499197, "learning_rate": 1.0534352709761176e-06, "loss": 0.263, "step": 23193 }, { "epoch": 0.7959505833905285, "grad_norm": 1.0469880197808203, "learning_rate": 1.0530940765438708e-06, "loss": 0.2657, "step": 23194 }, { "epoch": 0.7959849004804392, "grad_norm": 0.8026139060693152, "learning_rate": 1.0527529308710449e-06, "loss": 0.267, "step": 23195 }, { "epoch": 0.79601921757035, "grad_norm": 0.7360642132653482, "learning_rate": 1.052411833961855e-06, "loss": 0.2174, "step": 23196 }, { "epoch": 0.7960535346602609, "grad_norm": 0.8511585993839758, "learning_rate": 1.0520707858205143e-06, "loss": 0.2482, "step": 23197 }, { "epoch": 0.7960878517501716, "grad_norm": 0.7803816885581555, "learning_rate": 1.0517297864512355e-06, "loss": 0.26, "step": 23198 }, { "epoch": 0.7961221688400824, "grad_norm": 1.0108635792087193, "learning_rate": 1.0513888358582325e-06, "loss": 0.3118, "step": 23199 }, { "epoch": 0.7961564859299931, "grad_norm": 0.7404107477254868, "learning_rate": 1.051047934045717e-06, "loss": 0.2511, "step": 23200 }, { "epoch": 0.7961908030199039, "grad_norm": 0.7752969511942819, "learning_rate": 1.0507070810178998e-06, "loss": 0.3001, "step": 23201 }, { "epoch": 0.7962251201098147, "grad_norm": 0.681670605815994, "learning_rate": 1.0503662767789935e-06, "loss": 0.2541, "step": 23202 }, { "epoch": 0.7962594371997255, "grad_norm": 0.7123202377096234, "learning_rate": 1.0500255213332044e-06, "loss": 0.2413, "step": 23203 }, { "epoch": 0.7962937542896362, "grad_norm": 0.9001768399958683, "learning_rate": 1.0496848146847476e-06, "loss": 0.2435, "step": 23204 }, { "epoch": 0.796328071379547, "grad_norm": 0.800313193952165, "learning_rate": 1.0493441568378282e-06, "loss": 0.2429, "step": 23205 }, { "epoch": 0.7963623884694578, "grad_norm": 0.6437829240128964, "learning_rate": 1.0490035477966553e-06, "loss": 0.2729, "step": 23206 }, { "epoch": 0.7963967055593686, "grad_norm": 1.0822693593999113, "learning_rate": 1.0486629875654376e-06, "loss": 0.2787, "step": 23207 }, { "epoch": 0.7964310226492793, "grad_norm": 0.9998445955137591, "learning_rate": 1.0483224761483817e-06, "loss": 0.2943, "step": 23208 }, { "epoch": 0.7964653397391901, "grad_norm": 0.8293892761399985, "learning_rate": 1.0479820135496944e-06, "loss": 0.2332, "step": 23209 }, { "epoch": 0.7964996568291008, "grad_norm": 0.7947776878775253, "learning_rate": 1.0476415997735812e-06, "loss": 0.2857, "step": 23210 }, { "epoch": 0.7965339739190117, "grad_norm": 0.9277444877450359, "learning_rate": 1.04730123482425e-06, "loss": 0.2629, "step": 23211 }, { "epoch": 0.7965682910089225, "grad_norm": 0.8420562507289683, "learning_rate": 1.0469609187059005e-06, "loss": 0.2688, "step": 23212 }, { "epoch": 0.7966026080988332, "grad_norm": 0.7794125553593267, "learning_rate": 1.0466206514227435e-06, "loss": 0.3099, "step": 23213 }, { "epoch": 0.796636925188744, "grad_norm": 0.7937006434886908, "learning_rate": 1.0462804329789772e-06, "loss": 0.2691, "step": 23214 }, { "epoch": 0.7966712422786548, "grad_norm": 0.77327885628086, "learning_rate": 1.0459402633788063e-06, "loss": 0.2452, "step": 23215 }, { "epoch": 0.7967055593685656, "grad_norm": 0.7892339255457421, "learning_rate": 1.0456001426264339e-06, "loss": 0.2633, "step": 23216 }, { "epoch": 0.7967398764584763, "grad_norm": 0.7039911405456185, "learning_rate": 1.0452600707260614e-06, "loss": 0.2356, "step": 23217 }, { "epoch": 0.7967741935483871, "grad_norm": 0.8088460206934034, "learning_rate": 1.04492004768189e-06, "loss": 0.2524, "step": 23218 }, { "epoch": 0.7968085106382978, "grad_norm": 0.775979887728909, "learning_rate": 1.04458007349812e-06, "loss": 0.2789, "step": 23219 }, { "epoch": 0.7968428277282087, "grad_norm": 0.7806795754788787, "learning_rate": 1.0442401481789527e-06, "loss": 0.287, "step": 23220 }, { "epoch": 0.7968771448181194, "grad_norm": 0.7589764190894203, "learning_rate": 1.043900271728584e-06, "loss": 0.249, "step": 23221 }, { "epoch": 0.7969114619080302, "grad_norm": 0.8528742706665852, "learning_rate": 1.0435604441512165e-06, "loss": 0.2528, "step": 23222 }, { "epoch": 0.796945778997941, "grad_norm": 0.8379487462287574, "learning_rate": 1.0432206654510486e-06, "loss": 0.3249, "step": 23223 }, { "epoch": 0.7969800960878517, "grad_norm": 0.7258283356427284, "learning_rate": 1.0428809356322728e-06, "loss": 0.1964, "step": 23224 }, { "epoch": 0.7970144131777626, "grad_norm": 0.777486568163603, "learning_rate": 1.0425412546990926e-06, "loss": 0.2514, "step": 23225 }, { "epoch": 0.7970487302676733, "grad_norm": 0.7686894807841197, "learning_rate": 1.0422016226556998e-06, "loss": 0.2462, "step": 23226 }, { "epoch": 0.7970830473575841, "grad_norm": 0.7166565017752572, "learning_rate": 1.0418620395062917e-06, "loss": 0.2648, "step": 23227 }, { "epoch": 0.7971173644474948, "grad_norm": 0.7351182664246704, "learning_rate": 1.0415225052550631e-06, "loss": 0.2688, "step": 23228 }, { "epoch": 0.7971516815374057, "grad_norm": 0.7741878103849478, "learning_rate": 1.0411830199062094e-06, "loss": 0.2084, "step": 23229 }, { "epoch": 0.7971859986273164, "grad_norm": 0.8327857514949042, "learning_rate": 1.0408435834639236e-06, "loss": 0.2712, "step": 23230 }, { "epoch": 0.7972203157172272, "grad_norm": 0.821971587277155, "learning_rate": 1.0405041959323992e-06, "loss": 0.2533, "step": 23231 }, { "epoch": 0.7972546328071379, "grad_norm": 0.8458291120424195, "learning_rate": 1.0401648573158297e-06, "loss": 0.2575, "step": 23232 }, { "epoch": 0.7972889498970487, "grad_norm": 0.8286488716754253, "learning_rate": 1.0398255676184061e-06, "loss": 0.2614, "step": 23233 }, { "epoch": 0.7973232669869595, "grad_norm": 0.8549967994469964, "learning_rate": 1.0394863268443217e-06, "loss": 0.2676, "step": 23234 }, { "epoch": 0.7973575840768703, "grad_norm": 0.7481282238608745, "learning_rate": 1.039147134997764e-06, "loss": 0.274, "step": 23235 }, { "epoch": 0.797391901166781, "grad_norm": 0.7587016419230337, "learning_rate": 1.038807992082928e-06, "loss": 0.2207, "step": 23236 }, { "epoch": 0.7974262182566918, "grad_norm": 0.8032164459220621, "learning_rate": 1.0384688981039997e-06, "loss": 0.2429, "step": 23237 }, { "epoch": 0.7974605353466027, "grad_norm": 0.8016658152858579, "learning_rate": 1.0381298530651684e-06, "loss": 0.2182, "step": 23238 }, { "epoch": 0.7974948524365134, "grad_norm": 0.7434407224967555, "learning_rate": 1.0377908569706258e-06, "loss": 0.2798, "step": 23239 }, { "epoch": 0.7975291695264242, "grad_norm": 0.7952121278995025, "learning_rate": 1.0374519098245567e-06, "loss": 0.3062, "step": 23240 }, { "epoch": 0.7975634866163349, "grad_norm": 0.8418474063627872, "learning_rate": 1.0371130116311495e-06, "loss": 0.25, "step": 23241 }, { "epoch": 0.7975978037062457, "grad_norm": 0.7748515347741334, "learning_rate": 1.0367741623945903e-06, "loss": 0.2912, "step": 23242 }, { "epoch": 0.7976321207961565, "grad_norm": 0.7055426449377493, "learning_rate": 1.0364353621190675e-06, "loss": 0.2558, "step": 23243 }, { "epoch": 0.7976664378860673, "grad_norm": 0.7867921045780151, "learning_rate": 1.0360966108087616e-06, "loss": 0.2109, "step": 23244 }, { "epoch": 0.797700754975978, "grad_norm": 0.8023480831889326, "learning_rate": 1.0357579084678621e-06, "loss": 0.2678, "step": 23245 }, { "epoch": 0.7977350720658888, "grad_norm": 0.781571375115334, "learning_rate": 1.0354192551005531e-06, "loss": 0.255, "step": 23246 }, { "epoch": 0.7977693891557995, "grad_norm": 0.8445306744583052, "learning_rate": 1.0350806507110144e-06, "loss": 0.2811, "step": 23247 }, { "epoch": 0.7978037062457104, "grad_norm": 0.8768506208551777, "learning_rate": 1.0347420953034342e-06, "loss": 0.2395, "step": 23248 }, { "epoch": 0.7978380233356211, "grad_norm": 0.8846354035510136, "learning_rate": 1.0344035888819914e-06, "loss": 0.2521, "step": 23249 }, { "epoch": 0.7978723404255319, "grad_norm": 0.7202053493347367, "learning_rate": 1.0340651314508682e-06, "loss": 0.2408, "step": 23250 }, { "epoch": 0.7979066575154427, "grad_norm": 0.7931570651507972, "learning_rate": 1.0337267230142467e-06, "loss": 0.2817, "step": 23251 }, { "epoch": 0.7979409746053535, "grad_norm": 0.7481703399366159, "learning_rate": 1.0333883635763076e-06, "loss": 0.2551, "step": 23252 }, { "epoch": 0.7979752916952643, "grad_norm": 0.8237237556501482, "learning_rate": 1.0330500531412301e-06, "loss": 0.3154, "step": 23253 }, { "epoch": 0.798009608785175, "grad_norm": 0.8966194151769807, "learning_rate": 1.0327117917131945e-06, "loss": 0.2538, "step": 23254 }, { "epoch": 0.7980439258750858, "grad_norm": 0.8394112049891079, "learning_rate": 1.0323735792963807e-06, "loss": 0.3274, "step": 23255 }, { "epoch": 0.7980782429649965, "grad_norm": 0.8041471855075558, "learning_rate": 1.032035415894963e-06, "loss": 0.2565, "step": 23256 }, { "epoch": 0.7981125600549074, "grad_norm": 0.786840534684224, "learning_rate": 1.0316973015131244e-06, "loss": 0.2119, "step": 23257 }, { "epoch": 0.7981468771448181, "grad_norm": 0.8328140961100419, "learning_rate": 1.0313592361550368e-06, "loss": 0.2807, "step": 23258 }, { "epoch": 0.7981811942347289, "grad_norm": 0.7391431259324428, "learning_rate": 1.0310212198248798e-06, "loss": 0.2327, "step": 23259 }, { "epoch": 0.7982155113246396, "grad_norm": 0.9492320480471955, "learning_rate": 1.0306832525268273e-06, "loss": 0.2643, "step": 23260 }, { "epoch": 0.7982498284145505, "grad_norm": 0.7619342321176018, "learning_rate": 1.0303453342650548e-06, "loss": 0.2398, "step": 23261 }, { "epoch": 0.7982841455044613, "grad_norm": 0.684700149454349, "learning_rate": 1.0300074650437403e-06, "loss": 0.2323, "step": 23262 }, { "epoch": 0.798318462594372, "grad_norm": 0.8385031423377737, "learning_rate": 1.0296696448670528e-06, "loss": 0.2641, "step": 23263 }, { "epoch": 0.7983527796842828, "grad_norm": 0.6572209059749972, "learning_rate": 1.029331873739169e-06, "loss": 0.2365, "step": 23264 }, { "epoch": 0.7983870967741935, "grad_norm": 0.7787439409492932, "learning_rate": 1.0289941516642598e-06, "loss": 0.3066, "step": 23265 }, { "epoch": 0.7984214138641044, "grad_norm": 0.6853374316063979, "learning_rate": 1.028656478646498e-06, "loss": 0.2202, "step": 23266 }, { "epoch": 0.7984557309540151, "grad_norm": 0.7157194572440775, "learning_rate": 1.0283188546900557e-06, "loss": 0.2218, "step": 23267 }, { "epoch": 0.7984900480439259, "grad_norm": 0.8760147193652871, "learning_rate": 1.0279812797991035e-06, "loss": 0.251, "step": 23268 }, { "epoch": 0.7985243651338366, "grad_norm": 0.7634761056515721, "learning_rate": 1.0276437539778128e-06, "loss": 0.3027, "step": 23269 }, { "epoch": 0.7985586822237474, "grad_norm": 0.7817968390311973, "learning_rate": 1.0273062772303494e-06, "loss": 0.2241, "step": 23270 }, { "epoch": 0.7985929993136582, "grad_norm": 0.8193565817500374, "learning_rate": 1.0269688495608881e-06, "loss": 0.2329, "step": 23271 }, { "epoch": 0.798627316403569, "grad_norm": 0.6980748982522506, "learning_rate": 1.0266314709735937e-06, "loss": 0.244, "step": 23272 }, { "epoch": 0.7986616334934797, "grad_norm": 0.8818570864471051, "learning_rate": 1.026294141472634e-06, "loss": 0.2525, "step": 23273 }, { "epoch": 0.7986959505833905, "grad_norm": 0.8917365391744095, "learning_rate": 1.0259568610621778e-06, "loss": 0.2724, "step": 23274 }, { "epoch": 0.7987302676733014, "grad_norm": 0.7954975111411696, "learning_rate": 1.0256196297463912e-06, "loss": 0.2626, "step": 23275 }, { "epoch": 0.7987645847632121, "grad_norm": 0.862659368273179, "learning_rate": 1.0252824475294404e-06, "loss": 0.2793, "step": 23276 }, { "epoch": 0.7987989018531229, "grad_norm": 0.8116129575476821, "learning_rate": 1.0249453144154908e-06, "loss": 0.251, "step": 23277 }, { "epoch": 0.7988332189430336, "grad_norm": 0.6666057230531034, "learning_rate": 1.024608230408709e-06, "loss": 0.2412, "step": 23278 }, { "epoch": 0.7988675360329444, "grad_norm": 0.6897973334715256, "learning_rate": 1.024271195513255e-06, "loss": 0.2105, "step": 23279 }, { "epoch": 0.7989018531228552, "grad_norm": 0.746823826017622, "learning_rate": 1.0239342097332978e-06, "loss": 0.3117, "step": 23280 }, { "epoch": 0.798936170212766, "grad_norm": 0.814575822551925, "learning_rate": 1.023597273072996e-06, "loss": 0.2544, "step": 23281 }, { "epoch": 0.7989704873026767, "grad_norm": 0.9416558880840106, "learning_rate": 1.0232603855365143e-06, "loss": 0.3073, "step": 23282 }, { "epoch": 0.7990048043925875, "grad_norm": 0.7425473934317806, "learning_rate": 1.0229235471280142e-06, "loss": 0.2864, "step": 23283 }, { "epoch": 0.7990391214824983, "grad_norm": 0.9652748927861792, "learning_rate": 1.0225867578516564e-06, "loss": 0.2634, "step": 23284 }, { "epoch": 0.7990734385724091, "grad_norm": 0.7515864683761692, "learning_rate": 1.022250017711602e-06, "loss": 0.2891, "step": 23285 }, { "epoch": 0.7991077556623198, "grad_norm": 0.8474044852792003, "learning_rate": 1.0219133267120112e-06, "loss": 0.2523, "step": 23286 }, { "epoch": 0.7991420727522306, "grad_norm": 0.7873710173167628, "learning_rate": 1.0215766848570447e-06, "loss": 0.2657, "step": 23287 }, { "epoch": 0.7991763898421413, "grad_norm": 0.7927593685318907, "learning_rate": 1.021240092150857e-06, "loss": 0.2346, "step": 23288 }, { "epoch": 0.7992107069320522, "grad_norm": 0.8538634014234855, "learning_rate": 1.020903548597611e-06, "loss": 0.2652, "step": 23289 }, { "epoch": 0.799245024021963, "grad_norm": 0.7531706598831459, "learning_rate": 1.0205670542014633e-06, "loss": 0.2336, "step": 23290 }, { "epoch": 0.7992793411118737, "grad_norm": 0.7465716113236023, "learning_rate": 1.0202306089665676e-06, "loss": 0.2367, "step": 23291 }, { "epoch": 0.7993136582017845, "grad_norm": 0.6820612799145289, "learning_rate": 1.0198942128970851e-06, "loss": 0.2408, "step": 23292 }, { "epoch": 0.7993479752916952, "grad_norm": 0.8377403289757921, "learning_rate": 1.0195578659971667e-06, "loss": 0.3266, "step": 23293 }, { "epoch": 0.7993822923816061, "grad_norm": 0.7804798879046244, "learning_rate": 1.0192215682709733e-06, "loss": 0.2782, "step": 23294 }, { "epoch": 0.7994166094715168, "grad_norm": 0.7573397267950087, "learning_rate": 1.0188853197226545e-06, "loss": 0.2738, "step": 23295 }, { "epoch": 0.7994509265614276, "grad_norm": 0.7135957883909114, "learning_rate": 1.0185491203563664e-06, "loss": 0.2462, "step": 23296 }, { "epoch": 0.7994852436513383, "grad_norm": 0.6328735087748308, "learning_rate": 1.0182129701762617e-06, "loss": 0.2245, "step": 23297 }, { "epoch": 0.7995195607412492, "grad_norm": 0.7691570763403399, "learning_rate": 1.0178768691864932e-06, "loss": 0.2631, "step": 23298 }, { "epoch": 0.7995538778311599, "grad_norm": 0.7088669247694113, "learning_rate": 1.0175408173912138e-06, "loss": 0.2691, "step": 23299 }, { "epoch": 0.7995881949210707, "grad_norm": 0.8552806923176214, "learning_rate": 1.017204814794574e-06, "loss": 0.2715, "step": 23300 }, { "epoch": 0.7996225120109814, "grad_norm": 0.8266289692549024, "learning_rate": 1.0168688614007266e-06, "loss": 0.3086, "step": 23301 }, { "epoch": 0.7996568291008922, "grad_norm": 0.8465305610492523, "learning_rate": 1.0165329572138184e-06, "loss": 0.2377, "step": 23302 }, { "epoch": 0.7996911461908031, "grad_norm": 0.733518970833142, "learning_rate": 1.0161971022380035e-06, "loss": 0.2385, "step": 23303 }, { "epoch": 0.7997254632807138, "grad_norm": 0.7438911001185953, "learning_rate": 1.0158612964774272e-06, "loss": 0.3516, "step": 23304 }, { "epoch": 0.7997597803706246, "grad_norm": 0.7568235733932336, "learning_rate": 1.0155255399362391e-06, "loss": 0.2494, "step": 23305 }, { "epoch": 0.7997940974605353, "grad_norm": 0.7681535796992924, "learning_rate": 1.0151898326185894e-06, "loss": 0.2844, "step": 23306 }, { "epoch": 0.7998284145504462, "grad_norm": 0.8108815958684871, "learning_rate": 1.0148541745286223e-06, "loss": 0.2032, "step": 23307 }, { "epoch": 0.7998627316403569, "grad_norm": 0.7497362008361058, "learning_rate": 1.0145185656704853e-06, "loss": 0.2634, "step": 23308 }, { "epoch": 0.7998970487302677, "grad_norm": 0.8548718781711834, "learning_rate": 1.0141830060483255e-06, "loss": 0.2503, "step": 23309 }, { "epoch": 0.7999313658201784, "grad_norm": 0.7032403943422435, "learning_rate": 1.0138474956662869e-06, "loss": 0.2704, "step": 23310 }, { "epoch": 0.7999656829100892, "grad_norm": 0.7952772117184269, "learning_rate": 1.0135120345285159e-06, "loss": 0.3161, "step": 23311 }, { "epoch": 0.8, "grad_norm": 0.7727544808854584, "learning_rate": 1.0131766226391554e-06, "loss": 0.2505, "step": 23312 }, { "epoch": 0.8000343170899108, "grad_norm": 0.8044519904667915, "learning_rate": 1.0128412600023508e-06, "loss": 0.2831, "step": 23313 }, { "epoch": 0.8000686341798215, "grad_norm": 0.8634193453099199, "learning_rate": 1.0125059466222414e-06, "loss": 0.2669, "step": 23314 }, { "epoch": 0.8001029512697323, "grad_norm": 0.8169801870101506, "learning_rate": 1.0121706825029747e-06, "loss": 0.2898, "step": 23315 }, { "epoch": 0.800137268359643, "grad_norm": 0.6639036616212425, "learning_rate": 1.0118354676486885e-06, "loss": 0.2499, "step": 23316 }, { "epoch": 0.8001715854495539, "grad_norm": 0.8459813883035978, "learning_rate": 1.0115003020635256e-06, "loss": 0.2787, "step": 23317 }, { "epoch": 0.8002059025394647, "grad_norm": 0.7883387108366471, "learning_rate": 1.0111651857516259e-06, "loss": 0.2674, "step": 23318 }, { "epoch": 0.8002402196293754, "grad_norm": 0.8097978832000557, "learning_rate": 1.0108301187171304e-06, "loss": 0.3057, "step": 23319 }, { "epoch": 0.8002745367192862, "grad_norm": 0.8169991805598051, "learning_rate": 1.0104951009641773e-06, "loss": 0.2191, "step": 23320 }, { "epoch": 0.800308853809197, "grad_norm": 0.7609442533704271, "learning_rate": 1.0101601324969063e-06, "loss": 0.2546, "step": 23321 }, { "epoch": 0.8003431708991078, "grad_norm": 0.6918477305561155, "learning_rate": 1.0098252133194548e-06, "loss": 0.2608, "step": 23322 }, { "epoch": 0.8003774879890185, "grad_norm": 0.7995415461666069, "learning_rate": 1.009490343435961e-06, "loss": 0.3124, "step": 23323 }, { "epoch": 0.8004118050789293, "grad_norm": 0.7631714586074895, "learning_rate": 1.0091555228505623e-06, "loss": 0.2144, "step": 23324 }, { "epoch": 0.80044612216884, "grad_norm": 0.783941449134763, "learning_rate": 1.008820751567392e-06, "loss": 0.2554, "step": 23325 }, { "epoch": 0.8004804392587509, "grad_norm": 0.7373928925934096, "learning_rate": 1.0084860295905908e-06, "loss": 0.2599, "step": 23326 }, { "epoch": 0.8005147563486616, "grad_norm": 0.7633420786916074, "learning_rate": 1.0081513569242896e-06, "loss": 0.3178, "step": 23327 }, { "epoch": 0.8005490734385724, "grad_norm": 0.7915690803279054, "learning_rate": 1.007816733572623e-06, "loss": 0.2333, "step": 23328 }, { "epoch": 0.8005833905284832, "grad_norm": 0.856498535838696, "learning_rate": 1.0074821595397288e-06, "loss": 0.2531, "step": 23329 }, { "epoch": 0.800617707618394, "grad_norm": 0.671128168608206, "learning_rate": 1.0071476348297366e-06, "loss": 0.1745, "step": 23330 }, { "epoch": 0.8006520247083048, "grad_norm": 0.8250174637177498, "learning_rate": 1.00681315944678e-06, "loss": 0.2994, "step": 23331 }, { "epoch": 0.8006863417982155, "grad_norm": 0.8053356373195398, "learning_rate": 1.0064787333949916e-06, "loss": 0.24, "step": 23332 }, { "epoch": 0.8007206588881263, "grad_norm": 0.9431848387669817, "learning_rate": 1.0061443566785017e-06, "loss": 0.2427, "step": 23333 }, { "epoch": 0.800754975978037, "grad_norm": 0.7598059220049301, "learning_rate": 1.0058100293014423e-06, "loss": 0.2195, "step": 23334 }, { "epoch": 0.8007892930679479, "grad_norm": 0.694790661881006, "learning_rate": 1.005475751267943e-06, "loss": 0.2359, "step": 23335 }, { "epoch": 0.8008236101578586, "grad_norm": 0.6968701548832874, "learning_rate": 1.0051415225821354e-06, "loss": 0.2517, "step": 23336 }, { "epoch": 0.8008579272477694, "grad_norm": 0.73223930819997, "learning_rate": 1.0048073432481438e-06, "loss": 0.2775, "step": 23337 }, { "epoch": 0.8008922443376801, "grad_norm": 0.867305945122546, "learning_rate": 1.0044732132701023e-06, "loss": 0.3422, "step": 23338 }, { "epoch": 0.8009265614275909, "grad_norm": 0.7778127069096564, "learning_rate": 1.0041391326521344e-06, "loss": 0.2623, "step": 23339 }, { "epoch": 0.8009608785175018, "grad_norm": 0.8214148165802845, "learning_rate": 1.0038051013983691e-06, "loss": 0.2511, "step": 23340 }, { "epoch": 0.8009951956074125, "grad_norm": 0.8275192698801008, "learning_rate": 1.0034711195129326e-06, "loss": 0.259, "step": 23341 }, { "epoch": 0.8010295126973233, "grad_norm": 1.0069941894466345, "learning_rate": 1.0031371869999507e-06, "loss": 0.2938, "step": 23342 }, { "epoch": 0.801063829787234, "grad_norm": 0.8752470934545685, "learning_rate": 1.0028033038635488e-06, "loss": 0.3109, "step": 23343 }, { "epoch": 0.8010981468771449, "grad_norm": 0.8502934710823373, "learning_rate": 1.0024694701078524e-06, "loss": 0.3265, "step": 23344 }, { "epoch": 0.8011324639670556, "grad_norm": 0.8042506198890945, "learning_rate": 1.0021356857369856e-06, "loss": 0.2775, "step": 23345 }, { "epoch": 0.8011667810569664, "grad_norm": 0.7574353146959103, "learning_rate": 1.001801950755069e-06, "loss": 0.2613, "step": 23346 }, { "epoch": 0.8012010981468771, "grad_norm": 0.7328007761208476, "learning_rate": 1.001468265166231e-06, "loss": 0.25, "step": 23347 }, { "epoch": 0.8012354152367879, "grad_norm": 0.7954878488746555, "learning_rate": 1.0011346289745888e-06, "loss": 0.2942, "step": 23348 }, { "epoch": 0.8012697323266987, "grad_norm": 0.7047942073015045, "learning_rate": 1.000801042184265e-06, "loss": 0.2396, "step": 23349 }, { "epoch": 0.8013040494166095, "grad_norm": 0.8540500713791921, "learning_rate": 1.0004675047993844e-06, "loss": 0.2374, "step": 23350 }, { "epoch": 0.8013383665065202, "grad_norm": 0.7859946370979052, "learning_rate": 1.000134016824063e-06, "loss": 0.3013, "step": 23351 }, { "epoch": 0.801372683596431, "grad_norm": 0.7976986150166027, "learning_rate": 9.998005782624232e-07, "loss": 0.2125, "step": 23352 }, { "epoch": 0.8014070006863419, "grad_norm": 0.7991837422106366, "learning_rate": 9.994671891185831e-07, "loss": 0.2938, "step": 23353 }, { "epoch": 0.8014413177762526, "grad_norm": 0.8684376249881564, "learning_rate": 9.991338493966619e-07, "loss": 0.3345, "step": 23354 }, { "epoch": 0.8014756348661634, "grad_norm": 0.8019039769238115, "learning_rate": 9.988005591007771e-07, "loss": 0.2313, "step": 23355 }, { "epoch": 0.8015099519560741, "grad_norm": 0.77134075311798, "learning_rate": 9.984673182350462e-07, "loss": 0.2577, "step": 23356 }, { "epoch": 0.8015442690459849, "grad_norm": 0.7894260692882902, "learning_rate": 9.981341268035867e-07, "loss": 0.3037, "step": 23357 }, { "epoch": 0.8015785861358957, "grad_norm": 0.7984496231195135, "learning_rate": 9.978009848105142e-07, "loss": 0.3077, "step": 23358 }, { "epoch": 0.8016129032258065, "grad_norm": 0.9156956915921515, "learning_rate": 9.974678922599457e-07, "loss": 0.2889, "step": 23359 }, { "epoch": 0.8016472203157172, "grad_norm": 0.8244352128145684, "learning_rate": 9.971348491559924e-07, "loss": 0.2742, "step": 23360 }, { "epoch": 0.801681537405628, "grad_norm": 0.8315052692214011, "learning_rate": 9.968018555027735e-07, "loss": 0.2875, "step": 23361 }, { "epoch": 0.8017158544955387, "grad_norm": 0.656333235990038, "learning_rate": 9.964689113043996e-07, "loss": 0.2551, "step": 23362 }, { "epoch": 0.8017501715854496, "grad_norm": 0.8627787923097242, "learning_rate": 9.961360165649842e-07, "loss": 0.2869, "step": 23363 }, { "epoch": 0.8017844886753603, "grad_norm": 0.7252506210715425, "learning_rate": 9.958031712886407e-07, "loss": 0.2732, "step": 23364 }, { "epoch": 0.8018188057652711, "grad_norm": 0.822194618033701, "learning_rate": 9.954703754794798e-07, "loss": 0.2692, "step": 23365 }, { "epoch": 0.8018531228551818, "grad_norm": 0.6977533192633419, "learning_rate": 9.951376291416142e-07, "loss": 0.2289, "step": 23366 }, { "epoch": 0.8018874399450927, "grad_norm": 0.846581005617876, "learning_rate": 9.948049322791542e-07, "loss": 0.2359, "step": 23367 }, { "epoch": 0.8019217570350035, "grad_norm": 0.8246056849209195, "learning_rate": 9.944722848962102e-07, "loss": 0.3185, "step": 23368 }, { "epoch": 0.8019560741249142, "grad_norm": 0.7236345492673597, "learning_rate": 9.941396869968889e-07, "loss": 0.2191, "step": 23369 }, { "epoch": 0.801990391214825, "grad_norm": 0.7919279696984324, "learning_rate": 9.938071385853044e-07, "loss": 0.2544, "step": 23370 }, { "epoch": 0.8020247083047357, "grad_norm": 0.7784952983115186, "learning_rate": 9.934746396655599e-07, "loss": 0.2318, "step": 23371 }, { "epoch": 0.8020590253946466, "grad_norm": 0.8271736291666923, "learning_rate": 9.931421902417642e-07, "loss": 0.313, "step": 23372 }, { "epoch": 0.8020933424845573, "grad_norm": 0.8543450923755539, "learning_rate": 9.928097903180278e-07, "loss": 0.2863, "step": 23373 }, { "epoch": 0.8021276595744681, "grad_norm": 0.7764423280961238, "learning_rate": 9.924774398984532e-07, "loss": 0.2349, "step": 23374 }, { "epoch": 0.8021619766643788, "grad_norm": 0.7250837578781366, "learning_rate": 9.921451389871473e-07, "loss": 0.2193, "step": 23375 }, { "epoch": 0.8021962937542897, "grad_norm": 0.8167326375460707, "learning_rate": 9.918128875882155e-07, "loss": 0.297, "step": 23376 }, { "epoch": 0.8022306108442004, "grad_norm": 0.7740054672690388, "learning_rate": 9.914806857057623e-07, "loss": 0.2598, "step": 23377 }, { "epoch": 0.8022649279341112, "grad_norm": 0.8182323257611249, "learning_rate": 9.911485333438925e-07, "loss": 0.2147, "step": 23378 }, { "epoch": 0.802299245024022, "grad_norm": 0.8375814489477331, "learning_rate": 9.90816430506708e-07, "loss": 0.289, "step": 23379 }, { "epoch": 0.8023335621139327, "grad_norm": 0.7506427641188148, "learning_rate": 9.904843771983136e-07, "loss": 0.2622, "step": 23380 }, { "epoch": 0.8023678792038436, "grad_norm": 0.7828621616081897, "learning_rate": 9.901523734228075e-07, "loss": 0.2655, "step": 23381 }, { "epoch": 0.8024021962937543, "grad_norm": 0.8487223172854588, "learning_rate": 9.898204191842969e-07, "loss": 0.2544, "step": 23382 }, { "epoch": 0.8024365133836651, "grad_norm": 0.7589971700736441, "learning_rate": 9.894885144868776e-07, "loss": 0.2834, "step": 23383 }, { "epoch": 0.8024708304735758, "grad_norm": 0.7905290083951172, "learning_rate": 9.891566593346523e-07, "loss": 0.2461, "step": 23384 }, { "epoch": 0.8025051475634866, "grad_norm": 0.7505197719639859, "learning_rate": 9.888248537317202e-07, "loss": 0.2052, "step": 23385 }, { "epoch": 0.8025394646533974, "grad_norm": 0.7717522235570367, "learning_rate": 9.884930976821804e-07, "loss": 0.284, "step": 23386 }, { "epoch": 0.8025737817433082, "grad_norm": 0.7264234792083344, "learning_rate": 9.881613911901317e-07, "loss": 0.2464, "step": 23387 }, { "epoch": 0.8026080988332189, "grad_norm": 0.7579092998173631, "learning_rate": 9.87829734259671e-07, "loss": 0.2716, "step": 23388 }, { "epoch": 0.8026424159231297, "grad_norm": 0.7669961549327993, "learning_rate": 9.87498126894897e-07, "loss": 0.2727, "step": 23389 }, { "epoch": 0.8026767330130405, "grad_norm": 0.9702435386829411, "learning_rate": 9.87166569099905e-07, "loss": 0.2392, "step": 23390 }, { "epoch": 0.8027110501029513, "grad_norm": 0.8033353474911618, "learning_rate": 9.86835060878793e-07, "loss": 0.2249, "step": 23391 }, { "epoch": 0.802745367192862, "grad_norm": 0.699992563140976, "learning_rate": 9.865036022356521e-07, "loss": 0.2593, "step": 23392 }, { "epoch": 0.8027796842827728, "grad_norm": 0.7534523767303996, "learning_rate": 9.861721931745826e-07, "loss": 0.2287, "step": 23393 }, { "epoch": 0.8028140013726835, "grad_norm": 0.8197151360021143, "learning_rate": 9.85840833699675e-07, "loss": 0.2702, "step": 23394 }, { "epoch": 0.8028483184625944, "grad_norm": 0.7215771190694822, "learning_rate": 9.85509523815022e-07, "loss": 0.2281, "step": 23395 }, { "epoch": 0.8028826355525052, "grad_norm": 0.7155840018900635, "learning_rate": 9.851782635247214e-07, "loss": 0.2538, "step": 23396 }, { "epoch": 0.8029169526424159, "grad_norm": 0.7944222837358272, "learning_rate": 9.848470528328613e-07, "loss": 0.2274, "step": 23397 }, { "epoch": 0.8029512697323267, "grad_norm": 0.7667671224909591, "learning_rate": 9.845158917435344e-07, "loss": 0.2684, "step": 23398 }, { "epoch": 0.8029855868222375, "grad_norm": 1.4037685040037093, "learning_rate": 9.841847802608322e-07, "loss": 0.2789, "step": 23399 }, { "epoch": 0.8030199039121483, "grad_norm": 0.7605988621357369, "learning_rate": 9.838537183888453e-07, "loss": 0.2367, "step": 23400 }, { "epoch": 0.803054221002059, "grad_norm": 0.7527252575537764, "learning_rate": 9.835227061316628e-07, "loss": 0.2246, "step": 23401 }, { "epoch": 0.8030885380919698, "grad_norm": 0.8351134561077623, "learning_rate": 9.831917434933746e-07, "loss": 0.3201, "step": 23402 }, { "epoch": 0.8031228551818805, "grad_norm": 0.6917610711965944, "learning_rate": 9.828608304780706e-07, "loss": 0.2489, "step": 23403 }, { "epoch": 0.8031571722717914, "grad_norm": 0.7347113204969995, "learning_rate": 9.825299670898353e-07, "loss": 0.2456, "step": 23404 }, { "epoch": 0.8031914893617021, "grad_norm": 0.8042881103235243, "learning_rate": 9.821991533327608e-07, "loss": 0.2211, "step": 23405 }, { "epoch": 0.8032258064516129, "grad_norm": 0.7533227453556344, "learning_rate": 9.8186838921093e-07, "loss": 0.2551, "step": 23406 }, { "epoch": 0.8032601235415237, "grad_norm": 0.7730759726133486, "learning_rate": 9.815376747284305e-07, "loss": 0.2652, "step": 23407 }, { "epoch": 0.8032944406314344, "grad_norm": 0.6998652307435234, "learning_rate": 9.812070098893479e-07, "loss": 0.3194, "step": 23408 }, { "epoch": 0.8033287577213453, "grad_norm": 0.869902734072927, "learning_rate": 9.808763946977672e-07, "loss": 0.2324, "step": 23409 }, { "epoch": 0.803363074811256, "grad_norm": 0.8016446672077507, "learning_rate": 9.80545829157773e-07, "loss": 0.2868, "step": 23410 }, { "epoch": 0.8033973919011668, "grad_norm": 0.8011714974853414, "learning_rate": 9.802153132734488e-07, "loss": 0.2652, "step": 23411 }, { "epoch": 0.8034317089910775, "grad_norm": 0.8275239703199696, "learning_rate": 9.798848470488792e-07, "loss": 0.2585, "step": 23412 }, { "epoch": 0.8034660260809884, "grad_norm": 0.776663222966021, "learning_rate": 9.795544304881427e-07, "loss": 0.2293, "step": 23413 }, { "epoch": 0.8035003431708991, "grad_norm": 0.7618869224901292, "learning_rate": 9.792240635953265e-07, "loss": 0.285, "step": 23414 }, { "epoch": 0.8035346602608099, "grad_norm": 0.8873912157206071, "learning_rate": 9.78893746374508e-07, "loss": 0.248, "step": 23415 }, { "epoch": 0.8035689773507206, "grad_norm": 0.6856322882335254, "learning_rate": 9.785634788297671e-07, "loss": 0.3004, "step": 23416 }, { "epoch": 0.8036032944406314, "grad_norm": 0.7057610476263577, "learning_rate": 9.78233260965189e-07, "loss": 0.2543, "step": 23417 }, { "epoch": 0.8036376115305423, "grad_norm": 0.7363100087759302, "learning_rate": 9.779030927848476e-07, "loss": 0.2735, "step": 23418 }, { "epoch": 0.803671928620453, "grad_norm": 0.801658518348141, "learning_rate": 9.775729742928265e-07, "loss": 0.2273, "step": 23419 }, { "epoch": 0.8037062457103638, "grad_norm": 0.7691178284724505, "learning_rate": 9.772429054932004e-07, "loss": 0.242, "step": 23420 }, { "epoch": 0.8037405628002745, "grad_norm": 0.812915636331764, "learning_rate": 9.76912886390048e-07, "loss": 0.279, "step": 23421 }, { "epoch": 0.8037748798901854, "grad_norm": 0.7830495146976217, "learning_rate": 9.765829169874468e-07, "loss": 0.2614, "step": 23422 }, { "epoch": 0.8038091969800961, "grad_norm": 0.7707840735352273, "learning_rate": 9.762529972894724e-07, "loss": 0.3287, "step": 23423 }, { "epoch": 0.8038435140700069, "grad_norm": 0.783726445812103, "learning_rate": 9.759231273002012e-07, "loss": 0.3139, "step": 23424 }, { "epoch": 0.8038778311599176, "grad_norm": 0.9448408700978239, "learning_rate": 9.755933070237078e-07, "loss": 0.2414, "step": 23425 }, { "epoch": 0.8039121482498284, "grad_norm": 0.7941908658962528, "learning_rate": 9.752635364640689e-07, "loss": 0.2764, "step": 23426 }, { "epoch": 0.8039464653397392, "grad_norm": 0.7438749072515344, "learning_rate": 9.74933815625354e-07, "loss": 0.258, "step": 23427 }, { "epoch": 0.80398078242965, "grad_norm": 0.7809295403273434, "learning_rate": 9.74604144511641e-07, "loss": 0.2271, "step": 23428 }, { "epoch": 0.8040150995195607, "grad_norm": 0.763645267674773, "learning_rate": 9.74274523127e-07, "loss": 0.2279, "step": 23429 }, { "epoch": 0.8040494166094715, "grad_norm": 0.7305537612253206, "learning_rate": 9.739449514755033e-07, "loss": 0.2935, "step": 23430 }, { "epoch": 0.8040837336993822, "grad_norm": 0.7968901239183269, "learning_rate": 9.736154295612233e-07, "loss": 0.2415, "step": 23431 }, { "epoch": 0.8041180507892931, "grad_norm": 0.7897042829028956, "learning_rate": 9.732859573882298e-07, "loss": 0.2816, "step": 23432 }, { "epoch": 0.8041523678792039, "grad_norm": 0.7968793755890086, "learning_rate": 9.729565349605942e-07, "loss": 0.2579, "step": 23433 }, { "epoch": 0.8041866849691146, "grad_norm": 0.790108998218912, "learning_rate": 9.726271622823852e-07, "loss": 0.2582, "step": 23434 }, { "epoch": 0.8042210020590254, "grad_norm": 0.8812813055247747, "learning_rate": 9.72297839357673e-07, "loss": 0.303, "step": 23435 }, { "epoch": 0.8042553191489362, "grad_norm": 0.7558470822720662, "learning_rate": 9.719685661905232e-07, "loss": 0.2614, "step": 23436 }, { "epoch": 0.804289636238847, "grad_norm": 0.8090622451866339, "learning_rate": 9.716393427850078e-07, "loss": 0.244, "step": 23437 }, { "epoch": 0.8043239533287577, "grad_norm": 0.7272595086991186, "learning_rate": 9.713101691451904e-07, "loss": 0.208, "step": 23438 }, { "epoch": 0.8043582704186685, "grad_norm": 0.7124638541584969, "learning_rate": 9.709810452751378e-07, "loss": 0.3032, "step": 23439 }, { "epoch": 0.8043925875085792, "grad_norm": 0.7830865873017051, "learning_rate": 9.706519711789196e-07, "loss": 0.2373, "step": 23440 }, { "epoch": 0.8044269045984901, "grad_norm": 0.8933415314496076, "learning_rate": 9.703229468605974e-07, "loss": 0.2696, "step": 23441 }, { "epoch": 0.8044612216884008, "grad_norm": 0.8078734428308765, "learning_rate": 9.699939723242364e-07, "loss": 0.2969, "step": 23442 }, { "epoch": 0.8044955387783116, "grad_norm": 0.775125941542925, "learning_rate": 9.696650475739016e-07, "loss": 0.2559, "step": 23443 }, { "epoch": 0.8045298558682223, "grad_norm": 0.7391141428117335, "learning_rate": 9.693361726136562e-07, "loss": 0.2675, "step": 23444 }, { "epoch": 0.8045641729581332, "grad_norm": 0.916030532902986, "learning_rate": 9.690073474475636e-07, "loss": 0.255, "step": 23445 }, { "epoch": 0.804598490048044, "grad_norm": 0.7392550969942717, "learning_rate": 9.686785720796854e-07, "loss": 0.2083, "step": 23446 }, { "epoch": 0.8046328071379547, "grad_norm": 0.7787683716679122, "learning_rate": 9.683498465140845e-07, "loss": 0.2293, "step": 23447 }, { "epoch": 0.8046671242278655, "grad_norm": 0.7238078487385287, "learning_rate": 9.680211707548175e-07, "loss": 0.2094, "step": 23448 }, { "epoch": 0.8047014413177762, "grad_norm": 0.7645293362218732, "learning_rate": 9.676925448059516e-07, "loss": 0.3293, "step": 23449 }, { "epoch": 0.8047357584076871, "grad_norm": 0.8349132438776941, "learning_rate": 9.673639686715402e-07, "loss": 0.2314, "step": 23450 }, { "epoch": 0.8047700754975978, "grad_norm": 0.7505774849384319, "learning_rate": 9.670354423556482e-07, "loss": 0.259, "step": 23451 }, { "epoch": 0.8048043925875086, "grad_norm": 0.8248315975870024, "learning_rate": 9.667069658623297e-07, "loss": 0.2444, "step": 23452 }, { "epoch": 0.8048387096774193, "grad_norm": 0.8221222037462251, "learning_rate": 9.663785391956442e-07, "loss": 0.2763, "step": 23453 }, { "epoch": 0.8048730267673301, "grad_norm": 0.7331705258483397, "learning_rate": 9.660501623596496e-07, "loss": 0.2722, "step": 23454 }, { "epoch": 0.8049073438572409, "grad_norm": 0.6505447272841867, "learning_rate": 9.65721835358402e-07, "loss": 0.2627, "step": 23455 }, { "epoch": 0.8049416609471517, "grad_norm": 0.7555796622062189, "learning_rate": 9.653935581959573e-07, "loss": 0.231, "step": 23456 }, { "epoch": 0.8049759780370624, "grad_norm": 0.7366429605005833, "learning_rate": 9.650653308763713e-07, "loss": 0.2849, "step": 23457 }, { "epoch": 0.8050102951269732, "grad_norm": 0.7794489172164043, "learning_rate": 9.647371534037004e-07, "loss": 0.28, "step": 23458 }, { "epoch": 0.8050446122168841, "grad_norm": 0.759178157851777, "learning_rate": 9.644090257819944e-07, "loss": 0.2696, "step": 23459 }, { "epoch": 0.8050789293067948, "grad_norm": 0.9819804667632777, "learning_rate": 9.640809480153113e-07, "loss": 0.2498, "step": 23460 }, { "epoch": 0.8051132463967056, "grad_norm": 0.7625131393469872, "learning_rate": 9.637529201077046e-07, "loss": 0.2464, "step": 23461 }, { "epoch": 0.8051475634866163, "grad_norm": 0.8756019462735649, "learning_rate": 9.634249420632213e-07, "loss": 0.3251, "step": 23462 }, { "epoch": 0.8051818805765271, "grad_norm": 0.7740395265998348, "learning_rate": 9.630970138859196e-07, "loss": 0.243, "step": 23463 }, { "epoch": 0.8052161976664379, "grad_norm": 0.8716716651109696, "learning_rate": 9.627691355798462e-07, "loss": 0.3085, "step": 23464 }, { "epoch": 0.8052505147563487, "grad_norm": 0.7550417592032604, "learning_rate": 9.624413071490536e-07, "loss": 0.3018, "step": 23465 }, { "epoch": 0.8052848318462594, "grad_norm": 0.8432591506831986, "learning_rate": 9.621135285975909e-07, "loss": 0.2877, "step": 23466 }, { "epoch": 0.8053191489361702, "grad_norm": 0.9294116988134422, "learning_rate": 9.617857999295078e-07, "loss": 0.2808, "step": 23467 }, { "epoch": 0.8053534660260809, "grad_norm": 0.7794519382869272, "learning_rate": 9.614581211488534e-07, "loss": 0.2895, "step": 23468 }, { "epoch": 0.8053877831159918, "grad_norm": 0.754472143230679, "learning_rate": 9.611304922596747e-07, "loss": 0.219, "step": 23469 }, { "epoch": 0.8054221002059025, "grad_norm": 0.817959255147132, "learning_rate": 9.608029132660224e-07, "loss": 0.2512, "step": 23470 }, { "epoch": 0.8054564172958133, "grad_norm": 0.786587673849253, "learning_rate": 9.604753841719371e-07, "loss": 0.3218, "step": 23471 }, { "epoch": 0.805490734385724, "grad_norm": 0.710999905753506, "learning_rate": 9.601479049814722e-07, "loss": 0.2506, "step": 23472 }, { "epoch": 0.8055250514756349, "grad_norm": 0.8045362864196393, "learning_rate": 9.598204756986685e-07, "loss": 0.3073, "step": 23473 }, { "epoch": 0.8055593685655457, "grad_norm": 0.7745464534344036, "learning_rate": 9.594930963275722e-07, "loss": 0.2782, "step": 23474 }, { "epoch": 0.8055936856554564, "grad_norm": 0.7119635878699553, "learning_rate": 9.591657668722277e-07, "loss": 0.223, "step": 23475 }, { "epoch": 0.8056280027453672, "grad_norm": 0.7598850149169958, "learning_rate": 9.588384873366797e-07, "loss": 0.2371, "step": 23476 }, { "epoch": 0.8056623198352779, "grad_norm": 0.7914869893792348, "learning_rate": 9.5851125772497e-07, "loss": 0.2927, "step": 23477 }, { "epoch": 0.8056966369251888, "grad_norm": 0.8129674553808997, "learning_rate": 9.581840780411417e-07, "loss": 0.2869, "step": 23478 }, { "epoch": 0.8057309540150995, "grad_norm": 0.7998234286717194, "learning_rate": 9.578569482892375e-07, "loss": 0.3189, "step": 23479 }, { "epoch": 0.8057652711050103, "grad_norm": 0.7679450056454503, "learning_rate": 9.575298684732975e-07, "loss": 0.2495, "step": 23480 }, { "epoch": 0.805799588194921, "grad_norm": 0.7718994413722804, "learning_rate": 9.57202838597364e-07, "loss": 0.2641, "step": 23481 }, { "epoch": 0.8058339052848319, "grad_norm": 0.7691149225200573, "learning_rate": 9.568758586654742e-07, "loss": 0.2705, "step": 23482 }, { "epoch": 0.8058682223747426, "grad_norm": 0.7811956311982379, "learning_rate": 9.565489286816704e-07, "loss": 0.2582, "step": 23483 }, { "epoch": 0.8059025394646534, "grad_norm": 0.7676708282197505, "learning_rate": 9.562220486499913e-07, "loss": 0.2865, "step": 23484 }, { "epoch": 0.8059368565545642, "grad_norm": 0.8257921015081734, "learning_rate": 9.558952185744718e-07, "loss": 0.2519, "step": 23485 }, { "epoch": 0.8059711736444749, "grad_norm": 0.8316647837320114, "learning_rate": 9.555684384591546e-07, "loss": 0.2586, "step": 23486 }, { "epoch": 0.8060054907343858, "grad_norm": 0.7596180116608143, "learning_rate": 9.552417083080723e-07, "loss": 0.2471, "step": 23487 }, { "epoch": 0.8060398078242965, "grad_norm": 0.7013900474766229, "learning_rate": 9.549150281252633e-07, "loss": 0.2667, "step": 23488 }, { "epoch": 0.8060741249142073, "grad_norm": 0.9818654939711012, "learning_rate": 9.54588397914763e-07, "loss": 0.3316, "step": 23489 }, { "epoch": 0.806108442004118, "grad_norm": 0.8285326959469577, "learning_rate": 9.542618176806062e-07, "loss": 0.2165, "step": 23490 }, { "epoch": 0.8061427590940288, "grad_norm": 0.733035498557468, "learning_rate": 9.539352874268282e-07, "loss": 0.2801, "step": 23491 }, { "epoch": 0.8061770761839396, "grad_norm": 0.7693524995406696, "learning_rate": 9.536088071574623e-07, "loss": 0.2636, "step": 23492 }, { "epoch": 0.8062113932738504, "grad_norm": 0.8232138318252136, "learning_rate": 9.532823768765436e-07, "loss": 0.2834, "step": 23493 }, { "epoch": 0.8062457103637611, "grad_norm": 0.7538079044459376, "learning_rate": 9.529559965881002e-07, "loss": 0.3099, "step": 23494 }, { "epoch": 0.8062800274536719, "grad_norm": 0.8212078105945912, "learning_rate": 9.526296662961692e-07, "loss": 0.246, "step": 23495 }, { "epoch": 0.8063143445435828, "grad_norm": 0.7895472445937967, "learning_rate": 9.523033860047792e-07, "loss": 0.2702, "step": 23496 }, { "epoch": 0.8063486616334935, "grad_norm": 0.8338221906729792, "learning_rate": 9.519771557179619e-07, "loss": 0.2415, "step": 23497 }, { "epoch": 0.8063829787234043, "grad_norm": 0.7619399735207769, "learning_rate": 9.516509754397469e-07, "loss": 0.2981, "step": 23498 }, { "epoch": 0.806417295813315, "grad_norm": 0.7458230512117748, "learning_rate": 9.513248451741641e-07, "loss": 0.2758, "step": 23499 }, { "epoch": 0.8064516129032258, "grad_norm": 0.8416069013726606, "learning_rate": 9.509987649252428e-07, "loss": 0.2244, "step": 23500 }, { "epoch": 0.8064859299931366, "grad_norm": 0.7754517640872942, "learning_rate": 9.506727346970113e-07, "loss": 0.2459, "step": 23501 }, { "epoch": 0.8065202470830474, "grad_norm": 0.8512927901805233, "learning_rate": 9.50346754493498e-07, "loss": 0.4059, "step": 23502 }, { "epoch": 0.8065545641729581, "grad_norm": 0.7563740707763076, "learning_rate": 9.500208243187265e-07, "loss": 0.2248, "step": 23503 }, { "epoch": 0.8065888812628689, "grad_norm": 0.8004069908869774, "learning_rate": 9.496949441767284e-07, "loss": 0.2911, "step": 23504 }, { "epoch": 0.8066231983527797, "grad_norm": 0.771301130131168, "learning_rate": 9.493691140715261e-07, "loss": 0.257, "step": 23505 }, { "epoch": 0.8066575154426905, "grad_norm": 0.8054454394179069, "learning_rate": 9.490433340071442e-07, "loss": 0.2665, "step": 23506 }, { "epoch": 0.8066918325326012, "grad_norm": 0.790835434728282, "learning_rate": 9.487176039876123e-07, "loss": 0.2526, "step": 23507 }, { "epoch": 0.806726149622512, "grad_norm": 0.7221637967447582, "learning_rate": 9.483919240169487e-07, "loss": 0.2494, "step": 23508 }, { "epoch": 0.8067604667124227, "grad_norm": 0.7447470210992558, "learning_rate": 9.480662940991797e-07, "loss": 0.2589, "step": 23509 }, { "epoch": 0.8067947838023336, "grad_norm": 0.772347599944501, "learning_rate": 9.477407142383277e-07, "loss": 0.2611, "step": 23510 }, { "epoch": 0.8068291008922444, "grad_norm": 0.8057207859246034, "learning_rate": 9.474151844384145e-07, "loss": 0.2769, "step": 23511 }, { "epoch": 0.8068634179821551, "grad_norm": 0.7430640718752733, "learning_rate": 9.470897047034616e-07, "loss": 0.2349, "step": 23512 }, { "epoch": 0.8068977350720659, "grad_norm": 0.8533145351746381, "learning_rate": 9.467642750374906e-07, "loss": 0.2955, "step": 23513 }, { "epoch": 0.8069320521619766, "grad_norm": 0.9176000986236897, "learning_rate": 9.464388954445214e-07, "loss": 0.2715, "step": 23514 }, { "epoch": 0.8069663692518875, "grad_norm": 0.7791849478170227, "learning_rate": 9.461135659285731e-07, "loss": 0.3012, "step": 23515 }, { "epoch": 0.8070006863417982, "grad_norm": 0.7863715494647634, "learning_rate": 9.457882864936674e-07, "loss": 0.2324, "step": 23516 }, { "epoch": 0.807035003431709, "grad_norm": 0.8258709904256124, "learning_rate": 9.454630571438172e-07, "loss": 0.2697, "step": 23517 }, { "epoch": 0.8070693205216197, "grad_norm": 0.8348040717781094, "learning_rate": 9.451378778830472e-07, "loss": 0.2832, "step": 23518 }, { "epoch": 0.8071036376115306, "grad_norm": 0.7511733611544281, "learning_rate": 9.448127487153696e-07, "loss": 0.2262, "step": 23519 }, { "epoch": 0.8071379547014413, "grad_norm": 0.8356404888402323, "learning_rate": 9.444876696448024e-07, "loss": 0.2345, "step": 23520 }, { "epoch": 0.8071722717913521, "grad_norm": 0.7621135189858551, "learning_rate": 9.441626406753618e-07, "loss": 0.2706, "step": 23521 }, { "epoch": 0.8072065888812628, "grad_norm": 0.853353088717603, "learning_rate": 9.438376618110634e-07, "loss": 0.232, "step": 23522 }, { "epoch": 0.8072409059711736, "grad_norm": 0.8775851794957561, "learning_rate": 9.435127330559213e-07, "loss": 0.3181, "step": 23523 }, { "epoch": 0.8072752230610845, "grad_norm": 0.7693506828160862, "learning_rate": 9.4318785441395e-07, "loss": 0.2276, "step": 23524 }, { "epoch": 0.8073095401509952, "grad_norm": 0.7980264147679713, "learning_rate": 9.428630258891647e-07, "loss": 0.307, "step": 23525 }, { "epoch": 0.807343857240906, "grad_norm": 0.8187348879688596, "learning_rate": 9.425382474855737e-07, "loss": 0.2639, "step": 23526 }, { "epoch": 0.8073781743308167, "grad_norm": 0.7180657062908441, "learning_rate": 9.422135192071935e-07, "loss": 0.2634, "step": 23527 }, { "epoch": 0.8074124914207276, "grad_norm": 0.7279500052367535, "learning_rate": 9.418888410580357e-07, "loss": 0.2653, "step": 23528 }, { "epoch": 0.8074468085106383, "grad_norm": 0.7575438105729899, "learning_rate": 9.415642130421071e-07, "loss": 0.2919, "step": 23529 }, { "epoch": 0.8074811256005491, "grad_norm": 0.8392222676497278, "learning_rate": 9.412396351634234e-07, "loss": 0.2645, "step": 23530 }, { "epoch": 0.8075154426904598, "grad_norm": 0.6951383825954988, "learning_rate": 9.409151074259909e-07, "loss": 0.232, "step": 23531 }, { "epoch": 0.8075497597803706, "grad_norm": 0.7274429542554373, "learning_rate": 9.405906298338202e-07, "loss": 0.2338, "step": 23532 }, { "epoch": 0.8075840768702814, "grad_norm": 0.7260722966344948, "learning_rate": 9.402662023909193e-07, "loss": 0.2437, "step": 23533 }, { "epoch": 0.8076183939601922, "grad_norm": 0.8132711527675243, "learning_rate": 9.399418251012959e-07, "loss": 0.2756, "step": 23534 }, { "epoch": 0.807652711050103, "grad_norm": 0.742573853074052, "learning_rate": 9.396174979689576e-07, "loss": 0.2781, "step": 23535 }, { "epoch": 0.8076870281400137, "grad_norm": 0.7849889870996473, "learning_rate": 9.392932209979117e-07, "loss": 0.2167, "step": 23536 }, { "epoch": 0.8077213452299244, "grad_norm": 0.8409467834933515, "learning_rate": 9.389689941921648e-07, "loss": 0.2536, "step": 23537 }, { "epoch": 0.8077556623198353, "grad_norm": 0.865021858653439, "learning_rate": 9.386448175557189e-07, "loss": 0.2365, "step": 23538 }, { "epoch": 0.8077899794097461, "grad_norm": 0.731135490968763, "learning_rate": 9.383206910925835e-07, "loss": 0.2201, "step": 23539 }, { "epoch": 0.8078242964996568, "grad_norm": 0.7933880179357375, "learning_rate": 9.379966148067593e-07, "loss": 0.299, "step": 23540 }, { "epoch": 0.8078586135895676, "grad_norm": 0.8705491089848211, "learning_rate": 9.376725887022509e-07, "loss": 0.2633, "step": 23541 }, { "epoch": 0.8078929306794784, "grad_norm": 0.8644365168099738, "learning_rate": 9.373486127830617e-07, "loss": 0.246, "step": 23542 }, { "epoch": 0.8079272477693892, "grad_norm": 0.7539110092147396, "learning_rate": 9.370246870531935e-07, "loss": 0.2289, "step": 23543 }, { "epoch": 0.8079615648592999, "grad_norm": 0.6986219766327675, "learning_rate": 9.367008115166487e-07, "loss": 0.2397, "step": 23544 }, { "epoch": 0.8079958819492107, "grad_norm": 1.0643172192312396, "learning_rate": 9.363769861774275e-07, "loss": 0.2344, "step": 23545 }, { "epoch": 0.8080301990391214, "grad_norm": 0.8841981161533041, "learning_rate": 9.360532110395315e-07, "loss": 0.2308, "step": 23546 }, { "epoch": 0.8080645161290323, "grad_norm": 0.8614600571259932, "learning_rate": 9.357294861069593e-07, "loss": 0.2814, "step": 23547 }, { "epoch": 0.808098833218943, "grad_norm": 0.7942961724626442, "learning_rate": 9.354058113837128e-07, "loss": 0.2631, "step": 23548 }, { "epoch": 0.8081331503088538, "grad_norm": 0.7509120102453817, "learning_rate": 9.350821868737859e-07, "loss": 0.2831, "step": 23549 }, { "epoch": 0.8081674673987645, "grad_norm": 0.7616716079265038, "learning_rate": 9.347586125811803e-07, "loss": 0.2228, "step": 23550 }, { "epoch": 0.8082017844886754, "grad_norm": 0.731513915750863, "learning_rate": 9.34435088509894e-07, "loss": 0.2727, "step": 23551 }, { "epoch": 0.8082361015785862, "grad_norm": 0.8289468094560837, "learning_rate": 9.341116146639195e-07, "loss": 0.2575, "step": 23552 }, { "epoch": 0.8082704186684969, "grad_norm": 0.6580234917532709, "learning_rate": 9.337881910472585e-07, "loss": 0.2313, "step": 23553 }, { "epoch": 0.8083047357584077, "grad_norm": 0.7464549465669427, "learning_rate": 9.334648176639022e-07, "loss": 0.2761, "step": 23554 }, { "epoch": 0.8083390528483184, "grad_norm": 0.7744531128001575, "learning_rate": 9.331414945178469e-07, "loss": 0.2852, "step": 23555 }, { "epoch": 0.8083733699382293, "grad_norm": 0.7171971559695886, "learning_rate": 9.328182216130871e-07, "loss": 0.248, "step": 23556 }, { "epoch": 0.80840768702814, "grad_norm": 0.8192932038932415, "learning_rate": 9.324949989536159e-07, "loss": 0.2763, "step": 23557 }, { "epoch": 0.8084420041180508, "grad_norm": 0.7193733321492022, "learning_rate": 9.32171826543427e-07, "loss": 0.2223, "step": 23558 }, { "epoch": 0.8084763212079615, "grad_norm": 0.8483137683309309, "learning_rate": 9.318487043865127e-07, "loss": 0.2748, "step": 23559 }, { "epoch": 0.8085106382978723, "grad_norm": 0.7498772375104018, "learning_rate": 9.315256324868654e-07, "loss": 0.2218, "step": 23560 }, { "epoch": 0.8085449553877831, "grad_norm": 0.6972253285521252, "learning_rate": 9.312026108484735e-07, "loss": 0.2538, "step": 23561 }, { "epoch": 0.8085792724776939, "grad_norm": 0.792382792402401, "learning_rate": 9.308796394753322e-07, "loss": 0.2609, "step": 23562 }, { "epoch": 0.8086135895676047, "grad_norm": 0.6793081275512634, "learning_rate": 9.305567183714271e-07, "loss": 0.2202, "step": 23563 }, { "epoch": 0.8086479066575154, "grad_norm": 0.9649297867331277, "learning_rate": 9.302338475407496e-07, "loss": 0.2418, "step": 23564 }, { "epoch": 0.8086822237474263, "grad_norm": 0.6877723478283494, "learning_rate": 9.29911026987288e-07, "loss": 0.244, "step": 23565 }, { "epoch": 0.808716540837337, "grad_norm": 0.7641747651724804, "learning_rate": 9.2958825671503e-07, "loss": 0.2587, "step": 23566 }, { "epoch": 0.8087508579272478, "grad_norm": 0.8306083237775891, "learning_rate": 9.29265536727964e-07, "loss": 0.2533, "step": 23567 }, { "epoch": 0.8087851750171585, "grad_norm": 0.7513974852865173, "learning_rate": 9.28942867030076e-07, "loss": 0.2691, "step": 23568 }, { "epoch": 0.8088194921070693, "grad_norm": 0.6663981823213032, "learning_rate": 9.286202476253536e-07, "loss": 0.272, "step": 23569 }, { "epoch": 0.8088538091969801, "grad_norm": 0.7488050362973486, "learning_rate": 9.282976785177789e-07, "loss": 0.2477, "step": 23570 }, { "epoch": 0.8088881262868909, "grad_norm": 0.7623409833584707, "learning_rate": 9.279751597113406e-07, "loss": 0.2434, "step": 23571 }, { "epoch": 0.8089224433768016, "grad_norm": 0.7737252535129021, "learning_rate": 9.276526912100231e-07, "loss": 0.2378, "step": 23572 }, { "epoch": 0.8089567604667124, "grad_norm": 0.9415440217145833, "learning_rate": 9.273302730178057e-07, "loss": 0.3319, "step": 23573 }, { "epoch": 0.8089910775566233, "grad_norm": 0.7976081515318533, "learning_rate": 9.270079051386771e-07, "loss": 0.2258, "step": 23574 }, { "epoch": 0.809025394646534, "grad_norm": 0.802246664089024, "learning_rate": 9.266855875766151e-07, "loss": 0.3236, "step": 23575 }, { "epoch": 0.8090597117364448, "grad_norm": 0.9433121048667229, "learning_rate": 9.26363320335606e-07, "loss": 0.232, "step": 23576 }, { "epoch": 0.8090940288263555, "grad_norm": 0.7887308231839847, "learning_rate": 9.26041103419627e-07, "loss": 0.2283, "step": 23577 }, { "epoch": 0.8091283459162663, "grad_norm": 0.6919304166748095, "learning_rate": 9.257189368326608e-07, "loss": 0.2575, "step": 23578 }, { "epoch": 0.8091626630061771, "grad_norm": 0.8043979175572168, "learning_rate": 9.253968205786867e-07, "loss": 0.2797, "step": 23579 }, { "epoch": 0.8091969800960879, "grad_norm": 0.7359724149271175, "learning_rate": 9.250747546616845e-07, "loss": 0.2787, "step": 23580 }, { "epoch": 0.8092312971859986, "grad_norm": 0.8375889796352447, "learning_rate": 9.247527390856331e-07, "loss": 0.2642, "step": 23581 }, { "epoch": 0.8092656142759094, "grad_norm": 0.7005608103661494, "learning_rate": 9.244307738545099e-07, "loss": 0.315, "step": 23582 }, { "epoch": 0.8092999313658201, "grad_norm": 0.7535033913132198, "learning_rate": 9.241088589722946e-07, "loss": 0.3077, "step": 23583 }, { "epoch": 0.809334248455731, "grad_norm": 0.8181684083957397, "learning_rate": 9.237869944429595e-07, "loss": 0.2614, "step": 23584 }, { "epoch": 0.8093685655456417, "grad_norm": 0.8875145127870623, "learning_rate": 9.234651802704858e-07, "loss": 0.2687, "step": 23585 }, { "epoch": 0.8094028826355525, "grad_norm": 0.8117584346236612, "learning_rate": 9.231434164588465e-07, "loss": 0.2503, "step": 23586 }, { "epoch": 0.8094371997254632, "grad_norm": 0.7918413624496249, "learning_rate": 9.228217030120168e-07, "loss": 0.3085, "step": 23587 }, { "epoch": 0.8094715168153741, "grad_norm": 0.8459421090167976, "learning_rate": 9.225000399339712e-07, "loss": 0.2965, "step": 23588 }, { "epoch": 0.8095058339052849, "grad_norm": 0.7685239495411037, "learning_rate": 9.221784272286838e-07, "loss": 0.2684, "step": 23589 }, { "epoch": 0.8095401509951956, "grad_norm": 0.755364753891076, "learning_rate": 9.218568649001281e-07, "loss": 0.2503, "step": 23590 }, { "epoch": 0.8095744680851064, "grad_norm": 0.8063376230904095, "learning_rate": 9.215353529522758e-07, "loss": 0.2183, "step": 23591 }, { "epoch": 0.8096087851750171, "grad_norm": 0.7430321017430045, "learning_rate": 9.212138913891005e-07, "loss": 0.2992, "step": 23592 }, { "epoch": 0.809643102264928, "grad_norm": 0.7175954189004282, "learning_rate": 9.208924802145691e-07, "loss": 0.2512, "step": 23593 }, { "epoch": 0.8096774193548387, "grad_norm": 0.737107300675304, "learning_rate": 9.205711194326572e-07, "loss": 0.2426, "step": 23594 }, { "epoch": 0.8097117364447495, "grad_norm": 0.780073609107485, "learning_rate": 9.202498090473344e-07, "loss": 0.2963, "step": 23595 }, { "epoch": 0.8097460535346602, "grad_norm": 0.7846115581666532, "learning_rate": 9.199285490625659e-07, "loss": 0.2727, "step": 23596 }, { "epoch": 0.8097803706245711, "grad_norm": 0.7149754055013507, "learning_rate": 9.196073394823258e-07, "loss": 0.2182, "step": 23597 }, { "epoch": 0.8098146877144818, "grad_norm": 0.7793825157097358, "learning_rate": 9.192861803105785e-07, "loss": 0.2452, "step": 23598 }, { "epoch": 0.8098490048043926, "grad_norm": 0.771843051919864, "learning_rate": 9.189650715512927e-07, "loss": 0.2721, "step": 23599 }, { "epoch": 0.8098833218943033, "grad_norm": 0.9134506051702799, "learning_rate": 9.186440132084356e-07, "loss": 0.2795, "step": 23600 }, { "epoch": 0.8099176389842141, "grad_norm": 0.7929102695003531, "learning_rate": 9.183230052859732e-07, "loss": 0.2986, "step": 23601 }, { "epoch": 0.809951956074125, "grad_norm": 0.691881470594689, "learning_rate": 9.180020477878715e-07, "loss": 0.2308, "step": 23602 }, { "epoch": 0.8099862731640357, "grad_norm": 0.7790117657591814, "learning_rate": 9.176811407180946e-07, "loss": 0.2741, "step": 23603 }, { "epoch": 0.8100205902539465, "grad_norm": 0.9338690223187862, "learning_rate": 9.1736028408061e-07, "loss": 0.2652, "step": 23604 }, { "epoch": 0.8100549073438572, "grad_norm": 0.873197521975356, "learning_rate": 9.170394778793762e-07, "loss": 0.3079, "step": 23605 }, { "epoch": 0.810089224433768, "grad_norm": 0.8089220546471876, "learning_rate": 9.167187221183615e-07, "loss": 0.2557, "step": 23606 }, { "epoch": 0.8101235415236788, "grad_norm": 0.9052204085290415, "learning_rate": 9.163980168015241e-07, "loss": 0.2858, "step": 23607 }, { "epoch": 0.8101578586135896, "grad_norm": 0.7946540064364385, "learning_rate": 9.160773619328312e-07, "loss": 0.2709, "step": 23608 }, { "epoch": 0.8101921757035003, "grad_norm": 0.9103744586900093, "learning_rate": 9.157567575162391e-07, "loss": 0.2658, "step": 23609 }, { "epoch": 0.8102264927934111, "grad_norm": 0.8237163290431816, "learning_rate": 9.154362035557096e-07, "loss": 0.242, "step": 23610 }, { "epoch": 0.8102608098833219, "grad_norm": 0.8119624900321157, "learning_rate": 9.151157000552063e-07, "loss": 0.2698, "step": 23611 }, { "epoch": 0.8102951269732327, "grad_norm": 0.7548672763463328, "learning_rate": 9.147952470186849e-07, "loss": 0.202, "step": 23612 }, { "epoch": 0.8103294440631434, "grad_norm": 0.724404388090116, "learning_rate": 9.144748444501051e-07, "loss": 0.2169, "step": 23613 }, { "epoch": 0.8103637611530542, "grad_norm": 0.7781271833572733, "learning_rate": 9.141544923534251e-07, "loss": 0.2789, "step": 23614 }, { "epoch": 0.810398078242965, "grad_norm": 0.7990881332638367, "learning_rate": 9.138341907326043e-07, "loss": 0.2767, "step": 23615 }, { "epoch": 0.8104323953328758, "grad_norm": 0.927721926151044, "learning_rate": 9.135139395915954e-07, "loss": 0.2843, "step": 23616 }, { "epoch": 0.8104667124227866, "grad_norm": 0.7233782178776551, "learning_rate": 9.131937389343587e-07, "loss": 0.2975, "step": 23617 }, { "epoch": 0.8105010295126973, "grad_norm": 0.8682511669123412, "learning_rate": 9.128735887648499e-07, "loss": 0.2794, "step": 23618 }, { "epoch": 0.8105353466026081, "grad_norm": 0.8076963480611011, "learning_rate": 9.125534890870197e-07, "loss": 0.2572, "step": 23619 }, { "epoch": 0.8105696636925189, "grad_norm": 0.7720246145874429, "learning_rate": 9.122334399048289e-07, "loss": 0.2714, "step": 23620 }, { "epoch": 0.8106039807824297, "grad_norm": 0.8177834893405833, "learning_rate": 9.119134412222264e-07, "loss": 0.2812, "step": 23621 }, { "epoch": 0.8106382978723404, "grad_norm": 0.7937968084672044, "learning_rate": 9.115934930431674e-07, "loss": 0.2771, "step": 23622 }, { "epoch": 0.8106726149622512, "grad_norm": 0.8001856210860046, "learning_rate": 9.112735953716039e-07, "loss": 0.3102, "step": 23623 }, { "epoch": 0.8107069320521619, "grad_norm": 0.647489183070989, "learning_rate": 9.109537482114883e-07, "loss": 0.2325, "step": 23624 }, { "epoch": 0.8107412491420728, "grad_norm": 0.714821563395091, "learning_rate": 9.106339515667717e-07, "loss": 0.2432, "step": 23625 }, { "epoch": 0.8107755662319835, "grad_norm": 0.7378245259478067, "learning_rate": 9.103142054414049e-07, "loss": 0.2611, "step": 23626 }, { "epoch": 0.8108098833218943, "grad_norm": 0.708272363767056, "learning_rate": 9.099945098393392e-07, "loss": 0.257, "step": 23627 }, { "epoch": 0.810844200411805, "grad_norm": 0.8268315128096883, "learning_rate": 9.09674864764521e-07, "loss": 0.2489, "step": 23628 }, { "epoch": 0.8108785175017158, "grad_norm": 0.7672934577575493, "learning_rate": 9.093552702209025e-07, "loss": 0.2393, "step": 23629 }, { "epoch": 0.8109128345916267, "grad_norm": 0.8651975987302901, "learning_rate": 9.090357262124294e-07, "loss": 0.2399, "step": 23630 }, { "epoch": 0.8109471516815374, "grad_norm": 0.831660298157654, "learning_rate": 9.08716232743051e-07, "loss": 0.255, "step": 23631 }, { "epoch": 0.8109814687714482, "grad_norm": 0.8216209807575597, "learning_rate": 9.083967898167128e-07, "loss": 0.2714, "step": 23632 }, { "epoch": 0.8110157858613589, "grad_norm": 0.8242908868494119, "learning_rate": 9.080773974373619e-07, "loss": 0.2424, "step": 23633 }, { "epoch": 0.8110501029512698, "grad_norm": 0.707754496070974, "learning_rate": 9.077580556089444e-07, "loss": 0.25, "step": 23634 }, { "epoch": 0.8110844200411805, "grad_norm": 0.6834808186578403, "learning_rate": 9.074387643354049e-07, "loss": 0.2714, "step": 23635 }, { "epoch": 0.8111187371310913, "grad_norm": 1.0035155703191507, "learning_rate": 9.071195236206881e-07, "loss": 0.2128, "step": 23636 }, { "epoch": 0.811153054221002, "grad_norm": 0.7182514121802291, "learning_rate": 9.068003334687375e-07, "loss": 0.2234, "step": 23637 }, { "epoch": 0.8111873713109128, "grad_norm": 0.8195934475927164, "learning_rate": 9.064811938834972e-07, "loss": 0.2286, "step": 23638 }, { "epoch": 0.8112216884008236, "grad_norm": 0.752177054251259, "learning_rate": 9.061621048689084e-07, "loss": 0.2829, "step": 23639 }, { "epoch": 0.8112560054907344, "grad_norm": 0.7720350166233707, "learning_rate": 9.058430664289142e-07, "loss": 0.2849, "step": 23640 }, { "epoch": 0.8112903225806452, "grad_norm": 0.7669173739663782, "learning_rate": 9.055240785674569e-07, "loss": 0.2832, "step": 23641 }, { "epoch": 0.8113246396705559, "grad_norm": 0.7189491168159335, "learning_rate": 9.052051412884732e-07, "loss": 0.2244, "step": 23642 }, { "epoch": 0.8113589567604668, "grad_norm": 0.8283640528605875, "learning_rate": 9.048862545959087e-07, "loss": 0.2526, "step": 23643 }, { "epoch": 0.8113932738503775, "grad_norm": 0.7854790678151333, "learning_rate": 9.045674184936986e-07, "loss": 0.2295, "step": 23644 }, { "epoch": 0.8114275909402883, "grad_norm": 0.8853748717588159, "learning_rate": 9.042486329857831e-07, "loss": 0.2426, "step": 23645 }, { "epoch": 0.811461908030199, "grad_norm": 0.7988204989160211, "learning_rate": 9.039298980761008e-07, "loss": 0.2344, "step": 23646 }, { "epoch": 0.8114962251201098, "grad_norm": 0.7249115456174094, "learning_rate": 9.036112137685893e-07, "loss": 0.2392, "step": 23647 }, { "epoch": 0.8115305422100206, "grad_norm": 0.7737385383408995, "learning_rate": 9.032925800671849e-07, "loss": 0.2337, "step": 23648 }, { "epoch": 0.8115648592999314, "grad_norm": 0.7833329690457521, "learning_rate": 9.029739969758244e-07, "loss": 0.2766, "step": 23649 }, { "epoch": 0.8115991763898421, "grad_norm": 0.7667040071771237, "learning_rate": 9.026554644984448e-07, "loss": 0.3107, "step": 23650 }, { "epoch": 0.8116334934797529, "grad_norm": 0.7437607228352604, "learning_rate": 9.023369826389778e-07, "loss": 0.2654, "step": 23651 }, { "epoch": 0.8116678105696636, "grad_norm": 0.8435069739708932, "learning_rate": 9.020185514013619e-07, "loss": 0.2561, "step": 23652 }, { "epoch": 0.8117021276595745, "grad_norm": 0.8880095539238839, "learning_rate": 9.017001707895278e-07, "loss": 0.2435, "step": 23653 }, { "epoch": 0.8117364447494853, "grad_norm": 1.047870475629236, "learning_rate": 9.013818408074104e-07, "loss": 0.3017, "step": 23654 }, { "epoch": 0.811770761839396, "grad_norm": 0.6866688225529309, "learning_rate": 9.010635614589414e-07, "loss": 0.2218, "step": 23655 }, { "epoch": 0.8118050789293068, "grad_norm": 0.792972238410847, "learning_rate": 9.00745332748053e-07, "loss": 0.2581, "step": 23656 }, { "epoch": 0.8118393960192176, "grad_norm": 0.7972791007054282, "learning_rate": 9.004271546786774e-07, "loss": 0.3116, "step": 23657 }, { "epoch": 0.8118737131091284, "grad_norm": 0.8405202723863018, "learning_rate": 9.001090272547441e-07, "loss": 0.2149, "step": 23658 }, { "epoch": 0.8119080301990391, "grad_norm": 0.776176431794191, "learning_rate": 8.997909504801855e-07, "loss": 0.2775, "step": 23659 }, { "epoch": 0.8119423472889499, "grad_norm": 0.78926946450537, "learning_rate": 8.994729243589268e-07, "loss": 0.2694, "step": 23660 }, { "epoch": 0.8119766643788606, "grad_norm": 0.7495219511680696, "learning_rate": 8.991549488949003e-07, "loss": 0.2719, "step": 23661 }, { "epoch": 0.8120109814687715, "grad_norm": 0.7480422294429294, "learning_rate": 8.988370240920347e-07, "loss": 0.2506, "step": 23662 }, { "epoch": 0.8120452985586822, "grad_norm": 0.7685460190418126, "learning_rate": 8.985191499542534e-07, "loss": 0.2582, "step": 23663 }, { "epoch": 0.812079615648593, "grad_norm": 0.7804802167086474, "learning_rate": 8.982013264854889e-07, "loss": 0.2826, "step": 23664 }, { "epoch": 0.8121139327385037, "grad_norm": 0.735186591910996, "learning_rate": 8.978835536896635e-07, "loss": 0.2147, "step": 23665 }, { "epoch": 0.8121482498284146, "grad_norm": 0.8036602855088059, "learning_rate": 8.975658315707043e-07, "loss": 0.277, "step": 23666 }, { "epoch": 0.8121825669183254, "grad_norm": 0.8088576451929961, "learning_rate": 8.972481601325362e-07, "loss": 0.3347, "step": 23667 }, { "epoch": 0.8122168840082361, "grad_norm": 0.8654151616261033, "learning_rate": 8.96930539379084e-07, "loss": 0.2347, "step": 23668 }, { "epoch": 0.8122512010981469, "grad_norm": 0.9080971619710442, "learning_rate": 8.966129693142706e-07, "loss": 0.2672, "step": 23669 }, { "epoch": 0.8122855181880576, "grad_norm": 0.7887947790744831, "learning_rate": 8.962954499420201e-07, "loss": 0.266, "step": 23670 }, { "epoch": 0.8123198352779685, "grad_norm": 0.8054808558081045, "learning_rate": 8.95977981266255e-07, "loss": 0.2835, "step": 23671 }, { "epoch": 0.8123541523678792, "grad_norm": 0.7117762169105721, "learning_rate": 8.956605632908971e-07, "loss": 0.2382, "step": 23672 }, { "epoch": 0.81238846945779, "grad_norm": 0.715189586284831, "learning_rate": 8.953431960198694e-07, "loss": 0.228, "step": 23673 }, { "epoch": 0.8124227865477007, "grad_norm": 0.8830233043150166, "learning_rate": 8.950258794570876e-07, "loss": 0.3359, "step": 23674 }, { "epoch": 0.8124571036376115, "grad_norm": 0.7194032021066028, "learning_rate": 8.947086136064787e-07, "loss": 0.3084, "step": 23675 }, { "epoch": 0.8124914207275223, "grad_norm": 0.98197750319848, "learning_rate": 8.943913984719566e-07, "loss": 0.224, "step": 23676 }, { "epoch": 0.8125257378174331, "grad_norm": 0.8947283938561225, "learning_rate": 8.94074234057441e-07, "loss": 0.2604, "step": 23677 }, { "epoch": 0.8125600549073438, "grad_norm": 0.6735705299488468, "learning_rate": 8.937571203668533e-07, "loss": 0.2324, "step": 23678 }, { "epoch": 0.8125943719972546, "grad_norm": 0.6898886928394964, "learning_rate": 8.93440057404108e-07, "loss": 0.2842, "step": 23679 }, { "epoch": 0.8126286890871655, "grad_norm": 0.7672146427326644, "learning_rate": 8.931230451731232e-07, "loss": 0.2346, "step": 23680 }, { "epoch": 0.8126630061770762, "grad_norm": 0.8571318507672272, "learning_rate": 8.928060836778151e-07, "loss": 0.2449, "step": 23681 }, { "epoch": 0.812697323266987, "grad_norm": 0.6803097108084593, "learning_rate": 8.924891729220992e-07, "loss": 0.2696, "step": 23682 }, { "epoch": 0.8127316403568977, "grad_norm": 0.7478251227116091, "learning_rate": 8.921723129098903e-07, "loss": 0.2731, "step": 23683 }, { "epoch": 0.8127659574468085, "grad_norm": 0.731995954020054, "learning_rate": 8.918555036451037e-07, "loss": 0.1933, "step": 23684 }, { "epoch": 0.8128002745367193, "grad_norm": 0.7966892251959989, "learning_rate": 8.915387451316537e-07, "loss": 0.2557, "step": 23685 }, { "epoch": 0.8128345916266301, "grad_norm": 0.7620053656946618, "learning_rate": 8.912220373734498e-07, "loss": 0.2474, "step": 23686 }, { "epoch": 0.8128689087165408, "grad_norm": 0.8929994086024157, "learning_rate": 8.909053803744105e-07, "loss": 0.2469, "step": 23687 }, { "epoch": 0.8129032258064516, "grad_norm": 0.7318713869682352, "learning_rate": 8.905887741384428e-07, "loss": 0.2449, "step": 23688 }, { "epoch": 0.8129375428963624, "grad_norm": 0.7435172828994362, "learning_rate": 8.902722186694596e-07, "loss": 0.2209, "step": 23689 }, { "epoch": 0.8129718599862732, "grad_norm": 0.7654805353669443, "learning_rate": 8.899557139713716e-07, "loss": 0.2808, "step": 23690 }, { "epoch": 0.813006177076184, "grad_norm": 0.7614200785667739, "learning_rate": 8.896392600480891e-07, "loss": 0.2714, "step": 23691 }, { "epoch": 0.8130404941660947, "grad_norm": 0.9010456830338334, "learning_rate": 8.893228569035212e-07, "loss": 0.2821, "step": 23692 }, { "epoch": 0.8130748112560054, "grad_norm": 0.787845622032487, "learning_rate": 8.890065045415768e-07, "loss": 0.3275, "step": 23693 }, { "epoch": 0.8131091283459163, "grad_norm": 0.7806039779429979, "learning_rate": 8.886902029661654e-07, "loss": 0.2617, "step": 23694 }, { "epoch": 0.8131434454358271, "grad_norm": 0.828428193538309, "learning_rate": 8.883739521811907e-07, "loss": 0.3036, "step": 23695 }, { "epoch": 0.8131777625257378, "grad_norm": 0.7838927032220492, "learning_rate": 8.880577521905648e-07, "loss": 0.2334, "step": 23696 }, { "epoch": 0.8132120796156486, "grad_norm": 0.7742307918900667, "learning_rate": 8.8774160299819e-07, "loss": 0.2731, "step": 23697 }, { "epoch": 0.8132463967055593, "grad_norm": 0.7997549365020674, "learning_rate": 8.874255046079728e-07, "loss": 0.3016, "step": 23698 }, { "epoch": 0.8132807137954702, "grad_norm": 0.7327328063621577, "learning_rate": 8.871094570238193e-07, "loss": 0.2301, "step": 23699 }, { "epoch": 0.8133150308853809, "grad_norm": 0.8916086620785172, "learning_rate": 8.86793460249632e-07, "loss": 0.2291, "step": 23700 }, { "epoch": 0.8133493479752917, "grad_norm": 0.7499386094091265, "learning_rate": 8.864775142893178e-07, "loss": 0.2574, "step": 23701 }, { "epoch": 0.8133836650652024, "grad_norm": 0.8067677853655382, "learning_rate": 8.861616191467776e-07, "loss": 0.2381, "step": 23702 }, { "epoch": 0.8134179821551133, "grad_norm": 0.8258259800866257, "learning_rate": 8.858457748259136e-07, "loss": 0.2648, "step": 23703 }, { "epoch": 0.813452299245024, "grad_norm": 0.773915320283754, "learning_rate": 8.85529981330629e-07, "loss": 0.2823, "step": 23704 }, { "epoch": 0.8134866163349348, "grad_norm": 0.744610154658681, "learning_rate": 8.852142386648244e-07, "loss": 0.2678, "step": 23705 }, { "epoch": 0.8135209334248455, "grad_norm": 0.8081130750355281, "learning_rate": 8.848985468324e-07, "loss": 0.2395, "step": 23706 }, { "epoch": 0.8135552505147563, "grad_norm": 0.9718257459156447, "learning_rate": 8.845829058372568e-07, "loss": 0.3017, "step": 23707 }, { "epoch": 0.8135895676046672, "grad_norm": 0.7250018792137857, "learning_rate": 8.842673156832948e-07, "loss": 0.2359, "step": 23708 }, { "epoch": 0.8136238846945779, "grad_norm": 0.7990991877369367, "learning_rate": 8.839517763744093e-07, "loss": 0.263, "step": 23709 }, { "epoch": 0.8136582017844887, "grad_norm": 0.6986214388323532, "learning_rate": 8.836362879145033e-07, "loss": 0.2936, "step": 23710 }, { "epoch": 0.8136925188743994, "grad_norm": 0.8098794595271891, "learning_rate": 8.833208503074703e-07, "loss": 0.2727, "step": 23711 }, { "epoch": 0.8137268359643103, "grad_norm": 0.7537761625270065, "learning_rate": 8.830054635572083e-07, "loss": 0.2882, "step": 23712 }, { "epoch": 0.813761153054221, "grad_norm": 0.7283071101368105, "learning_rate": 8.826901276676142e-07, "loss": 0.2672, "step": 23713 }, { "epoch": 0.8137954701441318, "grad_norm": 0.785741776658344, "learning_rate": 8.823748426425832e-07, "loss": 0.2274, "step": 23714 }, { "epoch": 0.8138297872340425, "grad_norm": 0.8539890201895249, "learning_rate": 8.820596084860106e-07, "loss": 0.3087, "step": 23715 }, { "epoch": 0.8138641043239533, "grad_norm": 0.7436376916959836, "learning_rate": 8.817444252017898e-07, "loss": 0.2635, "step": 23716 }, { "epoch": 0.8138984214138641, "grad_norm": 0.6993343895705266, "learning_rate": 8.814292927938173e-07, "loss": 0.2468, "step": 23717 }, { "epoch": 0.8139327385037749, "grad_norm": 0.7566068570381459, "learning_rate": 8.81114211265981e-07, "loss": 0.2286, "step": 23718 }, { "epoch": 0.8139670555936857, "grad_norm": 0.7272633222497802, "learning_rate": 8.807991806221794e-07, "loss": 0.2084, "step": 23719 }, { "epoch": 0.8140013726835964, "grad_norm": 0.830484966429743, "learning_rate": 8.804842008662995e-07, "loss": 0.3222, "step": 23720 }, { "epoch": 0.8140356897735072, "grad_norm": 0.784954682972913, "learning_rate": 8.801692720022337e-07, "loss": 0.2654, "step": 23721 }, { "epoch": 0.814070006863418, "grad_norm": 0.7506215007950042, "learning_rate": 8.798543940338755e-07, "loss": 0.2829, "step": 23722 }, { "epoch": 0.8141043239533288, "grad_norm": 0.7529694274213428, "learning_rate": 8.795395669651118e-07, "loss": 0.2401, "step": 23723 }, { "epoch": 0.8141386410432395, "grad_norm": 0.811883905138894, "learning_rate": 8.792247907998325e-07, "loss": 0.2499, "step": 23724 }, { "epoch": 0.8141729581331503, "grad_norm": 0.7521947451768237, "learning_rate": 8.789100655419263e-07, "loss": 0.2897, "step": 23725 }, { "epoch": 0.8142072752230611, "grad_norm": 0.8745464728991686, "learning_rate": 8.785953911952821e-07, "loss": 0.2776, "step": 23726 }, { "epoch": 0.8142415923129719, "grad_norm": 0.8225855230117353, "learning_rate": 8.782807677637862e-07, "loss": 0.2838, "step": 23727 }, { "epoch": 0.8142759094028826, "grad_norm": 0.8309048890565652, "learning_rate": 8.779661952513263e-07, "loss": 0.2975, "step": 23728 }, { "epoch": 0.8143102264927934, "grad_norm": 0.7908610493634116, "learning_rate": 8.776516736617891e-07, "loss": 0.2656, "step": 23729 }, { "epoch": 0.8143445435827041, "grad_norm": 0.7670381475024521, "learning_rate": 8.773372029990574e-07, "loss": 0.2925, "step": 23730 }, { "epoch": 0.814378860672615, "grad_norm": 0.8177585360182763, "learning_rate": 8.770227832670197e-07, "loss": 0.3107, "step": 23731 }, { "epoch": 0.8144131777625258, "grad_norm": 0.8062120499374874, "learning_rate": 8.76708414469557e-07, "loss": 0.2458, "step": 23732 }, { "epoch": 0.8144474948524365, "grad_norm": 0.7274518175123641, "learning_rate": 8.763940966105561e-07, "loss": 0.1853, "step": 23733 }, { "epoch": 0.8144818119423473, "grad_norm": 0.8163239983168059, "learning_rate": 8.760798296938977e-07, "loss": 0.2533, "step": 23734 }, { "epoch": 0.8145161290322581, "grad_norm": 0.7782499662472854, "learning_rate": 8.757656137234655e-07, "loss": 0.281, "step": 23735 }, { "epoch": 0.8145504461221689, "grad_norm": 0.8280210094336687, "learning_rate": 8.754514487031401e-07, "loss": 0.246, "step": 23736 }, { "epoch": 0.8145847632120796, "grad_norm": 0.8319701697383196, "learning_rate": 8.751373346368031e-07, "loss": 0.2746, "step": 23737 }, { "epoch": 0.8146190803019904, "grad_norm": 0.7716243532209234, "learning_rate": 8.748232715283355e-07, "loss": 0.265, "step": 23738 }, { "epoch": 0.8146533973919011, "grad_norm": 0.8434055098408457, "learning_rate": 8.745092593816173e-07, "loss": 0.3003, "step": 23739 }, { "epoch": 0.814687714481812, "grad_norm": 0.7404728411712456, "learning_rate": 8.741952982005281e-07, "loss": 0.2385, "step": 23740 }, { "epoch": 0.8147220315717227, "grad_norm": 0.7541371891132207, "learning_rate": 8.738813879889435e-07, "loss": 0.2304, "step": 23741 }, { "epoch": 0.8147563486616335, "grad_norm": 0.746473450380195, "learning_rate": 8.735675287507461e-07, "loss": 0.2861, "step": 23742 }, { "epoch": 0.8147906657515442, "grad_norm": 0.7439007699651904, "learning_rate": 8.732537204898095e-07, "loss": 0.2658, "step": 23743 }, { "epoch": 0.814824982841455, "grad_norm": 0.8191726317732542, "learning_rate": 8.729399632100111e-07, "loss": 0.2503, "step": 23744 }, { "epoch": 0.8148592999313659, "grad_norm": 0.7488569685540047, "learning_rate": 8.7262625691523e-07, "loss": 0.2814, "step": 23745 }, { "epoch": 0.8148936170212766, "grad_norm": 0.8247820924509474, "learning_rate": 8.723126016093381e-07, "loss": 0.2436, "step": 23746 }, { "epoch": 0.8149279341111874, "grad_norm": 0.8524167195135095, "learning_rate": 8.719989972962117e-07, "loss": 0.2531, "step": 23747 }, { "epoch": 0.8149622512010981, "grad_norm": 0.849072239852536, "learning_rate": 8.716854439797251e-07, "loss": 0.269, "step": 23748 }, { "epoch": 0.814996568291009, "grad_norm": 0.826532119722744, "learning_rate": 8.713719416637512e-07, "loss": 0.2702, "step": 23749 }, { "epoch": 0.8150308853809197, "grad_norm": 0.7861525028251294, "learning_rate": 8.710584903521641e-07, "loss": 0.3032, "step": 23750 }, { "epoch": 0.8150652024708305, "grad_norm": 0.7890840816797466, "learning_rate": 8.707450900488351e-07, "loss": 0.2366, "step": 23751 }, { "epoch": 0.8150995195607412, "grad_norm": 0.8350091231431948, "learning_rate": 8.704317407576374e-07, "loss": 0.2355, "step": 23752 }, { "epoch": 0.815133836650652, "grad_norm": 0.7567999617381101, "learning_rate": 8.701184424824382e-07, "loss": 0.2414, "step": 23753 }, { "epoch": 0.8151681537405628, "grad_norm": 0.7530304134166272, "learning_rate": 8.698051952271136e-07, "loss": 0.2392, "step": 23754 }, { "epoch": 0.8152024708304736, "grad_norm": 0.8194775061472082, "learning_rate": 8.694919989955291e-07, "loss": 0.3025, "step": 23755 }, { "epoch": 0.8152367879203843, "grad_norm": 0.9003961711855054, "learning_rate": 8.691788537915557e-07, "loss": 0.2371, "step": 23756 }, { "epoch": 0.8152711050102951, "grad_norm": 0.8745880290481391, "learning_rate": 8.688657596190609e-07, "loss": 0.2953, "step": 23757 }, { "epoch": 0.815305422100206, "grad_norm": 0.7557523520124609, "learning_rate": 8.685527164819135e-07, "loss": 0.2566, "step": 23758 }, { "epoch": 0.8153397391901167, "grad_norm": 0.7777862983712273, "learning_rate": 8.682397243839802e-07, "loss": 0.2469, "step": 23759 }, { "epoch": 0.8153740562800275, "grad_norm": 0.6836980068302283, "learning_rate": 8.679267833291283e-07, "loss": 0.229, "step": 23760 }, { "epoch": 0.8154083733699382, "grad_norm": 0.7004764502913956, "learning_rate": 8.67613893321223e-07, "loss": 0.3052, "step": 23761 }, { "epoch": 0.815442690459849, "grad_norm": 0.7421433306562714, "learning_rate": 8.673010543641308e-07, "loss": 0.2343, "step": 23762 }, { "epoch": 0.8154770075497598, "grad_norm": 0.7588283204702467, "learning_rate": 8.669882664617168e-07, "loss": 0.2749, "step": 23763 }, { "epoch": 0.8155113246396706, "grad_norm": 0.7294640743868546, "learning_rate": 8.666755296178414e-07, "loss": 0.2367, "step": 23764 }, { "epoch": 0.8155456417295813, "grad_norm": 0.7767736251187854, "learning_rate": 8.663628438363735e-07, "loss": 0.2689, "step": 23765 }, { "epoch": 0.8155799588194921, "grad_norm": 0.7802197781534093, "learning_rate": 8.66050209121172e-07, "loss": 0.2833, "step": 23766 }, { "epoch": 0.8156142759094028, "grad_norm": 0.7609128002617765, "learning_rate": 8.65737625476099e-07, "loss": 0.2912, "step": 23767 }, { "epoch": 0.8156485929993137, "grad_norm": 0.845094685707909, "learning_rate": 8.654250929050207e-07, "loss": 0.2292, "step": 23768 }, { "epoch": 0.8156829100892244, "grad_norm": 0.8921655905762005, "learning_rate": 8.65112611411793e-07, "loss": 0.3002, "step": 23769 }, { "epoch": 0.8157172271791352, "grad_norm": 0.7405105695758665, "learning_rate": 8.648001810002788e-07, "loss": 0.2628, "step": 23770 }, { "epoch": 0.815751544269046, "grad_norm": 0.7111423095902841, "learning_rate": 8.644878016743369e-07, "loss": 0.2626, "step": 23771 }, { "epoch": 0.8157858613589568, "grad_norm": 0.8497531353407719, "learning_rate": 8.641754734378266e-07, "loss": 0.2499, "step": 23772 }, { "epoch": 0.8158201784488676, "grad_norm": 0.7880675839646155, "learning_rate": 8.638631962946064e-07, "loss": 0.2544, "step": 23773 }, { "epoch": 0.8158544955387783, "grad_norm": 0.680079794515868, "learning_rate": 8.63550970248534e-07, "loss": 0.2176, "step": 23774 }, { "epoch": 0.8158888126286891, "grad_norm": 0.7150139040437119, "learning_rate": 8.632387953034683e-07, "loss": 0.2972, "step": 23775 }, { "epoch": 0.8159231297185998, "grad_norm": 0.7864419467757846, "learning_rate": 8.629266714632617e-07, "loss": 0.2518, "step": 23776 }, { "epoch": 0.8159574468085107, "grad_norm": 0.7720074966819721, "learning_rate": 8.626145987317752e-07, "loss": 0.2564, "step": 23777 }, { "epoch": 0.8159917638984214, "grad_norm": 0.8272526973867345, "learning_rate": 8.623025771128602e-07, "loss": 0.2744, "step": 23778 }, { "epoch": 0.8160260809883322, "grad_norm": 0.7124143718900462, "learning_rate": 8.619906066103728e-07, "loss": 0.2787, "step": 23779 }, { "epoch": 0.8160603980782429, "grad_norm": 0.7920955054421793, "learning_rate": 8.616786872281674e-07, "loss": 0.2412, "step": 23780 }, { "epoch": 0.8160947151681538, "grad_norm": 0.768265160771229, "learning_rate": 8.613668189700963e-07, "loss": 0.2743, "step": 23781 }, { "epoch": 0.8161290322580645, "grad_norm": 0.8026259266058775, "learning_rate": 8.610550018400138e-07, "loss": 0.267, "step": 23782 }, { "epoch": 0.8161633493479753, "grad_norm": 0.8533552315226467, "learning_rate": 8.607432358417705e-07, "loss": 0.1976, "step": 23783 }, { "epoch": 0.816197666437886, "grad_norm": 0.8961618121110164, "learning_rate": 8.604315209792202e-07, "loss": 0.328, "step": 23784 }, { "epoch": 0.8162319835277968, "grad_norm": 0.6986238677075582, "learning_rate": 8.601198572562097e-07, "loss": 0.2182, "step": 23785 }, { "epoch": 0.8162663006177077, "grad_norm": 0.7984337154437156, "learning_rate": 8.598082446765943e-07, "loss": 0.2909, "step": 23786 }, { "epoch": 0.8163006177076184, "grad_norm": 0.817936102097781, "learning_rate": 8.594966832442197e-07, "loss": 0.2709, "step": 23787 }, { "epoch": 0.8163349347975292, "grad_norm": 0.7997171530239089, "learning_rate": 8.591851729629347e-07, "loss": 0.2498, "step": 23788 }, { "epoch": 0.8163692518874399, "grad_norm": 0.7709248217877804, "learning_rate": 8.588737138365921e-07, "loss": 0.255, "step": 23789 }, { "epoch": 0.8164035689773507, "grad_norm": 0.8235498739854015, "learning_rate": 8.585623058690351e-07, "loss": 0.2584, "step": 23790 }, { "epoch": 0.8164378860672615, "grad_norm": 0.8230944252466058, "learning_rate": 8.582509490641127e-07, "loss": 0.2556, "step": 23791 }, { "epoch": 0.8164722031571723, "grad_norm": 0.7759443581180746, "learning_rate": 8.579396434256709e-07, "loss": 0.2631, "step": 23792 }, { "epoch": 0.816506520247083, "grad_norm": 0.7578226759587638, "learning_rate": 8.576283889575554e-07, "loss": 0.2614, "step": 23793 }, { "epoch": 0.8165408373369938, "grad_norm": 0.7687096942102087, "learning_rate": 8.57317185663612e-07, "loss": 0.2482, "step": 23794 }, { "epoch": 0.8165751544269046, "grad_norm": 0.7871997996132732, "learning_rate": 8.570060335476843e-07, "loss": 0.2219, "step": 23795 }, { "epoch": 0.8166094715168154, "grad_norm": 0.7447361867616138, "learning_rate": 8.566949326136176e-07, "loss": 0.2144, "step": 23796 }, { "epoch": 0.8166437886067262, "grad_norm": 0.8688717728040232, "learning_rate": 8.563838828652537e-07, "loss": 0.2681, "step": 23797 }, { "epoch": 0.8166781056966369, "grad_norm": 0.926531682974059, "learning_rate": 8.560728843064375e-07, "loss": 0.2404, "step": 23798 }, { "epoch": 0.8167124227865477, "grad_norm": 0.7616493707664352, "learning_rate": 8.557619369410069e-07, "loss": 0.3093, "step": 23799 }, { "epoch": 0.8167467398764585, "grad_norm": 0.8517288231090774, "learning_rate": 8.554510407728084e-07, "loss": 0.2321, "step": 23800 }, { "epoch": 0.8167810569663693, "grad_norm": 0.7674918226841674, "learning_rate": 8.551401958056788e-07, "loss": 0.2535, "step": 23801 }, { "epoch": 0.81681537405628, "grad_norm": 0.7202496325090334, "learning_rate": 8.548294020434594e-07, "loss": 0.2376, "step": 23802 }, { "epoch": 0.8168496911461908, "grad_norm": 0.8658930260462586, "learning_rate": 8.545186594899907e-07, "loss": 0.243, "step": 23803 }, { "epoch": 0.8168840082361016, "grad_norm": 0.8235968612553146, "learning_rate": 8.5420796814911e-07, "loss": 0.2453, "step": 23804 }, { "epoch": 0.8169183253260124, "grad_norm": 0.7959932101358677, "learning_rate": 8.538973280246571e-07, "loss": 0.2875, "step": 23805 }, { "epoch": 0.8169526424159231, "grad_norm": 0.7397854415950793, "learning_rate": 8.535867391204683e-07, "loss": 0.2206, "step": 23806 }, { "epoch": 0.8169869595058339, "grad_norm": 0.7309898719722069, "learning_rate": 8.532762014403823e-07, "loss": 0.3043, "step": 23807 }, { "epoch": 0.8170212765957446, "grad_norm": 0.7232751872841782, "learning_rate": 8.52965714988232e-07, "loss": 0.2386, "step": 23808 }, { "epoch": 0.8170555936856555, "grad_norm": 0.705377871636674, "learning_rate": 8.526552797678578e-07, "loss": 0.2527, "step": 23809 }, { "epoch": 0.8170899107755663, "grad_norm": 0.7334111764319817, "learning_rate": 8.52344895783091e-07, "loss": 0.2311, "step": 23810 }, { "epoch": 0.817124227865477, "grad_norm": 0.8139272326046533, "learning_rate": 8.520345630377658e-07, "loss": 0.2834, "step": 23811 }, { "epoch": 0.8171585449553878, "grad_norm": 0.7565946919200701, "learning_rate": 8.517242815357202e-07, "loss": 0.2541, "step": 23812 }, { "epoch": 0.8171928620452985, "grad_norm": 0.7342231041874168, "learning_rate": 8.514140512807833e-07, "loss": 0.2791, "step": 23813 }, { "epoch": 0.8172271791352094, "grad_norm": 0.7405477195505048, "learning_rate": 8.511038722767895e-07, "loss": 0.2519, "step": 23814 }, { "epoch": 0.8172614962251201, "grad_norm": 0.7388117573991978, "learning_rate": 8.5079374452757e-07, "loss": 0.2429, "step": 23815 }, { "epoch": 0.8172958133150309, "grad_norm": 0.7395027790813924, "learning_rate": 8.50483668036956e-07, "loss": 0.221, "step": 23816 }, { "epoch": 0.8173301304049416, "grad_norm": 0.8201695577904947, "learning_rate": 8.501736428087786e-07, "loss": 0.2696, "step": 23817 }, { "epoch": 0.8173644474948525, "grad_norm": 0.7345956296571662, "learning_rate": 8.498636688468676e-07, "loss": 0.2106, "step": 23818 }, { "epoch": 0.8173987645847632, "grad_norm": 0.785549802824828, "learning_rate": 8.495537461550535e-07, "loss": 0.2204, "step": 23819 }, { "epoch": 0.817433081674674, "grad_norm": 0.7299684487498023, "learning_rate": 8.492438747371617e-07, "loss": 0.247, "step": 23820 }, { "epoch": 0.8174673987645847, "grad_norm": 0.8334715651036214, "learning_rate": 8.489340545970248e-07, "loss": 0.2151, "step": 23821 }, { "epoch": 0.8175017158544955, "grad_norm": 0.8573833115986196, "learning_rate": 8.486242857384668e-07, "loss": 0.3021, "step": 23822 }, { "epoch": 0.8175360329444064, "grad_norm": 0.8625346575935867, "learning_rate": 8.483145681653154e-07, "loss": 0.2759, "step": 23823 }, { "epoch": 0.8175703500343171, "grad_norm": 0.7184798818958287, "learning_rate": 8.48004901881398e-07, "loss": 0.2435, "step": 23824 }, { "epoch": 0.8176046671242279, "grad_norm": 0.8124603098185551, "learning_rate": 8.476952868905386e-07, "loss": 0.3096, "step": 23825 }, { "epoch": 0.8176389842141386, "grad_norm": 0.9538634052331041, "learning_rate": 8.473857231965627e-07, "loss": 0.2448, "step": 23826 }, { "epoch": 0.8176733013040495, "grad_norm": 0.8479375294290404, "learning_rate": 8.470762108032948e-07, "loss": 0.2437, "step": 23827 }, { "epoch": 0.8177076183939602, "grad_norm": 0.8781200088410043, "learning_rate": 8.467667497145588e-07, "loss": 0.2356, "step": 23828 }, { "epoch": 0.817741935483871, "grad_norm": 0.8579204641546903, "learning_rate": 8.464573399341769e-07, "loss": 0.2655, "step": 23829 }, { "epoch": 0.8177762525737817, "grad_norm": 0.7448896702541355, "learning_rate": 8.46147981465974e-07, "loss": 0.226, "step": 23830 }, { "epoch": 0.8178105696636925, "grad_norm": 0.824745045083065, "learning_rate": 8.45838674313767e-07, "loss": 0.2962, "step": 23831 }, { "epoch": 0.8178448867536033, "grad_norm": 0.8658274368158713, "learning_rate": 8.455294184813806e-07, "loss": 0.276, "step": 23832 }, { "epoch": 0.8178792038435141, "grad_norm": 0.7918668440427014, "learning_rate": 8.452202139726367e-07, "loss": 0.239, "step": 23833 }, { "epoch": 0.8179135209334248, "grad_norm": 0.7270447711118823, "learning_rate": 8.449110607913502e-07, "loss": 0.2178, "step": 23834 }, { "epoch": 0.8179478380233356, "grad_norm": 0.7919083061046795, "learning_rate": 8.446019589413457e-07, "loss": 0.2773, "step": 23835 }, { "epoch": 0.8179821551132463, "grad_norm": 0.7968178355611341, "learning_rate": 8.442929084264379e-07, "loss": 0.2959, "step": 23836 }, { "epoch": 0.8180164722031572, "grad_norm": 0.7271519100199667, "learning_rate": 8.439839092504459e-07, "loss": 0.2373, "step": 23837 }, { "epoch": 0.818050789293068, "grad_norm": 0.6908677640594079, "learning_rate": 8.43674961417188e-07, "loss": 0.2344, "step": 23838 }, { "epoch": 0.8180851063829787, "grad_norm": 0.7696117516976096, "learning_rate": 8.433660649304792e-07, "loss": 0.2766, "step": 23839 }, { "epoch": 0.8181194234728895, "grad_norm": 0.7819283344053524, "learning_rate": 8.430572197941373e-07, "loss": 0.2519, "step": 23840 }, { "epoch": 0.8181537405628003, "grad_norm": 0.8551023735161263, "learning_rate": 8.427484260119767e-07, "loss": 0.2334, "step": 23841 }, { "epoch": 0.8181880576527111, "grad_norm": 0.7704736540917531, "learning_rate": 8.424396835878135e-07, "loss": 0.2593, "step": 23842 }, { "epoch": 0.8182223747426218, "grad_norm": 0.8658843348453217, "learning_rate": 8.421309925254584e-07, "loss": 0.251, "step": 23843 }, { "epoch": 0.8182566918325326, "grad_norm": 0.7885735576424902, "learning_rate": 8.418223528287295e-07, "loss": 0.2458, "step": 23844 }, { "epoch": 0.8182910089224433, "grad_norm": 0.8351335373576121, "learning_rate": 8.415137645014365e-07, "loss": 0.2335, "step": 23845 }, { "epoch": 0.8183253260123542, "grad_norm": 0.7373497228029683, "learning_rate": 8.412052275473925e-07, "loss": 0.2504, "step": 23846 }, { "epoch": 0.8183596431022649, "grad_norm": 0.800942900051973, "learning_rate": 8.408967419704094e-07, "loss": 0.2389, "step": 23847 }, { "epoch": 0.8183939601921757, "grad_norm": 0.8008445417178661, "learning_rate": 8.405883077742982e-07, "loss": 0.2225, "step": 23848 }, { "epoch": 0.8184282772820864, "grad_norm": 0.8856204148874498, "learning_rate": 8.402799249628685e-07, "loss": 0.2203, "step": 23849 }, { "epoch": 0.8184625943719973, "grad_norm": 0.7932669732848131, "learning_rate": 8.399715935399311e-07, "loss": 0.2469, "step": 23850 }, { "epoch": 0.8184969114619081, "grad_norm": 0.8284721321651944, "learning_rate": 8.396633135092952e-07, "loss": 0.2696, "step": 23851 }, { "epoch": 0.8185312285518188, "grad_norm": 0.7693227444853166, "learning_rate": 8.393550848747667e-07, "loss": 0.2647, "step": 23852 }, { "epoch": 0.8185655456417296, "grad_norm": 0.859456954748236, "learning_rate": 8.390469076401575e-07, "loss": 0.3065, "step": 23853 }, { "epoch": 0.8185998627316403, "grad_norm": 0.7886829473107576, "learning_rate": 8.387387818092712e-07, "loss": 0.3118, "step": 23854 }, { "epoch": 0.8186341798215512, "grad_norm": 0.8354419598148989, "learning_rate": 8.384307073859143e-07, "loss": 0.279, "step": 23855 }, { "epoch": 0.8186684969114619, "grad_norm": 0.7342800854555056, "learning_rate": 8.381226843738971e-07, "loss": 0.2062, "step": 23856 }, { "epoch": 0.8187028140013727, "grad_norm": 0.8060221276097222, "learning_rate": 8.378147127770192e-07, "loss": 0.2814, "step": 23857 }, { "epoch": 0.8187371310912834, "grad_norm": 0.7342752061853622, "learning_rate": 8.375067925990899e-07, "loss": 0.2652, "step": 23858 }, { "epoch": 0.8187714481811942, "grad_norm": 0.8716570074612404, "learning_rate": 8.371989238439105e-07, "loss": 0.3028, "step": 23859 }, { "epoch": 0.818805765271105, "grad_norm": 0.7246262028220773, "learning_rate": 8.368911065152846e-07, "loss": 0.2813, "step": 23860 }, { "epoch": 0.8188400823610158, "grad_norm": 0.8034468861868704, "learning_rate": 8.365833406170159e-07, "loss": 0.266, "step": 23861 }, { "epoch": 0.8188743994509265, "grad_norm": 0.703238829781738, "learning_rate": 8.362756261529054e-07, "loss": 0.2327, "step": 23862 }, { "epoch": 0.8189087165408373, "grad_norm": 0.82020421291257, "learning_rate": 8.359679631267554e-07, "loss": 0.2311, "step": 23863 }, { "epoch": 0.8189430336307482, "grad_norm": 0.7246272905613379, "learning_rate": 8.356603515423667e-07, "loss": 0.2714, "step": 23864 }, { "epoch": 0.8189773507206589, "grad_norm": 0.8155217465147944, "learning_rate": 8.353527914035398e-07, "loss": 0.3253, "step": 23865 }, { "epoch": 0.8190116678105697, "grad_norm": 0.7646472042531046, "learning_rate": 8.350452827140715e-07, "loss": 0.2719, "step": 23866 }, { "epoch": 0.8190459849004804, "grad_norm": 0.6682854710925876, "learning_rate": 8.347378254777655e-07, "loss": 0.2067, "step": 23867 }, { "epoch": 0.8190803019903912, "grad_norm": 0.6940367821929584, "learning_rate": 8.34430419698416e-07, "loss": 0.274, "step": 23868 }, { "epoch": 0.819114619080302, "grad_norm": 0.7885711465261114, "learning_rate": 8.341230653798221e-07, "loss": 0.2172, "step": 23869 }, { "epoch": 0.8191489361702128, "grad_norm": 0.7789655513405096, "learning_rate": 8.338157625257808e-07, "loss": 0.3089, "step": 23870 }, { "epoch": 0.8191832532601235, "grad_norm": 0.7357708480464139, "learning_rate": 8.335085111400887e-07, "loss": 0.2888, "step": 23871 }, { "epoch": 0.8192175703500343, "grad_norm": 0.791998894181122, "learning_rate": 8.332013112265413e-07, "loss": 0.311, "step": 23872 }, { "epoch": 0.8192518874399451, "grad_norm": 0.7175034634687733, "learning_rate": 8.32894162788933e-07, "loss": 0.2416, "step": 23873 }, { "epoch": 0.8192862045298559, "grad_norm": 0.7360334624101834, "learning_rate": 8.325870658310608e-07, "loss": 0.2374, "step": 23874 }, { "epoch": 0.8193205216197667, "grad_norm": 0.7417462078850956, "learning_rate": 8.322800203567144e-07, "loss": 0.2369, "step": 23875 }, { "epoch": 0.8193548387096774, "grad_norm": 0.8437436893078959, "learning_rate": 8.319730263696901e-07, "loss": 0.2557, "step": 23876 }, { "epoch": 0.8193891557995882, "grad_norm": 0.7539214233084216, "learning_rate": 8.316660838737806e-07, "loss": 0.2538, "step": 23877 }, { "epoch": 0.819423472889499, "grad_norm": 0.7648782822936557, "learning_rate": 8.313591928727749e-07, "loss": 0.2705, "step": 23878 }, { "epoch": 0.8194577899794098, "grad_norm": 0.8434447427943956, "learning_rate": 8.310523533704684e-07, "loss": 0.3142, "step": 23879 }, { "epoch": 0.8194921070693205, "grad_norm": 0.887371608544535, "learning_rate": 8.307455653706481e-07, "loss": 0.26, "step": 23880 }, { "epoch": 0.8195264241592313, "grad_norm": 0.7564138582156112, "learning_rate": 8.30438828877106e-07, "loss": 0.2558, "step": 23881 }, { "epoch": 0.819560741249142, "grad_norm": 0.7707353441479768, "learning_rate": 8.301321438936305e-07, "loss": 0.2637, "step": 23882 }, { "epoch": 0.8195950583390529, "grad_norm": 0.86882908118656, "learning_rate": 8.298255104240105e-07, "loss": 0.3204, "step": 23883 }, { "epoch": 0.8196293754289636, "grad_norm": 0.8061224481481567, "learning_rate": 8.295189284720351e-07, "loss": 0.2598, "step": 23884 }, { "epoch": 0.8196636925188744, "grad_norm": 0.9472428637645158, "learning_rate": 8.292123980414907e-07, "loss": 0.2349, "step": 23885 }, { "epoch": 0.8196980096087851, "grad_norm": 0.7689227987728041, "learning_rate": 8.289059191361654e-07, "loss": 0.2592, "step": 23886 }, { "epoch": 0.819732326698696, "grad_norm": 0.7688320422391136, "learning_rate": 8.285994917598422e-07, "loss": 0.2569, "step": 23887 }, { "epoch": 0.8197666437886068, "grad_norm": 0.7897398532482778, "learning_rate": 8.282931159163115e-07, "loss": 0.2354, "step": 23888 }, { "epoch": 0.8198009608785175, "grad_norm": 0.7046918989816192, "learning_rate": 8.279867916093532e-07, "loss": 0.2556, "step": 23889 }, { "epoch": 0.8198352779684283, "grad_norm": 0.7216872121811645, "learning_rate": 8.276805188427562e-07, "loss": 0.1773, "step": 23890 }, { "epoch": 0.819869595058339, "grad_norm": 0.7693317758102162, "learning_rate": 8.273742976203008e-07, "loss": 0.3018, "step": 23891 }, { "epoch": 0.8199039121482499, "grad_norm": 0.7207657039257301, "learning_rate": 8.270681279457716e-07, "loss": 0.2566, "step": 23892 }, { "epoch": 0.8199382292381606, "grad_norm": 0.9099293281742545, "learning_rate": 8.267620098229501e-07, "loss": 0.2548, "step": 23893 }, { "epoch": 0.8199725463280714, "grad_norm": 0.7867238246265736, "learning_rate": 8.264559432556179e-07, "loss": 0.2498, "step": 23894 }, { "epoch": 0.8200068634179821, "grad_norm": 0.8455192213640321, "learning_rate": 8.261499282475576e-07, "loss": 0.27, "step": 23895 }, { "epoch": 0.820041180507893, "grad_norm": 0.7620088234631617, "learning_rate": 8.25843964802548e-07, "loss": 0.3017, "step": 23896 }, { "epoch": 0.8200754975978037, "grad_norm": 0.7236504246177197, "learning_rate": 8.255380529243711e-07, "loss": 0.2712, "step": 23897 }, { "epoch": 0.8201098146877145, "grad_norm": 0.7459265774246462, "learning_rate": 8.252321926168022e-07, "loss": 0.2518, "step": 23898 }, { "epoch": 0.8201441317776252, "grad_norm": 0.7621998837705012, "learning_rate": 8.249263838836235e-07, "loss": 0.2708, "step": 23899 }, { "epoch": 0.820178448867536, "grad_norm": 0.7831958235971103, "learning_rate": 8.246206267286127e-07, "loss": 0.3204, "step": 23900 }, { "epoch": 0.8202127659574469, "grad_norm": 0.8325383098203635, "learning_rate": 8.243149211555441e-07, "loss": 0.2052, "step": 23901 }, { "epoch": 0.8202470830473576, "grad_norm": 0.7297997355269776, "learning_rate": 8.240092671681982e-07, "loss": 0.2129, "step": 23902 }, { "epoch": 0.8202814001372684, "grad_norm": 0.913275780598515, "learning_rate": 8.237036647703478e-07, "loss": 0.2263, "step": 23903 }, { "epoch": 0.8203157172271791, "grad_norm": 0.6652261389907383, "learning_rate": 8.233981139657699e-07, "loss": 0.2309, "step": 23904 }, { "epoch": 0.8203500343170899, "grad_norm": 0.8004887221888691, "learning_rate": 8.230926147582386e-07, "loss": 0.2678, "step": 23905 }, { "epoch": 0.8203843514070007, "grad_norm": 0.7234184558348348, "learning_rate": 8.227871671515286e-07, "loss": 0.2401, "step": 23906 }, { "epoch": 0.8204186684969115, "grad_norm": 0.7771974970066358, "learning_rate": 8.224817711494126e-07, "loss": 0.2893, "step": 23907 }, { "epoch": 0.8204529855868222, "grad_norm": 0.7896059442069455, "learning_rate": 8.221764267556637e-07, "loss": 0.3056, "step": 23908 }, { "epoch": 0.820487302676733, "grad_norm": 0.7668459890831645, "learning_rate": 8.218711339740559e-07, "loss": 0.2881, "step": 23909 }, { "epoch": 0.8205216197666438, "grad_norm": 0.6867488706731614, "learning_rate": 8.215658928083564e-07, "loss": 0.2848, "step": 23910 }, { "epoch": 0.8205559368565546, "grad_norm": 0.8836190993641835, "learning_rate": 8.21260703262341e-07, "loss": 0.2849, "step": 23911 }, { "epoch": 0.8205902539464653, "grad_norm": 0.7142427545576941, "learning_rate": 8.209555653397766e-07, "loss": 0.2296, "step": 23912 }, { "epoch": 0.8206245710363761, "grad_norm": 0.7285750305634349, "learning_rate": 8.206504790444336e-07, "loss": 0.2064, "step": 23913 }, { "epoch": 0.8206588881262868, "grad_norm": 0.8241591584419048, "learning_rate": 8.203454443800818e-07, "loss": 0.2463, "step": 23914 }, { "epoch": 0.8206932052161977, "grad_norm": 0.8023998645022402, "learning_rate": 8.200404613504887e-07, "loss": 0.219, "step": 23915 }, { "epoch": 0.8207275223061085, "grad_norm": 0.7452126575046878, "learning_rate": 8.197355299594222e-07, "loss": 0.24, "step": 23916 }, { "epoch": 0.8207618393960192, "grad_norm": 0.7272625058704251, "learning_rate": 8.194306502106497e-07, "loss": 0.2546, "step": 23917 }, { "epoch": 0.82079615648593, "grad_norm": 0.8485743269100477, "learning_rate": 8.191258221079373e-07, "loss": 0.274, "step": 23918 }, { "epoch": 0.8208304735758407, "grad_norm": 0.754594659267731, "learning_rate": 8.18821045655051e-07, "loss": 0.2795, "step": 23919 }, { "epoch": 0.8208647906657516, "grad_norm": 0.8221393398296333, "learning_rate": 8.185163208557573e-07, "loss": 0.2533, "step": 23920 }, { "epoch": 0.8208991077556623, "grad_norm": 0.8501465770440234, "learning_rate": 8.182116477138169e-07, "loss": 0.2902, "step": 23921 }, { "epoch": 0.8209334248455731, "grad_norm": 0.9606774993646316, "learning_rate": 8.179070262329969e-07, "loss": 0.2546, "step": 23922 }, { "epoch": 0.8209677419354838, "grad_norm": 0.7800164094048501, "learning_rate": 8.176024564170615e-07, "loss": 0.268, "step": 23923 }, { "epoch": 0.8210020590253947, "grad_norm": 0.8539685753826627, "learning_rate": 8.172979382697688e-07, "loss": 0.3224, "step": 23924 }, { "epoch": 0.8210363761153054, "grad_norm": 0.8001751850500248, "learning_rate": 8.169934717948858e-07, "loss": 0.2742, "step": 23925 }, { "epoch": 0.8210706932052162, "grad_norm": 0.7461115617759383, "learning_rate": 8.166890569961705e-07, "loss": 0.2833, "step": 23926 }, { "epoch": 0.821105010295127, "grad_norm": 0.7952595491375637, "learning_rate": 8.163846938773845e-07, "loss": 0.2925, "step": 23927 }, { "epoch": 0.8211393273850377, "grad_norm": 0.7231964413166907, "learning_rate": 8.160803824422886e-07, "loss": 0.2508, "step": 23928 }, { "epoch": 0.8211736444749486, "grad_norm": 0.7889470045554312, "learning_rate": 8.157761226946409e-07, "loss": 0.2311, "step": 23929 }, { "epoch": 0.8212079615648593, "grad_norm": 0.7559321654777897, "learning_rate": 8.154719146382011e-07, "loss": 0.2082, "step": 23930 }, { "epoch": 0.8212422786547701, "grad_norm": 0.791313135059212, "learning_rate": 8.15167758276727e-07, "loss": 0.3219, "step": 23931 }, { "epoch": 0.8212765957446808, "grad_norm": 0.7699184227533953, "learning_rate": 8.148636536139775e-07, "loss": 0.2338, "step": 23932 }, { "epoch": 0.8213109128345917, "grad_norm": 0.7583047711635953, "learning_rate": 8.14559600653706e-07, "loss": 0.2539, "step": 23933 }, { "epoch": 0.8213452299245024, "grad_norm": 0.7434173051613705, "learning_rate": 8.142555993996731e-07, "loss": 0.2504, "step": 23934 }, { "epoch": 0.8213795470144132, "grad_norm": 0.7561927107301051, "learning_rate": 8.13951649855631e-07, "loss": 0.2932, "step": 23935 }, { "epoch": 0.8214138641043239, "grad_norm": 0.7607466567121378, "learning_rate": 8.136477520253355e-07, "loss": 0.2602, "step": 23936 }, { "epoch": 0.8214481811942347, "grad_norm": 0.6892321656273082, "learning_rate": 8.133439059125414e-07, "loss": 0.2371, "step": 23937 }, { "epoch": 0.8214824982841455, "grad_norm": 0.9162462480903267, "learning_rate": 8.130401115210024e-07, "loss": 0.2484, "step": 23938 }, { "epoch": 0.8215168153740563, "grad_norm": 0.8072399418715306, "learning_rate": 8.127363688544714e-07, "loss": 0.3631, "step": 23939 }, { "epoch": 0.821551132463967, "grad_norm": 0.7341736335527208, "learning_rate": 8.124326779167002e-07, "loss": 0.2504, "step": 23940 }, { "epoch": 0.8215854495538778, "grad_norm": 0.6771356289727885, "learning_rate": 8.121290387114428e-07, "loss": 0.2121, "step": 23941 }, { "epoch": 0.8216197666437886, "grad_norm": 0.6826459165268909, "learning_rate": 8.118254512424456e-07, "loss": 0.2257, "step": 23942 }, { "epoch": 0.8216540837336994, "grad_norm": 0.7506764009365502, "learning_rate": 8.115219155134635e-07, "loss": 0.2496, "step": 23943 }, { "epoch": 0.8216884008236102, "grad_norm": 0.77474250316322, "learning_rate": 8.112184315282456e-07, "loss": 0.267, "step": 23944 }, { "epoch": 0.8217227179135209, "grad_norm": 0.7058458161221322, "learning_rate": 8.109149992905385e-07, "loss": 0.2126, "step": 23945 }, { "epoch": 0.8217570350034317, "grad_norm": 0.7422383082722496, "learning_rate": 8.106116188040946e-07, "loss": 0.2445, "step": 23946 }, { "epoch": 0.8217913520933425, "grad_norm": 0.8798078983883234, "learning_rate": 8.103082900726589e-07, "loss": 0.2459, "step": 23947 }, { "epoch": 0.8218256691832533, "grad_norm": 0.7608620179350434, "learning_rate": 8.100050130999793e-07, "loss": 0.2827, "step": 23948 }, { "epoch": 0.821859986273164, "grad_norm": 0.8668552147149116, "learning_rate": 8.097017878898028e-07, "loss": 0.295, "step": 23949 }, { "epoch": 0.8218943033630748, "grad_norm": 0.7604589459431907, "learning_rate": 8.093986144458754e-07, "loss": 0.2166, "step": 23950 }, { "epoch": 0.8219286204529855, "grad_norm": 0.798091458399968, "learning_rate": 8.090954927719425e-07, "loss": 0.2866, "step": 23951 }, { "epoch": 0.8219629375428964, "grad_norm": 0.7944566122645114, "learning_rate": 8.087924228717487e-07, "loss": 0.341, "step": 23952 }, { "epoch": 0.8219972546328072, "grad_norm": 0.6975025926669928, "learning_rate": 8.084894047490377e-07, "loss": 0.2582, "step": 23953 }, { "epoch": 0.8220315717227179, "grad_norm": 0.8186216025081801, "learning_rate": 8.081864384075533e-07, "loss": 0.244, "step": 23954 }, { "epoch": 0.8220658888126287, "grad_norm": 0.7565228151574879, "learning_rate": 8.078835238510396e-07, "loss": 0.2723, "step": 23955 }, { "epoch": 0.8221002059025395, "grad_norm": 0.7557148184788959, "learning_rate": 8.075806610832348e-07, "loss": 0.2768, "step": 23956 }, { "epoch": 0.8221345229924503, "grad_norm": 0.8694409361107257, "learning_rate": 8.072778501078854e-07, "loss": 0.2696, "step": 23957 }, { "epoch": 0.822168840082361, "grad_norm": 0.8400891595248342, "learning_rate": 8.069750909287294e-07, "loss": 0.2563, "step": 23958 }, { "epoch": 0.8222031571722718, "grad_norm": 0.7989448501491367, "learning_rate": 8.066723835495066e-07, "loss": 0.2499, "step": 23959 }, { "epoch": 0.8222374742621825, "grad_norm": 0.8457341158578009, "learning_rate": 8.063697279739585e-07, "loss": 0.2945, "step": 23960 }, { "epoch": 0.8222717913520934, "grad_norm": 0.8527511959849112, "learning_rate": 8.060671242058227e-07, "loss": 0.2497, "step": 23961 }, { "epoch": 0.8223061084420041, "grad_norm": 0.7934608420502862, "learning_rate": 8.057645722488377e-07, "loss": 0.2855, "step": 23962 }, { "epoch": 0.8223404255319149, "grad_norm": 0.8875683486283041, "learning_rate": 8.054620721067418e-07, "loss": 0.2799, "step": 23963 }, { "epoch": 0.8223747426218256, "grad_norm": 0.815993850657284, "learning_rate": 8.051596237832726e-07, "loss": 0.2411, "step": 23964 }, { "epoch": 0.8224090597117364, "grad_norm": 0.7598299467967076, "learning_rate": 8.048572272821631e-07, "loss": 0.2186, "step": 23965 }, { "epoch": 0.8224433768016473, "grad_norm": 0.8485217399465488, "learning_rate": 8.045548826071525e-07, "loss": 0.2606, "step": 23966 }, { "epoch": 0.822477693891558, "grad_norm": 0.8330866398892551, "learning_rate": 8.042525897619768e-07, "loss": 0.3126, "step": 23967 }, { "epoch": 0.8225120109814688, "grad_norm": 0.7150332354832205, "learning_rate": 8.039503487503658e-07, "loss": 0.2822, "step": 23968 }, { "epoch": 0.8225463280713795, "grad_norm": 0.8077986047708999, "learning_rate": 8.036481595760581e-07, "loss": 0.2282, "step": 23969 }, { "epoch": 0.8225806451612904, "grad_norm": 0.7850754637425414, "learning_rate": 8.033460222427842e-07, "loss": 0.2706, "step": 23970 }, { "epoch": 0.8226149622512011, "grad_norm": 0.756070153892307, "learning_rate": 8.030439367542774e-07, "loss": 0.2397, "step": 23971 }, { "epoch": 0.8226492793411119, "grad_norm": 0.8354502261986562, "learning_rate": 8.027419031142697e-07, "loss": 0.2731, "step": 23972 }, { "epoch": 0.8226835964310226, "grad_norm": 0.7481300086539494, "learning_rate": 8.024399213264916e-07, "loss": 0.2284, "step": 23973 }, { "epoch": 0.8227179135209334, "grad_norm": 0.812137423552717, "learning_rate": 8.021379913946753e-07, "loss": 0.2527, "step": 23974 }, { "epoch": 0.8227522306108442, "grad_norm": 0.782381998221836, "learning_rate": 8.018361133225494e-07, "loss": 0.2504, "step": 23975 }, { "epoch": 0.822786547700755, "grad_norm": 0.8001189062403556, "learning_rate": 8.015342871138449e-07, "loss": 0.2937, "step": 23976 }, { "epoch": 0.8228208647906657, "grad_norm": 0.7608558841111358, "learning_rate": 8.01232512772287e-07, "loss": 0.2825, "step": 23977 }, { "epoch": 0.8228551818805765, "grad_norm": 0.7502633431115703, "learning_rate": 8.009307903016083e-07, "loss": 0.2903, "step": 23978 }, { "epoch": 0.8228894989704874, "grad_norm": 0.7819081715533316, "learning_rate": 8.006291197055332e-07, "loss": 0.2586, "step": 23979 }, { "epoch": 0.8229238160603981, "grad_norm": 0.848174747115647, "learning_rate": 8.003275009877892e-07, "loss": 0.2917, "step": 23980 }, { "epoch": 0.8229581331503089, "grad_norm": 0.866950940929236, "learning_rate": 8.000259341521027e-07, "loss": 0.282, "step": 23981 }, { "epoch": 0.8229924502402196, "grad_norm": 0.7180013334562637, "learning_rate": 7.997244192021975e-07, "loss": 0.2309, "step": 23982 }, { "epoch": 0.8230267673301304, "grad_norm": 0.7829687396752999, "learning_rate": 7.994229561418026e-07, "loss": 0.2882, "step": 23983 }, { "epoch": 0.8230610844200412, "grad_norm": 0.7916134977741587, "learning_rate": 7.991215449746387e-07, "loss": 0.2573, "step": 23984 }, { "epoch": 0.823095401509952, "grad_norm": 0.7397090320270915, "learning_rate": 7.988201857044303e-07, "loss": 0.2318, "step": 23985 }, { "epoch": 0.8231297185998627, "grad_norm": 0.7941257607330453, "learning_rate": 7.985188783349001e-07, "loss": 0.2338, "step": 23986 }, { "epoch": 0.8231640356897735, "grad_norm": 0.7955839596545963, "learning_rate": 7.982176228697713e-07, "loss": 0.2744, "step": 23987 }, { "epoch": 0.8231983527796842, "grad_norm": 0.784436178081813, "learning_rate": 7.979164193127648e-07, "loss": 0.219, "step": 23988 }, { "epoch": 0.8232326698695951, "grad_norm": 0.7719259043449456, "learning_rate": 7.976152676676013e-07, "loss": 0.2786, "step": 23989 }, { "epoch": 0.8232669869595058, "grad_norm": 0.7793639577911499, "learning_rate": 7.973141679380031e-07, "loss": 0.2834, "step": 23990 }, { "epoch": 0.8233013040494166, "grad_norm": 0.6994730093469781, "learning_rate": 7.970131201276865e-07, "loss": 0.3016, "step": 23991 }, { "epoch": 0.8233356211393273, "grad_norm": 0.6844573798572503, "learning_rate": 7.967121242403747e-07, "loss": 0.2186, "step": 23992 }, { "epoch": 0.8233699382292382, "grad_norm": 0.7189937488935582, "learning_rate": 7.964111802797825e-07, "loss": 0.2427, "step": 23993 }, { "epoch": 0.823404255319149, "grad_norm": 0.8771511465068704, "learning_rate": 7.961102882496296e-07, "loss": 0.2543, "step": 23994 }, { "epoch": 0.8234385724090597, "grad_norm": 0.7344047576989851, "learning_rate": 7.958094481536333e-07, "loss": 0.2221, "step": 23995 }, { "epoch": 0.8234728894989705, "grad_norm": 0.7863051913278967, "learning_rate": 7.955086599955086e-07, "loss": 0.2812, "step": 23996 }, { "epoch": 0.8235072065888812, "grad_norm": 0.7572732562817047, "learning_rate": 7.952079237789734e-07, "loss": 0.2638, "step": 23997 }, { "epoch": 0.8235415236787921, "grad_norm": 0.7503609019976941, "learning_rate": 7.949072395077417e-07, "loss": 0.239, "step": 23998 }, { "epoch": 0.8235758407687028, "grad_norm": 0.8178089180142334, "learning_rate": 7.94606607185529e-07, "loss": 0.3143, "step": 23999 }, { "epoch": 0.8236101578586136, "grad_norm": 0.8629047787510803, "learning_rate": 7.943060268160469e-07, "loss": 0.2499, "step": 24000 }, { "epoch": 0.8236444749485243, "grad_norm": 0.8261005280409384, "learning_rate": 7.940054984030126e-07, "loss": 0.2463, "step": 24001 }, { "epoch": 0.8236787920384352, "grad_norm": 0.8400518205363331, "learning_rate": 7.937050219501357e-07, "loss": 0.217, "step": 24002 }, { "epoch": 0.8237131091283459, "grad_norm": 1.011425987007616, "learning_rate": 7.934045974611293e-07, "loss": 0.2738, "step": 24003 }, { "epoch": 0.8237474262182567, "grad_norm": 0.7759386639166715, "learning_rate": 7.931042249397042e-07, "loss": 0.2476, "step": 24004 }, { "epoch": 0.8237817433081674, "grad_norm": 0.7691775129443382, "learning_rate": 7.928039043895719e-07, "loss": 0.2766, "step": 24005 }, { "epoch": 0.8238160603980782, "grad_norm": 0.7527506359110985, "learning_rate": 7.925036358144422e-07, "loss": 0.2478, "step": 24006 }, { "epoch": 0.8238503774879891, "grad_norm": 0.8378700721219139, "learning_rate": 7.92203419218025e-07, "loss": 0.2408, "step": 24007 }, { "epoch": 0.8238846945778998, "grad_norm": 0.7219477539001848, "learning_rate": 7.919032546040295e-07, "loss": 0.2181, "step": 24008 }, { "epoch": 0.8239190116678106, "grad_norm": 0.8471064329922996, "learning_rate": 7.91603141976161e-07, "loss": 0.2603, "step": 24009 }, { "epoch": 0.8239533287577213, "grad_norm": 0.8694986184611043, "learning_rate": 7.9130308133813e-07, "loss": 0.2957, "step": 24010 }, { "epoch": 0.8239876458476321, "grad_norm": 0.7800560044330854, "learning_rate": 7.91003072693644e-07, "loss": 0.2327, "step": 24011 }, { "epoch": 0.8240219629375429, "grad_norm": 0.7392056560789537, "learning_rate": 7.907031160464051e-07, "loss": 0.3005, "step": 24012 }, { "epoch": 0.8240562800274537, "grad_norm": 0.7266601491638662, "learning_rate": 7.904032114001248e-07, "loss": 0.2559, "step": 24013 }, { "epoch": 0.8240905971173644, "grad_norm": 0.8294000159224116, "learning_rate": 7.901033587585017e-07, "loss": 0.1871, "step": 24014 }, { "epoch": 0.8241249142072752, "grad_norm": 0.7953045703302598, "learning_rate": 7.898035581252461e-07, "loss": 0.2356, "step": 24015 }, { "epoch": 0.824159231297186, "grad_norm": 0.7905101029423244, "learning_rate": 7.895038095040575e-07, "loss": 0.2565, "step": 24016 }, { "epoch": 0.8241935483870968, "grad_norm": 0.8186080796706252, "learning_rate": 7.892041128986405e-07, "loss": 0.2503, "step": 24017 }, { "epoch": 0.8242278654770075, "grad_norm": 0.9241700725282007, "learning_rate": 7.889044683126979e-07, "loss": 0.2998, "step": 24018 }, { "epoch": 0.8242621825669183, "grad_norm": 0.8508472406033578, "learning_rate": 7.886048757499304e-07, "loss": 0.247, "step": 24019 }, { "epoch": 0.824296499656829, "grad_norm": 0.8164468102520179, "learning_rate": 7.883053352140396e-07, "loss": 0.2423, "step": 24020 }, { "epoch": 0.8243308167467399, "grad_norm": 0.7790464346128609, "learning_rate": 7.88005846708726e-07, "loss": 0.2652, "step": 24021 }, { "epoch": 0.8243651338366507, "grad_norm": 0.8029153459306715, "learning_rate": 7.877064102376907e-07, "loss": 0.302, "step": 24022 }, { "epoch": 0.8243994509265614, "grad_norm": 0.8770529408075354, "learning_rate": 7.874070258046296e-07, "loss": 0.2855, "step": 24023 }, { "epoch": 0.8244337680164722, "grad_norm": 0.7745153733627584, "learning_rate": 7.871076934132455e-07, "loss": 0.2514, "step": 24024 }, { "epoch": 0.824468085106383, "grad_norm": 0.8412299878371369, "learning_rate": 7.868084130672327e-07, "loss": 0.2442, "step": 24025 }, { "epoch": 0.8245024021962938, "grad_norm": 0.7844609037172873, "learning_rate": 7.865091847702899e-07, "loss": 0.2753, "step": 24026 }, { "epoch": 0.8245367192862045, "grad_norm": 0.9061303384650625, "learning_rate": 7.862100085261137e-07, "loss": 0.2354, "step": 24027 }, { "epoch": 0.8245710363761153, "grad_norm": 0.8490013378999407, "learning_rate": 7.859108843384e-07, "loss": 0.2863, "step": 24028 }, { "epoch": 0.824605353466026, "grad_norm": 0.7915228863488266, "learning_rate": 7.856118122108447e-07, "loss": 0.2769, "step": 24029 }, { "epoch": 0.8246396705559369, "grad_norm": 0.9065073566451757, "learning_rate": 7.853127921471415e-07, "loss": 0.2783, "step": 24030 }, { "epoch": 0.8246739876458477, "grad_norm": 0.7653308116233358, "learning_rate": 7.850138241509858e-07, "loss": 0.2646, "step": 24031 }, { "epoch": 0.8247083047357584, "grad_norm": 0.7490234222169893, "learning_rate": 7.84714908226068e-07, "loss": 0.254, "step": 24032 }, { "epoch": 0.8247426218256692, "grad_norm": 0.7624763463386959, "learning_rate": 7.844160443760846e-07, "loss": 0.3668, "step": 24033 }, { "epoch": 0.8247769389155799, "grad_norm": 0.7584877556398947, "learning_rate": 7.841172326047263e-07, "loss": 0.2603, "step": 24034 }, { "epoch": 0.8248112560054908, "grad_norm": 0.8601292745450463, "learning_rate": 7.838184729156829e-07, "loss": 0.2781, "step": 24035 }, { "epoch": 0.8248455730954015, "grad_norm": 0.8754197082094666, "learning_rate": 7.835197653126487e-07, "loss": 0.3133, "step": 24036 }, { "epoch": 0.8248798901853123, "grad_norm": 0.8358049970566996, "learning_rate": 7.832211097993104e-07, "loss": 0.3269, "step": 24037 }, { "epoch": 0.824914207275223, "grad_norm": 0.7894490424844202, "learning_rate": 7.829225063793594e-07, "loss": 0.2506, "step": 24038 }, { "epoch": 0.8249485243651339, "grad_norm": 0.8889075854425131, "learning_rate": 7.826239550564841e-07, "loss": 0.2219, "step": 24039 }, { "epoch": 0.8249828414550446, "grad_norm": 0.7968974572183553, "learning_rate": 7.823254558343729e-07, "loss": 0.2839, "step": 24040 }, { "epoch": 0.8250171585449554, "grad_norm": 0.7949114751374976, "learning_rate": 7.820270087167131e-07, "loss": 0.2803, "step": 24041 }, { "epoch": 0.8250514756348661, "grad_norm": 0.8153793939068836, "learning_rate": 7.817286137071923e-07, "loss": 0.2819, "step": 24042 }, { "epoch": 0.8250857927247769, "grad_norm": 0.8569481588613643, "learning_rate": 7.814302708094973e-07, "loss": 0.3015, "step": 24043 }, { "epoch": 0.8251201098146878, "grad_norm": 0.9434572011099827, "learning_rate": 7.811319800273103e-07, "loss": 0.2994, "step": 24044 }, { "epoch": 0.8251544269045985, "grad_norm": 0.821703894063172, "learning_rate": 7.808337413643219e-07, "loss": 0.2283, "step": 24045 }, { "epoch": 0.8251887439945093, "grad_norm": 0.8757120483423596, "learning_rate": 7.80535554824211e-07, "loss": 0.2805, "step": 24046 }, { "epoch": 0.82522306108442, "grad_norm": 0.889083963712389, "learning_rate": 7.802374204106666e-07, "loss": 0.3044, "step": 24047 }, { "epoch": 0.8252573781743309, "grad_norm": 0.8911210377955863, "learning_rate": 7.799393381273673e-07, "loss": 0.2817, "step": 24048 }, { "epoch": 0.8252916952642416, "grad_norm": 0.8748950201421367, "learning_rate": 7.796413079779964e-07, "loss": 0.2704, "step": 24049 }, { "epoch": 0.8253260123541524, "grad_norm": 0.7680243013079657, "learning_rate": 7.793433299662395e-07, "loss": 0.2257, "step": 24050 }, { "epoch": 0.8253603294440631, "grad_norm": 0.7194873903313669, "learning_rate": 7.790454040957734e-07, "loss": 0.2237, "step": 24051 }, { "epoch": 0.8253946465339739, "grad_norm": 0.8090171606980013, "learning_rate": 7.787475303702801e-07, "loss": 0.2882, "step": 24052 }, { "epoch": 0.8254289636238847, "grad_norm": 0.83802847720352, "learning_rate": 7.784497087934395e-07, "loss": 0.3156, "step": 24053 }, { "epoch": 0.8254632807137955, "grad_norm": 0.9073704712141675, "learning_rate": 7.781519393689308e-07, "loss": 0.2233, "step": 24054 }, { "epoch": 0.8254975978037062, "grad_norm": 0.7815931286756478, "learning_rate": 7.778542221004332e-07, "loss": 0.2533, "step": 24055 }, { "epoch": 0.825531914893617, "grad_norm": 0.7551241186614845, "learning_rate": 7.775565569916238e-07, "loss": 0.2968, "step": 24056 }, { "epoch": 0.8255662319835277, "grad_norm": 0.85646097284281, "learning_rate": 7.772589440461814e-07, "loss": 0.2417, "step": 24057 }, { "epoch": 0.8256005490734386, "grad_norm": 0.8007862792925087, "learning_rate": 7.769613832677791e-07, "loss": 0.2725, "step": 24058 }, { "epoch": 0.8256348661633494, "grad_norm": 0.7627594110051226, "learning_rate": 7.76663874660098e-07, "loss": 0.2585, "step": 24059 }, { "epoch": 0.8256691832532601, "grad_norm": 0.7342071014188162, "learning_rate": 7.763664182268093e-07, "loss": 0.2452, "step": 24060 }, { "epoch": 0.8257035003431709, "grad_norm": 0.7051491938061649, "learning_rate": 7.760690139715893e-07, "loss": 0.2313, "step": 24061 }, { "epoch": 0.8257378174330817, "grad_norm": 0.7033516358802032, "learning_rate": 7.757716618981121e-07, "loss": 0.2689, "step": 24062 }, { "epoch": 0.8257721345229925, "grad_norm": 0.7139551141848944, "learning_rate": 7.754743620100513e-07, "loss": 0.2505, "step": 24063 }, { "epoch": 0.8258064516129032, "grad_norm": 0.7797936086888025, "learning_rate": 7.751771143110792e-07, "loss": 0.2748, "step": 24064 }, { "epoch": 0.825840768702814, "grad_norm": 0.7953856555182967, "learning_rate": 7.748799188048678e-07, "loss": 0.2731, "step": 24065 }, { "epoch": 0.8258750857927247, "grad_norm": 0.7205920516719391, "learning_rate": 7.745827754950907e-07, "loss": 0.2283, "step": 24066 }, { "epoch": 0.8259094028826356, "grad_norm": 0.7665734794815386, "learning_rate": 7.742856843854146e-07, "loss": 0.24, "step": 24067 }, { "epoch": 0.8259437199725463, "grad_norm": 0.7341291028196707, "learning_rate": 7.739886454795143e-07, "loss": 0.2781, "step": 24068 }, { "epoch": 0.8259780370624571, "grad_norm": 0.8004653938400155, "learning_rate": 7.736916587810561e-07, "loss": 0.2735, "step": 24069 }, { "epoch": 0.8260123541523678, "grad_norm": 0.8145128867300048, "learning_rate": 7.733947242937101e-07, "loss": 0.2774, "step": 24070 }, { "epoch": 0.8260466712422787, "grad_norm": 0.7775767501946736, "learning_rate": 7.730978420211449e-07, "loss": 0.2036, "step": 24071 }, { "epoch": 0.8260809883321895, "grad_norm": 0.8142195696197598, "learning_rate": 7.72801011967027e-07, "loss": 0.2625, "step": 24072 }, { "epoch": 0.8261153054221002, "grad_norm": 0.8020141415854671, "learning_rate": 7.725042341350247e-07, "loss": 0.2817, "step": 24073 }, { "epoch": 0.826149622512011, "grad_norm": 0.7190038087329312, "learning_rate": 7.722075085288039e-07, "loss": 0.2365, "step": 24074 }, { "epoch": 0.8261839396019217, "grad_norm": 0.7559174723006997, "learning_rate": 7.719108351520299e-07, "loss": 0.2522, "step": 24075 }, { "epoch": 0.8262182566918326, "grad_norm": 0.9566402684626943, "learning_rate": 7.716142140083682e-07, "loss": 0.2213, "step": 24076 }, { "epoch": 0.8262525737817433, "grad_norm": 0.7063101051969842, "learning_rate": 7.713176451014831e-07, "loss": 0.2286, "step": 24077 }, { "epoch": 0.8262868908716541, "grad_norm": 0.8687522543333018, "learning_rate": 7.710211284350383e-07, "loss": 0.2782, "step": 24078 }, { "epoch": 0.8263212079615648, "grad_norm": 0.7404750304891301, "learning_rate": 7.70724664012697e-07, "loss": 0.2611, "step": 24079 }, { "epoch": 0.8263555250514756, "grad_norm": 0.7739692904448622, "learning_rate": 7.704282518381229e-07, "loss": 0.2448, "step": 24080 }, { "epoch": 0.8263898421413864, "grad_norm": 0.958822430507494, "learning_rate": 7.701318919149736e-07, "loss": 0.2636, "step": 24081 }, { "epoch": 0.8264241592312972, "grad_norm": 0.8949569629503683, "learning_rate": 7.698355842469163e-07, "loss": 0.239, "step": 24082 }, { "epoch": 0.826458476321208, "grad_norm": 0.8650204784354921, "learning_rate": 7.695393288376069e-07, "loss": 0.2647, "step": 24083 }, { "epoch": 0.8264927934111187, "grad_norm": 0.7948457366823152, "learning_rate": 7.692431256907068e-07, "loss": 0.2086, "step": 24084 }, { "epoch": 0.8265271105010296, "grad_norm": 0.6739904432287956, "learning_rate": 7.689469748098754e-07, "loss": 0.2474, "step": 24085 }, { "epoch": 0.8265614275909403, "grad_norm": 0.7440659380416659, "learning_rate": 7.686508761987709e-07, "loss": 0.2476, "step": 24086 }, { "epoch": 0.8265957446808511, "grad_norm": 0.7793016438167909, "learning_rate": 7.683548298610522e-07, "loss": 0.292, "step": 24087 }, { "epoch": 0.8266300617707618, "grad_norm": 0.7871602052421576, "learning_rate": 7.680588358003755e-07, "loss": 0.2441, "step": 24088 }, { "epoch": 0.8266643788606726, "grad_norm": 0.7873475994506839, "learning_rate": 7.677628940203996e-07, "loss": 0.2445, "step": 24089 }, { "epoch": 0.8266986959505834, "grad_norm": 0.8011733778192764, "learning_rate": 7.674670045247757e-07, "loss": 0.2788, "step": 24090 }, { "epoch": 0.8267330130404942, "grad_norm": 0.7860620072049022, "learning_rate": 7.67171167317165e-07, "loss": 0.2955, "step": 24091 }, { "epoch": 0.8267673301304049, "grad_norm": 0.769354398965077, "learning_rate": 7.668753824012187e-07, "loss": 0.241, "step": 24092 }, { "epoch": 0.8268016472203157, "grad_norm": 0.7526314762733641, "learning_rate": 7.6657964978059e-07, "loss": 0.2897, "step": 24093 }, { "epoch": 0.8268359643102265, "grad_norm": 0.7388941287357664, "learning_rate": 7.662839694589369e-07, "loss": 0.2295, "step": 24094 }, { "epoch": 0.8268702814001373, "grad_norm": 0.7827626109985107, "learning_rate": 7.659883414399077e-07, "loss": 0.2935, "step": 24095 }, { "epoch": 0.826904598490048, "grad_norm": 0.7479207505484872, "learning_rate": 7.656927657271562e-07, "loss": 0.2699, "step": 24096 }, { "epoch": 0.8269389155799588, "grad_norm": 0.7995974703638601, "learning_rate": 7.653972423243344e-07, "loss": 0.2927, "step": 24097 }, { "epoch": 0.8269732326698696, "grad_norm": 0.8123682248694064, "learning_rate": 7.651017712350923e-07, "loss": 0.2393, "step": 24098 }, { "epoch": 0.8270075497597804, "grad_norm": 0.7362215324630695, "learning_rate": 7.6480635246308e-07, "loss": 0.2615, "step": 24099 }, { "epoch": 0.8270418668496912, "grad_norm": 0.814721569704447, "learning_rate": 7.645109860119476e-07, "loss": 0.3053, "step": 24100 }, { "epoch": 0.8270761839396019, "grad_norm": 0.7781014043412056, "learning_rate": 7.642156718853455e-07, "loss": 0.2481, "step": 24101 }, { "epoch": 0.8271105010295127, "grad_norm": 0.845349893919376, "learning_rate": 7.639204100869175e-07, "loss": 0.2499, "step": 24102 }, { "epoch": 0.8271448181194234, "grad_norm": 0.7659278059491604, "learning_rate": 7.636252006203166e-07, "loss": 0.3357, "step": 24103 }, { "epoch": 0.8271791352093343, "grad_norm": 0.7438911479955068, "learning_rate": 7.633300434891861e-07, "loss": 0.2703, "step": 24104 }, { "epoch": 0.827213452299245, "grad_norm": 0.855724457306855, "learning_rate": 7.630349386971736e-07, "loss": 0.2374, "step": 24105 }, { "epoch": 0.8272477693891558, "grad_norm": 0.7989479047108285, "learning_rate": 7.627398862479241e-07, "loss": 0.2783, "step": 24106 }, { "epoch": 0.8272820864790665, "grad_norm": 0.8587495076119156, "learning_rate": 7.624448861450834e-07, "loss": 0.2748, "step": 24107 }, { "epoch": 0.8273164035689774, "grad_norm": 0.73225874961239, "learning_rate": 7.621499383922959e-07, "loss": 0.223, "step": 24108 }, { "epoch": 0.8273507206588882, "grad_norm": 0.8018573934065639, "learning_rate": 7.618550429932048e-07, "loss": 0.2439, "step": 24109 }, { "epoch": 0.8273850377487989, "grad_norm": 0.8042896762475596, "learning_rate": 7.615601999514532e-07, "loss": 0.2899, "step": 24110 }, { "epoch": 0.8274193548387097, "grad_norm": 0.7929781282413472, "learning_rate": 7.612654092706839e-07, "loss": 0.2345, "step": 24111 }, { "epoch": 0.8274536719286204, "grad_norm": 0.7202370244666286, "learning_rate": 7.609706709545401e-07, "loss": 0.2388, "step": 24112 }, { "epoch": 0.8274879890185313, "grad_norm": 0.7466030576167582, "learning_rate": 7.606759850066586e-07, "loss": 0.281, "step": 24113 }, { "epoch": 0.827522306108442, "grad_norm": 0.7922920917893306, "learning_rate": 7.603813514306857e-07, "loss": 0.2774, "step": 24114 }, { "epoch": 0.8275566231983528, "grad_norm": 0.8281649007551632, "learning_rate": 7.600867702302567e-07, "loss": 0.2489, "step": 24115 }, { "epoch": 0.8275909402882635, "grad_norm": 0.7876213540688393, "learning_rate": 7.597922414090114e-07, "loss": 0.3051, "step": 24116 }, { "epoch": 0.8276252573781744, "grad_norm": 0.6654026751548787, "learning_rate": 7.594977649705915e-07, "loss": 0.2371, "step": 24117 }, { "epoch": 0.8276595744680851, "grad_norm": 0.8532698619600505, "learning_rate": 7.592033409186317e-07, "loss": 0.2611, "step": 24118 }, { "epoch": 0.8276938915579959, "grad_norm": 0.7878891763982674, "learning_rate": 7.589089692567703e-07, "loss": 0.2735, "step": 24119 }, { "epoch": 0.8277282086479066, "grad_norm": 0.7770208017598519, "learning_rate": 7.586146499886437e-07, "loss": 0.2335, "step": 24120 }, { "epoch": 0.8277625257378174, "grad_norm": 0.8112517749390266, "learning_rate": 7.583203831178887e-07, "loss": 0.2783, "step": 24121 }, { "epoch": 0.8277968428277283, "grad_norm": 0.8369644737082061, "learning_rate": 7.580261686481394e-07, "loss": 0.2542, "step": 24122 }, { "epoch": 0.827831159917639, "grad_norm": 0.8958513302914642, "learning_rate": 7.577320065830319e-07, "loss": 0.2597, "step": 24123 }, { "epoch": 0.8278654770075498, "grad_norm": 0.7430604321718146, "learning_rate": 7.574378969261997e-07, "loss": 0.2107, "step": 24124 }, { "epoch": 0.8278997940974605, "grad_norm": 0.8182605561534707, "learning_rate": 7.571438396812741e-07, "loss": 0.3211, "step": 24125 }, { "epoch": 0.8279341111873713, "grad_norm": 0.7581527147666464, "learning_rate": 7.568498348518921e-07, "loss": 0.2928, "step": 24126 }, { "epoch": 0.8279684282772821, "grad_norm": 0.7595884219453204, "learning_rate": 7.565558824416819e-07, "loss": 0.2332, "step": 24127 }, { "epoch": 0.8280027453671929, "grad_norm": 0.8204652564569718, "learning_rate": 7.562619824542766e-07, "loss": 0.2714, "step": 24128 }, { "epoch": 0.8280370624571036, "grad_norm": 0.7703369628336488, "learning_rate": 7.559681348933067e-07, "loss": 0.2349, "step": 24129 }, { "epoch": 0.8280713795470144, "grad_norm": 0.6881308240271903, "learning_rate": 7.556743397624028e-07, "loss": 0.2469, "step": 24130 }, { "epoch": 0.8281056966369252, "grad_norm": 0.7251610948705396, "learning_rate": 7.553805970651934e-07, "loss": 0.2666, "step": 24131 }, { "epoch": 0.828140013726836, "grad_norm": 0.777531518897938, "learning_rate": 7.550869068053085e-07, "loss": 0.3284, "step": 24132 }, { "epoch": 0.8281743308167467, "grad_norm": 0.7395138118549925, "learning_rate": 7.547932689863768e-07, "loss": 0.259, "step": 24133 }, { "epoch": 0.8282086479066575, "grad_norm": 0.7952251516892422, "learning_rate": 7.544996836120222e-07, "loss": 0.2729, "step": 24134 }, { "epoch": 0.8282429649965682, "grad_norm": 0.7752469569436697, "learning_rate": 7.542061506858772e-07, "loss": 0.2403, "step": 24135 }, { "epoch": 0.8282772820864791, "grad_norm": 0.8097225228508652, "learning_rate": 7.539126702115634e-07, "loss": 0.2469, "step": 24136 }, { "epoch": 0.8283115991763899, "grad_norm": 0.7358765164245747, "learning_rate": 7.536192421927069e-07, "loss": 0.2369, "step": 24137 }, { "epoch": 0.8283459162663006, "grad_norm": 0.7917921037757123, "learning_rate": 7.533258666329362e-07, "loss": 0.2896, "step": 24138 }, { "epoch": 0.8283802333562114, "grad_norm": 0.7199753088135031, "learning_rate": 7.530325435358715e-07, "loss": 0.3072, "step": 24139 }, { "epoch": 0.8284145504461222, "grad_norm": 0.8137377498486269, "learning_rate": 7.527392729051402e-07, "loss": 0.2349, "step": 24140 }, { "epoch": 0.828448867536033, "grad_norm": 0.7584940497002987, "learning_rate": 7.524460547443618e-07, "loss": 0.2617, "step": 24141 }, { "epoch": 0.8284831846259437, "grad_norm": 0.7816128564945988, "learning_rate": 7.521528890571611e-07, "loss": 0.2441, "step": 24142 }, { "epoch": 0.8285175017158545, "grad_norm": 0.7358441304282068, "learning_rate": 7.518597758471579e-07, "loss": 0.2124, "step": 24143 }, { "epoch": 0.8285518188057652, "grad_norm": 0.7251963403579946, "learning_rate": 7.515667151179751e-07, "loss": 0.2425, "step": 24144 }, { "epoch": 0.8285861358956761, "grad_norm": 0.8224125654257822, "learning_rate": 7.512737068732323e-07, "loss": 0.2409, "step": 24145 }, { "epoch": 0.8286204529855868, "grad_norm": 0.7579046161416412, "learning_rate": 7.50980751116549e-07, "loss": 0.266, "step": 24146 }, { "epoch": 0.8286547700754976, "grad_norm": 0.9097556402408503, "learning_rate": 7.506878478515462e-07, "loss": 0.2349, "step": 24147 }, { "epoch": 0.8286890871654083, "grad_norm": 0.8915944781260696, "learning_rate": 7.50394997081838e-07, "loss": 0.225, "step": 24148 }, { "epoch": 0.8287234042553191, "grad_norm": 0.7388611082990735, "learning_rate": 7.501021988110474e-07, "loss": 0.284, "step": 24149 }, { "epoch": 0.82875772134523, "grad_norm": 0.8554337860624478, "learning_rate": 7.498094530427885e-07, "loss": 0.2617, "step": 24150 }, { "epoch": 0.8287920384351407, "grad_norm": 0.789571264808312, "learning_rate": 7.495167597806785e-07, "loss": 0.2439, "step": 24151 }, { "epoch": 0.8288263555250515, "grad_norm": 0.8462725226824623, "learning_rate": 7.492241190283328e-07, "loss": 0.2983, "step": 24152 }, { "epoch": 0.8288606726149622, "grad_norm": 0.8216919558986655, "learning_rate": 7.48931530789368e-07, "loss": 0.2583, "step": 24153 }, { "epoch": 0.8288949897048731, "grad_norm": 0.7556529015378509, "learning_rate": 7.486389950673973e-07, "loss": 0.2498, "step": 24154 }, { "epoch": 0.8289293067947838, "grad_norm": 0.8407525947145769, "learning_rate": 7.483465118660354e-07, "loss": 0.2193, "step": 24155 }, { "epoch": 0.8289636238846946, "grad_norm": 0.7898537511135452, "learning_rate": 7.480540811888965e-07, "loss": 0.2512, "step": 24156 }, { "epoch": 0.8289979409746053, "grad_norm": 0.8672501584852644, "learning_rate": 7.477617030395895e-07, "loss": 0.3173, "step": 24157 }, { "epoch": 0.8290322580645161, "grad_norm": 0.7228020322683514, "learning_rate": 7.474693774217318e-07, "loss": 0.332, "step": 24158 }, { "epoch": 0.8290665751544269, "grad_norm": 0.846637614365351, "learning_rate": 7.471771043389309e-07, "loss": 0.2914, "step": 24159 }, { "epoch": 0.8291008922443377, "grad_norm": 0.7866341884865413, "learning_rate": 7.468848837947973e-07, "loss": 0.2726, "step": 24160 }, { "epoch": 0.8291352093342484, "grad_norm": 0.7007874052847485, "learning_rate": 7.465927157929442e-07, "loss": 0.2481, "step": 24161 }, { "epoch": 0.8291695264241592, "grad_norm": 0.6866125988810065, "learning_rate": 7.463006003369783e-07, "loss": 0.2409, "step": 24162 }, { "epoch": 0.8292038435140701, "grad_norm": 0.8164521865292481, "learning_rate": 7.460085374305092e-07, "loss": 0.2563, "step": 24163 }, { "epoch": 0.8292381606039808, "grad_norm": 0.8201882748525625, "learning_rate": 7.457165270771449e-07, "loss": 0.2516, "step": 24164 }, { "epoch": 0.8292724776938916, "grad_norm": 0.8211131675441838, "learning_rate": 7.454245692804929e-07, "loss": 0.2645, "step": 24165 }, { "epoch": 0.8293067947838023, "grad_norm": 0.7342398572342494, "learning_rate": 7.451326640441603e-07, "loss": 0.2536, "step": 24166 }, { "epoch": 0.8293411118737131, "grad_norm": 0.8281704054791305, "learning_rate": 7.448408113717526e-07, "loss": 0.3043, "step": 24167 }, { "epoch": 0.8293754289636239, "grad_norm": 0.9211964750083618, "learning_rate": 7.445490112668763e-07, "loss": 0.2639, "step": 24168 }, { "epoch": 0.8294097460535347, "grad_norm": 0.6980307592581595, "learning_rate": 7.442572637331341e-07, "loss": 0.2415, "step": 24169 }, { "epoch": 0.8294440631434454, "grad_norm": 0.7908122834568698, "learning_rate": 7.439655687741337e-07, "loss": 0.2714, "step": 24170 }, { "epoch": 0.8294783802333562, "grad_norm": 0.8040968651306082, "learning_rate": 7.436739263934745e-07, "loss": 0.279, "step": 24171 }, { "epoch": 0.8295126973232669, "grad_norm": 0.785251633985369, "learning_rate": 7.433823365947635e-07, "loss": 0.2479, "step": 24172 }, { "epoch": 0.8295470144131778, "grad_norm": 0.8232757649853725, "learning_rate": 7.430907993816e-07, "loss": 0.2656, "step": 24173 }, { "epoch": 0.8295813315030885, "grad_norm": 0.7328093068733144, "learning_rate": 7.427993147575869e-07, "loss": 0.2444, "step": 24174 }, { "epoch": 0.8296156485929993, "grad_norm": 0.8402791149995463, "learning_rate": 7.425078827263244e-07, "loss": 0.2901, "step": 24175 }, { "epoch": 0.82964996568291, "grad_norm": 0.7528012181981623, "learning_rate": 7.42216503291413e-07, "loss": 0.2777, "step": 24176 }, { "epoch": 0.8296842827728209, "grad_norm": 0.7925280690973916, "learning_rate": 7.419251764564533e-07, "loss": 0.2709, "step": 24177 }, { "epoch": 0.8297185998627317, "grad_norm": 0.8617289781198247, "learning_rate": 7.416339022250435e-07, "loss": 0.2576, "step": 24178 }, { "epoch": 0.8297529169526424, "grad_norm": 0.7979809862573073, "learning_rate": 7.413426806007828e-07, "loss": 0.3093, "step": 24179 }, { "epoch": 0.8297872340425532, "grad_norm": 0.8014916419595849, "learning_rate": 7.410515115872663e-07, "loss": 0.2548, "step": 24180 }, { "epoch": 0.8298215511324639, "grad_norm": 0.838252972681236, "learning_rate": 7.407603951880949e-07, "loss": 0.2904, "step": 24181 }, { "epoch": 0.8298558682223748, "grad_norm": 0.8440037031989762, "learning_rate": 7.404693314068623e-07, "loss": 0.2079, "step": 24182 }, { "epoch": 0.8298901853122855, "grad_norm": 0.7888814829616707, "learning_rate": 7.401783202471635e-07, "loss": 0.3026, "step": 24183 }, { "epoch": 0.8299245024021963, "grad_norm": 0.8972724296925149, "learning_rate": 7.398873617125974e-07, "loss": 0.2739, "step": 24184 }, { "epoch": 0.829958819492107, "grad_norm": 0.8041214598455745, "learning_rate": 7.395964558067548e-07, "loss": 0.2318, "step": 24185 }, { "epoch": 0.8299931365820179, "grad_norm": 0.8458928093557234, "learning_rate": 7.393056025332312e-07, "loss": 0.2513, "step": 24186 }, { "epoch": 0.8300274536719287, "grad_norm": 0.763878428021596, "learning_rate": 7.390148018956194e-07, "loss": 0.2225, "step": 24187 }, { "epoch": 0.8300617707618394, "grad_norm": 0.7289952445718137, "learning_rate": 7.387240538975116e-07, "loss": 0.2402, "step": 24188 }, { "epoch": 0.8300960878517502, "grad_norm": 0.8724797137900232, "learning_rate": 7.384333585425002e-07, "loss": 0.2729, "step": 24189 }, { "epoch": 0.8301304049416609, "grad_norm": 0.7758364712224864, "learning_rate": 7.381427158341759e-07, "loss": 0.2575, "step": 24190 }, { "epoch": 0.8301647220315718, "grad_norm": 0.8001185359604812, "learning_rate": 7.378521257761306e-07, "loss": 0.268, "step": 24191 }, { "epoch": 0.8301990391214825, "grad_norm": 0.8277112960875973, "learning_rate": 7.375615883719512e-07, "loss": 0.2488, "step": 24192 }, { "epoch": 0.8302333562113933, "grad_norm": 0.8328054974712542, "learning_rate": 7.372711036252311e-07, "loss": 0.2682, "step": 24193 }, { "epoch": 0.830267673301304, "grad_norm": 0.9512204683176824, "learning_rate": 7.369806715395555e-07, "loss": 0.2415, "step": 24194 }, { "epoch": 0.8303019903912148, "grad_norm": 0.7671309818870008, "learning_rate": 7.366902921185132e-07, "loss": 0.2356, "step": 24195 }, { "epoch": 0.8303363074811256, "grad_norm": 0.8987832114605625, "learning_rate": 7.363999653656917e-07, "loss": 0.2647, "step": 24196 }, { "epoch": 0.8303706245710364, "grad_norm": 0.9054452452733156, "learning_rate": 7.361096912846783e-07, "loss": 0.2529, "step": 24197 }, { "epoch": 0.8304049416609471, "grad_norm": 0.825217238007408, "learning_rate": 7.358194698790583e-07, "loss": 0.2651, "step": 24198 }, { "epoch": 0.8304392587508579, "grad_norm": 0.946215324586546, "learning_rate": 7.355293011524167e-07, "loss": 0.3142, "step": 24199 }, { "epoch": 0.8304735758407688, "grad_norm": 0.7371993334810742, "learning_rate": 7.352391851083407e-07, "loss": 0.2671, "step": 24200 }, { "epoch": 0.8305078929306795, "grad_norm": 0.7020840502860696, "learning_rate": 7.349491217504095e-07, "loss": 0.2572, "step": 24201 }, { "epoch": 0.8305422100205903, "grad_norm": 0.7923176274297442, "learning_rate": 7.346591110822116e-07, "loss": 0.2246, "step": 24202 }, { "epoch": 0.830576527110501, "grad_norm": 0.8100102981507235, "learning_rate": 7.343691531073249e-07, "loss": 0.2765, "step": 24203 }, { "epoch": 0.8306108442004118, "grad_norm": 0.8010686506615292, "learning_rate": 7.340792478293357e-07, "loss": 0.2608, "step": 24204 }, { "epoch": 0.8306451612903226, "grad_norm": 0.7482427421165558, "learning_rate": 7.337893952518249e-07, "loss": 0.3413, "step": 24205 }, { "epoch": 0.8306794783802334, "grad_norm": 0.8548816955878985, "learning_rate": 7.334995953783702e-07, "loss": 0.297, "step": 24206 }, { "epoch": 0.8307137954701441, "grad_norm": 0.80631790167266, "learning_rate": 7.332098482125555e-07, "loss": 0.2622, "step": 24207 }, { "epoch": 0.8307481125600549, "grad_norm": 0.8158048852881584, "learning_rate": 7.329201537579577e-07, "loss": 0.2506, "step": 24208 }, { "epoch": 0.8307824296499657, "grad_norm": 1.0395923832794323, "learning_rate": 7.326305120181565e-07, "loss": 0.2763, "step": 24209 }, { "epoch": 0.8308167467398765, "grad_norm": 0.8376166786800744, "learning_rate": 7.323409229967299e-07, "loss": 0.286, "step": 24210 }, { "epoch": 0.8308510638297872, "grad_norm": 0.8789375484950138, "learning_rate": 7.320513866972551e-07, "loss": 0.2491, "step": 24211 }, { "epoch": 0.830885380919698, "grad_norm": 0.7181660830153938, "learning_rate": 7.317619031233103e-07, "loss": 0.2572, "step": 24212 }, { "epoch": 0.8309196980096087, "grad_norm": 0.7820218703989501, "learning_rate": 7.314724722784705e-07, "loss": 0.2867, "step": 24213 }, { "epoch": 0.8309540150995196, "grad_norm": 0.7428348939073778, "learning_rate": 7.311830941663129e-07, "loss": 0.2844, "step": 24214 }, { "epoch": 0.8309883321894304, "grad_norm": 0.7781129441616452, "learning_rate": 7.308937687904088e-07, "loss": 0.2579, "step": 24215 }, { "epoch": 0.8310226492793411, "grad_norm": 0.7579263077037708, "learning_rate": 7.306044961543374e-07, "loss": 0.2219, "step": 24216 }, { "epoch": 0.8310569663692519, "grad_norm": 0.719136240605614, "learning_rate": 7.303152762616683e-07, "loss": 0.2149, "step": 24217 }, { "epoch": 0.8310912834591626, "grad_norm": 0.8595621152429245, "learning_rate": 7.300261091159767e-07, "loss": 0.2592, "step": 24218 }, { "epoch": 0.8311256005490735, "grad_norm": 0.8303199370772729, "learning_rate": 7.297369947208338e-07, "loss": 0.2789, "step": 24219 }, { "epoch": 0.8311599176389842, "grad_norm": 0.7403419098574407, "learning_rate": 7.294479330798116e-07, "loss": 0.2754, "step": 24220 }, { "epoch": 0.831194234728895, "grad_norm": 0.7434361791066283, "learning_rate": 7.291589241964814e-07, "loss": 0.2951, "step": 24221 }, { "epoch": 0.8312285518188057, "grad_norm": 0.7696237072451664, "learning_rate": 7.288699680744133e-07, "loss": 0.2523, "step": 24222 }, { "epoch": 0.8312628689087166, "grad_norm": 0.7914665083048755, "learning_rate": 7.285810647171782e-07, "loss": 0.2119, "step": 24223 }, { "epoch": 0.8312971859986273, "grad_norm": 0.765538747876299, "learning_rate": 7.282922141283416e-07, "loss": 0.2408, "step": 24224 }, { "epoch": 0.8313315030885381, "grad_norm": 1.0567966127569461, "learning_rate": 7.280034163114768e-07, "loss": 0.2703, "step": 24225 }, { "epoch": 0.8313658201784488, "grad_norm": 0.697386467066886, "learning_rate": 7.277146712701477e-07, "loss": 0.2741, "step": 24226 }, { "epoch": 0.8314001372683596, "grad_norm": 0.7173070758914865, "learning_rate": 7.274259790079219e-07, "loss": 0.2283, "step": 24227 }, { "epoch": 0.8314344543582705, "grad_norm": 0.7480593587800407, "learning_rate": 7.271373395283693e-07, "loss": 0.2463, "step": 24228 }, { "epoch": 0.8314687714481812, "grad_norm": 0.726155331462647, "learning_rate": 7.268487528350521e-07, "loss": 0.2214, "step": 24229 }, { "epoch": 0.831503088538092, "grad_norm": 0.82546488171323, "learning_rate": 7.265602189315363e-07, "loss": 0.2556, "step": 24230 }, { "epoch": 0.8315374056280027, "grad_norm": 0.7760918933452521, "learning_rate": 7.262717378213868e-07, "loss": 0.2405, "step": 24231 }, { "epoch": 0.8315717227179136, "grad_norm": 0.8656505543315957, "learning_rate": 7.25983309508167e-07, "loss": 0.2606, "step": 24232 }, { "epoch": 0.8316060398078243, "grad_norm": 0.8328822716863331, "learning_rate": 7.256949339954406e-07, "loss": 0.2068, "step": 24233 }, { "epoch": 0.8316403568977351, "grad_norm": 0.9014553059026761, "learning_rate": 7.254066112867702e-07, "loss": 0.2653, "step": 24234 }, { "epoch": 0.8316746739876458, "grad_norm": 0.7899846142617082, "learning_rate": 7.251183413857171e-07, "loss": 0.2843, "step": 24235 }, { "epoch": 0.8317089910775566, "grad_norm": 0.8550669667526684, "learning_rate": 7.248301242958428e-07, "loss": 0.2903, "step": 24236 }, { "epoch": 0.8317433081674674, "grad_norm": 0.7126584853750039, "learning_rate": 7.245419600207093e-07, "loss": 0.1969, "step": 24237 }, { "epoch": 0.8317776252573782, "grad_norm": 0.7895036416407267, "learning_rate": 7.242538485638728e-07, "loss": 0.2778, "step": 24238 }, { "epoch": 0.831811942347289, "grad_norm": 0.7629082439212646, "learning_rate": 7.239657899288971e-07, "loss": 0.2903, "step": 24239 }, { "epoch": 0.8318462594371997, "grad_norm": 0.8450861210460564, "learning_rate": 7.236777841193377e-07, "loss": 0.2408, "step": 24240 }, { "epoch": 0.8318805765271104, "grad_norm": 0.7256813261925357, "learning_rate": 7.233898311387538e-07, "loss": 0.2417, "step": 24241 }, { "epoch": 0.8319148936170213, "grad_norm": 0.7917842089329683, "learning_rate": 7.231019309907023e-07, "loss": 0.3061, "step": 24242 }, { "epoch": 0.8319492107069321, "grad_norm": 0.7407545606241766, "learning_rate": 7.2281408367874e-07, "loss": 0.2434, "step": 24243 }, { "epoch": 0.8319835277968428, "grad_norm": 0.7560088758242888, "learning_rate": 7.225262892064227e-07, "loss": 0.2784, "step": 24244 }, { "epoch": 0.8320178448867536, "grad_norm": 0.8178228221469515, "learning_rate": 7.222385475773064e-07, "loss": 0.2227, "step": 24245 }, { "epoch": 0.8320521619766644, "grad_norm": 0.7791935813104366, "learning_rate": 7.219508587949464e-07, "loss": 0.2653, "step": 24246 }, { "epoch": 0.8320864790665752, "grad_norm": 0.8448411480037596, "learning_rate": 7.21663222862894e-07, "loss": 0.2778, "step": 24247 }, { "epoch": 0.8321207961564859, "grad_norm": 0.7760588479045872, "learning_rate": 7.213756397847054e-07, "loss": 0.2352, "step": 24248 }, { "epoch": 0.8321551132463967, "grad_norm": 0.8359932425556816, "learning_rate": 7.210881095639333e-07, "loss": 0.2271, "step": 24249 }, { "epoch": 0.8321894303363074, "grad_norm": 0.8565893408226981, "learning_rate": 7.208006322041267e-07, "loss": 0.2737, "step": 24250 }, { "epoch": 0.8322237474262183, "grad_norm": 0.8382891096484041, "learning_rate": 7.205132077088412e-07, "loss": 0.2385, "step": 24251 }, { "epoch": 0.832258064516129, "grad_norm": 0.7642347062655109, "learning_rate": 7.20225836081625e-07, "loss": 0.2772, "step": 24252 }, { "epoch": 0.8322923816060398, "grad_norm": 0.670489610860854, "learning_rate": 7.199385173260287e-07, "loss": 0.2616, "step": 24253 }, { "epoch": 0.8323266986959506, "grad_norm": 0.7299693371854087, "learning_rate": 7.196512514456016e-07, "loss": 0.3228, "step": 24254 }, { "epoch": 0.8323610157858614, "grad_norm": 0.8417024899161974, "learning_rate": 7.193640384438933e-07, "loss": 0.2811, "step": 24255 }, { "epoch": 0.8323953328757722, "grad_norm": 0.7857439851569102, "learning_rate": 7.190768783244517e-07, "loss": 0.2327, "step": 24256 }, { "epoch": 0.8324296499656829, "grad_norm": 0.7976337544054987, "learning_rate": 7.187897710908237e-07, "loss": 0.2578, "step": 24257 }, { "epoch": 0.8324639670555937, "grad_norm": 0.7094852062056453, "learning_rate": 7.18502716746558e-07, "loss": 0.2567, "step": 24258 }, { "epoch": 0.8324982841455044, "grad_norm": 0.7327609405009194, "learning_rate": 7.182157152951974e-07, "loss": 0.2307, "step": 24259 }, { "epoch": 0.8325326012354153, "grad_norm": 0.750552702650605, "learning_rate": 7.179287667402918e-07, "loss": 0.2199, "step": 24260 }, { "epoch": 0.832566918325326, "grad_norm": 0.7524464114762076, "learning_rate": 7.176418710853822e-07, "loss": 0.2863, "step": 24261 }, { "epoch": 0.8326012354152368, "grad_norm": 0.7559556380831074, "learning_rate": 7.173550283340153e-07, "loss": 0.2522, "step": 24262 }, { "epoch": 0.8326355525051475, "grad_norm": 0.8764282331272404, "learning_rate": 7.170682384897338e-07, "loss": 0.2705, "step": 24263 }, { "epoch": 0.8326698695950583, "grad_norm": 0.9539089185790436, "learning_rate": 7.167815015560803e-07, "loss": 0.3192, "step": 24264 }, { "epoch": 0.8327041866849691, "grad_norm": 0.8053778813914642, "learning_rate": 7.164948175365977e-07, "loss": 0.321, "step": 24265 }, { "epoch": 0.8327385037748799, "grad_norm": 0.7947282114856895, "learning_rate": 7.16208186434828e-07, "loss": 0.2844, "step": 24266 }, { "epoch": 0.8327728208647907, "grad_norm": 0.7923463294733017, "learning_rate": 7.159216082543113e-07, "loss": 0.2117, "step": 24267 }, { "epoch": 0.8328071379547014, "grad_norm": 0.743857969283095, "learning_rate": 7.156350829985886e-07, "loss": 0.2855, "step": 24268 }, { "epoch": 0.8328414550446123, "grad_norm": 0.8489920995170998, "learning_rate": 7.153486106712004e-07, "loss": 0.2535, "step": 24269 }, { "epoch": 0.832875772134523, "grad_norm": 0.9374610897414015, "learning_rate": 7.150621912756828e-07, "loss": 0.2467, "step": 24270 }, { "epoch": 0.8329100892244338, "grad_norm": 0.7099114287597469, "learning_rate": 7.147758248155767e-07, "loss": 0.2583, "step": 24271 }, { "epoch": 0.8329444063143445, "grad_norm": 0.8255673227473407, "learning_rate": 7.144895112944206e-07, "loss": 0.3053, "step": 24272 }, { "epoch": 0.8329787234042553, "grad_norm": 0.7806762547171131, "learning_rate": 7.142032507157481e-07, "loss": 0.2747, "step": 24273 }, { "epoch": 0.8330130404941661, "grad_norm": 0.9284395321368158, "learning_rate": 7.139170430830994e-07, "loss": 0.2209, "step": 24274 }, { "epoch": 0.8330473575840769, "grad_norm": 0.6983426347143384, "learning_rate": 7.136308884000082e-07, "loss": 0.2283, "step": 24275 }, { "epoch": 0.8330816746739876, "grad_norm": 0.7927386598116536, "learning_rate": 7.133447866700094e-07, "loss": 0.2191, "step": 24276 }, { "epoch": 0.8331159917638984, "grad_norm": 0.8206009471921109, "learning_rate": 7.130587378966381e-07, "loss": 0.2948, "step": 24277 }, { "epoch": 0.8331503088538093, "grad_norm": 0.8747795060562528, "learning_rate": 7.12772742083428e-07, "loss": 0.2924, "step": 24278 }, { "epoch": 0.83318462594372, "grad_norm": 0.8636313028632491, "learning_rate": 7.124867992339119e-07, "loss": 0.3143, "step": 24279 }, { "epoch": 0.8332189430336308, "grad_norm": 0.9546159256755746, "learning_rate": 7.12200909351623e-07, "loss": 0.3204, "step": 24280 }, { "epoch": 0.8332532601235415, "grad_norm": 0.8524982970099894, "learning_rate": 7.119150724400936e-07, "loss": 0.2225, "step": 24281 }, { "epoch": 0.8332875772134523, "grad_norm": 0.7697999267725574, "learning_rate": 7.116292885028525e-07, "loss": 0.2575, "step": 24282 }, { "epoch": 0.8333218943033631, "grad_norm": 0.7704465272302237, "learning_rate": 7.113435575434336e-07, "loss": 0.2139, "step": 24283 }, { "epoch": 0.8333562113932739, "grad_norm": 0.7337291103273087, "learning_rate": 7.110578795653638e-07, "loss": 0.3144, "step": 24284 }, { "epoch": 0.8333905284831846, "grad_norm": 0.8835178137962257, "learning_rate": 7.107722545721735e-07, "loss": 0.2801, "step": 24285 }, { "epoch": 0.8334248455730954, "grad_norm": 0.8121689997437354, "learning_rate": 7.104866825673911e-07, "loss": 0.2663, "step": 24286 }, { "epoch": 0.8334591626630061, "grad_norm": 0.6945889854344843, "learning_rate": 7.102011635545447e-07, "loss": 0.2116, "step": 24287 }, { "epoch": 0.833493479752917, "grad_norm": 0.8271944230549984, "learning_rate": 7.099156975371619e-07, "loss": 0.29, "step": 24288 }, { "epoch": 0.8335277968428277, "grad_norm": 0.7347787686662296, "learning_rate": 7.096302845187686e-07, "loss": 0.3122, "step": 24289 }, { "epoch": 0.8335621139327385, "grad_norm": 0.7673262886715987, "learning_rate": 7.093449245028922e-07, "loss": 0.2884, "step": 24290 }, { "epoch": 0.8335964310226492, "grad_norm": 0.7870026677160699, "learning_rate": 7.090596174930548e-07, "loss": 0.3178, "step": 24291 }, { "epoch": 0.8336307481125601, "grad_norm": 0.8255154001426652, "learning_rate": 7.087743634927846e-07, "loss": 0.2648, "step": 24292 }, { "epoch": 0.8336650652024709, "grad_norm": 0.8546296543605879, "learning_rate": 7.084891625056034e-07, "loss": 0.2869, "step": 24293 }, { "epoch": 0.8336993822923816, "grad_norm": 1.0152816746451556, "learning_rate": 7.082040145350344e-07, "loss": 0.2476, "step": 24294 }, { "epoch": 0.8337336993822924, "grad_norm": 0.9519819143449194, "learning_rate": 7.079189195846031e-07, "loss": 0.2685, "step": 24295 }, { "epoch": 0.8337680164722031, "grad_norm": 0.8102894712387836, "learning_rate": 7.076338776578267e-07, "loss": 0.2869, "step": 24296 }, { "epoch": 0.833802333562114, "grad_norm": 0.8192745926852679, "learning_rate": 7.073488887582319e-07, "loss": 0.2661, "step": 24297 }, { "epoch": 0.8338366506520247, "grad_norm": 0.7542743225918425, "learning_rate": 7.070639528893364e-07, "loss": 0.2964, "step": 24298 }, { "epoch": 0.8338709677419355, "grad_norm": 0.8440307192492584, "learning_rate": 7.067790700546595e-07, "loss": 0.2277, "step": 24299 }, { "epoch": 0.8339052848318462, "grad_norm": 0.8603597659674611, "learning_rate": 7.064942402577224e-07, "loss": 0.2285, "step": 24300 }, { "epoch": 0.8339396019217571, "grad_norm": 0.828656508036704, "learning_rate": 7.062094635020433e-07, "loss": 0.2386, "step": 24301 }, { "epoch": 0.8339739190116678, "grad_norm": 0.7283427221138165, "learning_rate": 7.0592473979114e-07, "loss": 0.2379, "step": 24302 }, { "epoch": 0.8340082361015786, "grad_norm": 0.7704638916847721, "learning_rate": 7.056400691285303e-07, "loss": 0.2815, "step": 24303 }, { "epoch": 0.8340425531914893, "grad_norm": 0.7443303764992639, "learning_rate": 7.053554515177319e-07, "loss": 0.2108, "step": 24304 }, { "epoch": 0.8340768702814001, "grad_norm": 0.7978504407365696, "learning_rate": 7.050708869622575e-07, "loss": 0.2816, "step": 24305 }, { "epoch": 0.834111187371311, "grad_norm": 0.7307510480268401, "learning_rate": 7.047863754656276e-07, "loss": 0.2043, "step": 24306 }, { "epoch": 0.8341455044612217, "grad_norm": 0.8109099812029021, "learning_rate": 7.045019170313521e-07, "loss": 0.2317, "step": 24307 }, { "epoch": 0.8341798215511325, "grad_norm": 0.8891036292104, "learning_rate": 7.042175116629485e-07, "loss": 0.2164, "step": 24308 }, { "epoch": 0.8342141386410432, "grad_norm": 0.8784763857412933, "learning_rate": 7.039331593639287e-07, "loss": 0.2694, "step": 24309 }, { "epoch": 0.834248455730954, "grad_norm": 0.8114430070125219, "learning_rate": 7.036488601378055e-07, "loss": 0.2448, "step": 24310 }, { "epoch": 0.8342827728208648, "grad_norm": 0.6726758154178869, "learning_rate": 7.033646139880923e-07, "loss": 0.2779, "step": 24311 }, { "epoch": 0.8343170899107756, "grad_norm": 0.7608281641805209, "learning_rate": 7.030804209182996e-07, "loss": 0.2573, "step": 24312 }, { "epoch": 0.8343514070006863, "grad_norm": 1.317497708440425, "learning_rate": 7.027962809319394e-07, "loss": 0.2851, "step": 24313 }, { "epoch": 0.8343857240905971, "grad_norm": 0.6913642510631848, "learning_rate": 7.025121940325191e-07, "loss": 0.238, "step": 24314 }, { "epoch": 0.8344200411805079, "grad_norm": 0.7861645063424555, "learning_rate": 7.022281602235515e-07, "loss": 0.2751, "step": 24315 }, { "epoch": 0.8344543582704187, "grad_norm": 0.8394225132109042, "learning_rate": 7.019441795085457e-07, "loss": 0.2665, "step": 24316 }, { "epoch": 0.8344886753603294, "grad_norm": 0.6304845410298068, "learning_rate": 7.016602518910059e-07, "loss": 0.2294, "step": 24317 }, { "epoch": 0.8345229924502402, "grad_norm": 0.7982514658755329, "learning_rate": 7.013763773744442e-07, "loss": 0.2228, "step": 24318 }, { "epoch": 0.834557309540151, "grad_norm": 0.8128096220351256, "learning_rate": 7.010925559623649e-07, "loss": 0.2659, "step": 24319 }, { "epoch": 0.8345916266300618, "grad_norm": 0.7543891746498441, "learning_rate": 7.008087876582753e-07, "loss": 0.2467, "step": 24320 }, { "epoch": 0.8346259437199726, "grad_norm": 0.7662269712989632, "learning_rate": 7.005250724656804e-07, "loss": 0.2814, "step": 24321 }, { "epoch": 0.8346602608098833, "grad_norm": 0.8265007914123302, "learning_rate": 7.002414103880856e-07, "loss": 0.2558, "step": 24322 }, { "epoch": 0.8346945778997941, "grad_norm": 0.654518255094677, "learning_rate": 6.999578014289949e-07, "loss": 0.2191, "step": 24323 }, { "epoch": 0.8347288949897049, "grad_norm": 0.7857631341091389, "learning_rate": 6.996742455919126e-07, "loss": 0.3153, "step": 24324 }, { "epoch": 0.8347632120796157, "grad_norm": 0.7585688505375416, "learning_rate": 6.99390742880342e-07, "loss": 0.2451, "step": 24325 }, { "epoch": 0.8347975291695264, "grad_norm": 0.7656676519732493, "learning_rate": 6.991072932977832e-07, "loss": 0.2978, "step": 24326 }, { "epoch": 0.8348318462594372, "grad_norm": 0.8632761055253487, "learning_rate": 6.988238968477412e-07, "loss": 0.2493, "step": 24327 }, { "epoch": 0.8348661633493479, "grad_norm": 0.7420276498830851, "learning_rate": 6.985405535337131e-07, "loss": 0.2408, "step": 24328 }, { "epoch": 0.8349004804392588, "grad_norm": 0.8043162274903153, "learning_rate": 6.982572633592033e-07, "loss": 0.2716, "step": 24329 }, { "epoch": 0.8349347975291695, "grad_norm": 0.8868603229025438, "learning_rate": 6.97974026327709e-07, "loss": 0.2466, "step": 24330 }, { "epoch": 0.8349691146190803, "grad_norm": 0.7924525783051395, "learning_rate": 6.976908424427298e-07, "loss": 0.2899, "step": 24331 }, { "epoch": 0.835003431708991, "grad_norm": 0.7072471155898465, "learning_rate": 6.974077117077643e-07, "loss": 0.2358, "step": 24332 }, { "epoch": 0.8350377487989018, "grad_norm": 0.8571286611861612, "learning_rate": 6.971246341263105e-07, "loss": 0.2305, "step": 24333 }, { "epoch": 0.8350720658888127, "grad_norm": 0.8073524530058825, "learning_rate": 6.968416097018643e-07, "loss": 0.2482, "step": 24334 }, { "epoch": 0.8351063829787234, "grad_norm": 0.7878793709231408, "learning_rate": 6.965586384379236e-07, "loss": 0.2521, "step": 24335 }, { "epoch": 0.8351407000686342, "grad_norm": 0.7752221475958387, "learning_rate": 6.962757203379849e-07, "loss": 0.2321, "step": 24336 }, { "epoch": 0.8351750171585449, "grad_norm": 0.834783721273021, "learning_rate": 6.959928554055395e-07, "loss": 0.2203, "step": 24337 }, { "epoch": 0.8352093342484558, "grad_norm": 0.8053192536278893, "learning_rate": 6.957100436440855e-07, "loss": 0.2511, "step": 24338 }, { "epoch": 0.8352436513383665, "grad_norm": 0.7804317230028935, "learning_rate": 6.954272850571169e-07, "loss": 0.2544, "step": 24339 }, { "epoch": 0.8352779684282773, "grad_norm": 0.8639278863802169, "learning_rate": 6.951445796481232e-07, "loss": 0.2509, "step": 24340 }, { "epoch": 0.835312285518188, "grad_norm": 0.7734777973231146, "learning_rate": 6.948619274206015e-07, "loss": 0.2101, "step": 24341 }, { "epoch": 0.8353466026080988, "grad_norm": 0.913129195925847, "learning_rate": 6.9457932837804e-07, "loss": 0.2994, "step": 24342 }, { "epoch": 0.8353809196980096, "grad_norm": 0.8132437773776866, "learning_rate": 6.942967825239316e-07, "loss": 0.2681, "step": 24343 }, { "epoch": 0.8354152367879204, "grad_norm": 0.7727340935469355, "learning_rate": 6.940142898617663e-07, "loss": 0.2559, "step": 24344 }, { "epoch": 0.8354495538778312, "grad_norm": 0.8280034270019628, "learning_rate": 6.937318503950341e-07, "loss": 0.2745, "step": 24345 }, { "epoch": 0.8354838709677419, "grad_norm": 0.7966055905335584, "learning_rate": 6.934494641272243e-07, "loss": 0.232, "step": 24346 }, { "epoch": 0.8355181880576528, "grad_norm": 0.7412154315820909, "learning_rate": 6.93167131061826e-07, "loss": 0.2735, "step": 24347 }, { "epoch": 0.8355525051475635, "grad_norm": 0.8037689626383278, "learning_rate": 6.928848512023267e-07, "loss": 0.2682, "step": 24348 }, { "epoch": 0.8355868222374743, "grad_norm": 0.8390330975813358, "learning_rate": 6.926026245522122e-07, "loss": 0.2787, "step": 24349 }, { "epoch": 0.835621139327385, "grad_norm": 0.6759385753773176, "learning_rate": 6.923204511149717e-07, "loss": 0.2109, "step": 24350 }, { "epoch": 0.8356554564172958, "grad_norm": 0.7509462798137172, "learning_rate": 6.920383308940893e-07, "loss": 0.2604, "step": 24351 }, { "epoch": 0.8356897735072066, "grad_norm": 0.826172820882096, "learning_rate": 6.917562638930503e-07, "loss": 0.2442, "step": 24352 }, { "epoch": 0.8357240905971174, "grad_norm": 0.7848037816156996, "learning_rate": 6.914742501153399e-07, "loss": 0.2485, "step": 24353 }, { "epoch": 0.8357584076870281, "grad_norm": 0.7450601068813113, "learning_rate": 6.911922895644418e-07, "loss": 0.2193, "step": 24354 }, { "epoch": 0.8357927247769389, "grad_norm": 0.7586216688444062, "learning_rate": 6.909103822438396e-07, "loss": 0.2716, "step": 24355 }, { "epoch": 0.8358270418668496, "grad_norm": 0.7186740805530283, "learning_rate": 6.906285281570152e-07, "loss": 0.2781, "step": 24356 }, { "epoch": 0.8358613589567605, "grad_norm": 0.79926345918547, "learning_rate": 6.903467273074515e-07, "loss": 0.3092, "step": 24357 }, { "epoch": 0.8358956760466713, "grad_norm": 0.7102536695566016, "learning_rate": 6.900649796986292e-07, "loss": 0.2268, "step": 24358 }, { "epoch": 0.835929993136582, "grad_norm": 0.8893931987282157, "learning_rate": 6.897832853340291e-07, "loss": 0.253, "step": 24359 }, { "epoch": 0.8359643102264928, "grad_norm": 0.8490658885948449, "learning_rate": 6.895016442171321e-07, "loss": 0.3108, "step": 24360 }, { "epoch": 0.8359986273164036, "grad_norm": 0.7206387216980186, "learning_rate": 6.892200563514157e-07, "loss": 0.2374, "step": 24361 }, { "epoch": 0.8360329444063144, "grad_norm": 0.8778847538414383, "learning_rate": 6.889385217403616e-07, "loss": 0.2228, "step": 24362 }, { "epoch": 0.8360672614962251, "grad_norm": 0.7442552519540672, "learning_rate": 6.886570403874432e-07, "loss": 0.2658, "step": 24363 }, { "epoch": 0.8361015785861359, "grad_norm": 0.8608294407292773, "learning_rate": 6.883756122961427e-07, "loss": 0.2415, "step": 24364 }, { "epoch": 0.8361358956760466, "grad_norm": 0.777648226593892, "learning_rate": 6.880942374699334e-07, "loss": 0.2689, "step": 24365 }, { "epoch": 0.8361702127659575, "grad_norm": 0.8920617998382034, "learning_rate": 6.878129159122925e-07, "loss": 0.2248, "step": 24366 }, { "epoch": 0.8362045298558682, "grad_norm": 0.7484119154985662, "learning_rate": 6.875316476266958e-07, "loss": 0.2828, "step": 24367 }, { "epoch": 0.836238846945779, "grad_norm": 0.9724391833656088, "learning_rate": 6.872504326166179e-07, "loss": 0.2494, "step": 24368 }, { "epoch": 0.8362731640356897, "grad_norm": 0.734940536553175, "learning_rate": 6.869692708855325e-07, "loss": 0.2926, "step": 24369 }, { "epoch": 0.8363074811256005, "grad_norm": 0.7337597085495184, "learning_rate": 6.866881624369131e-07, "loss": 0.2182, "step": 24370 }, { "epoch": 0.8363417982155114, "grad_norm": 0.7050991023196354, "learning_rate": 6.864071072742345e-07, "loss": 0.303, "step": 24371 }, { "epoch": 0.8363761153054221, "grad_norm": 0.7564145625769337, "learning_rate": 6.861261054009638e-07, "loss": 0.2725, "step": 24372 }, { "epoch": 0.8364104323953329, "grad_norm": 0.8411572699669314, "learning_rate": 6.858451568205782e-07, "loss": 0.2599, "step": 24373 }, { "epoch": 0.8364447494852436, "grad_norm": 0.7508967353482715, "learning_rate": 6.855642615365449e-07, "loss": 0.3054, "step": 24374 }, { "epoch": 0.8364790665751545, "grad_norm": 0.8587031898269272, "learning_rate": 6.852834195523344e-07, "loss": 0.2926, "step": 24375 }, { "epoch": 0.8365133836650652, "grad_norm": 0.877941712955645, "learning_rate": 6.850026308714175e-07, "loss": 0.2564, "step": 24376 }, { "epoch": 0.836547700754976, "grad_norm": 0.8122152674918043, "learning_rate": 6.847218954972618e-07, "loss": 0.2854, "step": 24377 }, { "epoch": 0.8365820178448867, "grad_norm": 0.7736126795234639, "learning_rate": 6.844412134333361e-07, "loss": 0.2643, "step": 24378 }, { "epoch": 0.8366163349347975, "grad_norm": 0.8887203841155628, "learning_rate": 6.841605846831078e-07, "loss": 0.2522, "step": 24379 }, { "epoch": 0.8366506520247083, "grad_norm": 0.7740164304031818, "learning_rate": 6.838800092500452e-07, "loss": 0.2541, "step": 24380 }, { "epoch": 0.8366849691146191, "grad_norm": 0.7127291714092442, "learning_rate": 6.835994871376101e-07, "loss": 0.3079, "step": 24381 }, { "epoch": 0.8367192862045298, "grad_norm": 0.8684058888648698, "learning_rate": 6.833190183492722e-07, "loss": 0.2518, "step": 24382 }, { "epoch": 0.8367536032944406, "grad_norm": 0.7478527928268659, "learning_rate": 6.830386028884967e-07, "loss": 0.2357, "step": 24383 }, { "epoch": 0.8367879203843515, "grad_norm": 0.8456686210055363, "learning_rate": 6.82758240758743e-07, "loss": 0.215, "step": 24384 }, { "epoch": 0.8368222374742622, "grad_norm": 0.7746892637327586, "learning_rate": 6.824779319634805e-07, "loss": 0.2153, "step": 24385 }, { "epoch": 0.836856554564173, "grad_norm": 0.9040277535161881, "learning_rate": 6.821976765061688e-07, "loss": 0.2292, "step": 24386 }, { "epoch": 0.8368908716540837, "grad_norm": 0.8377885267619126, "learning_rate": 6.819174743902702e-07, "loss": 0.3204, "step": 24387 }, { "epoch": 0.8369251887439945, "grad_norm": 0.725395123209985, "learning_rate": 6.816373256192466e-07, "loss": 0.2208, "step": 24388 }, { "epoch": 0.8369595058339053, "grad_norm": 0.8033867085986744, "learning_rate": 6.813572301965593e-07, "loss": 0.2399, "step": 24389 }, { "epoch": 0.8369938229238161, "grad_norm": 0.7979818718290155, "learning_rate": 6.81077188125669e-07, "loss": 0.2367, "step": 24390 }, { "epoch": 0.8370281400137268, "grad_norm": 0.7635692429913952, "learning_rate": 6.807971994100337e-07, "loss": 0.2343, "step": 24391 }, { "epoch": 0.8370624571036376, "grad_norm": 0.7950957533902643, "learning_rate": 6.805172640531138e-07, "loss": 0.24, "step": 24392 }, { "epoch": 0.8370967741935483, "grad_norm": 0.8105418860632331, "learning_rate": 6.802373820583669e-07, "loss": 0.2759, "step": 24393 }, { "epoch": 0.8371310912834592, "grad_norm": 0.9088801842571954, "learning_rate": 6.799575534292524e-07, "loss": 0.3015, "step": 24394 }, { "epoch": 0.83716540837337, "grad_norm": 0.9021648416219888, "learning_rate": 6.796777781692227e-07, "loss": 0.2697, "step": 24395 }, { "epoch": 0.8371997254632807, "grad_norm": 0.8161052808763125, "learning_rate": 6.793980562817398e-07, "loss": 0.3045, "step": 24396 }, { "epoch": 0.8372340425531914, "grad_norm": 0.8153493552551697, "learning_rate": 6.791183877702551e-07, "loss": 0.289, "step": 24397 }, { "epoch": 0.8372683596431023, "grad_norm": 0.9012623883320128, "learning_rate": 6.788387726382245e-07, "loss": 0.3335, "step": 24398 }, { "epoch": 0.8373026767330131, "grad_norm": 0.7714357299784279, "learning_rate": 6.785592108891049e-07, "loss": 0.2848, "step": 24399 }, { "epoch": 0.8373369938229238, "grad_norm": 0.7243146660441125, "learning_rate": 6.782797025263466e-07, "loss": 0.2813, "step": 24400 }, { "epoch": 0.8373713109128346, "grad_norm": 0.8374173700104319, "learning_rate": 6.780002475534036e-07, "loss": 0.2542, "step": 24401 }, { "epoch": 0.8374056280027453, "grad_norm": 0.6340029731881112, "learning_rate": 6.777208459737289e-07, "loss": 0.2539, "step": 24402 }, { "epoch": 0.8374399450926562, "grad_norm": 0.7976236866714955, "learning_rate": 6.774414977907751e-07, "loss": 0.2805, "step": 24403 }, { "epoch": 0.8374742621825669, "grad_norm": 0.9283827580020801, "learning_rate": 6.771622030079899e-07, "loss": 0.2894, "step": 24404 }, { "epoch": 0.8375085792724777, "grad_norm": 0.7715587307885122, "learning_rate": 6.768829616288264e-07, "loss": 0.2618, "step": 24405 }, { "epoch": 0.8375428963623884, "grad_norm": 0.7941029205367776, "learning_rate": 6.766037736567355e-07, "loss": 0.2731, "step": 24406 }, { "epoch": 0.8375772134522993, "grad_norm": 0.8845840666645299, "learning_rate": 6.763246390951611e-07, "loss": 0.276, "step": 24407 }, { "epoch": 0.83761153054221, "grad_norm": 0.788247102831641, "learning_rate": 6.760455579475578e-07, "loss": 0.2564, "step": 24408 }, { "epoch": 0.8376458476321208, "grad_norm": 0.8158453412124695, "learning_rate": 6.757665302173694e-07, "loss": 0.2961, "step": 24409 }, { "epoch": 0.8376801647220316, "grad_norm": 0.7973396463560402, "learning_rate": 6.754875559080437e-07, "loss": 0.331, "step": 24410 }, { "epoch": 0.8377144818119423, "grad_norm": 0.7486617819489576, "learning_rate": 6.752086350230274e-07, "loss": 0.2509, "step": 24411 }, { "epoch": 0.8377487989018532, "grad_norm": 0.8025518133407104, "learning_rate": 6.749297675657662e-07, "loss": 0.2822, "step": 24412 }, { "epoch": 0.8377831159917639, "grad_norm": 0.8049951603387232, "learning_rate": 6.74650953539705e-07, "loss": 0.2849, "step": 24413 }, { "epoch": 0.8378174330816747, "grad_norm": 0.8579628522766424, "learning_rate": 6.743721929482883e-07, "loss": 0.2744, "step": 24414 }, { "epoch": 0.8378517501715854, "grad_norm": 0.8082577678842383, "learning_rate": 6.740934857949616e-07, "loss": 0.2525, "step": 24415 }, { "epoch": 0.8378860672614962, "grad_norm": 0.8957479681873928, "learning_rate": 6.738148320831639e-07, "loss": 0.2836, "step": 24416 }, { "epoch": 0.837920384351407, "grad_norm": 0.8261547567075297, "learning_rate": 6.735362318163419e-07, "loss": 0.2662, "step": 24417 }, { "epoch": 0.8379547014413178, "grad_norm": 0.7977170958015564, "learning_rate": 6.732576849979349e-07, "loss": 0.2882, "step": 24418 }, { "epoch": 0.8379890185312285, "grad_norm": 0.7462385372006388, "learning_rate": 6.729791916313849e-07, "loss": 0.205, "step": 24419 }, { "epoch": 0.8380233356211393, "grad_norm": 0.8135452595001744, "learning_rate": 6.727007517201317e-07, "loss": 0.2625, "step": 24420 }, { "epoch": 0.8380576527110501, "grad_norm": 0.8779476815869185, "learning_rate": 6.724223652676148e-07, "loss": 0.297, "step": 24421 }, { "epoch": 0.8380919698009609, "grad_norm": 0.7418685994944423, "learning_rate": 6.72144032277276e-07, "loss": 0.2573, "step": 24422 }, { "epoch": 0.8381262868908717, "grad_norm": 0.7102975819055323, "learning_rate": 6.718657527525512e-07, "loss": 0.2451, "step": 24423 }, { "epoch": 0.8381606039807824, "grad_norm": 2.8606543437936494, "learning_rate": 6.715875266968786e-07, "loss": 0.2136, "step": 24424 }, { "epoch": 0.8381949210706932, "grad_norm": 0.7940405339932273, "learning_rate": 6.713093541136962e-07, "loss": 0.318, "step": 24425 }, { "epoch": 0.838229238160604, "grad_norm": 0.8528859681146691, "learning_rate": 6.710312350064391e-07, "loss": 0.3023, "step": 24426 }, { "epoch": 0.8382635552505148, "grad_norm": 0.7302496893854935, "learning_rate": 6.707531693785446e-07, "loss": 0.2199, "step": 24427 }, { "epoch": 0.8382978723404255, "grad_norm": 0.7369746709310282, "learning_rate": 6.704751572334472e-07, "loss": 0.2308, "step": 24428 }, { "epoch": 0.8383321894303363, "grad_norm": 0.779507754602977, "learning_rate": 6.701971985745825e-07, "loss": 0.2604, "step": 24429 }, { "epoch": 0.8383665065202471, "grad_norm": 0.7389738359165086, "learning_rate": 6.699192934053811e-07, "loss": 0.2495, "step": 24430 }, { "epoch": 0.8384008236101579, "grad_norm": 0.7391618764092875, "learning_rate": 6.696414417292806e-07, "loss": 0.2707, "step": 24431 }, { "epoch": 0.8384351407000686, "grad_norm": 0.9033461683500452, "learning_rate": 6.693636435497109e-07, "loss": 0.3183, "step": 24432 }, { "epoch": 0.8384694577899794, "grad_norm": 0.8422208200389601, "learning_rate": 6.690858988701038e-07, "loss": 0.2696, "step": 24433 }, { "epoch": 0.8385037748798901, "grad_norm": 0.8098582798974266, "learning_rate": 6.688082076938906e-07, "loss": 0.2579, "step": 24434 }, { "epoch": 0.838538091969801, "grad_norm": 0.7218337636467581, "learning_rate": 6.685305700245031e-07, "loss": 0.2774, "step": 24435 }, { "epoch": 0.8385724090597118, "grad_norm": 0.7490418798919428, "learning_rate": 6.682529858653702e-07, "loss": 0.2936, "step": 24436 }, { "epoch": 0.8386067261496225, "grad_norm": 0.7346509038868054, "learning_rate": 6.679754552199213e-07, "loss": 0.2402, "step": 24437 }, { "epoch": 0.8386410432395333, "grad_norm": 0.8651817351680487, "learning_rate": 6.67697978091586e-07, "loss": 0.3036, "step": 24438 }, { "epoch": 0.838675360329444, "grad_norm": 0.7398605972796433, "learning_rate": 6.674205544837892e-07, "loss": 0.2581, "step": 24439 }, { "epoch": 0.8387096774193549, "grad_norm": 0.8074161052757999, "learning_rate": 6.671431843999621e-07, "loss": 0.2797, "step": 24440 }, { "epoch": 0.8387439945092656, "grad_norm": 0.8653371184868625, "learning_rate": 6.668658678435285e-07, "loss": 0.2571, "step": 24441 }, { "epoch": 0.8387783115991764, "grad_norm": 0.9209510367289283, "learning_rate": 6.665886048179149e-07, "loss": 0.2383, "step": 24442 }, { "epoch": 0.8388126286890871, "grad_norm": 0.7672227992720732, "learning_rate": 6.663113953265471e-07, "loss": 0.2242, "step": 24443 }, { "epoch": 0.838846945778998, "grad_norm": 0.6960609607224827, "learning_rate": 6.660342393728491e-07, "loss": 0.273, "step": 24444 }, { "epoch": 0.8388812628689087, "grad_norm": 0.9650202194685291, "learning_rate": 6.657571369602456e-07, "loss": 0.2665, "step": 24445 }, { "epoch": 0.8389155799588195, "grad_norm": 0.7849848483079617, "learning_rate": 6.654800880921591e-07, "loss": 0.2551, "step": 24446 }, { "epoch": 0.8389498970487302, "grad_norm": 1.0418335008172437, "learning_rate": 6.652030927720138e-07, "loss": 0.2816, "step": 24447 }, { "epoch": 0.838984214138641, "grad_norm": 0.7620888347813887, "learning_rate": 6.649261510032279e-07, "loss": 0.2624, "step": 24448 }, { "epoch": 0.8390185312285519, "grad_norm": 0.7299135229152851, "learning_rate": 6.646492627892265e-07, "loss": 0.2802, "step": 24449 }, { "epoch": 0.8390528483184626, "grad_norm": 1.964648743855105, "learning_rate": 6.643724281334302e-07, "loss": 0.2504, "step": 24450 }, { "epoch": 0.8390871654083734, "grad_norm": 0.7646969635375873, "learning_rate": 6.640956470392551e-07, "loss": 0.2948, "step": 24451 }, { "epoch": 0.8391214824982841, "grad_norm": 0.7091794152861608, "learning_rate": 6.638189195101258e-07, "loss": 0.2842, "step": 24452 }, { "epoch": 0.839155799588195, "grad_norm": 0.7369201628363328, "learning_rate": 6.635422455494556e-07, "loss": 0.2536, "step": 24453 }, { "epoch": 0.8391901166781057, "grad_norm": 0.7514336138203422, "learning_rate": 6.632656251606673e-07, "loss": 0.2617, "step": 24454 }, { "epoch": 0.8392244337680165, "grad_norm": 0.726545706822786, "learning_rate": 6.629890583471755e-07, "loss": 0.2012, "step": 24455 }, { "epoch": 0.8392587508579272, "grad_norm": 0.7016362061222712, "learning_rate": 6.627125451123973e-07, "loss": 0.2759, "step": 24456 }, { "epoch": 0.839293067947838, "grad_norm": 0.7488338464505463, "learning_rate": 6.624360854597483e-07, "loss": 0.2285, "step": 24457 }, { "epoch": 0.8393273850377488, "grad_norm": 0.7308528774712614, "learning_rate": 6.621596793926449e-07, "loss": 0.2888, "step": 24458 }, { "epoch": 0.8393617021276596, "grad_norm": 0.8485458678780475, "learning_rate": 6.618833269145009e-07, "loss": 0.2495, "step": 24459 }, { "epoch": 0.8393960192175703, "grad_norm": 0.7270624224384239, "learning_rate": 6.616070280287306e-07, "loss": 0.2399, "step": 24460 }, { "epoch": 0.8394303363074811, "grad_norm": 0.9687192383297483, "learning_rate": 6.613307827387489e-07, "loss": 0.3001, "step": 24461 }, { "epoch": 0.8394646533973918, "grad_norm": 0.8402271274521192, "learning_rate": 6.610545910479643e-07, "loss": 0.3075, "step": 24462 }, { "epoch": 0.8394989704873027, "grad_norm": 0.7711682100092372, "learning_rate": 6.607784529597944e-07, "loss": 0.2297, "step": 24463 }, { "epoch": 0.8395332875772135, "grad_norm": 0.76951669237626, "learning_rate": 6.605023684776457e-07, "loss": 0.2553, "step": 24464 }, { "epoch": 0.8395676046671242, "grad_norm": 0.6704891984827541, "learning_rate": 6.602263376049306e-07, "loss": 0.2294, "step": 24465 }, { "epoch": 0.839601921757035, "grad_norm": 0.783493878446002, "learning_rate": 6.599503603450607e-07, "loss": 0.2522, "step": 24466 }, { "epoch": 0.8396362388469458, "grad_norm": 0.7710731325688642, "learning_rate": 6.59674436701444e-07, "loss": 0.2621, "step": 24467 }, { "epoch": 0.8396705559368566, "grad_norm": 0.7455527099318899, "learning_rate": 6.593985666774888e-07, "loss": 0.3001, "step": 24468 }, { "epoch": 0.8397048730267673, "grad_norm": 0.8725939269789539, "learning_rate": 6.591227502766035e-07, "loss": 0.2796, "step": 24469 }, { "epoch": 0.8397391901166781, "grad_norm": 0.820077450736382, "learning_rate": 6.588469875021958e-07, "loss": 0.2735, "step": 24470 }, { "epoch": 0.8397735072065888, "grad_norm": 0.7255197184548254, "learning_rate": 6.585712783576725e-07, "loss": 0.26, "step": 24471 }, { "epoch": 0.8398078242964997, "grad_norm": 0.7110048680642163, "learning_rate": 6.582956228464388e-07, "loss": 0.2508, "step": 24472 }, { "epoch": 0.8398421413864104, "grad_norm": 0.7266856299951266, "learning_rate": 6.580200209719023e-07, "loss": 0.2448, "step": 24473 }, { "epoch": 0.8398764584763212, "grad_norm": 0.7551270595406095, "learning_rate": 6.577444727374643e-07, "loss": 0.2216, "step": 24474 }, { "epoch": 0.839910775566232, "grad_norm": 0.8226535302688942, "learning_rate": 6.574689781465326e-07, "loss": 0.2794, "step": 24475 }, { "epoch": 0.8399450926561428, "grad_norm": 0.8577098202973952, "learning_rate": 6.571935372025074e-07, "loss": 0.3472, "step": 24476 }, { "epoch": 0.8399794097460536, "grad_norm": 0.6823705003186825, "learning_rate": 6.569181499087934e-07, "loss": 0.2325, "step": 24477 }, { "epoch": 0.8400137268359643, "grad_norm": 0.7673424139346309, "learning_rate": 6.566428162687916e-07, "loss": 0.2323, "step": 24478 }, { "epoch": 0.8400480439258751, "grad_norm": 0.8982001736456254, "learning_rate": 6.563675362859046e-07, "loss": 0.2908, "step": 24479 }, { "epoch": 0.8400823610157858, "grad_norm": 0.7459016806526042, "learning_rate": 6.560923099635319e-07, "loss": 0.252, "step": 24480 }, { "epoch": 0.8401166781056967, "grad_norm": 0.7299775556405463, "learning_rate": 6.558171373050742e-07, "loss": 0.238, "step": 24481 }, { "epoch": 0.8401509951956074, "grad_norm": 0.7725986996799904, "learning_rate": 6.555420183139322e-07, "loss": 0.2474, "step": 24482 }, { "epoch": 0.8401853122855182, "grad_norm": 0.8067069099194891, "learning_rate": 6.552669529935013e-07, "loss": 0.2537, "step": 24483 }, { "epoch": 0.8402196293754289, "grad_norm": 0.729888836242187, "learning_rate": 6.549919413471839e-07, "loss": 0.2664, "step": 24484 }, { "epoch": 0.8402539464653397, "grad_norm": 0.7075782859722843, "learning_rate": 6.547169833783728e-07, "loss": 0.2349, "step": 24485 }, { "epoch": 0.8402882635552505, "grad_norm": 0.7972718868660575, "learning_rate": 6.544420790904699e-07, "loss": 0.2571, "step": 24486 }, { "epoch": 0.8403225806451613, "grad_norm": 0.7928442934217154, "learning_rate": 6.541672284868678e-07, "loss": 0.2369, "step": 24487 }, { "epoch": 0.840356897735072, "grad_norm": 0.7449594694932706, "learning_rate": 6.53892431570961e-07, "loss": 0.2371, "step": 24488 }, { "epoch": 0.8403912148249828, "grad_norm": 0.6684925868013688, "learning_rate": 6.536176883461487e-07, "loss": 0.2711, "step": 24489 }, { "epoch": 0.8404255319148937, "grad_norm": 0.7863640219275754, "learning_rate": 6.533429988158213e-07, "loss": 0.2504, "step": 24490 }, { "epoch": 0.8404598490048044, "grad_norm": 0.6990848244883232, "learning_rate": 6.530683629833734e-07, "loss": 0.2595, "step": 24491 }, { "epoch": 0.8404941660947152, "grad_norm": 0.7008367336331872, "learning_rate": 6.527937808521984e-07, "loss": 0.2517, "step": 24492 }, { "epoch": 0.8405284831846259, "grad_norm": 0.875679259349255, "learning_rate": 6.525192524256874e-07, "loss": 0.258, "step": 24493 }, { "epoch": 0.8405628002745367, "grad_norm": 0.8766103607018343, "learning_rate": 6.522447777072322e-07, "loss": 0.264, "step": 24494 }, { "epoch": 0.8405971173644475, "grad_norm": 0.7998435111317416, "learning_rate": 6.519703567002239e-07, "loss": 0.29, "step": 24495 }, { "epoch": 0.8406314344543583, "grad_norm": 0.669807796848002, "learning_rate": 6.516959894080538e-07, "loss": 0.2579, "step": 24496 }, { "epoch": 0.840665751544269, "grad_norm": 0.6657416765495716, "learning_rate": 6.514216758341079e-07, "loss": 0.1807, "step": 24497 }, { "epoch": 0.8407000686341798, "grad_norm": 0.797933191228283, "learning_rate": 6.511474159817793e-07, "loss": 0.2802, "step": 24498 }, { "epoch": 0.8407343857240906, "grad_norm": 0.8932651590297411, "learning_rate": 6.50873209854453e-07, "loss": 0.2682, "step": 24499 }, { "epoch": 0.8407687028140014, "grad_norm": 0.8140841476050099, "learning_rate": 6.505990574555176e-07, "loss": 0.2745, "step": 24500 }, { "epoch": 0.8408030199039122, "grad_norm": 0.8218432268172269, "learning_rate": 6.503249587883598e-07, "loss": 0.2764, "step": 24501 }, { "epoch": 0.8408373369938229, "grad_norm": 0.8722503772517711, "learning_rate": 6.500509138563666e-07, "loss": 0.2441, "step": 24502 }, { "epoch": 0.8408716540837337, "grad_norm": 0.762975167055405, "learning_rate": 6.497769226629219e-07, "loss": 0.2586, "step": 24503 }, { "epoch": 0.8409059711736445, "grad_norm": 0.7776129586016519, "learning_rate": 6.495029852114121e-07, "loss": 0.2401, "step": 24504 }, { "epoch": 0.8409402882635553, "grad_norm": 0.8132756750874799, "learning_rate": 6.492291015052216e-07, "loss": 0.31, "step": 24505 }, { "epoch": 0.840974605353466, "grad_norm": 0.7419462061619241, "learning_rate": 6.48955271547731e-07, "loss": 0.2409, "step": 24506 }, { "epoch": 0.8410089224433768, "grad_norm": 0.8702744210275326, "learning_rate": 6.48681495342327e-07, "loss": 0.224, "step": 24507 }, { "epoch": 0.8410432395332875, "grad_norm": 0.7227347089977725, "learning_rate": 6.484077728923893e-07, "loss": 0.2221, "step": 24508 }, { "epoch": 0.8410775566231984, "grad_norm": 0.7918173513410209, "learning_rate": 6.48134104201299e-07, "loss": 0.2647, "step": 24509 }, { "epoch": 0.8411118737131091, "grad_norm": 0.7973361695526741, "learning_rate": 6.478604892724399e-07, "loss": 0.3392, "step": 24510 }, { "epoch": 0.8411461908030199, "grad_norm": 0.7723628872237749, "learning_rate": 6.475869281091895e-07, "loss": 0.2444, "step": 24511 }, { "epoch": 0.8411805078929306, "grad_norm": 0.739447678703511, "learning_rate": 6.473134207149279e-07, "loss": 0.278, "step": 24512 }, { "epoch": 0.8412148249828415, "grad_norm": 0.850664904640749, "learning_rate": 6.470399670930344e-07, "loss": 0.2327, "step": 24513 }, { "epoch": 0.8412491420727523, "grad_norm": 0.9066314783854181, "learning_rate": 6.46766567246887e-07, "loss": 0.2351, "step": 24514 }, { "epoch": 0.841283459162663, "grad_norm": 0.7902118086469718, "learning_rate": 6.464932211798636e-07, "loss": 0.3131, "step": 24515 }, { "epoch": 0.8413177762525738, "grad_norm": 0.7316936334651221, "learning_rate": 6.462199288953407e-07, "loss": 0.2045, "step": 24516 }, { "epoch": 0.8413520933424845, "grad_norm": 0.7802904405300101, "learning_rate": 6.459466903966943e-07, "loss": 0.2938, "step": 24517 }, { "epoch": 0.8413864104323954, "grad_norm": 0.7890783818843521, "learning_rate": 6.456735056873004e-07, "loss": 0.2529, "step": 24518 }, { "epoch": 0.8414207275223061, "grad_norm": 0.7850992770424248, "learning_rate": 6.454003747705345e-07, "loss": 0.2568, "step": 24519 }, { "epoch": 0.8414550446122169, "grad_norm": 0.833599607935251, "learning_rate": 6.45127297649768e-07, "loss": 0.2507, "step": 24520 }, { "epoch": 0.8414893617021276, "grad_norm": 0.7784921328742163, "learning_rate": 6.448542743283787e-07, "loss": 0.2609, "step": 24521 }, { "epoch": 0.8415236787920385, "grad_norm": 0.8769565642232253, "learning_rate": 6.445813048097361e-07, "loss": 0.3726, "step": 24522 }, { "epoch": 0.8415579958819492, "grad_norm": 0.7335662572930696, "learning_rate": 6.443083890972135e-07, "loss": 0.2752, "step": 24523 }, { "epoch": 0.84159231297186, "grad_norm": 0.7299773061434105, "learning_rate": 6.440355271941828e-07, "loss": 0.2574, "step": 24524 }, { "epoch": 0.8416266300617707, "grad_norm": 0.8060109868930719, "learning_rate": 6.437627191040141e-07, "loss": 0.2734, "step": 24525 }, { "epoch": 0.8416609471516815, "grad_norm": 0.8835567835658367, "learning_rate": 6.434899648300785e-07, "loss": 0.2636, "step": 24526 }, { "epoch": 0.8416952642415924, "grad_norm": 0.8872912523969285, "learning_rate": 6.432172643757451e-07, "loss": 0.2986, "step": 24527 }, { "epoch": 0.8417295813315031, "grad_norm": 0.7903079704283823, "learning_rate": 6.429446177443838e-07, "loss": 0.2423, "step": 24528 }, { "epoch": 0.8417638984214139, "grad_norm": 0.9180631498243258, "learning_rate": 6.426720249393603e-07, "loss": 0.3203, "step": 24529 }, { "epoch": 0.8417982155113246, "grad_norm": 0.8839368541427808, "learning_rate": 6.423994859640454e-07, "loss": 0.2581, "step": 24530 }, { "epoch": 0.8418325326012354, "grad_norm": 0.8225264492916942, "learning_rate": 6.421270008218034e-07, "loss": 0.2842, "step": 24531 }, { "epoch": 0.8418668496911462, "grad_norm": 0.7583559480887275, "learning_rate": 6.418545695160006e-07, "loss": 0.2523, "step": 24532 }, { "epoch": 0.841901166781057, "grad_norm": 0.7818916211568882, "learning_rate": 6.415821920500053e-07, "loss": 0.3448, "step": 24533 }, { "epoch": 0.8419354838709677, "grad_norm": 0.7170807208368601, "learning_rate": 6.413098684271796e-07, "loss": 0.2181, "step": 24534 }, { "epoch": 0.8419698009608785, "grad_norm": 0.7178272619855065, "learning_rate": 6.410375986508888e-07, "loss": 0.3014, "step": 24535 }, { "epoch": 0.8420041180507893, "grad_norm": 0.7509864897992232, "learning_rate": 6.40765382724497e-07, "loss": 0.2737, "step": 24536 }, { "epoch": 0.8420384351407001, "grad_norm": 0.7265429433917298, "learning_rate": 6.404932206513659e-07, "loss": 0.2302, "step": 24537 }, { "epoch": 0.8420727522306108, "grad_norm": 0.7900425174938944, "learning_rate": 6.402211124348584e-07, "loss": 0.2551, "step": 24538 }, { "epoch": 0.8421070693205216, "grad_norm": 0.7695491177106051, "learning_rate": 6.399490580783357e-07, "loss": 0.2586, "step": 24539 }, { "epoch": 0.8421413864104323, "grad_norm": 0.8086079950313672, "learning_rate": 6.396770575851607e-07, "loss": 0.2375, "step": 24540 }, { "epoch": 0.8421757035003432, "grad_norm": 0.7934026064423827, "learning_rate": 6.394051109586897e-07, "loss": 0.3089, "step": 24541 }, { "epoch": 0.842210020590254, "grad_norm": 0.7790641130027335, "learning_rate": 6.391332182022869e-07, "loss": 0.2461, "step": 24542 }, { "epoch": 0.8422443376801647, "grad_norm": 0.6875152172607721, "learning_rate": 6.388613793193077e-07, "loss": 0.227, "step": 24543 }, { "epoch": 0.8422786547700755, "grad_norm": 0.9628218232642884, "learning_rate": 6.385895943131115e-07, "loss": 0.2475, "step": 24544 }, { "epoch": 0.8423129718599863, "grad_norm": 0.7770866844518461, "learning_rate": 6.383178631870562e-07, "loss": 0.3481, "step": 24545 }, { "epoch": 0.8423472889498971, "grad_norm": 0.9051780209913461, "learning_rate": 6.380461859444981e-07, "loss": 0.2516, "step": 24546 }, { "epoch": 0.8423816060398078, "grad_norm": 0.8875966790279706, "learning_rate": 6.377745625887943e-07, "loss": 0.2408, "step": 24547 }, { "epoch": 0.8424159231297186, "grad_norm": 0.8174255311926378, "learning_rate": 6.375029931232995e-07, "loss": 0.2554, "step": 24548 }, { "epoch": 0.8424502402196293, "grad_norm": 0.7614765369073379, "learning_rate": 6.372314775513689e-07, "loss": 0.3092, "step": 24549 }, { "epoch": 0.8424845573095402, "grad_norm": 0.7939247816147877, "learning_rate": 6.369600158763572e-07, "loss": 0.3019, "step": 24550 }, { "epoch": 0.842518874399451, "grad_norm": 0.767347027959731, "learning_rate": 6.366886081016188e-07, "loss": 0.2678, "step": 24551 }, { "epoch": 0.8425531914893617, "grad_norm": 0.7741755265868345, "learning_rate": 6.364172542305036e-07, "loss": 0.2877, "step": 24552 }, { "epoch": 0.8425875085792724, "grad_norm": 0.7667696347339108, "learning_rate": 6.361459542663673e-07, "loss": 0.247, "step": 24553 }, { "epoch": 0.8426218256691832, "grad_norm": 0.7928641625830247, "learning_rate": 6.358747082125594e-07, "loss": 0.2929, "step": 24554 }, { "epoch": 0.8426561427590941, "grad_norm": 0.7826742209975859, "learning_rate": 6.356035160724299e-07, "loss": 0.2388, "step": 24555 }, { "epoch": 0.8426904598490048, "grad_norm": 0.7439427119130541, "learning_rate": 6.353323778493331e-07, "loss": 0.266, "step": 24556 }, { "epoch": 0.8427247769389156, "grad_norm": 0.8079125338464279, "learning_rate": 6.35061293546615e-07, "loss": 0.2648, "step": 24557 }, { "epoch": 0.8427590940288263, "grad_norm": 0.8524166504412347, "learning_rate": 6.347902631676256e-07, "loss": 0.2054, "step": 24558 }, { "epoch": 0.8427934111187372, "grad_norm": 0.7896383463354134, "learning_rate": 6.34519286715713e-07, "loss": 0.2734, "step": 24559 }, { "epoch": 0.8428277282086479, "grad_norm": 0.8005696608131307, "learning_rate": 6.342483641942249e-07, "loss": 0.2731, "step": 24560 }, { "epoch": 0.8428620452985587, "grad_norm": 0.7325748672240431, "learning_rate": 6.339774956065081e-07, "loss": 0.2346, "step": 24561 }, { "epoch": 0.8428963623884694, "grad_norm": 0.866216287054659, "learning_rate": 6.337066809559095e-07, "loss": 0.2801, "step": 24562 }, { "epoch": 0.8429306794783802, "grad_norm": 0.8453587856698672, "learning_rate": 6.334359202457752e-07, "loss": 0.2238, "step": 24563 }, { "epoch": 0.842964996568291, "grad_norm": 0.727847833866646, "learning_rate": 6.33165213479447e-07, "loss": 0.2515, "step": 24564 }, { "epoch": 0.8429993136582018, "grad_norm": 1.2228608658545512, "learning_rate": 6.328945606602733e-07, "loss": 0.2546, "step": 24565 }, { "epoch": 0.8430336307481126, "grad_norm": 0.8138294065557831, "learning_rate": 6.326239617915947e-07, "loss": 0.2714, "step": 24566 }, { "epoch": 0.8430679478380233, "grad_norm": 0.7395318567865027, "learning_rate": 6.323534168767553e-07, "loss": 0.2681, "step": 24567 }, { "epoch": 0.8431022649279342, "grad_norm": 0.8042552941992742, "learning_rate": 6.320829259190975e-07, "loss": 0.2408, "step": 24568 }, { "epoch": 0.8431365820178449, "grad_norm": 0.8281813887370972, "learning_rate": 6.318124889219623e-07, "loss": 0.3096, "step": 24569 }, { "epoch": 0.8431708991077557, "grad_norm": 0.8182208813151611, "learning_rate": 6.315421058886912e-07, "loss": 0.2504, "step": 24570 }, { "epoch": 0.8432052161976664, "grad_norm": 0.6627237627585922, "learning_rate": 6.312717768226245e-07, "loss": 0.2904, "step": 24571 }, { "epoch": 0.8432395332875772, "grad_norm": 0.7661408135505003, "learning_rate": 6.310015017271021e-07, "loss": 0.2126, "step": 24572 }, { "epoch": 0.843273850377488, "grad_norm": 0.8719257129970197, "learning_rate": 6.307312806054605e-07, "loss": 0.2771, "step": 24573 }, { "epoch": 0.8433081674673988, "grad_norm": 0.7419425697265271, "learning_rate": 6.304611134610423e-07, "loss": 0.2691, "step": 24574 }, { "epoch": 0.8433424845573095, "grad_norm": 0.8073128955140937, "learning_rate": 6.301910002971812e-07, "loss": 0.293, "step": 24575 }, { "epoch": 0.8433768016472203, "grad_norm": 0.8242564005625412, "learning_rate": 6.299209411172142e-07, "loss": 0.2841, "step": 24576 }, { "epoch": 0.843411118737131, "grad_norm": 0.8688298503541584, "learning_rate": 6.296509359244818e-07, "loss": 0.2218, "step": 24577 }, { "epoch": 0.8434454358270419, "grad_norm": 0.7093100792051223, "learning_rate": 6.293809847223142e-07, "loss": 0.2323, "step": 24578 }, { "epoch": 0.8434797529169527, "grad_norm": 0.6706656877367175, "learning_rate": 6.29111087514051e-07, "loss": 0.2624, "step": 24579 }, { "epoch": 0.8435140700068634, "grad_norm": 0.8507634120104006, "learning_rate": 6.288412443030234e-07, "loss": 0.3, "step": 24580 }, { "epoch": 0.8435483870967742, "grad_norm": 0.8147973355017804, "learning_rate": 6.285714550925659e-07, "loss": 0.2592, "step": 24581 }, { "epoch": 0.843582704186685, "grad_norm": 0.7548350795694697, "learning_rate": 6.283017198860114e-07, "loss": 0.2374, "step": 24582 }, { "epoch": 0.8436170212765958, "grad_norm": 0.7077697183780584, "learning_rate": 6.280320386866923e-07, "loss": 0.2339, "step": 24583 }, { "epoch": 0.8436513383665065, "grad_norm": 0.6640588011465711, "learning_rate": 6.277624114979402e-07, "loss": 0.2074, "step": 24584 }, { "epoch": 0.8436856554564173, "grad_norm": 0.6694444452201922, "learning_rate": 6.274928383230861e-07, "loss": 0.2512, "step": 24585 }, { "epoch": 0.843719972546328, "grad_norm": 0.7661282764148871, "learning_rate": 6.272233191654609e-07, "loss": 0.2488, "step": 24586 }, { "epoch": 0.8437542896362389, "grad_norm": 0.794663130538745, "learning_rate": 6.269538540283915e-07, "loss": 0.2664, "step": 24587 }, { "epoch": 0.8437886067261496, "grad_norm": 0.8353774110372827, "learning_rate": 6.266844429152108e-07, "loss": 0.3208, "step": 24588 }, { "epoch": 0.8438229238160604, "grad_norm": 0.7466490697392242, "learning_rate": 6.264150858292439e-07, "loss": 0.2593, "step": 24589 }, { "epoch": 0.8438572409059711, "grad_norm": 0.7685362347692367, "learning_rate": 6.261457827738198e-07, "loss": 0.3058, "step": 24590 }, { "epoch": 0.843891557995882, "grad_norm": 0.7753022595506114, "learning_rate": 6.258765337522649e-07, "loss": 0.3004, "step": 24591 }, { "epoch": 0.8439258750857928, "grad_norm": 0.7759602532400094, "learning_rate": 6.256073387679057e-07, "loss": 0.2725, "step": 24592 }, { "epoch": 0.8439601921757035, "grad_norm": 0.9224511449296697, "learning_rate": 6.25338197824068e-07, "loss": 0.2447, "step": 24593 }, { "epoch": 0.8439945092656143, "grad_norm": 0.7212248965567042, "learning_rate": 6.250691109240764e-07, "loss": 0.2542, "step": 24594 }, { "epoch": 0.844028826355525, "grad_norm": 0.8046406933869145, "learning_rate": 6.24800078071256e-07, "loss": 0.2933, "step": 24595 }, { "epoch": 0.8440631434454359, "grad_norm": 0.769824777634437, "learning_rate": 6.245310992689279e-07, "loss": 0.3162, "step": 24596 }, { "epoch": 0.8440974605353466, "grad_norm": 0.7200782703303583, "learning_rate": 6.242621745204186e-07, "loss": 0.2386, "step": 24597 }, { "epoch": 0.8441317776252574, "grad_norm": 0.8115982167786463, "learning_rate": 6.23993303829048e-07, "loss": 0.2262, "step": 24598 }, { "epoch": 0.8441660947151681, "grad_norm": 0.8024436677972331, "learning_rate": 6.237244871981368e-07, "loss": 0.2602, "step": 24599 }, { "epoch": 0.8442004118050789, "grad_norm": 0.9358700584960112, "learning_rate": 6.234557246310091e-07, "loss": 0.2296, "step": 24600 }, { "epoch": 0.8442347288949897, "grad_norm": 0.7432243833960972, "learning_rate": 6.231870161309828e-07, "loss": 0.2638, "step": 24601 }, { "epoch": 0.8442690459849005, "grad_norm": 0.7885415720769869, "learning_rate": 6.229183617013779e-07, "loss": 0.2886, "step": 24602 }, { "epoch": 0.8443033630748112, "grad_norm": 0.7489243393892648, "learning_rate": 6.226497613455135e-07, "loss": 0.2277, "step": 24603 }, { "epoch": 0.844337680164722, "grad_norm": 0.7116705811933005, "learning_rate": 6.223812150667074e-07, "loss": 0.2591, "step": 24604 }, { "epoch": 0.8443719972546329, "grad_norm": 0.7771542512663258, "learning_rate": 6.221127228682783e-07, "loss": 0.2083, "step": 24605 }, { "epoch": 0.8444063143445436, "grad_norm": 0.8140751406573165, "learning_rate": 6.218442847535416e-07, "loss": 0.2328, "step": 24606 }, { "epoch": 0.8444406314344544, "grad_norm": 0.8000501699379954, "learning_rate": 6.21575900725816e-07, "loss": 0.3111, "step": 24607 }, { "epoch": 0.8444749485243651, "grad_norm": 0.7688298395767023, "learning_rate": 6.213075707884131e-07, "loss": 0.2942, "step": 24608 }, { "epoch": 0.8445092656142759, "grad_norm": 0.859581042565722, "learning_rate": 6.21039294944652e-07, "loss": 0.2692, "step": 24609 }, { "epoch": 0.8445435827041867, "grad_norm": 0.7825773649612701, "learning_rate": 6.207710731978433e-07, "loss": 0.2283, "step": 24610 }, { "epoch": 0.8445778997940975, "grad_norm": 0.7856121507080736, "learning_rate": 6.20502905551304e-07, "loss": 0.3162, "step": 24611 }, { "epoch": 0.8446122168840082, "grad_norm": 0.7224777625073818, "learning_rate": 6.202347920083446e-07, "loss": 0.2167, "step": 24612 }, { "epoch": 0.844646533973919, "grad_norm": 0.8329898991953318, "learning_rate": 6.19966732572278e-07, "loss": 0.246, "step": 24613 }, { "epoch": 0.8446808510638298, "grad_norm": 0.7603890941444722, "learning_rate": 6.196987272464156e-07, "loss": 0.2479, "step": 24614 }, { "epoch": 0.8447151681537406, "grad_norm": 0.7114760174023961, "learning_rate": 6.194307760340684e-07, "loss": 0.2187, "step": 24615 }, { "epoch": 0.8447494852436513, "grad_norm": 0.7173986290013631, "learning_rate": 6.191628789385468e-07, "loss": 0.2319, "step": 24616 }, { "epoch": 0.8447838023335621, "grad_norm": 0.8577879052814281, "learning_rate": 6.188950359631607e-07, "loss": 0.3032, "step": 24617 }, { "epoch": 0.8448181194234728, "grad_norm": 0.7513424811385634, "learning_rate": 6.186272471112193e-07, "loss": 0.2285, "step": 24618 }, { "epoch": 0.8448524365133837, "grad_norm": 0.8323607030676617, "learning_rate": 6.183595123860275e-07, "loss": 0.2487, "step": 24619 }, { "epoch": 0.8448867536032945, "grad_norm": 0.7910379452376489, "learning_rate": 6.180918317908974e-07, "loss": 0.2446, "step": 24620 }, { "epoch": 0.8449210706932052, "grad_norm": 0.7670815876640923, "learning_rate": 6.178242053291344e-07, "loss": 0.2663, "step": 24621 }, { "epoch": 0.844955387783116, "grad_norm": 0.8252707950426246, "learning_rate": 6.175566330040422e-07, "loss": 0.2368, "step": 24622 }, { "epoch": 0.8449897048730267, "grad_norm": 0.6845415563157882, "learning_rate": 6.172891148189303e-07, "loss": 0.2323, "step": 24623 }, { "epoch": 0.8450240219629376, "grad_norm": 0.7939670561234011, "learning_rate": 6.17021650777101e-07, "loss": 0.241, "step": 24624 }, { "epoch": 0.8450583390528483, "grad_norm": 0.8686113948226712, "learning_rate": 6.167542408818589e-07, "loss": 0.2468, "step": 24625 }, { "epoch": 0.8450926561427591, "grad_norm": 0.7405713590635227, "learning_rate": 6.164868851365074e-07, "loss": 0.2355, "step": 24626 }, { "epoch": 0.8451269732326698, "grad_norm": 0.913388498111827, "learning_rate": 6.162195835443502e-07, "loss": 0.2674, "step": 24627 }, { "epoch": 0.8451612903225807, "grad_norm": 0.8277911097397236, "learning_rate": 6.159523361086895e-07, "loss": 0.2623, "step": 24628 }, { "epoch": 0.8451956074124914, "grad_norm": 0.8110135956395075, "learning_rate": 6.156851428328259e-07, "loss": 0.2344, "step": 24629 }, { "epoch": 0.8452299245024022, "grad_norm": 0.7340944265154987, "learning_rate": 6.154180037200614e-07, "loss": 0.2376, "step": 24630 }, { "epoch": 0.845264241592313, "grad_norm": 0.7459002996737131, "learning_rate": 6.151509187736938e-07, "loss": 0.2546, "step": 24631 }, { "epoch": 0.8452985586822237, "grad_norm": 0.7941151715001503, "learning_rate": 6.148838879970265e-07, "loss": 0.2917, "step": 24632 }, { "epoch": 0.8453328757721346, "grad_norm": 0.8888104366887034, "learning_rate": 6.146169113933554e-07, "loss": 0.2795, "step": 24633 }, { "epoch": 0.8453671928620453, "grad_norm": 1.2012249865191174, "learning_rate": 6.143499889659793e-07, "loss": 0.2937, "step": 24634 }, { "epoch": 0.8454015099519561, "grad_norm": 0.8665366465009667, "learning_rate": 6.140831207181958e-07, "loss": 0.2483, "step": 24635 }, { "epoch": 0.8454358270418668, "grad_norm": 0.7278943296166994, "learning_rate": 6.13816306653302e-07, "loss": 0.2181, "step": 24636 }, { "epoch": 0.8454701441317777, "grad_norm": 0.7737485850699292, "learning_rate": 6.135495467745939e-07, "loss": 0.2906, "step": 24637 }, { "epoch": 0.8455044612216884, "grad_norm": 0.8505181133989371, "learning_rate": 6.13282841085367e-07, "loss": 0.2596, "step": 24638 }, { "epoch": 0.8455387783115992, "grad_norm": 0.852692835486415, "learning_rate": 6.13016189588917e-07, "loss": 0.3222, "step": 24639 }, { "epoch": 0.8455730954015099, "grad_norm": 0.769152885210134, "learning_rate": 6.127495922885352e-07, "loss": 0.2189, "step": 24640 }, { "epoch": 0.8456074124914207, "grad_norm": 0.7829905545008898, "learning_rate": 6.124830491875194e-07, "loss": 0.241, "step": 24641 }, { "epoch": 0.8456417295813315, "grad_norm": 0.7384426846547624, "learning_rate": 6.122165602891583e-07, "loss": 0.253, "step": 24642 }, { "epoch": 0.8456760466712423, "grad_norm": 0.7885865966243181, "learning_rate": 6.119501255967469e-07, "loss": 0.2057, "step": 24643 }, { "epoch": 0.845710363761153, "grad_norm": 0.7934712597013853, "learning_rate": 6.116837451135765e-07, "loss": 0.2519, "step": 24644 }, { "epoch": 0.8457446808510638, "grad_norm": 0.7176324612211229, "learning_rate": 6.114174188429356e-07, "loss": 0.2244, "step": 24645 }, { "epoch": 0.8457789979409746, "grad_norm": 0.752242221091572, "learning_rate": 6.111511467881176e-07, "loss": 0.2295, "step": 24646 }, { "epoch": 0.8458133150308854, "grad_norm": 0.7141065340220591, "learning_rate": 6.108849289524088e-07, "loss": 0.2521, "step": 24647 }, { "epoch": 0.8458476321207962, "grad_norm": 0.7928113674753701, "learning_rate": 6.106187653391004e-07, "loss": 0.263, "step": 24648 }, { "epoch": 0.8458819492107069, "grad_norm": 0.7233250995212935, "learning_rate": 6.103526559514789e-07, "loss": 0.2192, "step": 24649 }, { "epoch": 0.8459162663006177, "grad_norm": 1.1216605886457025, "learning_rate": 6.10086600792833e-07, "loss": 0.2844, "step": 24650 }, { "epoch": 0.8459505833905285, "grad_norm": 0.7971978199482446, "learning_rate": 6.098205998664486e-07, "loss": 0.2249, "step": 24651 }, { "epoch": 0.8459849004804393, "grad_norm": 0.7383081363862674, "learning_rate": 6.095546531756125e-07, "loss": 0.2048, "step": 24652 }, { "epoch": 0.84601921757035, "grad_norm": 0.8592136235591042, "learning_rate": 6.092887607236109e-07, "loss": 0.2784, "step": 24653 }, { "epoch": 0.8460535346602608, "grad_norm": 0.7230884161579119, "learning_rate": 6.090229225137256e-07, "loss": 0.2387, "step": 24654 }, { "epoch": 0.8460878517501715, "grad_norm": 0.7593522606036975, "learning_rate": 6.087571385492447e-07, "loss": 0.2492, "step": 24655 }, { "epoch": 0.8461221688400824, "grad_norm": 0.7933506525780746, "learning_rate": 6.084914088334487e-07, "loss": 0.3213, "step": 24656 }, { "epoch": 0.8461564859299932, "grad_norm": 0.9504465472821371, "learning_rate": 6.082257333696212e-07, "loss": 0.2602, "step": 24657 }, { "epoch": 0.8461908030199039, "grad_norm": 0.8551399085446628, "learning_rate": 6.07960112161044e-07, "loss": 0.2827, "step": 24658 }, { "epoch": 0.8462251201098147, "grad_norm": 0.8278993920903479, "learning_rate": 6.076945452109994e-07, "loss": 0.254, "step": 24659 }, { "epoch": 0.8462594371997255, "grad_norm": 0.8967544210152628, "learning_rate": 6.074290325227683e-07, "loss": 0.2356, "step": 24660 }, { "epoch": 0.8462937542896363, "grad_norm": 0.7685601752280697, "learning_rate": 6.071635740996296e-07, "loss": 0.2444, "step": 24661 }, { "epoch": 0.846328071379547, "grad_norm": 0.7564868086935064, "learning_rate": 6.068981699448645e-07, "loss": 0.2652, "step": 24662 }, { "epoch": 0.8463623884694578, "grad_norm": 0.7592618749216757, "learning_rate": 6.066328200617488e-07, "loss": 0.283, "step": 24663 }, { "epoch": 0.8463967055593685, "grad_norm": 0.6931210575133171, "learning_rate": 6.063675244535644e-07, "loss": 0.2275, "step": 24664 }, { "epoch": 0.8464310226492794, "grad_norm": 0.8387227929059745, "learning_rate": 6.061022831235857e-07, "loss": 0.2459, "step": 24665 }, { "epoch": 0.8464653397391901, "grad_norm": 0.8727221441703246, "learning_rate": 6.058370960750898e-07, "loss": 0.2463, "step": 24666 }, { "epoch": 0.8464996568291009, "grad_norm": 0.8012715724317387, "learning_rate": 6.055719633113549e-07, "loss": 0.2354, "step": 24667 }, { "epoch": 0.8465339739190116, "grad_norm": 0.7807574683286267, "learning_rate": 6.053068848356547e-07, "loss": 0.2454, "step": 24668 }, { "epoch": 0.8465682910089224, "grad_norm": 0.7695318808822653, "learning_rate": 6.050418606512637e-07, "loss": 0.2567, "step": 24669 }, { "epoch": 0.8466026080988333, "grad_norm": 0.7643601899384276, "learning_rate": 6.047768907614565e-07, "loss": 0.2595, "step": 24670 }, { "epoch": 0.846636925188744, "grad_norm": 0.789987521116038, "learning_rate": 6.045119751695067e-07, "loss": 0.2527, "step": 24671 }, { "epoch": 0.8466712422786548, "grad_norm": 0.7983767322875273, "learning_rate": 6.042471138786866e-07, "loss": 0.2179, "step": 24672 }, { "epoch": 0.8467055593685655, "grad_norm": 0.7099933304357678, "learning_rate": 6.039823068922685e-07, "loss": 0.266, "step": 24673 }, { "epoch": 0.8467398764584764, "grad_norm": 0.7582095078523291, "learning_rate": 6.037175542135237e-07, "loss": 0.2941, "step": 24674 }, { "epoch": 0.8467741935483871, "grad_norm": 0.7494472846104733, "learning_rate": 6.034528558457231e-07, "loss": 0.2974, "step": 24675 }, { "epoch": 0.8468085106382979, "grad_norm": 0.7746962748200327, "learning_rate": 6.031882117921378e-07, "loss": 0.2571, "step": 24676 }, { "epoch": 0.8468428277282086, "grad_norm": 0.8778936469636942, "learning_rate": 6.029236220560336e-07, "loss": 0.285, "step": 24677 }, { "epoch": 0.8468771448181194, "grad_norm": 0.8211763262038762, "learning_rate": 6.02659086640684e-07, "loss": 0.2652, "step": 24678 }, { "epoch": 0.8469114619080302, "grad_norm": 0.786744937115642, "learning_rate": 6.023946055493529e-07, "loss": 0.2979, "step": 24679 }, { "epoch": 0.846945778997941, "grad_norm": 0.7532840930523309, "learning_rate": 6.021301787853096e-07, "loss": 0.2445, "step": 24680 }, { "epoch": 0.8469800960878517, "grad_norm": 0.7958908579313521, "learning_rate": 6.018658063518207e-07, "loss": 0.2936, "step": 24681 }, { "epoch": 0.8470144131777625, "grad_norm": 0.8056465389153645, "learning_rate": 6.016014882521521e-07, "loss": 0.3007, "step": 24682 }, { "epoch": 0.8470487302676734, "grad_norm": 0.8477793424429366, "learning_rate": 6.013372244895687e-07, "loss": 0.2752, "step": 24683 }, { "epoch": 0.8470830473575841, "grad_norm": 0.8300289816094312, "learning_rate": 6.01073015067336e-07, "loss": 0.2635, "step": 24684 }, { "epoch": 0.8471173644474949, "grad_norm": 0.8069086246557677, "learning_rate": 6.008088599887179e-07, "loss": 0.2829, "step": 24685 }, { "epoch": 0.8471516815374056, "grad_norm": 0.8636707166397402, "learning_rate": 6.005447592569763e-07, "loss": 0.2421, "step": 24686 }, { "epoch": 0.8471859986273164, "grad_norm": 0.7787632737225686, "learning_rate": 6.002807128753751e-07, "loss": 0.2458, "step": 24687 }, { "epoch": 0.8472203157172272, "grad_norm": 0.8375728901714967, "learning_rate": 6.000167208471774e-07, "loss": 0.2433, "step": 24688 }, { "epoch": 0.847254632807138, "grad_norm": 0.8051337458795723, "learning_rate": 5.997527831756412e-07, "loss": 0.2651, "step": 24689 }, { "epoch": 0.8472889498970487, "grad_norm": 0.9002604958634599, "learning_rate": 5.994888998640313e-07, "loss": 0.2678, "step": 24690 }, { "epoch": 0.8473232669869595, "grad_norm": 0.8118819356602, "learning_rate": 5.992250709156044e-07, "loss": 0.3201, "step": 24691 }, { "epoch": 0.8473575840768702, "grad_norm": 0.7567489451411792, "learning_rate": 5.989612963336211e-07, "loss": 0.2215, "step": 24692 }, { "epoch": 0.8473919011667811, "grad_norm": 0.828959979407818, "learning_rate": 5.986975761213399e-07, "loss": 0.2426, "step": 24693 }, { "epoch": 0.8474262182566918, "grad_norm": 0.9080568984279797, "learning_rate": 5.984339102820186e-07, "loss": 0.2739, "step": 24694 }, { "epoch": 0.8474605353466026, "grad_norm": 0.7305032993754135, "learning_rate": 5.981702988189147e-07, "loss": 0.2553, "step": 24695 }, { "epoch": 0.8474948524365133, "grad_norm": 0.8393105399145191, "learning_rate": 5.979067417352847e-07, "loss": 0.2709, "step": 24696 }, { "epoch": 0.8475291695264242, "grad_norm": 0.8230754823368973, "learning_rate": 5.976432390343856e-07, "loss": 0.3232, "step": 24697 }, { "epoch": 0.847563486616335, "grad_norm": 0.803657561634636, "learning_rate": 5.973797907194695e-07, "loss": 0.2748, "step": 24698 }, { "epoch": 0.8475978037062457, "grad_norm": 0.7695239530019656, "learning_rate": 5.971163967937954e-07, "loss": 0.2893, "step": 24699 }, { "epoch": 0.8476321207961565, "grad_norm": 0.7465354671002945, "learning_rate": 5.968530572606135e-07, "loss": 0.2407, "step": 24700 }, { "epoch": 0.8476664378860672, "grad_norm": 0.7584160804466682, "learning_rate": 5.965897721231789e-07, "loss": 0.235, "step": 24701 }, { "epoch": 0.8477007549759781, "grad_norm": 0.8069127562803858, "learning_rate": 5.963265413847436e-07, "loss": 0.2652, "step": 24702 }, { "epoch": 0.8477350720658888, "grad_norm": 0.8657288001338355, "learning_rate": 5.960633650485597e-07, "loss": 0.2524, "step": 24703 }, { "epoch": 0.8477693891557996, "grad_norm": 0.7202188790062705, "learning_rate": 5.958002431178783e-07, "loss": 0.2589, "step": 24704 }, { "epoch": 0.8478037062457103, "grad_norm": 0.8438186552388417, "learning_rate": 5.955371755959505e-07, "loss": 0.2828, "step": 24705 }, { "epoch": 0.8478380233356212, "grad_norm": 0.7502338884509088, "learning_rate": 5.952741624860253e-07, "loss": 0.2722, "step": 24706 }, { "epoch": 0.847872340425532, "grad_norm": 0.7923840770359148, "learning_rate": 5.950112037913525e-07, "loss": 0.2529, "step": 24707 }, { "epoch": 0.8479066575154427, "grad_norm": 0.7551431846222397, "learning_rate": 5.947482995151821e-07, "loss": 0.2358, "step": 24708 }, { "epoch": 0.8479409746053534, "grad_norm": 0.7836389952743131, "learning_rate": 5.944854496607577e-07, "loss": 0.2468, "step": 24709 }, { "epoch": 0.8479752916952642, "grad_norm": 0.7609495605200947, "learning_rate": 5.942226542313306e-07, "loss": 0.2376, "step": 24710 }, { "epoch": 0.8480096087851751, "grad_norm": 0.6920828790065929, "learning_rate": 5.939599132301471e-07, "loss": 0.2491, "step": 24711 }, { "epoch": 0.8480439258750858, "grad_norm": 0.724719616113033, "learning_rate": 5.936972266604496e-07, "loss": 0.2786, "step": 24712 }, { "epoch": 0.8480782429649966, "grad_norm": 0.7874684077080175, "learning_rate": 5.934345945254883e-07, "loss": 0.2206, "step": 24713 }, { "epoch": 0.8481125600549073, "grad_norm": 0.7623150941574209, "learning_rate": 5.931720168285033e-07, "loss": 0.2173, "step": 24714 }, { "epoch": 0.8481468771448181, "grad_norm": 0.6852374579113876, "learning_rate": 5.929094935727403e-07, "loss": 0.2185, "step": 24715 }, { "epoch": 0.8481811942347289, "grad_norm": 1.329457122131206, "learning_rate": 5.926470247614424e-07, "loss": 0.2905, "step": 24716 }, { "epoch": 0.8482155113246397, "grad_norm": 0.8555303356257848, "learning_rate": 5.92384610397852e-07, "loss": 0.3149, "step": 24717 }, { "epoch": 0.8482498284145504, "grad_norm": 0.8232566643443053, "learning_rate": 5.921222504852109e-07, "loss": 0.2634, "step": 24718 }, { "epoch": 0.8482841455044612, "grad_norm": 0.8944199069010038, "learning_rate": 5.918599450267604e-07, "loss": 0.2821, "step": 24719 }, { "epoch": 0.848318462594372, "grad_norm": 0.6771951233718779, "learning_rate": 5.915976940257423e-07, "loss": 0.2191, "step": 24720 }, { "epoch": 0.8483527796842828, "grad_norm": 0.7547536609684351, "learning_rate": 5.913354974853924e-07, "loss": 0.2658, "step": 24721 }, { "epoch": 0.8483870967741935, "grad_norm": 0.7667581802061668, "learning_rate": 5.910733554089549e-07, "loss": 0.3041, "step": 24722 }, { "epoch": 0.8484214138641043, "grad_norm": 0.6742233786791305, "learning_rate": 5.908112677996641e-07, "loss": 0.2423, "step": 24723 }, { "epoch": 0.848455730954015, "grad_norm": 0.7082704002962154, "learning_rate": 5.9054923466076e-07, "loss": 0.2127, "step": 24724 }, { "epoch": 0.8484900480439259, "grad_norm": 0.9669311714350709, "learning_rate": 5.902872559954792e-07, "loss": 0.2396, "step": 24725 }, { "epoch": 0.8485243651338367, "grad_norm": 0.7380754744453227, "learning_rate": 5.900253318070581e-07, "loss": 0.2485, "step": 24726 }, { "epoch": 0.8485586822237474, "grad_norm": 0.7882424606330253, "learning_rate": 5.897634620987325e-07, "loss": 0.2518, "step": 24727 }, { "epoch": 0.8485929993136582, "grad_norm": 0.8838635193787381, "learning_rate": 5.895016468737374e-07, "loss": 0.2778, "step": 24728 }, { "epoch": 0.848627316403569, "grad_norm": 0.7406170188293061, "learning_rate": 5.892398861353083e-07, "loss": 0.2812, "step": 24729 }, { "epoch": 0.8486616334934798, "grad_norm": 0.7834700607797211, "learning_rate": 5.889781798866756e-07, "loss": 0.2691, "step": 24730 }, { "epoch": 0.8486959505833905, "grad_norm": 0.7604891541744163, "learning_rate": 5.887165281310763e-07, "loss": 0.3151, "step": 24731 }, { "epoch": 0.8487302676733013, "grad_norm": 0.9322515758303148, "learning_rate": 5.884549308717419e-07, "loss": 0.2774, "step": 24732 }, { "epoch": 0.848764584763212, "grad_norm": 0.8447827786770321, "learning_rate": 5.881933881119017e-07, "loss": 0.263, "step": 24733 }, { "epoch": 0.8487989018531229, "grad_norm": 0.7398183441184003, "learning_rate": 5.879318998547906e-07, "loss": 0.3202, "step": 24734 }, { "epoch": 0.8488332189430337, "grad_norm": 0.802766695626028, "learning_rate": 5.876704661036342e-07, "loss": 0.3219, "step": 24735 }, { "epoch": 0.8488675360329444, "grad_norm": 0.9702966451439846, "learning_rate": 5.874090868616678e-07, "loss": 0.2372, "step": 24736 }, { "epoch": 0.8489018531228552, "grad_norm": 0.822920049296135, "learning_rate": 5.871477621321159e-07, "loss": 0.2676, "step": 24737 }, { "epoch": 0.8489361702127659, "grad_norm": 0.9262807281286282, "learning_rate": 5.868864919182088e-07, "loss": 0.2518, "step": 24738 }, { "epoch": 0.8489704873026768, "grad_norm": 1.137749573289513, "learning_rate": 5.866252762231739e-07, "loss": 0.3167, "step": 24739 }, { "epoch": 0.8490048043925875, "grad_norm": 0.7354313622469454, "learning_rate": 5.863641150502387e-07, "loss": 0.2664, "step": 24740 }, { "epoch": 0.8490391214824983, "grad_norm": 0.7676135784375274, "learning_rate": 5.861030084026281e-07, "loss": 0.2168, "step": 24741 }, { "epoch": 0.849073438572409, "grad_norm": 0.7753014642564263, "learning_rate": 5.858419562835693e-07, "loss": 0.3013, "step": 24742 }, { "epoch": 0.8491077556623199, "grad_norm": 0.8097682652263666, "learning_rate": 5.85580958696288e-07, "loss": 0.275, "step": 24743 }, { "epoch": 0.8491420727522306, "grad_norm": 0.8470888751745521, "learning_rate": 5.853200156440048e-07, "loss": 0.2754, "step": 24744 }, { "epoch": 0.8491763898421414, "grad_norm": 0.7003932876543773, "learning_rate": 5.850591271299478e-07, "loss": 0.2645, "step": 24745 }, { "epoch": 0.8492107069320521, "grad_norm": 0.7208161954279957, "learning_rate": 5.847982931573371e-07, "loss": 0.2579, "step": 24746 }, { "epoch": 0.8492450240219629, "grad_norm": 0.8368581157601861, "learning_rate": 5.845375137293957e-07, "loss": 0.3249, "step": 24747 }, { "epoch": 0.8492793411118738, "grad_norm": 0.7603849444161692, "learning_rate": 5.842767888493456e-07, "loss": 0.2901, "step": 24748 }, { "epoch": 0.8493136582017845, "grad_norm": 0.7696946042902477, "learning_rate": 5.840161185204068e-07, "loss": 0.2697, "step": 24749 }, { "epoch": 0.8493479752916953, "grad_norm": 0.8449825345815719, "learning_rate": 5.837555027458014e-07, "loss": 0.2656, "step": 24750 }, { "epoch": 0.849382292381606, "grad_norm": 0.841754522041355, "learning_rate": 5.834949415287472e-07, "loss": 0.286, "step": 24751 }, { "epoch": 0.8494166094715169, "grad_norm": 0.7838168007868148, "learning_rate": 5.83234434872465e-07, "loss": 0.2855, "step": 24752 }, { "epoch": 0.8494509265614276, "grad_norm": 0.9209203187831826, "learning_rate": 5.8297398278017e-07, "loss": 0.2702, "step": 24753 }, { "epoch": 0.8494852436513384, "grad_norm": 0.804965199677921, "learning_rate": 5.82713585255083e-07, "loss": 0.2694, "step": 24754 }, { "epoch": 0.8495195607412491, "grad_norm": 0.8109514262109135, "learning_rate": 5.824532423004203e-07, "loss": 0.255, "step": 24755 }, { "epoch": 0.8495538778311599, "grad_norm": 0.7659782757750135, "learning_rate": 5.821929539193955e-07, "loss": 0.2685, "step": 24756 }, { "epoch": 0.8495881949210707, "grad_norm": 0.8013636434664313, "learning_rate": 5.81932720115228e-07, "loss": 0.2738, "step": 24757 }, { "epoch": 0.8496225120109815, "grad_norm": 0.7734662411941435, "learning_rate": 5.816725408911295e-07, "loss": 0.2762, "step": 24758 }, { "epoch": 0.8496568291008922, "grad_norm": 0.688141725934451, "learning_rate": 5.81412416250316e-07, "loss": 0.2426, "step": 24759 }, { "epoch": 0.849691146190803, "grad_norm": 0.7098659275301311, "learning_rate": 5.81152346196e-07, "loss": 0.2735, "step": 24760 }, { "epoch": 0.8497254632807137, "grad_norm": 0.7426131226283526, "learning_rate": 5.808923307313952e-07, "loss": 0.1999, "step": 24761 }, { "epoch": 0.8497597803706246, "grad_norm": 0.8410909662987567, "learning_rate": 5.806323698597139e-07, "loss": 0.3053, "step": 24762 }, { "epoch": 0.8497940974605354, "grad_norm": 0.8459818415162874, "learning_rate": 5.803724635841668e-07, "loss": 0.2388, "step": 24763 }, { "epoch": 0.8498284145504461, "grad_norm": 0.74341381441335, "learning_rate": 5.801126119079658e-07, "loss": 0.2969, "step": 24764 }, { "epoch": 0.8498627316403569, "grad_norm": 0.7588020985339748, "learning_rate": 5.798528148343185e-07, "loss": 0.2442, "step": 24765 }, { "epoch": 0.8498970487302677, "grad_norm": 0.7053607856317251, "learning_rate": 5.795930723664389e-07, "loss": 0.2164, "step": 24766 }, { "epoch": 0.8499313658201785, "grad_norm": 0.8019705721436312, "learning_rate": 5.793333845075305e-07, "loss": 0.2407, "step": 24767 }, { "epoch": 0.8499656829100892, "grad_norm": 0.9958381236583551, "learning_rate": 5.790737512608064e-07, "loss": 0.2513, "step": 24768 }, { "epoch": 0.85, "grad_norm": 0.8272237586934463, "learning_rate": 5.788141726294704e-07, "loss": 0.2451, "step": 24769 }, { "epoch": 0.8500343170899107, "grad_norm": 0.7921085816661009, "learning_rate": 5.785546486167304e-07, "loss": 0.2113, "step": 24770 }, { "epoch": 0.8500686341798216, "grad_norm": 0.7627312917604386, "learning_rate": 5.782951792257945e-07, "loss": 0.2661, "step": 24771 }, { "epoch": 0.8501029512697323, "grad_norm": 0.8007215100986272, "learning_rate": 5.780357644598656e-07, "loss": 0.3002, "step": 24772 }, { "epoch": 0.8501372683596431, "grad_norm": 1.0424297831053129, "learning_rate": 5.77776404322149e-07, "loss": 0.2976, "step": 24773 }, { "epoch": 0.8501715854495538, "grad_norm": 0.7508206542063023, "learning_rate": 5.775170988158491e-07, "loss": 0.285, "step": 24774 }, { "epoch": 0.8502059025394647, "grad_norm": 0.7621792373111481, "learning_rate": 5.772578479441709e-07, "loss": 0.2617, "step": 24775 }, { "epoch": 0.8502402196293755, "grad_norm": 0.7089145838498758, "learning_rate": 5.769986517103132e-07, "loss": 0.2978, "step": 24776 }, { "epoch": 0.8502745367192862, "grad_norm": 0.8266605237462786, "learning_rate": 5.767395101174811e-07, "loss": 0.2466, "step": 24777 }, { "epoch": 0.850308853809197, "grad_norm": 0.779748398751667, "learning_rate": 5.76480423168877e-07, "loss": 0.285, "step": 24778 }, { "epoch": 0.8503431708991077, "grad_norm": 0.7131422703662348, "learning_rate": 5.762213908676972e-07, "loss": 0.2731, "step": 24779 }, { "epoch": 0.8503774879890186, "grad_norm": 0.753759587919456, "learning_rate": 5.759624132171471e-07, "loss": 0.2721, "step": 24780 }, { "epoch": 0.8504118050789293, "grad_norm": 0.7755593423344737, "learning_rate": 5.757034902204222e-07, "loss": 0.2793, "step": 24781 }, { "epoch": 0.8504461221688401, "grad_norm": 0.7053627660323536, "learning_rate": 5.754446218807225e-07, "loss": 0.2331, "step": 24782 }, { "epoch": 0.8504804392587508, "grad_norm": 0.6926721154520594, "learning_rate": 5.751858082012462e-07, "loss": 0.2393, "step": 24783 }, { "epoch": 0.8505147563486616, "grad_norm": 0.7852516705522476, "learning_rate": 5.7492704918519e-07, "loss": 0.2833, "step": 24784 }, { "epoch": 0.8505490734385724, "grad_norm": 0.8268123717109709, "learning_rate": 5.746683448357509e-07, "loss": 0.2944, "step": 24785 }, { "epoch": 0.8505833905284832, "grad_norm": 0.6509265160611561, "learning_rate": 5.744096951561256e-07, "loss": 0.2035, "step": 24786 }, { "epoch": 0.850617707618394, "grad_norm": 0.8353027600574252, "learning_rate": 5.741511001495093e-07, "loss": 0.3119, "step": 24787 }, { "epoch": 0.8506520247083047, "grad_norm": 0.858500254864871, "learning_rate": 5.738925598190942e-07, "loss": 0.3043, "step": 24788 }, { "epoch": 0.8506863417982156, "grad_norm": 0.6791778801050798, "learning_rate": 5.73634074168078e-07, "loss": 0.228, "step": 24789 }, { "epoch": 0.8507206588881263, "grad_norm": 0.8812261154070711, "learning_rate": 5.733756431996512e-07, "loss": 0.2242, "step": 24790 }, { "epoch": 0.8507549759780371, "grad_norm": 0.8180663656487205, "learning_rate": 5.731172669170076e-07, "loss": 0.2751, "step": 24791 }, { "epoch": 0.8507892930679478, "grad_norm": 0.7290426009781551, "learning_rate": 5.72858945323339e-07, "loss": 0.2745, "step": 24792 }, { "epoch": 0.8508236101578586, "grad_norm": 0.6851579980869086, "learning_rate": 5.726006784218363e-07, "loss": 0.2172, "step": 24793 }, { "epoch": 0.8508579272477694, "grad_norm": 0.8204377042412108, "learning_rate": 5.723424662156907e-07, "loss": 0.2732, "step": 24794 }, { "epoch": 0.8508922443376802, "grad_norm": 0.763495342509963, "learning_rate": 5.720843087080918e-07, "loss": 0.3081, "step": 24795 }, { "epoch": 0.8509265614275909, "grad_norm": 0.7827289020574609, "learning_rate": 5.718262059022295e-07, "loss": 0.2399, "step": 24796 }, { "epoch": 0.8509608785175017, "grad_norm": 0.7653694174801431, "learning_rate": 5.7156815780129e-07, "loss": 0.2676, "step": 24797 }, { "epoch": 0.8509951956074125, "grad_norm": 1.0521525098924205, "learning_rate": 5.713101644084634e-07, "loss": 0.3085, "step": 24798 }, { "epoch": 0.8510295126973233, "grad_norm": 0.8548744169484501, "learning_rate": 5.710522257269369e-07, "loss": 0.2212, "step": 24799 }, { "epoch": 0.851063829787234, "grad_norm": 1.0262071697779955, "learning_rate": 5.70794341759896e-07, "loss": 0.2328, "step": 24800 }, { "epoch": 0.8510981468771448, "grad_norm": 0.8138552626164506, "learning_rate": 5.705365125105283e-07, "loss": 0.2818, "step": 24801 }, { "epoch": 0.8511324639670556, "grad_norm": 0.7779319029775532, "learning_rate": 5.702787379820157e-07, "loss": 0.2851, "step": 24802 }, { "epoch": 0.8511667810569664, "grad_norm": 0.8339976180053278, "learning_rate": 5.700210181775467e-07, "loss": 0.2502, "step": 24803 }, { "epoch": 0.8512010981468772, "grad_norm": 0.7303536911557538, "learning_rate": 5.697633531003021e-07, "loss": 0.2374, "step": 24804 }, { "epoch": 0.8512354152367879, "grad_norm": 0.8270797803729234, "learning_rate": 5.695057427534662e-07, "loss": 0.2872, "step": 24805 }, { "epoch": 0.8512697323266987, "grad_norm": 0.7705956136394945, "learning_rate": 5.692481871402216e-07, "loss": 0.2441, "step": 24806 }, { "epoch": 0.8513040494166094, "grad_norm": 0.7500773398188588, "learning_rate": 5.689906862637495e-07, "loss": 0.232, "step": 24807 }, { "epoch": 0.8513383665065203, "grad_norm": 0.8661753556783343, "learning_rate": 5.687332401272316e-07, "loss": 0.3496, "step": 24808 }, { "epoch": 0.851372683596431, "grad_norm": 0.7776067827088886, "learning_rate": 5.684758487338482e-07, "loss": 0.278, "step": 24809 }, { "epoch": 0.8514070006863418, "grad_norm": 0.8139957864820957, "learning_rate": 5.6821851208678e-07, "loss": 0.2701, "step": 24810 }, { "epoch": 0.8514413177762525, "grad_norm": 0.7512589107457638, "learning_rate": 5.679612301892034e-07, "loss": 0.2361, "step": 24811 }, { "epoch": 0.8514756348661634, "grad_norm": 0.7381594169458925, "learning_rate": 5.677040030443003e-07, "loss": 0.2543, "step": 24812 }, { "epoch": 0.8515099519560742, "grad_norm": 0.986389738472698, "learning_rate": 5.674468306552455e-07, "loss": 0.2512, "step": 24813 }, { "epoch": 0.8515442690459849, "grad_norm": 0.7709937552857109, "learning_rate": 5.671897130252174e-07, "loss": 0.2788, "step": 24814 }, { "epoch": 0.8515785861358957, "grad_norm": 0.8499096159547148, "learning_rate": 5.669326501573924e-07, "loss": 0.2901, "step": 24815 }, { "epoch": 0.8516129032258064, "grad_norm": 0.8090988868321047, "learning_rate": 5.666756420549462e-07, "loss": 0.3124, "step": 24816 }, { "epoch": 0.8516472203157173, "grad_norm": 0.8345876753658847, "learning_rate": 5.664186887210532e-07, "loss": 0.257, "step": 24817 }, { "epoch": 0.851681537405628, "grad_norm": 1.07500511630395, "learning_rate": 5.661617901588884e-07, "loss": 0.2713, "step": 24818 }, { "epoch": 0.8517158544955388, "grad_norm": 0.9337908581072242, "learning_rate": 5.659049463716265e-07, "loss": 0.2178, "step": 24819 }, { "epoch": 0.8517501715854495, "grad_norm": 0.7328823684435412, "learning_rate": 5.656481573624373e-07, "loss": 0.2921, "step": 24820 }, { "epoch": 0.8517844886753603, "grad_norm": 0.8542683434059408, "learning_rate": 5.65391423134496e-07, "loss": 0.3322, "step": 24821 }, { "epoch": 0.8518188057652711, "grad_norm": 0.8045811526783699, "learning_rate": 5.651347436909744e-07, "loss": 0.361, "step": 24822 }, { "epoch": 0.8518531228551819, "grad_norm": 0.8887882125994685, "learning_rate": 5.648781190350405e-07, "loss": 0.2895, "step": 24823 }, { "epoch": 0.8518874399450926, "grad_norm": 0.8210498119881561, "learning_rate": 5.646215491698693e-07, "loss": 0.275, "step": 24824 }, { "epoch": 0.8519217570350034, "grad_norm": 0.7942332449721262, "learning_rate": 5.643650340986256e-07, "loss": 0.3008, "step": 24825 }, { "epoch": 0.8519560741249143, "grad_norm": 0.7985561072870737, "learning_rate": 5.641085738244811e-07, "loss": 0.3104, "step": 24826 }, { "epoch": 0.851990391214825, "grad_norm": 0.9069667325176087, "learning_rate": 5.638521683506032e-07, "loss": 0.231, "step": 24827 }, { "epoch": 0.8520247083047358, "grad_norm": 0.8337231505868485, "learning_rate": 5.635958176801598e-07, "loss": 0.305, "step": 24828 }, { "epoch": 0.8520590253946465, "grad_norm": 0.9087278497837565, "learning_rate": 5.633395218163179e-07, "loss": 0.2592, "step": 24829 }, { "epoch": 0.8520933424845573, "grad_norm": 0.8162014315881969, "learning_rate": 5.630832807622432e-07, "loss": 0.2521, "step": 24830 }, { "epoch": 0.8521276595744681, "grad_norm": 0.9657745989387995, "learning_rate": 5.628270945211017e-07, "loss": 0.2429, "step": 24831 }, { "epoch": 0.8521619766643789, "grad_norm": 0.7664467691273801, "learning_rate": 5.625709630960586e-07, "loss": 0.2301, "step": 24832 }, { "epoch": 0.8521962937542896, "grad_norm": 0.7363701497436694, "learning_rate": 5.623148864902783e-07, "loss": 0.286, "step": 24833 }, { "epoch": 0.8522306108442004, "grad_norm": 0.7754503255436905, "learning_rate": 5.62058864706922e-07, "loss": 0.2237, "step": 24834 }, { "epoch": 0.8522649279341112, "grad_norm": 0.7893606217277263, "learning_rate": 5.618028977491563e-07, "loss": 0.3038, "step": 24835 }, { "epoch": 0.852299245024022, "grad_norm": 0.7343450941484915, "learning_rate": 5.615469856201399e-07, "loss": 0.2456, "step": 24836 }, { "epoch": 0.8523335621139327, "grad_norm": 0.7535805763789581, "learning_rate": 5.612911283230349e-07, "loss": 0.2432, "step": 24837 }, { "epoch": 0.8523678792038435, "grad_norm": 0.7771827532998767, "learning_rate": 5.610353258610052e-07, "loss": 0.2702, "step": 24838 }, { "epoch": 0.8524021962937542, "grad_norm": 0.7096484061941898, "learning_rate": 5.607795782372078e-07, "loss": 0.2477, "step": 24839 }, { "epoch": 0.8524365133836651, "grad_norm": 0.7548448967257932, "learning_rate": 5.605238854548028e-07, "loss": 0.2646, "step": 24840 }, { "epoch": 0.8524708304735759, "grad_norm": 0.8416527662394084, "learning_rate": 5.602682475169497e-07, "loss": 0.2726, "step": 24841 }, { "epoch": 0.8525051475634866, "grad_norm": 0.7506407351712148, "learning_rate": 5.600126644268055e-07, "loss": 0.2426, "step": 24842 }, { "epoch": 0.8525394646533974, "grad_norm": 0.8828110578004904, "learning_rate": 5.597571361875292e-07, "loss": 0.2735, "step": 24843 }, { "epoch": 0.8525737817433081, "grad_norm": 0.713240193839578, "learning_rate": 5.595016628022759e-07, "loss": 0.2208, "step": 24844 }, { "epoch": 0.852608098833219, "grad_norm": 0.8207511008800096, "learning_rate": 5.592462442742036e-07, "loss": 0.2947, "step": 24845 }, { "epoch": 0.8526424159231297, "grad_norm": 0.7319456501143378, "learning_rate": 5.589908806064648e-07, "loss": 0.1983, "step": 24846 }, { "epoch": 0.8526767330130405, "grad_norm": 0.8020855219810724, "learning_rate": 5.58735571802218e-07, "loss": 0.2276, "step": 24847 }, { "epoch": 0.8527110501029512, "grad_norm": 0.8649861818003172, "learning_rate": 5.584803178646136e-07, "loss": 0.2793, "step": 24848 }, { "epoch": 0.8527453671928621, "grad_norm": 0.8516104192391581, "learning_rate": 5.582251187968075e-07, "loss": 0.289, "step": 24849 }, { "epoch": 0.8527796842827728, "grad_norm": 0.8123314352423656, "learning_rate": 5.579699746019507e-07, "loss": 0.2954, "step": 24850 }, { "epoch": 0.8528140013726836, "grad_norm": 0.7328338014609178, "learning_rate": 5.577148852831959e-07, "loss": 0.252, "step": 24851 }, { "epoch": 0.8528483184625943, "grad_norm": 0.8596582950248177, "learning_rate": 5.574598508436951e-07, "loss": 0.2668, "step": 24852 }, { "epoch": 0.8528826355525051, "grad_norm": 0.7460680386691032, "learning_rate": 5.57204871286598e-07, "loss": 0.2659, "step": 24853 }, { "epoch": 0.852916952642416, "grad_norm": 0.8283799016936523, "learning_rate": 5.569499466150558e-07, "loss": 0.2533, "step": 24854 }, { "epoch": 0.8529512697323267, "grad_norm": 0.8333878302463721, "learning_rate": 5.566950768322149e-07, "loss": 0.2783, "step": 24855 }, { "epoch": 0.8529855868222375, "grad_norm": 0.7088629138457989, "learning_rate": 5.564402619412279e-07, "loss": 0.2362, "step": 24856 }, { "epoch": 0.8530199039121482, "grad_norm": 0.8938558374716926, "learning_rate": 5.561855019452401e-07, "loss": 0.275, "step": 24857 }, { "epoch": 0.8530542210020591, "grad_norm": 0.6992693902268927, "learning_rate": 5.559307968473992e-07, "loss": 0.2658, "step": 24858 }, { "epoch": 0.8530885380919698, "grad_norm": 0.7710299057529482, "learning_rate": 5.55676146650852e-07, "loss": 0.2326, "step": 24859 }, { "epoch": 0.8531228551818806, "grad_norm": 0.8249266008837904, "learning_rate": 5.554215513587435e-07, "loss": 0.2649, "step": 24860 }, { "epoch": 0.8531571722717913, "grad_norm": 0.7535398071518282, "learning_rate": 5.551670109742219e-07, "loss": 0.3003, "step": 24861 }, { "epoch": 0.8531914893617021, "grad_norm": 1.0397308073661014, "learning_rate": 5.54912525500429e-07, "loss": 0.2533, "step": 24862 }, { "epoch": 0.853225806451613, "grad_norm": 0.7091424518366152, "learning_rate": 5.54658094940509e-07, "loss": 0.2326, "step": 24863 }, { "epoch": 0.8532601235415237, "grad_norm": 0.7976603957365629, "learning_rate": 5.544037192976054e-07, "loss": 0.2513, "step": 24864 }, { "epoch": 0.8532944406314344, "grad_norm": 0.7049416259716459, "learning_rate": 5.541493985748608e-07, "loss": 0.2403, "step": 24865 }, { "epoch": 0.8533287577213452, "grad_norm": 0.7391623347442048, "learning_rate": 5.538951327754172e-07, "loss": 0.2525, "step": 24866 }, { "epoch": 0.853363074811256, "grad_norm": 0.7477003318068399, "learning_rate": 5.536409219024159e-07, "loss": 0.2767, "step": 24867 }, { "epoch": 0.8533973919011668, "grad_norm": 0.7895637996643905, "learning_rate": 5.533867659589975e-07, "loss": 0.2658, "step": 24868 }, { "epoch": 0.8534317089910776, "grad_norm": 0.7887459243098831, "learning_rate": 5.531326649482998e-07, "loss": 0.2722, "step": 24869 }, { "epoch": 0.8534660260809883, "grad_norm": 0.7354408118692398, "learning_rate": 5.528786188734653e-07, "loss": 0.2505, "step": 24870 }, { "epoch": 0.8535003431708991, "grad_norm": 0.8325280350455434, "learning_rate": 5.52624627737629e-07, "loss": 0.2463, "step": 24871 }, { "epoch": 0.8535346602608099, "grad_norm": 0.8172367073506893, "learning_rate": 5.523706915439314e-07, "loss": 0.3186, "step": 24872 }, { "epoch": 0.8535689773507207, "grad_norm": 0.6923336657923963, "learning_rate": 5.521168102955076e-07, "loss": 0.2817, "step": 24873 }, { "epoch": 0.8536032944406314, "grad_norm": 0.8561533053962607, "learning_rate": 5.518629839954953e-07, "loss": 0.3271, "step": 24874 }, { "epoch": 0.8536376115305422, "grad_norm": 0.814353215446529, "learning_rate": 5.516092126470296e-07, "loss": 0.2538, "step": 24875 }, { "epoch": 0.8536719286204529, "grad_norm": 0.7144156513178502, "learning_rate": 5.513554962532458e-07, "loss": 0.2216, "step": 24876 }, { "epoch": 0.8537062457103638, "grad_norm": 0.8399850873088478, "learning_rate": 5.511018348172792e-07, "loss": 0.3005, "step": 24877 }, { "epoch": 0.8537405628002745, "grad_norm": 0.761085332158447, "learning_rate": 5.508482283422606e-07, "loss": 0.2115, "step": 24878 }, { "epoch": 0.8537748798901853, "grad_norm": 0.8076787442892291, "learning_rate": 5.50594676831327e-07, "loss": 0.3166, "step": 24879 }, { "epoch": 0.853809196980096, "grad_norm": 0.7524667126811501, "learning_rate": 5.50341180287608e-07, "loss": 0.231, "step": 24880 }, { "epoch": 0.8538435140700069, "grad_norm": 0.7766642331735966, "learning_rate": 5.500877387142345e-07, "loss": 0.2769, "step": 24881 }, { "epoch": 0.8538778311599177, "grad_norm": 0.7479354829516155, "learning_rate": 5.498343521143407e-07, "loss": 0.2756, "step": 24882 }, { "epoch": 0.8539121482498284, "grad_norm": 0.7852973447574866, "learning_rate": 5.495810204910545e-07, "loss": 0.2364, "step": 24883 }, { "epoch": 0.8539464653397392, "grad_norm": 0.8506666971355633, "learning_rate": 5.493277438475058e-07, "loss": 0.2296, "step": 24884 }, { "epoch": 0.8539807824296499, "grad_norm": 0.9282954459979629, "learning_rate": 5.490745221868238e-07, "loss": 0.2931, "step": 24885 }, { "epoch": 0.8540150995195608, "grad_norm": 0.6254183994616761, "learning_rate": 5.48821355512138e-07, "loss": 0.2377, "step": 24886 }, { "epoch": 0.8540494166094715, "grad_norm": 0.8116323012119295, "learning_rate": 5.485682438265727e-07, "loss": 0.2512, "step": 24887 }, { "epoch": 0.8540837336993823, "grad_norm": 0.7888330480917737, "learning_rate": 5.483151871332581e-07, "loss": 0.2615, "step": 24888 }, { "epoch": 0.854118050789293, "grad_norm": 0.6886833508507781, "learning_rate": 5.480621854353202e-07, "loss": 0.2358, "step": 24889 }, { "epoch": 0.8541523678792038, "grad_norm": 0.6687030012116622, "learning_rate": 5.478092387358813e-07, "loss": 0.2574, "step": 24890 }, { "epoch": 0.8541866849691147, "grad_norm": 0.7634802098124225, "learning_rate": 5.475563470380701e-07, "loss": 0.2608, "step": 24891 }, { "epoch": 0.8542210020590254, "grad_norm": 0.8366434512696493, "learning_rate": 5.47303510345008e-07, "loss": 0.3029, "step": 24892 }, { "epoch": 0.8542553191489362, "grad_norm": 0.9094081354091144, "learning_rate": 5.470507286598214e-07, "loss": 0.3323, "step": 24893 }, { "epoch": 0.8542896362388469, "grad_norm": 0.7671154349434582, "learning_rate": 5.4679800198563e-07, "loss": 0.2492, "step": 24894 }, { "epoch": 0.8543239533287578, "grad_norm": 0.7588766938351709, "learning_rate": 5.46545330325558e-07, "loss": 0.2762, "step": 24895 }, { "epoch": 0.8543582704186685, "grad_norm": 0.6907162851451916, "learning_rate": 5.462927136827256e-07, "loss": 0.2671, "step": 24896 }, { "epoch": 0.8543925875085793, "grad_norm": 0.7732515677410321, "learning_rate": 5.460401520602549e-07, "loss": 0.2369, "step": 24897 }, { "epoch": 0.85442690459849, "grad_norm": 0.7513699647862906, "learning_rate": 5.457876454612648e-07, "loss": 0.2292, "step": 24898 }, { "epoch": 0.8544612216884008, "grad_norm": 0.7725915287949902, "learning_rate": 5.455351938888753e-07, "loss": 0.2661, "step": 24899 }, { "epoch": 0.8544955387783116, "grad_norm": 0.8502347249741355, "learning_rate": 5.45282797346206e-07, "loss": 0.2734, "step": 24900 }, { "epoch": 0.8545298558682224, "grad_norm": 0.8221131594334561, "learning_rate": 5.450304558363723e-07, "loss": 0.2554, "step": 24901 }, { "epoch": 0.8545641729581331, "grad_norm": 0.8615348975268979, "learning_rate": 5.447781693624954e-07, "loss": 0.2388, "step": 24902 }, { "epoch": 0.8545984900480439, "grad_norm": 0.8084990010392973, "learning_rate": 5.44525937927689e-07, "loss": 0.2905, "step": 24903 }, { "epoch": 0.8546328071379548, "grad_norm": 0.9403117580661127, "learning_rate": 5.442737615350685e-07, "loss": 0.3022, "step": 24904 }, { "epoch": 0.8546671242278655, "grad_norm": 0.8573096425813626, "learning_rate": 5.440216401877535e-07, "loss": 0.2599, "step": 24905 }, { "epoch": 0.8547014413177763, "grad_norm": 0.6938030737374502, "learning_rate": 5.437695738888543e-07, "loss": 0.219, "step": 24906 }, { "epoch": 0.854735758407687, "grad_norm": 0.8306430310673609, "learning_rate": 5.435175626414868e-07, "loss": 0.2696, "step": 24907 }, { "epoch": 0.8547700754975978, "grad_norm": 0.7417219063175892, "learning_rate": 5.43265606448764e-07, "loss": 0.2794, "step": 24908 }, { "epoch": 0.8548043925875086, "grad_norm": 0.7824511927538966, "learning_rate": 5.43013705313799e-07, "loss": 0.2155, "step": 24909 }, { "epoch": 0.8548387096774194, "grad_norm": 0.790395606103479, "learning_rate": 5.427618592397027e-07, "loss": 0.264, "step": 24910 }, { "epoch": 0.8548730267673301, "grad_norm": 0.8548577398492131, "learning_rate": 5.425100682295875e-07, "loss": 0.2854, "step": 24911 }, { "epoch": 0.8549073438572409, "grad_norm": 0.8196054515364088, "learning_rate": 5.422583322865643e-07, "loss": 0.3203, "step": 24912 }, { "epoch": 0.8549416609471516, "grad_norm": 0.7539425661385821, "learning_rate": 5.420066514137401e-07, "loss": 0.2471, "step": 24913 }, { "epoch": 0.8549759780370625, "grad_norm": 0.8062610845190342, "learning_rate": 5.417550256142284e-07, "loss": 0.2519, "step": 24914 }, { "epoch": 0.8550102951269732, "grad_norm": 0.7865900337113639, "learning_rate": 5.415034548911341e-07, "loss": 0.2268, "step": 24915 }, { "epoch": 0.855044612216884, "grad_norm": 0.7524327182494285, "learning_rate": 5.412519392475673e-07, "loss": 0.278, "step": 24916 }, { "epoch": 0.8550789293067947, "grad_norm": 0.8570274715677995, "learning_rate": 5.410004786866335e-07, "loss": 0.2304, "step": 24917 }, { "epoch": 0.8551132463967056, "grad_norm": 0.7115155625746361, "learning_rate": 5.407490732114407e-07, "loss": 0.2835, "step": 24918 }, { "epoch": 0.8551475634866164, "grad_norm": 0.950215346925188, "learning_rate": 5.404977228250941e-07, "loss": 0.273, "step": 24919 }, { "epoch": 0.8551818805765271, "grad_norm": 0.7811584149302697, "learning_rate": 5.402464275306984e-07, "loss": 0.2506, "step": 24920 }, { "epoch": 0.8552161976664379, "grad_norm": 0.8137353329173933, "learning_rate": 5.399951873313603e-07, "loss": 0.2569, "step": 24921 }, { "epoch": 0.8552505147563486, "grad_norm": 0.7512331229497927, "learning_rate": 5.397440022301792e-07, "loss": 0.2437, "step": 24922 }, { "epoch": 0.8552848318462595, "grad_norm": 0.7414410622435141, "learning_rate": 5.394928722302628e-07, "loss": 0.2874, "step": 24923 }, { "epoch": 0.8553191489361702, "grad_norm": 0.7774111798811453, "learning_rate": 5.392417973347097e-07, "loss": 0.245, "step": 24924 }, { "epoch": 0.855353466026081, "grad_norm": 0.8456961087742565, "learning_rate": 5.389907775466252e-07, "loss": 0.3395, "step": 24925 }, { "epoch": 0.8553877831159917, "grad_norm": 0.7535989375604383, "learning_rate": 5.387398128691079e-07, "loss": 0.2671, "step": 24926 }, { "epoch": 0.8554221002059026, "grad_norm": 0.7968006932429617, "learning_rate": 5.384889033052576e-07, "loss": 0.2645, "step": 24927 }, { "epoch": 0.8554564172958133, "grad_norm": 0.7472965038702539, "learning_rate": 5.382380488581778e-07, "loss": 0.2666, "step": 24928 }, { "epoch": 0.8554907343857241, "grad_norm": 0.8733120161088269, "learning_rate": 5.379872495309634e-07, "loss": 0.2415, "step": 24929 }, { "epoch": 0.8555250514756348, "grad_norm": 0.7613747046800373, "learning_rate": 5.377365053267147e-07, "loss": 0.2787, "step": 24930 }, { "epoch": 0.8555593685655456, "grad_norm": 0.9088683736024981, "learning_rate": 5.374858162485291e-07, "loss": 0.2818, "step": 24931 }, { "epoch": 0.8555936856554565, "grad_norm": 0.7710020108764755, "learning_rate": 5.372351822995031e-07, "loss": 0.2901, "step": 24932 }, { "epoch": 0.8556280027453672, "grad_norm": 0.8420089857758385, "learning_rate": 5.369846034827337e-07, "loss": 0.288, "step": 24933 }, { "epoch": 0.855662319835278, "grad_norm": 0.720806769222563, "learning_rate": 5.36734079801316e-07, "loss": 0.2323, "step": 24934 }, { "epoch": 0.8556966369251887, "grad_norm": 0.7697214035898345, "learning_rate": 5.36483611258346e-07, "loss": 0.2298, "step": 24935 }, { "epoch": 0.8557309540150995, "grad_norm": 0.729495680194532, "learning_rate": 5.362331978569146e-07, "loss": 0.2324, "step": 24936 }, { "epoch": 0.8557652711050103, "grad_norm": 0.7670873241908789, "learning_rate": 5.359828396001204e-07, "loss": 0.2423, "step": 24937 }, { "epoch": 0.8557995881949211, "grad_norm": 0.804881105537031, "learning_rate": 5.357325364910526e-07, "loss": 0.2327, "step": 24938 }, { "epoch": 0.8558339052848318, "grad_norm": 1.4705756577275486, "learning_rate": 5.354822885328043e-07, "loss": 0.2463, "step": 24939 }, { "epoch": 0.8558682223747426, "grad_norm": 0.7767653438222567, "learning_rate": 5.352320957284673e-07, "loss": 0.2706, "step": 24940 }, { "epoch": 0.8559025394646534, "grad_norm": 0.8090399384800213, "learning_rate": 5.349819580811321e-07, "loss": 0.2352, "step": 24941 }, { "epoch": 0.8559368565545642, "grad_norm": 0.7729538557800587, "learning_rate": 5.347318755938896e-07, "loss": 0.2292, "step": 24942 }, { "epoch": 0.855971173644475, "grad_norm": 0.8207123268804257, "learning_rate": 5.344818482698283e-07, "loss": 0.2635, "step": 24943 }, { "epoch": 0.8560054907343857, "grad_norm": 0.8031858191932404, "learning_rate": 5.342318761120391e-07, "loss": 0.2922, "step": 24944 }, { "epoch": 0.8560398078242965, "grad_norm": 0.6815625416886455, "learning_rate": 5.339819591236062e-07, "loss": 0.2424, "step": 24945 }, { "epoch": 0.8560741249142073, "grad_norm": 0.7718198913315395, "learning_rate": 5.33732097307621e-07, "loss": 0.2996, "step": 24946 }, { "epoch": 0.8561084420041181, "grad_norm": 0.6754369268159122, "learning_rate": 5.334822906671677e-07, "loss": 0.1961, "step": 24947 }, { "epoch": 0.8561427590940288, "grad_norm": 0.8752811632671293, "learning_rate": 5.332325392053322e-07, "loss": 0.3003, "step": 24948 }, { "epoch": 0.8561770761839396, "grad_norm": 0.764345316315666, "learning_rate": 5.329828429252032e-07, "loss": 0.2651, "step": 24949 }, { "epoch": 0.8562113932738504, "grad_norm": 0.7669912157524623, "learning_rate": 5.327332018298619e-07, "loss": 0.281, "step": 24950 }, { "epoch": 0.8562457103637612, "grad_norm": 0.7204647325854979, "learning_rate": 5.324836159223939e-07, "loss": 0.2164, "step": 24951 }, { "epoch": 0.8562800274536719, "grad_norm": 0.775140047804169, "learning_rate": 5.322340852058822e-07, "loss": 0.2391, "step": 24952 }, { "epoch": 0.8563143445435827, "grad_norm": 0.7357091867137813, "learning_rate": 5.319846096834092e-07, "loss": 0.2727, "step": 24953 }, { "epoch": 0.8563486616334934, "grad_norm": 0.84173730288148, "learning_rate": 5.317351893580569e-07, "loss": 0.3162, "step": 24954 }, { "epoch": 0.8563829787234043, "grad_norm": 0.6606682320328999, "learning_rate": 5.314858242329074e-07, "loss": 0.2342, "step": 24955 }, { "epoch": 0.856417295813315, "grad_norm": 0.8216928316920036, "learning_rate": 5.312365143110404e-07, "loss": 0.3175, "step": 24956 }, { "epoch": 0.8564516129032258, "grad_norm": 0.7026407347405168, "learning_rate": 5.309872595955362e-07, "loss": 0.2587, "step": 24957 }, { "epoch": 0.8564859299931366, "grad_norm": 0.9489277112473142, "learning_rate": 5.307380600894751e-07, "loss": 0.2976, "step": 24958 }, { "epoch": 0.8565202470830473, "grad_norm": 0.9299893919870621, "learning_rate": 5.304889157959326e-07, "loss": 0.2969, "step": 24959 }, { "epoch": 0.8565545641729582, "grad_norm": 0.7626946615647504, "learning_rate": 5.302398267179909e-07, "loss": 0.2834, "step": 24960 }, { "epoch": 0.8565888812628689, "grad_norm": 0.8081633913354431, "learning_rate": 5.299907928587233e-07, "loss": 0.2991, "step": 24961 }, { "epoch": 0.8566231983527797, "grad_norm": 0.764500678736217, "learning_rate": 5.297418142212085e-07, "loss": 0.3004, "step": 24962 }, { "epoch": 0.8566575154426904, "grad_norm": 0.7818649943854146, "learning_rate": 5.294928908085211e-07, "loss": 0.2305, "step": 24963 }, { "epoch": 0.8566918325326013, "grad_norm": 0.8561801204099992, "learning_rate": 5.292440226237378e-07, "loss": 0.2756, "step": 24964 }, { "epoch": 0.856726149622512, "grad_norm": 0.7732010387797976, "learning_rate": 5.289952096699319e-07, "loss": 0.2569, "step": 24965 }, { "epoch": 0.8567604667124228, "grad_norm": 0.8334211805982763, "learning_rate": 5.287464519501773e-07, "loss": 0.2792, "step": 24966 }, { "epoch": 0.8567947838023335, "grad_norm": 0.7325296073439298, "learning_rate": 5.284977494675481e-07, "loss": 0.2875, "step": 24967 }, { "epoch": 0.8568291008922443, "grad_norm": 0.8251274187384908, "learning_rate": 5.282491022251146e-07, "loss": 0.2734, "step": 24968 }, { "epoch": 0.8568634179821552, "grad_norm": 0.8097170692305217, "learning_rate": 5.280005102259511e-07, "loss": 0.2659, "step": 24969 }, { "epoch": 0.8568977350720659, "grad_norm": 0.8929572272087939, "learning_rate": 5.277519734731273e-07, "loss": 0.287, "step": 24970 }, { "epoch": 0.8569320521619767, "grad_norm": 0.842638404432808, "learning_rate": 5.27503491969712e-07, "loss": 0.2548, "step": 24971 }, { "epoch": 0.8569663692518874, "grad_norm": 0.6984635581608512, "learning_rate": 5.272550657187792e-07, "loss": 0.2435, "step": 24972 }, { "epoch": 0.8570006863417983, "grad_norm": 0.7672947798536922, "learning_rate": 5.270066947233937e-07, "loss": 0.2901, "step": 24973 }, { "epoch": 0.857035003431709, "grad_norm": 0.7654633072614394, "learning_rate": 5.26758378986626e-07, "loss": 0.247, "step": 24974 }, { "epoch": 0.8570693205216198, "grad_norm": 0.7948966984769851, "learning_rate": 5.265101185115434e-07, "loss": 0.258, "step": 24975 }, { "epoch": 0.8571036376115305, "grad_norm": 0.7016988875718893, "learning_rate": 5.26261913301212e-07, "loss": 0.2544, "step": 24976 }, { "epoch": 0.8571379547014413, "grad_norm": 0.7410349476101417, "learning_rate": 5.260137633586993e-07, "loss": 0.2322, "step": 24977 }, { "epoch": 0.8571722717913521, "grad_norm": 0.8573418539403785, "learning_rate": 5.257656686870704e-07, "loss": 0.242, "step": 24978 }, { "epoch": 0.8572065888812629, "grad_norm": 0.810877624378365, "learning_rate": 5.25517629289391e-07, "loss": 0.2561, "step": 24979 }, { "epoch": 0.8572409059711736, "grad_norm": 0.836385483941031, "learning_rate": 5.252696451687228e-07, "loss": 0.3247, "step": 24980 }, { "epoch": 0.8572752230610844, "grad_norm": 0.6989881924358357, "learning_rate": 5.250217163281329e-07, "loss": 0.2207, "step": 24981 }, { "epoch": 0.8573095401509951, "grad_norm": 0.7600386636053005, "learning_rate": 5.247738427706811e-07, "loss": 0.332, "step": 24982 }, { "epoch": 0.857343857240906, "grad_norm": 0.9639303736594313, "learning_rate": 5.245260244994316e-07, "loss": 0.3183, "step": 24983 }, { "epoch": 0.8573781743308168, "grad_norm": 0.7524462037984593, "learning_rate": 5.242782615174446e-07, "loss": 0.3055, "step": 24984 }, { "epoch": 0.8574124914207275, "grad_norm": 0.8598715276949171, "learning_rate": 5.240305538277818e-07, "loss": 0.2612, "step": 24985 }, { "epoch": 0.8574468085106383, "grad_norm": 0.789437908276613, "learning_rate": 5.237829014335028e-07, "loss": 0.297, "step": 24986 }, { "epoch": 0.8574811256005491, "grad_norm": 1.0460623232473767, "learning_rate": 5.235353043376674e-07, "loss": 0.3188, "step": 24987 }, { "epoch": 0.8575154426904599, "grad_norm": 0.783876175514586, "learning_rate": 5.232877625433342e-07, "loss": 0.3659, "step": 24988 }, { "epoch": 0.8575497597803706, "grad_norm": 0.7566212748775948, "learning_rate": 5.230402760535614e-07, "loss": 0.2085, "step": 24989 }, { "epoch": 0.8575840768702814, "grad_norm": 0.8537200498588627, "learning_rate": 5.227928448714076e-07, "loss": 0.2891, "step": 24990 }, { "epoch": 0.8576183939601921, "grad_norm": 0.7757371441381757, "learning_rate": 5.225454689999259e-07, "loss": 0.2697, "step": 24991 }, { "epoch": 0.857652711050103, "grad_norm": 0.8235860855427233, "learning_rate": 5.222981484421758e-07, "loss": 0.2768, "step": 24992 }, { "epoch": 0.8576870281400137, "grad_norm": 0.7840662813104826, "learning_rate": 5.220508832012127e-07, "loss": 0.2521, "step": 24993 }, { "epoch": 0.8577213452299245, "grad_norm": 0.7840871739607416, "learning_rate": 5.21803673280088e-07, "loss": 0.2775, "step": 24994 }, { "epoch": 0.8577556623198352, "grad_norm": 0.8584975651422985, "learning_rate": 5.2155651868186e-07, "loss": 0.2799, "step": 24995 }, { "epoch": 0.8577899794097461, "grad_norm": 0.7168789259012722, "learning_rate": 5.213094194095786e-07, "loss": 0.2353, "step": 24996 }, { "epoch": 0.8578242964996569, "grad_norm": 0.7688416981632082, "learning_rate": 5.21062375466298e-07, "loss": 0.2669, "step": 24997 }, { "epoch": 0.8578586135895676, "grad_norm": 0.7355662808863951, "learning_rate": 5.208153868550697e-07, "loss": 0.2254, "step": 24998 }, { "epoch": 0.8578929306794784, "grad_norm": 0.754171495473731, "learning_rate": 5.205684535789452e-07, "loss": 0.2384, "step": 24999 }, { "epoch": 0.8579272477693891, "grad_norm": 0.7448620089309148, "learning_rate": 5.203215756409746e-07, "loss": 0.2299, "step": 25000 }, { "epoch": 0.8579615648593, "grad_norm": 0.6901917652181704, "learning_rate": 5.200747530442086e-07, "loss": 0.2138, "step": 25001 }, { "epoch": 0.8579958819492107, "grad_norm": 0.7421203480859453, "learning_rate": 5.198279857916966e-07, "loss": 0.2068, "step": 25002 }, { "epoch": 0.8580301990391215, "grad_norm": 0.6976229138222898, "learning_rate": 5.195812738864847e-07, "loss": 0.2086, "step": 25003 }, { "epoch": 0.8580645161290322, "grad_norm": 0.8112638549225516, "learning_rate": 5.193346173316244e-07, "loss": 0.272, "step": 25004 }, { "epoch": 0.858098833218943, "grad_norm": 0.7305274721472378, "learning_rate": 5.190880161301598e-07, "loss": 0.2401, "step": 25005 }, { "epoch": 0.8581331503088538, "grad_norm": 0.7475760060775906, "learning_rate": 5.188414702851385e-07, "loss": 0.2356, "step": 25006 }, { "epoch": 0.8581674673987646, "grad_norm": 0.8181030954067233, "learning_rate": 5.185949797996065e-07, "loss": 0.2579, "step": 25007 }, { "epoch": 0.8582017844886753, "grad_norm": 0.763800332349246, "learning_rate": 5.183485446766084e-07, "loss": 0.2917, "step": 25008 }, { "epoch": 0.8582361015785861, "grad_norm": 1.0038989372822031, "learning_rate": 5.181021649191892e-07, "loss": 0.3147, "step": 25009 }, { "epoch": 0.858270418668497, "grad_norm": 0.8208664602184914, "learning_rate": 5.178558405303924e-07, "loss": 0.2055, "step": 25010 }, { "epoch": 0.8583047357584077, "grad_norm": 0.7645230666552678, "learning_rate": 5.176095715132623e-07, "loss": 0.2415, "step": 25011 }, { "epoch": 0.8583390528483185, "grad_norm": 0.8052999567387451, "learning_rate": 5.173633578708376e-07, "loss": 0.2617, "step": 25012 }, { "epoch": 0.8583733699382292, "grad_norm": 0.8326295960146859, "learning_rate": 5.171171996061641e-07, "loss": 0.2233, "step": 25013 }, { "epoch": 0.85840768702814, "grad_norm": 0.7713222711466717, "learning_rate": 5.168710967222806e-07, "loss": 0.2948, "step": 25014 }, { "epoch": 0.8584420041180508, "grad_norm": 0.8599806600767627, "learning_rate": 5.166250492222264e-07, "loss": 0.2825, "step": 25015 }, { "epoch": 0.8584763212079616, "grad_norm": 0.7701838114451555, "learning_rate": 5.163790571090449e-07, "loss": 0.2629, "step": 25016 }, { "epoch": 0.8585106382978723, "grad_norm": 0.7691208986703013, "learning_rate": 5.161331203857711e-07, "loss": 0.2991, "step": 25017 }, { "epoch": 0.8585449553877831, "grad_norm": 0.756471667756903, "learning_rate": 5.158872390554464e-07, "loss": 0.2755, "step": 25018 }, { "epoch": 0.8585792724776939, "grad_norm": 0.8883150667674142, "learning_rate": 5.156414131211057e-07, "loss": 0.2951, "step": 25019 }, { "epoch": 0.8586135895676047, "grad_norm": 0.7605696846485429, "learning_rate": 5.153956425857876e-07, "loss": 0.3093, "step": 25020 }, { "epoch": 0.8586479066575154, "grad_norm": 0.8085034207635787, "learning_rate": 5.151499274525279e-07, "loss": 0.299, "step": 25021 }, { "epoch": 0.8586822237474262, "grad_norm": 0.7556136816295557, "learning_rate": 5.149042677243616e-07, "loss": 0.2252, "step": 25022 }, { "epoch": 0.858716540837337, "grad_norm": 0.8640080161489408, "learning_rate": 5.146586634043238e-07, "loss": 0.292, "step": 25023 }, { "epoch": 0.8587508579272478, "grad_norm": 0.8701231916117598, "learning_rate": 5.144131144954495e-07, "loss": 0.285, "step": 25024 }, { "epoch": 0.8587851750171586, "grad_norm": 0.8041971779373951, "learning_rate": 5.141676210007718e-07, "loss": 0.2557, "step": 25025 }, { "epoch": 0.8588194921070693, "grad_norm": 0.8346287151403952, "learning_rate": 5.139221829233216e-07, "loss": 0.2483, "step": 25026 }, { "epoch": 0.8588538091969801, "grad_norm": 0.7013866011083472, "learning_rate": 5.136768002661341e-07, "loss": 0.229, "step": 25027 }, { "epoch": 0.8588881262868908, "grad_norm": 0.8023550083587039, "learning_rate": 5.134314730322388e-07, "loss": 0.276, "step": 25028 }, { "epoch": 0.8589224433768017, "grad_norm": 0.7358252095894352, "learning_rate": 5.131862012246663e-07, "loss": 0.2288, "step": 25029 }, { "epoch": 0.8589567604667124, "grad_norm": 0.7809596473711075, "learning_rate": 5.129409848464479e-07, "loss": 0.303, "step": 25030 }, { "epoch": 0.8589910775566232, "grad_norm": 0.7184331630541037, "learning_rate": 5.126958239006113e-07, "loss": 0.2407, "step": 25031 }, { "epoch": 0.8590253946465339, "grad_norm": 0.8887100329673936, "learning_rate": 5.124507183901867e-07, "loss": 0.3184, "step": 25032 }, { "epoch": 0.8590597117364448, "grad_norm": 0.8526986857197585, "learning_rate": 5.122056683182014e-07, "loss": 0.2701, "step": 25033 }, { "epoch": 0.8590940288263555, "grad_norm": 0.7523889523898478, "learning_rate": 5.119606736876837e-07, "loss": 0.2494, "step": 25034 }, { "epoch": 0.8591283459162663, "grad_norm": 0.7833233243945863, "learning_rate": 5.117157345016571e-07, "loss": 0.2257, "step": 25035 }, { "epoch": 0.859162663006177, "grad_norm": 0.760614176498552, "learning_rate": 5.114708507631521e-07, "loss": 0.2153, "step": 25036 }, { "epoch": 0.8591969800960878, "grad_norm": 0.7702541792725447, "learning_rate": 5.112260224751908e-07, "loss": 0.2568, "step": 25037 }, { "epoch": 0.8592312971859987, "grad_norm": 0.8676757197858453, "learning_rate": 5.109812496407973e-07, "loss": 0.2459, "step": 25038 }, { "epoch": 0.8592656142759094, "grad_norm": 0.737432042831598, "learning_rate": 5.107365322629986e-07, "loss": 0.2704, "step": 25039 }, { "epoch": 0.8592999313658202, "grad_norm": 0.8340995644455924, "learning_rate": 5.104918703448153e-07, "loss": 0.2289, "step": 25040 }, { "epoch": 0.8593342484557309, "grad_norm": 0.7768015502743163, "learning_rate": 5.102472638892708e-07, "loss": 0.2461, "step": 25041 }, { "epoch": 0.8593685655456418, "grad_norm": 0.7497780968233497, "learning_rate": 5.100027128993868e-07, "loss": 0.241, "step": 25042 }, { "epoch": 0.8594028826355525, "grad_norm": 0.7632318264467005, "learning_rate": 5.097582173781846e-07, "loss": 0.2528, "step": 25043 }, { "epoch": 0.8594371997254633, "grad_norm": 0.7499784950691437, "learning_rate": 5.09513777328684e-07, "loss": 0.2528, "step": 25044 }, { "epoch": 0.859471516815374, "grad_norm": 0.8138862319020296, "learning_rate": 5.092693927539056e-07, "loss": 0.2628, "step": 25045 }, { "epoch": 0.8595058339052848, "grad_norm": 0.7432090844932012, "learning_rate": 5.090250636568688e-07, "loss": 0.21, "step": 25046 }, { "epoch": 0.8595401509951957, "grad_norm": 0.7867682536833691, "learning_rate": 5.0878079004059e-07, "loss": 0.2678, "step": 25047 }, { "epoch": 0.8595744680851064, "grad_norm": 0.7925066094403511, "learning_rate": 5.085365719080898e-07, "loss": 0.2049, "step": 25048 }, { "epoch": 0.8596087851750172, "grad_norm": 0.8132923327655435, "learning_rate": 5.08292409262382e-07, "loss": 0.2707, "step": 25049 }, { "epoch": 0.8596431022649279, "grad_norm": 0.8461010474833334, "learning_rate": 5.080483021064864e-07, "loss": 0.2166, "step": 25050 }, { "epoch": 0.8596774193548387, "grad_norm": 0.8505964478602401, "learning_rate": 5.078042504434161e-07, "loss": 0.2365, "step": 25051 }, { "epoch": 0.8597117364447495, "grad_norm": 0.7672403145552307, "learning_rate": 5.07560254276187e-07, "loss": 0.2196, "step": 25052 }, { "epoch": 0.8597460535346603, "grad_norm": 0.7765865483284853, "learning_rate": 5.073163136078129e-07, "loss": 0.2566, "step": 25053 }, { "epoch": 0.859780370624571, "grad_norm": 0.736446828240106, "learning_rate": 5.070724284413081e-07, "loss": 0.2558, "step": 25054 }, { "epoch": 0.8598146877144818, "grad_norm": 0.8027832672566949, "learning_rate": 5.068285987796851e-07, "loss": 0.2889, "step": 25055 }, { "epoch": 0.8598490048043926, "grad_norm": 0.8197247209195442, "learning_rate": 5.065848246259563e-07, "loss": 0.3176, "step": 25056 }, { "epoch": 0.8598833218943034, "grad_norm": 0.7506494925291423, "learning_rate": 5.063411059831342e-07, "loss": 0.286, "step": 25057 }, { "epoch": 0.8599176389842141, "grad_norm": 0.8919859049879684, "learning_rate": 5.060974428542264e-07, "loss": 0.2306, "step": 25058 }, { "epoch": 0.8599519560741249, "grad_norm": 0.8082215521973389, "learning_rate": 5.058538352422465e-07, "loss": 0.2446, "step": 25059 }, { "epoch": 0.8599862731640356, "grad_norm": 0.733444653594508, "learning_rate": 5.056102831502041e-07, "loss": 0.2445, "step": 25060 }, { "epoch": 0.8600205902539465, "grad_norm": 0.8778015901714573, "learning_rate": 5.053667865811041e-07, "loss": 0.2407, "step": 25061 }, { "epoch": 0.8600549073438573, "grad_norm": 0.8743238554159927, "learning_rate": 5.051233455379595e-07, "loss": 0.301, "step": 25062 }, { "epoch": 0.860089224433768, "grad_norm": 0.7765454597182292, "learning_rate": 5.048799600237747e-07, "loss": 0.2659, "step": 25063 }, { "epoch": 0.8601235415236788, "grad_norm": 0.8849447343379799, "learning_rate": 5.046366300415567e-07, "loss": 0.3, "step": 25064 }, { "epoch": 0.8601578586135896, "grad_norm": 0.7564730796606638, "learning_rate": 5.043933555943125e-07, "loss": 0.2471, "step": 25065 }, { "epoch": 0.8601921757035004, "grad_norm": 0.8270228136881976, "learning_rate": 5.041501366850465e-07, "loss": 0.273, "step": 25066 }, { "epoch": 0.8602264927934111, "grad_norm": 0.7823590384304704, "learning_rate": 5.039069733167645e-07, "loss": 0.2583, "step": 25067 }, { "epoch": 0.8602608098833219, "grad_norm": 0.8083063347222998, "learning_rate": 5.036638654924697e-07, "loss": 0.212, "step": 25068 }, { "epoch": 0.8602951269732326, "grad_norm": 0.7635415961852903, "learning_rate": 5.034208132151663e-07, "loss": 0.2531, "step": 25069 }, { "epoch": 0.8603294440631435, "grad_norm": 0.8652769871512525, "learning_rate": 5.031778164878548e-07, "loss": 0.2607, "step": 25070 }, { "epoch": 0.8603637611530542, "grad_norm": 0.8669255968038904, "learning_rate": 5.029348753135399e-07, "loss": 0.2494, "step": 25071 }, { "epoch": 0.860398078242965, "grad_norm": 0.8288076187512197, "learning_rate": 5.02691989695221e-07, "loss": 0.2803, "step": 25072 }, { "epoch": 0.8604323953328757, "grad_norm": 0.7892801580523997, "learning_rate": 5.024491596358994e-07, "loss": 0.2638, "step": 25073 }, { "epoch": 0.8604667124227865, "grad_norm": 0.9182588312584954, "learning_rate": 5.022063851385745e-07, "loss": 0.2797, "step": 25074 }, { "epoch": 0.8605010295126974, "grad_norm": 0.8544070902157026, "learning_rate": 5.019636662062455e-07, "loss": 0.3099, "step": 25075 }, { "epoch": 0.8605353466026081, "grad_norm": 0.8415697738455907, "learning_rate": 5.017210028419117e-07, "loss": 0.2686, "step": 25076 }, { "epoch": 0.8605696636925189, "grad_norm": 0.7696973984729064, "learning_rate": 5.014783950485702e-07, "loss": 0.2649, "step": 25077 }, { "epoch": 0.8606039807824296, "grad_norm": 0.7047921236235293, "learning_rate": 5.012358428292197e-07, "loss": 0.2314, "step": 25078 }, { "epoch": 0.8606382978723405, "grad_norm": 0.7545568738492514, "learning_rate": 5.009933461868527e-07, "loss": 0.2596, "step": 25079 }, { "epoch": 0.8606726149622512, "grad_norm": 0.7842721152402862, "learning_rate": 5.007509051244703e-07, "loss": 0.2433, "step": 25080 }, { "epoch": 0.860706932052162, "grad_norm": 0.8528109217024039, "learning_rate": 5.005085196450621e-07, "loss": 0.2511, "step": 25081 }, { "epoch": 0.8607412491420727, "grad_norm": 0.7740154699614977, "learning_rate": 5.00266189751627e-07, "loss": 0.2537, "step": 25082 }, { "epoch": 0.8607755662319835, "grad_norm": 0.8010888068271467, "learning_rate": 5.000239154471576e-07, "loss": 0.2802, "step": 25083 }, { "epoch": 0.8608098833218943, "grad_norm": 0.7561590482604666, "learning_rate": 4.997816967346441e-07, "loss": 0.2427, "step": 25084 }, { "epoch": 0.8608442004118051, "grad_norm": 0.6842604095723762, "learning_rate": 4.995395336170833e-07, "loss": 0.2816, "step": 25085 }, { "epoch": 0.8608785175017158, "grad_norm": 0.7850574892074684, "learning_rate": 4.992974260974631e-07, "loss": 0.2815, "step": 25086 }, { "epoch": 0.8609128345916266, "grad_norm": 0.8507591444959488, "learning_rate": 4.990553741787763e-07, "loss": 0.2592, "step": 25087 }, { "epoch": 0.8609471516815375, "grad_norm": 0.8524054887011705, "learning_rate": 4.988133778640125e-07, "loss": 0.2602, "step": 25088 }, { "epoch": 0.8609814687714482, "grad_norm": 1.019859961723857, "learning_rate": 4.985714371561618e-07, "loss": 0.3158, "step": 25089 }, { "epoch": 0.861015785861359, "grad_norm": 0.696086437872502, "learning_rate": 4.983295520582127e-07, "loss": 0.2292, "step": 25090 }, { "epoch": 0.8610501029512697, "grad_norm": 0.8653457961313675, "learning_rate": 4.980877225731534e-07, "loss": 0.2662, "step": 25091 }, { "epoch": 0.8610844200411805, "grad_norm": 0.770439139155712, "learning_rate": 4.978459487039727e-07, "loss": 0.2491, "step": 25092 }, { "epoch": 0.8611187371310913, "grad_norm": 0.8137445007666216, "learning_rate": 4.976042304536543e-07, "loss": 0.3068, "step": 25093 }, { "epoch": 0.8611530542210021, "grad_norm": 0.8057035551751901, "learning_rate": 4.973625678251881e-07, "loss": 0.2777, "step": 25094 }, { "epoch": 0.8611873713109128, "grad_norm": 0.8617608693659706, "learning_rate": 4.971209608215571e-07, "loss": 0.2626, "step": 25095 }, { "epoch": 0.8612216884008236, "grad_norm": 0.7785310918658271, "learning_rate": 4.968794094457469e-07, "loss": 0.258, "step": 25096 }, { "epoch": 0.8612560054907343, "grad_norm": 0.8254660476544389, "learning_rate": 4.96637913700741e-07, "loss": 0.2626, "step": 25097 }, { "epoch": 0.8612903225806452, "grad_norm": 0.8141951337523652, "learning_rate": 4.963964735895239e-07, "loss": 0.2956, "step": 25098 }, { "epoch": 0.861324639670556, "grad_norm": 0.7315708728905765, "learning_rate": 4.961550891150774e-07, "loss": 0.2401, "step": 25099 }, { "epoch": 0.8613589567604667, "grad_norm": 0.7928156690097388, "learning_rate": 4.959137602803837e-07, "loss": 0.2402, "step": 25100 }, { "epoch": 0.8613932738503774, "grad_norm": 0.7215901548757814, "learning_rate": 4.95672487088425e-07, "loss": 0.2537, "step": 25101 }, { "epoch": 0.8614275909402883, "grad_norm": 0.7672666848563583, "learning_rate": 4.954312695421798e-07, "loss": 0.2363, "step": 25102 }, { "epoch": 0.8614619080301991, "grad_norm": 0.7591195896103434, "learning_rate": 4.951901076446297e-07, "loss": 0.2705, "step": 25103 }, { "epoch": 0.8614962251201098, "grad_norm": 0.8709171178514805, "learning_rate": 4.949490013987552e-07, "loss": 0.2666, "step": 25104 }, { "epoch": 0.8615305422100206, "grad_norm": 0.8332574597434642, "learning_rate": 4.947079508075314e-07, "loss": 0.2899, "step": 25105 }, { "epoch": 0.8615648592999313, "grad_norm": 0.7495523805081172, "learning_rate": 4.944669558739401e-07, "loss": 0.2316, "step": 25106 }, { "epoch": 0.8615991763898422, "grad_norm": 0.808549400976099, "learning_rate": 4.942260166009555e-07, "loss": 0.2381, "step": 25107 }, { "epoch": 0.8616334934797529, "grad_norm": 0.7789166285018518, "learning_rate": 4.939851329915557e-07, "loss": 0.213, "step": 25108 }, { "epoch": 0.8616678105696637, "grad_norm": 0.6728085266300404, "learning_rate": 4.937443050487156e-07, "loss": 0.2217, "step": 25109 }, { "epoch": 0.8617021276595744, "grad_norm": 0.709352379542049, "learning_rate": 4.935035327754111e-07, "loss": 0.2959, "step": 25110 }, { "epoch": 0.8617364447494853, "grad_norm": 0.76256599315833, "learning_rate": 4.932628161746162e-07, "loss": 0.2785, "step": 25111 }, { "epoch": 0.861770761839396, "grad_norm": 0.6928401003453102, "learning_rate": 4.930221552493053e-07, "loss": 0.234, "step": 25112 }, { "epoch": 0.8618050789293068, "grad_norm": 0.7791397530292294, "learning_rate": 4.927815500024507e-07, "loss": 0.2221, "step": 25113 }, { "epoch": 0.8618393960192176, "grad_norm": 0.8110030453126639, "learning_rate": 4.925410004370251e-07, "loss": 0.2527, "step": 25114 }, { "epoch": 0.8618737131091283, "grad_norm": 0.7405098402489713, "learning_rate": 4.923005065560011e-07, "loss": 0.2557, "step": 25115 }, { "epoch": 0.8619080301990392, "grad_norm": 0.7826068277778054, "learning_rate": 4.920600683623473e-07, "loss": 0.2561, "step": 25116 }, { "epoch": 0.8619423472889499, "grad_norm": 0.925588111972897, "learning_rate": 4.918196858590374e-07, "loss": 0.283, "step": 25117 }, { "epoch": 0.8619766643788607, "grad_norm": 0.8093048269650157, "learning_rate": 4.915793590490381e-07, "loss": 0.2927, "step": 25118 }, { "epoch": 0.8620109814687714, "grad_norm": 0.8130697724737664, "learning_rate": 4.913390879353197e-07, "loss": 0.2265, "step": 25119 }, { "epoch": 0.8620452985586822, "grad_norm": 0.7810784861100899, "learning_rate": 4.910988725208499e-07, "loss": 0.2781, "step": 25120 }, { "epoch": 0.862079615648593, "grad_norm": 0.8940974446079922, "learning_rate": 4.908587128085973e-07, "loss": 0.2589, "step": 25121 }, { "epoch": 0.8621139327385038, "grad_norm": 0.7346514258685777, "learning_rate": 4.906186088015274e-07, "loss": 0.2782, "step": 25122 }, { "epoch": 0.8621482498284145, "grad_norm": 0.7055626086035295, "learning_rate": 4.903785605026079e-07, "loss": 0.193, "step": 25123 }, { "epoch": 0.8621825669183253, "grad_norm": 0.8005382371936604, "learning_rate": 4.901385679148041e-07, "loss": 0.2432, "step": 25124 }, { "epoch": 0.8622168840082362, "grad_norm": 0.7124756108307937, "learning_rate": 4.898986310410786e-07, "loss": 0.2524, "step": 25125 }, { "epoch": 0.8622512010981469, "grad_norm": 0.7786384464871825, "learning_rate": 4.89658749884398e-07, "loss": 0.2456, "step": 25126 }, { "epoch": 0.8622855181880577, "grad_norm": 0.8651296161517636, "learning_rate": 4.894189244477265e-07, "loss": 0.3029, "step": 25127 }, { "epoch": 0.8623198352779684, "grad_norm": 0.7764141114750625, "learning_rate": 4.89179154734023e-07, "loss": 0.2386, "step": 25128 }, { "epoch": 0.8623541523678792, "grad_norm": 0.807454106845032, "learning_rate": 4.889394407462538e-07, "loss": 0.2833, "step": 25129 }, { "epoch": 0.86238846945779, "grad_norm": 0.7929686692925388, "learning_rate": 4.886997824873774e-07, "loss": 0.2454, "step": 25130 }, { "epoch": 0.8624227865477008, "grad_norm": 0.7034975878411696, "learning_rate": 4.884601799603555e-07, "loss": 0.2434, "step": 25131 }, { "epoch": 0.8624571036376115, "grad_norm": 0.6968927831211513, "learning_rate": 4.882206331681482e-07, "loss": 0.1873, "step": 25132 }, { "epoch": 0.8624914207275223, "grad_norm": 0.8201562947685157, "learning_rate": 4.879811421137154e-07, "loss": 0.3025, "step": 25133 }, { "epoch": 0.8625257378174331, "grad_norm": 1.1460537013762948, "learning_rate": 4.877417068000146e-07, "loss": 0.2296, "step": 25134 }, { "epoch": 0.8625600549073439, "grad_norm": 0.8365538768656, "learning_rate": 4.875023272300039e-07, "loss": 0.2681, "step": 25135 }, { "epoch": 0.8625943719972546, "grad_norm": 0.704892934447185, "learning_rate": 4.872630034066422e-07, "loss": 0.269, "step": 25136 }, { "epoch": 0.8626286890871654, "grad_norm": 0.6946936319519988, "learning_rate": 4.870237353328828e-07, "loss": 0.2862, "step": 25137 }, { "epoch": 0.8626630061770761, "grad_norm": 0.7901845617595826, "learning_rate": 4.867845230116853e-07, "loss": 0.3097, "step": 25138 }, { "epoch": 0.862697323266987, "grad_norm": 0.7089919410175795, "learning_rate": 4.865453664460024e-07, "loss": 0.2386, "step": 25139 }, { "epoch": 0.8627316403568978, "grad_norm": 0.8069142278611244, "learning_rate": 4.86306265638789e-07, "loss": 0.2688, "step": 25140 }, { "epoch": 0.8627659574468085, "grad_norm": 0.7984181761352335, "learning_rate": 4.860672205929995e-07, "loss": 0.2463, "step": 25141 }, { "epoch": 0.8628002745367193, "grad_norm": 0.7293216485930294, "learning_rate": 4.858282313115858e-07, "loss": 0.2317, "step": 25142 }, { "epoch": 0.86283459162663, "grad_norm": 0.7256515607014817, "learning_rate": 4.85589297797503e-07, "loss": 0.2288, "step": 25143 }, { "epoch": 0.8628689087165409, "grad_norm": 0.7261644808626236, "learning_rate": 4.853504200537002e-07, "loss": 0.2244, "step": 25144 }, { "epoch": 0.8629032258064516, "grad_norm": 0.9388322373982989, "learning_rate": 4.851115980831295e-07, "loss": 0.2615, "step": 25145 }, { "epoch": 0.8629375428963624, "grad_norm": 0.8046949900764583, "learning_rate": 4.848728318887413e-07, "loss": 0.2473, "step": 25146 }, { "epoch": 0.8629718599862731, "grad_norm": 0.8380658371667407, "learning_rate": 4.846341214734857e-07, "loss": 0.2955, "step": 25147 }, { "epoch": 0.863006177076184, "grad_norm": 0.7685078154254955, "learning_rate": 4.843954668403095e-07, "loss": 0.2692, "step": 25148 }, { "epoch": 0.8630404941660947, "grad_norm": 0.7569415953350259, "learning_rate": 4.841568679921632e-07, "loss": 0.306, "step": 25149 }, { "epoch": 0.8630748112560055, "grad_norm": 0.8062916021210053, "learning_rate": 4.839183249319957e-07, "loss": 0.2203, "step": 25150 }, { "epoch": 0.8631091283459162, "grad_norm": 0.6837231251799029, "learning_rate": 4.836798376627494e-07, "loss": 0.2544, "step": 25151 }, { "epoch": 0.863143445435827, "grad_norm": 0.7646262141741602, "learning_rate": 4.834414061873749e-07, "loss": 0.2452, "step": 25152 }, { "epoch": 0.8631777625257379, "grad_norm": 0.8189901939954689, "learning_rate": 4.832030305088153e-07, "loss": 0.2394, "step": 25153 }, { "epoch": 0.8632120796156486, "grad_norm": 0.853873919600202, "learning_rate": 4.829647106300167e-07, "loss": 0.3329, "step": 25154 }, { "epoch": 0.8632463967055594, "grad_norm": 0.7848954435115013, "learning_rate": 4.827264465539222e-07, "loss": 0.2617, "step": 25155 }, { "epoch": 0.8632807137954701, "grad_norm": 0.809266621763979, "learning_rate": 4.824882382834761e-07, "loss": 0.2652, "step": 25156 }, { "epoch": 0.863315030885381, "grad_norm": 0.8099496805858835, "learning_rate": 4.822500858216206e-07, "loss": 0.2736, "step": 25157 }, { "epoch": 0.8633493479752917, "grad_norm": 0.8574101189966029, "learning_rate": 4.820119891712982e-07, "loss": 0.2153, "step": 25158 }, { "epoch": 0.8633836650652025, "grad_norm": 0.7400066049591886, "learning_rate": 4.817739483354517e-07, "loss": 0.2429, "step": 25159 }, { "epoch": 0.8634179821551132, "grad_norm": 0.8328658411281272, "learning_rate": 4.815359633170175e-07, "loss": 0.3134, "step": 25160 }, { "epoch": 0.863452299245024, "grad_norm": 0.7667508584549843, "learning_rate": 4.81298034118941e-07, "loss": 0.2979, "step": 25161 }, { "epoch": 0.8634866163349348, "grad_norm": 0.7513986943953358, "learning_rate": 4.810601607441579e-07, "loss": 0.2628, "step": 25162 }, { "epoch": 0.8635209334248456, "grad_norm": 0.8638239493737991, "learning_rate": 4.808223431956077e-07, "loss": 0.2556, "step": 25163 }, { "epoch": 0.8635552505147563, "grad_norm": 0.8865908792568, "learning_rate": 4.805845814762289e-07, "loss": 0.2913, "step": 25164 }, { "epoch": 0.8635895676046671, "grad_norm": 0.9214695899688207, "learning_rate": 4.803468755889584e-07, "loss": 0.2447, "step": 25165 }, { "epoch": 0.8636238846945778, "grad_norm": 0.7834470435066597, "learning_rate": 4.801092255367329e-07, "loss": 0.2507, "step": 25166 }, { "epoch": 0.8636582017844887, "grad_norm": 0.7658401199370324, "learning_rate": 4.798716313224883e-07, "loss": 0.2696, "step": 25167 }, { "epoch": 0.8636925188743995, "grad_norm": 0.681244171445644, "learning_rate": 4.796340929491605e-07, "loss": 0.2006, "step": 25168 }, { "epoch": 0.8637268359643102, "grad_norm": 0.7839818769363107, "learning_rate": 4.793966104196812e-07, "loss": 0.3112, "step": 25169 }, { "epoch": 0.863761153054221, "grad_norm": 0.7984845707477088, "learning_rate": 4.791591837369874e-07, "loss": 0.2997, "step": 25170 }, { "epoch": 0.8637954701441318, "grad_norm": 0.7909852909076133, "learning_rate": 4.789218129040124e-07, "loss": 0.2709, "step": 25171 }, { "epoch": 0.8638297872340426, "grad_norm": 0.8094489902420683, "learning_rate": 4.786844979236848e-07, "loss": 0.2446, "step": 25172 }, { "epoch": 0.8638641043239533, "grad_norm": 0.8339794807370033, "learning_rate": 4.784472387989414e-07, "loss": 0.2443, "step": 25173 }, { "epoch": 0.8638984214138641, "grad_norm": 0.7378294262266086, "learning_rate": 4.782100355327085e-07, "loss": 0.2377, "step": 25174 }, { "epoch": 0.8639327385037748, "grad_norm": 0.761542055845805, "learning_rate": 4.779728881279205e-07, "loss": 0.23, "step": 25175 }, { "epoch": 0.8639670555936857, "grad_norm": 0.8210606885895856, "learning_rate": 4.777357965875045e-07, "loss": 0.3046, "step": 25176 }, { "epoch": 0.8640013726835964, "grad_norm": 0.7294770507200558, "learning_rate": 4.774987609143905e-07, "loss": 0.2367, "step": 25177 }, { "epoch": 0.8640356897735072, "grad_norm": 0.7775340452080935, "learning_rate": 4.77261781111506e-07, "loss": 0.2581, "step": 25178 }, { "epoch": 0.864070006863418, "grad_norm": 0.9038115496414605, "learning_rate": 4.770248571817798e-07, "loss": 0.2653, "step": 25179 }, { "epoch": 0.8641043239533288, "grad_norm": 0.7968876337582856, "learning_rate": 4.7678798912813796e-07, "loss": 0.2264, "step": 25180 }, { "epoch": 0.8641386410432396, "grad_norm": 0.6017409184341375, "learning_rate": 4.7655117695350694e-07, "loss": 0.2052, "step": 25181 }, { "epoch": 0.8641729581331503, "grad_norm": 0.7559320730282607, "learning_rate": 4.7631442066081334e-07, "loss": 0.2989, "step": 25182 }, { "epoch": 0.8642072752230611, "grad_norm": 0.8060806382265995, "learning_rate": 4.760777202529787e-07, "loss": 0.278, "step": 25183 }, { "epoch": 0.8642415923129718, "grad_norm": 0.8234888482383771, "learning_rate": 4.758410757329318e-07, "loss": 0.2584, "step": 25184 }, { "epoch": 0.8642759094028827, "grad_norm": 1.037250328221271, "learning_rate": 4.756044871035925e-07, "loss": 0.2444, "step": 25185 }, { "epoch": 0.8643102264927934, "grad_norm": 0.6858350957548847, "learning_rate": 4.753679543678852e-07, "loss": 0.2228, "step": 25186 }, { "epoch": 0.8643445435827042, "grad_norm": 0.7851491097209311, "learning_rate": 4.7513147752873136e-07, "loss": 0.2759, "step": 25187 }, { "epoch": 0.8643788606726149, "grad_norm": 0.7650998696111353, "learning_rate": 4.7489505658905256e-07, "loss": 0.2856, "step": 25188 }, { "epoch": 0.8644131777625257, "grad_norm": 0.8735834306648866, "learning_rate": 4.746586915517698e-07, "loss": 0.237, "step": 25189 }, { "epoch": 0.8644474948524365, "grad_norm": 0.8403368815231507, "learning_rate": 4.7442238241980297e-07, "loss": 0.2865, "step": 25190 }, { "epoch": 0.8644818119423473, "grad_norm": 0.8299793795288624, "learning_rate": 4.74186129196072e-07, "loss": 0.2769, "step": 25191 }, { "epoch": 0.864516129032258, "grad_norm": 0.6563393743862421, "learning_rate": 4.739499318834928e-07, "loss": 0.2499, "step": 25192 }, { "epoch": 0.8645504461221688, "grad_norm": 0.7226200260333355, "learning_rate": 4.7371379048498646e-07, "loss": 0.2058, "step": 25193 }, { "epoch": 0.8645847632120797, "grad_norm": 0.8808230044205453, "learning_rate": 4.7347770500347e-07, "loss": 0.3098, "step": 25194 }, { "epoch": 0.8646190803019904, "grad_norm": 0.8391583197331937, "learning_rate": 4.732416754418573e-07, "loss": 0.3049, "step": 25195 }, { "epoch": 0.8646533973919012, "grad_norm": 0.7575023584239817, "learning_rate": 4.730057018030676e-07, "loss": 0.2382, "step": 25196 }, { "epoch": 0.8646877144818119, "grad_norm": 0.7457301776557517, "learning_rate": 4.7276978409001364e-07, "loss": 0.2679, "step": 25197 }, { "epoch": 0.8647220315717227, "grad_norm": 0.777836938356672, "learning_rate": 4.725339223056108e-07, "loss": 0.2904, "step": 25198 }, { "epoch": 0.8647563486616335, "grad_norm": 0.7223121175469287, "learning_rate": 4.722981164527729e-07, "loss": 0.2478, "step": 25199 }, { "epoch": 0.8647906657515443, "grad_norm": 0.7229161119269483, "learning_rate": 4.7206236653441316e-07, "loss": 0.2419, "step": 25200 }, { "epoch": 0.864824982841455, "grad_norm": 0.7979905310543921, "learning_rate": 4.718266725534437e-07, "loss": 0.267, "step": 25201 }, { "epoch": 0.8648592999313658, "grad_norm": 0.8266735515926755, "learning_rate": 4.715910345127761e-07, "loss": 0.2747, "step": 25202 }, { "epoch": 0.8648936170212767, "grad_norm": 0.7261935736019997, "learning_rate": 4.71355452415323e-07, "loss": 0.2233, "step": 25203 }, { "epoch": 0.8649279341111874, "grad_norm": 0.6865302752545807, "learning_rate": 4.711199262639915e-07, "loss": 0.2539, "step": 25204 }, { "epoch": 0.8649622512010982, "grad_norm": 0.8277890640690493, "learning_rate": 4.7088445606169487e-07, "loss": 0.2515, "step": 25205 }, { "epoch": 0.8649965682910089, "grad_norm": 0.7459293031441896, "learning_rate": 4.70649041811338e-07, "loss": 0.2282, "step": 25206 }, { "epoch": 0.8650308853809197, "grad_norm": 0.8120311897287424, "learning_rate": 4.7041368351583407e-07, "loss": 0.2561, "step": 25207 }, { "epoch": 0.8650652024708305, "grad_norm": 0.7801628466694117, "learning_rate": 4.7017838117808687e-07, "loss": 0.2183, "step": 25208 }, { "epoch": 0.8650995195607413, "grad_norm": 0.8235942943453597, "learning_rate": 4.699431348010036e-07, "loss": 0.2768, "step": 25209 }, { "epoch": 0.865133836650652, "grad_norm": 0.7424559120261686, "learning_rate": 4.697079443874936e-07, "loss": 0.2114, "step": 25210 }, { "epoch": 0.8651681537405628, "grad_norm": 0.7758074422989475, "learning_rate": 4.694728099404583e-07, "loss": 0.2926, "step": 25211 }, { "epoch": 0.8652024708304735, "grad_norm": 0.8355976744360825, "learning_rate": 4.6923773146280495e-07, "loss": 0.2843, "step": 25212 }, { "epoch": 0.8652367879203844, "grad_norm": 0.8131405302186653, "learning_rate": 4.690027089574373e-07, "loss": 0.2813, "step": 25213 }, { "epoch": 0.8652711050102951, "grad_norm": 0.7050799080514204, "learning_rate": 4.6876774242725856e-07, "loss": 0.2292, "step": 25214 }, { "epoch": 0.8653054221002059, "grad_norm": 0.8366803739020958, "learning_rate": 4.685328318751708e-07, "loss": 0.2596, "step": 25215 }, { "epoch": 0.8653397391901166, "grad_norm": 0.7061633412793794, "learning_rate": 4.6829797730407747e-07, "loss": 0.2531, "step": 25216 }, { "epoch": 0.8653740562800275, "grad_norm": 0.758457439745976, "learning_rate": 4.6806317871687936e-07, "loss": 0.2929, "step": 25217 }, { "epoch": 0.8654083733699383, "grad_norm": 0.8416143151107512, "learning_rate": 4.678284361164753e-07, "loss": 0.3138, "step": 25218 }, { "epoch": 0.865442690459849, "grad_norm": 0.7316517446224743, "learning_rate": 4.675937495057692e-07, "loss": 0.2363, "step": 25219 }, { "epoch": 0.8654770075497598, "grad_norm": 0.757437399617025, "learning_rate": 4.6735911888765697e-07, "loss": 0.255, "step": 25220 }, { "epoch": 0.8655113246396705, "grad_norm": 0.7949079213584463, "learning_rate": 4.6712454426503797e-07, "loss": 0.2577, "step": 25221 }, { "epoch": 0.8655456417295814, "grad_norm": 0.8230573235419628, "learning_rate": 4.66890025640811e-07, "loss": 0.2768, "step": 25222 }, { "epoch": 0.8655799588194921, "grad_norm": 0.71750089411134, "learning_rate": 4.6665556301787205e-07, "loss": 0.282, "step": 25223 }, { "epoch": 0.8656142759094029, "grad_norm": 0.658635339667239, "learning_rate": 4.6642115639911835e-07, "loss": 0.2335, "step": 25224 }, { "epoch": 0.8656485929993136, "grad_norm": 0.7327407577077915, "learning_rate": 4.661868057874458e-07, "loss": 0.2706, "step": 25225 }, { "epoch": 0.8656829100892245, "grad_norm": 0.8108505144671764, "learning_rate": 4.659525111857505e-07, "loss": 0.2813, "step": 25226 }, { "epoch": 0.8657172271791352, "grad_norm": 0.7392018827908392, "learning_rate": 4.657182725969234e-07, "loss": 0.2456, "step": 25227 }, { "epoch": 0.865751544269046, "grad_norm": 0.7704129765824146, "learning_rate": 4.6548409002386274e-07, "loss": 0.2954, "step": 25228 }, { "epoch": 0.8657858613589567, "grad_norm": 0.7700860954644441, "learning_rate": 4.652499634694585e-07, "loss": 0.2505, "step": 25229 }, { "epoch": 0.8658201784488675, "grad_norm": 0.8721291859887301, "learning_rate": 4.6501589293660387e-07, "loss": 0.274, "step": 25230 }, { "epoch": 0.8658544955387784, "grad_norm": 0.841674802868593, "learning_rate": 4.6478187842819045e-07, "loss": 0.2907, "step": 25231 }, { "epoch": 0.8658888126286891, "grad_norm": 0.8704755524083436, "learning_rate": 4.645479199471098e-07, "loss": 0.2865, "step": 25232 }, { "epoch": 0.8659231297185999, "grad_norm": 0.7521924767186243, "learning_rate": 4.6431401749625127e-07, "loss": 0.2063, "step": 25233 }, { "epoch": 0.8659574468085106, "grad_norm": 0.8992811078051071, "learning_rate": 4.6408017107850534e-07, "loss": 0.293, "step": 25234 }, { "epoch": 0.8659917638984214, "grad_norm": 0.8052470172016749, "learning_rate": 4.6384638069676137e-07, "loss": 0.3101, "step": 25235 }, { "epoch": 0.8660260809883322, "grad_norm": 0.8859663761482657, "learning_rate": 4.636126463539048e-07, "loss": 0.2191, "step": 25236 }, { "epoch": 0.866060398078243, "grad_norm": 0.7317852660915413, "learning_rate": 4.6337896805282555e-07, "loss": 0.217, "step": 25237 }, { "epoch": 0.8660947151681537, "grad_norm": 0.9174813200670315, "learning_rate": 4.631453457964108e-07, "loss": 0.3034, "step": 25238 }, { "epoch": 0.8661290322580645, "grad_norm": 0.7636768144977868, "learning_rate": 4.629117795875454e-07, "loss": 0.3422, "step": 25239 }, { "epoch": 0.8661633493479753, "grad_norm": 0.7829073861803826, "learning_rate": 4.62678269429116e-07, "loss": 0.2692, "step": 25240 }, { "epoch": 0.8661976664378861, "grad_norm": 0.7476747094803775, "learning_rate": 4.624448153240052e-07, "loss": 0.2306, "step": 25241 }, { "epoch": 0.8662319835277968, "grad_norm": 0.8758740674052982, "learning_rate": 4.6221141727509964e-07, "loss": 0.3121, "step": 25242 }, { "epoch": 0.8662663006177076, "grad_norm": 0.8071944320872617, "learning_rate": 4.619780752852809e-07, "loss": 0.2309, "step": 25243 }, { "epoch": 0.8663006177076183, "grad_norm": 0.8349802492478114, "learning_rate": 4.617447893574323e-07, "loss": 0.2618, "step": 25244 }, { "epoch": 0.8663349347975292, "grad_norm": 1.0397287557149162, "learning_rate": 4.6151155949443584e-07, "loss": 0.2486, "step": 25245 }, { "epoch": 0.86636925188744, "grad_norm": 0.7847918451193734, "learning_rate": 4.6127838569917263e-07, "loss": 0.2112, "step": 25246 }, { "epoch": 0.8664035689773507, "grad_norm": 0.7741544706390561, "learning_rate": 4.610452679745231e-07, "loss": 0.2802, "step": 25247 }, { "epoch": 0.8664378860672615, "grad_norm": 0.7616007031935913, "learning_rate": 4.6081220632336776e-07, "loss": 0.2488, "step": 25248 }, { "epoch": 0.8664722031571723, "grad_norm": 0.6702085760092331, "learning_rate": 4.60579200748586e-07, "loss": 0.2398, "step": 25249 }, { "epoch": 0.8665065202470831, "grad_norm": 0.6588426448274897, "learning_rate": 4.6034625125305376e-07, "loss": 0.2701, "step": 25250 }, { "epoch": 0.8665408373369938, "grad_norm": 0.7568096593120041, "learning_rate": 4.601133578396533e-07, "loss": 0.2198, "step": 25251 }, { "epoch": 0.8665751544269046, "grad_norm": 0.8380257914786059, "learning_rate": 4.5988052051125775e-07, "loss": 0.2817, "step": 25252 }, { "epoch": 0.8666094715168153, "grad_norm": 0.6980812223056077, "learning_rate": 4.596477392707449e-07, "loss": 0.2704, "step": 25253 }, { "epoch": 0.8666437886067262, "grad_norm": 0.7639290215898126, "learning_rate": 4.5941501412099187e-07, "loss": 0.2508, "step": 25254 }, { "epoch": 0.866678105696637, "grad_norm": 0.7788851788224773, "learning_rate": 4.591823450648719e-07, "loss": 0.2681, "step": 25255 }, { "epoch": 0.8667124227865477, "grad_norm": 0.7847366967909366, "learning_rate": 4.5894973210525993e-07, "loss": 0.2482, "step": 25256 }, { "epoch": 0.8667467398764584, "grad_norm": 0.8608166539992625, "learning_rate": 4.587171752450298e-07, "loss": 0.2264, "step": 25257 }, { "epoch": 0.8667810569663692, "grad_norm": 0.7152894767594582, "learning_rate": 4.5848467448705525e-07, "loss": 0.2584, "step": 25258 }, { "epoch": 0.8668153740562801, "grad_norm": 0.7739263315325104, "learning_rate": 4.582522298342057e-07, "loss": 0.2475, "step": 25259 }, { "epoch": 0.8668496911461908, "grad_norm": 0.7919412795355752, "learning_rate": 4.580198412893555e-07, "loss": 0.2725, "step": 25260 }, { "epoch": 0.8668840082361016, "grad_norm": 0.7887954962526378, "learning_rate": 4.5778750885537573e-07, "loss": 0.2635, "step": 25261 }, { "epoch": 0.8669183253260123, "grad_norm": 0.8529099289554054, "learning_rate": 4.57555232535134e-07, "loss": 0.2699, "step": 25262 }, { "epoch": 0.8669526424159232, "grad_norm": 0.7317846229734869, "learning_rate": 4.5732301233150253e-07, "loss": 0.3187, "step": 25263 }, { "epoch": 0.8669869595058339, "grad_norm": 0.7297882807270339, "learning_rate": 4.5709084824734906e-07, "loss": 0.2913, "step": 25264 }, { "epoch": 0.8670212765957447, "grad_norm": 0.7634154754907535, "learning_rate": 4.568587402855407e-07, "loss": 0.2044, "step": 25265 }, { "epoch": 0.8670555936856554, "grad_norm": 0.7428757136054353, "learning_rate": 4.5662668844894677e-07, "loss": 0.2726, "step": 25266 }, { "epoch": 0.8670899107755662, "grad_norm": 0.8110069582484968, "learning_rate": 4.5639469274043226e-07, "loss": 0.2294, "step": 25267 }, { "epoch": 0.867124227865477, "grad_norm": 0.8410525626004945, "learning_rate": 4.561627531628643e-07, "loss": 0.228, "step": 25268 }, { "epoch": 0.8671585449553878, "grad_norm": 0.6373280135455908, "learning_rate": 4.5593086971910785e-07, "loss": 0.202, "step": 25269 }, { "epoch": 0.8671928620452986, "grad_norm": 0.7734241394868239, "learning_rate": 4.556990424120278e-07, "loss": 0.2889, "step": 25270 }, { "epoch": 0.8672271791352093, "grad_norm": 0.7980469726762716, "learning_rate": 4.554672712444874e-07, "loss": 0.2503, "step": 25271 }, { "epoch": 0.86726149622512, "grad_norm": 0.7542746657818129, "learning_rate": 4.552355562193522e-07, "loss": 0.2472, "step": 25272 }, { "epoch": 0.8672958133150309, "grad_norm": 0.7638664891759082, "learning_rate": 4.5500389733948047e-07, "loss": 0.2616, "step": 25273 }, { "epoch": 0.8673301304049417, "grad_norm": 0.854193135165948, "learning_rate": 4.5477229460773873e-07, "loss": 0.3591, "step": 25274 }, { "epoch": 0.8673644474948524, "grad_norm": 0.8344169644530393, "learning_rate": 4.545407480269853e-07, "loss": 0.254, "step": 25275 }, { "epoch": 0.8673987645847632, "grad_norm": 0.7455157012553436, "learning_rate": 4.543092576000807e-07, "loss": 0.2652, "step": 25276 }, { "epoch": 0.867433081674674, "grad_norm": 0.7179806246240338, "learning_rate": 4.5407782332988703e-07, "loss": 0.2527, "step": 25277 }, { "epoch": 0.8674673987645848, "grad_norm": 0.7839394741179931, "learning_rate": 4.538464452192609e-07, "loss": 0.3657, "step": 25278 }, { "epoch": 0.8675017158544955, "grad_norm": 0.8349618724272573, "learning_rate": 4.536151232710612e-07, "loss": 0.2677, "step": 25279 }, { "epoch": 0.8675360329444063, "grad_norm": 0.7588674889232593, "learning_rate": 4.5338385748814664e-07, "loss": 0.266, "step": 25280 }, { "epoch": 0.867570350034317, "grad_norm": 0.843907807183075, "learning_rate": 4.531526478733739e-07, "loss": 0.2743, "step": 25281 }, { "epoch": 0.8676046671242279, "grad_norm": 0.7897153260544323, "learning_rate": 4.529214944295984e-07, "loss": 0.2899, "step": 25282 }, { "epoch": 0.8676389842141387, "grad_norm": 0.8194620602941157, "learning_rate": 4.526903971596769e-07, "loss": 0.2516, "step": 25283 }, { "epoch": 0.8676733013040494, "grad_norm": 0.768921250952977, "learning_rate": 4.524593560664647e-07, "loss": 0.221, "step": 25284 }, { "epoch": 0.8677076183939602, "grad_norm": 0.8419773296513728, "learning_rate": 4.5222837115281357e-07, "loss": 0.2421, "step": 25285 }, { "epoch": 0.867741935483871, "grad_norm": 0.8114412871454257, "learning_rate": 4.5199744242158006e-07, "loss": 0.3032, "step": 25286 }, { "epoch": 0.8677762525737818, "grad_norm": 0.803000632263769, "learning_rate": 4.517665698756152e-07, "loss": 0.3339, "step": 25287 }, { "epoch": 0.8678105696636925, "grad_norm": 0.9215405189806536, "learning_rate": 4.515357535177717e-07, "loss": 0.3435, "step": 25288 }, { "epoch": 0.8678448867536033, "grad_norm": 0.7429813261624355, "learning_rate": 4.5130499335090063e-07, "loss": 0.3123, "step": 25289 }, { "epoch": 0.867879203843514, "grad_norm": 0.8911673640579392, "learning_rate": 4.510742893778536e-07, "loss": 0.2377, "step": 25290 }, { "epoch": 0.8679135209334249, "grad_norm": 0.7727258982206524, "learning_rate": 4.508436416014794e-07, "loss": 0.2486, "step": 25291 }, { "epoch": 0.8679478380233356, "grad_norm": 0.7338733010161367, "learning_rate": 4.5061305002462916e-07, "loss": 0.2912, "step": 25292 }, { "epoch": 0.8679821551132464, "grad_norm": 0.7359891434532346, "learning_rate": 4.5038251465015057e-07, "loss": 0.2593, "step": 25293 }, { "epoch": 0.8680164722031571, "grad_norm": 0.7590178681589779, "learning_rate": 4.501520354808908e-07, "loss": 0.2443, "step": 25294 }, { "epoch": 0.8680507892930679, "grad_norm": 1.548344353886828, "learning_rate": 4.4992161251969925e-07, "loss": 0.2527, "step": 25295 }, { "epoch": 0.8680851063829788, "grad_norm": 0.7594658797521541, "learning_rate": 4.4969124576942023e-07, "loss": 0.2819, "step": 25296 }, { "epoch": 0.8681194234728895, "grad_norm": 0.7933023450816853, "learning_rate": 4.4946093523290103e-07, "loss": 0.2921, "step": 25297 }, { "epoch": 0.8681537405628003, "grad_norm": 0.7587563122393829, "learning_rate": 4.4923068091298705e-07, "loss": 0.2703, "step": 25298 }, { "epoch": 0.868188057652711, "grad_norm": 0.9040828754407422, "learning_rate": 4.490004828125205e-07, "loss": 0.2818, "step": 25299 }, { "epoch": 0.8682223747426219, "grad_norm": 0.842832129235471, "learning_rate": 4.487703409343497e-07, "loss": 0.2442, "step": 25300 }, { "epoch": 0.8682566918325326, "grad_norm": 0.7665537756668211, "learning_rate": 4.485402552813134e-07, "loss": 0.274, "step": 25301 }, { "epoch": 0.8682910089224434, "grad_norm": 0.7717174670791902, "learning_rate": 4.4831022585625607e-07, "loss": 0.2631, "step": 25302 }, { "epoch": 0.8683253260123541, "grad_norm": 0.7906019190787372, "learning_rate": 4.480802526620193e-07, "loss": 0.2282, "step": 25303 }, { "epoch": 0.8683596431022649, "grad_norm": 0.7213679599451587, "learning_rate": 4.478503357014441e-07, "loss": 0.2392, "step": 25304 }, { "epoch": 0.8683939601921757, "grad_norm": 0.7367437109518153, "learning_rate": 4.4762047497737047e-07, "loss": 0.2724, "step": 25305 }, { "epoch": 0.8684282772820865, "grad_norm": 0.8365990455177686, "learning_rate": 4.473906704926384e-07, "loss": 0.2683, "step": 25306 }, { "epoch": 0.8684625943719972, "grad_norm": 0.7829863381873229, "learning_rate": 4.471609222500878e-07, "loss": 0.291, "step": 25307 }, { "epoch": 0.868496911461908, "grad_norm": 0.7959951734438426, "learning_rate": 4.469312302525536e-07, "loss": 0.2619, "step": 25308 }, { "epoch": 0.8685312285518189, "grad_norm": 0.7209412517610216, "learning_rate": 4.4670159450287796e-07, "loss": 0.1943, "step": 25309 }, { "epoch": 0.8685655456417296, "grad_norm": 0.7657936954613009, "learning_rate": 4.4647201500389424e-07, "loss": 0.2182, "step": 25310 }, { "epoch": 0.8685998627316404, "grad_norm": 0.7352430809636772, "learning_rate": 4.462424917584401e-07, "loss": 0.2965, "step": 25311 }, { "epoch": 0.8686341798215511, "grad_norm": 0.7492886789146617, "learning_rate": 4.4601302476935115e-07, "loss": 0.2254, "step": 25312 }, { "epoch": 0.8686684969114619, "grad_norm": 0.7486014392722388, "learning_rate": 4.4578361403946114e-07, "loss": 0.2648, "step": 25313 }, { "epoch": 0.8687028140013727, "grad_norm": 0.8230348766708666, "learning_rate": 4.455542595716056e-07, "loss": 0.2445, "step": 25314 }, { "epoch": 0.8687371310912835, "grad_norm": 0.7911438859490587, "learning_rate": 4.453249613686167e-07, "loss": 0.2146, "step": 25315 }, { "epoch": 0.8687714481811942, "grad_norm": 0.8260820568092669, "learning_rate": 4.4509571943332884e-07, "loss": 0.2137, "step": 25316 }, { "epoch": 0.868805765271105, "grad_norm": 0.8746388232144272, "learning_rate": 4.448665337685709e-07, "loss": 0.2885, "step": 25317 }, { "epoch": 0.8688400823610157, "grad_norm": 0.8705085665035409, "learning_rate": 4.4463740437717784e-07, "loss": 0.3159, "step": 25318 }, { "epoch": 0.8688743994509266, "grad_norm": 0.8036661200212116, "learning_rate": 4.444083312619779e-07, "loss": 0.3103, "step": 25319 }, { "epoch": 0.8689087165408373, "grad_norm": 0.7180892950931076, "learning_rate": 4.4417931442580055e-07, "loss": 0.2067, "step": 25320 }, { "epoch": 0.8689430336307481, "grad_norm": 0.8194450328060114, "learning_rate": 4.4395035387147844e-07, "loss": 0.3363, "step": 25321 }, { "epoch": 0.8689773507206588, "grad_norm": 0.763636809989298, "learning_rate": 4.437214496018366e-07, "loss": 0.3137, "step": 25322 }, { "epoch": 0.8690116678105697, "grad_norm": 0.9988158862677031, "learning_rate": 4.434926016197044e-07, "loss": 0.2485, "step": 25323 }, { "epoch": 0.8690459849004805, "grad_norm": 0.7823644051815032, "learning_rate": 4.432638099279085e-07, "loss": 0.2335, "step": 25324 }, { "epoch": 0.8690803019903912, "grad_norm": 0.8864368025433288, "learning_rate": 4.4303507452927605e-07, "loss": 0.2813, "step": 25325 }, { "epoch": 0.869114619080302, "grad_norm": 0.7894642425660345, "learning_rate": 4.4280639542663205e-07, "loss": 0.2354, "step": 25326 }, { "epoch": 0.8691489361702127, "grad_norm": 0.783636862554604, "learning_rate": 4.4257777262280146e-07, "loss": 0.2668, "step": 25327 }, { "epoch": 0.8691832532601236, "grad_norm": 0.8006562283929956, "learning_rate": 4.4234920612061084e-07, "loss": 0.3243, "step": 25328 }, { "epoch": 0.8692175703500343, "grad_norm": 0.7392902402565289, "learning_rate": 4.4212069592287964e-07, "loss": 0.2421, "step": 25329 }, { "epoch": 0.8692518874399451, "grad_norm": 0.7256101709660124, "learning_rate": 4.418922420324351e-07, "loss": 0.247, "step": 25330 }, { "epoch": 0.8692862045298558, "grad_norm": 0.8331777245623162, "learning_rate": 4.4166384445209597e-07, "loss": 0.3025, "step": 25331 }, { "epoch": 0.8693205216197667, "grad_norm": 0.8151654109732862, "learning_rate": 4.414355031846873e-07, "loss": 0.2222, "step": 25332 }, { "epoch": 0.8693548387096774, "grad_norm": 0.8960031579696772, "learning_rate": 4.412072182330274e-07, "loss": 0.2344, "step": 25333 }, { "epoch": 0.8693891557995882, "grad_norm": 0.7077128609455371, "learning_rate": 4.4097898959993724e-07, "loss": 0.2179, "step": 25334 }, { "epoch": 0.869423472889499, "grad_norm": 0.7530192874505482, "learning_rate": 4.4075081728823634e-07, "loss": 0.2139, "step": 25335 }, { "epoch": 0.8694577899794097, "grad_norm": 0.7485404801479795, "learning_rate": 4.405227013007435e-07, "loss": 0.2801, "step": 25336 }, { "epoch": 0.8694921070693206, "grad_norm": 0.8440415682004335, "learning_rate": 4.40294641640277e-07, "loss": 0.3163, "step": 25337 }, { "epoch": 0.8695264241592313, "grad_norm": 0.9305852670552182, "learning_rate": 4.4006663830965415e-07, "loss": 0.3069, "step": 25338 }, { "epoch": 0.8695607412491421, "grad_norm": 0.7544001807680427, "learning_rate": 4.39838691311692e-07, "loss": 0.2368, "step": 25339 }, { "epoch": 0.8695950583390528, "grad_norm": 0.7800757310371655, "learning_rate": 4.39610800649205e-07, "loss": 0.2393, "step": 25340 }, { "epoch": 0.8696293754289636, "grad_norm": 0.7303925735966477, "learning_rate": 4.39382966325011e-07, "loss": 0.2743, "step": 25341 }, { "epoch": 0.8696636925188744, "grad_norm": 0.7085130175061305, "learning_rate": 4.391551883419226e-07, "loss": 0.2416, "step": 25342 }, { "epoch": 0.8696980096087852, "grad_norm": 0.777633248381298, "learning_rate": 4.3892746670275374e-07, "loss": 0.286, "step": 25343 }, { "epoch": 0.8697323266986959, "grad_norm": 0.7500495231474245, "learning_rate": 4.3869980141031996e-07, "loss": 0.2569, "step": 25344 }, { "epoch": 0.8697666437886067, "grad_norm": 0.7447409386395661, "learning_rate": 4.384721924674312e-07, "loss": 0.2406, "step": 25345 }, { "epoch": 0.8698009608785175, "grad_norm": 0.846011683881222, "learning_rate": 4.3824463987690026e-07, "loss": 0.2982, "step": 25346 }, { "epoch": 0.8698352779684283, "grad_norm": 0.8278970751673257, "learning_rate": 4.380171436415381e-07, "loss": 0.258, "step": 25347 }, { "epoch": 0.869869595058339, "grad_norm": 0.703437664727579, "learning_rate": 4.3778970376415595e-07, "loss": 0.2404, "step": 25348 }, { "epoch": 0.8699039121482498, "grad_norm": 0.7987451738055202, "learning_rate": 4.3756232024756264e-07, "loss": 0.2939, "step": 25349 }, { "epoch": 0.8699382292381606, "grad_norm": 1.0055995051088191, "learning_rate": 4.373349930945675e-07, "loss": 0.2509, "step": 25350 }, { "epoch": 0.8699725463280714, "grad_norm": 0.7792056606477312, "learning_rate": 4.3710772230798004e-07, "loss": 0.3235, "step": 25351 }, { "epoch": 0.8700068634179822, "grad_norm": 0.7456972045076304, "learning_rate": 4.3688050789060466e-07, "loss": 0.2384, "step": 25352 }, { "epoch": 0.8700411805078929, "grad_norm": 0.6592375955273432, "learning_rate": 4.3665334984525297e-07, "loss": 0.2058, "step": 25353 }, { "epoch": 0.8700754975978037, "grad_norm": 0.7444544811437048, "learning_rate": 4.364262481747272e-07, "loss": 0.2525, "step": 25354 }, { "epoch": 0.8701098146877145, "grad_norm": 0.6844536374729208, "learning_rate": 4.3619920288183517e-07, "loss": 0.2885, "step": 25355 }, { "epoch": 0.8701441317776253, "grad_norm": 0.7676383375852825, "learning_rate": 4.359722139693806e-07, "loss": 0.2446, "step": 25356 }, { "epoch": 0.870178448867536, "grad_norm": 0.7551736701768674, "learning_rate": 4.357452814401686e-07, "loss": 0.2802, "step": 25357 }, { "epoch": 0.8702127659574468, "grad_norm": 0.8458590179064887, "learning_rate": 4.3551840529700184e-07, "loss": 0.3287, "step": 25358 }, { "epoch": 0.8702470830473575, "grad_norm": 0.7881036864630094, "learning_rate": 4.352915855426837e-07, "loss": 0.2957, "step": 25359 }, { "epoch": 0.8702814001372684, "grad_norm": 0.8831552223831586, "learning_rate": 4.350648221800174e-07, "loss": 0.2282, "step": 25360 }, { "epoch": 0.8703157172271792, "grad_norm": 0.865237849457242, "learning_rate": 4.3483811521180084e-07, "loss": 0.31, "step": 25361 }, { "epoch": 0.8703500343170899, "grad_norm": 0.8203929319193788, "learning_rate": 4.3461146464083836e-07, "loss": 0.2843, "step": 25362 }, { "epoch": 0.8703843514070007, "grad_norm": 0.719158080544889, "learning_rate": 4.3438487046992714e-07, "loss": 0.2105, "step": 25363 }, { "epoch": 0.8704186684969114, "grad_norm": 0.8745806131512931, "learning_rate": 4.341583327018689e-07, "loss": 0.2615, "step": 25364 }, { "epoch": 0.8704529855868223, "grad_norm": 0.6745199424901278, "learning_rate": 4.339318513394619e-07, "loss": 0.2298, "step": 25365 }, { "epoch": 0.870487302676733, "grad_norm": 0.8782991543043042, "learning_rate": 4.3370542638550117e-07, "loss": 0.2602, "step": 25366 }, { "epoch": 0.8705216197666438, "grad_norm": 0.7955635052732397, "learning_rate": 4.334790578427883e-07, "loss": 0.2984, "step": 25367 }, { "epoch": 0.8705559368565545, "grad_norm": 0.9050147951357761, "learning_rate": 4.332527457141167e-07, "loss": 0.2556, "step": 25368 }, { "epoch": 0.8705902539464654, "grad_norm": 0.8228013444965341, "learning_rate": 4.330264900022835e-07, "loss": 0.3103, "step": 25369 }, { "epoch": 0.8706245710363761, "grad_norm": 0.9261818475307363, "learning_rate": 4.3280029071008313e-07, "loss": 0.2963, "step": 25370 }, { "epoch": 0.8706588881262869, "grad_norm": 0.8050073534277687, "learning_rate": 4.3257414784031007e-07, "loss": 0.2536, "step": 25371 }, { "epoch": 0.8706932052161976, "grad_norm": 0.9173067057082341, "learning_rate": 4.323480613957587e-07, "loss": 0.2547, "step": 25372 }, { "epoch": 0.8707275223061084, "grad_norm": 0.7283098599237084, "learning_rate": 4.3212203137922125e-07, "loss": 0.2409, "step": 25373 }, { "epoch": 0.8707618393960193, "grad_norm": 0.6560045733998672, "learning_rate": 4.3189605779349166e-07, "loss": 0.262, "step": 25374 }, { "epoch": 0.87079615648593, "grad_norm": 0.7768056829708178, "learning_rate": 4.316701406413587e-07, "loss": 0.237, "step": 25375 }, { "epoch": 0.8708304735758408, "grad_norm": 0.7800666765397171, "learning_rate": 4.3144427992561634e-07, "loss": 0.283, "step": 25376 }, { "epoch": 0.8708647906657515, "grad_norm": 0.8856165378168637, "learning_rate": 4.3121847564905286e-07, "loss": 0.3015, "step": 25377 }, { "epoch": 0.8708991077556624, "grad_norm": 0.7884757285672678, "learning_rate": 4.3099272781445825e-07, "loss": 0.2853, "step": 25378 }, { "epoch": 0.8709334248455731, "grad_norm": 0.725000016466693, "learning_rate": 4.3076703642462194e-07, "loss": 0.2945, "step": 25379 }, { "epoch": 0.8709677419354839, "grad_norm": 0.7119627310222719, "learning_rate": 4.305414014823317e-07, "loss": 0.2272, "step": 25380 }, { "epoch": 0.8710020590253946, "grad_norm": 1.8248234525053455, "learning_rate": 4.303158229903748e-07, "loss": 0.1996, "step": 25381 }, { "epoch": 0.8710363761153054, "grad_norm": 0.7657273963667244, "learning_rate": 4.300903009515378e-07, "loss": 0.2035, "step": 25382 }, { "epoch": 0.8710706932052162, "grad_norm": 0.8094308633827378, "learning_rate": 4.2986483536860854e-07, "loss": 0.2344, "step": 25383 }, { "epoch": 0.871105010295127, "grad_norm": 0.8253108267688388, "learning_rate": 4.296394262443687e-07, "loss": 0.2909, "step": 25384 }, { "epoch": 0.8711393273850377, "grad_norm": 0.8117616474365319, "learning_rate": 4.2941407358160716e-07, "loss": 0.2959, "step": 25385 }, { "epoch": 0.8711736444749485, "grad_norm": 0.7713871037008551, "learning_rate": 4.291887773831055e-07, "loss": 0.2834, "step": 25386 }, { "epoch": 0.8712079615648592, "grad_norm": 0.6734814224304791, "learning_rate": 4.28963537651646e-07, "loss": 0.2215, "step": 25387 }, { "epoch": 0.8712422786547701, "grad_norm": 0.7805566112948744, "learning_rate": 4.2873835439001476e-07, "loss": 0.2311, "step": 25388 }, { "epoch": 0.8712765957446809, "grad_norm": 0.7256642238307552, "learning_rate": 4.2851322760099065e-07, "loss": 0.2132, "step": 25389 }, { "epoch": 0.8713109128345916, "grad_norm": 0.7559848542894891, "learning_rate": 4.2828815728735594e-07, "loss": 0.2787, "step": 25390 }, { "epoch": 0.8713452299245024, "grad_norm": 0.8702598458452422, "learning_rate": 4.280631434518906e-07, "loss": 0.2596, "step": 25391 }, { "epoch": 0.8713795470144132, "grad_norm": 0.7334055593665242, "learning_rate": 4.2783818609737515e-07, "loss": 0.262, "step": 25392 }, { "epoch": 0.871413864104324, "grad_norm": 0.7869213274994924, "learning_rate": 4.2761328522658794e-07, "loss": 0.2518, "step": 25393 }, { "epoch": 0.8714481811942347, "grad_norm": 0.7428277861302136, "learning_rate": 4.273884408423079e-07, "loss": 0.2263, "step": 25394 }, { "epoch": 0.8714824982841455, "grad_norm": 0.8070424143549493, "learning_rate": 4.271636529473128e-07, "loss": 0.252, "step": 25395 }, { "epoch": 0.8715168153740562, "grad_norm": 0.7457474361783051, "learning_rate": 4.2693892154437864e-07, "loss": 0.2975, "step": 25396 }, { "epoch": 0.8715511324639671, "grad_norm": 0.672788983409644, "learning_rate": 4.2671424663628393e-07, "loss": 0.2694, "step": 25397 }, { "epoch": 0.8715854495538778, "grad_norm": 0.8196177307783362, "learning_rate": 4.2648962822580075e-07, "loss": 0.2034, "step": 25398 }, { "epoch": 0.8716197666437886, "grad_norm": 0.7753386340872352, "learning_rate": 4.262650663157075e-07, "loss": 0.2084, "step": 25399 }, { "epoch": 0.8716540837336993, "grad_norm": 0.7757448687718463, "learning_rate": 4.2604056090877645e-07, "loss": 0.2955, "step": 25400 }, { "epoch": 0.8716884008236102, "grad_norm": 0.8154310591302594, "learning_rate": 4.258161120077814e-07, "loss": 0.2517, "step": 25401 }, { "epoch": 0.871722717913521, "grad_norm": 0.731307744370853, "learning_rate": 4.2559171961549573e-07, "loss": 0.2564, "step": 25402 }, { "epoch": 0.8717570350034317, "grad_norm": 0.8498058744163377, "learning_rate": 4.2536738373469053e-07, "loss": 0.2833, "step": 25403 }, { "epoch": 0.8717913520933425, "grad_norm": 0.6844104544965341, "learning_rate": 4.251431043681381e-07, "loss": 0.2161, "step": 25404 }, { "epoch": 0.8718256691832532, "grad_norm": 0.7826178780575294, "learning_rate": 4.249188815186084e-07, "loss": 0.2543, "step": 25405 }, { "epoch": 0.8718599862731641, "grad_norm": 0.7996027722784269, "learning_rate": 4.246947151888736e-07, "loss": 0.2619, "step": 25406 }, { "epoch": 0.8718943033630748, "grad_norm": 0.8313103170935866, "learning_rate": 4.2447060538169884e-07, "loss": 0.2393, "step": 25407 }, { "epoch": 0.8719286204529856, "grad_norm": 0.769523614778213, "learning_rate": 4.2424655209985734e-07, "loss": 0.2916, "step": 25408 }, { "epoch": 0.8719629375428963, "grad_norm": 0.7523493079314958, "learning_rate": 4.240225553461136e-07, "loss": 0.2175, "step": 25409 }, { "epoch": 0.8719972546328071, "grad_norm": 0.7984920856588795, "learning_rate": 4.237986151232354e-07, "loss": 0.2515, "step": 25410 }, { "epoch": 0.872031571722718, "grad_norm": 0.7744927078284544, "learning_rate": 4.235747314339922e-07, "loss": 0.2654, "step": 25411 }, { "epoch": 0.8720658888126287, "grad_norm": 0.7766619588097885, "learning_rate": 4.233509042811462e-07, "loss": 0.2459, "step": 25412 }, { "epoch": 0.8721002059025394, "grad_norm": 0.7720138986034264, "learning_rate": 4.2312713366746417e-07, "loss": 0.2599, "step": 25413 }, { "epoch": 0.8721345229924502, "grad_norm": 0.7607787744827311, "learning_rate": 4.2290341959570993e-07, "loss": 0.2474, "step": 25414 }, { "epoch": 0.8721688400823611, "grad_norm": 0.9208582163557254, "learning_rate": 4.226797620686479e-07, "loss": 0.3022, "step": 25415 }, { "epoch": 0.8722031571722718, "grad_norm": 0.7825807723747679, "learning_rate": 4.224561610890404e-07, "loss": 0.3002, "step": 25416 }, { "epoch": 0.8722374742621826, "grad_norm": 1.003329092833097, "learning_rate": 4.222326166596508e-07, "loss": 0.2402, "step": 25417 }, { "epoch": 0.8722717913520933, "grad_norm": 0.8178714962972201, "learning_rate": 4.2200912878324064e-07, "loss": 0.2241, "step": 25418 }, { "epoch": 0.8723061084420041, "grad_norm": 0.8000837072855053, "learning_rate": 4.217856974625684e-07, "loss": 0.2487, "step": 25419 }, { "epoch": 0.8723404255319149, "grad_norm": 0.8201835509752806, "learning_rate": 4.2156232270039786e-07, "loss": 0.2648, "step": 25420 }, { "epoch": 0.8723747426218257, "grad_norm": 0.8225134010835454, "learning_rate": 4.213390044994858e-07, "loss": 0.2371, "step": 25421 }, { "epoch": 0.8724090597117364, "grad_norm": 0.7822476798561205, "learning_rate": 4.2111574286259285e-07, "loss": 0.2597, "step": 25422 }, { "epoch": 0.8724433768016472, "grad_norm": 0.7147391811761897, "learning_rate": 4.208925377924761e-07, "loss": 0.2323, "step": 25423 }, { "epoch": 0.872477693891558, "grad_norm": 0.825274013552445, "learning_rate": 4.206693892918928e-07, "loss": 0.2805, "step": 25424 }, { "epoch": 0.8725120109814688, "grad_norm": 0.860644416708449, "learning_rate": 4.2044629736360085e-07, "loss": 0.2718, "step": 25425 }, { "epoch": 0.8725463280713796, "grad_norm": 0.7556211908189551, "learning_rate": 4.202232620103558e-07, "loss": 0.2314, "step": 25426 }, { "epoch": 0.8725806451612903, "grad_norm": 0.9024544601668, "learning_rate": 4.2000028323491206e-07, "loss": 0.2642, "step": 25427 }, { "epoch": 0.872614962251201, "grad_norm": 0.7525828957665959, "learning_rate": 4.197773610400258e-07, "loss": 0.2361, "step": 25428 }, { "epoch": 0.8726492793411119, "grad_norm": 0.7536813767305177, "learning_rate": 4.1955449542845083e-07, "loss": 0.2791, "step": 25429 }, { "epoch": 0.8726835964310227, "grad_norm": 0.8649513319688198, "learning_rate": 4.1933168640293787e-07, "loss": 0.3004, "step": 25430 }, { "epoch": 0.8727179135209334, "grad_norm": 0.7371207353325777, "learning_rate": 4.191089339662424e-07, "loss": 0.2296, "step": 25431 }, { "epoch": 0.8727522306108442, "grad_norm": 0.7687600251961336, "learning_rate": 4.1888623812111616e-07, "loss": 0.2495, "step": 25432 }, { "epoch": 0.8727865477007549, "grad_norm": 0.8731443537600885, "learning_rate": 4.1866359887030803e-07, "loss": 0.2538, "step": 25433 }, { "epoch": 0.8728208647906658, "grad_norm": 0.7874346450519755, "learning_rate": 4.184410162165708e-07, "loss": 0.2463, "step": 25434 }, { "epoch": 0.8728551818805765, "grad_norm": 0.8083399001292929, "learning_rate": 4.182184901626529e-07, "loss": 0.252, "step": 25435 }, { "epoch": 0.8728894989704873, "grad_norm": 0.8341360829492511, "learning_rate": 4.179960207113037e-07, "loss": 0.2409, "step": 25436 }, { "epoch": 0.872923816060398, "grad_norm": 0.7326612557720605, "learning_rate": 4.177736078652711e-07, "loss": 0.2382, "step": 25437 }, { "epoch": 0.8729581331503089, "grad_norm": 0.7775439434171771, "learning_rate": 4.1755125162730335e-07, "loss": 0.2912, "step": 25438 }, { "epoch": 0.8729924502402197, "grad_norm": 0.7650546580262098, "learning_rate": 4.1732895200014776e-07, "loss": 0.2262, "step": 25439 }, { "epoch": 0.8730267673301304, "grad_norm": 0.7182476067572742, "learning_rate": 4.1710670898654994e-07, "loss": 0.2401, "step": 25440 }, { "epoch": 0.8730610844200412, "grad_norm": 0.8065342818378658, "learning_rate": 4.1688452258925594e-07, "loss": 0.2683, "step": 25441 }, { "epoch": 0.8730954015099519, "grad_norm": 0.6987581738240642, "learning_rate": 4.166623928110092e-07, "loss": 0.2394, "step": 25442 }, { "epoch": 0.8731297185998628, "grad_norm": 0.8463956838733042, "learning_rate": 4.1644031965455634e-07, "loss": 0.3167, "step": 25443 }, { "epoch": 0.8731640356897735, "grad_norm": 0.7696109531803044, "learning_rate": 4.1621830312263855e-07, "loss": 0.2309, "step": 25444 }, { "epoch": 0.8731983527796843, "grad_norm": 0.8573961080753585, "learning_rate": 4.159963432179992e-07, "loss": 0.2115, "step": 25445 }, { "epoch": 0.873232669869595, "grad_norm": 0.8716802113958471, "learning_rate": 4.1577443994338107e-07, "loss": 0.2874, "step": 25446 }, { "epoch": 0.8732669869595059, "grad_norm": 0.7068914455330172, "learning_rate": 4.1555259330152475e-07, "loss": 0.2453, "step": 25447 }, { "epoch": 0.8733013040494166, "grad_norm": 0.792517728577448, "learning_rate": 4.153308032951714e-07, "loss": 0.2336, "step": 25448 }, { "epoch": 0.8733356211393274, "grad_norm": 0.772654528941105, "learning_rate": 4.1510906992706044e-07, "loss": 0.2433, "step": 25449 }, { "epoch": 0.8733699382292381, "grad_norm": 1.1992148147918043, "learning_rate": 4.1488739319993307e-07, "loss": 0.2971, "step": 25450 }, { "epoch": 0.8734042553191489, "grad_norm": 0.765728867270576, "learning_rate": 4.146657731165238e-07, "loss": 0.2083, "step": 25451 }, { "epoch": 0.8734385724090598, "grad_norm": 0.8765690181128689, "learning_rate": 4.1444420967957476e-07, "loss": 0.3335, "step": 25452 }, { "epoch": 0.8734728894989705, "grad_norm": 0.9205922906280867, "learning_rate": 4.142227028918205e-07, "loss": 0.255, "step": 25453 }, { "epoch": 0.8735072065888813, "grad_norm": 0.8041420716588746, "learning_rate": 4.1400125275599725e-07, "loss": 0.2316, "step": 25454 }, { "epoch": 0.873541523678792, "grad_norm": 0.7881358967374537, "learning_rate": 4.137798592748438e-07, "loss": 0.1997, "step": 25455 }, { "epoch": 0.8735758407687028, "grad_norm": 0.6869915599045453, "learning_rate": 4.135585224510913e-07, "loss": 0.3106, "step": 25456 }, { "epoch": 0.8736101578586136, "grad_norm": 0.8052288142085221, "learning_rate": 4.133372422874776e-07, "loss": 0.2577, "step": 25457 }, { "epoch": 0.8736444749485244, "grad_norm": 0.7581278776390203, "learning_rate": 4.1311601878673393e-07, "loss": 0.2638, "step": 25458 }, { "epoch": 0.8736787920384351, "grad_norm": 0.8058344955188527, "learning_rate": 4.128948519515941e-07, "loss": 0.3106, "step": 25459 }, { "epoch": 0.8737131091283459, "grad_norm": 0.8058710511202439, "learning_rate": 4.1267374178479034e-07, "loss": 0.2533, "step": 25460 }, { "epoch": 0.8737474262182567, "grad_norm": 0.696792202975689, "learning_rate": 4.1245268828905394e-07, "loss": 0.2328, "step": 25461 }, { "epoch": 0.8737817433081675, "grad_norm": 0.9116315149435829, "learning_rate": 4.122316914671165e-07, "loss": 0.2764, "step": 25462 }, { "epoch": 0.8738160603980782, "grad_norm": 0.8350019331456494, "learning_rate": 4.1201075132170753e-07, "loss": 0.2335, "step": 25463 }, { "epoch": 0.873850377487989, "grad_norm": 0.9024928871998243, "learning_rate": 4.1178986785555707e-07, "loss": 0.2614, "step": 25464 }, { "epoch": 0.8738846945778997, "grad_norm": 0.7372223967744566, "learning_rate": 4.1156904107139237e-07, "loss": 0.2758, "step": 25465 }, { "epoch": 0.8739190116678106, "grad_norm": 0.9588786189353012, "learning_rate": 4.1134827097194406e-07, "loss": 0.2919, "step": 25466 }, { "epoch": 0.8739533287577214, "grad_norm": 0.7380433464018622, "learning_rate": 4.1112755755993715e-07, "loss": 0.2936, "step": 25467 }, { "epoch": 0.8739876458476321, "grad_norm": 0.7222302220179753, "learning_rate": 4.109069008380989e-07, "loss": 0.2358, "step": 25468 }, { "epoch": 0.8740219629375429, "grad_norm": 0.8466436853127499, "learning_rate": 4.106863008091561e-07, "loss": 0.2509, "step": 25469 }, { "epoch": 0.8740562800274537, "grad_norm": 0.9589912416085328, "learning_rate": 4.1046575747583305e-07, "loss": 0.2676, "step": 25470 }, { "epoch": 0.8740905971173645, "grad_norm": 0.8015437351421569, "learning_rate": 4.1024527084085443e-07, "loss": 0.2818, "step": 25471 }, { "epoch": 0.8741249142072752, "grad_norm": 0.8103398844468797, "learning_rate": 4.100248409069446e-07, "loss": 0.256, "step": 25472 }, { "epoch": 0.874159231297186, "grad_norm": 0.7521619150288014, "learning_rate": 4.098044676768276e-07, "loss": 0.221, "step": 25473 }, { "epoch": 0.8741935483870967, "grad_norm": 0.7949172606875698, "learning_rate": 4.095841511532228e-07, "loss": 0.2677, "step": 25474 }, { "epoch": 0.8742278654770076, "grad_norm": 0.8262863828238782, "learning_rate": 4.093638913388548e-07, "loss": 0.2666, "step": 25475 }, { "epoch": 0.8742621825669183, "grad_norm": 0.7374678717323331, "learning_rate": 4.091436882364441e-07, "loss": 0.226, "step": 25476 }, { "epoch": 0.8742964996568291, "grad_norm": 0.7730733363847364, "learning_rate": 4.0892354184870977e-07, "loss": 0.2267, "step": 25477 }, { "epoch": 0.8743308167467398, "grad_norm": 0.8495664121012367, "learning_rate": 4.0870345217837394e-07, "loss": 0.2819, "step": 25478 }, { "epoch": 0.8743651338366506, "grad_norm": 0.6745056465518038, "learning_rate": 4.084834192281528e-07, "loss": 0.2069, "step": 25479 }, { "epoch": 0.8743994509265615, "grad_norm": 0.7988055839770521, "learning_rate": 4.082634430007665e-07, "loss": 0.2583, "step": 25480 }, { "epoch": 0.8744337680164722, "grad_norm": 0.7696714954973787, "learning_rate": 4.0804352349893106e-07, "loss": 0.2022, "step": 25481 }, { "epoch": 0.874468085106383, "grad_norm": 0.8485088743105605, "learning_rate": 4.078236607253649e-07, "loss": 0.2604, "step": 25482 }, { "epoch": 0.8745024021962937, "grad_norm": 0.6777424409808279, "learning_rate": 4.076038546827832e-07, "loss": 0.2303, "step": 25483 }, { "epoch": 0.8745367192862046, "grad_norm": 0.7811264590027126, "learning_rate": 4.0738410537390136e-07, "loss": 0.285, "step": 25484 }, { "epoch": 0.8745710363761153, "grad_norm": 0.8433758829714345, "learning_rate": 4.071644128014357e-07, "loss": 0.2891, "step": 25485 }, { "epoch": 0.8746053534660261, "grad_norm": 0.7728964473154065, "learning_rate": 4.0694477696809733e-07, "loss": 0.2361, "step": 25486 }, { "epoch": 0.8746396705559368, "grad_norm": 0.7415783009189724, "learning_rate": 4.067251978766029e-07, "loss": 0.281, "step": 25487 }, { "epoch": 0.8746739876458476, "grad_norm": 0.8217476620995745, "learning_rate": 4.065056755296615e-07, "loss": 0.2733, "step": 25488 }, { "epoch": 0.8747083047357584, "grad_norm": 0.810265339867712, "learning_rate": 4.062862099299886e-07, "loss": 0.2071, "step": 25489 }, { "epoch": 0.8747426218256692, "grad_norm": 0.7489890004721285, "learning_rate": 4.060668010802932e-07, "loss": 0.2492, "step": 25490 }, { "epoch": 0.87477693891558, "grad_norm": 0.786737654355057, "learning_rate": 4.058474489832864e-07, "loss": 0.2397, "step": 25491 }, { "epoch": 0.8748112560054907, "grad_norm": 0.9924194573362289, "learning_rate": 4.056281536416784e-07, "loss": 0.2504, "step": 25492 }, { "epoch": 0.8748455730954016, "grad_norm": 0.7092367751897242, "learning_rate": 4.0540891505817746e-07, "loss": 0.2251, "step": 25493 }, { "epoch": 0.8748798901853123, "grad_norm": 0.7291879706202329, "learning_rate": 4.0518973323549313e-07, "loss": 0.2552, "step": 25494 }, { "epoch": 0.8749142072752231, "grad_norm": 0.7007573803350152, "learning_rate": 4.049706081763322e-07, "loss": 0.3173, "step": 25495 }, { "epoch": 0.8749485243651338, "grad_norm": 0.7019776965113785, "learning_rate": 4.047515398834034e-07, "loss": 0.2464, "step": 25496 }, { "epoch": 0.8749828414550446, "grad_norm": 0.7567847096869571, "learning_rate": 4.0453252835940984e-07, "loss": 0.2658, "step": 25497 }, { "epoch": 0.8750171585449554, "grad_norm": 0.7974287489649472, "learning_rate": 4.043135736070597e-07, "loss": 0.258, "step": 25498 }, { "epoch": 0.8750514756348662, "grad_norm": 1.2458732751466663, "learning_rate": 4.040946756290587e-07, "loss": 0.2189, "step": 25499 }, { "epoch": 0.8750857927247769, "grad_norm": 0.7214011071127654, "learning_rate": 4.038758344281074e-07, "loss": 0.2379, "step": 25500 }, { "epoch": 0.8751201098146877, "grad_norm": 0.7546372601865462, "learning_rate": 4.036570500069137e-07, "loss": 0.2694, "step": 25501 }, { "epoch": 0.8751544269045984, "grad_norm": 0.8215003960726271, "learning_rate": 4.034383223681776e-07, "loss": 0.2792, "step": 25502 }, { "epoch": 0.8751887439945093, "grad_norm": 0.8566119928086494, "learning_rate": 4.032196515146014e-07, "loss": 0.2492, "step": 25503 }, { "epoch": 0.87522306108442, "grad_norm": 0.7864867098925198, "learning_rate": 4.030010374488874e-07, "loss": 0.2721, "step": 25504 }, { "epoch": 0.8752573781743308, "grad_norm": 0.8029239209048786, "learning_rate": 4.027824801737362e-07, "loss": 0.2916, "step": 25505 }, { "epoch": 0.8752916952642416, "grad_norm": 0.8182130165668525, "learning_rate": 4.0256397969184736e-07, "loss": 0.2582, "step": 25506 }, { "epoch": 0.8753260123541524, "grad_norm": 0.8729474279287426, "learning_rate": 4.023455360059203e-07, "loss": 0.3228, "step": 25507 }, { "epoch": 0.8753603294440632, "grad_norm": 1.1840291853825322, "learning_rate": 4.021271491186551e-07, "loss": 0.245, "step": 25508 }, { "epoch": 0.8753946465339739, "grad_norm": 0.8019387073003547, "learning_rate": 4.019088190327464e-07, "loss": 0.2738, "step": 25509 }, { "epoch": 0.8754289636238847, "grad_norm": 0.8574095403872917, "learning_rate": 4.0169054575089514e-07, "loss": 0.2948, "step": 25510 }, { "epoch": 0.8754632807137954, "grad_norm": 0.8483240387359026, "learning_rate": 4.014723292757955e-07, "loss": 0.3108, "step": 25511 }, { "epoch": 0.8754975978037063, "grad_norm": 0.715504923522425, "learning_rate": 4.012541696101435e-07, "loss": 0.2551, "step": 25512 }, { "epoch": 0.875531914893617, "grad_norm": 0.7401417731556672, "learning_rate": 4.0103606675663486e-07, "loss": 0.2472, "step": 25513 }, { "epoch": 0.8755662319835278, "grad_norm": 0.750922172377552, "learning_rate": 4.0081802071796347e-07, "loss": 0.2826, "step": 25514 }, { "epoch": 0.8756005490734385, "grad_norm": 0.7173445849708904, "learning_rate": 4.0060003149682334e-07, "loss": 0.2797, "step": 25515 }, { "epoch": 0.8756348661633494, "grad_norm": 0.8977332777146269, "learning_rate": 4.003820990959079e-07, "loss": 0.3332, "step": 25516 }, { "epoch": 0.8756691832532602, "grad_norm": 0.7549092259864576, "learning_rate": 4.0016422351790927e-07, "loss": 0.2686, "step": 25517 }, { "epoch": 0.8757035003431709, "grad_norm": 0.7432469933623022, "learning_rate": 3.9994640476551715e-07, "loss": 0.2837, "step": 25518 }, { "epoch": 0.8757378174330817, "grad_norm": 0.6704545379060214, "learning_rate": 3.997286428414249e-07, "loss": 0.2941, "step": 25519 }, { "epoch": 0.8757721345229924, "grad_norm": 0.7590658778111318, "learning_rate": 3.9951093774832197e-07, "loss": 0.2305, "step": 25520 }, { "epoch": 0.8758064516129033, "grad_norm": 0.8509918614732267, "learning_rate": 3.992932894888979e-07, "loss": 0.2892, "step": 25521 }, { "epoch": 0.875840768702814, "grad_norm": 0.7900870695044878, "learning_rate": 3.990756980658422e-07, "loss": 0.2795, "step": 25522 }, { "epoch": 0.8758750857927248, "grad_norm": 0.935588834960342, "learning_rate": 3.9885816348184006e-07, "loss": 0.297, "step": 25523 }, { "epoch": 0.8759094028826355, "grad_norm": 0.6963619591981209, "learning_rate": 3.9864068573958303e-07, "loss": 0.2604, "step": 25524 }, { "epoch": 0.8759437199725463, "grad_norm": 0.8121075135492772, "learning_rate": 3.9842326484175463e-07, "loss": 0.2751, "step": 25525 }, { "epoch": 0.8759780370624571, "grad_norm": 0.8195107399849395, "learning_rate": 3.982059007910416e-07, "loss": 0.2168, "step": 25526 }, { "epoch": 0.8760123541523679, "grad_norm": 0.7746671245055284, "learning_rate": 3.979885935901301e-07, "loss": 0.2431, "step": 25527 }, { "epoch": 0.8760466712422786, "grad_norm": 0.7606944630406182, "learning_rate": 3.9777134324170355e-07, "loss": 0.2928, "step": 25528 }, { "epoch": 0.8760809883321894, "grad_norm": 0.8059167217031522, "learning_rate": 3.9755414974844644e-07, "loss": 0.2959, "step": 25529 }, { "epoch": 0.8761153054221003, "grad_norm": 0.8146366745776692, "learning_rate": 3.9733701311304216e-07, "loss": 0.2373, "step": 25530 }, { "epoch": 0.876149622512011, "grad_norm": 0.8453236340018786, "learning_rate": 3.971199333381737e-07, "loss": 0.2659, "step": 25531 }, { "epoch": 0.8761839396019218, "grad_norm": 0.8190772896378314, "learning_rate": 3.969029104265204e-07, "loss": 0.2221, "step": 25532 }, { "epoch": 0.8762182566918325, "grad_norm": 0.7010849953012317, "learning_rate": 3.9668594438076634e-07, "loss": 0.2768, "step": 25533 }, { "epoch": 0.8762525737817433, "grad_norm": 0.8099696181620492, "learning_rate": 3.964690352035899e-07, "loss": 0.2318, "step": 25534 }, { "epoch": 0.8762868908716541, "grad_norm": 0.7980882822139749, "learning_rate": 3.962521828976712e-07, "loss": 0.2336, "step": 25535 }, { "epoch": 0.8763212079615649, "grad_norm": 0.7299315104310021, "learning_rate": 3.9603538746568917e-07, "loss": 0.2362, "step": 25536 }, { "epoch": 0.8763555250514756, "grad_norm": 0.8205034716776324, "learning_rate": 3.9581864891032216e-07, "loss": 0.2658, "step": 25537 }, { "epoch": 0.8763898421413864, "grad_norm": 0.8699346126894862, "learning_rate": 3.9560196723424814e-07, "loss": 0.2968, "step": 25538 }, { "epoch": 0.8764241592312972, "grad_norm": 0.7308978655012155, "learning_rate": 3.953853424401438e-07, "loss": 0.253, "step": 25539 }, { "epoch": 0.876458476321208, "grad_norm": 0.8502953642309871, "learning_rate": 3.9516877453068537e-07, "loss": 0.3158, "step": 25540 }, { "epoch": 0.8764927934111187, "grad_norm": 0.7940255039062354, "learning_rate": 3.9495226350854675e-07, "loss": 0.294, "step": 25541 }, { "epoch": 0.8765271105010295, "grad_norm": 0.723415800455035, "learning_rate": 3.9473580937640476e-07, "loss": 0.2743, "step": 25542 }, { "epoch": 0.8765614275909402, "grad_norm": 0.7662600363286934, "learning_rate": 3.9451941213693336e-07, "loss": 0.3325, "step": 25543 }, { "epoch": 0.8765957446808511, "grad_norm": 0.6794612135033892, "learning_rate": 3.9430307179280315e-07, "loss": 0.2439, "step": 25544 }, { "epoch": 0.8766300617707619, "grad_norm": 1.0019166632238226, "learning_rate": 3.940867883466909e-07, "loss": 0.2282, "step": 25545 }, { "epoch": 0.8766643788606726, "grad_norm": 0.7551190054566851, "learning_rate": 3.938705618012656e-07, "loss": 0.257, "step": 25546 }, { "epoch": 0.8766986959505834, "grad_norm": 0.7321071120132336, "learning_rate": 3.93654392159199e-07, "loss": 0.2411, "step": 25547 }, { "epoch": 0.8767330130404941, "grad_norm": 0.6726136291217947, "learning_rate": 3.934382794231617e-07, "loss": 0.2433, "step": 25548 }, { "epoch": 0.876767330130405, "grad_norm": 0.6883528866824319, "learning_rate": 3.932222235958244e-07, "loss": 0.2141, "step": 25549 }, { "epoch": 0.8768016472203157, "grad_norm": 0.8034832342581122, "learning_rate": 3.9300622467985497e-07, "loss": 0.251, "step": 25550 }, { "epoch": 0.8768359643102265, "grad_norm": 0.7936419993868434, "learning_rate": 3.9279028267792285e-07, "loss": 0.257, "step": 25551 }, { "epoch": 0.8768702814001372, "grad_norm": 0.7821827232395914, "learning_rate": 3.925743975926949e-07, "loss": 0.2649, "step": 25552 }, { "epoch": 0.8769045984900481, "grad_norm": 0.822708459172711, "learning_rate": 3.923585694268384e-07, "loss": 0.3055, "step": 25553 }, { "epoch": 0.8769389155799588, "grad_norm": 0.7653412200405657, "learning_rate": 3.9214279818302116e-07, "loss": 0.2823, "step": 25554 }, { "epoch": 0.8769732326698696, "grad_norm": 0.7082395980318249, "learning_rate": 3.919270838639055e-07, "loss": 0.2671, "step": 25555 }, { "epoch": 0.8770075497597803, "grad_norm": 0.7649520008637795, "learning_rate": 3.9171142647216e-07, "loss": 0.3419, "step": 25556 }, { "epoch": 0.8770418668496911, "grad_norm": 0.7517176543947267, "learning_rate": 3.914958260104462e-07, "loss": 0.2704, "step": 25557 }, { "epoch": 0.877076183939602, "grad_norm": 0.8667886398407392, "learning_rate": 3.9128028248142823e-07, "loss": 0.2632, "step": 25558 }, { "epoch": 0.8771105010295127, "grad_norm": 0.917278365250685, "learning_rate": 3.9106479588776945e-07, "loss": 0.3078, "step": 25559 }, { "epoch": 0.8771448181194235, "grad_norm": 0.7864966785930525, "learning_rate": 3.908493662321311e-07, "loss": 0.2713, "step": 25560 }, { "epoch": 0.8771791352093342, "grad_norm": 0.7366202423780318, "learning_rate": 3.90633993517176e-07, "loss": 0.2344, "step": 25561 }, { "epoch": 0.8772134522992451, "grad_norm": 0.7702924566798977, "learning_rate": 3.904186777455632e-07, "loss": 0.2456, "step": 25562 }, { "epoch": 0.8772477693891558, "grad_norm": 0.8361792992048356, "learning_rate": 3.902034189199544e-07, "loss": 0.3331, "step": 25563 }, { "epoch": 0.8772820864790666, "grad_norm": 0.7807009089688137, "learning_rate": 3.899882170430058e-07, "loss": 0.2846, "step": 25564 }, { "epoch": 0.8773164035689773, "grad_norm": 0.7766476010709212, "learning_rate": 3.897730721173798e-07, "loss": 0.2706, "step": 25565 }, { "epoch": 0.8773507206588881, "grad_norm": 0.8627483036090347, "learning_rate": 3.8955798414573254e-07, "loss": 0.2403, "step": 25566 }, { "epoch": 0.877385037748799, "grad_norm": 0.8761460224685969, "learning_rate": 3.8934295313071967e-07, "loss": 0.2986, "step": 25567 }, { "epoch": 0.8774193548387097, "grad_norm": 0.8326018234890958, "learning_rate": 3.8912797907500076e-07, "loss": 0.2367, "step": 25568 }, { "epoch": 0.8774536719286204, "grad_norm": 0.6906435845551558, "learning_rate": 3.8891306198122927e-07, "loss": 0.2626, "step": 25569 }, { "epoch": 0.8774879890185312, "grad_norm": 0.7617336550770352, "learning_rate": 3.8869820185206074e-07, "loss": 0.2645, "step": 25570 }, { "epoch": 0.877522306108442, "grad_norm": 0.7929655657464892, "learning_rate": 3.884833986901493e-07, "loss": 0.2781, "step": 25571 }, { "epoch": 0.8775566231983528, "grad_norm": 0.7514725001296984, "learning_rate": 3.882686524981494e-07, "loss": 0.2709, "step": 25572 }, { "epoch": 0.8775909402882636, "grad_norm": 0.8931689869407659, "learning_rate": 3.8805396327871393e-07, "loss": 0.2852, "step": 25573 }, { "epoch": 0.8776252573781743, "grad_norm": 0.779868774226496, "learning_rate": 3.878393310344941e-07, "loss": 0.2835, "step": 25574 }, { "epoch": 0.8776595744680851, "grad_norm": 0.7616494005230295, "learning_rate": 3.8762475576814286e-07, "loss": 0.201, "step": 25575 }, { "epoch": 0.8776938915579959, "grad_norm": 0.8351687913225129, "learning_rate": 3.874102374823091e-07, "loss": 0.2542, "step": 25576 }, { "epoch": 0.8777282086479067, "grad_norm": 0.7853304452030069, "learning_rate": 3.871957761796452e-07, "loss": 0.3098, "step": 25577 }, { "epoch": 0.8777625257378174, "grad_norm": 0.7341862154890435, "learning_rate": 3.869813718627991e-07, "loss": 0.2648, "step": 25578 }, { "epoch": 0.8777968428277282, "grad_norm": 0.781075033839343, "learning_rate": 3.8676702453441963e-07, "loss": 0.2386, "step": 25579 }, { "epoch": 0.8778311599176389, "grad_norm": 0.7666968039407174, "learning_rate": 3.8655273419715533e-07, "loss": 0.234, "step": 25580 }, { "epoch": 0.8778654770075498, "grad_norm": 0.7278600113397607, "learning_rate": 3.863385008536519e-07, "loss": 0.2167, "step": 25581 }, { "epoch": 0.8778997940974606, "grad_norm": 0.8061918144638562, "learning_rate": 3.8612432450655935e-07, "loss": 0.2851, "step": 25582 }, { "epoch": 0.8779341111873713, "grad_norm": 0.7369213540762591, "learning_rate": 3.859102051585206e-07, "loss": 0.2241, "step": 25583 }, { "epoch": 0.877968428277282, "grad_norm": 0.8425242881278209, "learning_rate": 3.8569614281218194e-07, "loss": 0.2824, "step": 25584 }, { "epoch": 0.8780027453671929, "grad_norm": 0.7985758204767389, "learning_rate": 3.8548213747018783e-07, "loss": 0.2528, "step": 25585 }, { "epoch": 0.8780370624571037, "grad_norm": 0.7275606458974693, "learning_rate": 3.8526818913518117e-07, "loss": 0.2449, "step": 25586 }, { "epoch": 0.8780713795470144, "grad_norm": 0.7126683087515062, "learning_rate": 3.8505429780980653e-07, "loss": 0.246, "step": 25587 }, { "epoch": 0.8781056966369252, "grad_norm": 0.7853385811471115, "learning_rate": 3.8484046349670513e-07, "loss": 0.2574, "step": 25588 }, { "epoch": 0.8781400137268359, "grad_norm": 0.7310268479927805, "learning_rate": 3.846266861985198e-07, "loss": 0.2347, "step": 25589 }, { "epoch": 0.8781743308167468, "grad_norm": 0.7392649098546721, "learning_rate": 3.844129659178891e-07, "loss": 0.2336, "step": 25590 }, { "epoch": 0.8782086479066575, "grad_norm": 0.7535890896451948, "learning_rate": 3.84199302657457e-07, "loss": 0.2019, "step": 25591 }, { "epoch": 0.8782429649965683, "grad_norm": 0.780349387887089, "learning_rate": 3.8398569641986014e-07, "loss": 0.3113, "step": 25592 }, { "epoch": 0.878277282086479, "grad_norm": 0.7072800280104997, "learning_rate": 3.837721472077377e-07, "loss": 0.215, "step": 25593 }, { "epoch": 0.8783115991763898, "grad_norm": 0.7160558901340938, "learning_rate": 3.835586550237291e-07, "loss": 0.2659, "step": 25594 }, { "epoch": 0.8783459162663007, "grad_norm": 0.7255649560820859, "learning_rate": 3.833452198704707e-07, "loss": 0.2376, "step": 25595 }, { "epoch": 0.8783802333562114, "grad_norm": 0.7787905623860157, "learning_rate": 3.831318417505997e-07, "loss": 0.2179, "step": 25596 }, { "epoch": 0.8784145504461222, "grad_norm": 0.8224128113002508, "learning_rate": 3.8291852066675185e-07, "loss": 0.3389, "step": 25597 }, { "epoch": 0.8784488675360329, "grad_norm": 0.7994337891377968, "learning_rate": 3.827052566215633e-07, "loss": 0.2192, "step": 25598 }, { "epoch": 0.8784831846259438, "grad_norm": 0.8336285362791247, "learning_rate": 3.8249204961766707e-07, "loss": 0.2502, "step": 25599 }, { "epoch": 0.8785175017158545, "grad_norm": 0.6561056671176064, "learning_rate": 3.8227889965769925e-07, "loss": 0.2131, "step": 25600 }, { "epoch": 0.8785518188057653, "grad_norm": 0.8097263230542351, "learning_rate": 3.8206580674429115e-07, "loss": 0.2449, "step": 25601 }, { "epoch": 0.878586135895676, "grad_norm": 0.8636272274588925, "learning_rate": 3.818527708800762e-07, "loss": 0.2614, "step": 25602 }, { "epoch": 0.8786204529855868, "grad_norm": 0.966126546856696, "learning_rate": 3.8163979206768567e-07, "loss": 0.2926, "step": 25603 }, { "epoch": 0.8786547700754976, "grad_norm": 0.8471743969617167, "learning_rate": 3.8142687030975124e-07, "loss": 0.3035, "step": 25604 }, { "epoch": 0.8786890871654084, "grad_norm": 0.8304235492115095, "learning_rate": 3.812140056089031e-07, "loss": 0.2742, "step": 25605 }, { "epoch": 0.8787234042553191, "grad_norm": 0.69220782251611, "learning_rate": 3.8100119796777134e-07, "loss": 0.2256, "step": 25606 }, { "epoch": 0.8787577213452299, "grad_norm": 0.6672366125940216, "learning_rate": 3.80788447388985e-07, "loss": 0.2397, "step": 25607 }, { "epoch": 0.8787920384351408, "grad_norm": 0.8122542391019123, "learning_rate": 3.805757538751703e-07, "loss": 0.2282, "step": 25608 }, { "epoch": 0.8788263555250515, "grad_norm": 0.7136511045412345, "learning_rate": 3.8036311742895684e-07, "loss": 0.2741, "step": 25609 }, { "epoch": 0.8788606726149623, "grad_norm": 0.912592422084229, "learning_rate": 3.801505380529724e-07, "loss": 0.2219, "step": 25610 }, { "epoch": 0.878894989704873, "grad_norm": 0.867017254178249, "learning_rate": 3.7993801574984e-07, "loss": 0.295, "step": 25611 }, { "epoch": 0.8789293067947838, "grad_norm": 0.891239489618976, "learning_rate": 3.7972555052218864e-07, "loss": 0.3172, "step": 25612 }, { "epoch": 0.8789636238846946, "grad_norm": 0.9471209909216284, "learning_rate": 3.7951314237264e-07, "loss": 0.2953, "step": 25613 }, { "epoch": 0.8789979409746054, "grad_norm": 0.7509358148704709, "learning_rate": 3.79300791303821e-07, "loss": 0.2522, "step": 25614 }, { "epoch": 0.8790322580645161, "grad_norm": 0.9344275679861023, "learning_rate": 3.790884973183523e-07, "loss": 0.2841, "step": 25615 }, { "epoch": 0.8790665751544269, "grad_norm": 0.769550954510644, "learning_rate": 3.788762604188584e-07, "loss": 0.2403, "step": 25616 }, { "epoch": 0.8791008922443376, "grad_norm": 0.8156891041743929, "learning_rate": 3.7866408060796e-07, "loss": 0.262, "step": 25617 }, { "epoch": 0.8791352093342485, "grad_norm": 0.7859687623630618, "learning_rate": 3.784519578882795e-07, "loss": 0.2391, "step": 25618 }, { "epoch": 0.8791695264241592, "grad_norm": 0.7712314041259578, "learning_rate": 3.782398922624364e-07, "loss": 0.2242, "step": 25619 }, { "epoch": 0.87920384351407, "grad_norm": 0.7333143759605182, "learning_rate": 3.780278837330509e-07, "loss": 0.236, "step": 25620 }, { "epoch": 0.8792381606039807, "grad_norm": 0.7689284504585261, "learning_rate": 3.778159323027436e-07, "loss": 0.3019, "step": 25621 }, { "epoch": 0.8792724776938916, "grad_norm": 0.8589238566422477, "learning_rate": 3.7760403797412916e-07, "loss": 0.2364, "step": 25622 }, { "epoch": 0.8793067947838024, "grad_norm": 0.8108823441902862, "learning_rate": 3.7739220074982983e-07, "loss": 0.2525, "step": 25623 }, { "epoch": 0.8793411118737131, "grad_norm": 0.8264875475130832, "learning_rate": 3.7718042063245974e-07, "loss": 0.2399, "step": 25624 }, { "epoch": 0.8793754289636239, "grad_norm": 0.8042790173798843, "learning_rate": 3.769686976246345e-07, "loss": 0.3058, "step": 25625 }, { "epoch": 0.8794097460535346, "grad_norm": 0.8487230351551383, "learning_rate": 3.7675703172897315e-07, "loss": 0.2416, "step": 25626 }, { "epoch": 0.8794440631434455, "grad_norm": 0.6814896746886467, "learning_rate": 3.765454229480875e-07, "loss": 0.2341, "step": 25627 }, { "epoch": 0.8794783802333562, "grad_norm": 0.8389097312427568, "learning_rate": 3.7633387128459264e-07, "loss": 0.278, "step": 25628 }, { "epoch": 0.879512697323267, "grad_norm": 0.6812114176148176, "learning_rate": 3.761223767411021e-07, "loss": 0.209, "step": 25629 }, { "epoch": 0.8795470144131777, "grad_norm": 0.8532016899901963, "learning_rate": 3.759109393202287e-07, "loss": 0.2446, "step": 25630 }, { "epoch": 0.8795813315030886, "grad_norm": 0.7632781000790142, "learning_rate": 3.7569955902458433e-07, "loss": 0.2515, "step": 25631 }, { "epoch": 0.8796156485929993, "grad_norm": 0.8799668403799293, "learning_rate": 3.7548823585678075e-07, "loss": 0.2632, "step": 25632 }, { "epoch": 0.8796499656829101, "grad_norm": 0.7647739061172056, "learning_rate": 3.752769698194292e-07, "loss": 0.2415, "step": 25633 }, { "epoch": 0.8796842827728208, "grad_norm": 0.8138233401257822, "learning_rate": 3.75065760915137e-07, "loss": 0.2479, "step": 25634 }, { "epoch": 0.8797185998627316, "grad_norm": 0.7796070223587598, "learning_rate": 3.7485460914651715e-07, "loss": 0.2647, "step": 25635 }, { "epoch": 0.8797529169526425, "grad_norm": 0.750174417874838, "learning_rate": 3.7464351451617474e-07, "loss": 0.2586, "step": 25636 }, { "epoch": 0.8797872340425532, "grad_norm": 0.7391228755052216, "learning_rate": 3.744324770267199e-07, "loss": 0.2171, "step": 25637 }, { "epoch": 0.879821551132464, "grad_norm": 0.7544753374575416, "learning_rate": 3.7422149668075837e-07, "loss": 0.2411, "step": 25638 }, { "epoch": 0.8798558682223747, "grad_norm": 0.6839219342875242, "learning_rate": 3.740105734808969e-07, "loss": 0.2113, "step": 25639 }, { "epoch": 0.8798901853122855, "grad_norm": 0.7842581780783369, "learning_rate": 3.737997074297417e-07, "loss": 0.2713, "step": 25640 }, { "epoch": 0.8799245024021963, "grad_norm": 0.7864850708003513, "learning_rate": 3.73588898529898e-07, "loss": 0.2855, "step": 25641 }, { "epoch": 0.8799588194921071, "grad_norm": 0.7854921869638745, "learning_rate": 3.733781467839698e-07, "loss": 0.239, "step": 25642 }, { "epoch": 0.8799931365820178, "grad_norm": 0.7542701068914496, "learning_rate": 3.7316745219455883e-07, "loss": 0.2492, "step": 25643 }, { "epoch": 0.8800274536719286, "grad_norm": 0.7871694187096973, "learning_rate": 3.729568147642715e-07, "loss": 0.262, "step": 25644 }, { "epoch": 0.8800617707618394, "grad_norm": 0.7845394889368478, "learning_rate": 3.727462344957061e-07, "loss": 0.2477, "step": 25645 }, { "epoch": 0.8800960878517502, "grad_norm": 0.7826264046642369, "learning_rate": 3.725357113914685e-07, "loss": 0.2632, "step": 25646 }, { "epoch": 0.880130404941661, "grad_norm": 0.803589473810756, "learning_rate": 3.7232524545415536e-07, "loss": 0.2757, "step": 25647 }, { "epoch": 0.8801647220315717, "grad_norm": 0.8528430854206989, "learning_rate": 3.7211483668636804e-07, "loss": 0.2267, "step": 25648 }, { "epoch": 0.8801990391214825, "grad_norm": 0.8194356600425813, "learning_rate": 3.719044850907083e-07, "loss": 0.2972, "step": 25649 }, { "epoch": 0.8802333562113933, "grad_norm": 0.7238369535640482, "learning_rate": 3.7169419066977184e-07, "loss": 0.2746, "step": 25650 }, { "epoch": 0.8802676733013041, "grad_norm": 0.9225579332368901, "learning_rate": 3.714839534261572e-07, "loss": 0.2736, "step": 25651 }, { "epoch": 0.8803019903912148, "grad_norm": 0.8493786111706277, "learning_rate": 3.7127377336246217e-07, "loss": 0.2482, "step": 25652 }, { "epoch": 0.8803363074811256, "grad_norm": 0.728897206334605, "learning_rate": 3.710636504812831e-07, "loss": 0.2291, "step": 25653 }, { "epoch": 0.8803706245710364, "grad_norm": 0.7164256934016208, "learning_rate": 3.7085358478521573e-07, "loss": 0.3048, "step": 25654 }, { "epoch": 0.8804049416609472, "grad_norm": 0.8606094510792894, "learning_rate": 3.706435762768557e-07, "loss": 0.299, "step": 25655 }, { "epoch": 0.8804392587508579, "grad_norm": 0.732408211394569, "learning_rate": 3.7043362495879755e-07, "loss": 0.2971, "step": 25656 }, { "epoch": 0.8804735758407687, "grad_norm": 0.7641512240836497, "learning_rate": 3.702237308336326e-07, "loss": 0.2625, "step": 25657 }, { "epoch": 0.8805078929306794, "grad_norm": 0.8529658347137035, "learning_rate": 3.700138939039577e-07, "loss": 0.2717, "step": 25658 }, { "epoch": 0.8805422100205903, "grad_norm": 0.7698754464027406, "learning_rate": 3.698041141723618e-07, "loss": 0.2615, "step": 25659 }, { "epoch": 0.880576527110501, "grad_norm": 0.837952490615662, "learning_rate": 3.6959439164143794e-07, "loss": 0.2537, "step": 25660 }, { "epoch": 0.8806108442004118, "grad_norm": 0.6816026524634433, "learning_rate": 3.6938472631377677e-07, "loss": 0.2474, "step": 25661 }, { "epoch": 0.8806451612903226, "grad_norm": 0.7863806034866802, "learning_rate": 3.6917511819196895e-07, "loss": 0.3172, "step": 25662 }, { "epoch": 0.8806794783802333, "grad_norm": 0.7906005211960603, "learning_rate": 3.68965567278603e-07, "loss": 0.2196, "step": 25663 }, { "epoch": 0.8807137954701442, "grad_norm": 0.857277471917681, "learning_rate": 3.687560735762685e-07, "loss": 0.2479, "step": 25664 }, { "epoch": 0.8807481125600549, "grad_norm": 0.6412418914208429, "learning_rate": 3.68546637087554e-07, "loss": 0.28, "step": 25665 }, { "epoch": 0.8807824296499657, "grad_norm": 0.7404525312092782, "learning_rate": 3.6833725781504404e-07, "loss": 0.2341, "step": 25666 }, { "epoch": 0.8808167467398764, "grad_norm": 0.7872225415945073, "learning_rate": 3.6812793576132933e-07, "loss": 0.2563, "step": 25667 }, { "epoch": 0.8808510638297873, "grad_norm": 0.7822497680393795, "learning_rate": 3.6791867092899283e-07, "loss": 0.2427, "step": 25668 }, { "epoch": 0.880885380919698, "grad_norm": 0.8113093920200233, "learning_rate": 3.677094633206207e-07, "loss": 0.2924, "step": 25669 }, { "epoch": 0.8809196980096088, "grad_norm": 0.668743179611163, "learning_rate": 3.6750031293879716e-07, "loss": 0.2028, "step": 25670 }, { "epoch": 0.8809540150995195, "grad_norm": 0.839782158907664, "learning_rate": 3.672912197861067e-07, "loss": 0.255, "step": 25671 }, { "epoch": 0.8809883321894303, "grad_norm": 0.8289797745289136, "learning_rate": 3.670821838651317e-07, "loss": 0.2377, "step": 25672 }, { "epoch": 0.8810226492793412, "grad_norm": 0.709234140309162, "learning_rate": 3.6687320517845516e-07, "loss": 0.2393, "step": 25673 }, { "epoch": 0.8810569663692519, "grad_norm": 0.723897926329288, "learning_rate": 3.666642837286588e-07, "loss": 0.2138, "step": 25674 }, { "epoch": 0.8810912834591627, "grad_norm": 0.8851450552239292, "learning_rate": 3.664554195183223e-07, "loss": 0.2797, "step": 25675 }, { "epoch": 0.8811256005490734, "grad_norm": 0.7641053885862875, "learning_rate": 3.662466125500269e-07, "loss": 0.2604, "step": 25676 }, { "epoch": 0.8811599176389843, "grad_norm": 0.8879815631475111, "learning_rate": 3.660378628263528e-07, "loss": 0.2233, "step": 25677 }, { "epoch": 0.881194234728895, "grad_norm": 0.8086987008183841, "learning_rate": 3.6582917034987786e-07, "loss": 0.2861, "step": 25678 }, { "epoch": 0.8812285518188058, "grad_norm": 0.8176052871422694, "learning_rate": 3.656205351231817e-07, "loss": 0.2683, "step": 25679 }, { "epoch": 0.8812628689087165, "grad_norm": 0.7381839086785907, "learning_rate": 3.6541195714883903e-07, "loss": 0.2633, "step": 25680 }, { "epoch": 0.8812971859986273, "grad_norm": 0.8140613372952556, "learning_rate": 3.652034364294299e-07, "loss": 0.2534, "step": 25681 }, { "epoch": 0.8813315030885381, "grad_norm": 0.7458809154564136, "learning_rate": 3.6499497296752786e-07, "loss": 0.2323, "step": 25682 }, { "epoch": 0.8813658201784489, "grad_norm": 0.8818373913739433, "learning_rate": 3.647865667657086e-07, "loss": 0.2963, "step": 25683 }, { "epoch": 0.8814001372683596, "grad_norm": 0.8931066193264253, "learning_rate": 3.6457821782654723e-07, "loss": 0.2611, "step": 25684 }, { "epoch": 0.8814344543582704, "grad_norm": 0.7202671298133568, "learning_rate": 3.643699261526179e-07, "loss": 0.2524, "step": 25685 }, { "epoch": 0.8814687714481811, "grad_norm": 0.7938314689264074, "learning_rate": 3.641616917464935e-07, "loss": 0.2332, "step": 25686 }, { "epoch": 0.881503088538092, "grad_norm": 0.7591021470603885, "learning_rate": 3.6395351461074645e-07, "loss": 0.264, "step": 25687 }, { "epoch": 0.8815374056280028, "grad_norm": 0.6464796888692057, "learning_rate": 3.6374539474794966e-07, "loss": 0.2353, "step": 25688 }, { "epoch": 0.8815717227179135, "grad_norm": 0.889106287767099, "learning_rate": 3.6353733216067113e-07, "loss": 0.3384, "step": 25689 }, { "epoch": 0.8816060398078243, "grad_norm": 0.7664149212416461, "learning_rate": 3.633293268514854e-07, "loss": 0.2567, "step": 25690 }, { "epoch": 0.8816403568977351, "grad_norm": 0.7749702334992643, "learning_rate": 3.6312137882295874e-07, "loss": 0.2487, "step": 25691 }, { "epoch": 0.8816746739876459, "grad_norm": 0.8930633321785726, "learning_rate": 3.629134880776608e-07, "loss": 0.3022, "step": 25692 }, { "epoch": 0.8817089910775566, "grad_norm": 0.7490397629883906, "learning_rate": 3.6270565461816174e-07, "loss": 0.2399, "step": 25693 }, { "epoch": 0.8817433081674674, "grad_norm": 0.8227486431165483, "learning_rate": 3.624978784470273e-07, "loss": 0.2875, "step": 25694 }, { "epoch": 0.8817776252573781, "grad_norm": 0.8146299411293946, "learning_rate": 3.6229015956682433e-07, "loss": 0.2495, "step": 25695 }, { "epoch": 0.881811942347289, "grad_norm": 0.7455835340009868, "learning_rate": 3.6208249798012017e-07, "loss": 0.2714, "step": 25696 }, { "epoch": 0.8818462594371997, "grad_norm": 0.7682303663309779, "learning_rate": 3.618748936894789e-07, "loss": 0.2782, "step": 25697 }, { "epoch": 0.8818805765271105, "grad_norm": 0.7543469522819329, "learning_rate": 3.616673466974657e-07, "loss": 0.2273, "step": 25698 }, { "epoch": 0.8819148936170212, "grad_norm": 0.8583099403408032, "learning_rate": 3.614598570066447e-07, "loss": 0.2613, "step": 25699 }, { "epoch": 0.8819492107069321, "grad_norm": 0.8052399272065242, "learning_rate": 3.6125242461958034e-07, "loss": 0.2964, "step": 25700 }, { "epoch": 0.8819835277968429, "grad_norm": 0.7033881017160238, "learning_rate": 3.610450495388318e-07, "loss": 0.2021, "step": 25701 }, { "epoch": 0.8820178448867536, "grad_norm": 0.8377656741512259, "learning_rate": 3.6083773176696533e-07, "loss": 0.2831, "step": 25702 }, { "epoch": 0.8820521619766644, "grad_norm": 0.7705586258523942, "learning_rate": 3.6063047130653893e-07, "loss": 0.2148, "step": 25703 }, { "epoch": 0.8820864790665751, "grad_norm": 0.8486433385976195, "learning_rate": 3.6042326816011383e-07, "loss": 0.2696, "step": 25704 }, { "epoch": 0.882120796156486, "grad_norm": 0.812657631202777, "learning_rate": 3.6021612233025025e-07, "loss": 0.289, "step": 25705 }, { "epoch": 0.8821551132463967, "grad_norm": 0.7664738582017829, "learning_rate": 3.6000903381950723e-07, "loss": 0.2758, "step": 25706 }, { "epoch": 0.8821894303363075, "grad_norm": 0.8188862815765595, "learning_rate": 3.5980200263044275e-07, "loss": 0.2944, "step": 25707 }, { "epoch": 0.8822237474262182, "grad_norm": 0.8208621247584139, "learning_rate": 3.595950287656147e-07, "loss": 0.2136, "step": 25708 }, { "epoch": 0.882258064516129, "grad_norm": 0.816684949268429, "learning_rate": 3.593881122275794e-07, "loss": 0.2603, "step": 25709 }, { "epoch": 0.8822923816060398, "grad_norm": 0.7696664212485097, "learning_rate": 3.591812530188943e-07, "loss": 0.2445, "step": 25710 }, { "epoch": 0.8823266986959506, "grad_norm": 0.7401707616499843, "learning_rate": 3.589744511421145e-07, "loss": 0.2311, "step": 25711 }, { "epoch": 0.8823610157858613, "grad_norm": 0.7389350695612789, "learning_rate": 3.587677065997924e-07, "loss": 0.2517, "step": 25712 }, { "epoch": 0.8823953328757721, "grad_norm": 0.7409179792499806, "learning_rate": 3.5856101939448664e-07, "loss": 0.2322, "step": 25713 }, { "epoch": 0.882429649965683, "grad_norm": 0.8336689049852438, "learning_rate": 3.583543895287467e-07, "loss": 0.2303, "step": 25714 }, { "epoch": 0.8824639670555937, "grad_norm": 0.8209320148734481, "learning_rate": 3.581478170051261e-07, "loss": 0.3117, "step": 25715 }, { "epoch": 0.8824982841455045, "grad_norm": 0.7521278783854726, "learning_rate": 3.5794130182617846e-07, "loss": 0.2908, "step": 25716 }, { "epoch": 0.8825326012354152, "grad_norm": 0.7704648085328097, "learning_rate": 3.577348439944539e-07, "loss": 0.2486, "step": 25717 }, { "epoch": 0.882566918325326, "grad_norm": 0.7603343182004384, "learning_rate": 3.575284435125026e-07, "loss": 0.2353, "step": 25718 }, { "epoch": 0.8826012354152368, "grad_norm": 0.7952107615024437, "learning_rate": 3.5732210038287474e-07, "loss": 0.2278, "step": 25719 }, { "epoch": 0.8826355525051476, "grad_norm": 0.7393452275100778, "learning_rate": 3.571158146081194e-07, "loss": 0.273, "step": 25720 }, { "epoch": 0.8826698695950583, "grad_norm": 0.7950680312857012, "learning_rate": 3.569095861907851e-07, "loss": 0.2299, "step": 25721 }, { "epoch": 0.8827041866849691, "grad_norm": 0.8309133098805915, "learning_rate": 3.5670341513341977e-07, "loss": 0.2519, "step": 25722 }, { "epoch": 0.88273850377488, "grad_norm": 0.758732650208669, "learning_rate": 3.5649730143857084e-07, "loss": 0.2253, "step": 25723 }, { "epoch": 0.8827728208647907, "grad_norm": 1.1948607952557218, "learning_rate": 3.562912451087824e-07, "loss": 0.2463, "step": 25724 }, { "epoch": 0.8828071379547014, "grad_norm": 0.7044466728700493, "learning_rate": 3.56085246146603e-07, "loss": 0.2277, "step": 25725 }, { "epoch": 0.8828414550446122, "grad_norm": 0.7392386889182813, "learning_rate": 3.5587930455457555e-07, "loss": 0.2436, "step": 25726 }, { "epoch": 0.882875772134523, "grad_norm": 0.9716352079166215, "learning_rate": 3.5567342033524466e-07, "loss": 0.3272, "step": 25727 }, { "epoch": 0.8829100892244338, "grad_norm": 0.8209281512515894, "learning_rate": 3.554675934911539e-07, "loss": 0.2259, "step": 25728 }, { "epoch": 0.8829444063143446, "grad_norm": 1.6031604452076287, "learning_rate": 3.552618240248462e-07, "loss": 0.2511, "step": 25729 }, { "epoch": 0.8829787234042553, "grad_norm": 0.6935091014444262, "learning_rate": 3.550561119388635e-07, "loss": 0.2397, "step": 25730 }, { "epoch": 0.8830130404941661, "grad_norm": 0.7143563537321546, "learning_rate": 3.5485045723574695e-07, "loss": 0.2581, "step": 25731 }, { "epoch": 0.8830473575840768, "grad_norm": 0.7046936093262527, "learning_rate": 3.54644859918038e-07, "loss": 0.2598, "step": 25732 }, { "epoch": 0.8830816746739877, "grad_norm": 0.8544267162131296, "learning_rate": 3.5443931998827395e-07, "loss": 0.2507, "step": 25733 }, { "epoch": 0.8831159917638984, "grad_norm": 0.8304011379384096, "learning_rate": 3.542338374489979e-07, "loss": 0.2806, "step": 25734 }, { "epoch": 0.8831503088538092, "grad_norm": 0.8669383655580839, "learning_rate": 3.540284123027454e-07, "loss": 0.297, "step": 25735 }, { "epoch": 0.8831846259437199, "grad_norm": 0.9726000588340037, "learning_rate": 3.5382304455205463e-07, "loss": 0.3439, "step": 25736 }, { "epoch": 0.8832189430336308, "grad_norm": 0.8079776487296789, "learning_rate": 3.5361773419946456e-07, "loss": 0.2399, "step": 25737 }, { "epoch": 0.8832532601235416, "grad_norm": 0.5933252903464706, "learning_rate": 3.5341248124750873e-07, "loss": 0.2193, "step": 25738 }, { "epoch": 0.8832875772134523, "grad_norm": 0.8967380645092, "learning_rate": 3.5320728569872565e-07, "loss": 0.2358, "step": 25739 }, { "epoch": 0.883321894303363, "grad_norm": 0.8879987723246686, "learning_rate": 3.530021475556483e-07, "loss": 0.2485, "step": 25740 }, { "epoch": 0.8833562113932738, "grad_norm": 0.8418443817606238, "learning_rate": 3.5279706682081196e-07, "loss": 0.29, "step": 25741 }, { "epoch": 0.8833905284831847, "grad_norm": 0.7426643543316839, "learning_rate": 3.5259204349674894e-07, "loss": 0.2509, "step": 25742 }, { "epoch": 0.8834248455730954, "grad_norm": 0.7565141417959896, "learning_rate": 3.523870775859933e-07, "loss": 0.307, "step": 25743 }, { "epoch": 0.8834591626630062, "grad_norm": 0.791752588444005, "learning_rate": 3.5218216909107706e-07, "loss": 0.248, "step": 25744 }, { "epoch": 0.8834934797529169, "grad_norm": 0.7582445089045591, "learning_rate": 3.519773180145308e-07, "loss": 0.2162, "step": 25745 }, { "epoch": 0.8835277968428277, "grad_norm": 0.8076990333248218, "learning_rate": 3.517725243588865e-07, "loss": 0.2618, "step": 25746 }, { "epoch": 0.8835621139327385, "grad_norm": 0.7656301903250295, "learning_rate": 3.515677881266721e-07, "loss": 0.2925, "step": 25747 }, { "epoch": 0.8835964310226493, "grad_norm": 0.8532090950814905, "learning_rate": 3.513631093204195e-07, "loss": 0.2866, "step": 25748 }, { "epoch": 0.88363074811256, "grad_norm": 0.9528217693880513, "learning_rate": 3.5115848794265497e-07, "loss": 0.2757, "step": 25749 }, { "epoch": 0.8836650652024708, "grad_norm": 0.7264235541205212, "learning_rate": 3.5095392399590756e-07, "loss": 0.2701, "step": 25750 }, { "epoch": 0.8836993822923817, "grad_norm": 0.7358401438574423, "learning_rate": 3.507494174827042e-07, "loss": 0.2283, "step": 25751 }, { "epoch": 0.8837336993822924, "grad_norm": 0.7703371327632456, "learning_rate": 3.5054496840557116e-07, "loss": 0.267, "step": 25752 }, { "epoch": 0.8837680164722032, "grad_norm": 0.7541332680008643, "learning_rate": 3.5034057676703426e-07, "loss": 0.2758, "step": 25753 }, { "epoch": 0.8838023335621139, "grad_norm": 0.8294844677222907, "learning_rate": 3.5013624256961866e-07, "loss": 0.2154, "step": 25754 }, { "epoch": 0.8838366506520247, "grad_norm": 0.8236154079174949, "learning_rate": 3.49931965815849e-07, "loss": 0.309, "step": 25755 }, { "epoch": 0.8838709677419355, "grad_norm": 0.9352146064791149, "learning_rate": 3.4972774650824716e-07, "loss": 0.2767, "step": 25756 }, { "epoch": 0.8839052848318463, "grad_norm": 0.7895052084091398, "learning_rate": 3.495235846493389e-07, "loss": 0.2555, "step": 25757 }, { "epoch": 0.883939601921757, "grad_norm": 0.8213394852980058, "learning_rate": 3.493194802416444e-07, "loss": 0.2719, "step": 25758 }, { "epoch": 0.8839739190116678, "grad_norm": 0.7871982810743976, "learning_rate": 3.4911543328768394e-07, "loss": 0.2428, "step": 25759 }, { "epoch": 0.8840082361015786, "grad_norm": 0.7832604774828662, "learning_rate": 3.489114437899821e-07, "loss": 0.2383, "step": 25760 }, { "epoch": 0.8840425531914894, "grad_norm": 0.7869918451222536, "learning_rate": 3.487075117510558e-07, "loss": 0.2583, "step": 25761 }, { "epoch": 0.8840768702814001, "grad_norm": 0.7568709930723271, "learning_rate": 3.4850363717342517e-07, "loss": 0.2628, "step": 25762 }, { "epoch": 0.8841111873713109, "grad_norm": 0.7370341515929849, "learning_rate": 3.4829982005960936e-07, "loss": 0.3016, "step": 25763 }, { "epoch": 0.8841455044612216, "grad_norm": 0.7345136603830601, "learning_rate": 3.480960604121253e-07, "loss": 0.315, "step": 25764 }, { "epoch": 0.8841798215511325, "grad_norm": 0.7911557853397246, "learning_rate": 3.478923582334914e-07, "loss": 0.236, "step": 25765 }, { "epoch": 0.8842141386410433, "grad_norm": 0.8529228169433064, "learning_rate": 3.4768871352622356e-07, "loss": 0.266, "step": 25766 }, { "epoch": 0.884248455730954, "grad_norm": 0.7478682241870965, "learning_rate": 3.4748512629283803e-07, "loss": 0.2518, "step": 25767 }, { "epoch": 0.8842827728208648, "grad_norm": 1.025122988905745, "learning_rate": 3.472815965358478e-07, "loss": 0.2493, "step": 25768 }, { "epoch": 0.8843170899107755, "grad_norm": 0.8565490594699311, "learning_rate": 3.470781242577709e-07, "loss": 0.2821, "step": 25769 }, { "epoch": 0.8843514070006864, "grad_norm": 0.784861051150221, "learning_rate": 3.468747094611169e-07, "loss": 0.2704, "step": 25770 }, { "epoch": 0.8843857240905971, "grad_norm": 0.719806312738968, "learning_rate": 3.466713521484033e-07, "loss": 0.2365, "step": 25771 }, { "epoch": 0.8844200411805079, "grad_norm": 0.8862399607029645, "learning_rate": 3.464680523221381e-07, "loss": 0.289, "step": 25772 }, { "epoch": 0.8844543582704186, "grad_norm": 0.8167826224558511, "learning_rate": 3.4626480998483537e-07, "loss": 0.2275, "step": 25773 }, { "epoch": 0.8844886753603295, "grad_norm": 0.8597364519021646, "learning_rate": 3.4606162513900476e-07, "loss": 0.2868, "step": 25774 }, { "epoch": 0.8845229924502402, "grad_norm": 0.7871637012832775, "learning_rate": 3.458584977871571e-07, "loss": 0.2775, "step": 25775 }, { "epoch": 0.884557309540151, "grad_norm": 0.8203411301731071, "learning_rate": 3.456554279318014e-07, "loss": 0.3341, "step": 25776 }, { "epoch": 0.8845916266300617, "grad_norm": 0.8036373601878403, "learning_rate": 3.4545241557544627e-07, "loss": 0.3009, "step": 25777 }, { "epoch": 0.8846259437199725, "grad_norm": 0.713517423383242, "learning_rate": 3.4524946072060084e-07, "loss": 0.2973, "step": 25778 }, { "epoch": 0.8846602608098834, "grad_norm": 1.0119518914653844, "learning_rate": 3.450465633697697e-07, "loss": 0.2906, "step": 25779 }, { "epoch": 0.8846945778997941, "grad_norm": 0.7590521165481555, "learning_rate": 3.4484372352546204e-07, "loss": 0.2351, "step": 25780 }, { "epoch": 0.8847288949897049, "grad_norm": 0.8521216696784778, "learning_rate": 3.4464094119018353e-07, "loss": 0.2862, "step": 25781 }, { "epoch": 0.8847632120796156, "grad_norm": 0.8073758271560054, "learning_rate": 3.444382163664367e-07, "loss": 0.2864, "step": 25782 }, { "epoch": 0.8847975291695265, "grad_norm": 0.7228763155316024, "learning_rate": 3.442355490567295e-07, "loss": 0.2464, "step": 25783 }, { "epoch": 0.8848318462594372, "grad_norm": 0.7728057705737805, "learning_rate": 3.4403293926356384e-07, "loss": 0.2687, "step": 25784 }, { "epoch": 0.884866163349348, "grad_norm": 0.9833778987392268, "learning_rate": 3.4383038698944216e-07, "loss": 0.3056, "step": 25785 }, { "epoch": 0.8849004804392587, "grad_norm": 0.8418007145862357, "learning_rate": 3.43627892236868e-07, "loss": 0.3209, "step": 25786 }, { "epoch": 0.8849347975291695, "grad_norm": 0.8977479636225398, "learning_rate": 3.4342545500834214e-07, "loss": 0.2728, "step": 25787 }, { "epoch": 0.8849691146190803, "grad_norm": 0.827793668634549, "learning_rate": 3.432230753063659e-07, "loss": 0.2412, "step": 25788 }, { "epoch": 0.8850034317089911, "grad_norm": 0.7953391423755046, "learning_rate": 3.4302075313343897e-07, "loss": 0.2553, "step": 25789 }, { "epoch": 0.8850377487989018, "grad_norm": 0.741534336729552, "learning_rate": 3.428184884920621e-07, "loss": 0.2659, "step": 25790 }, { "epoch": 0.8850720658888126, "grad_norm": 0.7762199134834047, "learning_rate": 3.4261628138473167e-07, "loss": 0.3089, "step": 25791 }, { "epoch": 0.8851063829787233, "grad_norm": 0.9629562059126634, "learning_rate": 3.4241413181394843e-07, "loss": 0.3155, "step": 25792 }, { "epoch": 0.8851407000686342, "grad_norm": 0.8130450553639768, "learning_rate": 3.4221203978220764e-07, "loss": 0.2188, "step": 25793 }, { "epoch": 0.885175017158545, "grad_norm": 0.8097126052869371, "learning_rate": 3.4201000529200666e-07, "loss": 0.2947, "step": 25794 }, { "epoch": 0.8852093342484557, "grad_norm": 0.8872743858597066, "learning_rate": 3.418080283458414e-07, "loss": 0.2228, "step": 25795 }, { "epoch": 0.8852436513383665, "grad_norm": 0.8151826488970496, "learning_rate": 3.416061089462069e-07, "loss": 0.2803, "step": 25796 }, { "epoch": 0.8852779684282773, "grad_norm": 0.8053246736292413, "learning_rate": 3.4140424709559807e-07, "loss": 0.2482, "step": 25797 }, { "epoch": 0.8853122855181881, "grad_norm": 0.7434260675159337, "learning_rate": 3.4120244279650825e-07, "loss": 0.2919, "step": 25798 }, { "epoch": 0.8853466026080988, "grad_norm": 0.7086004165956235, "learning_rate": 3.4100069605143113e-07, "loss": 0.2538, "step": 25799 }, { "epoch": 0.8853809196980096, "grad_norm": 0.741927084916699, "learning_rate": 3.407990068628575e-07, "loss": 0.2701, "step": 25800 }, { "epoch": 0.8854152367879203, "grad_norm": 0.8734812274951085, "learning_rate": 3.405973752332814e-07, "loss": 0.2673, "step": 25801 }, { "epoch": 0.8854495538778312, "grad_norm": 0.9419219754608292, "learning_rate": 3.4039580116519034e-07, "loss": 0.2305, "step": 25802 }, { "epoch": 0.885483870967742, "grad_norm": 0.8539242879397508, "learning_rate": 3.4019428466107783e-07, "loss": 0.2434, "step": 25803 }, { "epoch": 0.8855181880576527, "grad_norm": 0.8266561377732353, "learning_rate": 3.3999282572343305e-07, "loss": 0.2912, "step": 25804 }, { "epoch": 0.8855525051475635, "grad_norm": 0.7144147029597224, "learning_rate": 3.3979142435474177e-07, "loss": 0.2719, "step": 25805 }, { "epoch": 0.8855868222374743, "grad_norm": 0.8088333582956428, "learning_rate": 3.3959008055749587e-07, "loss": 0.2623, "step": 25806 }, { "epoch": 0.8856211393273851, "grad_norm": 0.7830755554388479, "learning_rate": 3.393887943341806e-07, "loss": 0.293, "step": 25807 }, { "epoch": 0.8856554564172958, "grad_norm": 0.8607912678802702, "learning_rate": 3.391875656872828e-07, "loss": 0.2655, "step": 25808 }, { "epoch": 0.8856897735072066, "grad_norm": 0.838200932688026, "learning_rate": 3.3898639461928896e-07, "loss": 0.2804, "step": 25809 }, { "epoch": 0.8857240905971173, "grad_norm": 0.8136050980296341, "learning_rate": 3.387852811326836e-07, "loss": 0.2414, "step": 25810 }, { "epoch": 0.8857584076870282, "grad_norm": 0.8184912133964312, "learning_rate": 3.385842252299515e-07, "loss": 0.2689, "step": 25811 }, { "epoch": 0.8857927247769389, "grad_norm": 0.7605036802250638, "learning_rate": 3.383832269135773e-07, "loss": 0.2385, "step": 25812 }, { "epoch": 0.8858270418668497, "grad_norm": 0.7492975368590576, "learning_rate": 3.3818228618604344e-07, "loss": 0.2741, "step": 25813 }, { "epoch": 0.8858613589567604, "grad_norm": 0.7651256718724342, "learning_rate": 3.379814030498313e-07, "loss": 0.2521, "step": 25814 }, { "epoch": 0.8858956760466712, "grad_norm": 0.7360936332772966, "learning_rate": 3.3778057750742443e-07, "loss": 0.2283, "step": 25815 }, { "epoch": 0.885929993136582, "grad_norm": 0.8822693908288844, "learning_rate": 3.3757980956130256e-07, "loss": 0.3079, "step": 25816 }, { "epoch": 0.8859643102264928, "grad_norm": 0.7420057430597934, "learning_rate": 3.373790992139464e-07, "loss": 0.2823, "step": 25817 }, { "epoch": 0.8859986273164036, "grad_norm": 0.6965012769969546, "learning_rate": 3.371784464678352e-07, "loss": 0.235, "step": 25818 }, { "epoch": 0.8860329444063143, "grad_norm": 0.6900321707347409, "learning_rate": 3.3697785132544793e-07, "loss": 0.2847, "step": 25819 }, { "epoch": 0.8860672614962252, "grad_norm": 1.0438126780645682, "learning_rate": 3.3677731378926326e-07, "loss": 0.2643, "step": 25820 }, { "epoch": 0.8861015785861359, "grad_norm": 0.8304490975657918, "learning_rate": 3.3657683386175757e-07, "loss": 0.3116, "step": 25821 }, { "epoch": 0.8861358956760467, "grad_norm": 0.660883774953305, "learning_rate": 3.363764115454088e-07, "loss": 0.2017, "step": 25822 }, { "epoch": 0.8861702127659574, "grad_norm": 0.7983580945578391, "learning_rate": 3.361760468426911e-07, "loss": 0.2234, "step": 25823 }, { "epoch": 0.8862045298558682, "grad_norm": 0.7456193082563707, "learning_rate": 3.35975739756082e-07, "loss": 0.2757, "step": 25824 }, { "epoch": 0.886238846945779, "grad_norm": 0.7024831367696092, "learning_rate": 3.357754902880539e-07, "loss": 0.2998, "step": 25825 }, { "epoch": 0.8862731640356898, "grad_norm": 0.8755008603622526, "learning_rate": 3.355752984410815e-07, "loss": 0.2775, "step": 25826 }, { "epoch": 0.8863074811256005, "grad_norm": 1.4440923521063047, "learning_rate": 3.353751642176395e-07, "loss": 0.2497, "step": 25827 }, { "epoch": 0.8863417982155113, "grad_norm": 0.7171093209769381, "learning_rate": 3.3517508762019814e-07, "loss": 0.2567, "step": 25828 }, { "epoch": 0.8863761153054222, "grad_norm": 0.6729522763528982, "learning_rate": 3.349750686512293e-07, "loss": 0.2461, "step": 25829 }, { "epoch": 0.8864104323953329, "grad_norm": 0.849058120005571, "learning_rate": 3.347751073132055e-07, "loss": 0.286, "step": 25830 }, { "epoch": 0.8864447494852437, "grad_norm": 0.7444148054490103, "learning_rate": 3.345752036085953e-07, "loss": 0.2254, "step": 25831 }, { "epoch": 0.8864790665751544, "grad_norm": 0.761837831219201, "learning_rate": 3.3437535753986947e-07, "loss": 0.297, "step": 25832 }, { "epoch": 0.8865133836650652, "grad_norm": 0.7259823351040106, "learning_rate": 3.3417556910949665e-07, "loss": 0.2292, "step": 25833 }, { "epoch": 0.886547700754976, "grad_norm": 0.9640198672401808, "learning_rate": 3.3397583831994485e-07, "loss": 0.2964, "step": 25834 }, { "epoch": 0.8865820178448868, "grad_norm": 0.8648074195210206, "learning_rate": 3.337761651736815e-07, "loss": 0.2775, "step": 25835 }, { "epoch": 0.8866163349347975, "grad_norm": 0.8734214395611043, "learning_rate": 3.335765496731741e-07, "loss": 0.251, "step": 25836 }, { "epoch": 0.8866506520247083, "grad_norm": 0.801105773059406, "learning_rate": 3.3337699182088623e-07, "loss": 0.2538, "step": 25837 }, { "epoch": 0.886684969114619, "grad_norm": 0.7531989246013608, "learning_rate": 3.3317749161928646e-07, "loss": 0.2474, "step": 25838 }, { "epoch": 0.8867192862045299, "grad_norm": 0.7902420369511011, "learning_rate": 3.329780490708373e-07, "loss": 0.2336, "step": 25839 }, { "epoch": 0.8867536032944406, "grad_norm": 0.850091394877769, "learning_rate": 3.327786641780029e-07, "loss": 0.2469, "step": 25840 }, { "epoch": 0.8867879203843514, "grad_norm": 0.8464939482336207, "learning_rate": 3.3257933694324615e-07, "loss": 0.2706, "step": 25841 }, { "epoch": 0.8868222374742621, "grad_norm": 0.7539705351719023, "learning_rate": 3.323800673690308e-07, "loss": 0.2383, "step": 25842 }, { "epoch": 0.886856554564173, "grad_norm": 0.7146129625383671, "learning_rate": 3.321808554578171e-07, "loss": 0.213, "step": 25843 }, { "epoch": 0.8868908716540838, "grad_norm": 0.6836754846758095, "learning_rate": 3.319817012120674e-07, "loss": 0.2645, "step": 25844 }, { "epoch": 0.8869251887439945, "grad_norm": 0.7532811800873216, "learning_rate": 3.3178260463424206e-07, "loss": 0.2335, "step": 25845 }, { "epoch": 0.8869595058339053, "grad_norm": 0.7353685226256286, "learning_rate": 3.3158356572679797e-07, "loss": 0.2258, "step": 25846 }, { "epoch": 0.886993822923816, "grad_norm": 0.7721050690598982, "learning_rate": 3.3138458449219756e-07, "loss": 0.2437, "step": 25847 }, { "epoch": 0.8870281400137269, "grad_norm": 0.6790304304944139, "learning_rate": 3.311856609328978e-07, "loss": 0.2906, "step": 25848 }, { "epoch": 0.8870624571036376, "grad_norm": 0.7758488935631997, "learning_rate": 3.3098679505135454e-07, "loss": 0.2741, "step": 25849 }, { "epoch": 0.8870967741935484, "grad_norm": 0.7788199685084577, "learning_rate": 3.3078798685002743e-07, "loss": 0.2685, "step": 25850 }, { "epoch": 0.8871310912834591, "grad_norm": 0.7831305510324563, "learning_rate": 3.3058923633137007e-07, "loss": 0.2402, "step": 25851 }, { "epoch": 0.88716540837337, "grad_norm": 0.8207427090682955, "learning_rate": 3.3039054349783884e-07, "loss": 0.249, "step": 25852 }, { "epoch": 0.8871997254632807, "grad_norm": 0.8199087905731863, "learning_rate": 3.301919083518884e-07, "loss": 0.3082, "step": 25853 }, { "epoch": 0.8872340425531915, "grad_norm": 0.8022104379941453, "learning_rate": 3.299933308959719e-07, "loss": 0.2433, "step": 25854 }, { "epoch": 0.8872683596431022, "grad_norm": 0.7023018456775532, "learning_rate": 3.2979481113254385e-07, "loss": 0.2464, "step": 25855 }, { "epoch": 0.887302676733013, "grad_norm": 0.8194572874812213, "learning_rate": 3.2959634906405526e-07, "loss": 0.2533, "step": 25856 }, { "epoch": 0.8873369938229239, "grad_norm": 0.7358235654815172, "learning_rate": 3.293979446929596e-07, "loss": 0.2358, "step": 25857 }, { "epoch": 0.8873713109128346, "grad_norm": 0.7791468358785796, "learning_rate": 3.291995980217055e-07, "loss": 0.2318, "step": 25858 }, { "epoch": 0.8874056280027454, "grad_norm": 0.6922116506320121, "learning_rate": 3.290013090527466e-07, "loss": 0.2636, "step": 25859 }, { "epoch": 0.8874399450926561, "grad_norm": 0.7060938471524928, "learning_rate": 3.2880307778852925e-07, "loss": 0.2272, "step": 25860 }, { "epoch": 0.8874742621825669, "grad_norm": 0.7591968937893111, "learning_rate": 3.286049042315043e-07, "loss": 0.2763, "step": 25861 }, { "epoch": 0.8875085792724777, "grad_norm": 0.8200355848888177, "learning_rate": 3.2840678838411913e-07, "loss": 0.2548, "step": 25862 }, { "epoch": 0.8875428963623885, "grad_norm": 0.7728765576813141, "learning_rate": 3.282087302488218e-07, "loss": 0.2607, "step": 25863 }, { "epoch": 0.8875772134522992, "grad_norm": 0.7606132107550384, "learning_rate": 3.280107298280588e-07, "loss": 0.2233, "step": 25864 }, { "epoch": 0.88761153054221, "grad_norm": 0.8620936562916902, "learning_rate": 3.278127871242759e-07, "loss": 0.2491, "step": 25865 }, { "epoch": 0.8876458476321208, "grad_norm": 0.7575037601945451, "learning_rate": 3.276149021399189e-07, "loss": 0.2785, "step": 25866 }, { "epoch": 0.8876801647220316, "grad_norm": 0.7206372809388499, "learning_rate": 3.2741707487743246e-07, "loss": 0.232, "step": 25867 }, { "epoch": 0.8877144818119423, "grad_norm": 0.7166453121592201, "learning_rate": 3.2721930533926084e-07, "loss": 0.2427, "step": 25868 }, { "epoch": 0.8877487989018531, "grad_norm": 0.7242885592856756, "learning_rate": 3.2702159352784534e-07, "loss": 0.2445, "step": 25869 }, { "epoch": 0.8877831159917638, "grad_norm": 0.9583396316291656, "learning_rate": 3.268239394456307e-07, "loss": 0.2317, "step": 25870 }, { "epoch": 0.8878174330816747, "grad_norm": 0.7928726842869672, "learning_rate": 3.2662634309505894e-07, "loss": 0.2937, "step": 25871 }, { "epoch": 0.8878517501715855, "grad_norm": 0.8017415555393616, "learning_rate": 3.2642880447856853e-07, "loss": 0.2533, "step": 25872 }, { "epoch": 0.8878860672614962, "grad_norm": 0.7899416268279336, "learning_rate": 3.2623132359860254e-07, "loss": 0.2561, "step": 25873 }, { "epoch": 0.887920384351407, "grad_norm": 0.8150937809361727, "learning_rate": 3.260339004575991e-07, "loss": 0.2053, "step": 25874 }, { "epoch": 0.8879547014413178, "grad_norm": 0.7913846042255006, "learning_rate": 3.258365350579973e-07, "loss": 0.2953, "step": 25875 }, { "epoch": 0.8879890185312286, "grad_norm": 0.7113815557005947, "learning_rate": 3.256392274022352e-07, "loss": 0.2613, "step": 25876 }, { "epoch": 0.8880233356211393, "grad_norm": 0.8018606207327756, "learning_rate": 3.254419774927514e-07, "loss": 0.2364, "step": 25877 }, { "epoch": 0.8880576527110501, "grad_norm": 0.7314338314115089, "learning_rate": 3.2524478533198124e-07, "loss": 0.2975, "step": 25878 }, { "epoch": 0.8880919698009608, "grad_norm": 0.7697197071879176, "learning_rate": 3.2504765092236214e-07, "loss": 0.3189, "step": 25879 }, { "epoch": 0.8881262868908717, "grad_norm": 0.7464703528213144, "learning_rate": 3.2485057426632936e-07, "loss": 0.2511, "step": 25880 }, { "epoch": 0.8881606039807824, "grad_norm": 0.7524148717278705, "learning_rate": 3.246535553663155e-07, "loss": 0.228, "step": 25881 }, { "epoch": 0.8881949210706932, "grad_norm": 0.8434971926044366, "learning_rate": 3.244565942247574e-07, "loss": 0.2788, "step": 25882 }, { "epoch": 0.888229238160604, "grad_norm": 0.7856260093492958, "learning_rate": 3.242596908440865e-07, "loss": 0.258, "step": 25883 }, { "epoch": 0.8882635552505147, "grad_norm": 0.817246479743174, "learning_rate": 3.2406284522673526e-07, "loss": 0.2576, "step": 25884 }, { "epoch": 0.8882978723404256, "grad_norm": 0.9100472516842478, "learning_rate": 3.238660573751362e-07, "loss": 0.2692, "step": 25885 }, { "epoch": 0.8883321894303363, "grad_norm": 0.7380123367818486, "learning_rate": 3.236693272917202e-07, "loss": 0.2702, "step": 25886 }, { "epoch": 0.8883665065202471, "grad_norm": 0.6994035780525559, "learning_rate": 3.234726549789174e-07, "loss": 0.2296, "step": 25887 }, { "epoch": 0.8884008236101578, "grad_norm": 0.8071372853689822, "learning_rate": 3.2327604043915826e-07, "loss": 0.3032, "step": 25888 }, { "epoch": 0.8884351407000687, "grad_norm": 0.7033420758579664, "learning_rate": 3.230794836748713e-07, "loss": 0.2808, "step": 25889 }, { "epoch": 0.8884694577899794, "grad_norm": 0.7923101195010882, "learning_rate": 3.228829846884829e-07, "loss": 0.2775, "step": 25890 }, { "epoch": 0.8885037748798902, "grad_norm": 0.7909742899988396, "learning_rate": 3.226865434824233e-07, "loss": 0.2539, "step": 25891 }, { "epoch": 0.8885380919698009, "grad_norm": 0.9378315368194562, "learning_rate": 3.2249016005911906e-07, "loss": 0.3223, "step": 25892 }, { "epoch": 0.8885724090597117, "grad_norm": 0.7842514217698762, "learning_rate": 3.222938344209936e-07, "loss": 0.3097, "step": 25893 }, { "epoch": 0.8886067261496226, "grad_norm": 0.7514939889234876, "learning_rate": 3.220975665704762e-07, "loss": 0.2989, "step": 25894 }, { "epoch": 0.8886410432395333, "grad_norm": 0.8004587063096518, "learning_rate": 3.2190135650998767e-07, "loss": 0.2888, "step": 25895 }, { "epoch": 0.888675360329444, "grad_norm": 0.7474180971644167, "learning_rate": 3.2170520424195497e-07, "loss": 0.2744, "step": 25896 }, { "epoch": 0.8887096774193548, "grad_norm": 0.8571655395370809, "learning_rate": 3.215091097688e-07, "loss": 0.2219, "step": 25897 }, { "epoch": 0.8887439945092657, "grad_norm": 0.7651284791003299, "learning_rate": 3.213130730929448e-07, "loss": 0.2631, "step": 25898 }, { "epoch": 0.8887783115991764, "grad_norm": 0.7204672330995744, "learning_rate": 3.211170942168118e-07, "loss": 0.2495, "step": 25899 }, { "epoch": 0.8888126286890872, "grad_norm": 0.8355870159993282, "learning_rate": 3.2092117314282247e-07, "loss": 0.2563, "step": 25900 }, { "epoch": 0.8888469457789979, "grad_norm": 0.713620414770026, "learning_rate": 3.2072530987339644e-07, "loss": 0.2282, "step": 25901 }, { "epoch": 0.8888812628689087, "grad_norm": 0.7647014460626942, "learning_rate": 3.205295044109535e-07, "loss": 0.2327, "step": 25902 }, { "epoch": 0.8889155799588195, "grad_norm": 0.8338382878969309, "learning_rate": 3.20333756757914e-07, "loss": 0.2877, "step": 25903 }, { "epoch": 0.8889498970487303, "grad_norm": 0.8114471820409248, "learning_rate": 3.2013806691669313e-07, "loss": 0.2965, "step": 25904 }, { "epoch": 0.888984214138641, "grad_norm": 0.7856854830962007, "learning_rate": 3.199424348897123e-07, "loss": 0.2343, "step": 25905 }, { "epoch": 0.8890185312285518, "grad_norm": 0.7507934249118291, "learning_rate": 3.197468606793852e-07, "loss": 0.3129, "step": 25906 }, { "epoch": 0.8890528483184625, "grad_norm": 0.8623977669604678, "learning_rate": 3.195513442881287e-07, "loss": 0.2397, "step": 25907 }, { "epoch": 0.8890871654083734, "grad_norm": 0.8816393062272828, "learning_rate": 3.193558857183587e-07, "loss": 0.2412, "step": 25908 }, { "epoch": 0.8891214824982842, "grad_norm": 0.7726931255101881, "learning_rate": 3.191604849724894e-07, "loss": 0.2937, "step": 25909 }, { "epoch": 0.8891557995881949, "grad_norm": 0.7562788284482496, "learning_rate": 3.189651420529355e-07, "loss": 0.2172, "step": 25910 }, { "epoch": 0.8891901166781057, "grad_norm": 0.7657074300062803, "learning_rate": 3.187698569621095e-07, "loss": 0.26, "step": 25911 }, { "epoch": 0.8892244337680165, "grad_norm": 0.7789860236309201, "learning_rate": 3.185746297024245e-07, "loss": 0.2757, "step": 25912 }, { "epoch": 0.8892587508579273, "grad_norm": 0.8030243916929153, "learning_rate": 3.1837946027629073e-07, "loss": 0.2559, "step": 25913 }, { "epoch": 0.889293067947838, "grad_norm": 0.736202228434038, "learning_rate": 3.1818434868612136e-07, "loss": 0.262, "step": 25914 }, { "epoch": 0.8893273850377488, "grad_norm": 0.8317162674381037, "learning_rate": 3.179892949343266e-07, "loss": 0.3417, "step": 25915 }, { "epoch": 0.8893617021276595, "grad_norm": 0.8394490167132891, "learning_rate": 3.1779429902331407e-07, "loss": 0.3115, "step": 25916 }, { "epoch": 0.8893960192175704, "grad_norm": 0.6824973904122061, "learning_rate": 3.1759936095549503e-07, "loss": 0.2198, "step": 25917 }, { "epoch": 0.8894303363074811, "grad_norm": 0.8036492925928396, "learning_rate": 3.1740448073327655e-07, "loss": 0.2314, "step": 25918 }, { "epoch": 0.8894646533973919, "grad_norm": 0.8434031201193382, "learning_rate": 3.172096583590656e-07, "loss": 0.2634, "step": 25919 }, { "epoch": 0.8894989704873026, "grad_norm": 0.7736596333651659, "learning_rate": 3.1701489383527017e-07, "loss": 0.23, "step": 25920 }, { "epoch": 0.8895332875772135, "grad_norm": 0.8833859607116173, "learning_rate": 3.1682018716429617e-07, "loss": 0.2557, "step": 25921 }, { "epoch": 0.8895676046671243, "grad_norm": 0.8260892418898302, "learning_rate": 3.1662553834854835e-07, "loss": 0.2296, "step": 25922 }, { "epoch": 0.889601921757035, "grad_norm": 0.7274586276921183, "learning_rate": 3.1643094739043203e-07, "loss": 0.2001, "step": 25923 }, { "epoch": 0.8896362388469458, "grad_norm": 0.8862692755451756, "learning_rate": 3.162364142923513e-07, "loss": 0.2883, "step": 25924 }, { "epoch": 0.8896705559368565, "grad_norm": 0.8911838980099843, "learning_rate": 3.160419390567071e-07, "loss": 0.2613, "step": 25925 }, { "epoch": 0.8897048730267674, "grad_norm": 0.7812926098564862, "learning_rate": 3.158475216859058e-07, "loss": 0.2657, "step": 25926 }, { "epoch": 0.8897391901166781, "grad_norm": 0.8651339806296261, "learning_rate": 3.1565316218234553e-07, "loss": 0.2264, "step": 25927 }, { "epoch": 0.8897735072065889, "grad_norm": 0.7485363314429063, "learning_rate": 3.15458860548431e-07, "loss": 0.274, "step": 25928 }, { "epoch": 0.8898078242964996, "grad_norm": 0.7032007438974739, "learning_rate": 3.152646167865592e-07, "loss": 0.3209, "step": 25929 }, { "epoch": 0.8898421413864104, "grad_norm": 0.7781592341752286, "learning_rate": 3.1507043089913146e-07, "loss": 0.2847, "step": 25930 }, { "epoch": 0.8898764584763212, "grad_norm": 0.7632533262618233, "learning_rate": 3.14876302888546e-07, "loss": 0.2581, "step": 25931 }, { "epoch": 0.889910775566232, "grad_norm": 0.7492305165868954, "learning_rate": 3.146822327572019e-07, "loss": 0.3209, "step": 25932 }, { "epoch": 0.8899450926561427, "grad_norm": 0.8138537196607997, "learning_rate": 3.1448822050749614e-07, "loss": 0.2865, "step": 25933 }, { "epoch": 0.8899794097460535, "grad_norm": 0.9939161856942406, "learning_rate": 3.142942661418258e-07, "loss": 0.3025, "step": 25934 }, { "epoch": 0.8900137268359644, "grad_norm": 0.7101550384657364, "learning_rate": 3.141003696625877e-07, "loss": 0.2831, "step": 25935 }, { "epoch": 0.8900480439258751, "grad_norm": 0.8461511895776735, "learning_rate": 3.139065310721745e-07, "loss": 0.2911, "step": 25936 }, { "epoch": 0.8900823610157859, "grad_norm": 0.7854895343504567, "learning_rate": 3.1371275037298364e-07, "loss": 0.2495, "step": 25937 }, { "epoch": 0.8901166781056966, "grad_norm": 0.7878289309461646, "learning_rate": 3.135190275674088e-07, "loss": 0.2642, "step": 25938 }, { "epoch": 0.8901509951956074, "grad_norm": 0.8270719043943962, "learning_rate": 3.1332536265784086e-07, "loss": 0.3537, "step": 25939 }, { "epoch": 0.8901853122855182, "grad_norm": 0.7449108942834916, "learning_rate": 3.131317556466751e-07, "loss": 0.2327, "step": 25940 }, { "epoch": 0.890219629375429, "grad_norm": 0.7712388208098745, "learning_rate": 3.129382065363018e-07, "loss": 0.2441, "step": 25941 }, { "epoch": 0.8902539464653397, "grad_norm": 0.7429110006100593, "learning_rate": 3.127447153291124e-07, "loss": 0.1893, "step": 25942 }, { "epoch": 0.8902882635552505, "grad_norm": 0.8268335408879351, "learning_rate": 3.125512820274967e-07, "loss": 0.2545, "step": 25943 }, { "epoch": 0.8903225806451613, "grad_norm": 0.739111921502202, "learning_rate": 3.1235790663384555e-07, "loss": 0.2203, "step": 25944 }, { "epoch": 0.8903568977350721, "grad_norm": 0.8365073070445487, "learning_rate": 3.12164589150547e-07, "loss": 0.2725, "step": 25945 }, { "epoch": 0.8903912148249828, "grad_norm": 0.6975304881287409, "learning_rate": 3.119713295799892e-07, "loss": 0.2394, "step": 25946 }, { "epoch": 0.8904255319148936, "grad_norm": 0.8558362482105402, "learning_rate": 3.1177812792456075e-07, "loss": 0.256, "step": 25947 }, { "epoch": 0.8904598490048043, "grad_norm": 0.7612519182634182, "learning_rate": 3.115849841866464e-07, "loss": 0.2003, "step": 25948 }, { "epoch": 0.8904941660947152, "grad_norm": 0.6860520872807113, "learning_rate": 3.113918983686348e-07, "loss": 0.2246, "step": 25949 }, { "epoch": 0.890528483184626, "grad_norm": 0.8478901405581148, "learning_rate": 3.111988704729091e-07, "loss": 0.2494, "step": 25950 }, { "epoch": 0.8905628002745367, "grad_norm": 0.7277244887543343, "learning_rate": 3.110059005018545e-07, "loss": 0.2689, "step": 25951 }, { "epoch": 0.8905971173644475, "grad_norm": 0.8193252803801648, "learning_rate": 3.1081298845785535e-07, "loss": 0.3083, "step": 25952 }, { "epoch": 0.8906314344543582, "grad_norm": 0.7496953746244437, "learning_rate": 3.1062013434329464e-07, "loss": 0.2906, "step": 25953 }, { "epoch": 0.8906657515442691, "grad_norm": 0.7233325573466072, "learning_rate": 3.104273381605549e-07, "loss": 0.3266, "step": 25954 }, { "epoch": 0.8907000686341798, "grad_norm": 0.8570726133505643, "learning_rate": 3.102345999120182e-07, "loss": 0.2367, "step": 25955 }, { "epoch": 0.8907343857240906, "grad_norm": 0.6991429907944848, "learning_rate": 3.100419196000659e-07, "loss": 0.2232, "step": 25956 }, { "epoch": 0.8907687028140013, "grad_norm": 0.8078825277368845, "learning_rate": 3.098492972270761e-07, "loss": 0.2751, "step": 25957 }, { "epoch": 0.8908030199039122, "grad_norm": 0.9779059107419724, "learning_rate": 3.096567327954303e-07, "loss": 0.2609, "step": 25958 }, { "epoch": 0.890837336993823, "grad_norm": 0.7228436371940766, "learning_rate": 3.0946422630750816e-07, "loss": 0.2699, "step": 25959 }, { "epoch": 0.8908716540837337, "grad_norm": 0.7607074426950176, "learning_rate": 3.092717777656862e-07, "loss": 0.2363, "step": 25960 }, { "epoch": 0.8909059711736445, "grad_norm": 0.9909344109516378, "learning_rate": 3.0907938717234353e-07, "loss": 0.2204, "step": 25961 }, { "epoch": 0.8909402882635552, "grad_norm": 1.051788580093564, "learning_rate": 3.088870545298539e-07, "loss": 0.2043, "step": 25962 }, { "epoch": 0.8909746053534661, "grad_norm": 0.931769974057696, "learning_rate": 3.086947798405976e-07, "loss": 0.3569, "step": 25963 }, { "epoch": 0.8910089224433768, "grad_norm": 0.8547734636523279, "learning_rate": 3.085025631069466e-07, "loss": 0.2777, "step": 25964 }, { "epoch": 0.8910432395332876, "grad_norm": 0.7904136951335431, "learning_rate": 3.0831040433127736e-07, "loss": 0.2259, "step": 25965 }, { "epoch": 0.8910775566231983, "grad_norm": 0.8612266826653909, "learning_rate": 3.081183035159624e-07, "loss": 0.3083, "step": 25966 }, { "epoch": 0.8911118737131092, "grad_norm": 0.7241747833637343, "learning_rate": 3.0792626066337595e-07, "loss": 0.2542, "step": 25967 }, { "epoch": 0.8911461908030199, "grad_norm": 0.9164544446444125, "learning_rate": 3.0773427577589e-07, "loss": 0.3073, "step": 25968 }, { "epoch": 0.8911805078929307, "grad_norm": 0.792226664361213, "learning_rate": 3.07542348855876e-07, "loss": 0.2773, "step": 25969 }, { "epoch": 0.8912148249828414, "grad_norm": 0.8555152222068882, "learning_rate": 3.0735047990570697e-07, "loss": 0.2672, "step": 25970 }, { "epoch": 0.8912491420727522, "grad_norm": 0.7558197148223447, "learning_rate": 3.0715866892774946e-07, "loss": 0.2562, "step": 25971 }, { "epoch": 0.891283459162663, "grad_norm": 0.8843037972939938, "learning_rate": 3.069669159243771e-07, "loss": 0.2584, "step": 25972 }, { "epoch": 0.8913177762525738, "grad_norm": 0.8830669971308176, "learning_rate": 3.067752208979563e-07, "loss": 0.2859, "step": 25973 }, { "epoch": 0.8913520933424846, "grad_norm": 0.8277973169789172, "learning_rate": 3.065835838508557e-07, "loss": 0.215, "step": 25974 }, { "epoch": 0.8913864104323953, "grad_norm": 0.7298325850740532, "learning_rate": 3.0639200478544295e-07, "loss": 0.2401, "step": 25975 }, { "epoch": 0.8914207275223061, "grad_norm": 0.7160821854610742, "learning_rate": 3.0620048370408495e-07, "loss": 0.2715, "step": 25976 }, { "epoch": 0.8914550446122169, "grad_norm": 0.7522344973926555, "learning_rate": 3.0600902060914704e-07, "loss": 0.2827, "step": 25977 }, { "epoch": 0.8914893617021277, "grad_norm": 0.7563170629433114, "learning_rate": 3.058176155029957e-07, "loss": 0.2501, "step": 25978 }, { "epoch": 0.8915236787920384, "grad_norm": 0.7777947139570858, "learning_rate": 3.056262683879951e-07, "loss": 0.2452, "step": 25979 }, { "epoch": 0.8915579958819492, "grad_norm": 0.7436401761213931, "learning_rate": 3.0543497926650733e-07, "loss": 0.2392, "step": 25980 }, { "epoch": 0.89159231297186, "grad_norm": 0.8919144091264974, "learning_rate": 3.052437481408982e-07, "loss": 0.2362, "step": 25981 }, { "epoch": 0.8916266300617708, "grad_norm": 0.9008572823213762, "learning_rate": 3.0505257501352915e-07, "loss": 0.2241, "step": 25982 }, { "epoch": 0.8916609471516815, "grad_norm": 0.7857940999751923, "learning_rate": 3.0486145988676053e-07, "loss": 0.2616, "step": 25983 }, { "epoch": 0.8916952642415923, "grad_norm": 0.7398708910672583, "learning_rate": 3.046704027629566e-07, "loss": 0.308, "step": 25984 }, { "epoch": 0.891729581331503, "grad_norm": 0.8116653118221863, "learning_rate": 3.0447940364447427e-07, "loss": 0.1979, "step": 25985 }, { "epoch": 0.8917638984214139, "grad_norm": 0.8227258402136576, "learning_rate": 3.042884625336745e-07, "loss": 0.2445, "step": 25986 }, { "epoch": 0.8917982155113247, "grad_norm": 0.743376986494657, "learning_rate": 3.040975794329165e-07, "loss": 0.2754, "step": 25987 }, { "epoch": 0.8918325326012354, "grad_norm": 0.7592636972095713, "learning_rate": 3.039067543445573e-07, "loss": 0.2544, "step": 25988 }, { "epoch": 0.8918668496911462, "grad_norm": 0.7471938700038209, "learning_rate": 3.0371598727095554e-07, "loss": 0.2779, "step": 25989 }, { "epoch": 0.891901166781057, "grad_norm": 0.7319689350167592, "learning_rate": 3.035252782144676e-07, "loss": 0.2815, "step": 25990 }, { "epoch": 0.8919354838709678, "grad_norm": 0.738523587230076, "learning_rate": 3.03334627177449e-07, "loss": 0.2353, "step": 25991 }, { "epoch": 0.8919698009608785, "grad_norm": 0.733214023458956, "learning_rate": 3.031440341622555e-07, "loss": 0.2832, "step": 25992 }, { "epoch": 0.8920041180507893, "grad_norm": 0.7065302951523037, "learning_rate": 3.0295349917124183e-07, "loss": 0.2646, "step": 25993 }, { "epoch": 0.8920384351407, "grad_norm": 0.770785142416468, "learning_rate": 3.027630222067601e-07, "loss": 0.2654, "step": 25994 }, { "epoch": 0.8920727522306109, "grad_norm": 0.8180286682049598, "learning_rate": 3.0257260327116675e-07, "loss": 0.2553, "step": 25995 }, { "epoch": 0.8921070693205216, "grad_norm": 0.7962803411020345, "learning_rate": 3.02382242366811e-07, "loss": 0.3167, "step": 25996 }, { "epoch": 0.8921413864104324, "grad_norm": 0.7754566089127413, "learning_rate": 3.0219193949604486e-07, "loss": 0.3169, "step": 25997 }, { "epoch": 0.8921757035003431, "grad_norm": 0.7180474768194651, "learning_rate": 3.0200169466122196e-07, "loss": 0.3045, "step": 25998 }, { "epoch": 0.8922100205902539, "grad_norm": 0.8391169922289404, "learning_rate": 3.018115078646899e-07, "loss": 0.2474, "step": 25999 }, { "epoch": 0.8922443376801648, "grad_norm": 0.7680850466647128, "learning_rate": 3.0162137910879905e-07, "loss": 0.2416, "step": 26000 }, { "epoch": 0.8922786547700755, "grad_norm": 0.7233745758094463, "learning_rate": 3.0143130839589865e-07, "loss": 0.2904, "step": 26001 }, { "epoch": 0.8923129718599863, "grad_norm": 0.7761375078111254, "learning_rate": 3.0124129572833616e-07, "loss": 0.2244, "step": 26002 }, { "epoch": 0.892347288949897, "grad_norm": 0.9125438699699231, "learning_rate": 3.0105134110845924e-07, "loss": 0.3122, "step": 26003 }, { "epoch": 0.8923816060398079, "grad_norm": 0.7607770513527773, "learning_rate": 3.0086144453861486e-07, "loss": 0.3088, "step": 26004 }, { "epoch": 0.8924159231297186, "grad_norm": 0.703391496487745, "learning_rate": 3.00671606021149e-07, "loss": 0.2201, "step": 26005 }, { "epoch": 0.8924502402196294, "grad_norm": 0.7698565475672582, "learning_rate": 3.004818255584052e-07, "loss": 0.2086, "step": 26006 }, { "epoch": 0.8924845573095401, "grad_norm": 0.7710437944413019, "learning_rate": 3.0029210315273116e-07, "loss": 0.2386, "step": 26007 }, { "epoch": 0.8925188743994509, "grad_norm": 0.7120171348377597, "learning_rate": 3.0010243880646774e-07, "loss": 0.2581, "step": 26008 }, { "epoch": 0.8925531914893617, "grad_norm": 0.9495947253078129, "learning_rate": 2.999128325219591e-07, "loss": 0.2743, "step": 26009 }, { "epoch": 0.8925875085792725, "grad_norm": 0.8562033627908203, "learning_rate": 2.9972328430154795e-07, "loss": 0.2774, "step": 26010 }, { "epoch": 0.8926218256691832, "grad_norm": 0.8242587684946137, "learning_rate": 2.995337941475751e-07, "loss": 0.2631, "step": 26011 }, { "epoch": 0.892656142759094, "grad_norm": 0.81235560343278, "learning_rate": 2.9934436206238257e-07, "loss": 0.2664, "step": 26012 }, { "epoch": 0.8926904598490049, "grad_norm": 0.6876943311383723, "learning_rate": 2.991549880483097e-07, "loss": 0.2322, "step": 26013 }, { "epoch": 0.8927247769389156, "grad_norm": 0.747061433856223, "learning_rate": 2.9896567210769723e-07, "loss": 0.223, "step": 26014 }, { "epoch": 0.8927590940288264, "grad_norm": 0.7449498009424074, "learning_rate": 2.987764142428817e-07, "loss": 0.2732, "step": 26015 }, { "epoch": 0.8927934111187371, "grad_norm": 0.74863473411519, "learning_rate": 2.985872144562035e-07, "loss": 0.2476, "step": 26016 }, { "epoch": 0.8928277282086479, "grad_norm": 0.7129477548875011, "learning_rate": 2.9839807274999857e-07, "loss": 0.2023, "step": 26017 }, { "epoch": 0.8928620452985587, "grad_norm": 0.7696879177867779, "learning_rate": 2.982089891266038e-07, "loss": 0.2346, "step": 26018 }, { "epoch": 0.8928963623884695, "grad_norm": 0.7049456655233001, "learning_rate": 2.9801996358835516e-07, "loss": 0.2778, "step": 26019 }, { "epoch": 0.8929306794783802, "grad_norm": 0.7574664967182766, "learning_rate": 2.978309961375869e-07, "loss": 0.2705, "step": 26020 }, { "epoch": 0.892964996568291, "grad_norm": 0.8579284323608518, "learning_rate": 2.976420867766361e-07, "loss": 0.2549, "step": 26021 }, { "epoch": 0.8929993136582017, "grad_norm": 0.8743841749060584, "learning_rate": 2.974532355078347e-07, "loss": 0.2918, "step": 26022 }, { "epoch": 0.8930336307481126, "grad_norm": 0.709490483611615, "learning_rate": 2.972644423335153e-07, "loss": 0.241, "step": 26023 }, { "epoch": 0.8930679478380233, "grad_norm": 0.7081383889458367, "learning_rate": 2.97075707256011e-07, "loss": 0.2828, "step": 26024 }, { "epoch": 0.8931022649279341, "grad_norm": 0.7412679834505278, "learning_rate": 2.968870302776533e-07, "loss": 0.2514, "step": 26025 }, { "epoch": 0.8931365820178448, "grad_norm": 0.783176523858831, "learning_rate": 2.9669841140077317e-07, "loss": 0.2708, "step": 26026 }, { "epoch": 0.8931708991077557, "grad_norm": 0.9870569686408668, "learning_rate": 2.965098506277009e-07, "loss": 0.2697, "step": 26027 }, { "epoch": 0.8932052161976665, "grad_norm": 0.7861568898735202, "learning_rate": 2.963213479607657e-07, "loss": 0.2471, "step": 26028 }, { "epoch": 0.8932395332875772, "grad_norm": 0.7147084576003764, "learning_rate": 2.9613290340229527e-07, "loss": 0.2308, "step": 26029 }, { "epoch": 0.893273850377488, "grad_norm": 0.7777949754756085, "learning_rate": 2.959445169546199e-07, "loss": 0.2218, "step": 26030 }, { "epoch": 0.8933081674673987, "grad_norm": 0.8316065344995448, "learning_rate": 2.957561886200655e-07, "loss": 0.2711, "step": 26031 }, { "epoch": 0.8933424845573096, "grad_norm": 0.7431689398760474, "learning_rate": 2.95567918400958e-07, "loss": 0.3015, "step": 26032 }, { "epoch": 0.8933768016472203, "grad_norm": 0.9178482217570324, "learning_rate": 2.9537970629962444e-07, "loss": 0.2759, "step": 26033 }, { "epoch": 0.8934111187371311, "grad_norm": 0.8223054700440326, "learning_rate": 2.951915523183896e-07, "loss": 0.2667, "step": 26034 }, { "epoch": 0.8934454358270418, "grad_norm": 0.7137515383752734, "learning_rate": 2.950034564595777e-07, "loss": 0.2521, "step": 26035 }, { "epoch": 0.8934797529169527, "grad_norm": 0.7171381760871719, "learning_rate": 2.9481541872551256e-07, "loss": 0.2591, "step": 26036 }, { "epoch": 0.8935140700068634, "grad_norm": 0.8449379611335996, "learning_rate": 2.9462743911851776e-07, "loss": 0.2327, "step": 26037 }, { "epoch": 0.8935483870967742, "grad_norm": 0.6731815835762518, "learning_rate": 2.944395176409137e-07, "loss": 0.2367, "step": 26038 }, { "epoch": 0.893582704186685, "grad_norm": 0.7531256493046625, "learning_rate": 2.942516542950252e-07, "loss": 0.2502, "step": 26039 }, { "epoch": 0.8936170212765957, "grad_norm": 0.7841029673044482, "learning_rate": 2.9406384908316986e-07, "loss": 0.2367, "step": 26040 }, { "epoch": 0.8936513383665066, "grad_norm": 0.7356861899897688, "learning_rate": 2.938761020076686e-07, "loss": 0.2384, "step": 26041 }, { "epoch": 0.8936856554564173, "grad_norm": 0.7579678085688611, "learning_rate": 2.9368841307084286e-07, "loss": 0.2285, "step": 26042 }, { "epoch": 0.8937199725463281, "grad_norm": 0.8189303924580518, "learning_rate": 2.935007822750086e-07, "loss": 0.2343, "step": 26043 }, { "epoch": 0.8937542896362388, "grad_norm": 0.7475711121152749, "learning_rate": 2.93313209622485e-07, "loss": 0.2214, "step": 26044 }, { "epoch": 0.8937886067261496, "grad_norm": 0.7635566639821828, "learning_rate": 2.931256951155892e-07, "loss": 0.2722, "step": 26045 }, { "epoch": 0.8938229238160604, "grad_norm": 0.804846093955541, "learning_rate": 2.9293823875663874e-07, "loss": 0.2389, "step": 26046 }, { "epoch": 0.8938572409059712, "grad_norm": 0.9410780981779179, "learning_rate": 2.927508405479468e-07, "loss": 0.2785, "step": 26047 }, { "epoch": 0.8938915579958819, "grad_norm": 0.7649186568994378, "learning_rate": 2.925635004918309e-07, "loss": 0.3135, "step": 26048 }, { "epoch": 0.8939258750857927, "grad_norm": 0.7122451132325407, "learning_rate": 2.923762185906054e-07, "loss": 0.2275, "step": 26049 }, { "epoch": 0.8939601921757035, "grad_norm": 0.7787715259413138, "learning_rate": 2.921889948465817e-07, "loss": 0.254, "step": 26050 }, { "epoch": 0.8939945092656143, "grad_norm": 0.818160259288098, "learning_rate": 2.920018292620758e-07, "loss": 0.2778, "step": 26051 }, { "epoch": 0.894028826355525, "grad_norm": 0.7425152207641332, "learning_rate": 2.9181472183939683e-07, "loss": 0.244, "step": 26052 }, { "epoch": 0.8940631434454358, "grad_norm": 0.7937713662255365, "learning_rate": 2.9162767258085925e-07, "loss": 0.2782, "step": 26053 }, { "epoch": 0.8940974605353466, "grad_norm": 0.7022944780228878, "learning_rate": 2.9144068148877105e-07, "loss": 0.2396, "step": 26054 }, { "epoch": 0.8941317776252574, "grad_norm": 0.6711515998868866, "learning_rate": 2.912537485654443e-07, "loss": 0.257, "step": 26055 }, { "epoch": 0.8941660947151682, "grad_norm": 0.7151416403990793, "learning_rate": 2.910668738131872e-07, "loss": 0.2434, "step": 26056 }, { "epoch": 0.8942004118050789, "grad_norm": 0.8946003022933892, "learning_rate": 2.9088005723430844e-07, "loss": 0.2645, "step": 26057 }, { "epoch": 0.8942347288949897, "grad_norm": 0.7747118679337672, "learning_rate": 2.9069329883111674e-07, "loss": 0.3499, "step": 26058 }, { "epoch": 0.8942690459849005, "grad_norm": 0.73801454545162, "learning_rate": 2.905065986059191e-07, "loss": 0.2524, "step": 26059 }, { "epoch": 0.8943033630748113, "grad_norm": 0.7048909847520887, "learning_rate": 2.903199565610221e-07, "loss": 0.2415, "step": 26060 }, { "epoch": 0.894337680164722, "grad_norm": 0.7298329506730408, "learning_rate": 2.901333726987293e-07, "loss": 0.2686, "step": 26061 }, { "epoch": 0.8943719972546328, "grad_norm": 1.17326836882412, "learning_rate": 2.8994684702134954e-07, "loss": 0.2514, "step": 26062 }, { "epoch": 0.8944063143445435, "grad_norm": 0.6884588218813914, "learning_rate": 2.897603795311843e-07, "loss": 0.2643, "step": 26063 }, { "epoch": 0.8944406314344544, "grad_norm": 0.7895920164430038, "learning_rate": 2.8957397023053666e-07, "loss": 0.2823, "step": 26064 }, { "epoch": 0.8944749485243652, "grad_norm": 0.6716562045445037, "learning_rate": 2.8938761912171267e-07, "loss": 0.255, "step": 26065 }, { "epoch": 0.8945092656142759, "grad_norm": 0.7421753420834051, "learning_rate": 2.89201326207012e-07, "loss": 0.2505, "step": 26066 }, { "epoch": 0.8945435827041867, "grad_norm": 0.7057846402493674, "learning_rate": 2.8901509148873686e-07, "loss": 0.2278, "step": 26067 }, { "epoch": 0.8945778997940974, "grad_norm": 1.546817919918616, "learning_rate": 2.8882891496918696e-07, "loss": 0.2572, "step": 26068 }, { "epoch": 0.8946122168840083, "grad_norm": 0.7063580895445221, "learning_rate": 2.8864279665066387e-07, "loss": 0.2916, "step": 26069 }, { "epoch": 0.894646533973919, "grad_norm": 0.8822738067721001, "learning_rate": 2.884567365354657e-07, "loss": 0.2193, "step": 26070 }, { "epoch": 0.8946808510638298, "grad_norm": 0.7331180323692537, "learning_rate": 2.8827073462589174e-07, "loss": 0.266, "step": 26071 }, { "epoch": 0.8947151681537405, "grad_norm": 0.6992260906803258, "learning_rate": 2.8808479092424014e-07, "loss": 0.2277, "step": 26072 }, { "epoch": 0.8947494852436514, "grad_norm": 0.7815721365961591, "learning_rate": 2.8789890543280575e-07, "loss": 0.286, "step": 26073 }, { "epoch": 0.8947838023335621, "grad_norm": 0.7890226466011891, "learning_rate": 2.877130781538884e-07, "loss": 0.2987, "step": 26074 }, { "epoch": 0.8948181194234729, "grad_norm": 0.8079188325435492, "learning_rate": 2.875273090897812e-07, "loss": 0.2816, "step": 26075 }, { "epoch": 0.8948524365133836, "grad_norm": 0.784759100969041, "learning_rate": 2.873415982427796e-07, "loss": 0.2146, "step": 26076 }, { "epoch": 0.8948867536032944, "grad_norm": 0.8135537568565998, "learning_rate": 2.871559456151785e-07, "loss": 0.2561, "step": 26077 }, { "epoch": 0.8949210706932053, "grad_norm": 0.804464573192292, "learning_rate": 2.8697035120927086e-07, "loss": 0.2934, "step": 26078 }, { "epoch": 0.894955387783116, "grad_norm": 0.7925771066660842, "learning_rate": 2.8678481502734945e-07, "loss": 0.3253, "step": 26079 }, { "epoch": 0.8949897048730268, "grad_norm": 0.8450184746943659, "learning_rate": 2.8659933707170686e-07, "loss": 0.2717, "step": 26080 }, { "epoch": 0.8950240219629375, "grad_norm": 0.8860766739638853, "learning_rate": 2.864139173446345e-07, "loss": 0.2764, "step": 26081 }, { "epoch": 0.8950583390528484, "grad_norm": 0.749684594503418, "learning_rate": 2.862285558484218e-07, "loss": 0.2258, "step": 26082 }, { "epoch": 0.8950926561427591, "grad_norm": 0.8037015663229911, "learning_rate": 2.860432525853607e-07, "loss": 0.2701, "step": 26083 }, { "epoch": 0.8951269732326699, "grad_norm": 0.7487250535944151, "learning_rate": 2.8585800755773775e-07, "loss": 0.2448, "step": 26084 }, { "epoch": 0.8951612903225806, "grad_norm": 0.8072901182703796, "learning_rate": 2.856728207678444e-07, "loss": 0.2228, "step": 26085 }, { "epoch": 0.8951956074124914, "grad_norm": 0.7274141660680259, "learning_rate": 2.8548769221796666e-07, "loss": 0.2673, "step": 26086 }, { "epoch": 0.8952299245024022, "grad_norm": 0.7808667923323148, "learning_rate": 2.853026219103905e-07, "loss": 0.2809, "step": 26087 }, { "epoch": 0.895264241592313, "grad_norm": 0.8471786019547547, "learning_rate": 2.8511760984740565e-07, "loss": 0.28, "step": 26088 }, { "epoch": 0.8952985586822237, "grad_norm": 0.7373856188242981, "learning_rate": 2.8493265603129485e-07, "loss": 0.2653, "step": 26089 }, { "epoch": 0.8953328757721345, "grad_norm": 0.7867724821185261, "learning_rate": 2.847477604643439e-07, "loss": 0.2339, "step": 26090 }, { "epoch": 0.8953671928620452, "grad_norm": 0.936241580451783, "learning_rate": 2.8456292314883673e-07, "loss": 0.2624, "step": 26091 }, { "epoch": 0.8954015099519561, "grad_norm": 0.8922089680499787, "learning_rate": 2.843781440870569e-07, "loss": 0.2511, "step": 26092 }, { "epoch": 0.8954358270418669, "grad_norm": 0.7392984922001539, "learning_rate": 2.841934232812876e-07, "loss": 0.2647, "step": 26093 }, { "epoch": 0.8954701441317776, "grad_norm": 0.7586231882100878, "learning_rate": 2.8400876073381044e-07, "loss": 0.2794, "step": 26094 }, { "epoch": 0.8955044612216884, "grad_norm": 0.7512441381813734, "learning_rate": 2.838241564469074e-07, "loss": 0.2189, "step": 26095 }, { "epoch": 0.8955387783115992, "grad_norm": 0.7256688285105486, "learning_rate": 2.836396104228567e-07, "loss": 0.2041, "step": 26096 }, { "epoch": 0.89557309540151, "grad_norm": 0.7382864693380277, "learning_rate": 2.834551226639415e-07, "loss": 0.2711, "step": 26097 }, { "epoch": 0.8956074124914207, "grad_norm": 0.7824041239626459, "learning_rate": 2.8327069317243827e-07, "loss": 0.2472, "step": 26098 }, { "epoch": 0.8956417295813315, "grad_norm": 0.8085388681312711, "learning_rate": 2.830863219506269e-07, "loss": 0.2535, "step": 26099 }, { "epoch": 0.8956760466712422, "grad_norm": 0.7571188873013083, "learning_rate": 2.8290200900078447e-07, "loss": 0.235, "step": 26100 }, { "epoch": 0.8957103637611531, "grad_norm": 0.8378422087618504, "learning_rate": 2.82717754325188e-07, "loss": 0.2793, "step": 26101 }, { "epoch": 0.8957446808510638, "grad_norm": 0.852399919890961, "learning_rate": 2.825335579261135e-07, "loss": 0.296, "step": 26102 }, { "epoch": 0.8957789979409746, "grad_norm": 0.7906149872432588, "learning_rate": 2.823494198058374e-07, "loss": 0.2369, "step": 26103 }, { "epoch": 0.8958133150308853, "grad_norm": 0.8027603575039812, "learning_rate": 2.8216533996663466e-07, "loss": 0.3072, "step": 26104 }, { "epoch": 0.8958476321207962, "grad_norm": 0.7603354438772868, "learning_rate": 2.8198131841077726e-07, "loss": 0.2669, "step": 26105 }, { "epoch": 0.895881949210707, "grad_norm": 0.8027528928804852, "learning_rate": 2.8179735514054117e-07, "loss": 0.2462, "step": 26106 }, { "epoch": 0.8959162663006177, "grad_norm": 0.8503353008940538, "learning_rate": 2.816134501581974e-07, "loss": 0.2385, "step": 26107 }, { "epoch": 0.8959505833905285, "grad_norm": 0.7267789076555757, "learning_rate": 2.8142960346601745e-07, "loss": 0.2632, "step": 26108 }, { "epoch": 0.8959849004804392, "grad_norm": 0.7338853715443228, "learning_rate": 2.81245815066275e-07, "loss": 0.224, "step": 26109 }, { "epoch": 0.8960192175703501, "grad_norm": 0.8420583341006443, "learning_rate": 2.8106208496123834e-07, "loss": 0.239, "step": 26110 }, { "epoch": 0.8960535346602608, "grad_norm": 0.8207015710397036, "learning_rate": 2.808784131531778e-07, "loss": 0.3439, "step": 26111 }, { "epoch": 0.8960878517501716, "grad_norm": 0.7890584924756828, "learning_rate": 2.806947996443621e-07, "loss": 0.2389, "step": 26112 }, { "epoch": 0.8961221688400823, "grad_norm": 1.138883933776656, "learning_rate": 2.8051124443706056e-07, "loss": 0.2412, "step": 26113 }, { "epoch": 0.8961564859299931, "grad_norm": 0.7491637309116445, "learning_rate": 2.8032774753353976e-07, "loss": 0.269, "step": 26114 }, { "epoch": 0.896190803019904, "grad_norm": 0.7757141391742747, "learning_rate": 2.8014430893606726e-07, "loss": 0.2753, "step": 26115 }, { "epoch": 0.8962251201098147, "grad_norm": 0.7349879102992363, "learning_rate": 2.7996092864690903e-07, "loss": 0.2776, "step": 26116 }, { "epoch": 0.8962594371997255, "grad_norm": 0.739621330019037, "learning_rate": 2.797776066683305e-07, "loss": 0.2421, "step": 26117 }, { "epoch": 0.8962937542896362, "grad_norm": 0.7055103414863672, "learning_rate": 2.795943430025971e-07, "loss": 0.285, "step": 26118 }, { "epoch": 0.8963280713795471, "grad_norm": 0.8059369001158733, "learning_rate": 2.7941113765197093e-07, "loss": 0.2327, "step": 26119 }, { "epoch": 0.8963623884694578, "grad_norm": 0.8303603178406145, "learning_rate": 2.792279906187173e-07, "loss": 0.3052, "step": 26120 }, { "epoch": 0.8963967055593686, "grad_norm": 0.721381815713464, "learning_rate": 2.7904490190509783e-07, "loss": 0.2259, "step": 26121 }, { "epoch": 0.8964310226492793, "grad_norm": 0.7400647833005226, "learning_rate": 2.7886187151337397e-07, "loss": 0.2711, "step": 26122 }, { "epoch": 0.8964653397391901, "grad_norm": 0.7251535890921723, "learning_rate": 2.786788994458078e-07, "loss": 0.301, "step": 26123 }, { "epoch": 0.8964996568291009, "grad_norm": 0.750750901053235, "learning_rate": 2.784959857046593e-07, "loss": 0.2617, "step": 26124 }, { "epoch": 0.8965339739190117, "grad_norm": 0.7835684397240625, "learning_rate": 2.7831313029218765e-07, "loss": 0.3024, "step": 26125 }, { "epoch": 0.8965682910089224, "grad_norm": 0.7480411840803227, "learning_rate": 2.7813033321065276e-07, "loss": 0.2268, "step": 26126 }, { "epoch": 0.8966026080988332, "grad_norm": 0.869291893744387, "learning_rate": 2.7794759446231337e-07, "loss": 0.3111, "step": 26127 }, { "epoch": 0.896636925188744, "grad_norm": 0.801431267827561, "learning_rate": 2.7776491404942427e-07, "loss": 0.2252, "step": 26128 }, { "epoch": 0.8966712422786548, "grad_norm": 0.7492038643232856, "learning_rate": 2.7758229197424546e-07, "loss": 0.2368, "step": 26129 }, { "epoch": 0.8967055593685656, "grad_norm": 0.8265820928727832, "learning_rate": 2.7739972823903116e-07, "loss": 0.3121, "step": 26130 }, { "epoch": 0.8967398764584763, "grad_norm": 0.7841740580767532, "learning_rate": 2.772172228460362e-07, "loss": 0.2561, "step": 26131 }, { "epoch": 0.896774193548387, "grad_norm": 0.8454116782126091, "learning_rate": 2.7703477579751783e-07, "loss": 0.2394, "step": 26132 }, { "epoch": 0.8968085106382979, "grad_norm": 0.7668709170064127, "learning_rate": 2.7685238709572793e-07, "loss": 0.2422, "step": 26133 }, { "epoch": 0.8968428277282087, "grad_norm": 0.7494278853162284, "learning_rate": 2.766700567429198e-07, "loss": 0.2339, "step": 26134 }, { "epoch": 0.8968771448181194, "grad_norm": 0.756914996864353, "learning_rate": 2.7648778474134607e-07, "loss": 0.2307, "step": 26135 }, { "epoch": 0.8969114619080302, "grad_norm": 0.7460163072082867, "learning_rate": 2.7630557109325885e-07, "loss": 0.2937, "step": 26136 }, { "epoch": 0.8969457789979409, "grad_norm": 0.7580626122697849, "learning_rate": 2.7612341580090905e-07, "loss": 0.3051, "step": 26137 }, { "epoch": 0.8969800960878518, "grad_norm": 0.905277695928584, "learning_rate": 2.759413188665472e-07, "loss": 0.2746, "step": 26138 }, { "epoch": 0.8970144131777625, "grad_norm": 0.788631041264578, "learning_rate": 2.7575928029242305e-07, "loss": 0.2541, "step": 26139 }, { "epoch": 0.8970487302676733, "grad_norm": 1.1245568143777966, "learning_rate": 2.755773000807832e-07, "loss": 0.3662, "step": 26140 }, { "epoch": 0.897083047357584, "grad_norm": 0.8558015859955649, "learning_rate": 2.7539537823387973e-07, "loss": 0.2404, "step": 26141 }, { "epoch": 0.8971173644474949, "grad_norm": 0.8024259978620488, "learning_rate": 2.7521351475395697e-07, "loss": 0.2412, "step": 26142 }, { "epoch": 0.8971516815374057, "grad_norm": 0.6778687778444771, "learning_rate": 2.750317096432631e-07, "loss": 0.2638, "step": 26143 }, { "epoch": 0.8971859986273164, "grad_norm": 0.7651460542558156, "learning_rate": 2.7484996290404353e-07, "loss": 0.331, "step": 26144 }, { "epoch": 0.8972203157172272, "grad_norm": 0.7850310767573897, "learning_rate": 2.7466827453854317e-07, "loss": 0.2547, "step": 26145 }, { "epoch": 0.8972546328071379, "grad_norm": 0.7157423787877248, "learning_rate": 2.7448664454900744e-07, "loss": 0.2734, "step": 26146 }, { "epoch": 0.8972889498970488, "grad_norm": 0.8051375399414551, "learning_rate": 2.743050729376801e-07, "loss": 0.341, "step": 26147 }, { "epoch": 0.8973232669869595, "grad_norm": 0.7236783309652455, "learning_rate": 2.741235597068037e-07, "loss": 0.215, "step": 26148 }, { "epoch": 0.8973575840768703, "grad_norm": 0.7954447047694347, "learning_rate": 2.7394210485862103e-07, "loss": 0.2604, "step": 26149 }, { "epoch": 0.897391901166781, "grad_norm": 0.745924968773836, "learning_rate": 2.737607083953742e-07, "loss": 0.2629, "step": 26150 }, { "epoch": 0.8974262182566919, "grad_norm": 0.7506114628958417, "learning_rate": 2.735793703193018e-07, "loss": 0.2708, "step": 26151 }, { "epoch": 0.8974605353466026, "grad_norm": 0.8322946215130617, "learning_rate": 2.7339809063264664e-07, "loss": 0.2536, "step": 26152 }, { "epoch": 0.8974948524365134, "grad_norm": 0.8333456021804133, "learning_rate": 2.732168693376486e-07, "loss": 0.2398, "step": 26153 }, { "epoch": 0.8975291695264241, "grad_norm": 0.7523980600859412, "learning_rate": 2.730357064365435e-07, "loss": 0.2291, "step": 26154 }, { "epoch": 0.8975634866163349, "grad_norm": 0.8586042148102253, "learning_rate": 2.728546019315725e-07, "loss": 0.2475, "step": 26155 }, { "epoch": 0.8975978037062458, "grad_norm": 0.8613220998423247, "learning_rate": 2.72673555824971e-07, "loss": 0.2841, "step": 26156 }, { "epoch": 0.8976321207961565, "grad_norm": 0.7692607839074274, "learning_rate": 2.7249256811897607e-07, "loss": 0.268, "step": 26157 }, { "epoch": 0.8976664378860673, "grad_norm": 0.7780884931991864, "learning_rate": 2.723116388158237e-07, "loss": 0.3123, "step": 26158 }, { "epoch": 0.897700754975978, "grad_norm": 0.7407875046111436, "learning_rate": 2.7213076791774873e-07, "loss": 0.2274, "step": 26159 }, { "epoch": 0.8977350720658888, "grad_norm": 0.7035080479836748, "learning_rate": 2.7194995542698665e-07, "loss": 0.3036, "step": 26160 }, { "epoch": 0.8977693891557996, "grad_norm": 0.7672925677149173, "learning_rate": 2.7176920134577014e-07, "loss": 0.2305, "step": 26161 }, { "epoch": 0.8978037062457104, "grad_norm": 0.8096739875169405, "learning_rate": 2.7158850567633346e-07, "loss": 0.3095, "step": 26162 }, { "epoch": 0.8978380233356211, "grad_norm": 0.6773937576351844, "learning_rate": 2.714078684209065e-07, "loss": 0.1978, "step": 26163 }, { "epoch": 0.8978723404255319, "grad_norm": 0.7281484348278594, "learning_rate": 2.7122728958172363e-07, "loss": 0.2629, "step": 26164 }, { "epoch": 0.8979066575154427, "grad_norm": 0.8394393719466856, "learning_rate": 2.7104676916101357e-07, "loss": 0.2235, "step": 26165 }, { "epoch": 0.8979409746053535, "grad_norm": 0.719382121253379, "learning_rate": 2.7086630716100734e-07, "loss": 0.2659, "step": 26166 }, { "epoch": 0.8979752916952642, "grad_norm": 0.88537161164769, "learning_rate": 2.706859035839349e-07, "loss": 0.2496, "step": 26167 }, { "epoch": 0.898009608785175, "grad_norm": 0.8092707966634628, "learning_rate": 2.7050555843202374e-07, "loss": 0.2758, "step": 26168 }, { "epoch": 0.8980439258750857, "grad_norm": 0.9275923680851995, "learning_rate": 2.703252717075022e-07, "loss": 0.255, "step": 26169 }, { "epoch": 0.8980782429649966, "grad_norm": 0.8454615108426283, "learning_rate": 2.701450434125974e-07, "loss": 0.2539, "step": 26170 }, { "epoch": 0.8981125600549074, "grad_norm": 0.7600904135014906, "learning_rate": 2.6996487354953746e-07, "loss": 0.2906, "step": 26171 }, { "epoch": 0.8981468771448181, "grad_norm": 0.8411418039648731, "learning_rate": 2.6978476212054516e-07, "loss": 0.2923, "step": 26172 }, { "epoch": 0.8981811942347289, "grad_norm": 0.8056266297879641, "learning_rate": 2.696047091278486e-07, "loss": 0.2499, "step": 26173 }, { "epoch": 0.8982155113246397, "grad_norm": 1.265523645461088, "learning_rate": 2.6942471457367004e-07, "loss": 0.2474, "step": 26174 }, { "epoch": 0.8982498284145505, "grad_norm": 0.7874440689325943, "learning_rate": 2.692447784602331e-07, "loss": 0.2649, "step": 26175 }, { "epoch": 0.8982841455044612, "grad_norm": 0.7758723813054962, "learning_rate": 2.6906490078976334e-07, "loss": 0.2555, "step": 26176 }, { "epoch": 0.898318462594372, "grad_norm": 0.7958130293868896, "learning_rate": 2.688850815644789e-07, "loss": 0.3795, "step": 26177 }, { "epoch": 0.8983527796842827, "grad_norm": 0.8573424645906016, "learning_rate": 2.687053207866047e-07, "loss": 0.2583, "step": 26178 }, { "epoch": 0.8983870967741936, "grad_norm": 0.8251182634706624, "learning_rate": 2.685256184583601e-07, "loss": 0.2616, "step": 26179 }, { "epoch": 0.8984214138641043, "grad_norm": 0.7028572461548354, "learning_rate": 2.683459745819644e-07, "loss": 0.2543, "step": 26180 }, { "epoch": 0.8984557309540151, "grad_norm": 0.7833800117246855, "learning_rate": 2.681663891596381e-07, "loss": 0.2454, "step": 26181 }, { "epoch": 0.8984900480439258, "grad_norm": 0.7341738653354315, "learning_rate": 2.679868621935988e-07, "loss": 0.3562, "step": 26182 }, { "epoch": 0.8985243651338366, "grad_norm": 0.8199254287112939, "learning_rate": 2.6780739368606533e-07, "loss": 0.3116, "step": 26183 }, { "epoch": 0.8985586822237475, "grad_norm": 0.83532455148629, "learning_rate": 2.676279836392537e-07, "loss": 0.2478, "step": 26184 }, { "epoch": 0.8985929993136582, "grad_norm": 1.2127693533340256, "learning_rate": 2.674486320553821e-07, "loss": 0.2421, "step": 26185 }, { "epoch": 0.898627316403569, "grad_norm": 0.6981548164671428, "learning_rate": 2.672693389366637e-07, "loss": 0.2762, "step": 26186 }, { "epoch": 0.8986616334934797, "grad_norm": 0.7851123991454216, "learning_rate": 2.670901042853158e-07, "loss": 0.31, "step": 26187 }, { "epoch": 0.8986959505833906, "grad_norm": 0.92431869985041, "learning_rate": 2.6691092810355145e-07, "loss": 0.2439, "step": 26188 }, { "epoch": 0.8987302676733013, "grad_norm": 0.7992774472039685, "learning_rate": 2.66731810393584e-07, "loss": 0.2584, "step": 26189 }, { "epoch": 0.8987645847632121, "grad_norm": 0.7226619153078974, "learning_rate": 2.6655275115762657e-07, "loss": 0.1939, "step": 26190 }, { "epoch": 0.8987989018531228, "grad_norm": 0.705851324168354, "learning_rate": 2.6637375039789136e-07, "loss": 0.2161, "step": 26191 }, { "epoch": 0.8988332189430336, "grad_norm": 0.8183434801031109, "learning_rate": 2.661948081165894e-07, "loss": 0.2552, "step": 26192 }, { "epoch": 0.8988675360329444, "grad_norm": 0.9256532264768048, "learning_rate": 2.6601592431593157e-07, "loss": 0.2845, "step": 26193 }, { "epoch": 0.8989018531228552, "grad_norm": 0.7765725310426136, "learning_rate": 2.658370989981285e-07, "loss": 0.2315, "step": 26194 }, { "epoch": 0.898936170212766, "grad_norm": 0.8467438880342079, "learning_rate": 2.6565833216538715e-07, "loss": 0.253, "step": 26195 }, { "epoch": 0.8989704873026767, "grad_norm": 0.7535776517580078, "learning_rate": 2.6547962381991866e-07, "loss": 0.2516, "step": 26196 }, { "epoch": 0.8990048043925875, "grad_norm": 0.8368686031841686, "learning_rate": 2.6530097396392896e-07, "loss": 0.2566, "step": 26197 }, { "epoch": 0.8990391214824983, "grad_norm": 0.7587481667946331, "learning_rate": 2.651223825996246e-07, "loss": 0.3041, "step": 26198 }, { "epoch": 0.8990734385724091, "grad_norm": 0.8787797651263254, "learning_rate": 2.6494384972921503e-07, "loss": 0.2546, "step": 26199 }, { "epoch": 0.8991077556623198, "grad_norm": 0.7082016656555667, "learning_rate": 2.647653753549023e-07, "loss": 0.2304, "step": 26200 }, { "epoch": 0.8991420727522306, "grad_norm": 1.037451585389697, "learning_rate": 2.6458695947889247e-07, "loss": 0.2491, "step": 26201 }, { "epoch": 0.8991763898421414, "grad_norm": 1.1414996123573087, "learning_rate": 2.6440860210339036e-07, "loss": 0.3, "step": 26202 }, { "epoch": 0.8992107069320522, "grad_norm": 0.7487498510126953, "learning_rate": 2.6423030323059817e-07, "loss": 0.2927, "step": 26203 }, { "epoch": 0.8992450240219629, "grad_norm": 0.7168625483299491, "learning_rate": 2.640520628627197e-07, "loss": 0.2533, "step": 26204 }, { "epoch": 0.8992793411118737, "grad_norm": 0.7431167471235413, "learning_rate": 2.638738810019564e-07, "loss": 0.2549, "step": 26205 }, { "epoch": 0.8993136582017844, "grad_norm": 0.8033248245585937, "learning_rate": 2.6369575765051005e-07, "loss": 0.242, "step": 26206 }, { "epoch": 0.8993479752916953, "grad_norm": 0.7242506579754469, "learning_rate": 2.6351769281057926e-07, "loss": 0.2125, "step": 26207 }, { "epoch": 0.899382292381606, "grad_norm": 0.7348693551770861, "learning_rate": 2.633396864843668e-07, "loss": 0.2422, "step": 26208 }, { "epoch": 0.8994166094715168, "grad_norm": 0.8832025194869292, "learning_rate": 2.631617386740681e-07, "loss": 0.2534, "step": 26209 }, { "epoch": 0.8994509265614276, "grad_norm": 0.8392815870683565, "learning_rate": 2.6298384938188536e-07, "loss": 0.2622, "step": 26210 }, { "epoch": 0.8994852436513384, "grad_norm": 0.7779629344437804, "learning_rate": 2.62806018610014e-07, "loss": 0.2643, "step": 26211 }, { "epoch": 0.8995195607412492, "grad_norm": 0.7861512448235858, "learning_rate": 2.6262824636065054e-07, "loss": 0.2449, "step": 26212 }, { "epoch": 0.8995538778311599, "grad_norm": 0.7112820670292207, "learning_rate": 2.624505326359922e-07, "loss": 0.2762, "step": 26213 }, { "epoch": 0.8995881949210707, "grad_norm": 0.6918691677918677, "learning_rate": 2.622728774382338e-07, "loss": 0.2383, "step": 26214 }, { "epoch": 0.8996225120109814, "grad_norm": 0.8213771544050422, "learning_rate": 2.620952807695709e-07, "loss": 0.2467, "step": 26215 }, { "epoch": 0.8996568291008923, "grad_norm": 0.8191870848406716, "learning_rate": 2.619177426321967e-07, "loss": 0.2756, "step": 26216 }, { "epoch": 0.899691146190803, "grad_norm": 0.823682687820181, "learning_rate": 2.61740263028305e-07, "loss": 0.2835, "step": 26217 }, { "epoch": 0.8997254632807138, "grad_norm": 0.7816505114364675, "learning_rate": 2.6156284196008686e-07, "loss": 0.263, "step": 26218 }, { "epoch": 0.8997597803706245, "grad_norm": 0.7795720241262296, "learning_rate": 2.6138547942973655e-07, "loss": 0.367, "step": 26219 }, { "epoch": 0.8997940974605353, "grad_norm": 0.8270359651235168, "learning_rate": 2.612081754394441e-07, "loss": 0.255, "step": 26220 }, { "epoch": 0.8998284145504462, "grad_norm": 0.7703435690624831, "learning_rate": 2.610309299913988e-07, "loss": 0.2664, "step": 26221 }, { "epoch": 0.8998627316403569, "grad_norm": 0.711344789264701, "learning_rate": 2.608537430877922e-07, "loss": 0.2437, "step": 26222 }, { "epoch": 0.8998970487302677, "grad_norm": 0.7438148389782573, "learning_rate": 2.6067661473081153e-07, "loss": 0.2264, "step": 26223 }, { "epoch": 0.8999313658201784, "grad_norm": 0.8298237009952696, "learning_rate": 2.6049954492264607e-07, "loss": 0.2686, "step": 26224 }, { "epoch": 0.8999656829100893, "grad_norm": 0.7828797997960177, "learning_rate": 2.60322533665483e-07, "loss": 0.2797, "step": 26225 }, { "epoch": 0.9, "grad_norm": 0.7998226543929331, "learning_rate": 2.601455809615089e-07, "loss": 0.2609, "step": 26226 }, { "epoch": 0.9000343170899108, "grad_norm": 0.764919924492486, "learning_rate": 2.5996868681291035e-07, "loss": 0.2844, "step": 26227 }, { "epoch": 0.9000686341798215, "grad_norm": 0.793164639753338, "learning_rate": 2.5979185122187225e-07, "loss": 0.3178, "step": 26228 }, { "epoch": 0.9001029512697323, "grad_norm": 0.7675152414147536, "learning_rate": 2.596150741905801e-07, "loss": 0.2303, "step": 26229 }, { "epoch": 0.9001372683596431, "grad_norm": 0.7685501522109278, "learning_rate": 2.594383557212149e-07, "loss": 0.2956, "step": 26230 }, { "epoch": 0.9001715854495539, "grad_norm": 0.7920296814575906, "learning_rate": 2.5926169581596385e-07, "loss": 0.2224, "step": 26231 }, { "epoch": 0.9002059025394646, "grad_norm": 0.7198402836643867, "learning_rate": 2.590850944770068e-07, "loss": 0.3301, "step": 26232 }, { "epoch": 0.9002402196293754, "grad_norm": 0.7448205031188168, "learning_rate": 2.589085517065254e-07, "loss": 0.2144, "step": 26233 }, { "epoch": 0.9002745367192863, "grad_norm": 0.7533131568150764, "learning_rate": 2.5873206750670175e-07, "loss": 0.2378, "step": 26234 }, { "epoch": 0.900308853809197, "grad_norm": 0.7881475030514116, "learning_rate": 2.5855564187971526e-07, "loss": 0.2803, "step": 26235 }, { "epoch": 0.9003431708991078, "grad_norm": 0.7718940275503412, "learning_rate": 2.583792748277464e-07, "loss": 0.2527, "step": 26236 }, { "epoch": 0.9003774879890185, "grad_norm": 0.794895496587266, "learning_rate": 2.582029663529728e-07, "loss": 0.2396, "step": 26237 }, { "epoch": 0.9004118050789293, "grad_norm": 0.7493612901980456, "learning_rate": 2.580267164575745e-07, "loss": 0.23, "step": 26238 }, { "epoch": 0.9004461221688401, "grad_norm": 0.8337807784647053, "learning_rate": 2.5785052514372576e-07, "loss": 0.2989, "step": 26239 }, { "epoch": 0.9004804392587509, "grad_norm": 0.7506779519752477, "learning_rate": 2.5767439241360603e-07, "loss": 0.2303, "step": 26240 }, { "epoch": 0.9005147563486616, "grad_norm": 0.8998841745270053, "learning_rate": 2.574983182693891e-07, "loss": 0.2389, "step": 26241 }, { "epoch": 0.9005490734385724, "grad_norm": 0.7909107921280492, "learning_rate": 2.573223027132521e-07, "loss": 0.2603, "step": 26242 }, { "epoch": 0.9005833905284831, "grad_norm": 0.8085090941406579, "learning_rate": 2.5714634574736885e-07, "loss": 0.2528, "step": 26243 }, { "epoch": 0.900617707618394, "grad_norm": 0.7090471962733323, "learning_rate": 2.569704473739121e-07, "loss": 0.3211, "step": 26244 }, { "epoch": 0.9006520247083047, "grad_norm": 0.7636182493921801, "learning_rate": 2.567946075950567e-07, "loss": 0.2598, "step": 26245 }, { "epoch": 0.9006863417982155, "grad_norm": 0.9621126945979214, "learning_rate": 2.566188264129732e-07, "loss": 0.2239, "step": 26246 }, { "epoch": 0.9007206588881262, "grad_norm": 0.811331175885291, "learning_rate": 2.5644310382983375e-07, "loss": 0.2927, "step": 26247 }, { "epoch": 0.9007549759780371, "grad_norm": 0.8235670145044287, "learning_rate": 2.5626743984780934e-07, "loss": 0.3275, "step": 26248 }, { "epoch": 0.9007892930679479, "grad_norm": 0.7824529772390502, "learning_rate": 2.5609183446906995e-07, "loss": 0.2999, "step": 26249 }, { "epoch": 0.9008236101578586, "grad_norm": 0.8377738242890277, "learning_rate": 2.559162876957855e-07, "loss": 0.2948, "step": 26250 }, { "epoch": 0.9008579272477694, "grad_norm": 0.8503647347519592, "learning_rate": 2.5574079953012364e-07, "loss": 0.24, "step": 26251 }, { "epoch": 0.9008922443376801, "grad_norm": 0.8743126030425342, "learning_rate": 2.555653699742544e-07, "loss": 0.2314, "step": 26252 }, { "epoch": 0.900926561427591, "grad_norm": 0.7051336936086701, "learning_rate": 2.553899990303416e-07, "loss": 0.223, "step": 26253 }, { "epoch": 0.9009608785175017, "grad_norm": 0.7490766134033662, "learning_rate": 2.5521468670055504e-07, "loss": 0.2946, "step": 26254 }, { "epoch": 0.9009951956074125, "grad_norm": 0.7446336069755528, "learning_rate": 2.550394329870587e-07, "loss": 0.236, "step": 26255 }, { "epoch": 0.9010295126973232, "grad_norm": 0.7890639550151416, "learning_rate": 2.54864237892018e-07, "loss": 0.2524, "step": 26256 }, { "epoch": 0.9010638297872341, "grad_norm": 0.8817263632292222, "learning_rate": 2.546891014175973e-07, "loss": 0.2931, "step": 26257 }, { "epoch": 0.9010981468771448, "grad_norm": 0.779159130822456, "learning_rate": 2.545140235659604e-07, "loss": 0.2754, "step": 26258 }, { "epoch": 0.9011324639670556, "grad_norm": 0.7188244253149448, "learning_rate": 2.543390043392696e-07, "loss": 0.2266, "step": 26259 }, { "epoch": 0.9011667810569663, "grad_norm": 0.7541910326873859, "learning_rate": 2.5416404373968803e-07, "loss": 0.2714, "step": 26260 }, { "epoch": 0.9012010981468771, "grad_norm": 0.8843260453995432, "learning_rate": 2.539891417693774e-07, "loss": 0.2263, "step": 26261 }, { "epoch": 0.901235415236788, "grad_norm": 0.7209823990601023, "learning_rate": 2.5381429843049586e-07, "loss": 0.282, "step": 26262 }, { "epoch": 0.9012697323266987, "grad_norm": 0.7800318628093332, "learning_rate": 2.536395137252057e-07, "loss": 0.2508, "step": 26263 }, { "epoch": 0.9013040494166095, "grad_norm": 0.8920842246343736, "learning_rate": 2.5346478765566676e-07, "loss": 0.2269, "step": 26264 }, { "epoch": 0.9013383665065202, "grad_norm": 0.7088342182981557, "learning_rate": 2.532901202240345e-07, "loss": 0.2454, "step": 26265 }, { "epoch": 0.901372683596431, "grad_norm": 0.8308218573429689, "learning_rate": 2.5311551143247014e-07, "loss": 0.3002, "step": 26266 }, { "epoch": 0.9014070006863418, "grad_norm": 0.9403798727483865, "learning_rate": 2.5294096128312905e-07, "loss": 0.2804, "step": 26267 }, { "epoch": 0.9014413177762526, "grad_norm": 0.8283238063844203, "learning_rate": 2.5276646977816785e-07, "loss": 0.2477, "step": 26268 }, { "epoch": 0.9014756348661633, "grad_norm": 0.8450577456661678, "learning_rate": 2.525920369197421e-07, "loss": 0.3224, "step": 26269 }, { "epoch": 0.9015099519560741, "grad_norm": 0.8770951346567633, "learning_rate": 2.5241766271000665e-07, "loss": 0.2191, "step": 26270 }, { "epoch": 0.901544269045985, "grad_norm": 0.7897992638691568, "learning_rate": 2.5224334715111545e-07, "loss": 0.2978, "step": 26271 }, { "epoch": 0.9015785861358957, "grad_norm": 0.908426659555481, "learning_rate": 2.5206909024522275e-07, "loss": 0.318, "step": 26272 }, { "epoch": 0.9016129032258065, "grad_norm": 0.7666331508499358, "learning_rate": 2.518948919944808e-07, "loss": 0.2156, "step": 26273 }, { "epoch": 0.9016472203157172, "grad_norm": 0.7824997853533745, "learning_rate": 2.517207524010418e-07, "loss": 0.2372, "step": 26274 }, { "epoch": 0.901681537405628, "grad_norm": 0.7259776941597683, "learning_rate": 2.515466714670578e-07, "loss": 0.2604, "step": 26275 }, { "epoch": 0.9017158544955388, "grad_norm": 0.7519352874013046, "learning_rate": 2.5137264919467716e-07, "loss": 0.2292, "step": 26276 }, { "epoch": 0.9017501715854496, "grad_norm": 0.842855815691584, "learning_rate": 2.511986855860521e-07, "loss": 0.2861, "step": 26277 }, { "epoch": 0.9017844886753603, "grad_norm": 0.9306342243492344, "learning_rate": 2.5102478064333027e-07, "loss": 0.2922, "step": 26278 }, { "epoch": 0.9018188057652711, "grad_norm": 0.7753941009200308, "learning_rate": 2.508509343686605e-07, "loss": 0.2615, "step": 26279 }, { "epoch": 0.9018531228551819, "grad_norm": 0.7144917968082303, "learning_rate": 2.506771467641905e-07, "loss": 0.232, "step": 26280 }, { "epoch": 0.9018874399450927, "grad_norm": 0.8050532524503817, "learning_rate": 2.50503417832067e-07, "loss": 0.2389, "step": 26281 }, { "epoch": 0.9019217570350034, "grad_norm": 0.9783488282662797, "learning_rate": 2.503297475744365e-07, "loss": 0.3519, "step": 26282 }, { "epoch": 0.9019560741249142, "grad_norm": 0.7547690627499578, "learning_rate": 2.5015613599344404e-07, "loss": 0.2353, "step": 26283 }, { "epoch": 0.9019903912148249, "grad_norm": 0.8388814923871629, "learning_rate": 2.499825830912361e-07, "loss": 0.3062, "step": 26284 }, { "epoch": 0.9020247083047358, "grad_norm": 0.7400410128113698, "learning_rate": 2.4980908886995335e-07, "loss": 0.2298, "step": 26285 }, { "epoch": 0.9020590253946466, "grad_norm": 0.7897104495599111, "learning_rate": 2.496356533317423e-07, "loss": 0.2528, "step": 26286 }, { "epoch": 0.9020933424845573, "grad_norm": 0.7857734856009906, "learning_rate": 2.4946227647874467e-07, "loss": 0.2741, "step": 26287 }, { "epoch": 0.902127659574468, "grad_norm": 0.7576712910831485, "learning_rate": 2.4928895831310087e-07, "loss": 0.2454, "step": 26288 }, { "epoch": 0.9021619766643788, "grad_norm": 0.7335383496211713, "learning_rate": 2.4911569883695475e-07, "loss": 0.2651, "step": 26289 }, { "epoch": 0.9021962937542897, "grad_norm": 0.7696472156449613, "learning_rate": 2.4894249805244406e-07, "loss": 0.2607, "step": 26290 }, { "epoch": 0.9022306108442004, "grad_norm": 0.7722322633593706, "learning_rate": 2.4876935596170995e-07, "loss": 0.2073, "step": 26291 }, { "epoch": 0.9022649279341112, "grad_norm": 0.8208400014040209, "learning_rate": 2.485962725668906e-07, "loss": 0.2599, "step": 26292 }, { "epoch": 0.9022992450240219, "grad_norm": 0.7368107973739435, "learning_rate": 2.484232478701254e-07, "loss": 0.2664, "step": 26293 }, { "epoch": 0.9023335621139328, "grad_norm": 0.7211351211020142, "learning_rate": 2.482502818735505e-07, "loss": 0.2539, "step": 26294 }, { "epoch": 0.9023678792038435, "grad_norm": 0.696343127483645, "learning_rate": 2.480773745793036e-07, "loss": 0.2336, "step": 26295 }, { "epoch": 0.9024021962937543, "grad_norm": 0.8127472765475162, "learning_rate": 2.479045259895213e-07, "loss": 0.2743, "step": 26296 }, { "epoch": 0.902436513383665, "grad_norm": 0.8525341559803611, "learning_rate": 2.4773173610633695e-07, "loss": 0.301, "step": 26297 }, { "epoch": 0.9024708304735758, "grad_norm": 0.6528144705611749, "learning_rate": 2.4755900493188766e-07, "loss": 0.2343, "step": 26298 }, { "epoch": 0.9025051475634867, "grad_norm": 0.7384688285818677, "learning_rate": 2.473863324683051e-07, "loss": 0.2758, "step": 26299 }, { "epoch": 0.9025394646533974, "grad_norm": 0.8452229703339897, "learning_rate": 2.472137187177237e-07, "loss": 0.2161, "step": 26300 }, { "epoch": 0.9025737817433082, "grad_norm": 0.8313510654431461, "learning_rate": 2.470411636822756e-07, "loss": 0.2914, "step": 26301 }, { "epoch": 0.9026080988332189, "grad_norm": 0.7091084587743953, "learning_rate": 2.468686673640913e-07, "loss": 0.2422, "step": 26302 }, { "epoch": 0.9026424159231298, "grad_norm": 0.8101156461430147, "learning_rate": 2.4669622976530526e-07, "loss": 0.2705, "step": 26303 }, { "epoch": 0.9026767330130405, "grad_norm": 0.8760329190947483, "learning_rate": 2.465238508880441e-07, "loss": 0.2398, "step": 26304 }, { "epoch": 0.9027110501029513, "grad_norm": 0.8263557332615715, "learning_rate": 2.4635153073443884e-07, "loss": 0.2585, "step": 26305 }, { "epoch": 0.902745367192862, "grad_norm": 0.803954661924301, "learning_rate": 2.461792693066184e-07, "loss": 0.2824, "step": 26306 }, { "epoch": 0.9027796842827728, "grad_norm": 0.8281789947758391, "learning_rate": 2.460070666067116e-07, "loss": 0.2715, "step": 26307 }, { "epoch": 0.9028140013726836, "grad_norm": 0.7782899457354376, "learning_rate": 2.458349226368428e-07, "loss": 0.3003, "step": 26308 }, { "epoch": 0.9028483184625944, "grad_norm": 0.8471267318294337, "learning_rate": 2.456628373991421e-07, "loss": 0.2602, "step": 26309 }, { "epoch": 0.9028826355525051, "grad_norm": 0.7354050788093437, "learning_rate": 2.4549081089573436e-07, "loss": 0.2567, "step": 26310 }, { "epoch": 0.9029169526424159, "grad_norm": 0.731133372332223, "learning_rate": 2.453188431287429e-07, "loss": 0.2476, "step": 26311 }, { "epoch": 0.9029512697323266, "grad_norm": 0.6888481613099259, "learning_rate": 2.4514693410029547e-07, "loss": 0.2495, "step": 26312 }, { "epoch": 0.9029855868222375, "grad_norm": 0.8916829463422766, "learning_rate": 2.449750838125131e-07, "loss": 0.2974, "step": 26313 }, { "epoch": 0.9030199039121483, "grad_norm": 0.8393130947451082, "learning_rate": 2.4480329226751974e-07, "loss": 0.2132, "step": 26314 }, { "epoch": 0.903054221002059, "grad_norm": 0.8388589345306443, "learning_rate": 2.4463155946743813e-07, "loss": 0.2093, "step": 26315 }, { "epoch": 0.9030885380919698, "grad_norm": 0.7788864615278054, "learning_rate": 2.444598854143887e-07, "loss": 0.2478, "step": 26316 }, { "epoch": 0.9031228551818806, "grad_norm": 0.7022470541223761, "learning_rate": 2.4428827011049317e-07, "loss": 0.255, "step": 26317 }, { "epoch": 0.9031571722717914, "grad_norm": 0.7667696262125451, "learning_rate": 2.441167135578715e-07, "loss": 0.2951, "step": 26318 }, { "epoch": 0.9031914893617021, "grad_norm": 0.7177765783662132, "learning_rate": 2.439452157586442e-07, "loss": 0.2295, "step": 26319 }, { "epoch": 0.9032258064516129, "grad_norm": 0.8190358459813454, "learning_rate": 2.4377377671492673e-07, "loss": 0.2576, "step": 26320 }, { "epoch": 0.9032601235415236, "grad_norm": 0.7724333270940095, "learning_rate": 2.4360239642884033e-07, "loss": 0.314, "step": 26321 }, { "epoch": 0.9032944406314345, "grad_norm": 0.8948536494326794, "learning_rate": 2.4343107490250093e-07, "loss": 0.2601, "step": 26322 }, { "epoch": 0.9033287577213452, "grad_norm": 0.8126814439099649, "learning_rate": 2.432598121380242e-07, "loss": 0.2778, "step": 26323 }, { "epoch": 0.903363074811256, "grad_norm": 0.7467489477411989, "learning_rate": 2.430886081375272e-07, "loss": 0.2424, "step": 26324 }, { "epoch": 0.9033973919011667, "grad_norm": 0.7445276198370754, "learning_rate": 2.429174629031239e-07, "loss": 0.3062, "step": 26325 }, { "epoch": 0.9034317089910776, "grad_norm": 0.8202529699372684, "learning_rate": 2.427463764369298e-07, "loss": 0.2524, "step": 26326 }, { "epoch": 0.9034660260809884, "grad_norm": 0.7431615665391164, "learning_rate": 2.425753487410576e-07, "loss": 0.2659, "step": 26327 }, { "epoch": 0.9035003431708991, "grad_norm": 0.708915232057857, "learning_rate": 2.424043798176207e-07, "loss": 0.2358, "step": 26328 }, { "epoch": 0.9035346602608099, "grad_norm": 0.7013533925065567, "learning_rate": 2.422334696687295e-07, "loss": 0.2664, "step": 26329 }, { "epoch": 0.9035689773507206, "grad_norm": 0.7506857775841564, "learning_rate": 2.420626182964975e-07, "loss": 0.2396, "step": 26330 }, { "epoch": 0.9036032944406315, "grad_norm": 0.842421518160675, "learning_rate": 2.418918257030356e-07, "loss": 0.2836, "step": 26331 }, { "epoch": 0.9036376115305422, "grad_norm": 0.6960416564797026, "learning_rate": 2.417210918904511e-07, "loss": 0.2524, "step": 26332 }, { "epoch": 0.903671928620453, "grad_norm": 0.8078520121885998, "learning_rate": 2.415504168608568e-07, "loss": 0.3102, "step": 26333 }, { "epoch": 0.9037062457103637, "grad_norm": 0.8641352478497006, "learning_rate": 2.413798006163576e-07, "loss": 0.307, "step": 26334 }, { "epoch": 0.9037405628002745, "grad_norm": 0.6674299069184902, "learning_rate": 2.4120924315906403e-07, "loss": 0.2543, "step": 26335 }, { "epoch": 0.9037748798901853, "grad_norm": 0.832715870089943, "learning_rate": 2.410387444910817e-07, "loss": 0.2969, "step": 26336 }, { "epoch": 0.9038091969800961, "grad_norm": 0.8063286049273616, "learning_rate": 2.4086830461451716e-07, "loss": 0.2143, "step": 26337 }, { "epoch": 0.9038435140700068, "grad_norm": 0.7930150428850692, "learning_rate": 2.40697923531476e-07, "loss": 0.2629, "step": 26338 }, { "epoch": 0.9038778311599176, "grad_norm": 1.2295266682748327, "learning_rate": 2.4052760124406326e-07, "loss": 0.2644, "step": 26339 }, { "epoch": 0.9039121482498285, "grad_norm": 0.7810928290962773, "learning_rate": 2.403573377543833e-07, "loss": 0.2729, "step": 26340 }, { "epoch": 0.9039464653397392, "grad_norm": 0.7793289347347229, "learning_rate": 2.401871330645389e-07, "loss": 0.3359, "step": 26341 }, { "epoch": 0.90398078242965, "grad_norm": 0.7225203564791135, "learning_rate": 2.4001698717663335e-07, "loss": 0.2336, "step": 26342 }, { "epoch": 0.9040150995195607, "grad_norm": 0.7203778071160802, "learning_rate": 2.398469000927678e-07, "loss": 0.2596, "step": 26343 }, { "epoch": 0.9040494166094715, "grad_norm": 0.8369787254762805, "learning_rate": 2.39676871815045e-07, "loss": 0.2696, "step": 26344 }, { "epoch": 0.9040837336993823, "grad_norm": 1.1918917941539786, "learning_rate": 2.395069023455637e-07, "loss": 0.235, "step": 26345 }, { "epoch": 0.9041180507892931, "grad_norm": 0.768308477472588, "learning_rate": 2.3933699168642466e-07, "loss": 0.2744, "step": 26346 }, { "epoch": 0.9041523678792038, "grad_norm": 0.8151295763405532, "learning_rate": 2.3916713983972606e-07, "loss": 0.2952, "step": 26347 }, { "epoch": 0.9041866849691146, "grad_norm": 0.711933538654708, "learning_rate": 2.3899734680756737e-07, "loss": 0.2397, "step": 26348 }, { "epoch": 0.9042210020590254, "grad_norm": 0.836404123263166, "learning_rate": 2.388276125920458e-07, "loss": 0.2484, "step": 26349 }, { "epoch": 0.9042553191489362, "grad_norm": 0.8220597680014593, "learning_rate": 2.3865793719525796e-07, "loss": 0.3432, "step": 26350 }, { "epoch": 0.904289636238847, "grad_norm": 0.9019796329716893, "learning_rate": 2.3848832061930063e-07, "loss": 0.2613, "step": 26351 }, { "epoch": 0.9043239533287577, "grad_norm": 0.7627668100434338, "learning_rate": 2.3831876286626755e-07, "loss": 0.2469, "step": 26352 }, { "epoch": 0.9043582704186685, "grad_norm": 0.8026882469321827, "learning_rate": 2.3814926393825487e-07, "loss": 0.2303, "step": 26353 }, { "epoch": 0.9043925875085793, "grad_norm": 0.8963287712363105, "learning_rate": 2.3797982383735762e-07, "loss": 0.3156, "step": 26354 }, { "epoch": 0.9044269045984901, "grad_norm": 0.8567364231828046, "learning_rate": 2.3781044256566578e-07, "loss": 0.2694, "step": 26355 }, { "epoch": 0.9044612216884008, "grad_norm": 0.7021980317084491, "learning_rate": 2.3764112012527484e-07, "loss": 0.2049, "step": 26356 }, { "epoch": 0.9044955387783116, "grad_norm": 0.7392294247198118, "learning_rate": 2.3747185651827543e-07, "loss": 0.2168, "step": 26357 }, { "epoch": 0.9045298558682223, "grad_norm": 0.9372342097815808, "learning_rate": 2.373026517467586e-07, "loss": 0.2886, "step": 26358 }, { "epoch": 0.9045641729581332, "grad_norm": 0.9050400858570075, "learning_rate": 2.3713350581281437e-07, "loss": 0.3154, "step": 26359 }, { "epoch": 0.9045984900480439, "grad_norm": 0.6947690284165159, "learning_rate": 2.3696441871853327e-07, "loss": 0.3075, "step": 26360 }, { "epoch": 0.9046328071379547, "grad_norm": 0.6995743198250844, "learning_rate": 2.3679539046600307e-07, "loss": 0.2463, "step": 26361 }, { "epoch": 0.9046671242278654, "grad_norm": 0.7178152956848676, "learning_rate": 2.366264210573127e-07, "loss": 0.2521, "step": 26362 }, { "epoch": 0.9047014413177763, "grad_norm": 0.863677736219887, "learning_rate": 2.364575104945499e-07, "loss": 0.2952, "step": 26363 }, { "epoch": 0.904735758407687, "grad_norm": 0.7386375370400872, "learning_rate": 2.3628865877979966e-07, "loss": 0.2686, "step": 26364 }, { "epoch": 0.9047700754975978, "grad_norm": 0.7117458763214476, "learning_rate": 2.3611986591515034e-07, "loss": 0.2366, "step": 26365 }, { "epoch": 0.9048043925875086, "grad_norm": 0.743331779078337, "learning_rate": 2.359511319026847e-07, "loss": 0.2969, "step": 26366 }, { "epoch": 0.9048387096774193, "grad_norm": 0.8506070618966436, "learning_rate": 2.3578245674448942e-07, "loss": 0.2999, "step": 26367 }, { "epoch": 0.9048730267673302, "grad_norm": 0.676051052993841, "learning_rate": 2.3561384044264723e-07, "loss": 0.191, "step": 26368 }, { "epoch": 0.9049073438572409, "grad_norm": 0.9038634504253393, "learning_rate": 2.3544528299923986e-07, "loss": 0.2649, "step": 26369 }, { "epoch": 0.9049416609471517, "grad_norm": 0.770308643928933, "learning_rate": 2.352767844163534e-07, "loss": 0.2557, "step": 26370 }, { "epoch": 0.9049759780370624, "grad_norm": 0.7648256942231355, "learning_rate": 2.3510834469606557e-07, "loss": 0.2463, "step": 26371 }, { "epoch": 0.9050102951269733, "grad_norm": 0.7438906311466525, "learning_rate": 2.3493996384045924e-07, "loss": 0.255, "step": 26372 }, { "epoch": 0.905044612216884, "grad_norm": 0.7498473349650354, "learning_rate": 2.347716418516144e-07, "loss": 0.2625, "step": 26373 }, { "epoch": 0.9050789293067948, "grad_norm": 0.7457950186486042, "learning_rate": 2.3460337873160988e-07, "loss": 0.2351, "step": 26374 }, { "epoch": 0.9051132463967055, "grad_norm": 0.7941786278704076, "learning_rate": 2.3443517448252462e-07, "loss": 0.22, "step": 26375 }, { "epoch": 0.9051475634866163, "grad_norm": 0.8042771519715314, "learning_rate": 2.3426702910643696e-07, "loss": 0.2142, "step": 26376 }, { "epoch": 0.9051818805765272, "grad_norm": 0.7850199175527317, "learning_rate": 2.3409894260542465e-07, "loss": 0.2413, "step": 26377 }, { "epoch": 0.9052161976664379, "grad_norm": 0.6422111446058538, "learning_rate": 2.3393091498156162e-07, "loss": 0.2417, "step": 26378 }, { "epoch": 0.9052505147563487, "grad_norm": 0.7195053065503564, "learning_rate": 2.3376294623692675e-07, "loss": 0.2508, "step": 26379 }, { "epoch": 0.9052848318462594, "grad_norm": 0.7940399518874008, "learning_rate": 2.3359503637359338e-07, "loss": 0.2655, "step": 26380 }, { "epoch": 0.9053191489361702, "grad_norm": 0.8134960181523221, "learning_rate": 2.3342718539363596e-07, "loss": 0.2462, "step": 26381 }, { "epoch": 0.905353466026081, "grad_norm": 0.8505478226521842, "learning_rate": 2.3325939329912895e-07, "loss": 0.2341, "step": 26382 }, { "epoch": 0.9053877831159918, "grad_norm": 0.7903204709112396, "learning_rate": 2.3309166009214402e-07, "loss": 0.2815, "step": 26383 }, { "epoch": 0.9054221002059025, "grad_norm": 0.9014737474839677, "learning_rate": 2.3292398577475451e-07, "loss": 0.2933, "step": 26384 }, { "epoch": 0.9054564172958133, "grad_norm": 0.7730186951896548, "learning_rate": 2.3275637034903098e-07, "loss": 0.2271, "step": 26385 }, { "epoch": 0.9054907343857241, "grad_norm": 0.8528344835073555, "learning_rate": 2.3258881381704512e-07, "loss": 0.2633, "step": 26386 }, { "epoch": 0.9055250514756349, "grad_norm": 0.7613918946878627, "learning_rate": 2.3242131618086472e-07, "loss": 0.2992, "step": 26387 }, { "epoch": 0.9055593685655456, "grad_norm": 0.7572522659305198, "learning_rate": 2.32253877442562e-07, "loss": 0.2532, "step": 26388 }, { "epoch": 0.9055936856554564, "grad_norm": 0.8097134915023615, "learning_rate": 2.3208649760420364e-07, "loss": 0.3292, "step": 26389 }, { "epoch": 0.9056280027453671, "grad_norm": 0.7559035764011844, "learning_rate": 2.3191917666785745e-07, "loss": 0.2332, "step": 26390 }, { "epoch": 0.905662319835278, "grad_norm": 0.7666131666728846, "learning_rate": 2.3175191463559066e-07, "loss": 0.2661, "step": 26391 }, { "epoch": 0.9056966369251888, "grad_norm": 0.9365706579013479, "learning_rate": 2.3158471150946994e-07, "loss": 0.2766, "step": 26392 }, { "epoch": 0.9057309540150995, "grad_norm": 0.7211599996699907, "learning_rate": 2.314175672915603e-07, "loss": 0.2287, "step": 26393 }, { "epoch": 0.9057652711050103, "grad_norm": 0.7525611986950876, "learning_rate": 2.312504819839273e-07, "loss": 0.2862, "step": 26394 }, { "epoch": 0.9057995881949211, "grad_norm": 0.8110874597275444, "learning_rate": 2.3108345558863542e-07, "loss": 0.2975, "step": 26395 }, { "epoch": 0.9058339052848319, "grad_norm": 0.8028884601177371, "learning_rate": 2.3091648810774582e-07, "loss": 0.2678, "step": 26396 }, { "epoch": 0.9058682223747426, "grad_norm": 0.7685155667670599, "learning_rate": 2.30749579543324e-07, "loss": 0.2641, "step": 26397 }, { "epoch": 0.9059025394646534, "grad_norm": 0.8014566960475599, "learning_rate": 2.3058272989743002e-07, "loss": 0.2696, "step": 26398 }, { "epoch": 0.9059368565545641, "grad_norm": 0.7349505676680694, "learning_rate": 2.3041593917212668e-07, "loss": 0.2829, "step": 26399 }, { "epoch": 0.905971173644475, "grad_norm": 0.76593755630866, "learning_rate": 2.3024920736947344e-07, "loss": 0.2625, "step": 26400 }, { "epoch": 0.9060054907343857, "grad_norm": 0.7227967726875882, "learning_rate": 2.3008253449152918e-07, "loss": 0.2323, "step": 26401 }, { "epoch": 0.9060398078242965, "grad_norm": 0.7419182898966331, "learning_rate": 2.299159205403556e-07, "loss": 0.2371, "step": 26402 }, { "epoch": 0.9060741249142072, "grad_norm": 0.7536441339083649, "learning_rate": 2.2974936551800885e-07, "loss": 0.2885, "step": 26403 }, { "epoch": 0.906108442004118, "grad_norm": 0.8703738147820267, "learning_rate": 2.2958286942654673e-07, "loss": 0.2716, "step": 26404 }, { "epoch": 0.9061427590940289, "grad_norm": 0.8052636058080569, "learning_rate": 2.29416432268027e-07, "loss": 0.3041, "step": 26405 }, { "epoch": 0.9061770761839396, "grad_norm": 0.7425102999132867, "learning_rate": 2.2925005404450473e-07, "loss": 0.2515, "step": 26406 }, { "epoch": 0.9062113932738504, "grad_norm": 0.8466893389739617, "learning_rate": 2.2908373475803602e-07, "loss": 0.2201, "step": 26407 }, { "epoch": 0.9062457103637611, "grad_norm": 0.7255083234720561, "learning_rate": 2.2891747441067536e-07, "loss": 0.2352, "step": 26408 }, { "epoch": 0.906280027453672, "grad_norm": 0.853787339365246, "learning_rate": 2.2875127300447775e-07, "loss": 0.3196, "step": 26409 }, { "epoch": 0.9063143445435827, "grad_norm": 0.8688809819347689, "learning_rate": 2.285851305414938e-07, "loss": 0.2375, "step": 26410 }, { "epoch": 0.9063486616334935, "grad_norm": 0.7670867472071184, "learning_rate": 2.2841904702377903e-07, "loss": 0.2283, "step": 26411 }, { "epoch": 0.9063829787234042, "grad_norm": 1.2897082527018715, "learning_rate": 2.2825302245338354e-07, "loss": 0.2705, "step": 26412 }, { "epoch": 0.906417295813315, "grad_norm": 0.8140190314236757, "learning_rate": 2.2808705683235732e-07, "loss": 0.2738, "step": 26413 }, { "epoch": 0.9064516129032258, "grad_norm": 0.898592367931267, "learning_rate": 2.2792115016275374e-07, "loss": 0.28, "step": 26414 }, { "epoch": 0.9064859299931366, "grad_norm": 0.8380894936835596, "learning_rate": 2.2775530244662004e-07, "loss": 0.2436, "step": 26415 }, { "epoch": 0.9065202470830473, "grad_norm": 0.8033380299956393, "learning_rate": 2.2758951368600568e-07, "loss": 0.2537, "step": 26416 }, { "epoch": 0.9065545641729581, "grad_norm": 0.794883245168447, "learning_rate": 2.274237838829585e-07, "loss": 0.2741, "step": 26417 }, { "epoch": 0.906588881262869, "grad_norm": 0.728441017404637, "learning_rate": 2.2725811303952684e-07, "loss": 0.2363, "step": 26418 }, { "epoch": 0.9066231983527797, "grad_norm": 0.864650889348235, "learning_rate": 2.2709250115775573e-07, "loss": 0.2582, "step": 26419 }, { "epoch": 0.9066575154426905, "grad_norm": 0.7517932492412268, "learning_rate": 2.269269482396924e-07, "loss": 0.2819, "step": 26420 }, { "epoch": 0.9066918325326012, "grad_norm": 0.6990380036991203, "learning_rate": 2.2676145428738307e-07, "loss": 0.2421, "step": 26421 }, { "epoch": 0.906726149622512, "grad_norm": 0.7646841788842371, "learning_rate": 2.265960193028688e-07, "loss": 0.2182, "step": 26422 }, { "epoch": 0.9067604667124228, "grad_norm": 0.7905473445068721, "learning_rate": 2.2643064328819687e-07, "loss": 0.2638, "step": 26423 }, { "epoch": 0.9067947838023336, "grad_norm": 0.8483613992005503, "learning_rate": 2.26265326245409e-07, "loss": 0.204, "step": 26424 }, { "epoch": 0.9068291008922443, "grad_norm": 0.8356858039641238, "learning_rate": 2.2610006817654684e-07, "loss": 0.3082, "step": 26425 }, { "epoch": 0.9068634179821551, "grad_norm": 0.8336202171640137, "learning_rate": 2.2593486908365215e-07, "loss": 0.3291, "step": 26426 }, { "epoch": 0.9068977350720658, "grad_norm": 0.8504307677740482, "learning_rate": 2.257697289687666e-07, "loss": 0.249, "step": 26427 }, { "epoch": 0.9069320521619767, "grad_norm": 0.7831644919218126, "learning_rate": 2.2560464783392967e-07, "loss": 0.3086, "step": 26428 }, { "epoch": 0.9069663692518874, "grad_norm": 0.8557251004455463, "learning_rate": 2.2543962568118138e-07, "loss": 0.2445, "step": 26429 }, { "epoch": 0.9070006863417982, "grad_norm": 0.7921037226419755, "learning_rate": 2.2527466251255957e-07, "loss": 0.2275, "step": 26430 }, { "epoch": 0.907035003431709, "grad_norm": 0.8922626368147237, "learning_rate": 2.2510975833010206e-07, "loss": 0.3024, "step": 26431 }, { "epoch": 0.9070693205216198, "grad_norm": 0.757049194065651, "learning_rate": 2.2494491313584776e-07, "loss": 0.2584, "step": 26432 }, { "epoch": 0.9071036376115306, "grad_norm": 0.816624381072437, "learning_rate": 2.247801269318306e-07, "loss": 0.319, "step": 26433 }, { "epoch": 0.9071379547014413, "grad_norm": 0.907263084264797, "learning_rate": 2.2461539972008838e-07, "loss": 0.2275, "step": 26434 }, { "epoch": 0.9071722717913521, "grad_norm": 0.8127211057235456, "learning_rate": 2.2445073150265506e-07, "loss": 0.2499, "step": 26435 }, { "epoch": 0.9072065888812628, "grad_norm": 0.8600281717913308, "learning_rate": 2.2428612228156454e-07, "loss": 0.2445, "step": 26436 }, { "epoch": 0.9072409059711737, "grad_norm": 0.9809387104514108, "learning_rate": 2.2412157205885244e-07, "loss": 0.2779, "step": 26437 }, { "epoch": 0.9072752230610844, "grad_norm": 0.7869085463648741, "learning_rate": 2.2395708083654932e-07, "loss": 0.3166, "step": 26438 }, { "epoch": 0.9073095401509952, "grad_norm": 0.7745618524590636, "learning_rate": 2.2379264861668805e-07, "loss": 0.2459, "step": 26439 }, { "epoch": 0.9073438572409059, "grad_norm": 0.7756612790005786, "learning_rate": 2.2362827540130027e-07, "loss": 0.2463, "step": 26440 }, { "epoch": 0.9073781743308168, "grad_norm": 0.7833999242393342, "learning_rate": 2.2346396119241665e-07, "loss": 0.2832, "step": 26441 }, { "epoch": 0.9074124914207276, "grad_norm": 0.8121542221356778, "learning_rate": 2.2329970599206662e-07, "loss": 0.2318, "step": 26442 }, { "epoch": 0.9074468085106383, "grad_norm": 0.7345266749455972, "learning_rate": 2.231355098022797e-07, "loss": 0.2599, "step": 26443 }, { "epoch": 0.907481125600549, "grad_norm": 0.8111582414683318, "learning_rate": 2.2297137262508484e-07, "loss": 0.2527, "step": 26444 }, { "epoch": 0.9075154426904598, "grad_norm": 0.7652758955370973, "learning_rate": 2.228072944625076e-07, "loss": 0.247, "step": 26445 }, { "epoch": 0.9075497597803707, "grad_norm": 0.8318135782476427, "learning_rate": 2.226432753165786e-07, "loss": 0.2403, "step": 26446 }, { "epoch": 0.9075840768702814, "grad_norm": 0.8453821683763443, "learning_rate": 2.224793151893212e-07, "loss": 0.2638, "step": 26447 }, { "epoch": 0.9076183939601922, "grad_norm": 0.8071100667643342, "learning_rate": 2.2231541408276159e-07, "loss": 0.2725, "step": 26448 }, { "epoch": 0.9076527110501029, "grad_norm": 0.819176494038961, "learning_rate": 2.221515719989248e-07, "loss": 0.331, "step": 26449 }, { "epoch": 0.9076870281400137, "grad_norm": 0.7667936314292365, "learning_rate": 2.2198778893983474e-07, "loss": 0.2141, "step": 26450 }, { "epoch": 0.9077213452299245, "grad_norm": 0.829054154276882, "learning_rate": 2.218240649075154e-07, "loss": 0.249, "step": 26451 }, { "epoch": 0.9077556623198353, "grad_norm": 0.8279336259997375, "learning_rate": 2.2166039990398845e-07, "loss": 0.2769, "step": 26452 }, { "epoch": 0.907789979409746, "grad_norm": 0.7891319414497532, "learning_rate": 2.2149679393127731e-07, "loss": 0.2562, "step": 26453 }, { "epoch": 0.9078242964996568, "grad_norm": 0.6844751426049552, "learning_rate": 2.2133324699140035e-07, "loss": 0.2445, "step": 26454 }, { "epoch": 0.9078586135895677, "grad_norm": 0.8349808261393813, "learning_rate": 2.211697590863815e-07, "loss": 0.2651, "step": 26455 }, { "epoch": 0.9078929306794784, "grad_norm": 0.7940214095747353, "learning_rate": 2.2100633021823803e-07, "loss": 0.1888, "step": 26456 }, { "epoch": 0.9079272477693892, "grad_norm": 0.8700354762459731, "learning_rate": 2.2084296038898945e-07, "loss": 0.2747, "step": 26457 }, { "epoch": 0.9079615648592999, "grad_norm": 0.8016987054812571, "learning_rate": 2.2067964960065413e-07, "loss": 0.2773, "step": 26458 }, { "epoch": 0.9079958819492107, "grad_norm": 0.7992883801556065, "learning_rate": 2.2051639785524882e-07, "loss": 0.2473, "step": 26459 }, { "epoch": 0.9080301990391215, "grad_norm": 0.7208697735051097, "learning_rate": 2.203532051547924e-07, "loss": 0.2817, "step": 26460 }, { "epoch": 0.9080645161290323, "grad_norm": 0.7701192941534679, "learning_rate": 2.201900715012989e-07, "loss": 0.2602, "step": 26461 }, { "epoch": 0.908098833218943, "grad_norm": 0.8325267872088647, "learning_rate": 2.200269968967844e-07, "loss": 0.2731, "step": 26462 }, { "epoch": 0.9081331503088538, "grad_norm": 0.7424224628290866, "learning_rate": 2.198639813432635e-07, "loss": 0.2259, "step": 26463 }, { "epoch": 0.9081674673987646, "grad_norm": 0.8190938055673685, "learning_rate": 2.1970102484275003e-07, "loss": 0.263, "step": 26464 }, { "epoch": 0.9082017844886754, "grad_norm": 0.7081096786594123, "learning_rate": 2.1953812739725745e-07, "loss": 0.307, "step": 26465 }, { "epoch": 0.9082361015785861, "grad_norm": 0.7400605903751861, "learning_rate": 2.1937528900879745e-07, "loss": 0.2256, "step": 26466 }, { "epoch": 0.9082704186684969, "grad_norm": 0.911782558566038, "learning_rate": 2.1921250967938235e-07, "loss": 0.3342, "step": 26467 }, { "epoch": 0.9083047357584076, "grad_norm": 0.7686297093658122, "learning_rate": 2.1904978941102218e-07, "loss": 0.2237, "step": 26468 }, { "epoch": 0.9083390528483185, "grad_norm": 0.7358362884642063, "learning_rate": 2.1888712820572867e-07, "loss": 0.2626, "step": 26469 }, { "epoch": 0.9083733699382293, "grad_norm": 0.7118964109606268, "learning_rate": 2.187245260655102e-07, "loss": 0.2547, "step": 26470 }, { "epoch": 0.90840768702814, "grad_norm": 0.808143805587932, "learning_rate": 2.1856198299237574e-07, "loss": 0.2383, "step": 26471 }, { "epoch": 0.9084420041180508, "grad_norm": 0.7600108008289258, "learning_rate": 2.1839949898833313e-07, "loss": 0.2706, "step": 26472 }, { "epoch": 0.9084763212079615, "grad_norm": 0.8290958948225181, "learning_rate": 2.1823707405539019e-07, "loss": 0.2496, "step": 26473 }, { "epoch": 0.9085106382978724, "grad_norm": 0.7312387302169796, "learning_rate": 2.180747081955531e-07, "loss": 0.2419, "step": 26474 }, { "epoch": 0.9085449553877831, "grad_norm": 0.7612480549594326, "learning_rate": 2.1791240141082747e-07, "loss": 0.2184, "step": 26475 }, { "epoch": 0.9085792724776939, "grad_norm": 0.8407828389658767, "learning_rate": 2.177501537032195e-07, "loss": 0.2775, "step": 26476 }, { "epoch": 0.9086135895676046, "grad_norm": 0.8631850878485139, "learning_rate": 2.17587965074732e-07, "loss": 0.2633, "step": 26477 }, { "epoch": 0.9086479066575155, "grad_norm": 0.7705708042496674, "learning_rate": 2.1742583552737062e-07, "loss": 0.2131, "step": 26478 }, { "epoch": 0.9086822237474262, "grad_norm": 0.8253032383262028, "learning_rate": 2.1726376506313596e-07, "loss": 0.2272, "step": 26479 }, { "epoch": 0.908716540837337, "grad_norm": 0.7479660132700181, "learning_rate": 2.1710175368403086e-07, "loss": 0.2716, "step": 26480 }, { "epoch": 0.9087508579272477, "grad_norm": 0.7823287003522569, "learning_rate": 2.1693980139205873e-07, "loss": 0.2141, "step": 26481 }, { "epoch": 0.9087851750171585, "grad_norm": 1.0799946202990074, "learning_rate": 2.167779081892185e-07, "loss": 0.2051, "step": 26482 }, { "epoch": 0.9088194921070694, "grad_norm": 0.8144029020076614, "learning_rate": 2.1661607407751028e-07, "loss": 0.2408, "step": 26483 }, { "epoch": 0.9088538091969801, "grad_norm": 0.8478487645005243, "learning_rate": 2.16454299058933e-07, "loss": 0.235, "step": 26484 }, { "epoch": 0.9088881262868909, "grad_norm": 0.6952223140408001, "learning_rate": 2.1629258313548674e-07, "loss": 0.2774, "step": 26485 }, { "epoch": 0.9089224433768016, "grad_norm": 0.7121165321222834, "learning_rate": 2.1613092630916765e-07, "loss": 0.2665, "step": 26486 }, { "epoch": 0.9089567604667125, "grad_norm": 0.7888039794786024, "learning_rate": 2.1596932858197362e-07, "loss": 0.273, "step": 26487 }, { "epoch": 0.9089910775566232, "grad_norm": 0.710569147381559, "learning_rate": 2.158077899559019e-07, "loss": 0.2006, "step": 26488 }, { "epoch": 0.909025394646534, "grad_norm": 0.7969799723527234, "learning_rate": 2.1564631043294536e-07, "loss": 0.2775, "step": 26489 }, { "epoch": 0.9090597117364447, "grad_norm": 0.7701744140951904, "learning_rate": 2.154848900151024e-07, "loss": 0.2184, "step": 26490 }, { "epoch": 0.9090940288263555, "grad_norm": 0.7944256562795651, "learning_rate": 2.1532352870436368e-07, "loss": 0.3076, "step": 26491 }, { "epoch": 0.9091283459162663, "grad_norm": 0.7313901998109492, "learning_rate": 2.151622265027259e-07, "loss": 0.2423, "step": 26492 }, { "epoch": 0.9091626630061771, "grad_norm": 0.7405332681618112, "learning_rate": 2.1500098341217968e-07, "loss": 0.2403, "step": 26493 }, { "epoch": 0.9091969800960878, "grad_norm": 0.7488359928092178, "learning_rate": 2.1483979943471734e-07, "loss": 0.2271, "step": 26494 }, { "epoch": 0.9092312971859986, "grad_norm": 0.8238682524469365, "learning_rate": 2.146786745723306e-07, "loss": 0.2518, "step": 26495 }, { "epoch": 0.9092656142759094, "grad_norm": 0.7260204926695567, "learning_rate": 2.1451760882700956e-07, "loss": 0.2475, "step": 26496 }, { "epoch": 0.9092999313658202, "grad_norm": 0.9801671278681351, "learning_rate": 2.1435660220074372e-07, "loss": 0.2614, "step": 26497 }, { "epoch": 0.909334248455731, "grad_norm": 0.8873566188222262, "learning_rate": 2.1419565469552317e-07, "loss": 0.3038, "step": 26498 }, { "epoch": 0.9093685655456417, "grad_norm": 0.6845620272321531, "learning_rate": 2.1403476631333575e-07, "loss": 0.2224, "step": 26499 }, { "epoch": 0.9094028826355525, "grad_norm": 0.8387990840862718, "learning_rate": 2.1387393705616822e-07, "loss": 0.2803, "step": 26500 }, { "epoch": 0.9094371997254633, "grad_norm": 0.799763712165025, "learning_rate": 2.1371316692600897e-07, "loss": 0.2889, "step": 26501 }, { "epoch": 0.9094715168153741, "grad_norm": 0.8055289616331035, "learning_rate": 2.1355245592484252e-07, "loss": 0.2847, "step": 26502 }, { "epoch": 0.9095058339052848, "grad_norm": 0.7538412780427494, "learning_rate": 2.1339180405465455e-07, "loss": 0.298, "step": 26503 }, { "epoch": 0.9095401509951956, "grad_norm": 0.7510077297395782, "learning_rate": 2.132312113174312e-07, "loss": 0.2747, "step": 26504 }, { "epoch": 0.9095744680851063, "grad_norm": 0.7927842395162131, "learning_rate": 2.1307067771515533e-07, "loss": 0.2703, "step": 26505 }, { "epoch": 0.9096087851750172, "grad_norm": 0.7136126791162261, "learning_rate": 2.1291020324980983e-07, "loss": 0.2219, "step": 26506 }, { "epoch": 0.909643102264928, "grad_norm": 0.7902317258726136, "learning_rate": 2.1274978792337753e-07, "loss": 0.2581, "step": 26507 }, { "epoch": 0.9096774193548387, "grad_norm": 0.7633180006595867, "learning_rate": 2.1258943173784018e-07, "loss": 0.291, "step": 26508 }, { "epoch": 0.9097117364447495, "grad_norm": 0.7419252357917482, "learning_rate": 2.12429134695179e-07, "loss": 0.2096, "step": 26509 }, { "epoch": 0.9097460535346603, "grad_norm": 0.8765558290968771, "learning_rate": 2.1226889679737405e-07, "loss": 0.3109, "step": 26510 }, { "epoch": 0.9097803706245711, "grad_norm": 0.7958977627472312, "learning_rate": 2.12108718046406e-07, "loss": 0.2898, "step": 26511 }, { "epoch": 0.9098146877144818, "grad_norm": 0.7358000047491665, "learning_rate": 2.1194859844425042e-07, "loss": 0.234, "step": 26512 }, { "epoch": 0.9098490048043926, "grad_norm": 0.7646184863855514, "learning_rate": 2.1178853799288967e-07, "loss": 0.2398, "step": 26513 }, { "epoch": 0.9098833218943033, "grad_norm": 1.2344024127607647, "learning_rate": 2.1162853669429772e-07, "loss": 0.2517, "step": 26514 }, { "epoch": 0.9099176389842142, "grad_norm": 0.8040032939026015, "learning_rate": 2.11468594550453e-07, "loss": 0.2896, "step": 26515 }, { "epoch": 0.9099519560741249, "grad_norm": 0.79885662309048, "learning_rate": 2.1130871156333055e-07, "loss": 0.2195, "step": 26516 }, { "epoch": 0.9099862731640357, "grad_norm": 0.8139905354453145, "learning_rate": 2.111488877349066e-07, "loss": 0.2702, "step": 26517 }, { "epoch": 0.9100205902539464, "grad_norm": 0.7473997309185685, "learning_rate": 2.1098912306715402e-07, "loss": 0.2385, "step": 26518 }, { "epoch": 0.9100549073438572, "grad_norm": 0.7220161339075148, "learning_rate": 2.1082941756204788e-07, "loss": 0.2322, "step": 26519 }, { "epoch": 0.910089224433768, "grad_norm": 0.7374721359089242, "learning_rate": 2.1066977122156106e-07, "loss": 0.2305, "step": 26520 }, { "epoch": 0.9101235415236788, "grad_norm": 0.8415360602993653, "learning_rate": 2.1051018404766421e-07, "loss": 0.2693, "step": 26521 }, { "epoch": 0.9101578586135896, "grad_norm": 0.8496762769547026, "learning_rate": 2.103506560423313e-07, "loss": 0.2858, "step": 26522 }, { "epoch": 0.9101921757035003, "grad_norm": 0.811772426154369, "learning_rate": 2.1019118720753074e-07, "loss": 0.2799, "step": 26523 }, { "epoch": 0.9102264927934112, "grad_norm": 0.7327227833946948, "learning_rate": 2.1003177754523373e-07, "loss": 0.2637, "step": 26524 }, { "epoch": 0.9102608098833219, "grad_norm": 0.8169708984978941, "learning_rate": 2.0987242705741094e-07, "loss": 0.3106, "step": 26525 }, { "epoch": 0.9102951269732327, "grad_norm": 0.8109635241936423, "learning_rate": 2.0971313574602803e-07, "loss": 0.2923, "step": 26526 }, { "epoch": 0.9103294440631434, "grad_norm": 1.073564926953964, "learning_rate": 2.095539036130556e-07, "loss": 0.2615, "step": 26527 }, { "epoch": 0.9103637611530542, "grad_norm": 0.750539145205663, "learning_rate": 2.0939473066045878e-07, "loss": 0.2357, "step": 26528 }, { "epoch": 0.910398078242965, "grad_norm": 0.8056504637379012, "learning_rate": 2.0923561689020488e-07, "loss": 0.2374, "step": 26529 }, { "epoch": 0.9104323953328758, "grad_norm": 0.7743034075438405, "learning_rate": 2.0907656230425954e-07, "loss": 0.2336, "step": 26530 }, { "epoch": 0.9104667124227865, "grad_norm": 0.7410845486396858, "learning_rate": 2.089175669045873e-07, "loss": 0.2758, "step": 26531 }, { "epoch": 0.9105010295126973, "grad_norm": 0.7032608789559174, "learning_rate": 2.0875863069315327e-07, "loss": 0.232, "step": 26532 }, { "epoch": 0.9105353466026082, "grad_norm": 0.8041241670544429, "learning_rate": 2.0859975367191977e-07, "loss": 0.2614, "step": 26533 }, { "epoch": 0.9105696636925189, "grad_norm": 0.73602568439793, "learning_rate": 2.0844093584285076e-07, "loss": 0.2467, "step": 26534 }, { "epoch": 0.9106039807824297, "grad_norm": 0.8899995727970009, "learning_rate": 2.0828217720790632e-07, "loss": 0.2914, "step": 26535 }, { "epoch": 0.9106382978723404, "grad_norm": 0.8469801309224811, "learning_rate": 2.0812347776905051e-07, "loss": 0.2452, "step": 26536 }, { "epoch": 0.9106726149622512, "grad_norm": 0.8503012876639864, "learning_rate": 2.079648375282417e-07, "loss": 0.2866, "step": 26537 }, { "epoch": 0.910706932052162, "grad_norm": 0.8132799070992932, "learning_rate": 2.0780625648744002e-07, "loss": 0.2124, "step": 26538 }, { "epoch": 0.9107412491420728, "grad_norm": 0.6491218713392989, "learning_rate": 2.07647734648605e-07, "loss": 0.2722, "step": 26539 }, { "epoch": 0.9107755662319835, "grad_norm": 0.7760790432529775, "learning_rate": 2.0748927201369506e-07, "loss": 0.27, "step": 26540 }, { "epoch": 0.9108098833218943, "grad_norm": 0.8454529072324474, "learning_rate": 2.0733086858466755e-07, "loss": 0.2158, "step": 26541 }, { "epoch": 0.910844200411805, "grad_norm": 0.8212908935916579, "learning_rate": 2.0717252436347978e-07, "loss": 0.2463, "step": 26542 }, { "epoch": 0.9108785175017159, "grad_norm": 0.8066616615122467, "learning_rate": 2.0701423935208743e-07, "loss": 0.2274, "step": 26543 }, { "epoch": 0.9109128345916266, "grad_norm": 0.7601142426133691, "learning_rate": 2.0685601355244556e-07, "loss": 0.234, "step": 26544 }, { "epoch": 0.9109471516815374, "grad_norm": 0.7293693561665462, "learning_rate": 2.066978469665104e-07, "loss": 0.2873, "step": 26545 }, { "epoch": 0.9109814687714481, "grad_norm": 0.848911590366501, "learning_rate": 2.0653973959623375e-07, "loss": 0.2859, "step": 26546 }, { "epoch": 0.911015785861359, "grad_norm": 0.6865884021466587, "learning_rate": 2.0638169144357013e-07, "loss": 0.2358, "step": 26547 }, { "epoch": 0.9110501029512698, "grad_norm": 0.7216342210345973, "learning_rate": 2.0622370251047297e-07, "loss": 0.2605, "step": 26548 }, { "epoch": 0.9110844200411805, "grad_norm": 0.7271027504939739, "learning_rate": 2.0606577279889184e-07, "loss": 0.2575, "step": 26549 }, { "epoch": 0.9111187371310913, "grad_norm": 0.705439519334814, "learning_rate": 2.0590790231077963e-07, "loss": 0.2481, "step": 26550 }, { "epoch": 0.911153054221002, "grad_norm": 0.8266802084343242, "learning_rate": 2.057500910480853e-07, "loss": 0.2758, "step": 26551 }, { "epoch": 0.9111873713109129, "grad_norm": 0.8461953294882002, "learning_rate": 2.0559233901275898e-07, "loss": 0.2359, "step": 26552 }, { "epoch": 0.9112216884008236, "grad_norm": 0.7470601623525756, "learning_rate": 2.0543464620675023e-07, "loss": 0.3074, "step": 26553 }, { "epoch": 0.9112560054907344, "grad_norm": 0.7598100680878302, "learning_rate": 2.0527701263200583e-07, "loss": 0.2574, "step": 26554 }, { "epoch": 0.9112903225806451, "grad_norm": 0.7534833966620984, "learning_rate": 2.051194382904742e-07, "loss": 0.2327, "step": 26555 }, { "epoch": 0.911324639670556, "grad_norm": 0.7412983633106884, "learning_rate": 2.0496192318410157e-07, "loss": 0.2583, "step": 26556 }, { "epoch": 0.9113589567604667, "grad_norm": 0.7720160257378003, "learning_rate": 2.0480446731483472e-07, "loss": 0.2691, "step": 26557 }, { "epoch": 0.9113932738503775, "grad_norm": 0.9128063988330395, "learning_rate": 2.0464707068461653e-07, "loss": 0.2218, "step": 26558 }, { "epoch": 0.9114275909402882, "grad_norm": 0.7038824310759493, "learning_rate": 2.0448973329539433e-07, "loss": 0.2351, "step": 26559 }, { "epoch": 0.911461908030199, "grad_norm": 0.8348858111223977, "learning_rate": 2.0433245514910992e-07, "loss": 0.2404, "step": 26560 }, { "epoch": 0.9114962251201099, "grad_norm": 0.7921875492531705, "learning_rate": 2.0417523624770618e-07, "loss": 0.2859, "step": 26561 }, { "epoch": 0.9115305422100206, "grad_norm": 0.708773954557086, "learning_rate": 2.0401807659312657e-07, "loss": 0.2714, "step": 26562 }, { "epoch": 0.9115648592999314, "grad_norm": 0.7911836095703028, "learning_rate": 2.0386097618731228e-07, "loss": 0.3017, "step": 26563 }, { "epoch": 0.9115991763898421, "grad_norm": 0.7708714870806863, "learning_rate": 2.0370393503220344e-07, "loss": 0.2689, "step": 26564 }, { "epoch": 0.9116334934797529, "grad_norm": 0.8144301116893274, "learning_rate": 2.035469531297407e-07, "loss": 0.2485, "step": 26565 }, { "epoch": 0.9116678105696637, "grad_norm": 0.7292579396067728, "learning_rate": 2.0339003048186368e-07, "loss": 0.2382, "step": 26566 }, { "epoch": 0.9117021276595745, "grad_norm": 0.9602221970308321, "learning_rate": 2.032331670905091e-07, "loss": 0.3137, "step": 26567 }, { "epoch": 0.9117364447494852, "grad_norm": 0.9193765803197832, "learning_rate": 2.0307636295761823e-07, "loss": 0.2564, "step": 26568 }, { "epoch": 0.911770761839396, "grad_norm": 0.7135557364669395, "learning_rate": 2.0291961808512505e-07, "loss": 0.2631, "step": 26569 }, { "epoch": 0.9118050789293068, "grad_norm": 0.683918272202475, "learning_rate": 2.0276293247496636e-07, "loss": 0.2524, "step": 26570 }, { "epoch": 0.9118393960192176, "grad_norm": 0.835509514734401, "learning_rate": 2.026063061290806e-07, "loss": 0.2479, "step": 26571 }, { "epoch": 0.9118737131091283, "grad_norm": 0.8666159493594785, "learning_rate": 2.0244973904939957e-07, "loss": 0.2825, "step": 26572 }, { "epoch": 0.9119080301990391, "grad_norm": 0.7885097849850524, "learning_rate": 2.0229323123785838e-07, "loss": 0.2574, "step": 26573 }, { "epoch": 0.9119423472889499, "grad_norm": 0.7399239763094888, "learning_rate": 2.0213678269639103e-07, "loss": 0.3008, "step": 26574 }, { "epoch": 0.9119766643788607, "grad_norm": 0.9285106705802051, "learning_rate": 2.0198039342692987e-07, "loss": 0.3143, "step": 26575 }, { "epoch": 0.9120109814687715, "grad_norm": 0.8990836289885625, "learning_rate": 2.0182406343140726e-07, "loss": 0.3058, "step": 26576 }, { "epoch": 0.9120452985586822, "grad_norm": 0.6570642211296922, "learning_rate": 2.0166779271175386e-07, "loss": 0.2372, "step": 26577 }, { "epoch": 0.912079615648593, "grad_norm": 0.8138751796581761, "learning_rate": 2.0151158126990144e-07, "loss": 0.2365, "step": 26578 }, { "epoch": 0.9121139327385038, "grad_norm": 0.918305994799474, "learning_rate": 2.0135542910777794e-07, "loss": 0.2544, "step": 26579 }, { "epoch": 0.9121482498284146, "grad_norm": 0.861602517181127, "learning_rate": 2.0119933622731403e-07, "loss": 0.2685, "step": 26580 }, { "epoch": 0.9121825669183253, "grad_norm": 0.704278703649482, "learning_rate": 2.0104330263043703e-07, "loss": 0.3047, "step": 26581 }, { "epoch": 0.9122168840082361, "grad_norm": 0.8438466449352254, "learning_rate": 2.0088732831907542e-07, "loss": 0.2428, "step": 26582 }, { "epoch": 0.9122512010981468, "grad_norm": 0.7401896458320433, "learning_rate": 2.0073141329515545e-07, "loss": 0.2932, "step": 26583 }, { "epoch": 0.9122855181880577, "grad_norm": 0.7915134800217116, "learning_rate": 2.0057555756060332e-07, "loss": 0.3039, "step": 26584 }, { "epoch": 0.9123198352779684, "grad_norm": 0.7126999840721074, "learning_rate": 2.0041976111734474e-07, "loss": 0.2345, "step": 26585 }, { "epoch": 0.9123541523678792, "grad_norm": 0.7293817380152277, "learning_rate": 2.0026402396730371e-07, "loss": 0.2791, "step": 26586 }, { "epoch": 0.91238846945779, "grad_norm": 0.7965609043680256, "learning_rate": 2.0010834611240537e-07, "loss": 0.2777, "step": 26587 }, { "epoch": 0.9124227865477007, "grad_norm": 0.9395248401890747, "learning_rate": 1.9995272755457206e-07, "loss": 0.2556, "step": 26588 }, { "epoch": 0.9124571036376116, "grad_norm": 1.1645537756989313, "learning_rate": 1.9979716829572726e-07, "loss": 0.2647, "step": 26589 }, { "epoch": 0.9124914207275223, "grad_norm": 0.7736469284764066, "learning_rate": 1.9964166833779054e-07, "loss": 0.2485, "step": 26590 }, { "epoch": 0.9125257378174331, "grad_norm": 0.8517780038592695, "learning_rate": 1.9948622768268478e-07, "loss": 0.2645, "step": 26591 }, { "epoch": 0.9125600549073438, "grad_norm": 0.8061157312333128, "learning_rate": 1.9933084633233067e-07, "loss": 0.275, "step": 26592 }, { "epoch": 0.9125943719972547, "grad_norm": 0.8328933668824499, "learning_rate": 1.9917552428864562e-07, "loss": 0.1906, "step": 26593 }, { "epoch": 0.9126286890871654, "grad_norm": 0.7091630441850503, "learning_rate": 1.990202615535508e-07, "loss": 0.2636, "step": 26594 }, { "epoch": 0.9126630061770762, "grad_norm": 0.7689273076284906, "learning_rate": 1.9886505812896307e-07, "loss": 0.231, "step": 26595 }, { "epoch": 0.9126973232669869, "grad_norm": 0.836891320627312, "learning_rate": 1.9870991401679972e-07, "loss": 0.2372, "step": 26596 }, { "epoch": 0.9127316403568977, "grad_norm": 0.7793403460229438, "learning_rate": 1.985548292189776e-07, "loss": 0.2737, "step": 26597 }, { "epoch": 0.9127659574468086, "grad_norm": 1.0060641440703553, "learning_rate": 1.9839980373741296e-07, "loss": 0.2916, "step": 26598 }, { "epoch": 0.9128002745367193, "grad_norm": 0.7950450920327322, "learning_rate": 1.9824483757402036e-07, "loss": 0.3155, "step": 26599 }, { "epoch": 0.91283459162663, "grad_norm": 0.7810401848200059, "learning_rate": 1.9808993073071437e-07, "loss": 0.2609, "step": 26600 }, { "epoch": 0.9128689087165408, "grad_norm": 0.7411551387419276, "learning_rate": 1.9793508320940956e-07, "loss": 0.2625, "step": 26601 }, { "epoch": 0.9129032258064517, "grad_norm": 0.693433282375873, "learning_rate": 1.9778029501201667e-07, "loss": 0.2018, "step": 26602 }, { "epoch": 0.9129375428963624, "grad_norm": 0.8402275588092768, "learning_rate": 1.976255661404508e-07, "loss": 0.2322, "step": 26603 }, { "epoch": 0.9129718599862732, "grad_norm": 0.9167952530633755, "learning_rate": 1.9747089659662155e-07, "loss": 0.2567, "step": 26604 }, { "epoch": 0.9130061770761839, "grad_norm": 0.8231378506852645, "learning_rate": 1.973162863824396e-07, "loss": 0.2868, "step": 26605 }, { "epoch": 0.9130404941660947, "grad_norm": 0.8467899459672192, "learning_rate": 1.9716173549981622e-07, "loss": 0.2719, "step": 26606 }, { "epoch": 0.9130748112560055, "grad_norm": 0.8135527544333628, "learning_rate": 1.9700724395065985e-07, "loss": 0.242, "step": 26607 }, { "epoch": 0.9131091283459163, "grad_norm": 0.9233076230149053, "learning_rate": 1.9685281173687898e-07, "loss": 0.2908, "step": 26608 }, { "epoch": 0.913143445435827, "grad_norm": 0.7895625323978024, "learning_rate": 1.9669843886038209e-07, "loss": 0.2443, "step": 26609 }, { "epoch": 0.9131777625257378, "grad_norm": 0.6977276349510326, "learning_rate": 1.96544125323076e-07, "loss": 0.2285, "step": 26610 }, { "epoch": 0.9132120796156485, "grad_norm": 0.7065855245848833, "learning_rate": 1.9638987112686635e-07, "loss": 0.2263, "step": 26611 }, { "epoch": 0.9132463967055594, "grad_norm": 0.7693546560397786, "learning_rate": 1.9623567627366001e-07, "loss": 0.2653, "step": 26612 }, { "epoch": 0.9132807137954702, "grad_norm": 0.7254267588991963, "learning_rate": 1.9608154076536102e-07, "loss": 0.2232, "step": 26613 }, { "epoch": 0.9133150308853809, "grad_norm": 0.7054620335968317, "learning_rate": 1.9592746460387278e-07, "loss": 0.2156, "step": 26614 }, { "epoch": 0.9133493479752917, "grad_norm": 0.9563472941846409, "learning_rate": 1.9577344779110164e-07, "loss": 0.2482, "step": 26615 }, { "epoch": 0.9133836650652025, "grad_norm": 0.7685679642254383, "learning_rate": 1.9561949032894657e-07, "loss": 0.2653, "step": 26616 }, { "epoch": 0.9134179821551133, "grad_norm": 1.0177294024426606, "learning_rate": 1.9546559221931273e-07, "loss": 0.3132, "step": 26617 }, { "epoch": 0.913452299245024, "grad_norm": 0.7489085693665828, "learning_rate": 1.9531175346409915e-07, "loss": 0.302, "step": 26618 }, { "epoch": 0.9134866163349348, "grad_norm": 0.8335012581267984, "learning_rate": 1.951579740652071e-07, "loss": 0.2048, "step": 26619 }, { "epoch": 0.9135209334248455, "grad_norm": 0.7419688718892135, "learning_rate": 1.950042540245367e-07, "loss": 0.287, "step": 26620 }, { "epoch": 0.9135552505147564, "grad_norm": 0.7664161024496584, "learning_rate": 1.948505933439865e-07, "loss": 0.248, "step": 26621 }, { "epoch": 0.9135895676046671, "grad_norm": 0.8696691056897737, "learning_rate": 1.9469699202545489e-07, "loss": 0.2609, "step": 26622 }, { "epoch": 0.9136238846945779, "grad_norm": 0.8313784703358772, "learning_rate": 1.9454345007083987e-07, "loss": 0.2765, "step": 26623 }, { "epoch": 0.9136582017844886, "grad_norm": 0.763951736593932, "learning_rate": 1.9438996748203819e-07, "loss": 0.2041, "step": 26624 }, { "epoch": 0.9136925188743995, "grad_norm": 0.8205736403184453, "learning_rate": 1.9423654426094452e-07, "loss": 0.2597, "step": 26625 }, { "epoch": 0.9137268359643103, "grad_norm": 0.7816739433002816, "learning_rate": 1.9408318040945617e-07, "loss": 0.2314, "step": 26626 }, { "epoch": 0.913761153054221, "grad_norm": 0.8119574157769364, "learning_rate": 1.9392987592946666e-07, "loss": 0.2306, "step": 26627 }, { "epoch": 0.9137954701441318, "grad_norm": 0.7099791430519081, "learning_rate": 1.9377663082287057e-07, "loss": 0.2325, "step": 26628 }, { "epoch": 0.9138297872340425, "grad_norm": 0.7996609226323451, "learning_rate": 1.9362344509156028e-07, "loss": 0.2551, "step": 26629 }, { "epoch": 0.9138641043239534, "grad_norm": 0.7569132929211254, "learning_rate": 1.9347031873742872e-07, "loss": 0.2073, "step": 26630 }, { "epoch": 0.9138984214138641, "grad_norm": 0.7068346142622548, "learning_rate": 1.933172517623677e-07, "loss": 0.2769, "step": 26631 }, { "epoch": 0.9139327385037749, "grad_norm": 0.8472468019206175, "learning_rate": 1.9316424416826795e-07, "loss": 0.2424, "step": 26632 }, { "epoch": 0.9139670555936856, "grad_norm": 0.7976786314171749, "learning_rate": 1.9301129595702017e-07, "loss": 0.2862, "step": 26633 }, { "epoch": 0.9140013726835964, "grad_norm": 0.7512697139989202, "learning_rate": 1.928584071305123e-07, "loss": 0.2442, "step": 26634 }, { "epoch": 0.9140356897735072, "grad_norm": 0.7366525020639715, "learning_rate": 1.9270557769063446e-07, "loss": 0.2899, "step": 26635 }, { "epoch": 0.914070006863418, "grad_norm": 0.8208936065024138, "learning_rate": 1.9255280763927576e-07, "loss": 0.2714, "step": 26636 }, { "epoch": 0.9141043239533287, "grad_norm": 0.9290838515795133, "learning_rate": 1.9240009697832018e-07, "loss": 0.2554, "step": 26637 }, { "epoch": 0.9141386410432395, "grad_norm": 0.7779072580834676, "learning_rate": 1.9224744570965793e-07, "loss": 0.2894, "step": 26638 }, { "epoch": 0.9141729581331504, "grad_norm": 0.7850927271014274, "learning_rate": 1.92094853835173e-07, "loss": 0.2595, "step": 26639 }, { "epoch": 0.9142072752230611, "grad_norm": 0.7073028290079927, "learning_rate": 1.9194232135675007e-07, "loss": 0.206, "step": 26640 }, { "epoch": 0.9142415923129719, "grad_norm": 0.7541282390185415, "learning_rate": 1.9178984827627423e-07, "loss": 0.2931, "step": 26641 }, { "epoch": 0.9142759094028826, "grad_norm": 0.7446649151719085, "learning_rate": 1.91637434595629e-07, "loss": 0.2382, "step": 26642 }, { "epoch": 0.9143102264927934, "grad_norm": 0.9150477149877039, "learning_rate": 1.9148508031669733e-07, "loss": 0.3109, "step": 26643 }, { "epoch": 0.9143445435827042, "grad_norm": 0.7693435673814925, "learning_rate": 1.9133278544136102e-07, "loss": 0.2609, "step": 26644 }, { "epoch": 0.914378860672615, "grad_norm": 0.8657784261724114, "learning_rate": 1.9118054997150304e-07, "loss": 0.2716, "step": 26645 }, { "epoch": 0.9144131777625257, "grad_norm": 0.7258475408373435, "learning_rate": 1.910283739090013e-07, "loss": 0.2768, "step": 26646 }, { "epoch": 0.9144474948524365, "grad_norm": 0.7842673640755742, "learning_rate": 1.9087625725573823e-07, "loss": 0.2701, "step": 26647 }, { "epoch": 0.9144818119423472, "grad_norm": 0.7443386905966992, "learning_rate": 1.9072420001359117e-07, "loss": 0.2274, "step": 26648 }, { "epoch": 0.9145161290322581, "grad_norm": 0.7934773168533826, "learning_rate": 1.9057220218444027e-07, "loss": 0.2043, "step": 26649 }, { "epoch": 0.9145504461221688, "grad_norm": 0.7132087905527528, "learning_rate": 1.9042026377016244e-07, "loss": 0.2846, "step": 26650 }, { "epoch": 0.9145847632120796, "grad_norm": 0.8364657995614472, "learning_rate": 1.9026838477263498e-07, "loss": 0.2599, "step": 26651 }, { "epoch": 0.9146190803019904, "grad_norm": 0.7476467184226822, "learning_rate": 1.9011656519373366e-07, "loss": 0.2706, "step": 26652 }, { "epoch": 0.9146533973919012, "grad_norm": 0.7342854628513557, "learning_rate": 1.8996480503533477e-07, "loss": 0.2504, "step": 26653 }, { "epoch": 0.914687714481812, "grad_norm": 0.9082098394699376, "learning_rate": 1.8981310429931231e-07, "loss": 0.2572, "step": 26654 }, { "epoch": 0.9147220315717227, "grad_norm": 0.8102059189206737, "learning_rate": 1.8966146298754094e-07, "loss": 0.2706, "step": 26655 }, { "epoch": 0.9147563486616335, "grad_norm": 0.7464855693790008, "learning_rate": 1.8950988110189473e-07, "loss": 0.2834, "step": 26656 }, { "epoch": 0.9147906657515442, "grad_norm": 0.7786793710083938, "learning_rate": 1.8935835864424378e-07, "loss": 0.258, "step": 26657 }, { "epoch": 0.9148249828414551, "grad_norm": 1.0309472045108385, "learning_rate": 1.8920689561646277e-07, "loss": 0.2507, "step": 26658 }, { "epoch": 0.9148592999313658, "grad_norm": 0.8297264431779067, "learning_rate": 1.8905549202042184e-07, "loss": 0.2598, "step": 26659 }, { "epoch": 0.9148936170212766, "grad_norm": 0.7281153400635354, "learning_rate": 1.889041478579906e-07, "loss": 0.2361, "step": 26660 }, { "epoch": 0.9149279341111873, "grad_norm": 0.7438444096024135, "learning_rate": 1.8875286313104037e-07, "loss": 0.2438, "step": 26661 }, { "epoch": 0.9149622512010982, "grad_norm": 0.8078704304457321, "learning_rate": 1.8860163784143847e-07, "loss": 0.288, "step": 26662 }, { "epoch": 0.914996568291009, "grad_norm": 0.7756816842060605, "learning_rate": 1.8845047199105405e-07, "loss": 0.2576, "step": 26663 }, { "epoch": 0.9150308853809197, "grad_norm": 0.7165510708310726, "learning_rate": 1.8829936558175389e-07, "loss": 0.2109, "step": 26664 }, { "epoch": 0.9150652024708305, "grad_norm": 0.8022687175174792, "learning_rate": 1.881483186154054e-07, "loss": 0.2658, "step": 26665 }, { "epoch": 0.9150995195607412, "grad_norm": 0.7682521692948158, "learning_rate": 1.879973310938743e-07, "loss": 0.2832, "step": 26666 }, { "epoch": 0.9151338366506521, "grad_norm": 0.7852975695895088, "learning_rate": 1.8784640301902635e-07, "loss": 0.287, "step": 26667 }, { "epoch": 0.9151681537405628, "grad_norm": 0.8435410637369232, "learning_rate": 1.8769553439272557e-07, "loss": 0.2644, "step": 26668 }, { "epoch": 0.9152024708304736, "grad_norm": 0.7911619631538117, "learning_rate": 1.8754472521683497e-07, "loss": 0.2448, "step": 26669 }, { "epoch": 0.9152367879203843, "grad_norm": 0.7145198483174716, "learning_rate": 1.873939754932197e-07, "loss": 0.249, "step": 26670 }, { "epoch": 0.9152711050102951, "grad_norm": 0.7528405178685084, "learning_rate": 1.8724328522374048e-07, "loss": 0.2704, "step": 26671 }, { "epoch": 0.9153054221002059, "grad_norm": 0.8100763036522902, "learning_rate": 1.8709265441025915e-07, "loss": 0.2556, "step": 26672 }, { "epoch": 0.9153397391901167, "grad_norm": 0.7219745062078237, "learning_rate": 1.86942083054637e-07, "loss": 0.2658, "step": 26673 }, { "epoch": 0.9153740562800274, "grad_norm": 0.738623125064492, "learning_rate": 1.8679157115873368e-07, "loss": 0.2235, "step": 26674 }, { "epoch": 0.9154083733699382, "grad_norm": 0.7364919403368763, "learning_rate": 1.8664111872440882e-07, "loss": 0.2654, "step": 26675 }, { "epoch": 0.915442690459849, "grad_norm": 0.7750063431504138, "learning_rate": 1.8649072575352144e-07, "loss": 0.2552, "step": 26676 }, { "epoch": 0.9154770075497598, "grad_norm": 0.7121789153522166, "learning_rate": 1.8634039224792954e-07, "loss": 0.2131, "step": 26677 }, { "epoch": 0.9155113246396706, "grad_norm": 0.9027347607323911, "learning_rate": 1.8619011820948886e-07, "loss": 0.2274, "step": 26678 }, { "epoch": 0.9155456417295813, "grad_norm": 0.7454439792047334, "learning_rate": 1.8603990364005786e-07, "loss": 0.3183, "step": 26679 }, { "epoch": 0.9155799588194921, "grad_norm": 0.8070359475034488, "learning_rate": 1.858897485414901e-07, "loss": 0.2287, "step": 26680 }, { "epoch": 0.9156142759094029, "grad_norm": 0.7473330055830193, "learning_rate": 1.8573965291564243e-07, "loss": 0.2087, "step": 26681 }, { "epoch": 0.9156485929993137, "grad_norm": 0.7815386948871229, "learning_rate": 1.8558961676436893e-07, "loss": 0.2868, "step": 26682 }, { "epoch": 0.9156829100892244, "grad_norm": 0.7282330318189918, "learning_rate": 1.8543964008952198e-07, "loss": 0.202, "step": 26683 }, { "epoch": 0.9157172271791352, "grad_norm": 0.7949902711225573, "learning_rate": 1.8528972289295567e-07, "loss": 0.2506, "step": 26684 }, { "epoch": 0.915751544269046, "grad_norm": 0.769072968166359, "learning_rate": 1.8513986517652128e-07, "loss": 0.2694, "step": 26685 }, { "epoch": 0.9157858613589568, "grad_norm": 0.9111371088916483, "learning_rate": 1.8499006694207012e-07, "loss": 0.255, "step": 26686 }, { "epoch": 0.9158201784488675, "grad_norm": 0.7357515516243532, "learning_rate": 1.8484032819145237e-07, "loss": 0.2781, "step": 26687 }, { "epoch": 0.9158544955387783, "grad_norm": 0.8190383910460101, "learning_rate": 1.8469064892651933e-07, "loss": 0.3225, "step": 26688 }, { "epoch": 0.915888812628689, "grad_norm": 0.7681954805168592, "learning_rate": 1.8454102914911841e-07, "loss": 0.2234, "step": 26689 }, { "epoch": 0.9159231297185999, "grad_norm": 0.7894029941058754, "learning_rate": 1.8439146886109926e-07, "loss": 0.2857, "step": 26690 }, { "epoch": 0.9159574468085107, "grad_norm": 0.7330155404948837, "learning_rate": 1.8424196806430982e-07, "loss": 0.2312, "step": 26691 }, { "epoch": 0.9159917638984214, "grad_norm": 0.7355144991841029, "learning_rate": 1.8409252676059475e-07, "loss": 0.2369, "step": 26692 }, { "epoch": 0.9160260809883322, "grad_norm": 0.8243380695574895, "learning_rate": 1.839431449518031e-07, "loss": 0.2378, "step": 26693 }, { "epoch": 0.9160603980782429, "grad_norm": 0.7715068350432386, "learning_rate": 1.8379382263977786e-07, "loss": 0.2123, "step": 26694 }, { "epoch": 0.9160947151681538, "grad_norm": 0.9454403813380915, "learning_rate": 1.836445598263653e-07, "loss": 0.2287, "step": 26695 }, { "epoch": 0.9161290322580645, "grad_norm": 0.8695531711340216, "learning_rate": 1.8349535651340844e-07, "loss": 0.3189, "step": 26696 }, { "epoch": 0.9161633493479753, "grad_norm": 0.8505334433259617, "learning_rate": 1.8334621270275132e-07, "loss": 0.2388, "step": 26697 }, { "epoch": 0.916197666437886, "grad_norm": 0.6846256462042806, "learning_rate": 1.8319712839623638e-07, "loss": 0.2447, "step": 26698 }, { "epoch": 0.9162319835277969, "grad_norm": 0.8333043079416229, "learning_rate": 1.8304810359570436e-07, "loss": 0.2281, "step": 26699 }, { "epoch": 0.9162663006177076, "grad_norm": 0.7872362839554586, "learning_rate": 1.8289913830299766e-07, "loss": 0.2323, "step": 26700 }, { "epoch": 0.9163006177076184, "grad_norm": 0.7257094199654673, "learning_rate": 1.827502325199554e-07, "loss": 0.2321, "step": 26701 }, { "epoch": 0.9163349347975291, "grad_norm": 0.8983196813699222, "learning_rate": 1.8260138624841772e-07, "loss": 0.2523, "step": 26702 }, { "epoch": 0.9163692518874399, "grad_norm": 0.6951097394260329, "learning_rate": 1.8245259949022376e-07, "loss": 0.25, "step": 26703 }, { "epoch": 0.9164035689773508, "grad_norm": 0.8203019077637471, "learning_rate": 1.8230387224721035e-07, "loss": 0.2262, "step": 26704 }, { "epoch": 0.9164378860672615, "grad_norm": 0.8244017843312862, "learning_rate": 1.8215520452121715e-07, "loss": 0.3319, "step": 26705 }, { "epoch": 0.9164722031571723, "grad_norm": 0.7544824130739304, "learning_rate": 1.8200659631407824e-07, "loss": 0.2999, "step": 26706 }, { "epoch": 0.916506520247083, "grad_norm": 0.7019490490917707, "learning_rate": 1.8185804762763047e-07, "loss": 0.2937, "step": 26707 }, { "epoch": 0.9165408373369939, "grad_norm": 0.717192221491092, "learning_rate": 1.8170955846370963e-07, "loss": 0.2481, "step": 26708 }, { "epoch": 0.9165751544269046, "grad_norm": 0.7991145906025415, "learning_rate": 1.8156112882414922e-07, "loss": 0.2812, "step": 26709 }, { "epoch": 0.9166094715168154, "grad_norm": 0.7933566334493691, "learning_rate": 1.814127587107839e-07, "loss": 0.2676, "step": 26710 }, { "epoch": 0.9166437886067261, "grad_norm": 0.7329065267191939, "learning_rate": 1.812644481254455e-07, "loss": 0.2395, "step": 26711 }, { "epoch": 0.9166781056966369, "grad_norm": 0.7705070932119605, "learning_rate": 1.8111619706996708e-07, "loss": 0.2463, "step": 26712 }, { "epoch": 0.9167124227865477, "grad_norm": 0.7665609284482382, "learning_rate": 1.8096800554617934e-07, "loss": 0.2679, "step": 26713 }, { "epoch": 0.9167467398764585, "grad_norm": 0.7752664853275516, "learning_rate": 1.8081987355591412e-07, "loss": 0.2742, "step": 26714 }, { "epoch": 0.9167810569663692, "grad_norm": 0.7268045021243149, "learning_rate": 1.8067180110100003e-07, "loss": 0.2184, "step": 26715 }, { "epoch": 0.91681537405628, "grad_norm": 0.745380803944838, "learning_rate": 1.8052378818326776e-07, "loss": 0.2334, "step": 26716 }, { "epoch": 0.9168496911461907, "grad_norm": 0.8821651045777593, "learning_rate": 1.803758348045448e-07, "loss": 0.2458, "step": 26717 }, { "epoch": 0.9168840082361016, "grad_norm": 0.7707149406620111, "learning_rate": 1.8022794096665908e-07, "loss": 0.2449, "step": 26718 }, { "epoch": 0.9169183253260124, "grad_norm": 0.7635000338434635, "learning_rate": 1.8008010667143748e-07, "loss": 0.2548, "step": 26719 }, { "epoch": 0.9169526424159231, "grad_norm": 0.859585938491625, "learning_rate": 1.7993233192070746e-07, "loss": 0.2827, "step": 26720 }, { "epoch": 0.9169869595058339, "grad_norm": 0.7997323959736219, "learning_rate": 1.7978461671629365e-07, "loss": 0.2757, "step": 26721 }, { "epoch": 0.9170212765957447, "grad_norm": 0.8156532764609491, "learning_rate": 1.796369610600207e-07, "loss": 0.3319, "step": 26722 }, { "epoch": 0.9170555936856555, "grad_norm": 0.7439719165327527, "learning_rate": 1.7948936495371383e-07, "loss": 0.2579, "step": 26723 }, { "epoch": 0.9170899107755662, "grad_norm": 0.8130402239304312, "learning_rate": 1.7934182839919435e-07, "loss": 0.2369, "step": 26724 }, { "epoch": 0.917124227865477, "grad_norm": 0.7167391083038122, "learning_rate": 1.7919435139828745e-07, "loss": 0.287, "step": 26725 }, { "epoch": 0.9171585449553877, "grad_norm": 0.8329750528095022, "learning_rate": 1.790469339528139e-07, "loss": 0.2749, "step": 26726 }, { "epoch": 0.9171928620452986, "grad_norm": 0.8353180261271369, "learning_rate": 1.788995760645934e-07, "loss": 0.2523, "step": 26727 }, { "epoch": 0.9172271791352093, "grad_norm": 0.74239213070529, "learning_rate": 1.7875227773544944e-07, "loss": 0.3037, "step": 26728 }, { "epoch": 0.9172614962251201, "grad_norm": 0.9002988115591273, "learning_rate": 1.7860503896719948e-07, "loss": 0.2547, "step": 26729 }, { "epoch": 0.9172958133150309, "grad_norm": 0.7957535924720213, "learning_rate": 1.7845785976166263e-07, "loss": 0.2435, "step": 26730 }, { "epoch": 0.9173301304049417, "grad_norm": 0.6895061066665049, "learning_rate": 1.7831074012065796e-07, "loss": 0.3059, "step": 26731 }, { "epoch": 0.9173644474948525, "grad_norm": 1.025016119334691, "learning_rate": 1.7816368004600237e-07, "loss": 0.2342, "step": 26732 }, { "epoch": 0.9173987645847632, "grad_norm": 0.7492188226018653, "learning_rate": 1.7801667953951274e-07, "loss": 0.2141, "step": 26733 }, { "epoch": 0.917433081674674, "grad_norm": 0.7804424845041232, "learning_rate": 1.7786973860300537e-07, "loss": 0.2921, "step": 26734 }, { "epoch": 0.9174673987645847, "grad_norm": 0.6607238961688008, "learning_rate": 1.777228572382955e-07, "loss": 0.2569, "step": 26735 }, { "epoch": 0.9175017158544956, "grad_norm": 0.8475517961945375, "learning_rate": 1.775760354471967e-07, "loss": 0.2534, "step": 26736 }, { "epoch": 0.9175360329444063, "grad_norm": 0.8421604812493929, "learning_rate": 1.774292732315247e-07, "loss": 0.2552, "step": 26737 }, { "epoch": 0.9175703500343171, "grad_norm": 0.7393228314220079, "learning_rate": 1.7728257059309084e-07, "loss": 0.2432, "step": 26738 }, { "epoch": 0.9176046671242278, "grad_norm": 0.8191701349142211, "learning_rate": 1.771359275337081e-07, "loss": 0.3082, "step": 26739 }, { "epoch": 0.9176389842141386, "grad_norm": 0.7832652389340011, "learning_rate": 1.7698934405518785e-07, "loss": 0.243, "step": 26740 }, { "epoch": 0.9176733013040494, "grad_norm": 0.8986660358695402, "learning_rate": 1.768428201593414e-07, "loss": 0.2386, "step": 26741 }, { "epoch": 0.9177076183939602, "grad_norm": 0.7463586319597992, "learning_rate": 1.766963558479784e-07, "loss": 0.3062, "step": 26742 }, { "epoch": 0.917741935483871, "grad_norm": 0.746914267948132, "learning_rate": 1.7654995112290907e-07, "loss": 0.2524, "step": 26743 }, { "epoch": 0.9177762525737817, "grad_norm": 0.812945194991864, "learning_rate": 1.7640360598594086e-07, "loss": 0.228, "step": 26744 }, { "epoch": 0.9178105696636926, "grad_norm": 0.7663364205075893, "learning_rate": 1.7625732043888232e-07, "loss": 0.2935, "step": 26745 }, { "epoch": 0.9178448867536033, "grad_norm": 0.82184382323117, "learning_rate": 1.761110944835409e-07, "loss": 0.295, "step": 26746 }, { "epoch": 0.9178792038435141, "grad_norm": 0.8114517132400079, "learning_rate": 1.7596492812172295e-07, "loss": 0.2333, "step": 26747 }, { "epoch": 0.9179135209334248, "grad_norm": 0.8245500463440252, "learning_rate": 1.7581882135523422e-07, "loss": 0.2411, "step": 26748 }, { "epoch": 0.9179478380233356, "grad_norm": 0.776655469068639, "learning_rate": 1.7567277418587936e-07, "loss": 0.2744, "step": 26749 }, { "epoch": 0.9179821551132464, "grad_norm": 0.8037448752522168, "learning_rate": 1.7552678661546197e-07, "loss": 0.3114, "step": 26750 }, { "epoch": 0.9180164722031572, "grad_norm": 0.746918819479684, "learning_rate": 1.753808586457878e-07, "loss": 0.2488, "step": 26751 }, { "epoch": 0.9180507892930679, "grad_norm": 0.8682678768427986, "learning_rate": 1.752349902786571e-07, "loss": 0.276, "step": 26752 }, { "epoch": 0.9180851063829787, "grad_norm": 0.8515874742371805, "learning_rate": 1.750891815158734e-07, "loss": 0.2365, "step": 26753 }, { "epoch": 0.9181194234728896, "grad_norm": 0.7419432574235959, "learning_rate": 1.7494343235923693e-07, "loss": 0.2391, "step": 26754 }, { "epoch": 0.9181537405628003, "grad_norm": 0.7646849339145113, "learning_rate": 1.7479774281054962e-07, "loss": 0.2533, "step": 26755 }, { "epoch": 0.918188057652711, "grad_norm": 0.7166225506187754, "learning_rate": 1.7465211287160998e-07, "loss": 0.2336, "step": 26756 }, { "epoch": 0.9182223747426218, "grad_norm": 0.7625828942286612, "learning_rate": 1.745065425442183e-07, "loss": 0.2516, "step": 26757 }, { "epoch": 0.9182566918325326, "grad_norm": 0.7489213022330822, "learning_rate": 1.743610318301725e-07, "loss": 0.2595, "step": 26758 }, { "epoch": 0.9182910089224434, "grad_norm": 0.7806091671856493, "learning_rate": 1.7421558073126844e-07, "loss": 0.235, "step": 26759 }, { "epoch": 0.9183253260123542, "grad_norm": 0.8786196466555288, "learning_rate": 1.740701892493063e-07, "loss": 0.2896, "step": 26760 }, { "epoch": 0.9183596431022649, "grad_norm": 0.849910471767184, "learning_rate": 1.7392485738607968e-07, "loss": 0.2087, "step": 26761 }, { "epoch": 0.9183939601921757, "grad_norm": 0.8053523106692481, "learning_rate": 1.7377958514338488e-07, "loss": 0.3011, "step": 26762 }, { "epoch": 0.9184282772820864, "grad_norm": 0.8390625134553864, "learning_rate": 1.7363437252301662e-07, "loss": 0.2985, "step": 26763 }, { "epoch": 0.9184625943719973, "grad_norm": 0.6516698395105842, "learning_rate": 1.7348921952676844e-07, "loss": 0.2506, "step": 26764 }, { "epoch": 0.918496911461908, "grad_norm": 0.7677562861503541, "learning_rate": 1.733441261564339e-07, "loss": 0.2778, "step": 26765 }, { "epoch": 0.9185312285518188, "grad_norm": 0.8745808052602927, "learning_rate": 1.7319909241380495e-07, "loss": 0.3145, "step": 26766 }, { "epoch": 0.9185655456417295, "grad_norm": 0.9042484921874736, "learning_rate": 1.7305411830067508e-07, "loss": 0.2277, "step": 26767 }, { "epoch": 0.9185998627316404, "grad_norm": 0.8009898266898482, "learning_rate": 1.729092038188318e-07, "loss": 0.2302, "step": 26768 }, { "epoch": 0.9186341798215512, "grad_norm": 0.7279488149659183, "learning_rate": 1.7276434897006865e-07, "loss": 0.2371, "step": 26769 }, { "epoch": 0.9186684969114619, "grad_norm": 0.7670454075469477, "learning_rate": 1.7261955375617478e-07, "loss": 0.3274, "step": 26770 }, { "epoch": 0.9187028140013727, "grad_norm": 0.7604294283153143, "learning_rate": 1.7247481817893652e-07, "loss": 0.3019, "step": 26771 }, { "epoch": 0.9187371310912834, "grad_norm": 0.7848005995828902, "learning_rate": 1.7233014224014466e-07, "loss": 0.2372, "step": 26772 }, { "epoch": 0.9187714481811943, "grad_norm": 1.015298685109818, "learning_rate": 1.7218552594158443e-07, "loss": 0.2284, "step": 26773 }, { "epoch": 0.918805765271105, "grad_norm": 0.8488148343474, "learning_rate": 1.7204096928504445e-07, "loss": 0.2895, "step": 26774 }, { "epoch": 0.9188400823610158, "grad_norm": 0.7923218475210431, "learning_rate": 1.7189647227230877e-07, "loss": 0.2872, "step": 26775 }, { "epoch": 0.9188743994509265, "grad_norm": 0.7619919325191086, "learning_rate": 1.7175203490516324e-07, "loss": 0.2732, "step": 26776 }, { "epoch": 0.9189087165408374, "grad_norm": 0.8638566949792202, "learning_rate": 1.7160765718539196e-07, "loss": 0.293, "step": 26777 }, { "epoch": 0.9189430336307481, "grad_norm": 0.802160449975816, "learning_rate": 1.7146333911477907e-07, "loss": 0.2313, "step": 26778 }, { "epoch": 0.9189773507206589, "grad_norm": 0.8358467752219392, "learning_rate": 1.7131908069510705e-07, "loss": 0.2773, "step": 26779 }, { "epoch": 0.9190116678105696, "grad_norm": 0.7677598238047738, "learning_rate": 1.7117488192815778e-07, "loss": 0.2454, "step": 26780 }, { "epoch": 0.9190459849004804, "grad_norm": 0.8017045521865611, "learning_rate": 1.7103074281571375e-07, "loss": 0.2471, "step": 26781 }, { "epoch": 0.9190803019903913, "grad_norm": 0.9055421496635319, "learning_rate": 1.708866633595535e-07, "loss": 0.3752, "step": 26782 }, { "epoch": 0.919114619080302, "grad_norm": 0.9051780426680062, "learning_rate": 1.707426435614601e-07, "loss": 0.2667, "step": 26783 }, { "epoch": 0.9191489361702128, "grad_norm": 0.8839889441939016, "learning_rate": 1.7059868342320984e-07, "loss": 0.2396, "step": 26784 }, { "epoch": 0.9191832532601235, "grad_norm": 0.8374160246428849, "learning_rate": 1.704547829465819e-07, "loss": 0.2537, "step": 26785 }, { "epoch": 0.9192175703500343, "grad_norm": 0.7817180714300338, "learning_rate": 1.703109421333554e-07, "loss": 0.2784, "step": 26786 }, { "epoch": 0.9192518874399451, "grad_norm": 0.8784664453029664, "learning_rate": 1.701671609853056e-07, "loss": 0.2554, "step": 26787 }, { "epoch": 0.9192862045298559, "grad_norm": 0.7956598026099125, "learning_rate": 1.7002343950420996e-07, "loss": 0.23, "step": 26788 }, { "epoch": 0.9193205216197666, "grad_norm": 0.7472758430281602, "learning_rate": 1.6987977769184317e-07, "loss": 0.24, "step": 26789 }, { "epoch": 0.9193548387096774, "grad_norm": 0.6980721179114447, "learning_rate": 1.6973617554998157e-07, "loss": 0.2275, "step": 26790 }, { "epoch": 0.9193891557995882, "grad_norm": 0.8348354694471682, "learning_rate": 1.6959263308039597e-07, "loss": 0.2502, "step": 26791 }, { "epoch": 0.919423472889499, "grad_norm": 0.7642719211849414, "learning_rate": 1.6944915028486275e-07, "loss": 0.2592, "step": 26792 }, { "epoch": 0.9194577899794097, "grad_norm": 0.7777966645510938, "learning_rate": 1.693057271651538e-07, "loss": 0.2082, "step": 26793 }, { "epoch": 0.9194921070693205, "grad_norm": 0.8167266074048017, "learning_rate": 1.691623637230394e-07, "loss": 0.2197, "step": 26794 }, { "epoch": 0.9195264241592312, "grad_norm": 0.8527114046961024, "learning_rate": 1.690190599602931e-07, "loss": 0.2711, "step": 26795 }, { "epoch": 0.9195607412491421, "grad_norm": 0.8079719621840527, "learning_rate": 1.6887581587868297e-07, "loss": 0.2441, "step": 26796 }, { "epoch": 0.9195950583390529, "grad_norm": 0.8444621160357821, "learning_rate": 1.6873263147998032e-07, "loss": 0.2906, "step": 26797 }, { "epoch": 0.9196293754289636, "grad_norm": 0.7143163338213981, "learning_rate": 1.6858950676595266e-07, "loss": 0.2242, "step": 26798 }, { "epoch": 0.9196636925188744, "grad_norm": 0.8091924327039618, "learning_rate": 1.684464417383691e-07, "loss": 0.2785, "step": 26799 }, { "epoch": 0.9196980096087852, "grad_norm": 0.8158650272421626, "learning_rate": 1.683034363989966e-07, "loss": 0.3035, "step": 26800 }, { "epoch": 0.919732326698696, "grad_norm": 0.7465406118133222, "learning_rate": 1.681604907496015e-07, "loss": 0.2786, "step": 26801 }, { "epoch": 0.9197666437886067, "grad_norm": 0.7422744066663247, "learning_rate": 1.6801760479195128e-07, "loss": 0.3201, "step": 26802 }, { "epoch": 0.9198009608785175, "grad_norm": 0.8972041062251072, "learning_rate": 1.6787477852780897e-07, "loss": 0.2664, "step": 26803 }, { "epoch": 0.9198352779684282, "grad_norm": 0.7677322126772443, "learning_rate": 1.6773201195894095e-07, "loss": 0.2382, "step": 26804 }, { "epoch": 0.9198695950583391, "grad_norm": 0.731815942586678, "learning_rate": 1.6758930508710858e-07, "loss": 0.2146, "step": 26805 }, { "epoch": 0.9199039121482498, "grad_norm": 0.7127128091516597, "learning_rate": 1.6744665791407765e-07, "loss": 0.2476, "step": 26806 }, { "epoch": 0.9199382292381606, "grad_norm": 0.693612683280327, "learning_rate": 1.67304070441609e-07, "loss": 0.2507, "step": 26807 }, { "epoch": 0.9199725463280713, "grad_norm": 0.9219004662713823, "learning_rate": 1.671615426714629e-07, "loss": 0.2605, "step": 26808 }, { "epoch": 0.9200068634179821, "grad_norm": 0.7291086351248518, "learning_rate": 1.670190746054029e-07, "loss": 0.2476, "step": 26809 }, { "epoch": 0.920041180507893, "grad_norm": 0.8292505237322263, "learning_rate": 1.6687666624518705e-07, "loss": 0.2243, "step": 26810 }, { "epoch": 0.9200754975978037, "grad_norm": 0.7355285544753828, "learning_rate": 1.6673431759257507e-07, "loss": 0.1879, "step": 26811 }, { "epoch": 0.9201098146877145, "grad_norm": 0.7589286795328753, "learning_rate": 1.66592028649325e-07, "loss": 0.2502, "step": 26812 }, { "epoch": 0.9201441317776252, "grad_norm": 0.7737076337937967, "learning_rate": 1.6644979941719596e-07, "loss": 0.2979, "step": 26813 }, { "epoch": 0.9201784488675361, "grad_norm": 0.771923562012887, "learning_rate": 1.663076298979438e-07, "loss": 0.2571, "step": 26814 }, { "epoch": 0.9202127659574468, "grad_norm": 0.8004852509841187, "learning_rate": 1.6616552009332541e-07, "loss": 0.3155, "step": 26815 }, { "epoch": 0.9202470830473576, "grad_norm": 0.7590679496847121, "learning_rate": 1.6602347000509722e-07, "loss": 0.2588, "step": 26816 }, { "epoch": 0.9202814001372683, "grad_norm": 0.6635949880268546, "learning_rate": 1.6588147963501167e-07, "loss": 0.2443, "step": 26817 }, { "epoch": 0.9203157172271791, "grad_norm": 0.8220484020277244, "learning_rate": 1.657395489848257e-07, "loss": 0.2895, "step": 26818 }, { "epoch": 0.92035003431709, "grad_norm": 0.6831114029090312, "learning_rate": 1.6559767805629068e-07, "loss": 0.2379, "step": 26819 }, { "epoch": 0.9203843514070007, "grad_norm": 0.8520669092082619, "learning_rate": 1.654558668511602e-07, "loss": 0.2963, "step": 26820 }, { "epoch": 0.9204186684969115, "grad_norm": 0.7983805234990273, "learning_rate": 1.6531411537118568e-07, "loss": 0.2287, "step": 26821 }, { "epoch": 0.9204529855868222, "grad_norm": 0.7313248666935878, "learning_rate": 1.6517242361811903e-07, "loss": 0.2963, "step": 26822 }, { "epoch": 0.9204873026767331, "grad_norm": 0.7244548276287162, "learning_rate": 1.6503079159370993e-07, "loss": 0.2515, "step": 26823 }, { "epoch": 0.9205216197666438, "grad_norm": 0.7995920396486432, "learning_rate": 1.6488921929970814e-07, "loss": 0.25, "step": 26824 }, { "epoch": 0.9205559368565546, "grad_norm": 0.7917969820850164, "learning_rate": 1.6474770673786388e-07, "loss": 0.2457, "step": 26825 }, { "epoch": 0.9205902539464653, "grad_norm": 0.8304426082133674, "learning_rate": 1.6460625390992303e-07, "loss": 0.2467, "step": 26826 }, { "epoch": 0.9206245710363761, "grad_norm": 0.7830329850630604, "learning_rate": 1.6446486081763525e-07, "loss": 0.2251, "step": 26827 }, { "epoch": 0.9206588881262869, "grad_norm": 0.7746021773204191, "learning_rate": 1.6432352746274637e-07, "loss": 0.2735, "step": 26828 }, { "epoch": 0.9206932052161977, "grad_norm": 0.8006048117514841, "learning_rate": 1.6418225384700227e-07, "loss": 0.2977, "step": 26829 }, { "epoch": 0.9207275223061084, "grad_norm": 0.7464113115393586, "learning_rate": 1.6404103997214815e-07, "loss": 0.3047, "step": 26830 }, { "epoch": 0.9207618393960192, "grad_norm": 0.7949610859877949, "learning_rate": 1.6389988583992876e-07, "loss": 0.2781, "step": 26831 }, { "epoch": 0.9207961564859299, "grad_norm": 0.7539396031159967, "learning_rate": 1.6375879145208828e-07, "loss": 0.2869, "step": 26832 }, { "epoch": 0.9208304735758408, "grad_norm": 0.7186032108220778, "learning_rate": 1.6361775681036917e-07, "loss": 0.2513, "step": 26833 }, { "epoch": 0.9208647906657516, "grad_norm": 0.6835212029458975, "learning_rate": 1.6347678191651506e-07, "loss": 0.253, "step": 26834 }, { "epoch": 0.9208991077556623, "grad_norm": 0.6899093884205565, "learning_rate": 1.6333586677226455e-07, "loss": 0.231, "step": 26835 }, { "epoch": 0.9209334248455731, "grad_norm": 0.9385616034598586, "learning_rate": 1.6319501137936177e-07, "loss": 0.2646, "step": 26836 }, { "epoch": 0.9209677419354839, "grad_norm": 0.7903673981952646, "learning_rate": 1.6305421573954482e-07, "loss": 0.2532, "step": 26837 }, { "epoch": 0.9210020590253947, "grad_norm": 0.7621373297934406, "learning_rate": 1.6291347985455398e-07, "loss": 0.2393, "step": 26838 }, { "epoch": 0.9210363761153054, "grad_norm": 0.790533514728636, "learning_rate": 1.6277280372612836e-07, "loss": 0.2789, "step": 26839 }, { "epoch": 0.9210706932052162, "grad_norm": 0.781336585523349, "learning_rate": 1.6263218735600383e-07, "loss": 0.2328, "step": 26840 }, { "epoch": 0.9211050102951269, "grad_norm": 0.7741030158486849, "learning_rate": 1.6249163074591956e-07, "loss": 0.2404, "step": 26841 }, { "epoch": 0.9211393273850378, "grad_norm": 0.8248681021104022, "learning_rate": 1.6235113389761082e-07, "loss": 0.2744, "step": 26842 }, { "epoch": 0.9211736444749485, "grad_norm": 0.7965616904460809, "learning_rate": 1.6221069681281399e-07, "loss": 0.2744, "step": 26843 }, { "epoch": 0.9212079615648593, "grad_norm": 0.7359165702836342, "learning_rate": 1.6207031949326324e-07, "loss": 0.3199, "step": 26844 }, { "epoch": 0.92124227865477, "grad_norm": 0.7967882542684316, "learning_rate": 1.6193000194069385e-07, "loss": 0.2797, "step": 26845 }, { "epoch": 0.9212765957446809, "grad_norm": 0.8465109335804174, "learning_rate": 1.617897441568378e-07, "loss": 0.2971, "step": 26846 }, { "epoch": 0.9213109128345917, "grad_norm": 0.8617696110081012, "learning_rate": 1.6164954614342922e-07, "loss": 0.2824, "step": 26847 }, { "epoch": 0.9213452299245024, "grad_norm": 0.809204238085165, "learning_rate": 1.6150940790220003e-07, "loss": 0.2969, "step": 26848 }, { "epoch": 0.9213795470144132, "grad_norm": 0.7700924746956743, "learning_rate": 1.6136932943488005e-07, "loss": 0.2752, "step": 26849 }, { "epoch": 0.9214138641043239, "grad_norm": 0.7102745360011536, "learning_rate": 1.6122931074320114e-07, "loss": 0.2483, "step": 26850 }, { "epoch": 0.9214481811942348, "grad_norm": 0.6781660378175248, "learning_rate": 1.6108935182889252e-07, "loss": 0.2414, "step": 26851 }, { "epoch": 0.9214824982841455, "grad_norm": 0.6810729783633609, "learning_rate": 1.6094945269368279e-07, "loss": 0.2103, "step": 26852 }, { "epoch": 0.9215168153740563, "grad_norm": 0.8158459999397362, "learning_rate": 1.6080961333930223e-07, "loss": 0.2428, "step": 26853 }, { "epoch": 0.921551132463967, "grad_norm": 0.6571572863755589, "learning_rate": 1.606698337674756e-07, "loss": 0.2492, "step": 26854 }, { "epoch": 0.9215854495538778, "grad_norm": 0.7281702550158692, "learning_rate": 1.6053011397993145e-07, "loss": 0.2365, "step": 26855 }, { "epoch": 0.9216197666437886, "grad_norm": 0.7539527518063288, "learning_rate": 1.603904539783957e-07, "loss": 0.28, "step": 26856 }, { "epoch": 0.9216540837336994, "grad_norm": 0.7536609289884709, "learning_rate": 1.602508537645936e-07, "loss": 0.266, "step": 26857 }, { "epoch": 0.9216884008236101, "grad_norm": 0.7369032350087603, "learning_rate": 1.6011131334024876e-07, "loss": 0.269, "step": 26858 }, { "epoch": 0.9217227179135209, "grad_norm": 0.8087861029679508, "learning_rate": 1.599718327070865e-07, "loss": 0.2502, "step": 26859 }, { "epoch": 0.9217570350034318, "grad_norm": 0.8175898167288064, "learning_rate": 1.5983241186682986e-07, "loss": 0.2601, "step": 26860 }, { "epoch": 0.9217913520933425, "grad_norm": 0.696396982558083, "learning_rate": 1.5969305082119968e-07, "loss": 0.2262, "step": 26861 }, { "epoch": 0.9218256691832533, "grad_norm": 0.7550482978245938, "learning_rate": 1.5955374957191904e-07, "loss": 0.2385, "step": 26862 }, { "epoch": 0.921859986273164, "grad_norm": 0.8171622302602841, "learning_rate": 1.594145081207088e-07, "loss": 0.2112, "step": 26863 }, { "epoch": 0.9218943033630748, "grad_norm": 0.8044510232516675, "learning_rate": 1.5927532646928812e-07, "loss": 0.2486, "step": 26864 }, { "epoch": 0.9219286204529856, "grad_norm": 0.7938690000562522, "learning_rate": 1.5913620461937728e-07, "loss": 0.2752, "step": 26865 }, { "epoch": 0.9219629375428964, "grad_norm": 0.9369835576623333, "learning_rate": 1.589971425726944e-07, "loss": 0.2498, "step": 26866 }, { "epoch": 0.9219972546328071, "grad_norm": 0.7890976279869935, "learning_rate": 1.5885814033095804e-07, "loss": 0.2732, "step": 26867 }, { "epoch": 0.9220315717227179, "grad_norm": 0.7287916206312758, "learning_rate": 1.5871919789588518e-07, "loss": 0.2258, "step": 26868 }, { "epoch": 0.9220658888126287, "grad_norm": 0.7755904813461262, "learning_rate": 1.5858031526919227e-07, "loss": 0.2696, "step": 26869 }, { "epoch": 0.9221002059025395, "grad_norm": 0.7554180724158772, "learning_rate": 1.5844149245259455e-07, "loss": 0.2967, "step": 26870 }, { "epoch": 0.9221345229924502, "grad_norm": 0.8021083673509235, "learning_rate": 1.5830272944780843e-07, "loss": 0.1996, "step": 26871 }, { "epoch": 0.922168840082361, "grad_norm": 0.7159132582357398, "learning_rate": 1.5816402625654648e-07, "loss": 0.2307, "step": 26872 }, { "epoch": 0.9222031571722717, "grad_norm": 0.7167360782250386, "learning_rate": 1.5802538288052338e-07, "loss": 0.2718, "step": 26873 }, { "epoch": 0.9222374742621826, "grad_norm": 0.7777473663927449, "learning_rate": 1.5788679932145112e-07, "loss": 0.2272, "step": 26874 }, { "epoch": 0.9222717913520934, "grad_norm": 0.8661123742259873, "learning_rate": 1.5774827558104166e-07, "loss": 0.2394, "step": 26875 }, { "epoch": 0.9223061084420041, "grad_norm": 0.7969504142524979, "learning_rate": 1.5760981166100807e-07, "loss": 0.2736, "step": 26876 }, { "epoch": 0.9223404255319149, "grad_norm": 0.7983111478744164, "learning_rate": 1.5747140756305844e-07, "loss": 0.2801, "step": 26877 }, { "epoch": 0.9223747426218256, "grad_norm": 0.9341354753077289, "learning_rate": 1.5733306328890418e-07, "loss": 0.2512, "step": 26878 }, { "epoch": 0.9224090597117365, "grad_norm": 0.7332679728672964, "learning_rate": 1.5719477884025392e-07, "loss": 0.2478, "step": 26879 }, { "epoch": 0.9224433768016472, "grad_norm": 0.8997735174520947, "learning_rate": 1.5705655421881571e-07, "loss": 0.3206, "step": 26880 }, { "epoch": 0.922477693891558, "grad_norm": 0.7153683782255509, "learning_rate": 1.5691838942629766e-07, "loss": 0.2233, "step": 26881 }, { "epoch": 0.9225120109814687, "grad_norm": 0.8750348010583056, "learning_rate": 1.567802844644062e-07, "loss": 0.2651, "step": 26882 }, { "epoch": 0.9225463280713796, "grad_norm": 0.6690484803204576, "learning_rate": 1.566422393348488e-07, "loss": 0.2066, "step": 26883 }, { "epoch": 0.9225806451612903, "grad_norm": 0.7110680729839608, "learning_rate": 1.56504254039328e-07, "loss": 0.2236, "step": 26884 }, { "epoch": 0.9226149622512011, "grad_norm": 0.6869515153700905, "learning_rate": 1.5636632857955136e-07, "loss": 0.2255, "step": 26885 }, { "epoch": 0.9226492793411118, "grad_norm": 0.7904809124470477, "learning_rate": 1.562284629572214e-07, "loss": 0.2681, "step": 26886 }, { "epoch": 0.9226835964310226, "grad_norm": 0.818966522816983, "learning_rate": 1.5609065717404115e-07, "loss": 0.2879, "step": 26887 }, { "epoch": 0.9227179135209335, "grad_norm": 0.7502888927384771, "learning_rate": 1.5595291123171374e-07, "loss": 0.2477, "step": 26888 }, { "epoch": 0.9227522306108442, "grad_norm": 0.7769003921203718, "learning_rate": 1.5581522513194004e-07, "loss": 0.2424, "step": 26889 }, { "epoch": 0.922786547700755, "grad_norm": 0.8354672757032223, "learning_rate": 1.5567759887642197e-07, "loss": 0.2909, "step": 26890 }, { "epoch": 0.9228208647906657, "grad_norm": 0.7901535852223253, "learning_rate": 1.555400324668588e-07, "loss": 0.2967, "step": 26891 }, { "epoch": 0.9228551818805766, "grad_norm": 0.7711317248147104, "learning_rate": 1.5540252590495132e-07, "loss": 0.3112, "step": 26892 }, { "epoch": 0.9228894989704873, "grad_norm": 0.6940219515196208, "learning_rate": 1.5526507919239597e-07, "loss": 0.3012, "step": 26893 }, { "epoch": 0.9229238160603981, "grad_norm": 0.8885909886617319, "learning_rate": 1.5512769233089364e-07, "loss": 0.243, "step": 26894 }, { "epoch": 0.9229581331503088, "grad_norm": 0.8294542658487952, "learning_rate": 1.5499036532213906e-07, "loss": 0.2693, "step": 26895 }, { "epoch": 0.9229924502402196, "grad_norm": 0.7933118229250925, "learning_rate": 1.5485309816782979e-07, "loss": 0.2511, "step": 26896 }, { "epoch": 0.9230267673301304, "grad_norm": 0.8105935131008466, "learning_rate": 1.5471589086966222e-07, "loss": 0.2391, "step": 26897 }, { "epoch": 0.9230610844200412, "grad_norm": 0.7107388349696426, "learning_rate": 1.5457874342933e-07, "loss": 0.2358, "step": 26898 }, { "epoch": 0.923095401509952, "grad_norm": 0.6823231212124887, "learning_rate": 1.5444165584852899e-07, "loss": 0.2624, "step": 26899 }, { "epoch": 0.9231297185998627, "grad_norm": 0.8169116500530872, "learning_rate": 1.543046281289512e-07, "loss": 0.2856, "step": 26900 }, { "epoch": 0.9231640356897735, "grad_norm": 0.7866587790347905, "learning_rate": 1.541676602722908e-07, "loss": 0.2369, "step": 26901 }, { "epoch": 0.9231983527796843, "grad_norm": 0.7145080299939107, "learning_rate": 1.5403075228023867e-07, "loss": 0.2305, "step": 26902 }, { "epoch": 0.9232326698695951, "grad_norm": 0.7620016846108486, "learning_rate": 1.5389390415448736e-07, "loss": 0.2553, "step": 26903 }, { "epoch": 0.9232669869595058, "grad_norm": 0.8377762661857293, "learning_rate": 1.5375711589672658e-07, "loss": 0.2699, "step": 26904 }, { "epoch": 0.9233013040494166, "grad_norm": 0.8141360433733597, "learning_rate": 1.5362038750864617e-07, "loss": 0.2416, "step": 26905 }, { "epoch": 0.9233356211393274, "grad_norm": 0.8753104468774796, "learning_rate": 1.5348371899193637e-07, "loss": 0.2756, "step": 26906 }, { "epoch": 0.9233699382292382, "grad_norm": 0.674012550926334, "learning_rate": 1.5334711034828365e-07, "loss": 0.2442, "step": 26907 }, { "epoch": 0.9234042553191489, "grad_norm": 0.8234211048086431, "learning_rate": 1.5321056157937774e-07, "loss": 0.2489, "step": 26908 }, { "epoch": 0.9234385724090597, "grad_norm": 0.84911489832006, "learning_rate": 1.5307407268690455e-07, "loss": 0.2405, "step": 26909 }, { "epoch": 0.9234728894989704, "grad_norm": 0.9649176224775717, "learning_rate": 1.5293764367254937e-07, "loss": 0.2926, "step": 26910 }, { "epoch": 0.9235072065888813, "grad_norm": 0.7612492299304611, "learning_rate": 1.528012745379992e-07, "loss": 0.2495, "step": 26911 }, { "epoch": 0.923541523678792, "grad_norm": 0.7502528787284701, "learning_rate": 1.5266496528493712e-07, "loss": 0.2744, "step": 26912 }, { "epoch": 0.9235758407687028, "grad_norm": 0.7977994391006455, "learning_rate": 1.5252871591504848e-07, "loss": 0.2545, "step": 26913 }, { "epoch": 0.9236101578586136, "grad_norm": 0.6996064654920662, "learning_rate": 1.5239252643001635e-07, "loss": 0.2244, "step": 26914 }, { "epoch": 0.9236444749485244, "grad_norm": 0.8247037245729775, "learning_rate": 1.522563968315227e-07, "loss": 0.2968, "step": 26915 }, { "epoch": 0.9236787920384352, "grad_norm": 0.795045330376089, "learning_rate": 1.5212032712124846e-07, "loss": 0.2768, "step": 26916 }, { "epoch": 0.9237131091283459, "grad_norm": 0.6820186158225491, "learning_rate": 1.519843173008767e-07, "loss": 0.2394, "step": 26917 }, { "epoch": 0.9237474262182567, "grad_norm": 0.8402528400132006, "learning_rate": 1.5184836737208607e-07, "loss": 0.2396, "step": 26918 }, { "epoch": 0.9237817433081674, "grad_norm": 0.8357729495476077, "learning_rate": 1.5171247733655527e-07, "loss": 0.2949, "step": 26919 }, { "epoch": 0.9238160603980783, "grad_norm": 0.8349728051622266, "learning_rate": 1.5157664719596565e-07, "loss": 0.2379, "step": 26920 }, { "epoch": 0.923850377487989, "grad_norm": 0.7762192491372993, "learning_rate": 1.514408769519937e-07, "loss": 0.285, "step": 26921 }, { "epoch": 0.9238846945778998, "grad_norm": 0.7937618461915208, "learning_rate": 1.5130516660631644e-07, "loss": 0.244, "step": 26922 }, { "epoch": 0.9239190116678105, "grad_norm": 0.8701781188963201, "learning_rate": 1.5116951616061083e-07, "loss": 0.2189, "step": 26923 }, { "epoch": 0.9239533287577213, "grad_norm": 0.7211748748430515, "learning_rate": 1.5103392561655273e-07, "loss": 0.2332, "step": 26924 }, { "epoch": 0.9239876458476322, "grad_norm": 0.7906946453351629, "learning_rate": 1.5089839497581748e-07, "loss": 0.2062, "step": 26925 }, { "epoch": 0.9240219629375429, "grad_norm": 0.7528982172694574, "learning_rate": 1.5076292424007876e-07, "loss": 0.2277, "step": 26926 }, { "epoch": 0.9240562800274537, "grad_norm": 0.805018316471116, "learning_rate": 1.5062751341101077e-07, "loss": 0.2564, "step": 26927 }, { "epoch": 0.9240905971173644, "grad_norm": 0.8085502703017768, "learning_rate": 1.5049216249028498e-07, "loss": 0.2549, "step": 26928 }, { "epoch": 0.9241249142072753, "grad_norm": 0.812213308472576, "learning_rate": 1.5035687147957611e-07, "loss": 0.3029, "step": 26929 }, { "epoch": 0.924159231297186, "grad_norm": 0.8480554614253284, "learning_rate": 1.5022164038055231e-07, "loss": 0.2596, "step": 26930 }, { "epoch": 0.9241935483870968, "grad_norm": 0.8135432231732054, "learning_rate": 1.5008646919488724e-07, "loss": 0.2712, "step": 26931 }, { "epoch": 0.9242278654770075, "grad_norm": 0.7362712570801858, "learning_rate": 1.4995135792424896e-07, "loss": 0.2157, "step": 26932 }, { "epoch": 0.9242621825669183, "grad_norm": 0.6126205024178518, "learning_rate": 1.4981630657030678e-07, "loss": 0.2207, "step": 26933 }, { "epoch": 0.9242964996568291, "grad_norm": 0.7181592392135286, "learning_rate": 1.4968131513472928e-07, "loss": 0.271, "step": 26934 }, { "epoch": 0.9243308167467399, "grad_norm": 1.037770887914901, "learning_rate": 1.495463836191846e-07, "loss": 0.2588, "step": 26935 }, { "epoch": 0.9243651338366506, "grad_norm": 0.8728982618512006, "learning_rate": 1.4941151202533866e-07, "loss": 0.3521, "step": 26936 }, { "epoch": 0.9243994509265614, "grad_norm": 0.7024858873621584, "learning_rate": 1.4927670035485896e-07, "loss": 0.2063, "step": 26937 }, { "epoch": 0.9244337680164723, "grad_norm": 0.9201859461711692, "learning_rate": 1.4914194860941034e-07, "loss": 0.2087, "step": 26938 }, { "epoch": 0.924468085106383, "grad_norm": 0.7758788487241488, "learning_rate": 1.4900725679065643e-07, "loss": 0.2768, "step": 26939 }, { "epoch": 0.9245024021962938, "grad_norm": 0.8275549819144036, "learning_rate": 1.4887262490026312e-07, "loss": 0.2804, "step": 26940 }, { "epoch": 0.9245367192862045, "grad_norm": 0.755480347795765, "learning_rate": 1.487380529398924e-07, "loss": 0.2547, "step": 26941 }, { "epoch": 0.9245710363761153, "grad_norm": 0.8197279834035552, "learning_rate": 1.4860354091120633e-07, "loss": 0.2509, "step": 26942 }, { "epoch": 0.9246053534660261, "grad_norm": 0.7253880933533228, "learning_rate": 1.4846908881586852e-07, "loss": 0.3034, "step": 26943 }, { "epoch": 0.9246396705559369, "grad_norm": 0.7695358973877847, "learning_rate": 1.4833469665553825e-07, "loss": 0.2435, "step": 26944 }, { "epoch": 0.9246739876458476, "grad_norm": 0.8324890900439093, "learning_rate": 1.482003644318758e-07, "loss": 0.2713, "step": 26945 }, { "epoch": 0.9247083047357584, "grad_norm": 0.88671627280954, "learning_rate": 1.4806609214654156e-07, "loss": 0.2457, "step": 26946 }, { "epoch": 0.9247426218256691, "grad_norm": 0.9031580704554499, "learning_rate": 1.4793187980119416e-07, "loss": 0.314, "step": 26947 }, { "epoch": 0.92477693891558, "grad_norm": 0.7157976173580345, "learning_rate": 1.477977273974912e-07, "loss": 0.2162, "step": 26948 }, { "epoch": 0.9248112560054907, "grad_norm": 0.836930914503519, "learning_rate": 1.4766363493709025e-07, "loss": 0.2881, "step": 26949 }, { "epoch": 0.9248455730954015, "grad_norm": 0.806560223391154, "learning_rate": 1.4752960242164827e-07, "loss": 0.2742, "step": 26950 }, { "epoch": 0.9248798901853122, "grad_norm": 0.7375629424044636, "learning_rate": 1.4739562985281952e-07, "loss": 0.301, "step": 26951 }, { "epoch": 0.9249142072752231, "grad_norm": 0.7346422306408763, "learning_rate": 1.4726171723226156e-07, "loss": 0.25, "step": 26952 }, { "epoch": 0.9249485243651339, "grad_norm": 0.7590952331237821, "learning_rate": 1.4712786456162642e-07, "loss": 0.2945, "step": 26953 }, { "epoch": 0.9249828414550446, "grad_norm": 0.7603044633478944, "learning_rate": 1.4699407184256886e-07, "loss": 0.2763, "step": 26954 }, { "epoch": 0.9250171585449554, "grad_norm": 0.7524888119649455, "learning_rate": 1.4686033907674147e-07, "loss": 0.228, "step": 26955 }, { "epoch": 0.9250514756348661, "grad_norm": 0.8429957206211215, "learning_rate": 1.4672666626579623e-07, "loss": 0.2956, "step": 26956 }, { "epoch": 0.925085792724777, "grad_norm": 0.9194629904383811, "learning_rate": 1.4659305341138463e-07, "loss": 0.2851, "step": 26957 }, { "epoch": 0.9251201098146877, "grad_norm": 0.7840937973148295, "learning_rate": 1.4645950051515757e-07, "loss": 0.2672, "step": 26958 }, { "epoch": 0.9251544269045985, "grad_norm": 0.9007553407250233, "learning_rate": 1.463260075787648e-07, "loss": 0.3096, "step": 26959 }, { "epoch": 0.9251887439945092, "grad_norm": 0.8614046622949695, "learning_rate": 1.4619257460385506e-07, "loss": 0.2997, "step": 26960 }, { "epoch": 0.9252230610844201, "grad_norm": 0.9673891214292539, "learning_rate": 1.4605920159207752e-07, "loss": 0.3402, "step": 26961 }, { "epoch": 0.9252573781743308, "grad_norm": 1.1076521994969488, "learning_rate": 1.4592588854507817e-07, "loss": 0.2761, "step": 26962 }, { "epoch": 0.9252916952642416, "grad_norm": 0.8168129113085527, "learning_rate": 1.4579263546450616e-07, "loss": 0.2456, "step": 26963 }, { "epoch": 0.9253260123541523, "grad_norm": 0.9010649626568985, "learning_rate": 1.4565944235200692e-07, "loss": 0.2848, "step": 26964 }, { "epoch": 0.9253603294440631, "grad_norm": 0.727512036618782, "learning_rate": 1.4552630920922517e-07, "loss": 0.2545, "step": 26965 }, { "epoch": 0.925394646533974, "grad_norm": 0.8039392477133466, "learning_rate": 1.453932360378063e-07, "loss": 0.2649, "step": 26966 }, { "epoch": 0.9254289636238847, "grad_norm": 0.9654017651207745, "learning_rate": 1.45260222839394e-07, "loss": 0.3001, "step": 26967 }, { "epoch": 0.9254632807137955, "grad_norm": 0.8128064611777561, "learning_rate": 1.4512726961563196e-07, "loss": 0.3084, "step": 26968 }, { "epoch": 0.9254975978037062, "grad_norm": 0.7399436997007682, "learning_rate": 1.449943763681616e-07, "loss": 0.2517, "step": 26969 }, { "epoch": 0.925531914893617, "grad_norm": 0.7819877417587735, "learning_rate": 1.4486154309862555e-07, "loss": 0.2978, "step": 26970 }, { "epoch": 0.9255662319835278, "grad_norm": 0.774937107223164, "learning_rate": 1.447287698086647e-07, "loss": 0.2634, "step": 26971 }, { "epoch": 0.9256005490734386, "grad_norm": 0.7457990624485874, "learning_rate": 1.4459605649991937e-07, "loss": 0.2076, "step": 26972 }, { "epoch": 0.9256348661633493, "grad_norm": 0.754004528518055, "learning_rate": 1.4446340317402884e-07, "loss": 0.2713, "step": 26973 }, { "epoch": 0.9256691832532601, "grad_norm": 0.8397346102667478, "learning_rate": 1.4433080983263182e-07, "loss": 0.2616, "step": 26974 }, { "epoch": 0.925703500343171, "grad_norm": 0.7731594447463322, "learning_rate": 1.4419827647736694e-07, "loss": 0.2969, "step": 26975 }, { "epoch": 0.9257378174330817, "grad_norm": 0.8569637687292206, "learning_rate": 1.4406580310987017e-07, "loss": 0.2648, "step": 26976 }, { "epoch": 0.9257721345229925, "grad_norm": 0.8808886436690063, "learning_rate": 1.4393338973177961e-07, "loss": 0.2144, "step": 26977 }, { "epoch": 0.9258064516129032, "grad_norm": 0.7095876300024536, "learning_rate": 1.4380103634473008e-07, "loss": 0.203, "step": 26978 }, { "epoch": 0.925840768702814, "grad_norm": 0.7522855974920082, "learning_rate": 1.4366874295035694e-07, "loss": 0.261, "step": 26979 }, { "epoch": 0.9258750857927248, "grad_norm": 0.7262270302394737, "learning_rate": 1.435365095502944e-07, "loss": 0.2334, "step": 26980 }, { "epoch": 0.9259094028826356, "grad_norm": 0.7729620595988154, "learning_rate": 1.4340433614617677e-07, "loss": 0.2741, "step": 26981 }, { "epoch": 0.9259437199725463, "grad_norm": 0.7654500414117229, "learning_rate": 1.4327222273963658e-07, "loss": 0.277, "step": 26982 }, { "epoch": 0.9259780370624571, "grad_norm": 0.7839280061423229, "learning_rate": 1.4314016933230423e-07, "loss": 0.2775, "step": 26983 }, { "epoch": 0.9260123541523679, "grad_norm": 0.7629552877214826, "learning_rate": 1.4300817592581395e-07, "loss": 0.2437, "step": 26984 }, { "epoch": 0.9260466712422787, "grad_norm": 0.7270297863763799, "learning_rate": 1.4287624252179444e-07, "loss": 0.2063, "step": 26985 }, { "epoch": 0.9260809883321894, "grad_norm": 0.7726512263416528, "learning_rate": 1.427443691218755e-07, "loss": 0.2859, "step": 26986 }, { "epoch": 0.9261153054221002, "grad_norm": 0.7989906217875549, "learning_rate": 1.4261255572768805e-07, "loss": 0.2713, "step": 26987 }, { "epoch": 0.9261496225120109, "grad_norm": 0.8328060299295897, "learning_rate": 1.424808023408586e-07, "loss": 0.2707, "step": 26988 }, { "epoch": 0.9261839396019218, "grad_norm": 0.7480394044806766, "learning_rate": 1.4234910896301579e-07, "loss": 0.3037, "step": 26989 }, { "epoch": 0.9262182566918326, "grad_norm": 0.7935397072912848, "learning_rate": 1.4221747559578557e-07, "loss": 0.2607, "step": 26990 }, { "epoch": 0.9262525737817433, "grad_norm": 0.802093395529938, "learning_rate": 1.4208590224079554e-07, "loss": 0.2385, "step": 26991 }, { "epoch": 0.9262868908716541, "grad_norm": 0.8371910021216852, "learning_rate": 1.4195438889966994e-07, "loss": 0.2116, "step": 26992 }, { "epoch": 0.9263212079615648, "grad_norm": 0.7761721429750604, "learning_rate": 1.418229355740347e-07, "loss": 0.2903, "step": 26993 }, { "epoch": 0.9263555250514757, "grad_norm": 0.7984361520671083, "learning_rate": 1.416915422655124e-07, "loss": 0.2097, "step": 26994 }, { "epoch": 0.9263898421413864, "grad_norm": 0.7977680700898128, "learning_rate": 1.4156020897572676e-07, "loss": 0.2107, "step": 26995 }, { "epoch": 0.9264241592312972, "grad_norm": 0.7607847162630306, "learning_rate": 1.414289357063009e-07, "loss": 0.2401, "step": 26996 }, { "epoch": 0.9264584763212079, "grad_norm": 0.9246694567815933, "learning_rate": 1.4129772245885521e-07, "loss": 0.2126, "step": 26997 }, { "epoch": 0.9264927934111188, "grad_norm": 0.8623056947597594, "learning_rate": 1.4116656923501226e-07, "loss": 0.3122, "step": 26998 }, { "epoch": 0.9265271105010295, "grad_norm": 0.7599557663362517, "learning_rate": 1.4103547603639134e-07, "loss": 0.2772, "step": 26999 }, { "epoch": 0.9265614275909403, "grad_norm": 0.7149506981882214, "learning_rate": 1.4090444286461168e-07, "loss": 0.2402, "step": 27000 }, { "epoch": 0.926595744680851, "grad_norm": 0.7451949783844813, "learning_rate": 1.4077346972129257e-07, "loss": 0.2563, "step": 27001 }, { "epoch": 0.9266300617707618, "grad_norm": 0.8858812901332389, "learning_rate": 1.4064255660805216e-07, "loss": 0.2745, "step": 27002 }, { "epoch": 0.9266643788606727, "grad_norm": 0.7165209923960382, "learning_rate": 1.4051170352650744e-07, "loss": 0.2383, "step": 27003 }, { "epoch": 0.9266986959505834, "grad_norm": 0.706874713830468, "learning_rate": 1.4038091047827497e-07, "loss": 0.216, "step": 27004 }, { "epoch": 0.9267330130404942, "grad_norm": 0.7086552457079385, "learning_rate": 1.4025017746497115e-07, "loss": 0.2238, "step": 27005 }, { "epoch": 0.9267673301304049, "grad_norm": 0.8651011300929005, "learning_rate": 1.401195044882092e-07, "loss": 0.2225, "step": 27006 }, { "epoch": 0.9268016472203158, "grad_norm": 0.8405740564740211, "learning_rate": 1.399888915496056e-07, "loss": 0.2828, "step": 27007 }, { "epoch": 0.9268359643102265, "grad_norm": 0.7333795353358256, "learning_rate": 1.3985833865077291e-07, "loss": 0.2598, "step": 27008 }, { "epoch": 0.9268702814001373, "grad_norm": 0.7534537776741671, "learning_rate": 1.3972784579332377e-07, "loss": 0.2562, "step": 27009 }, { "epoch": 0.926904598490048, "grad_norm": 0.812311499902473, "learning_rate": 1.395974129788713e-07, "loss": 0.2657, "step": 27010 }, { "epoch": 0.9269389155799588, "grad_norm": 0.7652959015668587, "learning_rate": 1.3946704020902534e-07, "loss": 0.2955, "step": 27011 }, { "epoch": 0.9269732326698696, "grad_norm": 0.8002499816518339, "learning_rate": 1.393367274853974e-07, "loss": 0.2407, "step": 27012 }, { "epoch": 0.9270075497597804, "grad_norm": 0.7684963687263191, "learning_rate": 1.392064748095978e-07, "loss": 0.2169, "step": 27013 }, { "epoch": 0.9270418668496911, "grad_norm": 0.7891582754251648, "learning_rate": 1.390762821832342e-07, "loss": 0.259, "step": 27014 }, { "epoch": 0.9270761839396019, "grad_norm": 0.8314718757005577, "learning_rate": 1.389461496079164e-07, "loss": 0.2922, "step": 27015 }, { "epoch": 0.9271105010295126, "grad_norm": 0.7763757884054864, "learning_rate": 1.3881607708525146e-07, "loss": 0.2473, "step": 27016 }, { "epoch": 0.9271448181194235, "grad_norm": 0.6733467137259701, "learning_rate": 1.3868606461684642e-07, "loss": 0.2298, "step": 27017 }, { "epoch": 0.9271791352093343, "grad_norm": 0.87175816769164, "learning_rate": 1.3855611220430665e-07, "loss": 0.2988, "step": 27018 }, { "epoch": 0.927213452299245, "grad_norm": 0.7044866318985529, "learning_rate": 1.384262198492392e-07, "loss": 0.2772, "step": 27019 }, { "epoch": 0.9272477693891558, "grad_norm": 0.8774689449213995, "learning_rate": 1.382963875532467e-07, "loss": 0.2677, "step": 27020 }, { "epoch": 0.9272820864790666, "grad_norm": 0.8707983225247837, "learning_rate": 1.3816661531793506e-07, "loss": 0.3077, "step": 27021 }, { "epoch": 0.9273164035689774, "grad_norm": 0.7689680503548552, "learning_rate": 1.3803690314490582e-07, "loss": 0.3067, "step": 27022 }, { "epoch": 0.9273507206588881, "grad_norm": 0.7503950917449763, "learning_rate": 1.3790725103576263e-07, "loss": 0.2566, "step": 27023 }, { "epoch": 0.9273850377487989, "grad_norm": 0.7024247381900003, "learning_rate": 1.3777765899210649e-07, "loss": 0.2105, "step": 27024 }, { "epoch": 0.9274193548387096, "grad_norm": 0.6991232106262444, "learning_rate": 1.3764812701553832e-07, "loss": 0.2332, "step": 27025 }, { "epoch": 0.9274536719286205, "grad_norm": 0.8250872568316807, "learning_rate": 1.3751865510765905e-07, "loss": 0.2605, "step": 27026 }, { "epoch": 0.9274879890185312, "grad_norm": 0.9035250994419967, "learning_rate": 1.3738924327006742e-07, "loss": 0.2665, "step": 27027 }, { "epoch": 0.927522306108442, "grad_norm": 0.7285247231368503, "learning_rate": 1.372598915043627e-07, "loss": 0.2461, "step": 27028 }, { "epoch": 0.9275566231983527, "grad_norm": 0.8248605898810468, "learning_rate": 1.3713059981214195e-07, "loss": 0.2583, "step": 27029 }, { "epoch": 0.9275909402882636, "grad_norm": 0.7394970987302049, "learning_rate": 1.3700136819500388e-07, "loss": 0.224, "step": 27030 }, { "epoch": 0.9276252573781744, "grad_norm": 0.7272429726477428, "learning_rate": 1.3687219665454387e-07, "loss": 0.2443, "step": 27031 }, { "epoch": 0.9276595744680851, "grad_norm": 0.8392205216083185, "learning_rate": 1.367430851923579e-07, "loss": 0.2198, "step": 27032 }, { "epoch": 0.9276938915579959, "grad_norm": 0.7899194330535008, "learning_rate": 1.3661403381004135e-07, "loss": 0.2962, "step": 27033 }, { "epoch": 0.9277282086479066, "grad_norm": 0.7475566998236485, "learning_rate": 1.3648504250918847e-07, "loss": 0.2476, "step": 27034 }, { "epoch": 0.9277625257378175, "grad_norm": 0.7487789207234583, "learning_rate": 1.3635611129139192e-07, "loss": 0.2711, "step": 27035 }, { "epoch": 0.9277968428277282, "grad_norm": 0.7734363192377907, "learning_rate": 1.362272401582454e-07, "loss": 0.2289, "step": 27036 }, { "epoch": 0.927831159917639, "grad_norm": 0.8956212153932689, "learning_rate": 1.3609842911134097e-07, "loss": 0.2539, "step": 27037 }, { "epoch": 0.9278654770075497, "grad_norm": 0.804072400553721, "learning_rate": 1.3596967815226957e-07, "loss": 0.2887, "step": 27038 }, { "epoch": 0.9278997940974605, "grad_norm": 0.812700513741356, "learning_rate": 1.3584098728262163e-07, "loss": 0.2632, "step": 27039 }, { "epoch": 0.9279341111873713, "grad_norm": 0.7665290380295784, "learning_rate": 1.3571235650398806e-07, "loss": 0.2522, "step": 27040 }, { "epoch": 0.9279684282772821, "grad_norm": 0.8180981206073026, "learning_rate": 1.3558378581795594e-07, "loss": 0.3118, "step": 27041 }, { "epoch": 0.9280027453671928, "grad_norm": 0.7102125893334154, "learning_rate": 1.3545527522611567e-07, "loss": 0.3059, "step": 27042 }, { "epoch": 0.9280370624571036, "grad_norm": 0.7204265199438628, "learning_rate": 1.3532682473005375e-07, "loss": 0.2398, "step": 27043 }, { "epoch": 0.9280713795470145, "grad_norm": 0.777884080180961, "learning_rate": 1.3519843433135725e-07, "loss": 0.2468, "step": 27044 }, { "epoch": 0.9281056966369252, "grad_norm": 0.8185940755364318, "learning_rate": 1.3507010403161213e-07, "loss": 0.2559, "step": 27045 }, { "epoch": 0.928140013726836, "grad_norm": 0.8242507008299135, "learning_rate": 1.3494183383240378e-07, "loss": 0.254, "step": 27046 }, { "epoch": 0.9281743308167467, "grad_norm": 0.7720187032912775, "learning_rate": 1.3481362373531704e-07, "loss": 0.2336, "step": 27047 }, { "epoch": 0.9282086479066575, "grad_norm": 0.8594167892784808, "learning_rate": 1.3468547374193563e-07, "loss": 0.2721, "step": 27048 }, { "epoch": 0.9282429649965683, "grad_norm": 0.7686901996325046, "learning_rate": 1.3455738385384388e-07, "loss": 0.2391, "step": 27049 }, { "epoch": 0.9282772820864791, "grad_norm": 0.7244592750780541, "learning_rate": 1.344293540726216e-07, "loss": 0.2386, "step": 27050 }, { "epoch": 0.9283115991763898, "grad_norm": 0.8503860330567816, "learning_rate": 1.3430138439985307e-07, "loss": 0.2589, "step": 27051 }, { "epoch": 0.9283459162663006, "grad_norm": 0.751281216086656, "learning_rate": 1.3417347483711707e-07, "loss": 0.2741, "step": 27052 }, { "epoch": 0.9283802333562114, "grad_norm": 0.7344577459598977, "learning_rate": 1.3404562538599508e-07, "loss": 0.252, "step": 27053 }, { "epoch": 0.9284145504461222, "grad_norm": 0.8054693711192122, "learning_rate": 1.3391783604806696e-07, "loss": 0.2134, "step": 27054 }, { "epoch": 0.928448867536033, "grad_norm": 0.7077015318794441, "learning_rate": 1.3379010682490924e-07, "loss": 0.2996, "step": 27055 }, { "epoch": 0.9284831846259437, "grad_norm": 0.8327398141470862, "learning_rate": 1.3366243771810283e-07, "loss": 0.2288, "step": 27056 }, { "epoch": 0.9285175017158545, "grad_norm": 0.7148169459357702, "learning_rate": 1.3353482872922262e-07, "loss": 0.2298, "step": 27057 }, { "epoch": 0.9285518188057653, "grad_norm": 0.7420849083581158, "learning_rate": 1.3340727985984626e-07, "loss": 0.3, "step": 27058 }, { "epoch": 0.9285861358956761, "grad_norm": 0.8167381143221905, "learning_rate": 1.3327979111154854e-07, "loss": 0.2858, "step": 27059 }, { "epoch": 0.9286204529855868, "grad_norm": 0.7224795238008229, "learning_rate": 1.3315236248590547e-07, "loss": 0.2135, "step": 27060 }, { "epoch": 0.9286547700754976, "grad_norm": 0.8496185573402676, "learning_rate": 1.330249939844902e-07, "loss": 0.2938, "step": 27061 }, { "epoch": 0.9286890871654083, "grad_norm": 0.7255504436468883, "learning_rate": 1.3289768560887706e-07, "loss": 0.2522, "step": 27062 }, { "epoch": 0.9287234042553192, "grad_norm": 0.7931923787611541, "learning_rate": 1.3277043736063922e-07, "loss": 0.2894, "step": 27063 }, { "epoch": 0.9287577213452299, "grad_norm": 0.7255374872146155, "learning_rate": 1.326432492413471e-07, "loss": 0.2509, "step": 27064 }, { "epoch": 0.9287920384351407, "grad_norm": 0.8645489604715128, "learning_rate": 1.325161212525733e-07, "loss": 0.2465, "step": 27065 }, { "epoch": 0.9288263555250514, "grad_norm": 0.7561789444752236, "learning_rate": 1.323890533958877e-07, "loss": 0.2699, "step": 27066 }, { "epoch": 0.9288606726149623, "grad_norm": 0.7794881767668324, "learning_rate": 1.3226204567286016e-07, "loss": 0.2032, "step": 27067 }, { "epoch": 0.928894989704873, "grad_norm": 0.9058795370448421, "learning_rate": 1.3213509808505997e-07, "loss": 0.2928, "step": 27068 }, { "epoch": 0.9289293067947838, "grad_norm": 0.8001089335973844, "learning_rate": 1.3200821063405532e-07, "loss": 0.2809, "step": 27069 }, { "epoch": 0.9289636238846946, "grad_norm": 0.7180988258333642, "learning_rate": 1.3188138332141332e-07, "loss": 0.2248, "step": 27070 }, { "epoch": 0.9289979409746053, "grad_norm": 0.8845184352923606, "learning_rate": 1.3175461614870154e-07, "loss": 0.3229, "step": 27071 }, { "epoch": 0.9290322580645162, "grad_norm": 0.8317776004491609, "learning_rate": 1.3162790911748602e-07, "loss": 0.2109, "step": 27072 }, { "epoch": 0.9290665751544269, "grad_norm": 0.8167181191205454, "learning_rate": 1.3150126222933047e-07, "loss": 0.2381, "step": 27073 }, { "epoch": 0.9291008922443377, "grad_norm": 0.7163233655825167, "learning_rate": 1.313746754858014e-07, "loss": 0.2557, "step": 27074 }, { "epoch": 0.9291352093342484, "grad_norm": 0.7880953796073106, "learning_rate": 1.312481488884626e-07, "loss": 0.2668, "step": 27075 }, { "epoch": 0.9291695264241593, "grad_norm": 0.8891059434083793, "learning_rate": 1.3112168243887502e-07, "loss": 0.2011, "step": 27076 }, { "epoch": 0.92920384351407, "grad_norm": 0.7329271455103845, "learning_rate": 1.309952761386041e-07, "loss": 0.2739, "step": 27077 }, { "epoch": 0.9292381606039808, "grad_norm": 0.7055316734331535, "learning_rate": 1.3086892998920908e-07, "loss": 0.2509, "step": 27078 }, { "epoch": 0.9292724776938915, "grad_norm": 0.7408884201349665, "learning_rate": 1.3074264399225155e-07, "loss": 0.2653, "step": 27079 }, { "epoch": 0.9293067947838023, "grad_norm": 0.7376453892475626, "learning_rate": 1.3061641814929137e-07, "loss": 0.1927, "step": 27080 }, { "epoch": 0.9293411118737132, "grad_norm": 0.7436373781035317, "learning_rate": 1.3049025246188896e-07, "loss": 0.2278, "step": 27081 }, { "epoch": 0.9293754289636239, "grad_norm": 0.7553839501413377, "learning_rate": 1.3036414693160137e-07, "loss": 0.3142, "step": 27082 }, { "epoch": 0.9294097460535347, "grad_norm": 0.7150451524232843, "learning_rate": 1.3023810155998796e-07, "loss": 0.2546, "step": 27083 }, { "epoch": 0.9294440631434454, "grad_norm": 0.7990968947257129, "learning_rate": 1.3011211634860522e-07, "loss": 0.2325, "step": 27084 }, { "epoch": 0.9294783802333562, "grad_norm": 0.7727834807809477, "learning_rate": 1.2998619129900857e-07, "loss": 0.3053, "step": 27085 }, { "epoch": 0.929512697323267, "grad_norm": 0.7774538354160015, "learning_rate": 1.298603264127557e-07, "loss": 0.2882, "step": 27086 }, { "epoch": 0.9295470144131778, "grad_norm": 0.684879924654999, "learning_rate": 1.2973452169139977e-07, "loss": 0.2868, "step": 27087 }, { "epoch": 0.9295813315030885, "grad_norm": 0.8506488091239122, "learning_rate": 1.2960877713649623e-07, "loss": 0.2991, "step": 27088 }, { "epoch": 0.9296156485929993, "grad_norm": 0.7943927583565051, "learning_rate": 1.294830927495977e-07, "loss": 0.3184, "step": 27089 }, { "epoch": 0.9296499656829101, "grad_norm": 0.7948508904962761, "learning_rate": 1.293574685322574e-07, "loss": 0.2376, "step": 27090 }, { "epoch": 0.9296842827728209, "grad_norm": 0.888413448543552, "learning_rate": 1.2923190448602629e-07, "loss": 0.2434, "step": 27091 }, { "epoch": 0.9297185998627316, "grad_norm": 0.7370854775223941, "learning_rate": 1.291064006124565e-07, "loss": 0.2687, "step": 27092 }, { "epoch": 0.9297529169526424, "grad_norm": 0.7422152232234281, "learning_rate": 1.2898095691309843e-07, "loss": 0.2213, "step": 27093 }, { "epoch": 0.9297872340425531, "grad_norm": 0.7946752750768288, "learning_rate": 1.288555733895014e-07, "loss": 0.2566, "step": 27094 }, { "epoch": 0.929821551132464, "grad_norm": 0.7259211710141231, "learning_rate": 1.2873025004321472e-07, "loss": 0.2413, "step": 27095 }, { "epoch": 0.9298558682223748, "grad_norm": 0.7436862789788702, "learning_rate": 1.2860498687578604e-07, "loss": 0.2565, "step": 27096 }, { "epoch": 0.9298901853122855, "grad_norm": 0.8081587315880361, "learning_rate": 1.284797838887636e-07, "loss": 0.2979, "step": 27097 }, { "epoch": 0.9299245024021963, "grad_norm": 0.7505069308630576, "learning_rate": 1.2835464108369388e-07, "loss": 0.34, "step": 27098 }, { "epoch": 0.929958819492107, "grad_norm": 0.779763537365521, "learning_rate": 1.2822955846212237e-07, "loss": 0.2393, "step": 27099 }, { "epoch": 0.9299931365820179, "grad_norm": 0.8075627376975496, "learning_rate": 1.2810453602559504e-07, "loss": 0.2441, "step": 27100 }, { "epoch": 0.9300274536719286, "grad_norm": 0.781772139836515, "learning_rate": 1.2797957377565618e-07, "loss": 0.2399, "step": 27101 }, { "epoch": 0.9300617707618394, "grad_norm": 0.7900644355655526, "learning_rate": 1.278546717138496e-07, "loss": 0.2436, "step": 27102 }, { "epoch": 0.9300960878517501, "grad_norm": 0.8405643785534668, "learning_rate": 1.2772982984171735e-07, "loss": 0.249, "step": 27103 }, { "epoch": 0.930130404941661, "grad_norm": 0.7210125453950559, "learning_rate": 1.2760504816080322e-07, "loss": 0.294, "step": 27104 }, { "epoch": 0.9301647220315717, "grad_norm": 0.8119723914681267, "learning_rate": 1.2748032667264765e-07, "loss": 0.2554, "step": 27105 }, { "epoch": 0.9301990391214825, "grad_norm": 0.7551069332159727, "learning_rate": 1.2735566537879218e-07, "loss": 0.2908, "step": 27106 }, { "epoch": 0.9302333562113932, "grad_norm": 0.7596764802357939, "learning_rate": 1.2723106428077724e-07, "loss": 0.2503, "step": 27107 }, { "epoch": 0.930267673301304, "grad_norm": 0.7839201184040595, "learning_rate": 1.2710652338013996e-07, "loss": 0.2663, "step": 27108 }, { "epoch": 0.9303019903912149, "grad_norm": 0.8025744456330686, "learning_rate": 1.2698204267842128e-07, "loss": 0.2467, "step": 27109 }, { "epoch": 0.9303363074811256, "grad_norm": 0.8057920221120811, "learning_rate": 1.268576221771578e-07, "loss": 0.2686, "step": 27110 }, { "epoch": 0.9303706245710364, "grad_norm": 0.7943326755951696, "learning_rate": 1.267332618778866e-07, "loss": 0.2475, "step": 27111 }, { "epoch": 0.9304049416609471, "grad_norm": 0.8775674820169087, "learning_rate": 1.2660896178214478e-07, "loss": 0.2749, "step": 27112 }, { "epoch": 0.930439258750858, "grad_norm": 0.7454605899432538, "learning_rate": 1.2648472189146721e-07, "loss": 0.2558, "step": 27113 }, { "epoch": 0.9304735758407687, "grad_norm": 0.7631140352407008, "learning_rate": 1.263605422073888e-07, "loss": 0.2333, "step": 27114 }, { "epoch": 0.9305078929306795, "grad_norm": 0.7218582301185834, "learning_rate": 1.2623642273144388e-07, "loss": 0.229, "step": 27115 }, { "epoch": 0.9305422100205902, "grad_norm": 0.8182789989041289, "learning_rate": 1.2611236346516564e-07, "loss": 0.2654, "step": 27116 }, { "epoch": 0.930576527110501, "grad_norm": 0.7881685952417368, "learning_rate": 1.259883644100862e-07, "loss": 0.2436, "step": 27117 }, { "epoch": 0.9306108442004118, "grad_norm": 0.7825648660371086, "learning_rate": 1.2586442556773882e-07, "loss": 0.2994, "step": 27118 }, { "epoch": 0.9306451612903226, "grad_norm": 0.7557446483740035, "learning_rate": 1.257405469396533e-07, "loss": 0.2659, "step": 27119 }, { "epoch": 0.9306794783802333, "grad_norm": 0.7159790586772307, "learning_rate": 1.2561672852736073e-07, "loss": 0.2409, "step": 27120 }, { "epoch": 0.9307137954701441, "grad_norm": 0.8183557130721686, "learning_rate": 1.2549297033239094e-07, "loss": 0.2969, "step": 27121 }, { "epoch": 0.9307481125600549, "grad_norm": 0.7864680383524912, "learning_rate": 1.253692723562716e-07, "loss": 0.252, "step": 27122 }, { "epoch": 0.9307824296499657, "grad_norm": 0.7822156436559163, "learning_rate": 1.2524563460053207e-07, "loss": 0.2583, "step": 27123 }, { "epoch": 0.9308167467398765, "grad_norm": 0.8043585973557378, "learning_rate": 1.2512205706669945e-07, "loss": 0.2928, "step": 27124 }, { "epoch": 0.9308510638297872, "grad_norm": 0.8176709810664591, "learning_rate": 1.2499853975629972e-07, "loss": 0.2327, "step": 27125 }, { "epoch": 0.930885380919698, "grad_norm": 0.7856265493743274, "learning_rate": 1.2487508267085946e-07, "loss": 0.2956, "step": 27126 }, { "epoch": 0.9309196980096088, "grad_norm": 0.7833463594075486, "learning_rate": 1.247516858119041e-07, "loss": 0.3097, "step": 27127 }, { "epoch": 0.9309540150995196, "grad_norm": 0.7557931490820146, "learning_rate": 1.246283491809569e-07, "loss": 0.2403, "step": 27128 }, { "epoch": 0.9309883321894303, "grad_norm": 0.8629940545707685, "learning_rate": 1.2450507277954328e-07, "loss": 0.1921, "step": 27129 }, { "epoch": 0.9310226492793411, "grad_norm": 0.7432113010807883, "learning_rate": 1.243818566091848e-07, "loss": 0.2234, "step": 27130 }, { "epoch": 0.9310569663692518, "grad_norm": 0.7166038273137466, "learning_rate": 1.2425870067140355e-07, "loss": 0.2171, "step": 27131 }, { "epoch": 0.9310912834591627, "grad_norm": 0.7641075369636597, "learning_rate": 1.2413560496772226e-07, "loss": 0.2416, "step": 27132 }, { "epoch": 0.9311256005490735, "grad_norm": 0.8602889133210372, "learning_rate": 1.2401256949966078e-07, "loss": 0.277, "step": 27133 }, { "epoch": 0.9311599176389842, "grad_norm": 0.7876286967847009, "learning_rate": 1.2388959426873903e-07, "loss": 0.2378, "step": 27134 }, { "epoch": 0.931194234728895, "grad_norm": 0.8836608959966337, "learning_rate": 1.2376667927647634e-07, "loss": 0.2888, "step": 27135 }, { "epoch": 0.9312285518188058, "grad_norm": 0.7984685672480608, "learning_rate": 1.2364382452439094e-07, "loss": 0.2361, "step": 27136 }, { "epoch": 0.9312628689087166, "grad_norm": 0.7920753921, "learning_rate": 1.2352103001400107e-07, "loss": 0.2498, "step": 27137 }, { "epoch": 0.9312971859986273, "grad_norm": 0.7552875623594236, "learning_rate": 1.2339829574682327e-07, "loss": 0.2337, "step": 27138 }, { "epoch": 0.9313315030885381, "grad_norm": 0.8158705423198643, "learning_rate": 1.2327562172437413e-07, "loss": 0.2717, "step": 27139 }, { "epoch": 0.9313658201784488, "grad_norm": 0.7444571289417052, "learning_rate": 1.2315300794816854e-07, "loss": 0.2676, "step": 27140 }, { "epoch": 0.9314001372683597, "grad_norm": 0.7716886414483216, "learning_rate": 1.2303045441972194e-07, "loss": 0.2774, "step": 27141 }, { "epoch": 0.9314344543582704, "grad_norm": 0.9120919153469618, "learning_rate": 1.229079611405487e-07, "loss": 0.288, "step": 27142 }, { "epoch": 0.9314687714481812, "grad_norm": 0.7198265514048704, "learning_rate": 1.2278552811216038e-07, "loss": 0.2552, "step": 27143 }, { "epoch": 0.9315030885380919, "grad_norm": 0.7717749992672509, "learning_rate": 1.2266315533607186e-07, "loss": 0.2272, "step": 27144 }, { "epoch": 0.9315374056280027, "grad_norm": 0.8048849055135489, "learning_rate": 1.2254084281379254e-07, "loss": 0.2678, "step": 27145 }, { "epoch": 0.9315717227179136, "grad_norm": 0.7778273531322731, "learning_rate": 1.2241859054683502e-07, "loss": 0.218, "step": 27146 }, { "epoch": 0.9316060398078243, "grad_norm": 0.7486579464939008, "learning_rate": 1.2229639853670873e-07, "loss": 0.2517, "step": 27147 }, { "epoch": 0.9316403568977351, "grad_norm": 0.7438876596534666, "learning_rate": 1.2217426678492405e-07, "loss": 0.2276, "step": 27148 }, { "epoch": 0.9316746739876458, "grad_norm": 0.761989060093256, "learning_rate": 1.2205219529298928e-07, "loss": 0.2444, "step": 27149 }, { "epoch": 0.9317089910775567, "grad_norm": 0.7390920222069878, "learning_rate": 1.2193018406241207e-07, "loss": 0.2213, "step": 27150 }, { "epoch": 0.9317433081674674, "grad_norm": 0.8172759565126038, "learning_rate": 1.2180823309470014e-07, "loss": 0.2399, "step": 27151 }, { "epoch": 0.9317776252573782, "grad_norm": 0.8040025195964107, "learning_rate": 1.2168634239136058e-07, "loss": 0.2366, "step": 27152 }, { "epoch": 0.9318119423472889, "grad_norm": 0.7889794114553677, "learning_rate": 1.2156451195389885e-07, "loss": 0.3304, "step": 27153 }, { "epoch": 0.9318462594371997, "grad_norm": 0.803316515240155, "learning_rate": 1.2144274178381877e-07, "loss": 0.2562, "step": 27154 }, { "epoch": 0.9318805765271105, "grad_norm": 0.8135990600138853, "learning_rate": 1.2132103188262745e-07, "loss": 0.2614, "step": 27155 }, { "epoch": 0.9319148936170213, "grad_norm": 0.799988074517732, "learning_rate": 1.2119938225182537e-07, "loss": 0.3013, "step": 27156 }, { "epoch": 0.931949210706932, "grad_norm": 0.7081505453675195, "learning_rate": 1.210777928929169e-07, "loss": 0.2646, "step": 27157 }, { "epoch": 0.9319835277968428, "grad_norm": 0.7692146411297337, "learning_rate": 1.2095626380740466e-07, "loss": 0.2247, "step": 27158 }, { "epoch": 0.9320178448867537, "grad_norm": 0.7994320889699581, "learning_rate": 1.208347949967892e-07, "loss": 0.2412, "step": 27159 }, { "epoch": 0.9320521619766644, "grad_norm": 0.8189818249308887, "learning_rate": 1.2071338646257147e-07, "loss": 0.2802, "step": 27160 }, { "epoch": 0.9320864790665752, "grad_norm": 0.7993327504155958, "learning_rate": 1.2059203820625033e-07, "loss": 0.2609, "step": 27161 }, { "epoch": 0.9321207961564859, "grad_norm": 0.7468033559290121, "learning_rate": 1.2047075022932675e-07, "loss": 0.285, "step": 27162 }, { "epoch": 0.9321551132463967, "grad_norm": 0.7656474311226011, "learning_rate": 1.2034952253329735e-07, "loss": 0.2816, "step": 27163 }, { "epoch": 0.9321894303363075, "grad_norm": 0.8241942808839146, "learning_rate": 1.2022835511966034e-07, "loss": 0.2692, "step": 27164 }, { "epoch": 0.9322237474262183, "grad_norm": 0.7760634384833446, "learning_rate": 1.20107247989914e-07, "loss": 0.2396, "step": 27165 }, { "epoch": 0.932258064516129, "grad_norm": 0.7589252429186581, "learning_rate": 1.199862011455516e-07, "loss": 0.2984, "step": 27166 }, { "epoch": 0.9322923816060398, "grad_norm": 0.9072435974837338, "learning_rate": 1.1986521458807131e-07, "loss": 0.3545, "step": 27167 }, { "epoch": 0.9323266986959505, "grad_norm": 0.7809331883659382, "learning_rate": 1.197442883189659e-07, "loss": 0.2071, "step": 27168 }, { "epoch": 0.9323610157858614, "grad_norm": 0.826272560611013, "learning_rate": 1.1962342233973023e-07, "loss": 0.2755, "step": 27169 }, { "epoch": 0.9323953328757721, "grad_norm": 0.7982214214914616, "learning_rate": 1.195026166518576e-07, "loss": 0.2509, "step": 27170 }, { "epoch": 0.9324296499656829, "grad_norm": 0.8062316095036092, "learning_rate": 1.1938187125683954e-07, "loss": 0.2314, "step": 27171 }, { "epoch": 0.9324639670555936, "grad_norm": 0.7077787153662269, "learning_rate": 1.1926118615616822e-07, "loss": 0.2578, "step": 27172 }, { "epoch": 0.9324982841455045, "grad_norm": 0.7978358696003494, "learning_rate": 1.1914056135133467e-07, "loss": 0.2829, "step": 27173 }, { "epoch": 0.9325326012354153, "grad_norm": 0.716578225992631, "learning_rate": 1.1901999684382936e-07, "loss": 0.2037, "step": 27174 }, { "epoch": 0.932566918325326, "grad_norm": 0.698330981525376, "learning_rate": 1.1889949263514056e-07, "loss": 0.264, "step": 27175 }, { "epoch": 0.9326012354152368, "grad_norm": 0.8241512619894408, "learning_rate": 1.1877904872675816e-07, "loss": 0.2506, "step": 27176 }, { "epoch": 0.9326355525051475, "grad_norm": 0.7052259626836798, "learning_rate": 1.1865866512016988e-07, "loss": 0.2188, "step": 27177 }, { "epoch": 0.9326698695950584, "grad_norm": 0.7452927383044282, "learning_rate": 1.185383418168623e-07, "loss": 0.2269, "step": 27178 }, { "epoch": 0.9327041866849691, "grad_norm": 0.8506954616241689, "learning_rate": 1.1841807881832201e-07, "loss": 0.2403, "step": 27179 }, { "epoch": 0.9327385037748799, "grad_norm": 0.8063312771312231, "learning_rate": 1.1829787612603561e-07, "loss": 0.2618, "step": 27180 }, { "epoch": 0.9327728208647906, "grad_norm": 0.7556584949936218, "learning_rate": 1.1817773374148688e-07, "loss": 0.2751, "step": 27181 }, { "epoch": 0.9328071379547015, "grad_norm": 0.7985908806901321, "learning_rate": 1.1805765166616079e-07, "loss": 0.2836, "step": 27182 }, { "epoch": 0.9328414550446122, "grad_norm": 0.7978822371261339, "learning_rate": 1.1793762990154056e-07, "loss": 0.2867, "step": 27183 }, { "epoch": 0.932875772134523, "grad_norm": 0.8231458933287138, "learning_rate": 1.1781766844910947e-07, "loss": 0.2251, "step": 27184 }, { "epoch": 0.9329100892244337, "grad_norm": 0.8149096579988901, "learning_rate": 1.1769776731034855e-07, "loss": 0.3029, "step": 27185 }, { "epoch": 0.9329444063143445, "grad_norm": 0.7935932257779477, "learning_rate": 1.1757792648673938e-07, "loss": 0.3002, "step": 27186 }, { "epoch": 0.9329787234042554, "grad_norm": 0.8236778101279718, "learning_rate": 1.1745814597976301e-07, "loss": 0.2702, "step": 27187 }, { "epoch": 0.9330130404941661, "grad_norm": 0.8026096492266126, "learning_rate": 1.1733842579089882e-07, "loss": 0.2504, "step": 27188 }, { "epoch": 0.9330473575840769, "grad_norm": 0.759901278420328, "learning_rate": 1.1721876592162507e-07, "loss": 0.2955, "step": 27189 }, { "epoch": 0.9330816746739876, "grad_norm": 0.7887579554198864, "learning_rate": 1.1709916637342166e-07, "loss": 0.26, "step": 27190 }, { "epoch": 0.9331159917638984, "grad_norm": 0.7326118096335006, "learning_rate": 1.1697962714776468e-07, "loss": 0.2285, "step": 27191 }, { "epoch": 0.9331503088538092, "grad_norm": 0.7870087200244225, "learning_rate": 1.1686014824613123e-07, "loss": 0.2669, "step": 27192 }, { "epoch": 0.93318462594372, "grad_norm": 0.8167241172961063, "learning_rate": 1.1674072966999739e-07, "loss": 0.2608, "step": 27193 }, { "epoch": 0.9332189430336307, "grad_norm": 0.8062583118517275, "learning_rate": 1.1662137142083862e-07, "loss": 0.2277, "step": 27194 }, { "epoch": 0.9332532601235415, "grad_norm": 1.093818829419459, "learning_rate": 1.1650207350012933e-07, "loss": 0.2599, "step": 27195 }, { "epoch": 0.9332875772134523, "grad_norm": 0.8225991309756346, "learning_rate": 1.163828359093433e-07, "loss": 0.2725, "step": 27196 }, { "epoch": 0.9333218943033631, "grad_norm": 0.6594817033807133, "learning_rate": 1.1626365864995381e-07, "loss": 0.2475, "step": 27197 }, { "epoch": 0.9333562113932738, "grad_norm": 0.727403350968467, "learning_rate": 1.1614454172343248e-07, "loss": 0.2281, "step": 27198 }, { "epoch": 0.9333905284831846, "grad_norm": 0.7627553936294047, "learning_rate": 1.16025485131252e-07, "loss": 0.2332, "step": 27199 }, { "epoch": 0.9334248455730954, "grad_norm": 0.9008913441792357, "learning_rate": 1.1590648887488177e-07, "loss": 0.2485, "step": 27200 }, { "epoch": 0.9334591626630062, "grad_norm": 0.7860054058824909, "learning_rate": 1.157875529557928e-07, "loss": 0.2484, "step": 27201 }, { "epoch": 0.933493479752917, "grad_norm": 0.8181744878346733, "learning_rate": 1.1566867737545451e-07, "loss": 0.2801, "step": 27202 }, { "epoch": 0.9335277968428277, "grad_norm": 0.8836367914124011, "learning_rate": 1.1554986213533458e-07, "loss": 0.225, "step": 27203 }, { "epoch": 0.9335621139327385, "grad_norm": 0.7879272230024679, "learning_rate": 1.1543110723690187e-07, "loss": 0.2578, "step": 27204 }, { "epoch": 0.9335964310226493, "grad_norm": 0.8668573243571605, "learning_rate": 1.1531241268162297e-07, "loss": 0.2751, "step": 27205 }, { "epoch": 0.9336307481125601, "grad_norm": 0.812295985284928, "learning_rate": 1.1519377847096447e-07, "loss": 0.2528, "step": 27206 }, { "epoch": 0.9336650652024708, "grad_norm": 0.7997835212174483, "learning_rate": 1.1507520460639021e-07, "loss": 0.3397, "step": 27207 }, { "epoch": 0.9336993822923816, "grad_norm": 0.833351563846748, "learning_rate": 1.1495669108936791e-07, "loss": 0.3145, "step": 27208 }, { "epoch": 0.9337336993822923, "grad_norm": 0.8755971395808643, "learning_rate": 1.1483823792136029e-07, "loss": 0.2944, "step": 27209 }, { "epoch": 0.9337680164722032, "grad_norm": 0.7791133339331858, "learning_rate": 1.1471984510383006e-07, "loss": 0.2583, "step": 27210 }, { "epoch": 0.933802333562114, "grad_norm": 0.8153217498120418, "learning_rate": 1.1460151263824105e-07, "loss": 0.2513, "step": 27211 }, { "epoch": 0.9338366506520247, "grad_norm": 0.733552550328575, "learning_rate": 1.1448324052605375e-07, "loss": 0.2689, "step": 27212 }, { "epoch": 0.9338709677419355, "grad_norm": 0.729593237290242, "learning_rate": 1.143650287687309e-07, "loss": 0.2685, "step": 27213 }, { "epoch": 0.9339052848318462, "grad_norm": 0.7682903342529864, "learning_rate": 1.1424687736773188e-07, "loss": 0.2512, "step": 27214 }, { "epoch": 0.9339396019217571, "grad_norm": 0.7600730250430325, "learning_rate": 1.1412878632451608e-07, "loss": 0.2357, "step": 27215 }, { "epoch": 0.9339739190116678, "grad_norm": 0.7193368637616563, "learning_rate": 1.1401075564054287e-07, "loss": 0.2097, "step": 27216 }, { "epoch": 0.9340082361015786, "grad_norm": 0.6819953199551495, "learning_rate": 1.1389278531727e-07, "loss": 0.2537, "step": 27217 }, { "epoch": 0.9340425531914893, "grad_norm": 0.7570266532377807, "learning_rate": 1.1377487535615572e-07, "loss": 0.2316, "step": 27218 }, { "epoch": 0.9340768702814002, "grad_norm": 0.7430912966779462, "learning_rate": 1.1365702575865556e-07, "loss": 0.2472, "step": 27219 }, { "epoch": 0.9341111873713109, "grad_norm": 0.7047565972820296, "learning_rate": 1.1353923652622667e-07, "loss": 0.2276, "step": 27220 }, { "epoch": 0.9341455044612217, "grad_norm": 0.7692325990959775, "learning_rate": 1.1342150766032234e-07, "loss": 0.252, "step": 27221 }, { "epoch": 0.9341798215511324, "grad_norm": 0.8680600349242851, "learning_rate": 1.1330383916239917e-07, "loss": 0.2668, "step": 27222 }, { "epoch": 0.9342141386410432, "grad_norm": 0.8843697754466482, "learning_rate": 1.131862310339088e-07, "loss": 0.2447, "step": 27223 }, { "epoch": 0.934248455730954, "grad_norm": 0.5824497187993957, "learning_rate": 1.1306868327630505e-07, "loss": 0.1963, "step": 27224 }, { "epoch": 0.9342827728208648, "grad_norm": 0.8515002242179216, "learning_rate": 1.129511958910412e-07, "loss": 0.2974, "step": 27225 }, { "epoch": 0.9343170899107756, "grad_norm": 0.8984186499751768, "learning_rate": 1.1283376887956666e-07, "loss": 0.277, "step": 27226 }, { "epoch": 0.9343514070006863, "grad_norm": 0.859779806997223, "learning_rate": 1.1271640224333358e-07, "loss": 0.2966, "step": 27227 }, { "epoch": 0.9343857240905972, "grad_norm": 0.7551818220846659, "learning_rate": 1.1259909598379081e-07, "loss": 0.2192, "step": 27228 }, { "epoch": 0.9344200411805079, "grad_norm": 0.8875089196355473, "learning_rate": 1.1248185010238832e-07, "loss": 0.2692, "step": 27229 }, { "epoch": 0.9344543582704187, "grad_norm": 0.798719537471716, "learning_rate": 1.1236466460057438e-07, "loss": 0.2945, "step": 27230 }, { "epoch": 0.9344886753603294, "grad_norm": 0.7041622342956333, "learning_rate": 1.1224753947979616e-07, "loss": 0.2268, "step": 27231 }, { "epoch": 0.9345229924502402, "grad_norm": 0.8600935580541583, "learning_rate": 1.1213047474150196e-07, "loss": 0.2657, "step": 27232 }, { "epoch": 0.934557309540151, "grad_norm": 0.7656683512618784, "learning_rate": 1.1201347038713561e-07, "loss": 0.2272, "step": 27233 }, { "epoch": 0.9345916266300618, "grad_norm": 0.8106154548862335, "learning_rate": 1.1189652641814541e-07, "loss": 0.2737, "step": 27234 }, { "epoch": 0.9346259437199725, "grad_norm": 0.7852685534815045, "learning_rate": 1.117796428359741e-07, "loss": 0.2853, "step": 27235 }, { "epoch": 0.9346602608098833, "grad_norm": 0.8526728745495208, "learning_rate": 1.1166281964206604e-07, "loss": 0.2731, "step": 27236 }, { "epoch": 0.934694577899794, "grad_norm": 0.8424035073837692, "learning_rate": 1.1154605683786457e-07, "loss": 0.2633, "step": 27237 }, { "epoch": 0.9347288949897049, "grad_norm": 0.8274828719970797, "learning_rate": 1.1142935442481239e-07, "loss": 0.2195, "step": 27238 }, { "epoch": 0.9347632120796157, "grad_norm": 0.7080266015523297, "learning_rate": 1.1131271240435059e-07, "loss": 0.2149, "step": 27239 }, { "epoch": 0.9347975291695264, "grad_norm": 0.6974598592332858, "learning_rate": 1.111961307779208e-07, "loss": 0.2472, "step": 27240 }, { "epoch": 0.9348318462594372, "grad_norm": 0.754596898967811, "learning_rate": 1.110796095469635e-07, "loss": 0.2424, "step": 27241 }, { "epoch": 0.934866163349348, "grad_norm": 0.7321583621955422, "learning_rate": 1.1096314871291703e-07, "loss": 0.3058, "step": 27242 }, { "epoch": 0.9349004804392588, "grad_norm": 0.7645490900241051, "learning_rate": 1.1084674827722131e-07, "loss": 0.2759, "step": 27243 }, { "epoch": 0.9349347975291695, "grad_norm": 0.7000505563324144, "learning_rate": 1.1073040824131299e-07, "loss": 0.221, "step": 27244 }, { "epoch": 0.9349691146190803, "grad_norm": 0.8434244098296093, "learning_rate": 1.1061412860663146e-07, "loss": 0.2922, "step": 27245 }, { "epoch": 0.935003431708991, "grad_norm": 0.663335115482748, "learning_rate": 1.1049790937461114e-07, "loss": 0.247, "step": 27246 }, { "epoch": 0.9350377487989019, "grad_norm": 0.8846271308407522, "learning_rate": 1.103817505466881e-07, "loss": 0.2656, "step": 27247 }, { "epoch": 0.9350720658888126, "grad_norm": 0.8078889660409914, "learning_rate": 1.102656521242984e-07, "loss": 0.2551, "step": 27248 }, { "epoch": 0.9351063829787234, "grad_norm": 0.7679296342009063, "learning_rate": 1.1014961410887537e-07, "loss": 0.2344, "step": 27249 }, { "epoch": 0.9351407000686341, "grad_norm": 0.7574883906860699, "learning_rate": 1.100336365018534e-07, "loss": 0.2433, "step": 27250 }, { "epoch": 0.935175017158545, "grad_norm": 0.8780643968967666, "learning_rate": 1.0991771930466411e-07, "loss": 0.2859, "step": 27251 }, { "epoch": 0.9352093342484558, "grad_norm": 0.801316840555907, "learning_rate": 1.0980186251874026e-07, "loss": 0.2631, "step": 27252 }, { "epoch": 0.9352436513383665, "grad_norm": 0.7534137419804849, "learning_rate": 1.0968606614551291e-07, "loss": 0.2095, "step": 27253 }, { "epoch": 0.9352779684282773, "grad_norm": 0.9877263345363456, "learning_rate": 1.0957033018641317e-07, "loss": 0.2585, "step": 27254 }, { "epoch": 0.935312285518188, "grad_norm": 0.7742969087812294, "learning_rate": 1.0945465464286987e-07, "loss": 0.2879, "step": 27255 }, { "epoch": 0.9353466026080989, "grad_norm": 0.8502558427506249, "learning_rate": 1.0933903951631242e-07, "loss": 0.2499, "step": 27256 }, { "epoch": 0.9353809196980096, "grad_norm": 0.804117080232643, "learning_rate": 1.0922348480816969e-07, "loss": 0.2335, "step": 27257 }, { "epoch": 0.9354152367879204, "grad_norm": 0.7635431023200656, "learning_rate": 1.0910799051986831e-07, "loss": 0.2815, "step": 27258 }, { "epoch": 0.9354495538778311, "grad_norm": 0.7423080365267513, "learning_rate": 1.0899255665283548e-07, "loss": 0.2571, "step": 27259 }, { "epoch": 0.9354838709677419, "grad_norm": 0.7054093535040228, "learning_rate": 1.0887718320849727e-07, "loss": 0.2343, "step": 27260 }, { "epoch": 0.9355181880576527, "grad_norm": 0.9072345126314386, "learning_rate": 1.0876187018827867e-07, "loss": 0.2489, "step": 27261 }, { "epoch": 0.9355525051475635, "grad_norm": 0.854687759549818, "learning_rate": 1.0864661759360517e-07, "loss": 0.2631, "step": 27262 }, { "epoch": 0.9355868222374742, "grad_norm": 0.7280252995415117, "learning_rate": 1.0853142542589956e-07, "loss": 0.2397, "step": 27263 }, { "epoch": 0.935621139327385, "grad_norm": 1.1338791492467848, "learning_rate": 1.0841629368658568e-07, "loss": 0.2351, "step": 27264 }, { "epoch": 0.9356554564172959, "grad_norm": 0.7922534482566822, "learning_rate": 1.0830122237708462e-07, "loss": 0.2511, "step": 27265 }, { "epoch": 0.9356897735072066, "grad_norm": 0.8624374505687438, "learning_rate": 1.081862114988197e-07, "loss": 0.2599, "step": 27266 }, { "epoch": 0.9357240905971174, "grad_norm": 0.7871611472539229, "learning_rate": 1.0807126105321031e-07, "loss": 0.2348, "step": 27267 }, { "epoch": 0.9357584076870281, "grad_norm": 0.8233080796981067, "learning_rate": 1.07956371041677e-07, "loss": 0.2493, "step": 27268 }, { "epoch": 0.9357927247769389, "grad_norm": 0.8494936605696555, "learning_rate": 1.0784154146563975e-07, "loss": 0.2531, "step": 27269 }, { "epoch": 0.9358270418668497, "grad_norm": 0.7576141796805096, "learning_rate": 1.0772677232651573e-07, "loss": 0.259, "step": 27270 }, { "epoch": 0.9358613589567605, "grad_norm": 0.7580035012467674, "learning_rate": 1.076120636257244e-07, "loss": 0.263, "step": 27271 }, { "epoch": 0.9358956760466712, "grad_norm": 0.7975505399358763, "learning_rate": 1.0749741536468128e-07, "loss": 0.2707, "step": 27272 }, { "epoch": 0.935929993136582, "grad_norm": 0.8503438357813499, "learning_rate": 1.0738282754480411e-07, "loss": 0.2621, "step": 27273 }, { "epoch": 0.9359643102264928, "grad_norm": 0.7522318298629015, "learning_rate": 1.0726830016750734e-07, "loss": 0.2488, "step": 27274 }, { "epoch": 0.9359986273164036, "grad_norm": 0.7728514364585444, "learning_rate": 1.0715383323420647e-07, "loss": 0.265, "step": 27275 }, { "epoch": 0.9360329444063143, "grad_norm": 0.8043861094591431, "learning_rate": 1.0703942674631596e-07, "loss": 0.2986, "step": 27276 }, { "epoch": 0.9360672614962251, "grad_norm": 0.8197751177685021, "learning_rate": 1.0692508070524798e-07, "loss": 0.3037, "step": 27277 }, { "epoch": 0.9361015785861359, "grad_norm": 0.7960959905169084, "learning_rate": 1.0681079511241698e-07, "loss": 0.2463, "step": 27278 }, { "epoch": 0.9361358956760467, "grad_norm": 0.6874572170843622, "learning_rate": 1.0669656996923238e-07, "loss": 0.2357, "step": 27279 }, { "epoch": 0.9361702127659575, "grad_norm": 0.7568309328433678, "learning_rate": 1.065824052771075e-07, "loss": 0.2253, "step": 27280 }, { "epoch": 0.9362045298558682, "grad_norm": 0.7434529546559483, "learning_rate": 1.0646830103745176e-07, "loss": 0.2559, "step": 27281 }, { "epoch": 0.936238846945779, "grad_norm": 0.8146018864301335, "learning_rate": 1.0635425725167459e-07, "loss": 0.309, "step": 27282 }, { "epoch": 0.9362731640356897, "grad_norm": 0.7076348773610333, "learning_rate": 1.0624027392118542e-07, "loss": 0.2786, "step": 27283 }, { "epoch": 0.9363074811256006, "grad_norm": 0.8279343171533816, "learning_rate": 1.0612635104739144e-07, "loss": 0.3351, "step": 27284 }, { "epoch": 0.9363417982155113, "grad_norm": 0.8815358869187164, "learning_rate": 1.0601248863170099e-07, "loss": 0.2682, "step": 27285 }, { "epoch": 0.9363761153054221, "grad_norm": 0.8003977871999393, "learning_rate": 1.0589868667552073e-07, "loss": 0.2805, "step": 27286 }, { "epoch": 0.9364104323953328, "grad_norm": 0.7697163452220885, "learning_rate": 1.0578494518025617e-07, "loss": 0.2403, "step": 27287 }, { "epoch": 0.9364447494852437, "grad_norm": 0.7637151322907343, "learning_rate": 1.0567126414731233e-07, "loss": 0.2628, "step": 27288 }, { "epoch": 0.9364790665751545, "grad_norm": 0.8008412655748373, "learning_rate": 1.0555764357809418e-07, "loss": 0.2799, "step": 27289 }, { "epoch": 0.9365133836650652, "grad_norm": 0.7331797673001547, "learning_rate": 1.0544408347400448e-07, "loss": 0.2452, "step": 27290 }, { "epoch": 0.936547700754976, "grad_norm": 0.777454204559327, "learning_rate": 1.0533058383644656e-07, "loss": 0.2853, "step": 27291 }, { "epoch": 0.9365820178448867, "grad_norm": 0.7902354794638824, "learning_rate": 1.052171446668232e-07, "loss": 0.2264, "step": 27292 }, { "epoch": 0.9366163349347976, "grad_norm": 0.7541282460077066, "learning_rate": 1.0510376596653493e-07, "loss": 0.2594, "step": 27293 }, { "epoch": 0.9366506520247083, "grad_norm": 0.7592919753713046, "learning_rate": 1.0499044773698231e-07, "loss": 0.2265, "step": 27294 }, { "epoch": 0.9366849691146191, "grad_norm": 0.71458808380109, "learning_rate": 1.0487718997956642e-07, "loss": 0.2155, "step": 27295 }, { "epoch": 0.9367192862045298, "grad_norm": 0.8402923881814133, "learning_rate": 1.0476399269568504e-07, "loss": 0.2612, "step": 27296 }, { "epoch": 0.9367536032944407, "grad_norm": 0.7788493551566941, "learning_rate": 1.0465085588673762e-07, "loss": 0.2584, "step": 27297 }, { "epoch": 0.9367879203843514, "grad_norm": 0.7579959295548995, "learning_rate": 1.0453777955412137e-07, "loss": 0.349, "step": 27298 }, { "epoch": 0.9368222374742622, "grad_norm": 0.9699570079364144, "learning_rate": 1.0442476369923405e-07, "loss": 0.2425, "step": 27299 }, { "epoch": 0.9368565545641729, "grad_norm": 0.848237489870212, "learning_rate": 1.0431180832346954e-07, "loss": 0.2502, "step": 27300 }, { "epoch": 0.9368908716540837, "grad_norm": 0.7605844335258812, "learning_rate": 1.0419891342822618e-07, "loss": 0.2125, "step": 27301 }, { "epoch": 0.9369251887439946, "grad_norm": 0.814076310564026, "learning_rate": 1.0408607901489676e-07, "loss": 0.2598, "step": 27302 }, { "epoch": 0.9369595058339053, "grad_norm": 0.8231699342956028, "learning_rate": 1.0397330508487569e-07, "loss": 0.2785, "step": 27303 }, { "epoch": 0.936993822923816, "grad_norm": 0.7466658461954825, "learning_rate": 1.0386059163955631e-07, "loss": 0.273, "step": 27304 }, { "epoch": 0.9370281400137268, "grad_norm": 0.7866032100529315, "learning_rate": 1.0374793868033084e-07, "loss": 0.3094, "step": 27305 }, { "epoch": 0.9370624571036376, "grad_norm": 0.7834509588533555, "learning_rate": 1.0363534620859094e-07, "loss": 0.2284, "step": 27306 }, { "epoch": 0.9370967741935484, "grad_norm": 0.8227748524740182, "learning_rate": 1.0352281422572829e-07, "loss": 0.2876, "step": 27307 }, { "epoch": 0.9371310912834592, "grad_norm": 0.8106114855086346, "learning_rate": 1.0341034273313233e-07, "loss": 0.2559, "step": 27308 }, { "epoch": 0.9371654083733699, "grad_norm": 0.7823152801725409, "learning_rate": 1.0329793173219249e-07, "loss": 0.2384, "step": 27309 }, { "epoch": 0.9371997254632807, "grad_norm": 0.7612353955576331, "learning_rate": 1.0318558122429823e-07, "loss": 0.2547, "step": 27310 }, { "epoch": 0.9372340425531915, "grad_norm": 0.7927913433943173, "learning_rate": 1.030732912108362e-07, "loss": 0.2512, "step": 27311 }, { "epoch": 0.9372683596431023, "grad_norm": 0.8480859656737484, "learning_rate": 1.0296106169319531e-07, "loss": 0.2932, "step": 27312 }, { "epoch": 0.937302676733013, "grad_norm": 0.7980799579872452, "learning_rate": 1.0284889267276055e-07, "loss": 0.2569, "step": 27313 }, { "epoch": 0.9373369938229238, "grad_norm": 0.7040152002081301, "learning_rate": 1.0273678415091747e-07, "loss": 0.2639, "step": 27314 }, { "epoch": 0.9373713109128345, "grad_norm": 0.8409204716560116, "learning_rate": 1.0262473612905276e-07, "loss": 0.271, "step": 27315 }, { "epoch": 0.9374056280027454, "grad_norm": 0.8299172035433687, "learning_rate": 1.0251274860854976e-07, "loss": 0.2698, "step": 27316 }, { "epoch": 0.9374399450926562, "grad_norm": 0.7860262057078017, "learning_rate": 1.0240082159079123e-07, "loss": 0.3759, "step": 27317 }, { "epoch": 0.9374742621825669, "grad_norm": 0.74328546381068, "learning_rate": 1.0228895507716108e-07, "loss": 0.2616, "step": 27318 }, { "epoch": 0.9375085792724777, "grad_norm": 0.8313445337845723, "learning_rate": 1.0217714906904041e-07, "loss": 0.2769, "step": 27319 }, { "epoch": 0.9375428963623885, "grad_norm": 0.7684747485303502, "learning_rate": 1.0206540356781036e-07, "loss": 0.2338, "step": 27320 }, { "epoch": 0.9375772134522993, "grad_norm": 0.7948156641532098, "learning_rate": 1.0195371857485204e-07, "loss": 0.2367, "step": 27321 }, { "epoch": 0.93761153054221, "grad_norm": 0.6740773379082615, "learning_rate": 1.01842094091546e-07, "loss": 0.2237, "step": 27322 }, { "epoch": 0.9376458476321208, "grad_norm": 0.8265946751329759, "learning_rate": 1.0173053011926837e-07, "loss": 0.2936, "step": 27323 }, { "epoch": 0.9376801647220315, "grad_norm": 0.7724827113177658, "learning_rate": 1.0161902665940082e-07, "loss": 0.2588, "step": 27324 }, { "epoch": 0.9377144818119424, "grad_norm": 0.7636856942028132, "learning_rate": 1.0150758371331837e-07, "loss": 0.2365, "step": 27325 }, { "epoch": 0.9377487989018531, "grad_norm": 0.8589392966450626, "learning_rate": 1.013962012823988e-07, "loss": 0.2384, "step": 27326 }, { "epoch": 0.9377831159917639, "grad_norm": 0.8703524915641753, "learning_rate": 1.0128487936801823e-07, "loss": 0.2589, "step": 27327 }, { "epoch": 0.9378174330816746, "grad_norm": 0.7386051086327696, "learning_rate": 1.0117361797155112e-07, "loss": 0.3046, "step": 27328 }, { "epoch": 0.9378517501715854, "grad_norm": 0.8166446731992079, "learning_rate": 1.0106241709437304e-07, "loss": 0.2964, "step": 27329 }, { "epoch": 0.9378860672614963, "grad_norm": 0.7224068791393478, "learning_rate": 1.0095127673785676e-07, "loss": 0.248, "step": 27330 }, { "epoch": 0.937920384351407, "grad_norm": 0.7641106864615075, "learning_rate": 1.0084019690337621e-07, "loss": 0.3003, "step": 27331 }, { "epoch": 0.9379547014413178, "grad_norm": 0.7685421366815056, "learning_rate": 1.007291775923025e-07, "loss": 0.2791, "step": 27332 }, { "epoch": 0.9379890185312285, "grad_norm": 0.8336008038925082, "learning_rate": 1.0061821880600842e-07, "loss": 0.2676, "step": 27333 }, { "epoch": 0.9380233356211394, "grad_norm": 0.7379318390829785, "learning_rate": 1.0050732054586343e-07, "loss": 0.2372, "step": 27334 }, { "epoch": 0.9380576527110501, "grad_norm": 0.7686375514540944, "learning_rate": 1.0039648281323866e-07, "loss": 0.2723, "step": 27335 }, { "epoch": 0.9380919698009609, "grad_norm": 0.8096308807164135, "learning_rate": 1.0028570560950301e-07, "loss": 0.242, "step": 27336 }, { "epoch": 0.9381262868908716, "grad_norm": 0.7515768270844804, "learning_rate": 1.0017498893602484e-07, "loss": 0.2697, "step": 27337 }, { "epoch": 0.9381606039807824, "grad_norm": 0.7913285537312572, "learning_rate": 1.0006433279417194e-07, "loss": 0.2947, "step": 27338 }, { "epoch": 0.9381949210706932, "grad_norm": 0.6915970992230401, "learning_rate": 9.995373718531153e-08, "loss": 0.2282, "step": 27339 }, { "epoch": 0.938229238160604, "grad_norm": 0.8615206361113665, "learning_rate": 9.984320211080978e-08, "loss": 0.2467, "step": 27340 }, { "epoch": 0.9382635552505147, "grad_norm": 0.8386418900889807, "learning_rate": 9.973272757203223e-08, "loss": 0.2434, "step": 27341 }, { "epoch": 0.9382978723404255, "grad_norm": 0.8164639771725081, "learning_rate": 9.96223135703439e-08, "loss": 0.2311, "step": 27342 }, { "epoch": 0.9383321894303364, "grad_norm": 0.7101205715958244, "learning_rate": 9.951196010710817e-08, "loss": 0.2563, "step": 27343 }, { "epoch": 0.9383665065202471, "grad_norm": 0.7056487565399171, "learning_rate": 9.940166718368949e-08, "loss": 0.2624, "step": 27344 }, { "epoch": 0.9384008236101579, "grad_norm": 0.7194927406588493, "learning_rate": 9.929143480144954e-08, "loss": 0.2422, "step": 27345 }, { "epoch": 0.9384351407000686, "grad_norm": 0.7724567093001564, "learning_rate": 9.918126296174946e-08, "loss": 0.3074, "step": 27346 }, { "epoch": 0.9384694577899794, "grad_norm": 0.7615030073541122, "learning_rate": 9.90711516659515e-08, "loss": 0.226, "step": 27347 }, { "epoch": 0.9385037748798902, "grad_norm": 0.9142164243476957, "learning_rate": 9.896110091541566e-08, "loss": 0.2519, "step": 27348 }, { "epoch": 0.938538091969801, "grad_norm": 0.7836226508676236, "learning_rate": 9.885111071150145e-08, "loss": 0.3081, "step": 27349 }, { "epoch": 0.9385724090597117, "grad_norm": 0.7378937886051246, "learning_rate": 9.874118105556718e-08, "loss": 0.2162, "step": 27350 }, { "epoch": 0.9386067261496225, "grad_norm": 0.8125944592111539, "learning_rate": 9.86313119489718e-08, "loss": 0.235, "step": 27351 }, { "epoch": 0.9386410432395332, "grad_norm": 0.8827578832640349, "learning_rate": 9.852150339307143e-08, "loss": 0.275, "step": 27352 }, { "epoch": 0.9386753603294441, "grad_norm": 0.7323567487042988, "learning_rate": 9.841175538922387e-08, "loss": 0.2827, "step": 27353 }, { "epoch": 0.9387096774193548, "grad_norm": 0.7773152689199972, "learning_rate": 9.830206793878471e-08, "loss": 0.2396, "step": 27354 }, { "epoch": 0.9387439945092656, "grad_norm": 0.8044736765202487, "learning_rate": 9.819244104310733e-08, "loss": 0.2794, "step": 27355 }, { "epoch": 0.9387783115991764, "grad_norm": 0.8488205846050304, "learning_rate": 9.808287470354894e-08, "loss": 0.2532, "step": 27356 }, { "epoch": 0.9388126286890872, "grad_norm": 0.7790042109287674, "learning_rate": 9.79733689214607e-08, "loss": 0.2829, "step": 27357 }, { "epoch": 0.938846945778998, "grad_norm": 0.6756855078009938, "learning_rate": 9.786392369819598e-08, "loss": 0.2572, "step": 27358 }, { "epoch": 0.9388812628689087, "grad_norm": 0.8646860992078599, "learning_rate": 9.775453903510813e-08, "loss": 0.2401, "step": 27359 }, { "epoch": 0.9389155799588195, "grad_norm": 0.8582891747956345, "learning_rate": 9.764521493354662e-08, "loss": 0.2978, "step": 27360 }, { "epoch": 0.9389498970487302, "grad_norm": 0.8933581196124215, "learning_rate": 9.753595139486316e-08, "loss": 0.2738, "step": 27361 }, { "epoch": 0.9389842141386411, "grad_norm": 0.7311558857936231, "learning_rate": 9.742674842040779e-08, "loss": 0.2555, "step": 27362 }, { "epoch": 0.9390185312285518, "grad_norm": 0.6991821640900595, "learning_rate": 9.731760601152829e-08, "loss": 0.3067, "step": 27363 }, { "epoch": 0.9390528483184626, "grad_norm": 0.7848913552469406, "learning_rate": 9.720852416957416e-08, "loss": 0.2376, "step": 27364 }, { "epoch": 0.9390871654083733, "grad_norm": 0.8189804643312198, "learning_rate": 9.709950289589265e-08, "loss": 0.2258, "step": 27365 }, { "epoch": 0.9391214824982842, "grad_norm": 0.7834353818259293, "learning_rate": 9.699054219183101e-08, "loss": 0.3196, "step": 27366 }, { "epoch": 0.939155799588195, "grad_norm": 1.085961580603674, "learning_rate": 9.688164205873373e-08, "loss": 0.2596, "step": 27367 }, { "epoch": 0.9391901166781057, "grad_norm": 0.7628761974118147, "learning_rate": 9.677280249794862e-08, "loss": 0.2316, "step": 27368 }, { "epoch": 0.9392244337680165, "grad_norm": 0.8958640019186023, "learning_rate": 9.666402351081794e-08, "loss": 0.262, "step": 27369 }, { "epoch": 0.9392587508579272, "grad_norm": 0.7663767330487414, "learning_rate": 9.655530509868727e-08, "loss": 0.3002, "step": 27370 }, { "epoch": 0.9392930679478381, "grad_norm": 0.8656725349472484, "learning_rate": 9.644664726289832e-08, "loss": 0.2925, "step": 27371 }, { "epoch": 0.9393273850377488, "grad_norm": 0.8001076403081355, "learning_rate": 9.633805000479391e-08, "loss": 0.2754, "step": 27372 }, { "epoch": 0.9393617021276596, "grad_norm": 0.7533751667076378, "learning_rate": 9.622951332571628e-08, "loss": 0.2495, "step": 27373 }, { "epoch": 0.9393960192175703, "grad_norm": 0.862169470056272, "learning_rate": 9.61210372270055e-08, "loss": 0.308, "step": 27374 }, { "epoch": 0.9394303363074811, "grad_norm": 0.7571415255891093, "learning_rate": 9.601262171000214e-08, "loss": 0.2284, "step": 27375 }, { "epoch": 0.9394646533973919, "grad_norm": 0.8220908227094159, "learning_rate": 9.590426677604514e-08, "loss": 0.2511, "step": 27376 }, { "epoch": 0.9394989704873027, "grad_norm": 0.8136534097662108, "learning_rate": 9.579597242647398e-08, "loss": 0.3247, "step": 27377 }, { "epoch": 0.9395332875772134, "grad_norm": 0.7409333998743641, "learning_rate": 9.568773866262481e-08, "loss": 0.2253, "step": 27378 }, { "epoch": 0.9395676046671242, "grad_norm": 0.7800068435802608, "learning_rate": 9.557956548583602e-08, "loss": 0.2438, "step": 27379 }, { "epoch": 0.939601921757035, "grad_norm": 0.8207091101764733, "learning_rate": 9.54714528974443e-08, "loss": 0.2705, "step": 27380 }, { "epoch": 0.9396362388469458, "grad_norm": 0.7421422567977511, "learning_rate": 9.53634008987836e-08, "loss": 0.242, "step": 27381 }, { "epoch": 0.9396705559368566, "grad_norm": 0.7393675059753697, "learning_rate": 9.525540949119117e-08, "loss": 0.2477, "step": 27382 }, { "epoch": 0.9397048730267673, "grad_norm": 0.777712137871226, "learning_rate": 9.514747867599872e-08, "loss": 0.2493, "step": 27383 }, { "epoch": 0.9397391901166781, "grad_norm": 0.7693561000385569, "learning_rate": 9.50396084545413e-08, "loss": 0.257, "step": 27384 }, { "epoch": 0.9397735072065889, "grad_norm": 0.7829539500216068, "learning_rate": 9.493179882815063e-08, "loss": 0.2838, "step": 27385 }, { "epoch": 0.9398078242964997, "grad_norm": 0.6886931050804649, "learning_rate": 9.48240497981584e-08, "loss": 0.2215, "step": 27386 }, { "epoch": 0.9398421413864104, "grad_norm": 0.744002002491038, "learning_rate": 9.47163613658969e-08, "loss": 0.2395, "step": 27387 }, { "epoch": 0.9398764584763212, "grad_norm": 0.8604674435693664, "learning_rate": 9.460873353269506e-08, "loss": 0.2952, "step": 27388 }, { "epoch": 0.939910775566232, "grad_norm": 0.7247800784343038, "learning_rate": 9.450116629988403e-08, "loss": 0.2495, "step": 27389 }, { "epoch": 0.9399450926561428, "grad_norm": 0.8149768585005961, "learning_rate": 9.43936596687911e-08, "loss": 0.2386, "step": 27390 }, { "epoch": 0.9399794097460535, "grad_norm": 0.7770297665571751, "learning_rate": 9.428621364074576e-08, "loss": 0.2372, "step": 27391 }, { "epoch": 0.9400137268359643, "grad_norm": 0.8339724310466416, "learning_rate": 9.417882821707413e-08, "loss": 0.2873, "step": 27392 }, { "epoch": 0.940048043925875, "grad_norm": 0.7883007455736294, "learning_rate": 9.407150339910354e-08, "loss": 0.2127, "step": 27393 }, { "epoch": 0.9400823610157859, "grad_norm": 0.847105144305379, "learning_rate": 9.396423918816011e-08, "loss": 0.2567, "step": 27394 }, { "epoch": 0.9401166781056967, "grad_norm": 0.6907063278407808, "learning_rate": 9.385703558556835e-08, "loss": 0.2614, "step": 27395 }, { "epoch": 0.9401509951956074, "grad_norm": 0.705817096414527, "learning_rate": 9.374989259265277e-08, "loss": 0.3272, "step": 27396 }, { "epoch": 0.9401853122855182, "grad_norm": 0.7293354324660277, "learning_rate": 9.364281021073728e-08, "loss": 0.2763, "step": 27397 }, { "epoch": 0.9402196293754289, "grad_norm": 1.9660273013638383, "learning_rate": 9.35357884411453e-08, "loss": 0.3052, "step": 27398 }, { "epoch": 0.9402539464653398, "grad_norm": 0.7604148591254122, "learning_rate": 9.342882728519687e-08, "loss": 0.2563, "step": 27399 }, { "epoch": 0.9402882635552505, "grad_norm": 0.8179952277047927, "learning_rate": 9.332192674421592e-08, "loss": 0.3353, "step": 27400 }, { "epoch": 0.9403225806451613, "grad_norm": 0.7427726868838052, "learning_rate": 9.321508681952141e-08, "loss": 0.2376, "step": 27401 }, { "epoch": 0.940356897735072, "grad_norm": 0.8157723265776375, "learning_rate": 9.310830751243338e-08, "loss": 0.2683, "step": 27402 }, { "epoch": 0.9403912148249829, "grad_norm": 0.764676143165552, "learning_rate": 9.300158882427246e-08, "loss": 0.2522, "step": 27403 }, { "epoch": 0.9404255319148936, "grad_norm": 0.7669681764847635, "learning_rate": 9.289493075635481e-08, "loss": 0.2417, "step": 27404 }, { "epoch": 0.9404598490048044, "grad_norm": 0.781026088066672, "learning_rate": 9.278833330999992e-08, "loss": 0.2179, "step": 27405 }, { "epoch": 0.9404941660947151, "grad_norm": 0.7273471325806982, "learning_rate": 9.268179648652343e-08, "loss": 0.2669, "step": 27406 }, { "epoch": 0.9405284831846259, "grad_norm": 0.8003616310705498, "learning_rate": 9.257532028724259e-08, "loss": 0.2442, "step": 27407 }, { "epoch": 0.9405628002745368, "grad_norm": 0.7585164287080891, "learning_rate": 9.246890471347136e-08, "loss": 0.2402, "step": 27408 }, { "epoch": 0.9405971173644475, "grad_norm": 0.8999120283118394, "learning_rate": 9.236254976652592e-08, "loss": 0.2709, "step": 27409 }, { "epoch": 0.9406314344543583, "grad_norm": 0.8128704260800855, "learning_rate": 9.225625544771855e-08, "loss": 0.2557, "step": 27410 }, { "epoch": 0.940665751544269, "grad_norm": 0.8047792925722145, "learning_rate": 9.215002175836374e-08, "loss": 0.325, "step": 27411 }, { "epoch": 0.9407000686341799, "grad_norm": 0.7630292685995723, "learning_rate": 9.204384869977434e-08, "loss": 0.2474, "step": 27412 }, { "epoch": 0.9407343857240906, "grad_norm": 0.7916381728374774, "learning_rate": 9.193773627325931e-08, "loss": 0.2617, "step": 27413 }, { "epoch": 0.9407687028140014, "grad_norm": 0.7013299936904098, "learning_rate": 9.183168448013258e-08, "loss": 0.2325, "step": 27414 }, { "epoch": 0.9408030199039121, "grad_norm": 0.7368528627660372, "learning_rate": 9.172569332170255e-08, "loss": 0.1946, "step": 27415 }, { "epoch": 0.9408373369938229, "grad_norm": 0.8186547705924099, "learning_rate": 9.161976279927931e-08, "loss": 0.2647, "step": 27416 }, { "epoch": 0.9408716540837337, "grad_norm": 0.7609212338094006, "learning_rate": 9.151389291417123e-08, "loss": 0.2648, "step": 27417 }, { "epoch": 0.9409059711736445, "grad_norm": 0.9788239728946911, "learning_rate": 9.140808366768617e-08, "loss": 0.2836, "step": 27418 }, { "epoch": 0.9409402882635552, "grad_norm": 0.6743360405749315, "learning_rate": 9.13023350611314e-08, "loss": 0.1973, "step": 27419 }, { "epoch": 0.940974605353466, "grad_norm": 0.8320843891172273, "learning_rate": 9.119664709581311e-08, "loss": 0.2266, "step": 27420 }, { "epoch": 0.9410089224433767, "grad_norm": 0.8728072568018526, "learning_rate": 9.109101977303747e-08, "loss": 0.2547, "step": 27421 }, { "epoch": 0.9410432395332876, "grad_norm": 0.882382351961705, "learning_rate": 9.098545309410845e-08, "loss": 0.247, "step": 27422 }, { "epoch": 0.9410775566231984, "grad_norm": 0.7856737202455772, "learning_rate": 9.08799470603311e-08, "loss": 0.2473, "step": 27423 }, { "epoch": 0.9411118737131091, "grad_norm": 0.7843049505794435, "learning_rate": 9.077450167300939e-08, "loss": 0.1955, "step": 27424 }, { "epoch": 0.9411461908030199, "grad_norm": 0.7559635303565666, "learning_rate": 9.066911693344394e-08, "loss": 0.2686, "step": 27425 }, { "epoch": 0.9411805078929307, "grad_norm": 0.7995041741659837, "learning_rate": 9.05637928429387e-08, "loss": 0.3151, "step": 27426 }, { "epoch": 0.9412148249828415, "grad_norm": 0.8157360988010995, "learning_rate": 9.04585294027932e-08, "loss": 0.274, "step": 27427 }, { "epoch": 0.9412491420727522, "grad_norm": 0.9290305083278859, "learning_rate": 9.035332661430918e-08, "loss": 0.2886, "step": 27428 }, { "epoch": 0.941283459162663, "grad_norm": 0.8067475645833487, "learning_rate": 9.024818447878502e-08, "loss": 0.2688, "step": 27429 }, { "epoch": 0.9413177762525737, "grad_norm": 0.7461812037660889, "learning_rate": 9.014310299752083e-08, "loss": 0.2526, "step": 27430 }, { "epoch": 0.9413520933424846, "grad_norm": 0.8405667461147434, "learning_rate": 9.003808217181442e-08, "loss": 0.2879, "step": 27431 }, { "epoch": 0.9413864104323953, "grad_norm": 0.7596285181203157, "learning_rate": 8.993312200296256e-08, "loss": 0.2575, "step": 27432 }, { "epoch": 0.9414207275223061, "grad_norm": 0.7603729158965689, "learning_rate": 8.982822249226253e-08, "loss": 0.2776, "step": 27433 }, { "epoch": 0.9414550446122169, "grad_norm": 0.7946741889518253, "learning_rate": 8.972338364101051e-08, "loss": 0.2392, "step": 27434 }, { "epoch": 0.9414893617021277, "grad_norm": 0.7868802521891916, "learning_rate": 8.961860545050105e-08, "loss": 0.2252, "step": 27435 }, { "epoch": 0.9415236787920385, "grad_norm": 0.7612033602043177, "learning_rate": 8.951388792202808e-08, "loss": 0.2982, "step": 27436 }, { "epoch": 0.9415579958819492, "grad_norm": 0.8615192721941825, "learning_rate": 8.940923105688726e-08, "loss": 0.225, "step": 27437 }, { "epoch": 0.94159231297186, "grad_norm": 0.7982774987584318, "learning_rate": 8.93046348563692e-08, "loss": 0.2544, "step": 27438 }, { "epoch": 0.9416266300617707, "grad_norm": 0.7719977839432037, "learning_rate": 8.920009932176732e-08, "loss": 0.2596, "step": 27439 }, { "epoch": 0.9416609471516816, "grad_norm": 0.777596885505848, "learning_rate": 8.90956244543728e-08, "loss": 0.272, "step": 27440 }, { "epoch": 0.9416952642415923, "grad_norm": 0.8206075892582293, "learning_rate": 8.899121025547631e-08, "loss": 0.2714, "step": 27441 }, { "epoch": 0.9417295813315031, "grad_norm": 0.7408542599159229, "learning_rate": 8.888685672636788e-08, "loss": 0.2728, "step": 27442 }, { "epoch": 0.9417638984214138, "grad_norm": 0.8189320382881439, "learning_rate": 8.87825638683365e-08, "loss": 0.2787, "step": 27443 }, { "epoch": 0.9417982155113246, "grad_norm": 0.793984963788062, "learning_rate": 8.867833168267059e-08, "loss": 0.2159, "step": 27444 }, { "epoch": 0.9418325326012355, "grad_norm": 0.7384727427714518, "learning_rate": 8.857416017065745e-08, "loss": 0.2996, "step": 27445 }, { "epoch": 0.9418668496911462, "grad_norm": 0.8274973387276546, "learning_rate": 8.847004933358438e-08, "loss": 0.2346, "step": 27446 }, { "epoch": 0.941901166781057, "grad_norm": 0.7678928304683066, "learning_rate": 8.836599917273869e-08, "loss": 0.2782, "step": 27447 }, { "epoch": 0.9419354838709677, "grad_norm": 0.8435194763239812, "learning_rate": 8.826200968940323e-08, "loss": 0.2744, "step": 27448 }, { "epoch": 0.9419698009608786, "grad_norm": 0.7002986297857028, "learning_rate": 8.815808088486533e-08, "loss": 0.2297, "step": 27449 }, { "epoch": 0.9420041180507893, "grad_norm": 0.8106900048172079, "learning_rate": 8.80542127604067e-08, "loss": 0.2363, "step": 27450 }, { "epoch": 0.9420384351407001, "grad_norm": 0.8361736829483891, "learning_rate": 8.795040531731191e-08, "loss": 0.3132, "step": 27451 }, { "epoch": 0.9420727522306108, "grad_norm": 0.8053823410113888, "learning_rate": 8.78466585568627e-08, "loss": 0.2923, "step": 27452 }, { "epoch": 0.9421070693205216, "grad_norm": 0.7548544308930784, "learning_rate": 8.774297248034136e-08, "loss": 0.2684, "step": 27453 }, { "epoch": 0.9421413864104324, "grad_norm": 0.74340234028956, "learning_rate": 8.763934708902855e-08, "loss": 0.2234, "step": 27454 }, { "epoch": 0.9421757035003432, "grad_norm": 0.7748836098049061, "learning_rate": 8.75357823842038e-08, "loss": 0.2365, "step": 27455 }, { "epoch": 0.9422100205902539, "grad_norm": 0.7882660069731178, "learning_rate": 8.74322783671483e-08, "loss": 0.2647, "step": 27456 }, { "epoch": 0.9422443376801647, "grad_norm": 0.8263515631581957, "learning_rate": 8.73288350391377e-08, "loss": 0.245, "step": 27457 }, { "epoch": 0.9422786547700756, "grad_norm": 0.7892422292622552, "learning_rate": 8.72254524014532e-08, "loss": 0.2209, "step": 27458 }, { "epoch": 0.9423129718599863, "grad_norm": 0.7796055082371744, "learning_rate": 8.712213045536987e-08, "loss": 0.3236, "step": 27459 }, { "epoch": 0.942347288949897, "grad_norm": 0.7513203188929959, "learning_rate": 8.701886920216507e-08, "loss": 0.2654, "step": 27460 }, { "epoch": 0.9423816060398078, "grad_norm": 0.7362944248879193, "learning_rate": 8.691566864311385e-08, "loss": 0.2215, "step": 27461 }, { "epoch": 0.9424159231297186, "grad_norm": 0.7185385048007896, "learning_rate": 8.681252877949131e-08, "loss": 0.2104, "step": 27462 }, { "epoch": 0.9424502402196294, "grad_norm": 0.7589999917856464, "learning_rate": 8.670944961257199e-08, "loss": 0.2429, "step": 27463 }, { "epoch": 0.9424845573095402, "grad_norm": 0.7885872883988853, "learning_rate": 8.660643114362932e-08, "loss": 0.2546, "step": 27464 }, { "epoch": 0.9425188743994509, "grad_norm": 0.7346058601375047, "learning_rate": 8.650347337393505e-08, "loss": 0.2423, "step": 27465 }, { "epoch": 0.9425531914893617, "grad_norm": 0.8415108852663451, "learning_rate": 8.640057630476262e-08, "loss": 0.2381, "step": 27466 }, { "epoch": 0.9425875085792724, "grad_norm": 0.7577902170384131, "learning_rate": 8.62977399373821e-08, "loss": 0.2487, "step": 27467 }, { "epoch": 0.9426218256691833, "grad_norm": 0.8302418870025382, "learning_rate": 8.619496427306362e-08, "loss": 0.2516, "step": 27468 }, { "epoch": 0.942656142759094, "grad_norm": 0.8394623884433068, "learning_rate": 8.609224931307781e-08, "loss": 0.2611, "step": 27469 }, { "epoch": 0.9426904598490048, "grad_norm": 0.6831707002998298, "learning_rate": 8.598959505869364e-08, "loss": 0.204, "step": 27470 }, { "epoch": 0.9427247769389155, "grad_norm": 0.7442731811778007, "learning_rate": 8.588700151117791e-08, "loss": 0.2861, "step": 27471 }, { "epoch": 0.9427590940288264, "grad_norm": 0.7307233635647798, "learning_rate": 8.578446867180012e-08, "loss": 0.222, "step": 27472 }, { "epoch": 0.9427934111187372, "grad_norm": 0.8176396308498121, "learning_rate": 8.568199654182486e-08, "loss": 0.2247, "step": 27473 }, { "epoch": 0.9428277282086479, "grad_norm": 0.7757302707813188, "learning_rate": 8.55795851225194e-08, "loss": 0.2611, "step": 27474 }, { "epoch": 0.9428620452985587, "grad_norm": 0.7039870729499612, "learning_rate": 8.547723441514832e-08, "loss": 0.2742, "step": 27475 }, { "epoch": 0.9428963623884694, "grad_norm": 0.7940115434306478, "learning_rate": 8.537494442097672e-08, "loss": 0.2974, "step": 27476 }, { "epoch": 0.9429306794783803, "grad_norm": 0.7809216197118272, "learning_rate": 8.52727151412669e-08, "loss": 0.2952, "step": 27477 }, { "epoch": 0.942964996568291, "grad_norm": 0.7282279440750036, "learning_rate": 8.51705465772834e-08, "loss": 0.2112, "step": 27478 }, { "epoch": 0.9429993136582018, "grad_norm": 0.7342261298642153, "learning_rate": 8.506843873028803e-08, "loss": 0.2859, "step": 27479 }, { "epoch": 0.9430336307481125, "grad_norm": 0.8376854273565072, "learning_rate": 8.49663916015403e-08, "loss": 0.2131, "step": 27480 }, { "epoch": 0.9430679478380234, "grad_norm": 0.8115213857615802, "learning_rate": 8.486440519230422e-08, "loss": 0.2214, "step": 27481 }, { "epoch": 0.9431022649279341, "grad_norm": 0.8233776148246252, "learning_rate": 8.476247950383654e-08, "loss": 0.2642, "step": 27482 }, { "epoch": 0.9431365820178449, "grad_norm": 0.8272193427066372, "learning_rate": 8.466061453739849e-08, "loss": 0.2716, "step": 27483 }, { "epoch": 0.9431708991077556, "grad_norm": 0.7357064248403932, "learning_rate": 8.455881029424795e-08, "loss": 0.2307, "step": 27484 }, { "epoch": 0.9432052161976664, "grad_norm": 0.7835922193072055, "learning_rate": 8.445706677564169e-08, "loss": 0.3045, "step": 27485 }, { "epoch": 0.9432395332875773, "grad_norm": 0.8829866134207317, "learning_rate": 8.435538398283815e-08, "loss": 0.2774, "step": 27486 }, { "epoch": 0.943273850377488, "grad_norm": 0.8384811159051844, "learning_rate": 8.425376191709245e-08, "loss": 0.2655, "step": 27487 }, { "epoch": 0.9433081674673988, "grad_norm": 0.8075938032062039, "learning_rate": 8.415220057966079e-08, "loss": 0.2762, "step": 27488 }, { "epoch": 0.9433424845573095, "grad_norm": 0.6894747688334335, "learning_rate": 8.40506999717966e-08, "loss": 0.2243, "step": 27489 }, { "epoch": 0.9433768016472203, "grad_norm": 0.7695296042164051, "learning_rate": 8.394926009475502e-08, "loss": 0.2511, "step": 27490 }, { "epoch": 0.9434111187371311, "grad_norm": 0.752679879602683, "learning_rate": 8.384788094978891e-08, "loss": 0.2939, "step": 27491 }, { "epoch": 0.9434454358270419, "grad_norm": 0.7711246116904852, "learning_rate": 8.374656253815005e-08, "loss": 0.2823, "step": 27492 }, { "epoch": 0.9434797529169526, "grad_norm": 0.8898903836510881, "learning_rate": 8.364530486109079e-08, "loss": 0.2551, "step": 27493 }, { "epoch": 0.9435140700068634, "grad_norm": 0.7633946028219077, "learning_rate": 8.354410791986234e-08, "loss": 0.2762, "step": 27494 }, { "epoch": 0.9435483870967742, "grad_norm": 0.834280413475035, "learning_rate": 8.34429717157137e-08, "loss": 0.271, "step": 27495 }, { "epoch": 0.943582704186685, "grad_norm": 0.848462067403424, "learning_rate": 8.334189624989552e-08, "loss": 0.21, "step": 27496 }, { "epoch": 0.9436170212765957, "grad_norm": 0.7573251086114858, "learning_rate": 8.324088152365573e-08, "loss": 0.304, "step": 27497 }, { "epoch": 0.9436513383665065, "grad_norm": 0.7722090366069617, "learning_rate": 8.313992753824218e-08, "loss": 0.2434, "step": 27498 }, { "epoch": 0.9436856554564172, "grad_norm": 0.856674269311594, "learning_rate": 8.303903429490279e-08, "loss": 0.2444, "step": 27499 }, { "epoch": 0.9437199725463281, "grad_norm": 0.7408243357815452, "learning_rate": 8.29382017948832e-08, "loss": 0.1994, "step": 27500 }, { "epoch": 0.9437542896362389, "grad_norm": 0.7097501878412928, "learning_rate": 8.283743003942968e-08, "loss": 0.2665, "step": 27501 }, { "epoch": 0.9437886067261496, "grad_norm": 0.7239736254588142, "learning_rate": 8.27367190297873e-08, "loss": 0.2554, "step": 27502 }, { "epoch": 0.9438229238160604, "grad_norm": 0.7851742994859213, "learning_rate": 8.263606876719843e-08, "loss": 0.2266, "step": 27503 }, { "epoch": 0.9438572409059712, "grad_norm": 0.7730480420221002, "learning_rate": 8.253547925290928e-08, "loss": 0.2424, "step": 27504 }, { "epoch": 0.943891557995882, "grad_norm": 0.8217302802449226, "learning_rate": 8.243495048815996e-08, "loss": 0.2343, "step": 27505 }, { "epoch": 0.9439258750857927, "grad_norm": 0.7539996308681267, "learning_rate": 8.233448247419395e-08, "loss": 0.2656, "step": 27506 }, { "epoch": 0.9439601921757035, "grad_norm": 0.768564302709036, "learning_rate": 8.223407521225191e-08, "loss": 0.2242, "step": 27507 }, { "epoch": 0.9439945092656142, "grad_norm": 0.8328793451078909, "learning_rate": 8.21337287035745e-08, "loss": 0.2448, "step": 27508 }, { "epoch": 0.9440288263555251, "grad_norm": 0.7670542601791801, "learning_rate": 8.203344294940129e-08, "loss": 0.2645, "step": 27509 }, { "epoch": 0.9440631434454358, "grad_norm": 0.8546957870744942, "learning_rate": 8.193321795097076e-08, "loss": 0.2938, "step": 27510 }, { "epoch": 0.9440974605353466, "grad_norm": 0.8164076668506751, "learning_rate": 8.183305370952188e-08, "loss": 0.246, "step": 27511 }, { "epoch": 0.9441317776252574, "grad_norm": 0.8640849652458596, "learning_rate": 8.173295022629091e-08, "loss": 0.2381, "step": 27512 }, { "epoch": 0.9441660947151681, "grad_norm": 0.7451258248831741, "learning_rate": 8.163290750251518e-08, "loss": 0.2276, "step": 27513 }, { "epoch": 0.944200411805079, "grad_norm": 0.7517930864276955, "learning_rate": 8.153292553943149e-08, "loss": 0.2779, "step": 27514 }, { "epoch": 0.9442347288949897, "grad_norm": 0.825223687803914, "learning_rate": 8.143300433827327e-08, "loss": 0.2479, "step": 27515 }, { "epoch": 0.9442690459849005, "grad_norm": 0.9552860417067071, "learning_rate": 8.133314390027624e-08, "loss": 0.2354, "step": 27516 }, { "epoch": 0.9443033630748112, "grad_norm": 0.766011169598229, "learning_rate": 8.123334422667384e-08, "loss": 0.3091, "step": 27517 }, { "epoch": 0.9443376801647221, "grad_norm": 0.8041505621440136, "learning_rate": 8.113360531869841e-08, "loss": 0.2148, "step": 27518 }, { "epoch": 0.9443719972546328, "grad_norm": 0.8404146211611678, "learning_rate": 8.103392717758229e-08, "loss": 0.2337, "step": 27519 }, { "epoch": 0.9444063143445436, "grad_norm": 0.7651975839986019, "learning_rate": 8.09343098045573e-08, "loss": 0.2877, "step": 27520 }, { "epoch": 0.9444406314344543, "grad_norm": 0.8309928748236152, "learning_rate": 8.083475320085355e-08, "loss": 0.2473, "step": 27521 }, { "epoch": 0.9444749485243651, "grad_norm": 0.7029772292943623, "learning_rate": 8.073525736770115e-08, "loss": 0.2616, "step": 27522 }, { "epoch": 0.944509265614276, "grad_norm": 0.8302142509213909, "learning_rate": 8.063582230633027e-08, "loss": 0.2837, "step": 27523 }, { "epoch": 0.9445435827041867, "grad_norm": 0.7663796156292476, "learning_rate": 8.053644801796712e-08, "loss": 0.2185, "step": 27524 }, { "epoch": 0.9445778997940975, "grad_norm": 0.7375543747017713, "learning_rate": 8.043713450384183e-08, "loss": 0.2348, "step": 27525 }, { "epoch": 0.9446122168840082, "grad_norm": 0.7657256821663404, "learning_rate": 8.033788176517898e-08, "loss": 0.2248, "step": 27526 }, { "epoch": 0.9446465339739191, "grad_norm": 0.811357022179805, "learning_rate": 8.023868980320649e-08, "loss": 0.2693, "step": 27527 }, { "epoch": 0.9446808510638298, "grad_norm": 0.7707469309216511, "learning_rate": 8.013955861914891e-08, "loss": 0.253, "step": 27528 }, { "epoch": 0.9447151681537406, "grad_norm": 0.7170873981115579, "learning_rate": 8.004048821423082e-08, "loss": 0.2567, "step": 27529 }, { "epoch": 0.9447494852436513, "grad_norm": 0.7901399952653225, "learning_rate": 7.994147858967682e-08, "loss": 0.2831, "step": 27530 }, { "epoch": 0.9447838023335621, "grad_norm": 0.7713547309397503, "learning_rate": 7.984252974670925e-08, "loss": 0.2424, "step": 27531 }, { "epoch": 0.9448181194234729, "grad_norm": 0.7888520333657307, "learning_rate": 7.974364168655158e-08, "loss": 0.2303, "step": 27532 }, { "epoch": 0.9448524365133837, "grad_norm": 0.7180088763783787, "learning_rate": 7.964481441042393e-08, "loss": 0.299, "step": 27533 }, { "epoch": 0.9448867536032944, "grad_norm": 0.7765099104640838, "learning_rate": 7.954604791954811e-08, "loss": 0.2779, "step": 27534 }, { "epoch": 0.9449210706932052, "grad_norm": 0.7383069903574634, "learning_rate": 7.944734221514483e-08, "loss": 0.295, "step": 27535 }, { "epoch": 0.9449553877831159, "grad_norm": 0.7403987374065952, "learning_rate": 7.934869729843197e-08, "loss": 0.2402, "step": 27536 }, { "epoch": 0.9449897048730268, "grad_norm": 0.7344104462618545, "learning_rate": 7.925011317063025e-08, "loss": 0.2578, "step": 27537 }, { "epoch": 0.9450240219629376, "grad_norm": 0.776244260639618, "learning_rate": 7.91515898329548e-08, "loss": 0.2065, "step": 27538 }, { "epoch": 0.9450583390528483, "grad_norm": 0.8711572539800002, "learning_rate": 7.905312728662573e-08, "loss": 0.2257, "step": 27539 }, { "epoch": 0.9450926561427591, "grad_norm": 0.7876937379979224, "learning_rate": 7.89547255328571e-08, "loss": 0.2569, "step": 27540 }, { "epoch": 0.9451269732326699, "grad_norm": 0.8112522138609748, "learning_rate": 7.885638457286571e-08, "loss": 0.2833, "step": 27541 }, { "epoch": 0.9451612903225807, "grad_norm": 0.8102510605894403, "learning_rate": 7.875810440786613e-08, "loss": 0.2796, "step": 27542 }, { "epoch": 0.9451956074124914, "grad_norm": 0.8226663893687706, "learning_rate": 7.865988503907295e-08, "loss": 0.2665, "step": 27543 }, { "epoch": 0.9452299245024022, "grad_norm": 0.774510737921083, "learning_rate": 7.856172646769856e-08, "loss": 0.2422, "step": 27544 }, { "epoch": 0.9452642415923129, "grad_norm": 0.7236649157698924, "learning_rate": 7.846362869495638e-08, "loss": 0.2215, "step": 27545 }, { "epoch": 0.9452985586822238, "grad_norm": 0.9455382501508567, "learning_rate": 7.836559172205882e-08, "loss": 0.2718, "step": 27546 }, { "epoch": 0.9453328757721345, "grad_norm": 0.7175776086291594, "learning_rate": 7.826761555021545e-08, "loss": 0.2125, "step": 27547 }, { "epoch": 0.9453671928620453, "grad_norm": 0.8691929158923524, "learning_rate": 7.816970018063808e-08, "loss": 0.2636, "step": 27548 }, { "epoch": 0.945401509951956, "grad_norm": 0.7735353794385996, "learning_rate": 7.807184561453518e-08, "loss": 0.2416, "step": 27549 }, { "epoch": 0.9454358270418668, "grad_norm": 0.8661139703462695, "learning_rate": 7.797405185311634e-08, "loss": 0.2262, "step": 27550 }, { "epoch": 0.9454701441317777, "grad_norm": 0.7898314002176529, "learning_rate": 7.787631889759006e-08, "loss": 0.2449, "step": 27551 }, { "epoch": 0.9455044612216884, "grad_norm": 0.872290073163023, "learning_rate": 7.777864674916258e-08, "loss": 0.2382, "step": 27552 }, { "epoch": 0.9455387783115992, "grad_norm": 0.755415608261911, "learning_rate": 7.768103540904126e-08, "loss": 0.2856, "step": 27553 }, { "epoch": 0.9455730954015099, "grad_norm": 0.6813708866820285, "learning_rate": 7.758348487843238e-08, "loss": 0.2474, "step": 27554 }, { "epoch": 0.9456074124914208, "grad_norm": 0.7439373269664863, "learning_rate": 7.74859951585405e-08, "loss": 0.292, "step": 27555 }, { "epoch": 0.9456417295813315, "grad_norm": 0.9218318869150124, "learning_rate": 7.738856625056912e-08, "loss": 0.3428, "step": 27556 }, { "epoch": 0.9456760466712423, "grad_norm": 0.7308387845429908, "learning_rate": 7.729119815572338e-08, "loss": 0.2447, "step": 27557 }, { "epoch": 0.945710363761153, "grad_norm": 0.8336566416273832, "learning_rate": 7.719389087520568e-08, "loss": 0.2199, "step": 27558 }, { "epoch": 0.9457446808510638, "grad_norm": 0.7938478785073796, "learning_rate": 7.709664441021836e-08, "loss": 0.2675, "step": 27559 }, { "epoch": 0.9457789979409746, "grad_norm": 0.8824144096615303, "learning_rate": 7.699945876196213e-08, "loss": 0.2495, "step": 27560 }, { "epoch": 0.9458133150308854, "grad_norm": 0.7138389433677232, "learning_rate": 7.690233393163771e-08, "loss": 0.2541, "step": 27561 }, { "epoch": 0.9458476321207961, "grad_norm": 0.6970357799180604, "learning_rate": 7.68052699204458e-08, "loss": 0.2732, "step": 27562 }, { "epoch": 0.9458819492107069, "grad_norm": 0.7384441156103784, "learning_rate": 7.670826672958431e-08, "loss": 0.2377, "step": 27563 }, { "epoch": 0.9459162663006178, "grad_norm": 0.9450511015294947, "learning_rate": 7.661132436025232e-08, "loss": 0.2538, "step": 27564 }, { "epoch": 0.9459505833905285, "grad_norm": 0.9499084754145193, "learning_rate": 7.651444281364718e-08, "loss": 0.2229, "step": 27565 }, { "epoch": 0.9459849004804393, "grad_norm": 0.8240299502181933, "learning_rate": 7.641762209096626e-08, "loss": 0.2808, "step": 27566 }, { "epoch": 0.94601921757035, "grad_norm": 0.8568545157460674, "learning_rate": 7.632086219340473e-08, "loss": 0.2524, "step": 27567 }, { "epoch": 0.9460535346602608, "grad_norm": 0.7887731419320702, "learning_rate": 7.622416312215886e-08, "loss": 0.2835, "step": 27568 }, { "epoch": 0.9460878517501716, "grad_norm": 0.8054814853720322, "learning_rate": 7.612752487842323e-08, "loss": 0.2687, "step": 27569 }, { "epoch": 0.9461221688400824, "grad_norm": 0.7942923729779451, "learning_rate": 7.603094746339024e-08, "loss": 0.2638, "step": 27570 }, { "epoch": 0.9461564859299931, "grad_norm": 0.788396186153585, "learning_rate": 7.593443087825558e-08, "loss": 0.2958, "step": 27571 }, { "epoch": 0.9461908030199039, "grad_norm": 0.7163258471874551, "learning_rate": 7.583797512420887e-08, "loss": 0.2831, "step": 27572 }, { "epoch": 0.9462251201098146, "grad_norm": 0.7288951163062358, "learning_rate": 7.57415802024436e-08, "loss": 0.2313, "step": 27573 }, { "epoch": 0.9462594371997255, "grad_norm": 0.8422883244103972, "learning_rate": 7.564524611414991e-08, "loss": 0.2956, "step": 27574 }, { "epoch": 0.9462937542896362, "grad_norm": 0.7803461119688494, "learning_rate": 7.554897286051799e-08, "loss": 0.2588, "step": 27575 }, { "epoch": 0.946328071379547, "grad_norm": 0.874355397816354, "learning_rate": 7.545276044273687e-08, "loss": 0.2217, "step": 27576 }, { "epoch": 0.9463623884694577, "grad_norm": 0.7619602114595398, "learning_rate": 7.535660886199558e-08, "loss": 0.231, "step": 27577 }, { "epoch": 0.9463967055593686, "grad_norm": 0.7228828344099698, "learning_rate": 7.52605181194821e-08, "loss": 0.2942, "step": 27578 }, { "epoch": 0.9464310226492794, "grad_norm": 0.7431284533666852, "learning_rate": 7.516448821638267e-08, "loss": 0.3187, "step": 27579 }, { "epoch": 0.9464653397391901, "grad_norm": 0.8337991575958094, "learning_rate": 7.506851915388414e-08, "loss": 0.217, "step": 27580 }, { "epoch": 0.9464996568291009, "grad_norm": 0.7989595265752714, "learning_rate": 7.497261093317276e-08, "loss": 0.2309, "step": 27581 }, { "epoch": 0.9465339739190116, "grad_norm": 0.8346756326238352, "learning_rate": 7.487676355543205e-08, "loss": 0.3283, "step": 27582 }, { "epoch": 0.9465682910089225, "grad_norm": 0.799700021561983, "learning_rate": 7.478097702184773e-08, "loss": 0.2388, "step": 27583 }, { "epoch": 0.9466026080988332, "grad_norm": 0.7981195596400471, "learning_rate": 7.46852513336016e-08, "loss": 0.2095, "step": 27584 }, { "epoch": 0.946636925188744, "grad_norm": 0.6901785704053186, "learning_rate": 7.45895864918772e-08, "loss": 0.2868, "step": 27585 }, { "epoch": 0.9466712422786547, "grad_norm": 0.7467416256750125, "learning_rate": 7.449398249785523e-08, "loss": 0.2779, "step": 27586 }, { "epoch": 0.9467055593685656, "grad_norm": 0.7348590059479534, "learning_rate": 7.439843935271807e-08, "loss": 0.266, "step": 27587 }, { "epoch": 0.9467398764584763, "grad_norm": 0.7691610842484029, "learning_rate": 7.430295705764535e-08, "loss": 0.2392, "step": 27588 }, { "epoch": 0.9467741935483871, "grad_norm": 0.7664545791251453, "learning_rate": 7.420753561381666e-08, "loss": 0.2848, "step": 27589 }, { "epoch": 0.9468085106382979, "grad_norm": 0.7934578784899918, "learning_rate": 7.411217502241164e-08, "loss": 0.2713, "step": 27590 }, { "epoch": 0.9468428277282086, "grad_norm": 0.6561017549751748, "learning_rate": 7.40168752846071e-08, "loss": 0.1874, "step": 27591 }, { "epoch": 0.9468771448181195, "grad_norm": 0.7083846210673037, "learning_rate": 7.392163640158102e-08, "loss": 0.2291, "step": 27592 }, { "epoch": 0.9469114619080302, "grad_norm": 0.7469933469155693, "learning_rate": 7.382645837450963e-08, "loss": 0.2697, "step": 27593 }, { "epoch": 0.946945778997941, "grad_norm": 0.885644036988191, "learning_rate": 7.373134120456926e-08, "loss": 0.265, "step": 27594 }, { "epoch": 0.9469800960878517, "grad_norm": 0.744176710893774, "learning_rate": 7.363628489293506e-08, "loss": 0.2823, "step": 27595 }, { "epoch": 0.9470144131777625, "grad_norm": 0.7525471028195792, "learning_rate": 7.354128944077998e-08, "loss": 0.2189, "step": 27596 }, { "epoch": 0.9470487302676733, "grad_norm": 0.7709340339284181, "learning_rate": 7.34463548492792e-08, "loss": 0.2474, "step": 27597 }, { "epoch": 0.9470830473575841, "grad_norm": 0.7748937357390764, "learning_rate": 7.33514811196051e-08, "loss": 0.2926, "step": 27598 }, { "epoch": 0.9471173644474948, "grad_norm": 0.7510503390899861, "learning_rate": 7.325666825292898e-08, "loss": 0.2416, "step": 27599 }, { "epoch": 0.9471516815374056, "grad_norm": 0.9528924442774326, "learning_rate": 7.316191625042324e-08, "loss": 0.2306, "step": 27600 }, { "epoch": 0.9471859986273165, "grad_norm": 0.7823276984508688, "learning_rate": 7.306722511325747e-08, "loss": 0.2523, "step": 27601 }, { "epoch": 0.9472203157172272, "grad_norm": 0.7989265066390004, "learning_rate": 7.297259484260189e-08, "loss": 0.2866, "step": 27602 }, { "epoch": 0.947254632807138, "grad_norm": 0.8057570103801605, "learning_rate": 7.287802543962607e-08, "loss": 0.2801, "step": 27603 }, { "epoch": 0.9472889498970487, "grad_norm": 0.8064683521980873, "learning_rate": 7.278351690549745e-08, "loss": 0.256, "step": 27604 }, { "epoch": 0.9473232669869595, "grad_norm": 0.8034565081982863, "learning_rate": 7.268906924138396e-08, "loss": 0.4002, "step": 27605 }, { "epoch": 0.9473575840768703, "grad_norm": 0.7759936413578221, "learning_rate": 7.259468244845247e-08, "loss": 0.2227, "step": 27606 }, { "epoch": 0.9473919011667811, "grad_norm": 0.7362536439730387, "learning_rate": 7.250035652786868e-08, "loss": 0.2363, "step": 27607 }, { "epoch": 0.9474262182566918, "grad_norm": 0.7726180438408773, "learning_rate": 7.24060914807978e-08, "loss": 0.2892, "step": 27608 }, { "epoch": 0.9474605353466026, "grad_norm": 0.7816128288045866, "learning_rate": 7.2311887308405e-08, "loss": 0.2479, "step": 27609 }, { "epoch": 0.9474948524365134, "grad_norm": 0.7835837666534776, "learning_rate": 7.221774401185378e-08, "loss": 0.2593, "step": 27610 }, { "epoch": 0.9475291695264242, "grad_norm": 0.6718718267716927, "learning_rate": 7.212366159230655e-08, "loss": 0.2142, "step": 27611 }, { "epoch": 0.9475634866163349, "grad_norm": 0.8127882865210081, "learning_rate": 7.202964005092683e-08, "loss": 0.296, "step": 27612 }, { "epoch": 0.9475978037062457, "grad_norm": 0.8674299610574167, "learning_rate": 7.193567938887536e-08, "loss": 0.2847, "step": 27613 }, { "epoch": 0.9476321207961564, "grad_norm": 0.80703765495687, "learning_rate": 7.184177960731231e-08, "loss": 0.2575, "step": 27614 }, { "epoch": 0.9476664378860673, "grad_norm": 0.7565477864101101, "learning_rate": 7.174794070739899e-08, "loss": 0.2937, "step": 27615 }, { "epoch": 0.947700754975978, "grad_norm": 0.7508077022461833, "learning_rate": 7.16541626902939e-08, "loss": 0.2636, "step": 27616 }, { "epoch": 0.9477350720658888, "grad_norm": 0.7848446031944565, "learning_rate": 7.156044555715613e-08, "loss": 0.2416, "step": 27617 }, { "epoch": 0.9477693891557996, "grad_norm": 0.7450131495583481, "learning_rate": 7.146678930914253e-08, "loss": 0.2114, "step": 27618 }, { "epoch": 0.9478037062457103, "grad_norm": 0.8217162963670361, "learning_rate": 7.137319394741105e-08, "loss": 0.292, "step": 27619 }, { "epoch": 0.9478380233356212, "grad_norm": 0.9348926260358585, "learning_rate": 7.127965947311688e-08, "loss": 0.2505, "step": 27620 }, { "epoch": 0.9478723404255319, "grad_norm": 0.8404834971499493, "learning_rate": 7.118618588741688e-08, "loss": 0.3187, "step": 27621 }, { "epoch": 0.9479066575154427, "grad_norm": 0.7240142905253739, "learning_rate": 7.109277319146457e-08, "loss": 0.3152, "step": 27622 }, { "epoch": 0.9479409746053534, "grad_norm": 0.7739862561625754, "learning_rate": 7.099942138641514e-08, "loss": 0.2561, "step": 27623 }, { "epoch": 0.9479752916952643, "grad_norm": 0.7180774140541736, "learning_rate": 7.090613047342043e-08, "loss": 0.281, "step": 27624 }, { "epoch": 0.948009608785175, "grad_norm": 0.69419785646623, "learning_rate": 7.081290045363398e-08, "loss": 0.2155, "step": 27625 }, { "epoch": 0.9480439258750858, "grad_norm": 0.7879905027008536, "learning_rate": 7.071973132820764e-08, "loss": 0.2804, "step": 27626 }, { "epoch": 0.9480782429649965, "grad_norm": 0.8525273113272804, "learning_rate": 7.062662309829216e-08, "loss": 0.2266, "step": 27627 }, { "epoch": 0.9481125600549073, "grad_norm": 0.6661549191929149, "learning_rate": 7.053357576503716e-08, "loss": 0.2559, "step": 27628 }, { "epoch": 0.9481468771448182, "grad_norm": 0.7588664337317671, "learning_rate": 7.044058932959286e-08, "loss": 0.2626, "step": 27629 }, { "epoch": 0.9481811942347289, "grad_norm": 0.7417214673545909, "learning_rate": 7.034766379310776e-08, "loss": 0.2559, "step": 27630 }, { "epoch": 0.9482155113246397, "grad_norm": 0.7528402492584265, "learning_rate": 7.025479915672983e-08, "loss": 0.2571, "step": 27631 }, { "epoch": 0.9482498284145504, "grad_norm": 0.7941392859647359, "learning_rate": 7.016199542160595e-08, "loss": 0.2356, "step": 27632 }, { "epoch": 0.9482841455044613, "grad_norm": 0.7339401672241105, "learning_rate": 7.006925258888353e-08, "loss": 0.2749, "step": 27633 }, { "epoch": 0.948318462594372, "grad_norm": 0.8271870809935205, "learning_rate": 6.997657065970775e-08, "loss": 0.2574, "step": 27634 }, { "epoch": 0.9483527796842828, "grad_norm": 0.7135320750857927, "learning_rate": 6.988394963522327e-08, "loss": 0.2621, "step": 27635 }, { "epoch": 0.9483870967741935, "grad_norm": 0.837060824312789, "learning_rate": 6.979138951657472e-08, "loss": 0.2236, "step": 27636 }, { "epoch": 0.9484214138641043, "grad_norm": 0.7925470074918567, "learning_rate": 6.969889030490507e-08, "loss": 0.2166, "step": 27637 }, { "epoch": 0.9484557309540151, "grad_norm": 0.7198565611707669, "learning_rate": 6.960645200135841e-08, "loss": 0.2781, "step": 27638 }, { "epoch": 0.9484900480439259, "grad_norm": 0.7514449360887426, "learning_rate": 6.951407460707549e-08, "loss": 0.2331, "step": 27639 }, { "epoch": 0.9485243651338366, "grad_norm": 0.7999201059491804, "learning_rate": 6.942175812319707e-08, "loss": 0.2344, "step": 27640 }, { "epoch": 0.9485586822237474, "grad_norm": 1.0211090889820438, "learning_rate": 6.932950255086501e-08, "loss": 0.2374, "step": 27641 }, { "epoch": 0.9485929993136581, "grad_norm": 0.8098152893521475, "learning_rate": 6.923730789121841e-08, "loss": 0.2179, "step": 27642 }, { "epoch": 0.948627316403569, "grad_norm": 0.8307744121217499, "learning_rate": 6.914517414539578e-08, "loss": 0.3066, "step": 27643 }, { "epoch": 0.9486616334934798, "grad_norm": 1.022089841304366, "learning_rate": 6.905310131453569e-08, "loss": 0.2441, "step": 27644 }, { "epoch": 0.9486959505833905, "grad_norm": 0.8357740978388254, "learning_rate": 6.896108939977553e-08, "loss": 0.2626, "step": 27645 }, { "epoch": 0.9487302676733013, "grad_norm": 0.7669752389910957, "learning_rate": 6.886913840225218e-08, "loss": 0.2246, "step": 27646 }, { "epoch": 0.9487645847632121, "grad_norm": 0.8431239455186939, "learning_rate": 6.877724832310196e-08, "loss": 0.2519, "step": 27647 }, { "epoch": 0.9487989018531229, "grad_norm": 0.8366536585419828, "learning_rate": 6.86854191634595e-08, "loss": 0.2554, "step": 27648 }, { "epoch": 0.9488332189430336, "grad_norm": 0.7414376102112668, "learning_rate": 6.859365092445835e-08, "loss": 0.269, "step": 27649 }, { "epoch": 0.9488675360329444, "grad_norm": 0.7459477237310528, "learning_rate": 6.850194360723427e-08, "loss": 0.3001, "step": 27650 }, { "epoch": 0.9489018531228551, "grad_norm": 0.855333420868326, "learning_rate": 6.841029721291859e-08, "loss": 0.2647, "step": 27651 }, { "epoch": 0.948936170212766, "grad_norm": 0.7142149288815336, "learning_rate": 6.831871174264426e-08, "loss": 0.2142, "step": 27652 }, { "epoch": 0.9489704873026767, "grad_norm": 0.7502016847186407, "learning_rate": 6.822718719754207e-08, "loss": 0.2421, "step": 27653 }, { "epoch": 0.9490048043925875, "grad_norm": 0.8245288869212244, "learning_rate": 6.813572357874387e-08, "loss": 0.2377, "step": 27654 }, { "epoch": 0.9490391214824982, "grad_norm": 0.6887844382826038, "learning_rate": 6.804432088737822e-08, "loss": 0.2144, "step": 27655 }, { "epoch": 0.9490734385724091, "grad_norm": 0.7717036214121877, "learning_rate": 6.795297912457477e-08, "loss": 0.301, "step": 27656 }, { "epoch": 0.9491077556623199, "grad_norm": 0.8196404724350069, "learning_rate": 6.786169829146206e-08, "loss": 0.2565, "step": 27657 }, { "epoch": 0.9491420727522306, "grad_norm": 0.8340886410627392, "learning_rate": 6.777047838916806e-08, "loss": 0.2481, "step": 27658 }, { "epoch": 0.9491763898421414, "grad_norm": 0.7767775679651169, "learning_rate": 6.767931941881966e-08, "loss": 0.2935, "step": 27659 }, { "epoch": 0.9492107069320521, "grad_norm": 0.7948577300529988, "learning_rate": 6.758822138154209e-08, "loss": 0.2711, "step": 27660 }, { "epoch": 0.949245024021963, "grad_norm": 0.7276717813454927, "learning_rate": 6.749718427846164e-08, "loss": 0.2538, "step": 27661 }, { "epoch": 0.9492793411118737, "grad_norm": 0.8027653523054958, "learning_rate": 6.740620811070297e-08, "loss": 0.271, "step": 27662 }, { "epoch": 0.9493136582017845, "grad_norm": 0.7335378370731145, "learning_rate": 6.73152928793891e-08, "loss": 0.2602, "step": 27663 }, { "epoch": 0.9493479752916952, "grad_norm": 0.8720888256614517, "learning_rate": 6.722443858564465e-08, "loss": 0.3227, "step": 27664 }, { "epoch": 0.949382292381606, "grad_norm": 2.8897516946643553, "learning_rate": 6.713364523059096e-08, "loss": 0.2152, "step": 27665 }, { "epoch": 0.9494166094715168, "grad_norm": 0.7239699964046916, "learning_rate": 6.704291281534936e-08, "loss": 0.2741, "step": 27666 }, { "epoch": 0.9494509265614276, "grad_norm": 0.7901663948034832, "learning_rate": 6.695224134104173e-08, "loss": 0.2532, "step": 27667 }, { "epoch": 0.9494852436513384, "grad_norm": 0.7341270524684067, "learning_rate": 6.686163080878772e-08, "loss": 0.2646, "step": 27668 }, { "epoch": 0.9495195607412491, "grad_norm": 0.7810917616915264, "learning_rate": 6.677108121970699e-08, "loss": 0.2398, "step": 27669 }, { "epoch": 0.94955387783116, "grad_norm": 0.7459270773783566, "learning_rate": 6.668059257491754e-08, "loss": 0.2285, "step": 27670 }, { "epoch": 0.9495881949210707, "grad_norm": 0.7296369566604285, "learning_rate": 6.659016487553849e-08, "loss": 0.2813, "step": 27671 }, { "epoch": 0.9496225120109815, "grad_norm": 0.8744106330854889, "learning_rate": 6.649979812268503e-08, "loss": 0.3145, "step": 27672 }, { "epoch": 0.9496568291008922, "grad_norm": 0.8849863267260225, "learning_rate": 6.640949231747574e-08, "loss": 0.2872, "step": 27673 }, { "epoch": 0.949691146190803, "grad_norm": 0.8674242368174493, "learning_rate": 6.631924746102469e-08, "loss": 0.2927, "step": 27674 }, { "epoch": 0.9497254632807138, "grad_norm": 0.8078854308011624, "learning_rate": 6.622906355444714e-08, "loss": 0.274, "step": 27675 }, { "epoch": 0.9497597803706246, "grad_norm": 0.8248465858581855, "learning_rate": 6.613894059885717e-08, "loss": 0.2773, "step": 27676 }, { "epoch": 0.9497940974605353, "grad_norm": 0.8159786284620203, "learning_rate": 6.60488785953689e-08, "loss": 0.2829, "step": 27677 }, { "epoch": 0.9498284145504461, "grad_norm": 0.8410714177822831, "learning_rate": 6.595887754509367e-08, "loss": 0.2548, "step": 27678 }, { "epoch": 0.949862731640357, "grad_norm": 0.8153368480952274, "learning_rate": 6.586893744914391e-08, "loss": 0.2236, "step": 27679 }, { "epoch": 0.9498970487302677, "grad_norm": 0.698589124976031, "learning_rate": 6.577905830863151e-08, "loss": 0.1945, "step": 27680 }, { "epoch": 0.9499313658201785, "grad_norm": 0.8366605783979114, "learning_rate": 6.568924012466505e-08, "loss": 0.2972, "step": 27681 }, { "epoch": 0.9499656829100892, "grad_norm": 0.8123318495228007, "learning_rate": 6.559948289835639e-08, "loss": 0.3156, "step": 27682 }, { "epoch": 0.95, "grad_norm": 0.6525162317567116, "learning_rate": 6.550978663081187e-08, "loss": 0.241, "step": 27683 }, { "epoch": 0.9500343170899108, "grad_norm": 0.7430725984406689, "learning_rate": 6.542015132314172e-08, "loss": 0.2864, "step": 27684 }, { "epoch": 0.9500686341798216, "grad_norm": 0.7519886005045017, "learning_rate": 6.533057697645285e-08, "loss": 0.2785, "step": 27685 }, { "epoch": 0.9501029512697323, "grad_norm": 0.7662580158546842, "learning_rate": 6.524106359185045e-08, "loss": 0.264, "step": 27686 }, { "epoch": 0.9501372683596431, "grad_norm": 0.7429000043302588, "learning_rate": 6.5151611170442e-08, "loss": 0.2921, "step": 27687 }, { "epoch": 0.9501715854495538, "grad_norm": 0.7670850146050281, "learning_rate": 6.506221971333159e-08, "loss": 0.2645, "step": 27688 }, { "epoch": 0.9502059025394647, "grad_norm": 0.7724466841100347, "learning_rate": 6.497288922162392e-08, "loss": 0.2406, "step": 27689 }, { "epoch": 0.9502402196293754, "grad_norm": 0.7333300976141076, "learning_rate": 6.488361969642254e-08, "loss": 0.2345, "step": 27690 }, { "epoch": 0.9502745367192862, "grad_norm": 0.8593986278052437, "learning_rate": 6.479441113882989e-08, "loss": 0.3089, "step": 27691 }, { "epoch": 0.9503088538091969, "grad_norm": 0.8012199197205584, "learning_rate": 6.470526354994899e-08, "loss": 0.2295, "step": 27692 }, { "epoch": 0.9503431708991078, "grad_norm": 0.7619122366840761, "learning_rate": 6.461617693088007e-08, "loss": 0.2563, "step": 27693 }, { "epoch": 0.9503774879890186, "grad_norm": 0.8120702629461942, "learning_rate": 6.452715128272502e-08, "loss": 0.3142, "step": 27694 }, { "epoch": 0.9504118050789293, "grad_norm": 0.7321888174060281, "learning_rate": 6.443818660658185e-08, "loss": 0.2243, "step": 27695 }, { "epoch": 0.9504461221688401, "grad_norm": 0.8447184095499892, "learning_rate": 6.434928290355136e-08, "loss": 0.2426, "step": 27696 }, { "epoch": 0.9504804392587508, "grad_norm": 0.7882328785280026, "learning_rate": 6.426044017473043e-08, "loss": 0.2618, "step": 27697 }, { "epoch": 0.9505147563486617, "grad_norm": 0.8073320500925024, "learning_rate": 6.417165842121764e-08, "loss": 0.3161, "step": 27698 }, { "epoch": 0.9505490734385724, "grad_norm": 0.73965230911815, "learning_rate": 6.408293764410933e-08, "loss": 0.2079, "step": 27699 }, { "epoch": 0.9505833905284832, "grad_norm": 0.7224016048878705, "learning_rate": 6.39942778445013e-08, "loss": 0.2149, "step": 27700 }, { "epoch": 0.9506177076183939, "grad_norm": 0.7300582777945485, "learning_rate": 6.390567902348933e-08, "loss": 0.3239, "step": 27701 }, { "epoch": 0.9506520247083048, "grad_norm": 0.7048918285822944, "learning_rate": 6.381714118216808e-08, "loss": 0.2235, "step": 27702 }, { "epoch": 0.9506863417982155, "grad_norm": 0.8995239462355559, "learning_rate": 6.372866432163117e-08, "loss": 0.2241, "step": 27703 }, { "epoch": 0.9507206588881263, "grad_norm": 0.8221392972935722, "learning_rate": 6.364024844297046e-08, "loss": 0.2777, "step": 27704 }, { "epoch": 0.950754975978037, "grad_norm": 0.7983640417149707, "learning_rate": 6.355189354728009e-08, "loss": 0.2896, "step": 27705 }, { "epoch": 0.9507892930679478, "grad_norm": 0.7046414198802958, "learning_rate": 6.34635996356503e-08, "loss": 0.21, "step": 27706 }, { "epoch": 0.9508236101578587, "grad_norm": 0.7651215338482853, "learning_rate": 6.337536670917243e-08, "loss": 0.2665, "step": 27707 }, { "epoch": 0.9508579272477694, "grad_norm": 0.8351182894265821, "learning_rate": 6.328719476893674e-08, "loss": 0.2529, "step": 27708 }, { "epoch": 0.9508922443376802, "grad_norm": 0.7730588590711889, "learning_rate": 6.31990838160318e-08, "loss": 0.2568, "step": 27709 }, { "epoch": 0.9509265614275909, "grad_norm": 0.7712422138253359, "learning_rate": 6.311103385154615e-08, "loss": 0.2293, "step": 27710 }, { "epoch": 0.9509608785175017, "grad_norm": 0.7842288354131659, "learning_rate": 6.302304487656841e-08, "loss": 0.2425, "step": 27711 }, { "epoch": 0.9509951956074125, "grad_norm": 0.7794418641710549, "learning_rate": 6.293511689218434e-08, "loss": 0.2716, "step": 27712 }, { "epoch": 0.9510295126973233, "grad_norm": 0.7957817478153105, "learning_rate": 6.284724989948144e-08, "loss": 0.2671, "step": 27713 }, { "epoch": 0.951063829787234, "grad_norm": 0.7192518282257426, "learning_rate": 6.275944389954381e-08, "loss": 0.2488, "step": 27714 }, { "epoch": 0.9510981468771448, "grad_norm": 0.7007884805980378, "learning_rate": 6.267169889345781e-08, "loss": 0.2651, "step": 27715 }, { "epoch": 0.9511324639670556, "grad_norm": 0.7724856400950529, "learning_rate": 6.258401488230592e-08, "loss": 0.2415, "step": 27716 }, { "epoch": 0.9511667810569664, "grad_norm": 0.789453747336476, "learning_rate": 6.249639186717282e-08, "loss": 0.2743, "step": 27717 }, { "epoch": 0.9512010981468771, "grad_norm": 0.8424399494764094, "learning_rate": 6.240882984913988e-08, "loss": 0.3034, "step": 27718 }, { "epoch": 0.9512354152367879, "grad_norm": 0.7777984926389396, "learning_rate": 6.232132882928954e-08, "loss": 0.2445, "step": 27719 }, { "epoch": 0.9512697323266986, "grad_norm": 0.7341341436141946, "learning_rate": 6.223388880870152e-08, "loss": 0.2623, "step": 27720 }, { "epoch": 0.9513040494166095, "grad_norm": 0.7617345843705366, "learning_rate": 6.214650978845771e-08, "loss": 0.2747, "step": 27721 }, { "epoch": 0.9513383665065203, "grad_norm": 0.7184382952176162, "learning_rate": 6.205919176963615e-08, "loss": 0.2672, "step": 27722 }, { "epoch": 0.951372683596431, "grad_norm": 0.8597556349037748, "learning_rate": 6.197193475331653e-08, "loss": 0.2778, "step": 27723 }, { "epoch": 0.9514070006863418, "grad_norm": 0.9236254238498145, "learning_rate": 6.188473874057687e-08, "loss": 0.2723, "step": 27724 }, { "epoch": 0.9514413177762526, "grad_norm": 0.913980563111465, "learning_rate": 6.179760373249355e-08, "loss": 0.2547, "step": 27725 }, { "epoch": 0.9514756348661634, "grad_norm": 0.7203520149031967, "learning_rate": 6.171052973014346e-08, "loss": 0.2768, "step": 27726 }, { "epoch": 0.9515099519560741, "grad_norm": 0.8257166316077996, "learning_rate": 6.162351673460187e-08, "loss": 0.2619, "step": 27727 }, { "epoch": 0.9515442690459849, "grad_norm": 0.8204735589810151, "learning_rate": 6.153656474694514e-08, "loss": 0.3023, "step": 27728 }, { "epoch": 0.9515785861358956, "grad_norm": 0.7524793797929001, "learning_rate": 6.144967376824573e-08, "loss": 0.2156, "step": 27729 }, { "epoch": 0.9516129032258065, "grad_norm": 0.9363213907099756, "learning_rate": 6.13628437995778e-08, "loss": 0.3699, "step": 27730 }, { "epoch": 0.9516472203157172, "grad_norm": 0.7570728167876571, "learning_rate": 6.127607484201437e-08, "loss": 0.3067, "step": 27731 }, { "epoch": 0.951681537405628, "grad_norm": 0.8898618299271314, "learning_rate": 6.118936689662736e-08, "loss": 0.2303, "step": 27732 }, { "epoch": 0.9517158544955387, "grad_norm": 0.7640906261692556, "learning_rate": 6.110271996448702e-08, "loss": 0.2561, "step": 27733 }, { "epoch": 0.9517501715854495, "grad_norm": 0.7080560382161469, "learning_rate": 6.101613404666417e-08, "loss": 0.2505, "step": 27734 }, { "epoch": 0.9517844886753604, "grad_norm": 0.8182929386793724, "learning_rate": 6.092960914422908e-08, "loss": 0.3119, "step": 27735 }, { "epoch": 0.9518188057652711, "grad_norm": 0.7355669957111227, "learning_rate": 6.084314525825031e-08, "loss": 0.2301, "step": 27736 }, { "epoch": 0.9518531228551819, "grad_norm": 0.8368369071374485, "learning_rate": 6.07567423897959e-08, "loss": 0.286, "step": 27737 }, { "epoch": 0.9518874399450926, "grad_norm": 0.7036933645579735, "learning_rate": 6.06704005399339e-08, "loss": 0.2307, "step": 27738 }, { "epoch": 0.9519217570350035, "grad_norm": 0.7443413626171564, "learning_rate": 6.058411970972955e-08, "loss": 0.2477, "step": 27739 }, { "epoch": 0.9519560741249142, "grad_norm": 0.6651166298713247, "learning_rate": 6.049789990025034e-08, "loss": 0.2043, "step": 27740 }, { "epoch": 0.951990391214825, "grad_norm": 0.8391946228027238, "learning_rate": 6.041174111255988e-08, "loss": 0.2678, "step": 27741 }, { "epoch": 0.9520247083047357, "grad_norm": 0.6610426490190772, "learning_rate": 6.032564334772395e-08, "loss": 0.2233, "step": 27742 }, { "epoch": 0.9520590253946465, "grad_norm": 0.7587778901042072, "learning_rate": 6.023960660680506e-08, "loss": 0.2778, "step": 27743 }, { "epoch": 0.9520933424845573, "grad_norm": 0.718227956581436, "learning_rate": 6.015363089086734e-08, "loss": 0.2883, "step": 27744 }, { "epoch": 0.9521276595744681, "grad_norm": 0.8259943465480873, "learning_rate": 6.006771620097162e-08, "loss": 0.2763, "step": 27745 }, { "epoch": 0.9521619766643789, "grad_norm": 0.8134198941887508, "learning_rate": 5.998186253817983e-08, "loss": 0.2903, "step": 27746 }, { "epoch": 0.9521962937542896, "grad_norm": 0.8201240656197657, "learning_rate": 5.989606990355279e-08, "loss": 0.242, "step": 27747 }, { "epoch": 0.9522306108442005, "grad_norm": 0.7600093892615191, "learning_rate": 5.98103382981502e-08, "loss": 0.2308, "step": 27748 }, { "epoch": 0.9522649279341112, "grad_norm": 0.7045936775906202, "learning_rate": 5.972466772303175e-08, "loss": 0.2307, "step": 27749 }, { "epoch": 0.952299245024022, "grad_norm": 0.7884922747588217, "learning_rate": 5.963905817925441e-08, "loss": 0.2481, "step": 27750 }, { "epoch": 0.9523335621139327, "grad_norm": 0.7144208644146398, "learning_rate": 5.955350966787676e-08, "loss": 0.2529, "step": 27751 }, { "epoch": 0.9523678792038435, "grad_norm": 0.6857212394719537, "learning_rate": 5.9468022189956284e-08, "loss": 0.2076, "step": 27752 }, { "epoch": 0.9524021962937543, "grad_norm": 0.7973607998992233, "learning_rate": 5.938259574654714e-08, "loss": 0.2337, "step": 27753 }, { "epoch": 0.9524365133836651, "grad_norm": 0.8142952122640271, "learning_rate": 5.929723033870682e-08, "loss": 0.2351, "step": 27754 }, { "epoch": 0.9524708304735758, "grad_norm": 0.8208964009802393, "learning_rate": 5.921192596748837e-08, "loss": 0.2431, "step": 27755 }, { "epoch": 0.9525051475634866, "grad_norm": 0.7368089984478948, "learning_rate": 5.912668263394594e-08, "loss": 0.2553, "step": 27756 }, { "epoch": 0.9525394646533973, "grad_norm": 0.8206184818935737, "learning_rate": 5.904150033913314e-08, "loss": 0.293, "step": 27757 }, { "epoch": 0.9525737817433082, "grad_norm": 0.7038136443162805, "learning_rate": 5.89563790841019e-08, "loss": 0.248, "step": 27758 }, { "epoch": 0.952608098833219, "grad_norm": 0.9028638083186676, "learning_rate": 5.887131886990416e-08, "loss": 0.281, "step": 27759 }, { "epoch": 0.9526424159231297, "grad_norm": 0.719100239679979, "learning_rate": 5.878631969759019e-08, "loss": 0.2583, "step": 27760 }, { "epoch": 0.9526767330130405, "grad_norm": 0.8149097952755546, "learning_rate": 5.8701381568210814e-08, "loss": 0.2952, "step": 27761 }, { "epoch": 0.9527110501029513, "grad_norm": 0.7019084426621826, "learning_rate": 5.861650448281408e-08, "loss": 0.2855, "step": 27762 }, { "epoch": 0.9527453671928621, "grad_norm": 0.6659144857982104, "learning_rate": 5.853168844245027e-08, "loss": 0.2894, "step": 27763 }, { "epoch": 0.9527796842827728, "grad_norm": 0.7124389819036226, "learning_rate": 5.8446933448165763e-08, "loss": 0.2375, "step": 27764 }, { "epoch": 0.9528140013726836, "grad_norm": 0.8285755209117176, "learning_rate": 5.8362239501008054e-08, "loss": 0.2628, "step": 27765 }, { "epoch": 0.9528483184625943, "grad_norm": 0.8130196967224177, "learning_rate": 5.8277606602023526e-08, "loss": 0.2787, "step": 27766 }, { "epoch": 0.9528826355525052, "grad_norm": 0.7614213249105052, "learning_rate": 5.8193034752258015e-08, "loss": 0.3143, "step": 27767 }, { "epoch": 0.9529169526424159, "grad_norm": 0.8058523000716102, "learning_rate": 5.810852395275568e-08, "loss": 0.3392, "step": 27768 }, { "epoch": 0.9529512697323267, "grad_norm": 0.7439375405026563, "learning_rate": 5.802407420456069e-08, "loss": 0.2288, "step": 27769 }, { "epoch": 0.9529855868222374, "grad_norm": 0.8561197494215392, "learning_rate": 5.793968550871665e-08, "loss": 0.2617, "step": 27770 }, { "epoch": 0.9530199039121483, "grad_norm": 0.7564441987729786, "learning_rate": 5.785535786626551e-08, "loss": 0.2473, "step": 27771 }, { "epoch": 0.953054221002059, "grad_norm": 0.7020294314161377, "learning_rate": 5.777109127825031e-08, "loss": 0.2237, "step": 27772 }, { "epoch": 0.9530885380919698, "grad_norm": 0.7902671069943691, "learning_rate": 5.76868857457108e-08, "loss": 0.3247, "step": 27773 }, { "epoch": 0.9531228551818806, "grad_norm": 0.8454092489846633, "learning_rate": 5.7602741269687234e-08, "loss": 0.2837, "step": 27774 }, { "epoch": 0.9531571722717913, "grad_norm": 0.7784688073879779, "learning_rate": 5.7518657851219904e-08, "loss": 0.2573, "step": 27775 }, { "epoch": 0.9531914893617022, "grad_norm": 0.7407085645908807, "learning_rate": 5.7434635491347425e-08, "loss": 0.2513, "step": 27776 }, { "epoch": 0.9532258064516129, "grad_norm": 0.7889087121159357, "learning_rate": 5.735067419110729e-08, "loss": 0.2139, "step": 27777 }, { "epoch": 0.9532601235415237, "grad_norm": 0.8151258211186451, "learning_rate": 5.726677395153646e-08, "loss": 0.3305, "step": 27778 }, { "epoch": 0.9532944406314344, "grad_norm": 0.8137944450731761, "learning_rate": 5.7182934773672425e-08, "loss": 0.3109, "step": 27779 }, { "epoch": 0.9533287577213452, "grad_norm": 0.7216651717350673, "learning_rate": 5.709915665855048e-08, "loss": 0.2772, "step": 27780 }, { "epoch": 0.953363074811256, "grad_norm": 0.7211990213194457, "learning_rate": 5.70154396072059e-08, "loss": 0.2181, "step": 27781 }, { "epoch": 0.9533973919011668, "grad_norm": 0.8468663245157196, "learning_rate": 5.693178362067231e-08, "loss": 0.2583, "step": 27782 }, { "epoch": 0.9534317089910775, "grad_norm": 0.8753340484265999, "learning_rate": 5.684818869998332e-08, "loss": 0.2485, "step": 27783 }, { "epoch": 0.9534660260809883, "grad_norm": 0.7934485732656328, "learning_rate": 5.676465484617255e-08, "loss": 0.2599, "step": 27784 }, { "epoch": 0.9535003431708992, "grad_norm": 0.7965472452405313, "learning_rate": 5.668118206027029e-08, "loss": 0.2536, "step": 27785 }, { "epoch": 0.9535346602608099, "grad_norm": 0.7439157156802136, "learning_rate": 5.6597770343309046e-08, "loss": 0.2424, "step": 27786 }, { "epoch": 0.9535689773507207, "grad_norm": 0.7472103871691709, "learning_rate": 5.65144196963191e-08, "loss": 0.2356, "step": 27787 }, { "epoch": 0.9536032944406314, "grad_norm": 0.8374354153908856, "learning_rate": 5.643113012032964e-08, "loss": 0.262, "step": 27788 }, { "epoch": 0.9536376115305422, "grad_norm": 0.7575038606178249, "learning_rate": 5.634790161636983e-08, "loss": 0.2976, "step": 27789 }, { "epoch": 0.953671928620453, "grad_norm": 0.7755228337283331, "learning_rate": 5.6264734185468295e-08, "loss": 0.26, "step": 27790 }, { "epoch": 0.9537062457103638, "grad_norm": 0.6888688297914746, "learning_rate": 5.6181627828652e-08, "loss": 0.2021, "step": 27791 }, { "epoch": 0.9537405628002745, "grad_norm": 0.8192702707896072, "learning_rate": 5.6098582546947336e-08, "loss": 0.2603, "step": 27792 }, { "epoch": 0.9537748798901853, "grad_norm": 0.765983539572937, "learning_rate": 5.6015598341381814e-08, "loss": 0.2707, "step": 27793 }, { "epoch": 0.9538091969800961, "grad_norm": 0.7524856186759312, "learning_rate": 5.593267521297796e-08, "loss": 0.1843, "step": 27794 }, { "epoch": 0.9538435140700069, "grad_norm": 0.7379900444340979, "learning_rate": 5.5849813162762725e-08, "loss": 0.2237, "step": 27795 }, { "epoch": 0.9538778311599176, "grad_norm": 0.7428573805627515, "learning_rate": 5.576701219175862e-08, "loss": 0.2646, "step": 27796 }, { "epoch": 0.9539121482498284, "grad_norm": 0.9134066876741281, "learning_rate": 5.568427230098816e-08, "loss": 0.2971, "step": 27797 }, { "epoch": 0.9539464653397391, "grad_norm": 0.7615350548404833, "learning_rate": 5.5601593491474426e-08, "loss": 0.3431, "step": 27798 }, { "epoch": 0.95398078242965, "grad_norm": 0.808479447084196, "learning_rate": 5.551897576423826e-08, "loss": 0.2864, "step": 27799 }, { "epoch": 0.9540150995195608, "grad_norm": 0.8642333630554153, "learning_rate": 5.543641912030051e-08, "loss": 0.2671, "step": 27800 }, { "epoch": 0.9540494166094715, "grad_norm": 0.8418668445567331, "learning_rate": 5.5353923560680925e-08, "loss": 0.2365, "step": 27801 }, { "epoch": 0.9540837336993823, "grad_norm": 0.90093683606626, "learning_rate": 5.527148908639868e-08, "loss": 0.2531, "step": 27802 }, { "epoch": 0.954118050789293, "grad_norm": 0.8248083652354303, "learning_rate": 5.518911569847241e-08, "loss": 0.2935, "step": 27803 }, { "epoch": 0.9541523678792039, "grad_norm": 0.7990869386434588, "learning_rate": 5.510680339791907e-08, "loss": 0.2621, "step": 27804 }, { "epoch": 0.9541866849691146, "grad_norm": 0.8480396864869856, "learning_rate": 5.5024552185756755e-08, "loss": 0.2928, "step": 27805 }, { "epoch": 0.9542210020590254, "grad_norm": 0.7848231105120828, "learning_rate": 5.4942362063000186e-08, "loss": 0.3278, "step": 27806 }, { "epoch": 0.9542553191489361, "grad_norm": 0.7670528579328791, "learning_rate": 5.486023303066579e-08, "loss": 0.2665, "step": 27807 }, { "epoch": 0.954289636238847, "grad_norm": 0.8134593288767715, "learning_rate": 5.477816508976719e-08, "loss": 0.2553, "step": 27808 }, { "epoch": 0.9543239533287577, "grad_norm": 0.8894116602801813, "learning_rate": 5.469615824131913e-08, "loss": 0.2882, "step": 27809 }, { "epoch": 0.9543582704186685, "grad_norm": 0.7211891047521661, "learning_rate": 5.461421248633414e-08, "loss": 0.2895, "step": 27810 }, { "epoch": 0.9543925875085792, "grad_norm": 0.8028156081002804, "learning_rate": 5.453232782582529e-08, "loss": 0.2197, "step": 27811 }, { "epoch": 0.95442690459849, "grad_norm": 0.9072160568731014, "learning_rate": 5.44505042608029e-08, "loss": 0.287, "step": 27812 }, { "epoch": 0.9544612216884009, "grad_norm": 0.8241738979846805, "learning_rate": 5.436874179227947e-08, "loss": 0.3389, "step": 27813 }, { "epoch": 0.9544955387783116, "grad_norm": 0.7894329163483865, "learning_rate": 5.4287040421263645e-08, "loss": 0.2498, "step": 27814 }, { "epoch": 0.9545298558682224, "grad_norm": 0.7858847285808659, "learning_rate": 5.420540014876519e-08, "loss": 0.2332, "step": 27815 }, { "epoch": 0.9545641729581331, "grad_norm": 0.7657100868589077, "learning_rate": 5.4123820975793275e-08, "loss": 0.2508, "step": 27816 }, { "epoch": 0.954598490048044, "grad_norm": 0.8038446150202287, "learning_rate": 5.4042302903354324e-08, "loss": 0.2696, "step": 27817 }, { "epoch": 0.9546328071379547, "grad_norm": 0.6981144497403259, "learning_rate": 5.3960845932456984e-08, "loss": 0.248, "step": 27818 }, { "epoch": 0.9546671242278655, "grad_norm": 0.8223800753072952, "learning_rate": 5.3879450064107106e-08, "loss": 0.3963, "step": 27819 }, { "epoch": 0.9547014413177762, "grad_norm": 0.8601289422902364, "learning_rate": 5.379811529930945e-08, "loss": 0.256, "step": 27820 }, { "epoch": 0.954735758407687, "grad_norm": 1.0922610686915337, "learning_rate": 5.371684163906987e-08, "loss": 0.3003, "step": 27821 }, { "epoch": 0.9547700754975978, "grad_norm": 0.8026911948789482, "learning_rate": 5.363562908439146e-08, "loss": 0.2206, "step": 27822 }, { "epoch": 0.9548043925875086, "grad_norm": 0.7427295153947217, "learning_rate": 5.355447763627841e-08, "loss": 0.2343, "step": 27823 }, { "epoch": 0.9548387096774194, "grad_norm": 0.7958069217598138, "learning_rate": 5.347338729573215e-08, "loss": 0.2466, "step": 27824 }, { "epoch": 0.9548730267673301, "grad_norm": 0.7589207000507258, "learning_rate": 5.3392358063755754e-08, "loss": 0.2606, "step": 27825 }, { "epoch": 0.9549073438572409, "grad_norm": 0.7889390803407678, "learning_rate": 5.331138994134899e-08, "loss": 0.2592, "step": 27826 }, { "epoch": 0.9549416609471517, "grad_norm": 0.7984530610232755, "learning_rate": 5.323048292951327e-08, "loss": 0.238, "step": 27827 }, { "epoch": 0.9549759780370625, "grad_norm": 0.7790007772110519, "learning_rate": 5.3149637029247806e-08, "loss": 0.2581, "step": 27828 }, { "epoch": 0.9550102951269732, "grad_norm": 0.7902333659228267, "learning_rate": 5.3068852241550115e-08, "loss": 0.2651, "step": 27829 }, { "epoch": 0.955044612216884, "grad_norm": 0.8067803386889426, "learning_rate": 5.298812856741997e-08, "loss": 0.2778, "step": 27830 }, { "epoch": 0.9550789293067948, "grad_norm": 0.7077484715363751, "learning_rate": 5.2907466007853234e-08, "loss": 0.2555, "step": 27831 }, { "epoch": 0.9551132463967056, "grad_norm": 0.713267532806739, "learning_rate": 5.282686456384745e-08, "loss": 0.2217, "step": 27832 }, { "epoch": 0.9551475634866163, "grad_norm": 0.7570351089234845, "learning_rate": 5.2746324236397914e-08, "loss": 0.2238, "step": 27833 }, { "epoch": 0.9551818805765271, "grad_norm": 0.7012158327123802, "learning_rate": 5.266584502649941e-08, "loss": 0.1901, "step": 27834 }, { "epoch": 0.9552161976664378, "grad_norm": 0.7760701848701757, "learning_rate": 5.2585426935146675e-08, "loss": 0.2263, "step": 27835 }, { "epoch": 0.9552505147563487, "grad_norm": 0.8666677176436335, "learning_rate": 5.250506996333227e-08, "loss": 0.2471, "step": 27836 }, { "epoch": 0.9552848318462595, "grad_norm": 0.7844370110748535, "learning_rate": 5.242477411205038e-08, "loss": 0.2507, "step": 27837 }, { "epoch": 0.9553191489361702, "grad_norm": 0.8506524301557341, "learning_rate": 5.234453938229134e-08, "loss": 0.3403, "step": 27838 }, { "epoch": 0.955353466026081, "grad_norm": 0.8817758554281239, "learning_rate": 5.226436577504768e-08, "loss": 0.2727, "step": 27839 }, { "epoch": 0.9553877831159918, "grad_norm": 0.7589847498859649, "learning_rate": 5.2184253291308604e-08, "loss": 0.2592, "step": 27840 }, { "epoch": 0.9554221002059026, "grad_norm": 0.7288032638103923, "learning_rate": 5.210420193206445e-08, "loss": 0.2409, "step": 27841 }, { "epoch": 0.9554564172958133, "grad_norm": 0.7176302270601608, "learning_rate": 5.202421169830496e-08, "loss": 0.2194, "step": 27842 }, { "epoch": 0.9554907343857241, "grad_norm": 0.8433873249070111, "learning_rate": 5.194428259101658e-08, "loss": 0.3214, "step": 27843 }, { "epoch": 0.9555250514756348, "grad_norm": 0.7897242503247098, "learning_rate": 5.186441461118907e-08, "loss": 0.2582, "step": 27844 }, { "epoch": 0.9555593685655457, "grad_norm": 0.8796747099443104, "learning_rate": 5.1784607759806646e-08, "loss": 0.298, "step": 27845 }, { "epoch": 0.9555936856554564, "grad_norm": 0.8071788529885792, "learning_rate": 5.170486203785685e-08, "loss": 0.2328, "step": 27846 }, { "epoch": 0.9556280027453672, "grad_norm": 0.8132628618084932, "learning_rate": 5.1625177446323895e-08, "loss": 0.2597, "step": 27847 }, { "epoch": 0.9556623198352779, "grad_norm": 0.7947644887597899, "learning_rate": 5.15455539861931e-08, "loss": 0.2529, "step": 27848 }, { "epoch": 0.9556966369251887, "grad_norm": 0.8655579677568097, "learning_rate": 5.1465991658447566e-08, "loss": 0.3285, "step": 27849 }, { "epoch": 0.9557309540150996, "grad_norm": 0.8643494564054227, "learning_rate": 5.138649046406985e-08, "loss": 0.274, "step": 27850 }, { "epoch": 0.9557652711050103, "grad_norm": 0.8479058885013457, "learning_rate": 5.130705040404305e-08, "loss": 0.285, "step": 27851 }, { "epoch": 0.9557995881949211, "grad_norm": 0.8671479689268554, "learning_rate": 5.1227671479347484e-08, "loss": 0.2795, "step": 27852 }, { "epoch": 0.9558339052848318, "grad_norm": 0.7234183748453249, "learning_rate": 5.114835369096516e-08, "loss": 0.2283, "step": 27853 }, { "epoch": 0.9558682223747427, "grad_norm": 0.7326434871963218, "learning_rate": 5.106909703987417e-08, "loss": 0.2682, "step": 27854 }, { "epoch": 0.9559025394646534, "grad_norm": 0.6587574686151241, "learning_rate": 5.0989901527055406e-08, "loss": 0.2573, "step": 27855 }, { "epoch": 0.9559368565545642, "grad_norm": 0.8112641734814283, "learning_rate": 5.0910767153485864e-08, "loss": 0.29, "step": 27856 }, { "epoch": 0.9559711736444749, "grad_norm": 0.857816459835919, "learning_rate": 5.083169392014364e-08, "loss": 0.2211, "step": 27857 }, { "epoch": 0.9560054907343857, "grad_norm": 0.7381004749053747, "learning_rate": 5.07526818280063e-08, "loss": 0.2679, "step": 27858 }, { "epoch": 0.9560398078242965, "grad_norm": 0.7755222513746263, "learning_rate": 5.067373087804861e-08, "loss": 0.2531, "step": 27859 }, { "epoch": 0.9560741249142073, "grad_norm": 0.8236452373627084, "learning_rate": 5.0594841071247015e-08, "loss": 0.3147, "step": 27860 }, { "epoch": 0.956108442004118, "grad_norm": 0.7646039686608471, "learning_rate": 5.051601240857573e-08, "loss": 0.2428, "step": 27861 }, { "epoch": 0.9561427590940288, "grad_norm": 0.7538784668421424, "learning_rate": 5.043724489100843e-08, "loss": 0.2416, "step": 27862 }, { "epoch": 0.9561770761839397, "grad_norm": 0.8579444462791141, "learning_rate": 5.035853851951877e-08, "loss": 0.3085, "step": 27863 }, { "epoch": 0.9562113932738504, "grad_norm": 0.7576093385766643, "learning_rate": 5.027989329507765e-08, "loss": 0.237, "step": 27864 }, { "epoch": 0.9562457103637612, "grad_norm": 0.7681812177874537, "learning_rate": 5.020130921865873e-08, "loss": 0.2495, "step": 27865 }, { "epoch": 0.9562800274536719, "grad_norm": 0.769541249178135, "learning_rate": 5.012278629123124e-08, "loss": 0.2003, "step": 27866 }, { "epoch": 0.9563143445435827, "grad_norm": 0.8898199906373575, "learning_rate": 5.004432451376551e-08, "loss": 0.2516, "step": 27867 }, { "epoch": 0.9563486616334935, "grad_norm": 0.643262680348212, "learning_rate": 4.9965923887231315e-08, "loss": 0.2453, "step": 27868 }, { "epoch": 0.9563829787234043, "grad_norm": 0.884308013829999, "learning_rate": 4.988758441259678e-08, "loss": 0.2563, "step": 27869 }, { "epoch": 0.956417295813315, "grad_norm": 1.228029752989749, "learning_rate": 4.9809306090830014e-08, "loss": 0.2978, "step": 27870 }, { "epoch": 0.9564516129032258, "grad_norm": 0.7616826131727823, "learning_rate": 4.973108892289802e-08, "loss": 0.26, "step": 27871 }, { "epoch": 0.9564859299931365, "grad_norm": 0.7621428511666722, "learning_rate": 4.9652932909766695e-08, "loss": 0.2671, "step": 27872 }, { "epoch": 0.9565202470830474, "grad_norm": 0.7296297452946265, "learning_rate": 4.957483805240193e-08, "loss": 0.2718, "step": 27873 }, { "epoch": 0.9565545641729581, "grad_norm": 0.7837938904400702, "learning_rate": 4.949680435176907e-08, "loss": 0.2194, "step": 27874 }, { "epoch": 0.9565888812628689, "grad_norm": 0.7948576995659342, "learning_rate": 4.9418831808830117e-08, "loss": 0.2576, "step": 27875 }, { "epoch": 0.9566231983527796, "grad_norm": 0.7827930133993083, "learning_rate": 4.9340920424550965e-08, "loss": 0.2469, "step": 27876 }, { "epoch": 0.9566575154426905, "grad_norm": 0.7518058734645008, "learning_rate": 4.9263070199892515e-08, "loss": 0.2618, "step": 27877 }, { "epoch": 0.9566918325326013, "grad_norm": 0.8054407269262482, "learning_rate": 4.918528113581622e-08, "loss": 0.2586, "step": 27878 }, { "epoch": 0.956726149622512, "grad_norm": 0.7673023138759456, "learning_rate": 4.910755323328409e-08, "loss": 0.2423, "step": 27879 }, { "epoch": 0.9567604667124228, "grad_norm": 0.7250326609173255, "learning_rate": 4.9029886493255905e-08, "loss": 0.2265, "step": 27880 }, { "epoch": 0.9567947838023335, "grad_norm": 0.7690483153889939, "learning_rate": 4.8952280916691465e-08, "loss": 0.2272, "step": 27881 }, { "epoch": 0.9568291008922444, "grad_norm": 0.8052707238451944, "learning_rate": 4.887473650454888e-08, "loss": 0.2689, "step": 27882 }, { "epoch": 0.9568634179821551, "grad_norm": 0.8021874555820693, "learning_rate": 4.8797253257786836e-08, "loss": 0.2727, "step": 27883 }, { "epoch": 0.9568977350720659, "grad_norm": 1.3125090292819135, "learning_rate": 4.871983117736179e-08, "loss": 0.2628, "step": 27884 }, { "epoch": 0.9569320521619766, "grad_norm": 0.8238263988644172, "learning_rate": 4.864247026423075e-08, "loss": 0.3062, "step": 27885 }, { "epoch": 0.9569663692518875, "grad_norm": 0.7526012872957354, "learning_rate": 4.8565170519349616e-08, "loss": 0.2609, "step": 27886 }, { "epoch": 0.9570006863417982, "grad_norm": 0.7428283982352732, "learning_rate": 4.848793194367207e-08, "loss": 0.2295, "step": 27887 }, { "epoch": 0.957035003431709, "grad_norm": 0.7937258940820549, "learning_rate": 4.841075453815403e-08, "loss": 0.2766, "step": 27888 }, { "epoch": 0.9570693205216197, "grad_norm": 0.8771347151041706, "learning_rate": 4.8333638303748045e-08, "loss": 0.2573, "step": 27889 }, { "epoch": 0.9571036376115305, "grad_norm": 0.7652392363208194, "learning_rate": 4.8256583241406695e-08, "loss": 0.2196, "step": 27890 }, { "epoch": 0.9571379547014414, "grad_norm": 0.8618577462590884, "learning_rate": 4.8179589352082e-08, "loss": 0.3089, "step": 27891 }, { "epoch": 0.9571722717913521, "grad_norm": 0.7332892990063764, "learning_rate": 4.810265663672542e-08, "loss": 0.2833, "step": 27892 }, { "epoch": 0.9572065888812629, "grad_norm": 0.7701066707398374, "learning_rate": 4.80257850962873e-08, "loss": 0.2693, "step": 27893 }, { "epoch": 0.9572409059711736, "grad_norm": 0.7598627572261734, "learning_rate": 4.794897473171689e-08, "loss": 0.2503, "step": 27894 }, { "epoch": 0.9572752230610844, "grad_norm": 0.7313975117149722, "learning_rate": 4.787222554396342e-08, "loss": 0.2359, "step": 27895 }, { "epoch": 0.9573095401509952, "grad_norm": 0.7667474603553933, "learning_rate": 4.779553753397503e-08, "loss": 0.2649, "step": 27896 }, { "epoch": 0.957343857240906, "grad_norm": 0.8032780127055337, "learning_rate": 4.77189107026993e-08, "loss": 0.2302, "step": 27897 }, { "epoch": 0.9573781743308167, "grad_norm": 0.7817560907435153, "learning_rate": 4.764234505108212e-08, "loss": 0.212, "step": 27898 }, { "epoch": 0.9574124914207275, "grad_norm": 0.7488483874282401, "learning_rate": 4.7565840580069966e-08, "loss": 0.2069, "step": 27899 }, { "epoch": 0.9574468085106383, "grad_norm": 0.7848543650366364, "learning_rate": 4.7489397290608196e-08, "loss": 0.2501, "step": 27900 }, { "epoch": 0.9574811256005491, "grad_norm": 0.8089038598491778, "learning_rate": 4.74130151836405e-08, "loss": 0.2339, "step": 27901 }, { "epoch": 0.9575154426904599, "grad_norm": 0.7652320867134769, "learning_rate": 4.7336694260111117e-08, "loss": 0.2732, "step": 27902 }, { "epoch": 0.9575497597803706, "grad_norm": 0.7336390903951507, "learning_rate": 4.7260434520962625e-08, "loss": 0.2665, "step": 27903 }, { "epoch": 0.9575840768702814, "grad_norm": 0.8460650646962103, "learning_rate": 4.718423596713706e-08, "loss": 0.2565, "step": 27904 }, { "epoch": 0.9576183939601922, "grad_norm": 0.814518747720563, "learning_rate": 4.710809859957533e-08, "loss": 0.2696, "step": 27905 }, { "epoch": 0.957652711050103, "grad_norm": 0.6687262632299901, "learning_rate": 4.703202241921945e-08, "loss": 0.2369, "step": 27906 }, { "epoch": 0.9576870281400137, "grad_norm": 0.797839996379085, "learning_rate": 4.6956007427007566e-08, "loss": 0.3395, "step": 27907 }, { "epoch": 0.9577213452299245, "grad_norm": 0.767184810052355, "learning_rate": 4.688005362388004e-08, "loss": 0.2467, "step": 27908 }, { "epoch": 0.9577556623198353, "grad_norm": 0.7963547254981465, "learning_rate": 4.680416101077445e-08, "loss": 0.2463, "step": 27909 }, { "epoch": 0.9577899794097461, "grad_norm": 0.7451280727930943, "learning_rate": 4.672832958862838e-08, "loss": 0.2326, "step": 27910 }, { "epoch": 0.9578242964996568, "grad_norm": 0.7812462561708713, "learning_rate": 4.6652559358379424e-08, "loss": 0.3041, "step": 27911 }, { "epoch": 0.9578586135895676, "grad_norm": 0.8239127086338891, "learning_rate": 4.657685032096293e-08, "loss": 0.2943, "step": 27912 }, { "epoch": 0.9578929306794783, "grad_norm": 0.9218730315845967, "learning_rate": 4.650120247731371e-08, "loss": 0.2637, "step": 27913 }, { "epoch": 0.9579272477693892, "grad_norm": 0.8915385736693532, "learning_rate": 4.6425615828367684e-08, "loss": 0.262, "step": 27914 }, { "epoch": 0.9579615648593, "grad_norm": 0.8351478592784881, "learning_rate": 4.6350090375057444e-08, "loss": 0.2233, "step": 27915 }, { "epoch": 0.9579958819492107, "grad_norm": 0.7689594265417613, "learning_rate": 4.6274626118316125e-08, "loss": 0.2454, "step": 27916 }, { "epoch": 0.9580301990391215, "grad_norm": 0.8485966234854198, "learning_rate": 4.619922305907687e-08, "loss": 0.2706, "step": 27917 }, { "epoch": 0.9580645161290322, "grad_norm": 0.7330280008062986, "learning_rate": 4.61238811982706e-08, "loss": 0.2206, "step": 27918 }, { "epoch": 0.9580988332189431, "grad_norm": 0.8487986869699956, "learning_rate": 4.604860053682769e-08, "loss": 0.228, "step": 27919 }, { "epoch": 0.9581331503088538, "grad_norm": 0.8855024769129238, "learning_rate": 4.5973381075679055e-08, "loss": 0.2852, "step": 27920 }, { "epoch": 0.9581674673987646, "grad_norm": 0.8236976121994263, "learning_rate": 4.58982228157534e-08, "loss": 0.2452, "step": 27921 }, { "epoch": 0.9582017844886753, "grad_norm": 0.7522208623480893, "learning_rate": 4.5823125757979427e-08, "loss": 0.2328, "step": 27922 }, { "epoch": 0.9582361015785862, "grad_norm": 0.7889068794360395, "learning_rate": 4.5748089903284166e-08, "loss": 0.2614, "step": 27923 }, { "epoch": 0.9582704186684969, "grad_norm": 0.7941903832131245, "learning_rate": 4.567311525259521e-08, "loss": 0.2338, "step": 27924 }, { "epoch": 0.9583047357584077, "grad_norm": 0.7902584110164576, "learning_rate": 4.559820180683905e-08, "loss": 0.2781, "step": 27925 }, { "epoch": 0.9583390528483184, "grad_norm": 0.7927477048462046, "learning_rate": 4.552334956694104e-08, "loss": 0.2541, "step": 27926 }, { "epoch": 0.9583733699382292, "grad_norm": 0.8176400136055365, "learning_rate": 4.544855853382546e-08, "loss": 0.2542, "step": 27927 }, { "epoch": 0.95840768702814, "grad_norm": 0.7397788302527112, "learning_rate": 4.5373828708416003e-08, "loss": 0.2704, "step": 27928 }, { "epoch": 0.9584420041180508, "grad_norm": 0.7451759554978098, "learning_rate": 4.5299160091636927e-08, "loss": 0.244, "step": 27929 }, { "epoch": 0.9584763212079616, "grad_norm": 0.8414160559900077, "learning_rate": 4.5224552684410283e-08, "loss": 0.2472, "step": 27930 }, { "epoch": 0.9585106382978723, "grad_norm": 0.7149138327797961, "learning_rate": 4.5150006487657e-08, "loss": 0.2882, "step": 27931 }, { "epoch": 0.9585449553877832, "grad_norm": 0.7503949480429367, "learning_rate": 4.50755215022991e-08, "loss": 0.208, "step": 27932 }, { "epoch": 0.9585792724776939, "grad_norm": 0.7690245969434232, "learning_rate": 4.500109772925587e-08, "loss": 0.2386, "step": 27933 }, { "epoch": 0.9586135895676047, "grad_norm": 0.8856457752031134, "learning_rate": 4.492673516944712e-08, "loss": 0.2992, "step": 27934 }, { "epoch": 0.9586479066575154, "grad_norm": 0.7130592163737136, "learning_rate": 4.485243382379157e-08, "loss": 0.2286, "step": 27935 }, { "epoch": 0.9586822237474262, "grad_norm": 0.8956040657036438, "learning_rate": 4.477819369320735e-08, "loss": 0.2389, "step": 27936 }, { "epoch": 0.958716540837337, "grad_norm": 0.847302116664209, "learning_rate": 4.470401477861097e-08, "loss": 0.2846, "step": 27937 }, { "epoch": 0.9587508579272478, "grad_norm": 0.8125131781081679, "learning_rate": 4.462989708091892e-08, "loss": 0.2421, "step": 27938 }, { "epoch": 0.9587851750171585, "grad_norm": 0.8465203091150042, "learning_rate": 4.455584060104767e-08, "loss": 0.3124, "step": 27939 }, { "epoch": 0.9588194921070693, "grad_norm": 0.6936313197366681, "learning_rate": 4.448184533991151e-08, "loss": 0.2435, "step": 27940 }, { "epoch": 0.95885380919698, "grad_norm": 0.7293759903856308, "learning_rate": 4.4407911298424144e-08, "loss": 0.2477, "step": 27941 }, { "epoch": 0.9588881262868909, "grad_norm": 0.7604767969176988, "learning_rate": 4.4334038477499284e-08, "loss": 0.3345, "step": 27942 }, { "epoch": 0.9589224433768017, "grad_norm": 0.7367299953000731, "learning_rate": 4.426022687805065e-08, "loss": 0.2289, "step": 27943 }, { "epoch": 0.9589567604667124, "grad_norm": 0.7439561716725742, "learning_rate": 4.418647650098806e-08, "loss": 0.2186, "step": 27944 }, { "epoch": 0.9589910775566232, "grad_norm": 0.7904572079713378, "learning_rate": 4.4112787347223575e-08, "loss": 0.2597, "step": 27945 }, { "epoch": 0.959025394646534, "grad_norm": 0.9388031963691584, "learning_rate": 4.403915941766867e-08, "loss": 0.3134, "step": 27946 }, { "epoch": 0.9590597117364448, "grad_norm": 0.794320562108385, "learning_rate": 4.396559271323098e-08, "loss": 0.256, "step": 27947 }, { "epoch": 0.9590940288263555, "grad_norm": 0.6965538941869249, "learning_rate": 4.38920872348203e-08, "loss": 0.2646, "step": 27948 }, { "epoch": 0.9591283459162663, "grad_norm": 0.8556701828934271, "learning_rate": 4.3818642983344816e-08, "loss": 0.2869, "step": 27949 }, { "epoch": 0.959162663006177, "grad_norm": 0.7431858865462974, "learning_rate": 4.3745259959711575e-08, "loss": 0.2996, "step": 27950 }, { "epoch": 0.9591969800960879, "grad_norm": 0.8134449016486369, "learning_rate": 4.367193816482707e-08, "loss": 0.258, "step": 27951 }, { "epoch": 0.9592312971859986, "grad_norm": 0.7899243512091942, "learning_rate": 4.359867759959724e-08, "loss": 0.2786, "step": 27952 }, { "epoch": 0.9592656142759094, "grad_norm": 0.827240758303401, "learning_rate": 4.3525478264927475e-08, "loss": 0.2371, "step": 27953 }, { "epoch": 0.9592999313658201, "grad_norm": 0.7366902549499436, "learning_rate": 4.3452340161721506e-08, "loss": 0.2159, "step": 27954 }, { "epoch": 0.959334248455731, "grad_norm": 0.7368370365775713, "learning_rate": 4.3379263290883597e-08, "loss": 0.2312, "step": 27955 }, { "epoch": 0.9593685655456418, "grad_norm": 0.7313400923103524, "learning_rate": 4.330624765331526e-08, "loss": 0.2259, "step": 27956 }, { "epoch": 0.9594028826355525, "grad_norm": 0.8602010756778993, "learning_rate": 4.323329324991965e-08, "loss": 0.308, "step": 27957 }, { "epoch": 0.9594371997254633, "grad_norm": 0.9644976985397243, "learning_rate": 4.316040008159772e-08, "loss": 0.2816, "step": 27958 }, { "epoch": 0.959471516815374, "grad_norm": 0.7452953789074142, "learning_rate": 4.308756814924986e-08, "loss": 0.2295, "step": 27959 }, { "epoch": 0.9595058339052849, "grad_norm": 0.7420151672930932, "learning_rate": 4.30147974537759e-08, "loss": 0.2869, "step": 27960 }, { "epoch": 0.9595401509951956, "grad_norm": 0.7301614299366646, "learning_rate": 4.2942087996074575e-08, "loss": 0.2681, "step": 27961 }, { "epoch": 0.9595744680851064, "grad_norm": 0.7339015469741244, "learning_rate": 4.2869439777045165e-08, "loss": 0.2569, "step": 27962 }, { "epoch": 0.9596087851750171, "grad_norm": 0.7274094226191833, "learning_rate": 4.279685279758305e-08, "loss": 0.2717, "step": 27963 }, { "epoch": 0.9596431022649279, "grad_norm": 0.6837994029009784, "learning_rate": 4.2724327058587536e-08, "loss": 0.2601, "step": 27964 }, { "epoch": 0.9596774193548387, "grad_norm": 0.6976582035305332, "learning_rate": 4.2651862560952327e-08, "loss": 0.2806, "step": 27965 }, { "epoch": 0.9597117364447495, "grad_norm": 0.845116487017866, "learning_rate": 4.25794593055745e-08, "loss": 0.2917, "step": 27966 }, { "epoch": 0.9597460535346602, "grad_norm": 0.8162951740789619, "learning_rate": 4.250711729334778e-08, "loss": 0.2769, "step": 27967 }, { "epoch": 0.959780370624571, "grad_norm": 0.7280509138651616, "learning_rate": 4.243483652516478e-08, "loss": 0.269, "step": 27968 }, { "epoch": 0.9598146877144819, "grad_norm": 0.7361000814813163, "learning_rate": 4.236261700192035e-08, "loss": 0.2602, "step": 27969 }, { "epoch": 0.9598490048043926, "grad_norm": 0.7502531729045743, "learning_rate": 4.229045872450543e-08, "loss": 0.2673, "step": 27970 }, { "epoch": 0.9598833218943034, "grad_norm": 0.7211237559544836, "learning_rate": 4.221836169381155e-08, "loss": 0.2959, "step": 27971 }, { "epoch": 0.9599176389842141, "grad_norm": 0.7315692371090209, "learning_rate": 4.2146325910729646e-08, "loss": 0.2891, "step": 27972 }, { "epoch": 0.9599519560741249, "grad_norm": 0.752671160272771, "learning_rate": 4.207435137614957e-08, "loss": 0.2435, "step": 27973 }, { "epoch": 0.9599862731640357, "grad_norm": 0.812765546329439, "learning_rate": 4.200243809096061e-08, "loss": 0.2391, "step": 27974 }, { "epoch": 0.9600205902539465, "grad_norm": 0.7919805079286381, "learning_rate": 4.1930586056050937e-08, "loss": 0.2574, "step": 27975 }, { "epoch": 0.9600549073438572, "grad_norm": 0.8347393538812143, "learning_rate": 4.185879527230874e-08, "loss": 0.2675, "step": 27976 }, { "epoch": 0.960089224433768, "grad_norm": 0.8262603474257996, "learning_rate": 4.178706574061997e-08, "loss": 0.2568, "step": 27977 }, { "epoch": 0.9601235415236788, "grad_norm": 0.8544846314605807, "learning_rate": 4.17153974618717e-08, "loss": 0.2595, "step": 27978 }, { "epoch": 0.9601578586135896, "grad_norm": 0.734010506384503, "learning_rate": 4.164379043694877e-08, "loss": 0.2587, "step": 27979 }, { "epoch": 0.9601921757035004, "grad_norm": 0.7177496511080877, "learning_rate": 4.157224466673604e-08, "loss": 0.2618, "step": 27980 }, { "epoch": 0.9602264927934111, "grad_norm": 0.7943190581412912, "learning_rate": 4.150076015211668e-08, "loss": 0.2168, "step": 27981 }, { "epoch": 0.9602608098833219, "grad_norm": 0.7088488775318054, "learning_rate": 4.142933689397499e-08, "loss": 0.2602, "step": 27982 }, { "epoch": 0.9602951269732327, "grad_norm": 0.7713429580987021, "learning_rate": 4.135797489319249e-08, "loss": 0.2472, "step": 27983 }, { "epoch": 0.9603294440631435, "grad_norm": 0.7938462021706795, "learning_rate": 4.1286674150650686e-08, "loss": 0.2602, "step": 27984 }, { "epoch": 0.9603637611530542, "grad_norm": 0.7793340364337545, "learning_rate": 4.121543466723166e-08, "loss": 0.2378, "step": 27985 }, { "epoch": 0.960398078242965, "grad_norm": 0.7567401935551524, "learning_rate": 4.114425644381359e-08, "loss": 0.3114, "step": 27986 }, { "epoch": 0.9604323953328757, "grad_norm": 0.830862806404337, "learning_rate": 4.1073139481277445e-08, "loss": 0.2629, "step": 27987 }, { "epoch": 0.9604667124227866, "grad_norm": 0.8230390224948498, "learning_rate": 4.1002083780500854e-08, "loss": 0.244, "step": 27988 }, { "epoch": 0.9605010295126973, "grad_norm": 0.7489311052260389, "learning_rate": 4.093108934236145e-08, "loss": 0.2695, "step": 27989 }, { "epoch": 0.9605353466026081, "grad_norm": 0.8308581431703354, "learning_rate": 4.086015616773742e-08, "loss": 0.317, "step": 27990 }, { "epoch": 0.9605696636925188, "grad_norm": 0.7205864456485206, "learning_rate": 4.078928425750417e-08, "loss": 0.2698, "step": 27991 }, { "epoch": 0.9606039807824297, "grad_norm": 0.7779339486282586, "learning_rate": 4.071847361253711e-08, "loss": 0.2648, "step": 27992 }, { "epoch": 0.9606382978723405, "grad_norm": 0.759909826000933, "learning_rate": 4.064772423371166e-08, "loss": 0.2608, "step": 27993 }, { "epoch": 0.9606726149622512, "grad_norm": 0.7392403960390797, "learning_rate": 4.057703612190156e-08, "loss": 0.2781, "step": 27994 }, { "epoch": 0.960706932052162, "grad_norm": 0.7975088409208984, "learning_rate": 4.050640927797944e-08, "loss": 0.2562, "step": 27995 }, { "epoch": 0.9607412491420727, "grad_norm": 0.7958099644508586, "learning_rate": 4.0435843702819055e-08, "loss": 0.3222, "step": 27996 }, { "epoch": 0.9607755662319836, "grad_norm": 0.7842352733400055, "learning_rate": 4.036533939729137e-08, "loss": 0.3811, "step": 27997 }, { "epoch": 0.9608098833218943, "grad_norm": 0.8254262206632949, "learning_rate": 4.0294896362267354e-08, "loss": 0.2417, "step": 27998 }, { "epoch": 0.9608442004118051, "grad_norm": 0.7600710294614554, "learning_rate": 4.022451459861798e-08, "loss": 0.2601, "step": 27999 }, { "epoch": 0.9608785175017158, "grad_norm": 1.0173191757597069, "learning_rate": 4.015419410721144e-08, "loss": 0.3151, "step": 28000 }, { "epoch": 0.9609128345916267, "grad_norm": 0.7587468139026486, "learning_rate": 4.008393488891815e-08, "loss": 0.2607, "step": 28001 }, { "epoch": 0.9609471516815374, "grad_norm": 0.8681928524295434, "learning_rate": 4.001373694460464e-08, "loss": 0.2281, "step": 28002 }, { "epoch": 0.9609814687714482, "grad_norm": 0.6858616435566671, "learning_rate": 3.994360027513855e-08, "loss": 0.2629, "step": 28003 }, { "epoch": 0.9610157858613589, "grad_norm": 0.7130779677882234, "learning_rate": 3.9873524881386404e-08, "loss": 0.2362, "step": 28004 }, { "epoch": 0.9610501029512697, "grad_norm": 0.8729289365678993, "learning_rate": 3.9803510764213625e-08, "loss": 0.2512, "step": 28005 }, { "epoch": 0.9610844200411806, "grad_norm": 0.674789755132331, "learning_rate": 3.973355792448563e-08, "loss": 0.2734, "step": 28006 }, { "epoch": 0.9611187371310913, "grad_norm": 0.7455006097961456, "learning_rate": 3.966366636306673e-08, "loss": 0.2491, "step": 28007 }, { "epoch": 0.9611530542210021, "grad_norm": 0.7401060386691865, "learning_rate": 3.959383608082012e-08, "loss": 0.2545, "step": 28008 }, { "epoch": 0.9611873713109128, "grad_norm": 0.6930344338897287, "learning_rate": 3.9524067078607895e-08, "loss": 0.2721, "step": 28009 }, { "epoch": 0.9612216884008236, "grad_norm": 0.8542604420205363, "learning_rate": 3.945435935729325e-08, "loss": 0.3107, "step": 28010 }, { "epoch": 0.9612560054907344, "grad_norm": 0.849760602649085, "learning_rate": 3.9384712917736044e-08, "loss": 0.2259, "step": 28011 }, { "epoch": 0.9612903225806452, "grad_norm": 0.8834584189746324, "learning_rate": 3.931512776079671e-08, "loss": 0.2589, "step": 28012 }, { "epoch": 0.9613246396705559, "grad_norm": 0.848079237729852, "learning_rate": 3.9245603887336226e-08, "loss": 0.2973, "step": 28013 }, { "epoch": 0.9613589567604667, "grad_norm": 0.6679223515043324, "learning_rate": 3.917614129821223e-08, "loss": 0.2543, "step": 28014 }, { "epoch": 0.9613932738503775, "grad_norm": 0.7815176712425003, "learning_rate": 3.9106739994282935e-08, "loss": 0.2731, "step": 28015 }, { "epoch": 0.9614275909402883, "grad_norm": 0.9068426514971426, "learning_rate": 3.903739997640654e-08, "loss": 0.2887, "step": 28016 }, { "epoch": 0.961461908030199, "grad_norm": 0.8714577108147572, "learning_rate": 3.896812124543903e-08, "loss": 0.2059, "step": 28017 }, { "epoch": 0.9614962251201098, "grad_norm": 0.8784809533968312, "learning_rate": 3.8898903802236374e-08, "loss": 0.2419, "step": 28018 }, { "epoch": 0.9615305422100205, "grad_norm": 0.8360134277976652, "learning_rate": 3.882974764765346e-08, "loss": 0.2608, "step": 28019 }, { "epoch": 0.9615648592999314, "grad_norm": 0.6427876236000872, "learning_rate": 3.8760652782545146e-08, "loss": 0.2167, "step": 28020 }, { "epoch": 0.9615991763898422, "grad_norm": 0.8941014001332417, "learning_rate": 3.869161920776465e-08, "loss": 0.2515, "step": 28021 }, { "epoch": 0.9616334934797529, "grad_norm": 0.7308477945873958, "learning_rate": 3.862264692416462e-08, "loss": 0.2451, "step": 28022 }, { "epoch": 0.9616678105696637, "grad_norm": 0.8324823437225174, "learning_rate": 3.85537359325977e-08, "loss": 0.2603, "step": 28023 }, { "epoch": 0.9617021276595744, "grad_norm": 0.9066779891101516, "learning_rate": 3.8484886233914885e-08, "loss": 0.2569, "step": 28024 }, { "epoch": 0.9617364447494853, "grad_norm": 0.7789113393492005, "learning_rate": 3.84160978289666e-08, "loss": 0.259, "step": 28025 }, { "epoch": 0.961770761839396, "grad_norm": 0.8345907381621932, "learning_rate": 3.8347370718602726e-08, "loss": 0.2569, "step": 28026 }, { "epoch": 0.9618050789293068, "grad_norm": 0.8600546626196243, "learning_rate": 3.827870490367258e-08, "loss": 0.2705, "step": 28027 }, { "epoch": 0.9618393960192175, "grad_norm": 0.7612019703191624, "learning_rate": 3.821010038502382e-08, "loss": 0.266, "step": 28028 }, { "epoch": 0.9618737131091284, "grad_norm": 0.7259519511836997, "learning_rate": 3.814155716350465e-08, "loss": 0.2248, "step": 28029 }, { "epoch": 0.9619080301990391, "grad_norm": 0.7396312208483572, "learning_rate": 3.807307523996162e-08, "loss": 0.2687, "step": 28030 }, { "epoch": 0.9619423472889499, "grad_norm": 0.830845280687568, "learning_rate": 3.8004654615240724e-08, "loss": 0.2502, "step": 28031 }, { "epoch": 0.9619766643788606, "grad_norm": 0.7875522731286315, "learning_rate": 3.793629529018683e-08, "loss": 0.2598, "step": 28032 }, { "epoch": 0.9620109814687714, "grad_norm": 0.718901592713039, "learning_rate": 3.786799726564538e-08, "loss": 0.2109, "step": 28033 }, { "epoch": 0.9620452985586823, "grad_norm": 0.792667431026426, "learning_rate": 3.77997605424596e-08, "loss": 0.2611, "step": 28034 }, { "epoch": 0.962079615648593, "grad_norm": 0.7418863445677445, "learning_rate": 3.773158512147157e-08, "loss": 0.2878, "step": 28035 }, { "epoch": 0.9621139327385038, "grad_norm": 0.8622780158603165, "learning_rate": 3.766347100352508e-08, "loss": 0.2251, "step": 28036 }, { "epoch": 0.9621482498284145, "grad_norm": 0.9518869390196696, "learning_rate": 3.759541818946055e-08, "loss": 0.2672, "step": 28037 }, { "epoch": 0.9621825669183254, "grad_norm": 0.7988217744982579, "learning_rate": 3.752742668011955e-08, "loss": 0.287, "step": 28038 }, { "epoch": 0.9622168840082361, "grad_norm": 0.8602562328879271, "learning_rate": 3.745949647634084e-08, "loss": 0.2612, "step": 28039 }, { "epoch": 0.9622512010981469, "grad_norm": 0.9240910379101382, "learning_rate": 3.739162757896431e-08, "loss": 0.2681, "step": 28040 }, { "epoch": 0.9622855181880576, "grad_norm": 0.7825545345786697, "learning_rate": 3.732381998882873e-08, "loss": 0.2159, "step": 28041 }, { "epoch": 0.9623198352779684, "grad_norm": 0.8408960007827438, "learning_rate": 3.72560737067712e-08, "loss": 0.2606, "step": 28042 }, { "epoch": 0.9623541523678792, "grad_norm": 0.8418169212771225, "learning_rate": 3.71883887336294e-08, "loss": 0.2633, "step": 28043 }, { "epoch": 0.96238846945779, "grad_norm": 0.8270427084609169, "learning_rate": 3.712076507023821e-08, "loss": 0.2661, "step": 28044 }, { "epoch": 0.9624227865477007, "grad_norm": 0.7839520849118548, "learning_rate": 3.7053202717434735e-08, "loss": 0.2274, "step": 28045 }, { "epoch": 0.9624571036376115, "grad_norm": 0.8631097293597959, "learning_rate": 3.69857016760522e-08, "loss": 0.2281, "step": 28046 }, { "epoch": 0.9624914207275223, "grad_norm": 0.7815371290847233, "learning_rate": 3.691826194692494e-08, "loss": 0.2656, "step": 28047 }, { "epoch": 0.9625257378174331, "grad_norm": 0.7012753700968474, "learning_rate": 3.6850883530886174e-08, "loss": 0.2185, "step": 28048 }, { "epoch": 0.9625600549073439, "grad_norm": 0.8006406388790731, "learning_rate": 3.678356642876801e-08, "loss": 0.2247, "step": 28049 }, { "epoch": 0.9625943719972546, "grad_norm": 0.7973229640543467, "learning_rate": 3.671631064140257e-08, "loss": 0.2225, "step": 28050 }, { "epoch": 0.9626286890871654, "grad_norm": 0.78282553263916, "learning_rate": 3.664911616962086e-08, "loss": 0.3042, "step": 28051 }, { "epoch": 0.9626630061770762, "grad_norm": 0.7919359857281048, "learning_rate": 3.65819830142522e-08, "loss": 0.2631, "step": 28052 }, { "epoch": 0.962697323266987, "grad_norm": 0.8185376332061818, "learning_rate": 3.651491117612649e-08, "loss": 0.2425, "step": 28053 }, { "epoch": 0.9627316403568977, "grad_norm": 0.841856692848528, "learning_rate": 3.6447900656071954e-08, "loss": 0.2619, "step": 28054 }, { "epoch": 0.9627659574468085, "grad_norm": 0.800568565927941, "learning_rate": 3.6380951454916826e-08, "loss": 0.2868, "step": 28055 }, { "epoch": 0.9628002745367192, "grad_norm": 0.8296066504122145, "learning_rate": 3.631406357348821e-08, "loss": 0.3329, "step": 28056 }, { "epoch": 0.9628345916266301, "grad_norm": 0.7790617117515145, "learning_rate": 3.624723701261212e-08, "loss": 0.2266, "step": 28057 }, { "epoch": 0.9628689087165409, "grad_norm": 0.7305091266950179, "learning_rate": 3.6180471773114015e-08, "loss": 0.2178, "step": 28058 }, { "epoch": 0.9629032258064516, "grad_norm": 0.7897127167438605, "learning_rate": 3.611376785581933e-08, "loss": 0.2707, "step": 28059 }, { "epoch": 0.9629375428963624, "grad_norm": 0.6804495934722132, "learning_rate": 3.6047125261551296e-08, "loss": 0.2943, "step": 28060 }, { "epoch": 0.9629718599862732, "grad_norm": 0.7778161033853771, "learning_rate": 3.598054399113426e-08, "loss": 0.2723, "step": 28061 }, { "epoch": 0.963006177076184, "grad_norm": 0.6696508900459484, "learning_rate": 3.591402404538924e-08, "loss": 0.2125, "step": 28062 }, { "epoch": 0.9630404941660947, "grad_norm": 0.7459102555599749, "learning_rate": 3.584756542513945e-08, "loss": 0.2917, "step": 28063 }, { "epoch": 0.9630748112560055, "grad_norm": 0.8024408084058713, "learning_rate": 3.578116813120536e-08, "loss": 0.2665, "step": 28064 }, { "epoch": 0.9631091283459162, "grad_norm": 0.8767697106759479, "learning_rate": 3.571483216440686e-08, "loss": 0.2601, "step": 28065 }, { "epoch": 0.9631434454358271, "grad_norm": 0.9016609235503822, "learning_rate": 3.56485575255644e-08, "loss": 0.3578, "step": 28066 }, { "epoch": 0.9631777625257378, "grad_norm": 0.8054268527373667, "learning_rate": 3.558234421549567e-08, "loss": 0.2228, "step": 28067 }, { "epoch": 0.9632120796156486, "grad_norm": 0.7360409637822605, "learning_rate": 3.5516192235019456e-08, "loss": 0.2918, "step": 28068 }, { "epoch": 0.9632463967055593, "grad_norm": 0.7666409212038205, "learning_rate": 3.545010158495232e-08, "loss": 0.2384, "step": 28069 }, { "epoch": 0.9632807137954701, "grad_norm": 0.8560474831883107, "learning_rate": 3.5384072266111406e-08, "loss": 0.2729, "step": 28070 }, { "epoch": 0.963315030885381, "grad_norm": 0.8201700352578568, "learning_rate": 3.5318104279311595e-08, "loss": 0.2381, "step": 28071 }, { "epoch": 0.9633493479752917, "grad_norm": 0.8418196559393806, "learning_rate": 3.525219762536891e-08, "loss": 0.2579, "step": 28072 }, { "epoch": 0.9633836650652025, "grad_norm": 0.7018575772327421, "learning_rate": 3.51863523050966e-08, "loss": 0.2428, "step": 28073 }, { "epoch": 0.9634179821551132, "grad_norm": 0.8460939879469787, "learning_rate": 3.5120568319308434e-08, "loss": 0.2644, "step": 28074 }, { "epoch": 0.9634522992450241, "grad_norm": 0.7632900548419, "learning_rate": 3.505484566881767e-08, "loss": 0.2154, "step": 28075 }, { "epoch": 0.9634866163349348, "grad_norm": 0.8419730232734918, "learning_rate": 3.498918435443532e-08, "loss": 0.2626, "step": 28076 }, { "epoch": 0.9635209334248456, "grad_norm": 0.8673210146860768, "learning_rate": 3.492358437697296e-08, "loss": 0.3294, "step": 28077 }, { "epoch": 0.9635552505147563, "grad_norm": 0.8382688804379875, "learning_rate": 3.485804573724105e-08, "loss": 0.2741, "step": 28078 }, { "epoch": 0.9635895676046671, "grad_norm": 0.8781165704266304, "learning_rate": 3.479256843604895e-08, "loss": 0.2825, "step": 28079 }, { "epoch": 0.9636238846945779, "grad_norm": 0.7365202710600585, "learning_rate": 3.472715247420655e-08, "loss": 0.2783, "step": 28080 }, { "epoch": 0.9636582017844887, "grad_norm": 0.8438336976114686, "learning_rate": 3.466179785252044e-08, "loss": 0.2235, "step": 28081 }, { "epoch": 0.9636925188743994, "grad_norm": 0.7557824016134269, "learning_rate": 3.4596504571799414e-08, "loss": 0.3062, "step": 28082 }, { "epoch": 0.9637268359643102, "grad_norm": 0.7565524968535813, "learning_rate": 3.453127263284895e-08, "loss": 0.2241, "step": 28083 }, { "epoch": 0.963761153054221, "grad_norm": 1.0289533012995953, "learning_rate": 3.44661020364756e-08, "loss": 0.2882, "step": 28084 }, { "epoch": 0.9637954701441318, "grad_norm": 0.7757663779745233, "learning_rate": 3.44009927834843e-08, "loss": 0.2621, "step": 28085 }, { "epoch": 0.9638297872340426, "grad_norm": 0.8105237672229426, "learning_rate": 3.4335944874679394e-08, "loss": 0.2449, "step": 28086 }, { "epoch": 0.9638641043239533, "grad_norm": 0.8062504894281798, "learning_rate": 3.4270958310865244e-08, "loss": 0.2314, "step": 28087 }, { "epoch": 0.9638984214138641, "grad_norm": 0.768864213256245, "learning_rate": 3.4206033092842874e-08, "loss": 0.2732, "step": 28088 }, { "epoch": 0.9639327385037749, "grad_norm": 0.7326720329994733, "learning_rate": 3.414116922141608e-08, "loss": 0.261, "step": 28089 }, { "epoch": 0.9639670555936857, "grad_norm": 0.6885274428850103, "learning_rate": 3.407636669738479e-08, "loss": 0.2757, "step": 28090 }, { "epoch": 0.9640013726835964, "grad_norm": 0.7348337828903493, "learning_rate": 3.401162552155113e-08, "loss": 0.267, "step": 28091 }, { "epoch": 0.9640356897735072, "grad_norm": 0.7630368901133028, "learning_rate": 3.3946945694713353e-08, "loss": 0.2331, "step": 28092 }, { "epoch": 0.9640700068634179, "grad_norm": 0.7712375726566603, "learning_rate": 3.3882327217671374e-08, "loss": 0.2383, "step": 28093 }, { "epoch": 0.9641043239533288, "grad_norm": 0.8268715520731624, "learning_rate": 3.381777009122345e-08, "loss": 0.2411, "step": 28094 }, { "epoch": 0.9641386410432395, "grad_norm": 0.7828107318711374, "learning_rate": 3.375327431616726e-08, "loss": 0.2412, "step": 28095 }, { "epoch": 0.9641729581331503, "grad_norm": 0.7874641216331107, "learning_rate": 3.368883989329885e-08, "loss": 0.2901, "step": 28096 }, { "epoch": 0.964207275223061, "grad_norm": 0.8414050227940938, "learning_rate": 3.3624466823414804e-08, "loss": 0.2576, "step": 28097 }, { "epoch": 0.9642415923129719, "grad_norm": 0.831303217835234, "learning_rate": 3.356015510731059e-08, "loss": 0.2935, "step": 28098 }, { "epoch": 0.9642759094028827, "grad_norm": 0.8364294262120018, "learning_rate": 3.349590474577946e-08, "loss": 0.3267, "step": 28099 }, { "epoch": 0.9643102264927934, "grad_norm": 0.7291127138071535, "learning_rate": 3.34317157396169e-08, "loss": 0.3307, "step": 28100 }, { "epoch": 0.9643445435827042, "grad_norm": 0.7429378287918048, "learning_rate": 3.336758808961449e-08, "loss": 0.2489, "step": 28101 }, { "epoch": 0.9643788606726149, "grad_norm": 0.7231135378558519, "learning_rate": 3.330352179656493e-08, "loss": 0.2186, "step": 28102 }, { "epoch": 0.9644131777625258, "grad_norm": 0.8105533924223642, "learning_rate": 3.323951686126037e-08, "loss": 0.2914, "step": 28103 }, { "epoch": 0.9644474948524365, "grad_norm": 0.7795877313706422, "learning_rate": 3.3175573284490724e-08, "loss": 0.2594, "step": 28104 }, { "epoch": 0.9644818119423473, "grad_norm": 0.7629960034290525, "learning_rate": 3.311169106704592e-08, "loss": 0.2396, "step": 28105 }, { "epoch": 0.964516129032258, "grad_norm": 0.7200392716526602, "learning_rate": 3.3047870209715336e-08, "loss": 0.2474, "step": 28106 }, { "epoch": 0.9645504461221689, "grad_norm": 0.8310867651946774, "learning_rate": 3.298411071328722e-08, "loss": 0.2693, "step": 28107 }, { "epoch": 0.9645847632120796, "grad_norm": 0.8235699872572699, "learning_rate": 3.292041257854983e-08, "loss": 0.2282, "step": 28108 }, { "epoch": 0.9646190803019904, "grad_norm": 0.7655281853947512, "learning_rate": 3.285677580628921e-08, "loss": 0.2464, "step": 28109 }, { "epoch": 0.9646533973919011, "grad_norm": 0.7228237117078475, "learning_rate": 3.2793200397292504e-08, "loss": 0.2681, "step": 28110 }, { "epoch": 0.9646877144818119, "grad_norm": 0.6554432812348748, "learning_rate": 3.272968635234464e-08, "loss": 0.2096, "step": 28111 }, { "epoch": 0.9647220315717228, "grad_norm": 0.7890376694821469, "learning_rate": 3.2666233672229985e-08, "loss": 0.3269, "step": 28112 }, { "epoch": 0.9647563486616335, "grad_norm": 0.9058645139347219, "learning_rate": 3.2602842357732924e-08, "loss": 0.2667, "step": 28113 }, { "epoch": 0.9647906657515443, "grad_norm": 0.774479060299563, "learning_rate": 3.25395124096356e-08, "loss": 0.2739, "step": 28114 }, { "epoch": 0.964824982841455, "grad_norm": 0.9896990927188962, "learning_rate": 3.247624382872183e-08, "loss": 0.2777, "step": 28115 }, { "epoch": 0.9648592999313658, "grad_norm": 0.8611167499925386, "learning_rate": 3.2413036615772106e-08, "loss": 0.2606, "step": 28116 }, { "epoch": 0.9648936170212766, "grad_norm": 0.7852332894390165, "learning_rate": 3.234989077156803e-08, "loss": 0.243, "step": 28117 }, { "epoch": 0.9649279341111874, "grad_norm": 0.7853076044618973, "learning_rate": 3.228680629688896e-08, "loss": 0.2695, "step": 28118 }, { "epoch": 0.9649622512010981, "grad_norm": 0.7793052928406667, "learning_rate": 3.2223783192514844e-08, "loss": 0.2462, "step": 28119 }, { "epoch": 0.9649965682910089, "grad_norm": 0.7059816188648053, "learning_rate": 3.2160821459223947e-08, "loss": 0.2498, "step": 28120 }, { "epoch": 0.9650308853809197, "grad_norm": 0.905600608486679, "learning_rate": 3.209792109779397e-08, "loss": 0.2528, "step": 28121 }, { "epoch": 0.9650652024708305, "grad_norm": 0.7033117477735042, "learning_rate": 3.203508210900208e-08, "loss": 0.2551, "step": 28122 }, { "epoch": 0.9650995195607412, "grad_norm": 0.7856766768500483, "learning_rate": 3.1972304493625425e-08, "loss": 0.2256, "step": 28123 }, { "epoch": 0.965133836650652, "grad_norm": 0.6940106508325493, "learning_rate": 3.19095882524384e-08, "loss": 0.2118, "step": 28124 }, { "epoch": 0.9651681537405628, "grad_norm": 0.8181214753976036, "learning_rate": 3.184693338621592e-08, "loss": 0.3548, "step": 28125 }, { "epoch": 0.9652024708304736, "grad_norm": 0.740201399771627, "learning_rate": 3.178433989573237e-08, "loss": 0.2435, "step": 28126 }, { "epoch": 0.9652367879203844, "grad_norm": 0.7084609331497268, "learning_rate": 3.172180778176104e-08, "loss": 0.2377, "step": 28127 }, { "epoch": 0.9652711050102951, "grad_norm": 0.8030601662006464, "learning_rate": 3.165933704507407e-08, "loss": 0.2253, "step": 28128 }, { "epoch": 0.9653054221002059, "grad_norm": 0.7994469675419328, "learning_rate": 3.159692768644362e-08, "loss": 0.3215, "step": 28129 }, { "epoch": 0.9653397391901167, "grad_norm": 0.7318130494187877, "learning_rate": 3.1534579706640756e-08, "loss": 0.221, "step": 28130 }, { "epoch": 0.9653740562800275, "grad_norm": 0.6808216324450045, "learning_rate": 3.1472293106435404e-08, "loss": 0.2543, "step": 28131 }, { "epoch": 0.9654083733699382, "grad_norm": 0.820191849346298, "learning_rate": 3.141006788659695e-08, "loss": 0.2861, "step": 28132 }, { "epoch": 0.965442690459849, "grad_norm": 0.7276675585273839, "learning_rate": 3.134790404789478e-08, "loss": 0.2384, "step": 28133 }, { "epoch": 0.9654770075497597, "grad_norm": 0.7094761846227498, "learning_rate": 3.128580159109551e-08, "loss": 0.2071, "step": 28134 }, { "epoch": 0.9655113246396706, "grad_norm": 0.9088579046923234, "learning_rate": 3.122376051696796e-08, "loss": 0.2683, "step": 28135 }, { "epoch": 0.9655456417295813, "grad_norm": 0.757373993206285, "learning_rate": 3.116178082627763e-08, "loss": 0.2074, "step": 28136 }, { "epoch": 0.9655799588194921, "grad_norm": 0.7761685448305927, "learning_rate": 3.109986251979002e-08, "loss": 0.2354, "step": 28137 }, { "epoch": 0.9656142759094029, "grad_norm": 0.7762637624531767, "learning_rate": 3.1038005598270635e-08, "loss": 0.2669, "step": 28138 }, { "epoch": 0.9656485929993136, "grad_norm": 0.7723473729011835, "learning_rate": 3.097621006248386e-08, "loss": 0.2597, "step": 28139 }, { "epoch": 0.9656829100892245, "grad_norm": 0.7017705456243523, "learning_rate": 3.091447591319241e-08, "loss": 0.2704, "step": 28140 }, { "epoch": 0.9657172271791352, "grad_norm": 0.7600486734839865, "learning_rate": 3.085280315115902e-08, "loss": 0.2491, "step": 28141 }, { "epoch": 0.965751544269046, "grad_norm": 0.8323635296319528, "learning_rate": 3.0791191777145846e-08, "loss": 0.2608, "step": 28142 }, { "epoch": 0.9657858613589567, "grad_norm": 0.7502567603563906, "learning_rate": 3.072964179191396e-08, "loss": 0.2604, "step": 28143 }, { "epoch": 0.9658201784488676, "grad_norm": 0.7330305090919749, "learning_rate": 3.066815319622385e-08, "loss": 0.2717, "step": 28144 }, { "epoch": 0.9658544955387783, "grad_norm": 0.7641600660335903, "learning_rate": 3.060672599083547e-08, "loss": 0.2402, "step": 28145 }, { "epoch": 0.9658888126286891, "grad_norm": 0.8034069318857076, "learning_rate": 3.0545360176506e-08, "loss": 0.2716, "step": 28146 }, { "epoch": 0.9659231297185998, "grad_norm": 0.9101368685753695, "learning_rate": 3.048405575399593e-08, "loss": 0.2673, "step": 28147 }, { "epoch": 0.9659574468085106, "grad_norm": 0.7175111525468343, "learning_rate": 3.042281272406133e-08, "loss": 0.2616, "step": 28148 }, { "epoch": 0.9659917638984215, "grad_norm": 0.7446148860246812, "learning_rate": 3.0361631087458265e-08, "loss": 0.2619, "step": 28149 }, { "epoch": 0.9660260809883322, "grad_norm": 0.7815479063923738, "learning_rate": 3.0300510844943896e-08, "loss": 0.2226, "step": 28150 }, { "epoch": 0.966060398078243, "grad_norm": 0.8853748477293206, "learning_rate": 3.023945199727207e-08, "loss": 0.2941, "step": 28151 }, { "epoch": 0.9660947151681537, "grad_norm": 0.7132362531275156, "learning_rate": 3.0178454545197746e-08, "loss": 0.2065, "step": 28152 }, { "epoch": 0.9661290322580646, "grad_norm": 0.7524957201380409, "learning_rate": 3.011751848947475e-08, "loss": 0.2371, "step": 28153 }, { "epoch": 0.9661633493479753, "grad_norm": 0.7883515828584833, "learning_rate": 3.005664383085527e-08, "loss": 0.2717, "step": 28154 }, { "epoch": 0.9661976664378861, "grad_norm": 0.7998242872489422, "learning_rate": 2.999583057009148e-08, "loss": 0.2331, "step": 28155 }, { "epoch": 0.9662319835277968, "grad_norm": 0.7637351341767249, "learning_rate": 2.993507870793499e-08, "loss": 0.2968, "step": 28156 }, { "epoch": 0.9662663006177076, "grad_norm": 0.8046258029526037, "learning_rate": 2.987438824513578e-08, "loss": 0.2042, "step": 28157 }, { "epoch": 0.9663006177076184, "grad_norm": 0.7326824314381434, "learning_rate": 2.9813759182444336e-08, "loss": 0.2371, "step": 28158 }, { "epoch": 0.9663349347975292, "grad_norm": 0.7823528122365747, "learning_rate": 2.9753191520608406e-08, "loss": 0.2622, "step": 28159 }, { "epoch": 0.9663692518874399, "grad_norm": 0.7588550155527954, "learning_rate": 2.969268526037794e-08, "loss": 0.2758, "step": 28160 }, { "epoch": 0.9664035689773507, "grad_norm": 0.7528092543447445, "learning_rate": 2.9632240402499013e-08, "loss": 0.2559, "step": 28161 }, { "epoch": 0.9664378860672614, "grad_norm": 0.8409450837999685, "learning_rate": 2.9571856947718803e-08, "loss": 0.2666, "step": 28162 }, { "epoch": 0.9664722031571723, "grad_norm": 0.7457841523980204, "learning_rate": 2.9511534896783378e-08, "loss": 0.2597, "step": 28163 }, { "epoch": 0.9665065202470831, "grad_norm": 0.9727876320921635, "learning_rate": 2.9451274250438256e-08, "loss": 0.2233, "step": 28164 }, { "epoch": 0.9665408373369938, "grad_norm": 0.7149804004139021, "learning_rate": 2.9391075009427284e-08, "loss": 0.2304, "step": 28165 }, { "epoch": 0.9665751544269046, "grad_norm": 0.7777860110947697, "learning_rate": 2.9330937174494312e-08, "loss": 0.2869, "step": 28166 }, { "epoch": 0.9666094715168154, "grad_norm": 0.6911967485294751, "learning_rate": 2.9270860746382636e-08, "loss": 0.2277, "step": 28167 }, { "epoch": 0.9666437886067262, "grad_norm": 0.9965212146516627, "learning_rate": 2.9210845725834437e-08, "loss": 0.2364, "step": 28168 }, { "epoch": 0.9666781056966369, "grad_norm": 0.8032754345106579, "learning_rate": 2.9150892113590234e-08, "loss": 0.2166, "step": 28169 }, { "epoch": 0.9667124227865477, "grad_norm": 1.0379248833000467, "learning_rate": 2.909099991039166e-08, "loss": 0.2754, "step": 28170 }, { "epoch": 0.9667467398764584, "grad_norm": 0.8228422596126812, "learning_rate": 2.903116911697812e-08, "loss": 0.3009, "step": 28171 }, { "epoch": 0.9667810569663693, "grad_norm": 0.8390925542567946, "learning_rate": 2.897139973408902e-08, "loss": 0.2647, "step": 28172 }, { "epoch": 0.96681537405628, "grad_norm": 0.8254012249756064, "learning_rate": 2.8911691762462668e-08, "loss": 0.2851, "step": 28173 }, { "epoch": 0.9668496911461908, "grad_norm": 0.7253416619379061, "learning_rate": 2.8852045202836244e-08, "loss": 0.2744, "step": 28174 }, { "epoch": 0.9668840082361015, "grad_norm": 0.697266097112691, "learning_rate": 2.879246005594749e-08, "loss": 0.2537, "step": 28175 }, { "epoch": 0.9669183253260124, "grad_norm": 0.7717163679838742, "learning_rate": 2.873293632253138e-08, "loss": 0.302, "step": 28176 }, { "epoch": 0.9669526424159232, "grad_norm": 0.8593713093538018, "learning_rate": 2.8673474003324542e-08, "loss": 0.2786, "step": 28177 }, { "epoch": 0.9669869595058339, "grad_norm": 0.7725997674863222, "learning_rate": 2.8614073099060835e-08, "loss": 0.2428, "step": 28178 }, { "epoch": 0.9670212765957447, "grad_norm": 0.8006251466294377, "learning_rate": 2.8554733610474116e-08, "loss": 0.2733, "step": 28179 }, { "epoch": 0.9670555936856554, "grad_norm": 0.7947804830872213, "learning_rate": 2.849545553829769e-08, "loss": 0.2145, "step": 28180 }, { "epoch": 0.9670899107755663, "grad_norm": 0.8093871558317708, "learning_rate": 2.843623888326319e-08, "loss": 0.2652, "step": 28181 }, { "epoch": 0.967124227865477, "grad_norm": 0.7569821243749466, "learning_rate": 2.837708364610281e-08, "loss": 0.2537, "step": 28182 }, { "epoch": 0.9671585449553878, "grad_norm": 0.71758990714419, "learning_rate": 2.831798982754763e-08, "loss": 0.2504, "step": 28183 }, { "epoch": 0.9671928620452985, "grad_norm": 0.777315829997832, "learning_rate": 2.8258957428327073e-08, "loss": 0.2331, "step": 28184 }, { "epoch": 0.9672271791352093, "grad_norm": 0.7221495718380372, "learning_rate": 2.8199986449170547e-08, "loss": 0.2816, "step": 28185 }, { "epoch": 0.9672614962251201, "grad_norm": 0.8156429879947114, "learning_rate": 2.8141076890806364e-08, "loss": 0.2553, "step": 28186 }, { "epoch": 0.9672958133150309, "grad_norm": 0.6896754328105033, "learning_rate": 2.8082228753962826e-08, "loss": 0.2498, "step": 28187 }, { "epoch": 0.9673301304049416, "grad_norm": 0.7903554025078011, "learning_rate": 2.802344203936713e-08, "loss": 0.2827, "step": 28188 }, { "epoch": 0.9673644474948524, "grad_norm": 0.8376281666711931, "learning_rate": 2.796471674774426e-08, "loss": 0.225, "step": 28189 }, { "epoch": 0.9673987645847633, "grad_norm": 0.8426262221216929, "learning_rate": 2.7906052879820843e-08, "loss": 0.2541, "step": 28190 }, { "epoch": 0.967433081674674, "grad_norm": 0.6770969037602719, "learning_rate": 2.784745043632131e-08, "loss": 0.2138, "step": 28191 }, { "epoch": 0.9674673987645848, "grad_norm": 0.705684418922922, "learning_rate": 2.778890941796897e-08, "loss": 0.2663, "step": 28192 }, { "epoch": 0.9675017158544955, "grad_norm": 0.7461097547513571, "learning_rate": 2.7730429825488236e-08, "loss": 0.2883, "step": 28193 }, { "epoch": 0.9675360329444063, "grad_norm": 0.864777179366412, "learning_rate": 2.7672011659601316e-08, "loss": 0.2777, "step": 28194 }, { "epoch": 0.9675703500343171, "grad_norm": 0.9888939217761064, "learning_rate": 2.7613654921028744e-08, "loss": 0.318, "step": 28195 }, { "epoch": 0.9676046671242279, "grad_norm": 0.7177038389332747, "learning_rate": 2.755535961049216e-08, "loss": 0.2872, "step": 28196 }, { "epoch": 0.9676389842141386, "grad_norm": 0.8340100128415759, "learning_rate": 2.7497125728712104e-08, "loss": 0.2287, "step": 28197 }, { "epoch": 0.9676733013040494, "grad_norm": 0.9289648010536146, "learning_rate": 2.7438953276407997e-08, "loss": 0.2188, "step": 28198 }, { "epoch": 0.9677076183939602, "grad_norm": 0.7753681975490743, "learning_rate": 2.7380842254297603e-08, "loss": 0.2462, "step": 28199 }, { "epoch": 0.967741935483871, "grad_norm": 0.8135811718036342, "learning_rate": 2.732279266310034e-08, "loss": 0.2434, "step": 28200 }, { "epoch": 0.9677762525737817, "grad_norm": 0.8507776058345602, "learning_rate": 2.7264804503531196e-08, "loss": 0.2412, "step": 28201 }, { "epoch": 0.9678105696636925, "grad_norm": 0.800335589058379, "learning_rate": 2.7206877776308483e-08, "loss": 0.277, "step": 28202 }, { "epoch": 0.9678448867536033, "grad_norm": 0.776457206649226, "learning_rate": 2.714901248214663e-08, "loss": 0.233, "step": 28203 }, { "epoch": 0.9678792038435141, "grad_norm": 0.7581684019180592, "learning_rate": 2.7091208621761178e-08, "loss": 0.1976, "step": 28204 }, { "epoch": 0.9679135209334249, "grad_norm": 0.9132074845803166, "learning_rate": 2.7033466195865997e-08, "loss": 0.2769, "step": 28205 }, { "epoch": 0.9679478380233356, "grad_norm": 0.9262086372997052, "learning_rate": 2.697578520517441e-08, "loss": 0.2589, "step": 28206 }, { "epoch": 0.9679821551132464, "grad_norm": 0.7451806435428232, "learning_rate": 2.6918165650399175e-08, "loss": 0.2262, "step": 28207 }, { "epoch": 0.9680164722031571, "grad_norm": 0.7973635315603724, "learning_rate": 2.6860607532251948e-08, "loss": 0.2226, "step": 28208 }, { "epoch": 0.968050789293068, "grad_norm": 0.6822634366732055, "learning_rate": 2.6803110851443826e-08, "loss": 0.2826, "step": 28209 }, { "epoch": 0.9680851063829787, "grad_norm": 0.7772221574355698, "learning_rate": 2.6745675608684797e-08, "loss": 0.2004, "step": 28210 }, { "epoch": 0.9681194234728895, "grad_norm": 0.7905449443089337, "learning_rate": 2.6688301804684847e-08, "loss": 0.3043, "step": 28211 }, { "epoch": 0.9681537405628002, "grad_norm": 0.7131545306670047, "learning_rate": 2.6630989440152854e-08, "loss": 0.2406, "step": 28212 }, { "epoch": 0.9681880576527111, "grad_norm": 0.7324284881538589, "learning_rate": 2.6573738515796034e-08, "loss": 0.2359, "step": 28213 }, { "epoch": 0.9682223747426218, "grad_norm": 0.7496720198156844, "learning_rate": 2.65165490323227e-08, "loss": 0.2319, "step": 28214 }, { "epoch": 0.9682566918325326, "grad_norm": 0.7841647348632454, "learning_rate": 2.645942099043841e-08, "loss": 0.2524, "step": 28215 }, { "epoch": 0.9682910089224434, "grad_norm": 0.7325913411507805, "learning_rate": 2.6402354390849815e-08, "loss": 0.2466, "step": 28216 }, { "epoch": 0.9683253260123541, "grad_norm": 0.7800836622183407, "learning_rate": 2.6345349234261352e-08, "loss": 0.2611, "step": 28217 }, { "epoch": 0.968359643102265, "grad_norm": 0.7114785243728516, "learning_rate": 2.628840552137746e-08, "loss": 0.335, "step": 28218 }, { "epoch": 0.9683939601921757, "grad_norm": 0.7810638344054748, "learning_rate": 2.623152325290146e-08, "loss": 0.2813, "step": 28219 }, { "epoch": 0.9684282772820865, "grad_norm": 0.7252816272149202, "learning_rate": 2.617470242953557e-08, "loss": 0.2534, "step": 28220 }, { "epoch": 0.9684625943719972, "grad_norm": 0.8578025591641377, "learning_rate": 2.6117943051983118e-08, "loss": 0.305, "step": 28221 }, { "epoch": 0.9684969114619081, "grad_norm": 0.9879276632207186, "learning_rate": 2.6061245120944102e-08, "loss": 0.2703, "step": 28222 }, { "epoch": 0.9685312285518188, "grad_norm": 0.7180058209733606, "learning_rate": 2.6004608637119622e-08, "loss": 0.2157, "step": 28223 }, { "epoch": 0.9685655456417296, "grad_norm": 0.7731997116265654, "learning_rate": 2.5948033601209122e-08, "loss": 0.2708, "step": 28224 }, { "epoch": 0.9685998627316403, "grad_norm": 0.990176499723781, "learning_rate": 2.5891520013911488e-08, "loss": 0.2741, "step": 28225 }, { "epoch": 0.9686341798215511, "grad_norm": 0.8359450532950953, "learning_rate": 2.583506787592449e-08, "loss": 0.2551, "step": 28226 }, { "epoch": 0.968668496911462, "grad_norm": 0.7073610671399994, "learning_rate": 2.5778677187945912e-08, "loss": 0.2684, "step": 28227 }, { "epoch": 0.9687028140013727, "grad_norm": 0.7924682416037793, "learning_rate": 2.5722347950672966e-08, "loss": 0.2274, "step": 28228 }, { "epoch": 0.9687371310912835, "grad_norm": 0.7133625241993866, "learning_rate": 2.5666080164800655e-08, "loss": 0.2452, "step": 28229 }, { "epoch": 0.9687714481811942, "grad_norm": 0.8288088233459381, "learning_rate": 2.560987383102398e-08, "loss": 0.2919, "step": 28230 }, { "epoch": 0.968805765271105, "grad_norm": 0.7572229938805956, "learning_rate": 2.555372895003849e-08, "loss": 0.2347, "step": 28231 }, { "epoch": 0.9688400823610158, "grad_norm": 0.7369378195565667, "learning_rate": 2.5497645522536975e-08, "loss": 0.2465, "step": 28232 }, { "epoch": 0.9688743994509266, "grad_norm": 0.7362670638506967, "learning_rate": 2.5441623549211646e-08, "loss": 0.287, "step": 28233 }, { "epoch": 0.9689087165408373, "grad_norm": 0.7828177382949356, "learning_rate": 2.538566303075529e-08, "loss": 0.2415, "step": 28234 }, { "epoch": 0.9689430336307481, "grad_norm": 0.6635357866207138, "learning_rate": 2.5329763967860132e-08, "loss": 0.2384, "step": 28235 }, { "epoch": 0.9689773507206589, "grad_norm": 0.777019081679785, "learning_rate": 2.527392636121506e-08, "loss": 0.2476, "step": 28236 }, { "epoch": 0.9690116678105697, "grad_norm": 0.8294470298990764, "learning_rate": 2.5218150211511194e-08, "loss": 0.273, "step": 28237 }, { "epoch": 0.9690459849004804, "grad_norm": 0.7712459159562913, "learning_rate": 2.5162435519436867e-08, "loss": 0.2409, "step": 28238 }, { "epoch": 0.9690803019903912, "grad_norm": 0.7993977871499941, "learning_rate": 2.510678228568042e-08, "loss": 0.2619, "step": 28239 }, { "epoch": 0.9691146190803019, "grad_norm": 0.7591095899596086, "learning_rate": 2.5051190510929633e-08, "loss": 0.2781, "step": 28240 }, { "epoch": 0.9691489361702128, "grad_norm": 0.7607138999767569, "learning_rate": 2.499566019587063e-08, "loss": 0.308, "step": 28241 }, { "epoch": 0.9691832532601236, "grad_norm": 0.7369998313178985, "learning_rate": 2.4940191341190633e-08, "loss": 0.2519, "step": 28242 }, { "epoch": 0.9692175703500343, "grad_norm": 0.8882103257175096, "learning_rate": 2.4884783947574098e-08, "loss": 0.2585, "step": 28243 }, { "epoch": 0.9692518874399451, "grad_norm": 0.825323168246462, "learning_rate": 2.4829438015705477e-08, "loss": 0.2548, "step": 28244 }, { "epoch": 0.9692862045298559, "grad_norm": 0.751380012939427, "learning_rate": 2.4774153546268664e-08, "loss": 0.2671, "step": 28245 }, { "epoch": 0.9693205216197667, "grad_norm": 0.8064225096973373, "learning_rate": 2.4718930539947007e-08, "loss": 0.2808, "step": 28246 }, { "epoch": 0.9693548387096774, "grad_norm": 0.920614438984116, "learning_rate": 2.4663768997421623e-08, "loss": 0.2416, "step": 28247 }, { "epoch": 0.9693891557995882, "grad_norm": 0.7782363389859402, "learning_rate": 2.46086689193753e-08, "loss": 0.269, "step": 28248 }, { "epoch": 0.9694234728894989, "grad_norm": 0.773551690580732, "learning_rate": 2.4553630306488053e-08, "loss": 0.2736, "step": 28249 }, { "epoch": 0.9694577899794098, "grad_norm": 0.8128392093727687, "learning_rate": 2.449865315943989e-08, "loss": 0.2607, "step": 28250 }, { "epoch": 0.9694921070693205, "grad_norm": 0.708956358300325, "learning_rate": 2.444373747890971e-08, "loss": 0.2231, "step": 28251 }, { "epoch": 0.9695264241592313, "grad_norm": 0.7750998073492038, "learning_rate": 2.4388883265576424e-08, "loss": 0.246, "step": 28252 }, { "epoch": 0.969560741249142, "grad_norm": 0.7358964248915435, "learning_rate": 2.433409052011726e-08, "loss": 0.2309, "step": 28253 }, { "epoch": 0.9695950583390528, "grad_norm": 0.7997807071899399, "learning_rate": 2.4279359243209454e-08, "loss": 0.2754, "step": 28254 }, { "epoch": 0.9696293754289637, "grad_norm": 0.8372333950068531, "learning_rate": 2.422468943552969e-08, "loss": 0.2759, "step": 28255 }, { "epoch": 0.9696636925188744, "grad_norm": 0.8867911886417971, "learning_rate": 2.4170081097751874e-08, "loss": 0.2942, "step": 28256 }, { "epoch": 0.9696980096087852, "grad_norm": 0.7896737366445995, "learning_rate": 2.4115534230551575e-08, "loss": 0.2452, "step": 28257 }, { "epoch": 0.9697323266986959, "grad_norm": 0.7340559873446384, "learning_rate": 2.4061048834602697e-08, "loss": 0.2973, "step": 28258 }, { "epoch": 0.9697666437886068, "grad_norm": 0.9323822876205375, "learning_rate": 2.4006624910578034e-08, "loss": 0.2572, "step": 28259 }, { "epoch": 0.9698009608785175, "grad_norm": 1.0018630874025438, "learning_rate": 2.3952262459150388e-08, "loss": 0.272, "step": 28260 }, { "epoch": 0.9698352779684283, "grad_norm": 0.9286462954359596, "learning_rate": 2.3897961480990883e-08, "loss": 0.2064, "step": 28261 }, { "epoch": 0.969869595058339, "grad_norm": 0.7149864252899983, "learning_rate": 2.3843721976770094e-08, "loss": 0.255, "step": 28262 }, { "epoch": 0.9699039121482498, "grad_norm": 0.8065080193207311, "learning_rate": 2.3789543947159153e-08, "loss": 0.2595, "step": 28263 }, { "epoch": 0.9699382292381606, "grad_norm": 0.7257976547083359, "learning_rate": 2.3735427392826416e-08, "loss": 0.2727, "step": 28264 }, { "epoch": 0.9699725463280714, "grad_norm": 0.8244354576721761, "learning_rate": 2.368137231444023e-08, "loss": 0.2423, "step": 28265 }, { "epoch": 0.9700068634179821, "grad_norm": 0.8696855355677188, "learning_rate": 2.3627378712669512e-08, "loss": 0.2584, "step": 28266 }, { "epoch": 0.9700411805078929, "grad_norm": 0.7444272065076771, "learning_rate": 2.3573446588180392e-08, "loss": 0.2828, "step": 28267 }, { "epoch": 0.9700754975978038, "grad_norm": 0.8597506850828037, "learning_rate": 2.351957594163956e-08, "loss": 0.2529, "step": 28268 }, { "epoch": 0.9701098146877145, "grad_norm": 0.7108960543157601, "learning_rate": 2.346576677371204e-08, "loss": 0.2421, "step": 28269 }, { "epoch": 0.9701441317776253, "grad_norm": 0.8549172074041459, "learning_rate": 2.341201908506341e-08, "loss": 0.2778, "step": 28270 }, { "epoch": 0.970178448867536, "grad_norm": 0.8405367419461376, "learning_rate": 2.335833287635647e-08, "loss": 0.2109, "step": 28271 }, { "epoch": 0.9702127659574468, "grad_norm": 0.8535437423182956, "learning_rate": 2.3304708148255696e-08, "loss": 0.2742, "step": 28272 }, { "epoch": 0.9702470830473576, "grad_norm": 0.872815646147064, "learning_rate": 2.3251144901422773e-08, "loss": 0.2233, "step": 28273 }, { "epoch": 0.9702814001372684, "grad_norm": 0.7482926967917445, "learning_rate": 2.31976431365194e-08, "loss": 0.3088, "step": 28274 }, { "epoch": 0.9703157172271791, "grad_norm": 0.8877470123454955, "learning_rate": 2.3144202854207264e-08, "loss": 0.2722, "step": 28275 }, { "epoch": 0.9703500343170899, "grad_norm": 0.8542823263302726, "learning_rate": 2.3090824055145843e-08, "loss": 0.2696, "step": 28276 }, { "epoch": 0.9703843514070006, "grad_norm": 0.7943375054034832, "learning_rate": 2.3037506739994607e-08, "loss": 0.2529, "step": 28277 }, { "epoch": 0.9704186684969115, "grad_norm": 0.7925274578255002, "learning_rate": 2.2984250909412476e-08, "loss": 0.278, "step": 28278 }, { "epoch": 0.9704529855868222, "grad_norm": 0.8838906980824304, "learning_rate": 2.2931056564057252e-08, "loss": 0.2626, "step": 28279 }, { "epoch": 0.970487302676733, "grad_norm": 0.8239167807684934, "learning_rate": 2.2877923704586192e-08, "loss": 0.224, "step": 28280 }, { "epoch": 0.9705216197666438, "grad_norm": 0.7721317360243254, "learning_rate": 2.2824852331655433e-08, "loss": 0.2707, "step": 28281 }, { "epoch": 0.9705559368565546, "grad_norm": 0.7943591473099801, "learning_rate": 2.2771842445921123e-08, "loss": 0.2811, "step": 28282 }, { "epoch": 0.9705902539464654, "grad_norm": 0.6855964163573653, "learning_rate": 2.2718894048037732e-08, "loss": 0.2367, "step": 28283 }, { "epoch": 0.9706245710363761, "grad_norm": 0.7516259174881941, "learning_rate": 2.2666007138659186e-08, "loss": 0.3241, "step": 28284 }, { "epoch": 0.9706588881262869, "grad_norm": 0.8033190378329248, "learning_rate": 2.2613181718439403e-08, "loss": 0.3114, "step": 28285 }, { "epoch": 0.9706932052161976, "grad_norm": 0.8399607323052596, "learning_rate": 2.2560417788030643e-08, "loss": 0.2394, "step": 28286 }, { "epoch": 0.9707275223061085, "grad_norm": 0.720060772022676, "learning_rate": 2.2507715348084604e-08, "loss": 0.276, "step": 28287 }, { "epoch": 0.9707618393960192, "grad_norm": 0.8351767970253766, "learning_rate": 2.2455074399252985e-08, "loss": 0.2856, "step": 28288 }, { "epoch": 0.97079615648593, "grad_norm": 0.7115809607560963, "learning_rate": 2.240249494218527e-08, "loss": 0.229, "step": 28289 }, { "epoch": 0.9708304735758407, "grad_norm": 0.8452599595437998, "learning_rate": 2.2349976977531496e-08, "loss": 0.2703, "step": 28290 }, { "epoch": 0.9708647906657516, "grad_norm": 0.691469673256826, "learning_rate": 2.2297520505940583e-08, "loss": 0.2314, "step": 28291 }, { "epoch": 0.9708991077556623, "grad_norm": 0.729603299649331, "learning_rate": 2.2245125528060352e-08, "loss": 0.2499, "step": 28292 }, { "epoch": 0.9709334248455731, "grad_norm": 0.9775727858962338, "learning_rate": 2.2192792044537504e-08, "loss": 0.279, "step": 28293 }, { "epoch": 0.9709677419354839, "grad_norm": 0.9408479681832584, "learning_rate": 2.2140520056019853e-08, "loss": 0.2323, "step": 28294 }, { "epoch": 0.9710020590253946, "grad_norm": 0.7959219326321354, "learning_rate": 2.2088309563151888e-08, "loss": 0.2656, "step": 28295 }, { "epoch": 0.9710363761153055, "grad_norm": 0.7759072973765924, "learning_rate": 2.2036160566579757e-08, "loss": 0.3228, "step": 28296 }, { "epoch": 0.9710706932052162, "grad_norm": 0.7514119882216396, "learning_rate": 2.198407306694683e-08, "loss": 0.2521, "step": 28297 }, { "epoch": 0.971105010295127, "grad_norm": 0.7963130015402051, "learning_rate": 2.193204706489649e-08, "loss": 0.2605, "step": 28298 }, { "epoch": 0.9711393273850377, "grad_norm": 1.0111982833054078, "learning_rate": 2.1880082561072657e-08, "loss": 0.2488, "step": 28299 }, { "epoch": 0.9711736444749485, "grad_norm": 0.7927384749695715, "learning_rate": 2.1828179556115937e-08, "loss": 0.2587, "step": 28300 }, { "epoch": 0.9712079615648593, "grad_norm": 0.7794999467081842, "learning_rate": 2.1776338050668033e-08, "loss": 0.2476, "step": 28301 }, { "epoch": 0.9712422786547701, "grad_norm": 0.747588000324319, "learning_rate": 2.1724558045370105e-08, "loss": 0.2507, "step": 28302 }, { "epoch": 0.9712765957446808, "grad_norm": 0.6631705622281047, "learning_rate": 2.1672839540859968e-08, "loss": 0.2238, "step": 28303 }, { "epoch": 0.9713109128345916, "grad_norm": 0.6614615654687354, "learning_rate": 2.1621182537778784e-08, "loss": 0.2134, "step": 28304 }, { "epoch": 0.9713452299245025, "grad_norm": 0.708663902199333, "learning_rate": 2.1569587036763263e-08, "loss": 0.2762, "step": 28305 }, { "epoch": 0.9713795470144132, "grad_norm": 0.8166457063986565, "learning_rate": 2.1518053038451224e-08, "loss": 0.3181, "step": 28306 }, { "epoch": 0.971413864104324, "grad_norm": 0.9497153147904431, "learning_rate": 2.146658054347939e-08, "loss": 0.3263, "step": 28307 }, { "epoch": 0.9714481811942347, "grad_norm": 0.6863849009439761, "learning_rate": 2.141516955248335e-08, "loss": 0.1887, "step": 28308 }, { "epoch": 0.9714824982841455, "grad_norm": 0.7554888771557847, "learning_rate": 2.1363820066098716e-08, "loss": 0.2321, "step": 28309 }, { "epoch": 0.9715168153740563, "grad_norm": 0.7896063965638181, "learning_rate": 2.131253208495887e-08, "loss": 0.2373, "step": 28310 }, { "epoch": 0.9715511324639671, "grad_norm": 0.8297393418391091, "learning_rate": 2.1261305609698857e-08, "loss": 0.2901, "step": 28311 }, { "epoch": 0.9715854495538778, "grad_norm": 0.7616446037033495, "learning_rate": 2.1210140640950395e-08, "loss": 0.2719, "step": 28312 }, { "epoch": 0.9716197666437886, "grad_norm": 0.7895516452643334, "learning_rate": 2.115903717934631e-08, "loss": 0.2634, "step": 28313 }, { "epoch": 0.9716540837336994, "grad_norm": 0.8680289944028019, "learning_rate": 2.110799522551721e-08, "loss": 0.286, "step": 28314 }, { "epoch": 0.9716884008236102, "grad_norm": 0.8559301411124974, "learning_rate": 2.1057014780094255e-08, "loss": 0.3116, "step": 28315 }, { "epoch": 0.9717227179135209, "grad_norm": 0.7364376509935655, "learning_rate": 2.100609584370694e-08, "loss": 0.2334, "step": 28316 }, { "epoch": 0.9717570350034317, "grad_norm": 0.7270139982769552, "learning_rate": 2.0955238416983657e-08, "loss": 0.2323, "step": 28317 }, { "epoch": 0.9717913520933424, "grad_norm": 0.7107284497062921, "learning_rate": 2.0904442500554456e-08, "loss": 0.2984, "step": 28318 }, { "epoch": 0.9718256691832533, "grad_norm": 0.7451954552526341, "learning_rate": 2.08537080950455e-08, "loss": 0.2758, "step": 28319 }, { "epoch": 0.9718599862731641, "grad_norm": 0.7341861600625142, "learning_rate": 2.080303520108351e-08, "loss": 0.2342, "step": 28320 }, { "epoch": 0.9718943033630748, "grad_norm": 0.827712227885344, "learning_rate": 2.0752423819295208e-08, "loss": 0.3035, "step": 28321 }, { "epoch": 0.9719286204529856, "grad_norm": 0.8122776728580013, "learning_rate": 2.0701873950305095e-08, "loss": 0.2751, "step": 28322 }, { "epoch": 0.9719629375428963, "grad_norm": 0.7989455503002003, "learning_rate": 2.0651385594738226e-08, "loss": 0.2447, "step": 28323 }, { "epoch": 0.9719972546328072, "grad_norm": 0.7624534498298192, "learning_rate": 2.0600958753217994e-08, "loss": 0.2451, "step": 28324 }, { "epoch": 0.9720315717227179, "grad_norm": 0.6900769776950457, "learning_rate": 2.055059342636778e-08, "loss": 0.2539, "step": 28325 }, { "epoch": 0.9720658888126287, "grad_norm": 0.9220043020865529, "learning_rate": 2.050028961480932e-08, "loss": 0.2703, "step": 28326 }, { "epoch": 0.9721002059025394, "grad_norm": 0.7698701252573933, "learning_rate": 2.0450047319164334e-08, "loss": 0.2791, "step": 28327 }, { "epoch": 0.9721345229924503, "grad_norm": 0.8676879737079441, "learning_rate": 2.0399866540053438e-08, "loss": 0.3296, "step": 28328 }, { "epoch": 0.972168840082361, "grad_norm": 0.8431094040424989, "learning_rate": 2.0349747278096688e-08, "loss": 0.2934, "step": 28329 }, { "epoch": 0.9722031571722718, "grad_norm": 0.7322061415544041, "learning_rate": 2.0299689533913037e-08, "loss": 0.2379, "step": 28330 }, { "epoch": 0.9722374742621825, "grad_norm": 0.8189151940357106, "learning_rate": 2.0249693308120878e-08, "loss": 0.3214, "step": 28331 }, { "epoch": 0.9722717913520933, "grad_norm": 0.7027262422617397, "learning_rate": 2.0199758601337493e-08, "loss": 0.2606, "step": 28332 }, { "epoch": 0.9723061084420042, "grad_norm": 0.7101077154910522, "learning_rate": 2.0149885414180726e-08, "loss": 0.2391, "step": 28333 }, { "epoch": 0.9723404255319149, "grad_norm": 0.7934276762543845, "learning_rate": 2.0100073747266192e-08, "loss": 0.2468, "step": 28334 }, { "epoch": 0.9723747426218257, "grad_norm": 0.7831093698508687, "learning_rate": 2.0050323601208956e-08, "loss": 0.233, "step": 28335 }, { "epoch": 0.9724090597117364, "grad_norm": 0.7231732634303235, "learning_rate": 2.000063497662408e-08, "loss": 0.2309, "step": 28336 }, { "epoch": 0.9724433768016473, "grad_norm": 0.7165726586590071, "learning_rate": 1.9951007874125518e-08, "loss": 0.2121, "step": 28337 }, { "epoch": 0.972477693891558, "grad_norm": 0.7286319855932335, "learning_rate": 1.990144229432556e-08, "loss": 0.2342, "step": 28338 }, { "epoch": 0.9725120109814688, "grad_norm": 0.8772865856352589, "learning_rate": 1.98519382378376e-08, "loss": 0.2522, "step": 28339 }, { "epoch": 0.9725463280713795, "grad_norm": 0.8033894716058558, "learning_rate": 1.9802495705272262e-08, "loss": 0.2656, "step": 28340 }, { "epoch": 0.9725806451612903, "grad_norm": 0.8396754719318679, "learning_rate": 1.9753114697240727e-08, "loss": 0.2677, "step": 28341 }, { "epoch": 0.9726149622512011, "grad_norm": 0.8137611694377678, "learning_rate": 1.9703795214353062e-08, "loss": 0.2304, "step": 28342 }, { "epoch": 0.9726492793411119, "grad_norm": 0.8477355827541206, "learning_rate": 1.9654537257218774e-08, "loss": 0.3033, "step": 28343 }, { "epoch": 0.9726835964310226, "grad_norm": 0.8313455771631924, "learning_rate": 1.9605340826445717e-08, "loss": 0.2472, "step": 28344 }, { "epoch": 0.9727179135209334, "grad_norm": 0.7051975229077856, "learning_rate": 1.9556205922642846e-08, "loss": 0.2215, "step": 28345 }, { "epoch": 0.9727522306108441, "grad_norm": 0.7198229251952315, "learning_rate": 1.9507132546415786e-08, "loss": 0.2362, "step": 28346 }, { "epoch": 0.972786547700755, "grad_norm": 0.6838349802090827, "learning_rate": 1.9458120698371273e-08, "loss": 0.2123, "step": 28347 }, { "epoch": 0.9728208647906658, "grad_norm": 0.815518444718291, "learning_rate": 1.9409170379115493e-08, "loss": 0.2939, "step": 28348 }, { "epoch": 0.9728551818805765, "grad_norm": 0.7949412266795717, "learning_rate": 1.9360281589252406e-08, "loss": 0.2966, "step": 28349 }, { "epoch": 0.9728894989704873, "grad_norm": 0.6799365882283652, "learning_rate": 1.9311454329386525e-08, "loss": 0.2262, "step": 28350 }, { "epoch": 0.9729238160603981, "grad_norm": 0.6655264724062727, "learning_rate": 1.9262688600120148e-08, "loss": 0.2102, "step": 28351 }, { "epoch": 0.9729581331503089, "grad_norm": 0.8209026339376496, "learning_rate": 1.921398440205613e-08, "loss": 0.2542, "step": 28352 }, { "epoch": 0.9729924502402196, "grad_norm": 0.8891298668056448, "learning_rate": 1.9165341735796762e-08, "loss": 0.2548, "step": 28353 }, { "epoch": 0.9730267673301304, "grad_norm": 0.6765279902071979, "learning_rate": 1.9116760601942675e-08, "loss": 0.2012, "step": 28354 }, { "epoch": 0.9730610844200411, "grad_norm": 0.7668151051011572, "learning_rate": 1.906824100109339e-08, "loss": 0.2682, "step": 28355 }, { "epoch": 0.973095401509952, "grad_norm": 0.8828076403713102, "learning_rate": 1.9019782933848985e-08, "loss": 0.2706, "step": 28356 }, { "epoch": 0.9731297185998627, "grad_norm": 0.8161115860148153, "learning_rate": 1.8971386400807867e-08, "loss": 0.2737, "step": 28357 }, { "epoch": 0.9731640356897735, "grad_norm": 0.7132228183187524, "learning_rate": 1.8923051402567893e-08, "loss": 0.2893, "step": 28358 }, { "epoch": 0.9731983527796843, "grad_norm": 0.7848880969905742, "learning_rate": 1.8874777939726364e-08, "loss": 0.2823, "step": 28359 }, { "epoch": 0.9732326698695951, "grad_norm": 0.804192297538426, "learning_rate": 1.8826566012879465e-08, "loss": 0.2641, "step": 28360 }, { "epoch": 0.9732669869595059, "grad_norm": 0.8399844377099321, "learning_rate": 1.8778415622622835e-08, "loss": 0.3381, "step": 28361 }, { "epoch": 0.9733013040494166, "grad_norm": 0.7673104420676976, "learning_rate": 1.8730326769551556e-08, "loss": 0.2147, "step": 28362 }, { "epoch": 0.9733356211393274, "grad_norm": 0.8163113622601916, "learning_rate": 1.8682299454259036e-08, "loss": 0.3023, "step": 28363 }, { "epoch": 0.9733699382292381, "grad_norm": 0.7382044522283195, "learning_rate": 1.8634333677339244e-08, "loss": 0.253, "step": 28364 }, { "epoch": 0.973404255319149, "grad_norm": 0.7516838126601744, "learning_rate": 1.8586429439385047e-08, "loss": 0.2471, "step": 28365 }, { "epoch": 0.9734385724090597, "grad_norm": 0.7099375868374661, "learning_rate": 1.8538586740987074e-08, "loss": 0.2699, "step": 28366 }, { "epoch": 0.9734728894989705, "grad_norm": 0.8435867173868155, "learning_rate": 1.8490805582737082e-08, "loss": 0.3486, "step": 28367 }, { "epoch": 0.9735072065888812, "grad_norm": 0.7719100898042324, "learning_rate": 1.8443085965225704e-08, "loss": 0.228, "step": 28368 }, { "epoch": 0.973541523678792, "grad_norm": 0.935256277423513, "learning_rate": 1.8395427889041918e-08, "loss": 0.3211, "step": 28369 }, { "epoch": 0.9735758407687028, "grad_norm": 0.774082594343021, "learning_rate": 1.8347831354774693e-08, "loss": 0.3, "step": 28370 }, { "epoch": 0.9736101578586136, "grad_norm": 0.7606068147541416, "learning_rate": 1.8300296363011895e-08, "loss": 0.2675, "step": 28371 }, { "epoch": 0.9736444749485244, "grad_norm": 0.6858015625613105, "learning_rate": 1.825282291434083e-08, "loss": 0.2232, "step": 28372 }, { "epoch": 0.9736787920384351, "grad_norm": 0.8478428893504071, "learning_rate": 1.820541100934825e-08, "loss": 0.2568, "step": 28373 }, { "epoch": 0.973713109128346, "grad_norm": 0.8193927798533054, "learning_rate": 1.8158060648619247e-08, "loss": 0.2396, "step": 28374 }, { "epoch": 0.9737474262182567, "grad_norm": 0.7822118735440746, "learning_rate": 1.8110771832738905e-08, "loss": 0.2819, "step": 28375 }, { "epoch": 0.9737817433081675, "grad_norm": 0.7475946801138407, "learning_rate": 1.8063544562292313e-08, "loss": 0.3281, "step": 28376 }, { "epoch": 0.9738160603980782, "grad_norm": 0.8386024951881282, "learning_rate": 1.801637883786178e-08, "loss": 0.2721, "step": 28377 }, { "epoch": 0.973850377487989, "grad_norm": 0.6816946313772883, "learning_rate": 1.7969274660030732e-08, "loss": 0.2897, "step": 28378 }, { "epoch": 0.9738846945778998, "grad_norm": 0.7293711172573009, "learning_rate": 1.792223202938037e-08, "loss": 0.2558, "step": 28379 }, { "epoch": 0.9739190116678106, "grad_norm": 0.887085575903217, "learning_rate": 1.7875250946493005e-08, "loss": 0.3123, "step": 28380 }, { "epoch": 0.9739533287577213, "grad_norm": 0.7117233958786952, "learning_rate": 1.782833141194762e-08, "loss": 0.2526, "step": 28381 }, { "epoch": 0.9739876458476321, "grad_norm": 0.8203923054440245, "learning_rate": 1.778147342632486e-08, "loss": 0.262, "step": 28382 }, { "epoch": 0.974021962937543, "grad_norm": 0.829311033308795, "learning_rate": 1.7734676990203147e-08, "loss": 0.2957, "step": 28383 }, { "epoch": 0.9740562800274537, "grad_norm": 0.8128274094668547, "learning_rate": 1.7687942104160916e-08, "loss": 0.2203, "step": 28384 }, { "epoch": 0.9740905971173645, "grad_norm": 0.7470373411740111, "learning_rate": 1.7641268768775476e-08, "loss": 0.2632, "step": 28385 }, { "epoch": 0.9741249142072752, "grad_norm": 0.9038301282853586, "learning_rate": 1.7594656984623036e-08, "loss": 0.2315, "step": 28386 }, { "epoch": 0.974159231297186, "grad_norm": 0.8050618572520479, "learning_rate": 1.7548106752279802e-08, "loss": 0.268, "step": 28387 }, { "epoch": 0.9741935483870968, "grad_norm": 0.6912779075534528, "learning_rate": 1.7501618072320868e-08, "loss": 0.2398, "step": 28388 }, { "epoch": 0.9742278654770076, "grad_norm": 0.7786736061785994, "learning_rate": 1.7455190945320223e-08, "loss": 0.2201, "step": 28389 }, { "epoch": 0.9742621825669183, "grad_norm": 0.7666116897843867, "learning_rate": 1.740882537185129e-08, "loss": 0.243, "step": 28390 }, { "epoch": 0.9742964996568291, "grad_norm": 0.8398242258198189, "learning_rate": 1.7362521352487504e-08, "loss": 0.2786, "step": 28391 }, { "epoch": 0.9743308167467398, "grad_norm": 0.8405758285602328, "learning_rate": 1.7316278887801186e-08, "loss": 0.2562, "step": 28392 }, { "epoch": 0.9743651338366507, "grad_norm": 0.9631880339439352, "learning_rate": 1.7270097978361878e-08, "loss": 0.2529, "step": 28393 }, { "epoch": 0.9743994509265614, "grad_norm": 0.7777387734630671, "learning_rate": 1.72239786247419e-08, "loss": 0.218, "step": 28394 }, { "epoch": 0.9744337680164722, "grad_norm": 0.74436006578771, "learning_rate": 1.7177920827510242e-08, "loss": 0.2326, "step": 28395 }, { "epoch": 0.9744680851063829, "grad_norm": 1.0253882241321681, "learning_rate": 1.7131924587235892e-08, "loss": 0.2755, "step": 28396 }, { "epoch": 0.9745024021962938, "grad_norm": 0.8433702342297579, "learning_rate": 1.708598990448729e-08, "loss": 0.2809, "step": 28397 }, { "epoch": 0.9745367192862046, "grad_norm": 0.714931898917753, "learning_rate": 1.7040116779831752e-08, "loss": 0.2642, "step": 28398 }, { "epoch": 0.9745710363761153, "grad_norm": 0.6655371670167727, "learning_rate": 1.6994305213835494e-08, "loss": 0.2443, "step": 28399 }, { "epoch": 0.9746053534660261, "grad_norm": 0.7274745307993946, "learning_rate": 1.6948555207065286e-08, "loss": 0.2207, "step": 28400 }, { "epoch": 0.9746396705559368, "grad_norm": 0.7371030135343966, "learning_rate": 1.6902866760086232e-08, "loss": 0.2519, "step": 28401 }, { "epoch": 0.9746739876458477, "grad_norm": 0.9079465042456532, "learning_rate": 1.6857239873462327e-08, "loss": 0.2293, "step": 28402 }, { "epoch": 0.9747083047357584, "grad_norm": 0.7797764506329732, "learning_rate": 1.6811674547757563e-08, "loss": 0.3395, "step": 28403 }, { "epoch": 0.9747426218256692, "grad_norm": 0.7535060122006291, "learning_rate": 1.676617078353482e-08, "loss": 0.258, "step": 28404 }, { "epoch": 0.9747769389155799, "grad_norm": 0.7933389447579434, "learning_rate": 1.6720728581355873e-08, "loss": 0.2418, "step": 28405 }, { "epoch": 0.9748112560054908, "grad_norm": 0.7584360666194739, "learning_rate": 1.6675347941782493e-08, "loss": 0.2157, "step": 28406 }, { "epoch": 0.9748455730954015, "grad_norm": 0.7570985767150571, "learning_rate": 1.6630028865375346e-08, "loss": 0.2523, "step": 28407 }, { "epoch": 0.9748798901853123, "grad_norm": 0.8366022664860999, "learning_rate": 1.6584771352693983e-08, "loss": 0.2506, "step": 28408 }, { "epoch": 0.974914207275223, "grad_norm": 0.7198064789178983, "learning_rate": 1.6539575404297403e-08, "loss": 0.2391, "step": 28409 }, { "epoch": 0.9749485243651338, "grad_norm": 0.7007253234360559, "learning_rate": 1.64944410207446e-08, "loss": 0.2619, "step": 28410 }, { "epoch": 0.9749828414550447, "grad_norm": 0.7511397539633823, "learning_rate": 1.644936820259291e-08, "loss": 0.2433, "step": 28411 }, { "epoch": 0.9750171585449554, "grad_norm": 0.7827350038787783, "learning_rate": 1.640435695039855e-08, "loss": 0.2497, "step": 28412 }, { "epoch": 0.9750514756348662, "grad_norm": 0.7259997283179705, "learning_rate": 1.6359407264718295e-08, "loss": 0.2467, "step": 28413 }, { "epoch": 0.9750857927247769, "grad_norm": 0.8586564409726184, "learning_rate": 1.6314519146107267e-08, "loss": 0.3035, "step": 28414 }, { "epoch": 0.9751201098146877, "grad_norm": 0.8423268057974962, "learning_rate": 1.6269692595120012e-08, "loss": 0.2326, "step": 28415 }, { "epoch": 0.9751544269045985, "grad_norm": 0.8540684356230305, "learning_rate": 1.622492761230998e-08, "loss": 0.2444, "step": 28416 }, { "epoch": 0.9751887439945093, "grad_norm": 0.782299501425814, "learning_rate": 1.618022419823062e-08, "loss": 0.2575, "step": 28417 }, { "epoch": 0.97522306108442, "grad_norm": 0.7501188373606823, "learning_rate": 1.6135582353434263e-08, "loss": 0.2812, "step": 28418 }, { "epoch": 0.9752573781743308, "grad_norm": 0.7517571304414995, "learning_rate": 1.6091002078471586e-08, "loss": 0.2801, "step": 28419 }, { "epoch": 0.9752916952642416, "grad_norm": 0.8036191324998514, "learning_rate": 1.604648337389436e-08, "loss": 0.2627, "step": 28420 }, { "epoch": 0.9753260123541524, "grad_norm": 0.9013851758828879, "learning_rate": 1.60020262402516e-08, "loss": 0.261, "step": 28421 }, { "epoch": 0.9753603294440631, "grad_norm": 0.7916907951272499, "learning_rate": 1.5957630678093418e-08, "loss": 0.2159, "step": 28422 }, { "epoch": 0.9753946465339739, "grad_norm": 0.8380740512199695, "learning_rate": 1.5913296687967707e-08, "loss": 0.2226, "step": 28423 }, { "epoch": 0.9754289636238846, "grad_norm": 0.7598311637819958, "learning_rate": 1.5869024270422917e-08, "loss": 0.276, "step": 28424 }, { "epoch": 0.9754632807137955, "grad_norm": 0.7476720156501989, "learning_rate": 1.5824813426004727e-08, "loss": 0.2556, "step": 28425 }, { "epoch": 0.9754975978037063, "grad_norm": 0.8368075632613963, "learning_rate": 1.5780664155260474e-08, "loss": 0.2906, "step": 28426 }, { "epoch": 0.975531914893617, "grad_norm": 0.8021606699494099, "learning_rate": 1.573657645873472e-08, "loss": 0.2867, "step": 28427 }, { "epoch": 0.9755662319835278, "grad_norm": 0.7848836176856874, "learning_rate": 1.5692550336972033e-08, "loss": 0.3103, "step": 28428 }, { "epoch": 0.9756005490734386, "grad_norm": 0.8113952633177723, "learning_rate": 1.5648585790517533e-08, "loss": 0.2514, "step": 28429 }, { "epoch": 0.9756348661633494, "grad_norm": 0.8074889152747227, "learning_rate": 1.5604682819913564e-08, "loss": 0.3286, "step": 28430 }, { "epoch": 0.9756691832532601, "grad_norm": 0.8237804280420242, "learning_rate": 1.556084142570191e-08, "loss": 0.2542, "step": 28431 }, { "epoch": 0.9757035003431709, "grad_norm": 0.7779898326079892, "learning_rate": 1.5517061608424923e-08, "loss": 0.2554, "step": 28432 }, { "epoch": 0.9757378174330816, "grad_norm": 0.8196493774701716, "learning_rate": 1.5473343368623828e-08, "loss": 0.2503, "step": 28433 }, { "epoch": 0.9757721345229925, "grad_norm": 0.7258385897026971, "learning_rate": 1.5429686706837645e-08, "loss": 0.2541, "step": 28434 }, { "epoch": 0.9758064516129032, "grad_norm": 0.8819700719113294, "learning_rate": 1.5386091623606493e-08, "loss": 0.2752, "step": 28435 }, { "epoch": 0.975840768702814, "grad_norm": 0.8771973704943928, "learning_rate": 1.5342558119468275e-08, "loss": 0.2862, "step": 28436 }, { "epoch": 0.9758750857927248, "grad_norm": 0.7680231724400837, "learning_rate": 1.5299086194962008e-08, "loss": 0.2673, "step": 28437 }, { "epoch": 0.9759094028826355, "grad_norm": 0.6887868932792075, "learning_rate": 1.5255675850623374e-08, "loss": 0.2203, "step": 28438 }, { "epoch": 0.9759437199725464, "grad_norm": 0.774830192695379, "learning_rate": 1.5212327086989165e-08, "loss": 0.244, "step": 28439 }, { "epoch": 0.9759780370624571, "grad_norm": 0.7752948448408826, "learning_rate": 1.516903990459506e-08, "loss": 0.2345, "step": 28440 }, { "epoch": 0.9760123541523679, "grad_norm": 0.7785668183977872, "learning_rate": 1.512581430397564e-08, "loss": 0.2957, "step": 28441 }, { "epoch": 0.9760466712422786, "grad_norm": 0.7956366479839754, "learning_rate": 1.508265028566547e-08, "loss": 0.2753, "step": 28442 }, { "epoch": 0.9760809883321895, "grad_norm": 0.699675449290933, "learning_rate": 1.5039547850196902e-08, "loss": 0.2128, "step": 28443 }, { "epoch": 0.9761153054221002, "grad_norm": 0.726316511263933, "learning_rate": 1.4996506998102845e-08, "loss": 0.2266, "step": 28444 }, { "epoch": 0.976149622512011, "grad_norm": 0.7636557957581909, "learning_rate": 1.495352772991454e-08, "loss": 0.2638, "step": 28445 }, { "epoch": 0.9761839396019217, "grad_norm": 0.8163645425882587, "learning_rate": 1.4910610046164343e-08, "loss": 0.2812, "step": 28446 }, { "epoch": 0.9762182566918325, "grad_norm": 0.8221807522933572, "learning_rate": 1.4867753947380714e-08, "loss": 0.226, "step": 28447 }, { "epoch": 0.9762525737817433, "grad_norm": 0.9358924729456504, "learning_rate": 1.4824959434094344e-08, "loss": 0.2172, "step": 28448 }, { "epoch": 0.9762868908716541, "grad_norm": 0.7407788534287274, "learning_rate": 1.4782226506833141e-08, "loss": 0.2562, "step": 28449 }, { "epoch": 0.9763212079615649, "grad_norm": 0.7894753138675726, "learning_rate": 1.4739555166125575e-08, "loss": 0.266, "step": 28450 }, { "epoch": 0.9763555250514756, "grad_norm": 0.7806261480062182, "learning_rate": 1.4696945412498443e-08, "loss": 0.2746, "step": 28451 }, { "epoch": 0.9763898421413865, "grad_norm": 0.840138490031041, "learning_rate": 1.4654397246478547e-08, "loss": 0.2726, "step": 28452 }, { "epoch": 0.9764241592312972, "grad_norm": 0.7439427664368542, "learning_rate": 1.4611910668591022e-08, "loss": 0.2175, "step": 28453 }, { "epoch": 0.976458476321208, "grad_norm": 0.7914328696450978, "learning_rate": 1.4569485679360451e-08, "loss": 0.2729, "step": 28454 }, { "epoch": 0.9764927934111187, "grad_norm": 0.7554267700209509, "learning_rate": 1.4527122279311967e-08, "loss": 0.2373, "step": 28455 }, { "epoch": 0.9765271105010295, "grad_norm": 0.8613295895387169, "learning_rate": 1.4484820468968486e-08, "loss": 0.285, "step": 28456 }, { "epoch": 0.9765614275909403, "grad_norm": 0.7240283895231133, "learning_rate": 1.4442580248851812e-08, "loss": 0.2275, "step": 28457 }, { "epoch": 0.9765957446808511, "grad_norm": 0.8333235728643353, "learning_rate": 1.4400401619485416e-08, "loss": 0.2584, "step": 28458 }, { "epoch": 0.9766300617707618, "grad_norm": 0.8249472425950719, "learning_rate": 1.4358284581388881e-08, "loss": 0.2828, "step": 28459 }, { "epoch": 0.9766643788606726, "grad_norm": 0.8147387875670096, "learning_rate": 1.4316229135082904e-08, "loss": 0.261, "step": 28460 }, { "epoch": 0.9766986959505833, "grad_norm": 0.7701792301824435, "learning_rate": 1.4274235281087624e-08, "loss": 0.2376, "step": 28461 }, { "epoch": 0.9767330130404942, "grad_norm": 0.7201382574807863, "learning_rate": 1.4232303019920956e-08, "loss": 0.2486, "step": 28462 }, { "epoch": 0.976767330130405, "grad_norm": 0.9564618421721637, "learning_rate": 1.419043235210138e-08, "loss": 0.2744, "step": 28463 }, { "epoch": 0.9768016472203157, "grad_norm": 0.7380788902040856, "learning_rate": 1.4148623278146256e-08, "loss": 0.2449, "step": 28464 }, { "epoch": 0.9768359643102265, "grad_norm": 0.8615189145803775, "learning_rate": 1.4106875798572394e-08, "loss": 0.2452, "step": 28465 }, { "epoch": 0.9768702814001373, "grad_norm": 1.581482704155009, "learning_rate": 1.4065189913894384e-08, "loss": 0.2065, "step": 28466 }, { "epoch": 0.9769045984900481, "grad_norm": 0.7593564905655861, "learning_rate": 1.4023565624627922e-08, "loss": 0.2135, "step": 28467 }, { "epoch": 0.9769389155799588, "grad_norm": 0.7861040292610743, "learning_rate": 1.3982002931287597e-08, "loss": 0.2425, "step": 28468 }, { "epoch": 0.9769732326698696, "grad_norm": 0.8698085681519625, "learning_rate": 1.3940501834386334e-08, "loss": 0.2314, "step": 28469 }, { "epoch": 0.9770075497597803, "grad_norm": 0.7047996774124689, "learning_rate": 1.3899062334437608e-08, "loss": 0.2463, "step": 28470 }, { "epoch": 0.9770418668496912, "grad_norm": 0.8195576497202288, "learning_rate": 1.3857684431951568e-08, "loss": 0.3283, "step": 28471 }, { "epoch": 0.9770761839396019, "grad_norm": 0.801173980243393, "learning_rate": 1.3816368127441693e-08, "loss": 0.2791, "step": 28472 }, { "epoch": 0.9771105010295127, "grad_norm": 0.7316536113268742, "learning_rate": 1.3775113421416464e-08, "loss": 0.2412, "step": 28473 }, { "epoch": 0.9771448181194234, "grad_norm": 0.8196586494926922, "learning_rate": 1.3733920314386583e-08, "loss": 0.2333, "step": 28474 }, { "epoch": 0.9771791352093342, "grad_norm": 0.9787312886541217, "learning_rate": 1.3692788806860535e-08, "loss": 0.2428, "step": 28475 }, { "epoch": 0.9772134522992451, "grad_norm": 0.8686894567041852, "learning_rate": 1.36517188993468e-08, "loss": 0.2826, "step": 28476 }, { "epoch": 0.9772477693891558, "grad_norm": 0.7758457357165311, "learning_rate": 1.3610710592352749e-08, "loss": 0.3269, "step": 28477 }, { "epoch": 0.9772820864790666, "grad_norm": 0.851611627053172, "learning_rate": 1.3569763886384646e-08, "loss": 0.2551, "step": 28478 }, { "epoch": 0.9773164035689773, "grad_norm": 0.8128790480216633, "learning_rate": 1.3528878781948195e-08, "loss": 0.2764, "step": 28479 }, { "epoch": 0.9773507206588882, "grad_norm": 0.7631679655181053, "learning_rate": 1.3488055279549107e-08, "loss": 0.2126, "step": 28480 }, { "epoch": 0.9773850377487989, "grad_norm": 0.7732656980384873, "learning_rate": 1.3447293379691418e-08, "loss": 0.2836, "step": 28481 }, { "epoch": 0.9774193548387097, "grad_norm": 0.8272333394605533, "learning_rate": 1.3406593082878617e-08, "loss": 0.282, "step": 28482 }, { "epoch": 0.9774536719286204, "grad_norm": 0.7574752257339082, "learning_rate": 1.3365954389613633e-08, "loss": 0.2437, "step": 28483 }, { "epoch": 0.9774879890185312, "grad_norm": 0.8023101999629302, "learning_rate": 1.332537730039829e-08, "loss": 0.3087, "step": 28484 }, { "epoch": 0.977522306108442, "grad_norm": 0.8506651246917549, "learning_rate": 1.3284861815733852e-08, "loss": 0.2857, "step": 28485 }, { "epoch": 0.9775566231983528, "grad_norm": 0.9395697208749257, "learning_rate": 1.3244407936121029e-08, "loss": 0.288, "step": 28486 }, { "epoch": 0.9775909402882635, "grad_norm": 0.7309882794771817, "learning_rate": 1.3204015662059977e-08, "loss": 0.2367, "step": 28487 }, { "epoch": 0.9776252573781743, "grad_norm": 0.7680039954320186, "learning_rate": 1.3163684994048631e-08, "loss": 0.2379, "step": 28488 }, { "epoch": 0.9776595744680852, "grad_norm": 0.8782804398538515, "learning_rate": 1.3123415932586591e-08, "loss": 0.2397, "step": 28489 }, { "epoch": 0.9776938915579959, "grad_norm": 0.797183360179203, "learning_rate": 1.3083208478170129e-08, "loss": 0.2884, "step": 28490 }, { "epoch": 0.9777282086479067, "grad_norm": 0.6921210438710548, "learning_rate": 1.3043062631296622e-08, "loss": 0.2136, "step": 28491 }, { "epoch": 0.9777625257378174, "grad_norm": 0.791722644397912, "learning_rate": 1.3002978392461784e-08, "loss": 0.2065, "step": 28492 }, { "epoch": 0.9777968428277282, "grad_norm": 0.9342860830526047, "learning_rate": 1.2962955762160778e-08, "loss": 0.239, "step": 28493 }, { "epoch": 0.977831159917639, "grad_norm": 0.8245828513160189, "learning_rate": 1.2922994740888207e-08, "loss": 0.2907, "step": 28494 }, { "epoch": 0.9778654770075498, "grad_norm": 0.7119179713720495, "learning_rate": 1.2883095329137562e-08, "loss": 0.2392, "step": 28495 }, { "epoch": 0.9778997940974605, "grad_norm": 0.8095634690521186, "learning_rate": 1.2843257527402342e-08, "loss": 0.2559, "step": 28496 }, { "epoch": 0.9779341111873713, "grad_norm": 0.8107784848733576, "learning_rate": 1.2803481336173818e-08, "loss": 0.2358, "step": 28497 }, { "epoch": 0.977968428277282, "grad_norm": 0.8347700340518767, "learning_rate": 1.2763766755943819e-08, "loss": 0.3319, "step": 28498 }, { "epoch": 0.9780027453671929, "grad_norm": 0.7239506324192375, "learning_rate": 1.2724113787203063e-08, "loss": 0.2604, "step": 28499 }, { "epoch": 0.9780370624571036, "grad_norm": 0.8642407736719842, "learning_rate": 1.2684522430441159e-08, "loss": 0.2586, "step": 28500 }, { "epoch": 0.9780713795470144, "grad_norm": 0.8408999371921433, "learning_rate": 1.2644992686147161e-08, "loss": 0.2862, "step": 28501 }, { "epoch": 0.9781056966369251, "grad_norm": 0.776265572378884, "learning_rate": 1.2605524554809568e-08, "loss": 0.2458, "step": 28502 }, { "epoch": 0.978140013726836, "grad_norm": 0.6528948366180127, "learning_rate": 1.2566118036916319e-08, "loss": 0.1846, "step": 28503 }, { "epoch": 0.9781743308167468, "grad_norm": 0.6915486516866575, "learning_rate": 1.2526773132953695e-08, "loss": 0.216, "step": 28504 }, { "epoch": 0.9782086479066575, "grad_norm": 0.7628115366744247, "learning_rate": 1.2487489843407974e-08, "loss": 0.2466, "step": 28505 }, { "epoch": 0.9782429649965683, "grad_norm": 0.8248576439919618, "learning_rate": 1.2448268168764321e-08, "loss": 0.254, "step": 28506 }, { "epoch": 0.978277282086479, "grad_norm": 0.8134985229821102, "learning_rate": 1.2409108109507905e-08, "loss": 0.2337, "step": 28507 }, { "epoch": 0.9783115991763899, "grad_norm": 0.7257679304851207, "learning_rate": 1.2370009666121674e-08, "loss": 0.2287, "step": 28508 }, { "epoch": 0.9783459162663006, "grad_norm": 0.8661914910681372, "learning_rate": 1.2330972839089129e-08, "loss": 0.2766, "step": 28509 }, { "epoch": 0.9783802333562114, "grad_norm": 0.8641343312334865, "learning_rate": 1.2291997628892105e-08, "loss": 0.2713, "step": 28510 }, { "epoch": 0.9784145504461221, "grad_norm": 0.8598273876488048, "learning_rate": 1.2253084036012442e-08, "loss": 0.2627, "step": 28511 }, { "epoch": 0.978448867536033, "grad_norm": 0.798599030442386, "learning_rate": 1.2214232060930308e-08, "loss": 0.27, "step": 28512 }, { "epoch": 0.9784831846259437, "grad_norm": 0.7395066738570722, "learning_rate": 1.217544170412699e-08, "loss": 0.281, "step": 28513 }, { "epoch": 0.9785175017158545, "grad_norm": 0.7598029083879112, "learning_rate": 1.2136712966080433e-08, "loss": 0.221, "step": 28514 }, { "epoch": 0.9785518188057652, "grad_norm": 0.7814387700512773, "learning_rate": 1.2098045847269147e-08, "loss": 0.2565, "step": 28515 }, { "epoch": 0.978586135895676, "grad_norm": 0.7843469667379855, "learning_rate": 1.2059440348171636e-08, "loss": 0.2265, "step": 28516 }, { "epoch": 0.9786204529855869, "grad_norm": 0.8308402650527484, "learning_rate": 1.2020896469264188e-08, "loss": 0.3152, "step": 28517 }, { "epoch": 0.9786547700754976, "grad_norm": 0.6363743955078741, "learning_rate": 1.1982414211023086e-08, "loss": 0.2525, "step": 28518 }, { "epoch": 0.9786890871654084, "grad_norm": 0.7636852701449803, "learning_rate": 1.1943993573924061e-08, "loss": 0.2649, "step": 28519 }, { "epoch": 0.9787234042553191, "grad_norm": 0.8753686459434138, "learning_rate": 1.190563455844118e-08, "loss": 0.2779, "step": 28520 }, { "epoch": 0.9787577213452299, "grad_norm": 0.7882091629257185, "learning_rate": 1.1867337165049064e-08, "loss": 0.2653, "step": 28521 }, { "epoch": 0.9787920384351407, "grad_norm": 0.8807848076943225, "learning_rate": 1.182910139422011e-08, "loss": 0.2769, "step": 28522 }, { "epoch": 0.9788263555250515, "grad_norm": 0.7516185281196398, "learning_rate": 1.179092724642672e-08, "loss": 0.2708, "step": 28523 }, { "epoch": 0.9788606726149622, "grad_norm": 0.9088652817074397, "learning_rate": 1.1752814722141292e-08, "loss": 0.2731, "step": 28524 }, { "epoch": 0.978894989704873, "grad_norm": 0.7861063348926687, "learning_rate": 1.1714763821834008e-08, "loss": 0.2743, "step": 28525 }, { "epoch": 0.9789293067947838, "grad_norm": 0.8479292617247404, "learning_rate": 1.167677454597449e-08, "loss": 0.2595, "step": 28526 }, { "epoch": 0.9789636238846946, "grad_norm": 0.7866110577317246, "learning_rate": 1.1638846895032918e-08, "loss": 0.247, "step": 28527 }, { "epoch": 0.9789979409746054, "grad_norm": 0.7869708761163686, "learning_rate": 1.1600980869477808e-08, "loss": 0.2265, "step": 28528 }, { "epoch": 0.9790322580645161, "grad_norm": 0.7604821919845491, "learning_rate": 1.156317646977656e-08, "loss": 0.2124, "step": 28529 }, { "epoch": 0.9790665751544269, "grad_norm": 0.9248753658775483, "learning_rate": 1.1525433696396027e-08, "loss": 0.277, "step": 28530 }, { "epoch": 0.9791008922443377, "grad_norm": 0.7835604413377669, "learning_rate": 1.1487752549803055e-08, "loss": 0.2655, "step": 28531 }, { "epoch": 0.9791352093342485, "grad_norm": 0.7982023475401204, "learning_rate": 1.145013303046283e-08, "loss": 0.3151, "step": 28532 }, { "epoch": 0.9791695264241592, "grad_norm": 1.0142619725192037, "learning_rate": 1.1412575138839976e-08, "loss": 0.2709, "step": 28533 }, { "epoch": 0.97920384351407, "grad_norm": 0.7571040143269306, "learning_rate": 1.1375078875398571e-08, "loss": 0.2087, "step": 28534 }, { "epoch": 0.9792381606039808, "grad_norm": 0.7831338084417412, "learning_rate": 1.133764424060213e-08, "loss": 0.2457, "step": 28535 }, { "epoch": 0.9792724776938916, "grad_norm": 0.8710797971960165, "learning_rate": 1.1300271234912508e-08, "loss": 0.2421, "step": 28536 }, { "epoch": 0.9793067947838023, "grad_norm": 0.8922252892422073, "learning_rate": 1.126295985879211e-08, "loss": 0.2666, "step": 28537 }, { "epoch": 0.9793411118737131, "grad_norm": 0.7239689940642787, "learning_rate": 1.1225710112701128e-08, "loss": 0.2528, "step": 28538 }, { "epoch": 0.9793754289636238, "grad_norm": 0.8534980043367322, "learning_rate": 1.1188521997100854e-08, "loss": 0.277, "step": 28539 }, { "epoch": 0.9794097460535347, "grad_norm": 0.8097081986939527, "learning_rate": 1.1151395512449259e-08, "loss": 0.2622, "step": 28540 }, { "epoch": 0.9794440631434455, "grad_norm": 0.8660262992822587, "learning_rate": 1.1114330659205974e-08, "loss": 0.253, "step": 28541 }, { "epoch": 0.9794783802333562, "grad_norm": 0.8453357533186787, "learning_rate": 1.1077327437828967e-08, "loss": 0.2758, "step": 28542 }, { "epoch": 0.979512697323267, "grad_norm": 0.693790767273401, "learning_rate": 1.1040385848775092e-08, "loss": 0.2288, "step": 28543 }, { "epoch": 0.9795470144131777, "grad_norm": 0.885691715331959, "learning_rate": 1.1003505892500655e-08, "loss": 0.2736, "step": 28544 }, { "epoch": 0.9795813315030886, "grad_norm": 0.913126052475951, "learning_rate": 1.0966687569460844e-08, "loss": 0.2745, "step": 28545 }, { "epoch": 0.9796156485929993, "grad_norm": 0.7911801444549922, "learning_rate": 1.0929930880111405e-08, "loss": 0.2546, "step": 28546 }, { "epoch": 0.9796499656829101, "grad_norm": 0.733792876647068, "learning_rate": 1.0893235824905312e-08, "loss": 0.2376, "step": 28547 }, { "epoch": 0.9796842827728208, "grad_norm": 0.7440939151243269, "learning_rate": 1.0856602404297201e-08, "loss": 0.3138, "step": 28548 }, { "epoch": 0.9797185998627317, "grad_norm": 0.8085402392598088, "learning_rate": 1.082003061873893e-08, "loss": 0.2633, "step": 28549 }, { "epoch": 0.9797529169526424, "grad_norm": 0.8865964353197885, "learning_rate": 1.0783520468682363e-08, "loss": 0.2805, "step": 28550 }, { "epoch": 0.9797872340425532, "grad_norm": 0.7577293891237015, "learning_rate": 1.0747071954578247e-08, "loss": 0.2238, "step": 28551 }, { "epoch": 0.9798215511324639, "grad_norm": 0.7910461854033356, "learning_rate": 1.0710685076877337e-08, "loss": 0.2488, "step": 28552 }, { "epoch": 0.9798558682223747, "grad_norm": 0.7253099348620725, "learning_rate": 1.0674359836029268e-08, "loss": 0.2212, "step": 28553 }, { "epoch": 0.9798901853122856, "grad_norm": 0.6976585611112954, "learning_rate": 1.0638096232482021e-08, "loss": 0.2693, "step": 28554 }, { "epoch": 0.9799245024021963, "grad_norm": 0.7346178127790719, "learning_rate": 1.060189426668412e-08, "loss": 0.2272, "step": 28555 }, { "epoch": 0.9799588194921071, "grad_norm": 0.7940281730970846, "learning_rate": 1.056575393908299e-08, "loss": 0.3014, "step": 28556 }, { "epoch": 0.9799931365820178, "grad_norm": 0.7322004277203272, "learning_rate": 1.0529675250124938e-08, "loss": 0.2643, "step": 28557 }, { "epoch": 0.9800274536719287, "grad_norm": 0.7349473309397273, "learning_rate": 1.0493658200255163e-08, "loss": 0.2361, "step": 28558 }, { "epoch": 0.9800617707618394, "grad_norm": 0.7318849537020005, "learning_rate": 1.0457702789919422e-08, "loss": 0.2332, "step": 28559 }, { "epoch": 0.9800960878517502, "grad_norm": 0.8056457370740229, "learning_rate": 1.0421809019561246e-08, "loss": 0.2375, "step": 28560 }, { "epoch": 0.9801304049416609, "grad_norm": 0.8231535896055822, "learning_rate": 1.0385976889624173e-08, "loss": 0.2967, "step": 28561 }, { "epoch": 0.9801647220315717, "grad_norm": 0.8054447515524049, "learning_rate": 1.0350206400551732e-08, "loss": 0.2386, "step": 28562 }, { "epoch": 0.9801990391214825, "grad_norm": 0.7047073513446008, "learning_rate": 1.0314497552784687e-08, "loss": 0.2372, "step": 28563 }, { "epoch": 0.9802333562113933, "grad_norm": 0.8119194159023312, "learning_rate": 1.0278850346764346e-08, "loss": 0.231, "step": 28564 }, { "epoch": 0.980267673301304, "grad_norm": 0.82353630852214, "learning_rate": 1.0243264782932028e-08, "loss": 0.2874, "step": 28565 }, { "epoch": 0.9803019903912148, "grad_norm": 0.7681239567369788, "learning_rate": 1.0207740861726267e-08, "loss": 0.2691, "step": 28566 }, { "epoch": 0.9803363074811255, "grad_norm": 0.7295190878026532, "learning_rate": 1.0172278583586159e-08, "loss": 0.2836, "step": 28567 }, { "epoch": 0.9803706245710364, "grad_norm": 0.7343634122496401, "learning_rate": 1.0136877948950242e-08, "loss": 0.299, "step": 28568 }, { "epoch": 0.9804049416609472, "grad_norm": 0.7512369364589301, "learning_rate": 1.0101538958255386e-08, "loss": 0.2425, "step": 28569 }, { "epoch": 0.9804392587508579, "grad_norm": 0.6682053922719645, "learning_rate": 1.0066261611938466e-08, "loss": 0.2358, "step": 28570 }, { "epoch": 0.9804735758407687, "grad_norm": 0.7707796186029326, "learning_rate": 1.0031045910435244e-08, "loss": 0.2424, "step": 28571 }, { "epoch": 0.9805078929306795, "grad_norm": 0.7295155146447647, "learning_rate": 9.995891854180928e-09, "loss": 0.2289, "step": 28572 }, { "epoch": 0.9805422100205903, "grad_norm": 0.6751819765896196, "learning_rate": 9.960799443609059e-09, "loss": 0.2328, "step": 28573 }, { "epoch": 0.980576527110501, "grad_norm": 0.7715698580822108, "learning_rate": 9.925768679153735e-09, "loss": 0.2521, "step": 28574 }, { "epoch": 0.9806108442004118, "grad_norm": 0.8171455078851533, "learning_rate": 9.890799561247943e-09, "loss": 0.2232, "step": 28575 }, { "epoch": 0.9806451612903225, "grad_norm": 0.6882368160484159, "learning_rate": 9.85589209032356e-09, "loss": 0.22, "step": 28576 }, { "epoch": 0.9806794783802334, "grad_norm": 0.7505225064789111, "learning_rate": 9.821046266811352e-09, "loss": 0.2441, "step": 28577 }, { "epoch": 0.9807137954701441, "grad_norm": 0.8197758596428448, "learning_rate": 9.78626209114264e-09, "loss": 0.2479, "step": 28578 }, { "epoch": 0.9807481125600549, "grad_norm": 0.7103029385649153, "learning_rate": 9.751539563745971e-09, "loss": 0.2593, "step": 28579 }, { "epoch": 0.9807824296499656, "grad_norm": 0.7853138636971252, "learning_rate": 9.716878685051556e-09, "loss": 0.2524, "step": 28580 }, { "epoch": 0.9808167467398765, "grad_norm": 0.8272019779404628, "learning_rate": 9.682279455486832e-09, "loss": 0.265, "step": 28581 }, { "epoch": 0.9808510638297873, "grad_norm": 0.6880863122373848, "learning_rate": 9.647741875479233e-09, "loss": 0.2261, "step": 28582 }, { "epoch": 0.980885380919698, "grad_norm": 0.7951359487839155, "learning_rate": 9.613265945455641e-09, "loss": 0.2036, "step": 28583 }, { "epoch": 0.9809196980096088, "grad_norm": 0.7940420919989176, "learning_rate": 9.578851665841826e-09, "loss": 0.2242, "step": 28584 }, { "epoch": 0.9809540150995195, "grad_norm": 0.8197667002678728, "learning_rate": 9.544499037063005e-09, "loss": 0.2956, "step": 28585 }, { "epoch": 0.9809883321894304, "grad_norm": 0.8083647540409091, "learning_rate": 9.510208059543834e-09, "loss": 0.2223, "step": 28586 }, { "epoch": 0.9810226492793411, "grad_norm": 0.7328145898481341, "learning_rate": 9.475978733707314e-09, "loss": 0.2249, "step": 28587 }, { "epoch": 0.9810569663692519, "grad_norm": 0.8252004236518838, "learning_rate": 9.44181105997699e-09, "loss": 0.2769, "step": 28588 }, { "epoch": 0.9810912834591626, "grad_norm": 0.7033554401640473, "learning_rate": 9.40770503877475e-09, "loss": 0.2759, "step": 28589 }, { "epoch": 0.9811256005490734, "grad_norm": 0.8219125135442829, "learning_rate": 9.373660670521923e-09, "loss": 0.2493, "step": 28590 }, { "epoch": 0.9811599176389842, "grad_norm": 0.7983868785652488, "learning_rate": 9.339677955638727e-09, "loss": 0.2972, "step": 28591 }, { "epoch": 0.981194234728895, "grad_norm": 0.6767502358889305, "learning_rate": 9.305756894545382e-09, "loss": 0.2336, "step": 28592 }, { "epoch": 0.9812285518188057, "grad_norm": 0.7628766959569055, "learning_rate": 9.271897487660997e-09, "loss": 0.2197, "step": 28593 }, { "epoch": 0.9812628689087165, "grad_norm": 0.7406781901645719, "learning_rate": 9.238099735403016e-09, "loss": 0.2434, "step": 28594 }, { "epoch": 0.9812971859986274, "grad_norm": 0.7857523758312506, "learning_rate": 9.20436363819055e-09, "loss": 0.26, "step": 28595 }, { "epoch": 0.9813315030885381, "grad_norm": 0.8354535935063295, "learning_rate": 9.17068919643882e-09, "loss": 0.251, "step": 28596 }, { "epoch": 0.9813658201784489, "grad_norm": 0.8774361365806997, "learning_rate": 9.137076410564717e-09, "loss": 0.3146, "step": 28597 }, { "epoch": 0.9814001372683596, "grad_norm": 0.7683758585022296, "learning_rate": 9.103525280983461e-09, "loss": 0.2258, "step": 28598 }, { "epoch": 0.9814344543582704, "grad_norm": 0.9182549923906091, "learning_rate": 9.070035808109169e-09, "loss": 0.2485, "step": 28599 }, { "epoch": 0.9814687714481812, "grad_norm": 0.7752138652742182, "learning_rate": 9.036607992355395e-09, "loss": 0.3144, "step": 28600 }, { "epoch": 0.981503088538092, "grad_norm": 0.7242980478373887, "learning_rate": 9.003241834135701e-09, "loss": 0.2295, "step": 28601 }, { "epoch": 0.9815374056280027, "grad_norm": 0.8104449797285889, "learning_rate": 8.969937333861979e-09, "loss": 0.2953, "step": 28602 }, { "epoch": 0.9815717227179135, "grad_norm": 0.7974837813593102, "learning_rate": 8.936694491946119e-09, "loss": 0.2584, "step": 28603 }, { "epoch": 0.9816060398078243, "grad_norm": 0.781882827541205, "learning_rate": 8.903513308797796e-09, "loss": 0.3019, "step": 28604 }, { "epoch": 0.9816403568977351, "grad_norm": 0.8067534171510875, "learning_rate": 8.870393784827791e-09, "loss": 0.3089, "step": 28605 }, { "epoch": 0.9816746739876459, "grad_norm": 0.8055750361207157, "learning_rate": 8.837335920445223e-09, "loss": 0.2249, "step": 28606 }, { "epoch": 0.9817089910775566, "grad_norm": 0.7746586940243932, "learning_rate": 8.804339716057542e-09, "loss": 0.2165, "step": 28607 }, { "epoch": 0.9817433081674674, "grad_norm": 0.8869603427498394, "learning_rate": 8.77140517207331e-09, "loss": 0.245, "step": 28608 }, { "epoch": 0.9817776252573782, "grad_norm": 0.6975593356141468, "learning_rate": 8.738532288899982e-09, "loss": 0.2389, "step": 28609 }, { "epoch": 0.981811942347289, "grad_norm": 0.7656205117815386, "learning_rate": 8.705721066941675e-09, "loss": 0.2261, "step": 28610 }, { "epoch": 0.9818462594371997, "grad_norm": 0.8535813464462864, "learning_rate": 8.672971506605843e-09, "loss": 0.2786, "step": 28611 }, { "epoch": 0.9818805765271105, "grad_norm": 0.7680297288312455, "learning_rate": 8.640283608295496e-09, "loss": 0.2658, "step": 28612 }, { "epoch": 0.9819148936170212, "grad_norm": 0.8066900716910208, "learning_rate": 8.607657372415312e-09, "loss": 0.2721, "step": 28613 }, { "epoch": 0.9819492107069321, "grad_norm": 0.74748734198651, "learning_rate": 8.575092799367745e-09, "loss": 0.2428, "step": 28614 }, { "epoch": 0.9819835277968428, "grad_norm": 0.7761166392390018, "learning_rate": 8.542589889555808e-09, "loss": 0.2291, "step": 28615 }, { "epoch": 0.9820178448867536, "grad_norm": 0.8315569781068585, "learning_rate": 8.510148643380844e-09, "loss": 0.2882, "step": 28616 }, { "epoch": 0.9820521619766643, "grad_norm": 0.7810158268211626, "learning_rate": 8.477769061243646e-09, "loss": 0.2484, "step": 28617 }, { "epoch": 0.9820864790665752, "grad_norm": 0.7631282657960009, "learning_rate": 8.445451143543893e-09, "loss": 0.2363, "step": 28618 }, { "epoch": 0.982120796156486, "grad_norm": 0.7552130537036772, "learning_rate": 8.413194890680708e-09, "loss": 0.2382, "step": 28619 }, { "epoch": 0.9821551132463967, "grad_norm": 0.7387759896734069, "learning_rate": 8.381000303052666e-09, "loss": 0.2396, "step": 28620 }, { "epoch": 0.9821894303363075, "grad_norm": 0.7426386440191984, "learning_rate": 8.348867381058335e-09, "loss": 0.2789, "step": 28621 }, { "epoch": 0.9822237474262182, "grad_norm": 0.8124722774353171, "learning_rate": 8.316796125093507e-09, "loss": 0.2455, "step": 28622 }, { "epoch": 0.9822580645161291, "grad_norm": 0.8312755203290954, "learning_rate": 8.284786535555645e-09, "loss": 0.3023, "step": 28623 }, { "epoch": 0.9822923816060398, "grad_norm": 0.8002563498033669, "learning_rate": 8.252838612838876e-09, "loss": 0.2796, "step": 28624 }, { "epoch": 0.9823266986959506, "grad_norm": 0.7394298574178025, "learning_rate": 8.22095235733844e-09, "loss": 0.2808, "step": 28625 }, { "epoch": 0.9823610157858613, "grad_norm": 0.8232632954435588, "learning_rate": 8.189127769448469e-09, "loss": 0.2312, "step": 28626 }, { "epoch": 0.9823953328757722, "grad_norm": 0.8464883243149911, "learning_rate": 8.157364849561978e-09, "loss": 0.2739, "step": 28627 }, { "epoch": 0.9824296499656829, "grad_norm": 0.8503809737299346, "learning_rate": 8.125663598071431e-09, "loss": 0.2494, "step": 28628 }, { "epoch": 0.9824639670555937, "grad_norm": 0.8112576947307448, "learning_rate": 8.094024015368184e-09, "loss": 0.2276, "step": 28629 }, { "epoch": 0.9824982841455044, "grad_norm": 0.8827062795337742, "learning_rate": 8.062446101843036e-09, "loss": 0.2667, "step": 28630 }, { "epoch": 0.9825326012354152, "grad_norm": 0.7146770725877686, "learning_rate": 8.030929857886783e-09, "loss": 0.2494, "step": 28631 }, { "epoch": 0.982566918325326, "grad_norm": 0.7220443551960757, "learning_rate": 7.999475283888003e-09, "loss": 0.2673, "step": 28632 }, { "epoch": 0.9826012354152368, "grad_norm": 0.832416865303512, "learning_rate": 7.968082380235832e-09, "loss": 0.2524, "step": 28633 }, { "epoch": 0.9826355525051476, "grad_norm": 0.814424527189817, "learning_rate": 7.936751147317734e-09, "loss": 0.2889, "step": 28634 }, { "epoch": 0.9826698695950583, "grad_norm": 0.7074496275401543, "learning_rate": 7.905481585520625e-09, "loss": 0.2567, "step": 28635 }, { "epoch": 0.9827041866849691, "grad_norm": 0.7240878679941516, "learning_rate": 7.874273695231416e-09, "loss": 0.313, "step": 28636 }, { "epoch": 0.9827385037748799, "grad_norm": 0.7868316569459505, "learning_rate": 7.843127476835355e-09, "loss": 0.2313, "step": 28637 }, { "epoch": 0.9827728208647907, "grad_norm": 0.7104394661711204, "learning_rate": 7.812042930717135e-09, "loss": 0.224, "step": 28638 }, { "epoch": 0.9828071379547014, "grad_norm": 0.7832760916140259, "learning_rate": 7.781020057260891e-09, "loss": 0.2547, "step": 28639 }, { "epoch": 0.9828414550446122, "grad_norm": 0.7355382205761106, "learning_rate": 7.750058856849096e-09, "loss": 0.2388, "step": 28640 }, { "epoch": 0.982875772134523, "grad_norm": 0.7918005561559793, "learning_rate": 7.719159329865888e-09, "loss": 0.2551, "step": 28641 }, { "epoch": 0.9829100892244338, "grad_norm": 0.8697361830184956, "learning_rate": 7.688321476691518e-09, "loss": 0.2844, "step": 28642 }, { "epoch": 0.9829444063143445, "grad_norm": 0.7440479123165968, "learning_rate": 7.657545297707347e-09, "loss": 0.2595, "step": 28643 }, { "epoch": 0.9829787234042553, "grad_norm": 0.8118248405575812, "learning_rate": 7.626830793294182e-09, "loss": 0.2705, "step": 28644 }, { "epoch": 0.983013040494166, "grad_norm": 0.7539173227574011, "learning_rate": 7.596177963830609e-09, "loss": 0.2413, "step": 28645 }, { "epoch": 0.9830473575840769, "grad_norm": 0.7870233452357092, "learning_rate": 7.565586809695768e-09, "loss": 0.2815, "step": 28646 }, { "epoch": 0.9830816746739877, "grad_norm": 0.8499228953598374, "learning_rate": 7.535057331267137e-09, "loss": 0.3151, "step": 28647 }, { "epoch": 0.9831159917638984, "grad_norm": 0.7443697450365111, "learning_rate": 7.504589528922746e-09, "loss": 0.3049, "step": 28648 }, { "epoch": 0.9831503088538092, "grad_norm": 0.9293404764862448, "learning_rate": 7.474183403038405e-09, "loss": 0.2659, "step": 28649 }, { "epoch": 0.98318462594372, "grad_norm": 0.8107318767127814, "learning_rate": 7.443838953989368e-09, "loss": 0.241, "step": 28650 }, { "epoch": 0.9832189430336308, "grad_norm": 0.8274177981619492, "learning_rate": 7.413556182151449e-09, "loss": 0.2865, "step": 28651 }, { "epoch": 0.9832532601235415, "grad_norm": 0.9079546898963186, "learning_rate": 7.383335087897681e-09, "loss": 0.2969, "step": 28652 }, { "epoch": 0.9832875772134523, "grad_norm": 0.7032839649033962, "learning_rate": 7.353175671602763e-09, "loss": 0.2471, "step": 28653 }, { "epoch": 0.983321894303363, "grad_norm": 0.7094539532359713, "learning_rate": 7.323077933638068e-09, "loss": 0.2468, "step": 28654 }, { "epoch": 0.9833562113932739, "grad_norm": 0.8660443784654361, "learning_rate": 7.293041874375517e-09, "loss": 0.2524, "step": 28655 }, { "epoch": 0.9833905284831846, "grad_norm": 0.6961665605106853, "learning_rate": 7.263067494187037e-09, "loss": 0.2409, "step": 28656 }, { "epoch": 0.9834248455730954, "grad_norm": 0.8879525574199565, "learning_rate": 7.233154793442332e-09, "loss": 0.2802, "step": 28657 }, { "epoch": 0.9834591626630061, "grad_norm": 0.7091674934609828, "learning_rate": 7.20330377251055e-09, "loss": 0.266, "step": 28658 }, { "epoch": 0.9834934797529169, "grad_norm": 0.7981002021338427, "learning_rate": 7.173514431761397e-09, "loss": 0.2556, "step": 28659 }, { "epoch": 0.9835277968428278, "grad_norm": 0.847344263760343, "learning_rate": 7.143786771562355e-09, "loss": 0.2712, "step": 28660 }, { "epoch": 0.9835621139327385, "grad_norm": 0.7908712606252992, "learning_rate": 7.114120792280355e-09, "loss": 0.301, "step": 28661 }, { "epoch": 0.9835964310226493, "grad_norm": 0.8437269105898648, "learning_rate": 7.084516494282878e-09, "loss": 0.2751, "step": 28662 }, { "epoch": 0.98363074811256, "grad_norm": 0.8617410902124344, "learning_rate": 7.054973877934635e-09, "loss": 0.2986, "step": 28663 }, { "epoch": 0.9836650652024709, "grad_norm": 0.7608226042039267, "learning_rate": 7.025492943600887e-09, "loss": 0.2511, "step": 28664 }, { "epoch": 0.9836993822923816, "grad_norm": 0.7896552476279328, "learning_rate": 6.996073691645788e-09, "loss": 0.2552, "step": 28665 }, { "epoch": 0.9837336993822924, "grad_norm": 0.7752379803367, "learning_rate": 6.966716122433492e-09, "loss": 0.2371, "step": 28666 }, { "epoch": 0.9837680164722031, "grad_norm": 0.8270444285222363, "learning_rate": 6.937420236325931e-09, "loss": 0.2627, "step": 28667 }, { "epoch": 0.9838023335621139, "grad_norm": 0.751643364468948, "learning_rate": 6.908186033685038e-09, "loss": 0.271, "step": 28668 }, { "epoch": 0.9838366506520247, "grad_norm": 0.8613179174654942, "learning_rate": 6.8790135148716355e-09, "loss": 0.2408, "step": 28669 }, { "epoch": 0.9838709677419355, "grad_norm": 0.7335591438277286, "learning_rate": 6.8499026802471e-09, "loss": 0.2933, "step": 28670 }, { "epoch": 0.9839052848318462, "grad_norm": 0.838497492243931, "learning_rate": 6.820853530171146e-09, "loss": 0.3584, "step": 28671 }, { "epoch": 0.983939601921757, "grad_norm": 0.7700636534235039, "learning_rate": 6.791866065001262e-09, "loss": 0.2538, "step": 28672 }, { "epoch": 0.9839739190116679, "grad_norm": 0.7929209150378211, "learning_rate": 6.762940285096608e-09, "loss": 0.2743, "step": 28673 }, { "epoch": 0.9840082361015786, "grad_norm": 0.8347374870378671, "learning_rate": 6.734076190814121e-09, "loss": 0.2696, "step": 28674 }, { "epoch": 0.9840425531914894, "grad_norm": 0.7285954132672308, "learning_rate": 6.705273782510735e-09, "loss": 0.2221, "step": 28675 }, { "epoch": 0.9840768702814001, "grad_norm": 0.9739212986212508, "learning_rate": 6.676533060542279e-09, "loss": 0.236, "step": 28676 }, { "epoch": 0.9841111873713109, "grad_norm": 0.8689462236697663, "learning_rate": 6.647854025262912e-09, "loss": 0.258, "step": 28677 }, { "epoch": 0.9841455044612217, "grad_norm": 0.8102004029145154, "learning_rate": 6.619236677028462e-09, "loss": 0.3235, "step": 28678 }, { "epoch": 0.9841798215511325, "grad_norm": 0.8576168589216295, "learning_rate": 6.5906810161908695e-09, "loss": 0.2351, "step": 28679 }, { "epoch": 0.9842141386410432, "grad_norm": 0.776569650054115, "learning_rate": 6.562187043104295e-09, "loss": 0.2615, "step": 28680 }, { "epoch": 0.984248455730954, "grad_norm": 0.8885289737142811, "learning_rate": 6.5337547581195705e-09, "loss": 0.2394, "step": 28681 }, { "epoch": 0.9842827728208647, "grad_norm": 0.7375381204519692, "learning_rate": 6.505384161588635e-09, "loss": 0.2417, "step": 28682 }, { "epoch": 0.9843170899107756, "grad_norm": 0.748954385218001, "learning_rate": 6.4770752538623196e-09, "loss": 0.2139, "step": 28683 }, { "epoch": 0.9843514070006864, "grad_norm": 0.7182146017131437, "learning_rate": 6.448828035289234e-09, "loss": 0.2416, "step": 28684 }, { "epoch": 0.9843857240905971, "grad_norm": 0.8613166357037281, "learning_rate": 6.420642506219654e-09, "loss": 0.3003, "step": 28685 }, { "epoch": 0.9844200411805079, "grad_norm": 0.7621981453088992, "learning_rate": 6.392518667000525e-09, "loss": 0.2844, "step": 28686 }, { "epoch": 0.9844543582704187, "grad_norm": 0.841978053166611, "learning_rate": 6.3644565179799004e-09, "loss": 0.2617, "step": 28687 }, { "epoch": 0.9844886753603295, "grad_norm": 0.79671920090879, "learning_rate": 6.33645605950417e-09, "loss": 0.2482, "step": 28688 }, { "epoch": 0.9845229924502402, "grad_norm": 0.8358548737770007, "learning_rate": 6.308517291919725e-09, "loss": 0.2888, "step": 28689 }, { "epoch": 0.984557309540151, "grad_norm": 0.8273367066028514, "learning_rate": 6.2806402155718425e-09, "loss": 0.3094, "step": 28690 }, { "epoch": 0.9845916266300617, "grad_norm": 0.8540938186006746, "learning_rate": 6.252824830804138e-09, "loss": 0.261, "step": 28691 }, { "epoch": 0.9846259437199726, "grad_norm": 0.7923569088899783, "learning_rate": 6.225071137960781e-09, "loss": 0.2625, "step": 28692 }, { "epoch": 0.9846602608098833, "grad_norm": 0.787261414053494, "learning_rate": 6.197379137384274e-09, "loss": 0.2768, "step": 28693 }, { "epoch": 0.9846945778997941, "grad_norm": 0.8046909725951971, "learning_rate": 6.169748829417122e-09, "loss": 0.2093, "step": 28694 }, { "epoch": 0.9847288949897048, "grad_norm": 0.7593194750817974, "learning_rate": 6.142180214400717e-09, "loss": 0.2963, "step": 28695 }, { "epoch": 0.9847632120796157, "grad_norm": 0.7147174432675808, "learning_rate": 6.1146732926747885e-09, "loss": 0.2673, "step": 28696 }, { "epoch": 0.9847975291695265, "grad_norm": 0.7333068581472667, "learning_rate": 6.087228064580175e-09, "loss": 0.2735, "step": 28697 }, { "epoch": 0.9848318462594372, "grad_norm": 0.8057602872254946, "learning_rate": 6.059844530455494e-09, "loss": 0.2311, "step": 28698 }, { "epoch": 0.984866163349348, "grad_norm": 0.8120941842878907, "learning_rate": 6.032522690639364e-09, "loss": 0.2527, "step": 28699 }, { "epoch": 0.9849004804392587, "grad_norm": 0.757178485105746, "learning_rate": 6.005262545468738e-09, "loss": 0.2782, "step": 28700 }, { "epoch": 0.9849347975291696, "grad_norm": 0.9119928940666305, "learning_rate": 5.978064095280567e-09, "loss": 0.2865, "step": 28701 }, { "epoch": 0.9849691146190803, "grad_norm": 0.731480508849397, "learning_rate": 5.950927340410695e-09, "loss": 0.2238, "step": 28702 }, { "epoch": 0.9850034317089911, "grad_norm": 0.7534400102298607, "learning_rate": 5.9238522811949635e-09, "loss": 0.2667, "step": 28703 }, { "epoch": 0.9850377487989018, "grad_norm": 0.8512565818842999, "learning_rate": 5.896838917967551e-09, "loss": 0.2192, "step": 28704 }, { "epoch": 0.9850720658888126, "grad_norm": 0.7491691625040338, "learning_rate": 5.869887251062079e-09, "loss": 0.2465, "step": 28705 }, { "epoch": 0.9851063829787234, "grad_norm": 0.727661285519055, "learning_rate": 5.842997280811613e-09, "loss": 0.2409, "step": 28706 }, { "epoch": 0.9851407000686342, "grad_norm": 0.7465310957021354, "learning_rate": 5.8161690075481115e-09, "loss": 0.2528, "step": 28707 }, { "epoch": 0.9851750171585449, "grad_norm": 0.7976667486630913, "learning_rate": 5.78940243160353e-09, "loss": 0.2795, "step": 28708 }, { "epoch": 0.9852093342484557, "grad_norm": 0.7605198067338481, "learning_rate": 5.762697553307605e-09, "loss": 0.272, "step": 28709 }, { "epoch": 0.9852436513383666, "grad_norm": 0.811800850086773, "learning_rate": 5.736054372991184e-09, "loss": 0.2569, "step": 28710 }, { "epoch": 0.9852779684282773, "grad_norm": 0.8399290217243919, "learning_rate": 5.709472890983447e-09, "loss": 0.2924, "step": 28711 }, { "epoch": 0.9853122855181881, "grad_norm": 0.8741640248312124, "learning_rate": 5.682953107611911e-09, "loss": 0.2266, "step": 28712 }, { "epoch": 0.9853466026080988, "grad_norm": 0.9750256748974426, "learning_rate": 5.656495023204645e-09, "loss": 0.2136, "step": 28713 }, { "epoch": 0.9853809196980096, "grad_norm": 0.8939135449257649, "learning_rate": 5.6300986380886106e-09, "loss": 0.3168, "step": 28714 }, { "epoch": 0.9854152367879204, "grad_norm": 0.7754333672534883, "learning_rate": 5.603763952590214e-09, "loss": 0.2327, "step": 28715 }, { "epoch": 0.9854495538778312, "grad_norm": 0.7690434610890766, "learning_rate": 5.5774909670336384e-09, "loss": 0.2594, "step": 28716 }, { "epoch": 0.9854838709677419, "grad_norm": 0.7679961563488842, "learning_rate": 5.551279681744737e-09, "loss": 0.2115, "step": 28717 }, { "epoch": 0.9855181880576527, "grad_norm": 0.7718623088296808, "learning_rate": 5.525130097047138e-09, "loss": 0.2778, "step": 28718 }, { "epoch": 0.9855525051475635, "grad_norm": 0.8383820979547509, "learning_rate": 5.499042213262807e-09, "loss": 0.2227, "step": 28719 }, { "epoch": 0.9855868222374743, "grad_norm": 0.7238997077835609, "learning_rate": 5.473016030715372e-09, "loss": 0.2411, "step": 28720 }, { "epoch": 0.985621139327385, "grad_norm": 0.7825114867020683, "learning_rate": 5.44705154972569e-09, "loss": 0.2812, "step": 28721 }, { "epoch": 0.9856554564172958, "grad_norm": 0.7296069854012592, "learning_rate": 5.421148770614615e-09, "loss": 0.3045, "step": 28722 }, { "epoch": 0.9856897735072065, "grad_norm": 0.80767144558317, "learning_rate": 5.39530769370189e-09, "loss": 0.2754, "step": 28723 }, { "epoch": 0.9857240905971174, "grad_norm": 0.7368588924557362, "learning_rate": 5.36952831930726e-09, "loss": 0.2732, "step": 28724 }, { "epoch": 0.9857584076870282, "grad_norm": 0.7943114838638992, "learning_rate": 5.343810647748804e-09, "loss": 0.2413, "step": 28725 }, { "epoch": 0.9857927247769389, "grad_norm": 0.7152724537761366, "learning_rate": 5.318154679344045e-09, "loss": 0.2361, "step": 28726 }, { "epoch": 0.9858270418668497, "grad_norm": 0.7885309429138098, "learning_rate": 5.292560414410508e-09, "loss": 0.2814, "step": 28727 }, { "epoch": 0.9858613589567604, "grad_norm": 0.7711576404176309, "learning_rate": 5.2670278532640505e-09, "loss": 0.2396, "step": 28728 }, { "epoch": 0.9858956760466713, "grad_norm": 0.7314266282593017, "learning_rate": 5.241556996220532e-09, "loss": 0.255, "step": 28729 }, { "epoch": 0.985929993136582, "grad_norm": 0.7811644406617311, "learning_rate": 5.216147843594144e-09, "loss": 0.2521, "step": 28730 }, { "epoch": 0.9859643102264928, "grad_norm": 0.723261490645434, "learning_rate": 5.190800395698525e-09, "loss": 0.2185, "step": 28731 }, { "epoch": 0.9859986273164035, "grad_norm": 0.8368310276392386, "learning_rate": 5.165514652847314e-09, "loss": 0.2773, "step": 28732 }, { "epoch": 0.9860329444063144, "grad_norm": 0.753239774601416, "learning_rate": 5.140290615353039e-09, "loss": 0.2039, "step": 28733 }, { "epoch": 0.9860672614962251, "grad_norm": 0.7862897521697779, "learning_rate": 5.115128283527115e-09, "loss": 0.2591, "step": 28734 }, { "epoch": 0.9861015785861359, "grad_norm": 0.876384903946859, "learning_rate": 5.090027657679853e-09, "loss": 0.2813, "step": 28735 }, { "epoch": 0.9861358956760466, "grad_norm": 0.7572227810555218, "learning_rate": 5.0649887381221115e-09, "loss": 0.2736, "step": 28736 }, { "epoch": 0.9861702127659574, "grad_norm": 0.7383685966174197, "learning_rate": 5.040011525162536e-09, "loss": 0.2334, "step": 28737 }, { "epoch": 0.9862045298558683, "grad_norm": 0.8499251007208473, "learning_rate": 5.015096019110321e-09, "loss": 0.3212, "step": 28738 }, { "epoch": 0.986238846945779, "grad_norm": 0.6982210695688258, "learning_rate": 4.9902422202730005e-09, "loss": 0.2511, "step": 28739 }, { "epoch": 0.9862731640356898, "grad_norm": 0.8105455349046261, "learning_rate": 4.96545012895755e-09, "loss": 0.2336, "step": 28740 }, { "epoch": 0.9863074811256005, "grad_norm": 0.8556870297552421, "learning_rate": 4.940719745470391e-09, "loss": 0.3299, "step": 28741 }, { "epoch": 0.9863417982155114, "grad_norm": 0.7603863795746405, "learning_rate": 4.916051070116834e-09, "loss": 0.2596, "step": 28742 }, { "epoch": 0.9863761153054221, "grad_norm": 0.7252306878012983, "learning_rate": 4.891444103202192e-09, "loss": 0.226, "step": 28743 }, { "epoch": 0.9864104323953329, "grad_norm": 0.7678193877884998, "learning_rate": 4.866898845029555e-09, "loss": 0.2986, "step": 28744 }, { "epoch": 0.9864447494852436, "grad_norm": 0.6913131629213884, "learning_rate": 4.842415295903125e-09, "loss": 0.2323, "step": 28745 }, { "epoch": 0.9864790665751544, "grad_norm": 0.956402854954014, "learning_rate": 4.817993456124325e-09, "loss": 0.2363, "step": 28746 }, { "epoch": 0.9865133836650652, "grad_norm": 0.8571498299442284, "learning_rate": 4.793633325995695e-09, "loss": 0.2353, "step": 28747 }, { "epoch": 0.986547700754976, "grad_norm": 0.7368234686403903, "learning_rate": 4.769334905817547e-09, "loss": 0.2654, "step": 28748 }, { "epoch": 0.9865820178448867, "grad_norm": 0.7604488454517513, "learning_rate": 4.7450981958907515e-09, "loss": 0.2748, "step": 28749 }, { "epoch": 0.9866163349347975, "grad_norm": 0.7674333696633195, "learning_rate": 4.720923196514515e-09, "loss": 0.2222, "step": 28750 }, { "epoch": 0.9866506520247083, "grad_norm": 0.769125282763662, "learning_rate": 4.696809907986932e-09, "loss": 0.2043, "step": 28751 }, { "epoch": 0.9866849691146191, "grad_norm": 0.9006739312868572, "learning_rate": 4.672758330606653e-09, "loss": 0.2608, "step": 28752 }, { "epoch": 0.9867192862045299, "grad_norm": 0.8667068430448016, "learning_rate": 4.648768464670106e-09, "loss": 0.2429, "step": 28753 }, { "epoch": 0.9867536032944406, "grad_norm": 0.956712120091581, "learning_rate": 4.624840310474277e-09, "loss": 0.2618, "step": 28754 }, { "epoch": 0.9867879203843514, "grad_norm": 0.791647745533815, "learning_rate": 4.6009738683144844e-09, "loss": 0.273, "step": 28755 }, { "epoch": 0.9868222374742622, "grad_norm": 0.7538942492252865, "learning_rate": 4.577169138484938e-09, "loss": 0.2507, "step": 28756 }, { "epoch": 0.986856554564173, "grad_norm": 0.7645863970185669, "learning_rate": 4.553426121280957e-09, "loss": 0.2502, "step": 28757 }, { "epoch": 0.9868908716540837, "grad_norm": 0.7743143129013197, "learning_rate": 4.52974481699564e-09, "loss": 0.2169, "step": 28758 }, { "epoch": 0.9869251887439945, "grad_norm": 0.7742206534594404, "learning_rate": 4.506125225920421e-09, "loss": 0.2595, "step": 28759 }, { "epoch": 0.9869595058339052, "grad_norm": 0.7029121091586477, "learning_rate": 4.482567348348399e-09, "loss": 0.2715, "step": 28760 }, { "epoch": 0.9869938229238161, "grad_norm": 0.8262351118102297, "learning_rate": 4.459071184569896e-09, "loss": 0.3131, "step": 28761 }, { "epoch": 0.9870281400137269, "grad_norm": 1.0602285739331871, "learning_rate": 4.4356367348752374e-09, "loss": 0.2309, "step": 28762 }, { "epoch": 0.9870624571036376, "grad_norm": 0.7709893678755373, "learning_rate": 4.412263999554189e-09, "loss": 0.265, "step": 28763 }, { "epoch": 0.9870967741935484, "grad_norm": 0.7430381034601191, "learning_rate": 4.388952978894856e-09, "loss": 0.2738, "step": 28764 }, { "epoch": 0.9871310912834592, "grad_norm": 0.7252100557666092, "learning_rate": 4.36570367318645e-09, "loss": 0.2313, "step": 28765 }, { "epoch": 0.98716540837337, "grad_norm": 0.691349017942128, "learning_rate": 4.3425160827148534e-09, "loss": 0.2646, "step": 28766 }, { "epoch": 0.9871997254632807, "grad_norm": 0.8571036489182667, "learning_rate": 4.3193902077670605e-09, "loss": 0.2876, "step": 28767 }, { "epoch": 0.9872340425531915, "grad_norm": 0.7041840855325606, "learning_rate": 4.296326048628951e-09, "loss": 0.2432, "step": 28768 }, { "epoch": 0.9872683596431022, "grad_norm": 0.7296354137138731, "learning_rate": 4.273323605585855e-09, "loss": 0.2178, "step": 28769 }, { "epoch": 0.9873026767330131, "grad_norm": 0.9217307485771041, "learning_rate": 4.2503828789208775e-09, "loss": 0.2602, "step": 28770 }, { "epoch": 0.9873369938229238, "grad_norm": 0.7716628577551273, "learning_rate": 4.2275038689176815e-09, "loss": 0.2821, "step": 28771 }, { "epoch": 0.9873713109128346, "grad_norm": 0.7087122504732111, "learning_rate": 4.204686575859374e-09, "loss": 0.2288, "step": 28772 }, { "epoch": 0.9874056280027453, "grad_norm": 0.8102478461736914, "learning_rate": 4.18193100002795e-09, "loss": 0.2477, "step": 28773 }, { "epoch": 0.9874399450926561, "grad_norm": 0.7898929254522554, "learning_rate": 4.159237141703742e-09, "loss": 0.2633, "step": 28774 }, { "epoch": 0.987474262182567, "grad_norm": 0.787191537753287, "learning_rate": 4.136605001168192e-09, "loss": 0.2906, "step": 28775 }, { "epoch": 0.9875085792724777, "grad_norm": 0.776572262307809, "learning_rate": 4.114034578699966e-09, "loss": 0.2357, "step": 28776 }, { "epoch": 0.9875428963623885, "grad_norm": 0.7291670315309087, "learning_rate": 4.091525874577729e-09, "loss": 0.2257, "step": 28777 }, { "epoch": 0.9875772134522992, "grad_norm": 0.8014286593957977, "learning_rate": 4.069078889080702e-09, "loss": 0.2317, "step": 28778 }, { "epoch": 0.9876115305422101, "grad_norm": 0.8649469059994777, "learning_rate": 4.046693622485331e-09, "loss": 0.3131, "step": 28779 }, { "epoch": 0.9876458476321208, "grad_norm": 0.7970014307911739, "learning_rate": 4.024370075068062e-09, "loss": 0.2827, "step": 28780 }, { "epoch": 0.9876801647220316, "grad_norm": 0.7661033525194713, "learning_rate": 4.002108247105341e-09, "loss": 0.2625, "step": 28781 }, { "epoch": 0.9877144818119423, "grad_norm": 0.8951776040308648, "learning_rate": 3.979908138871391e-09, "loss": 0.2934, "step": 28782 }, { "epoch": 0.9877487989018531, "grad_norm": 0.8255982663594948, "learning_rate": 3.957769750640994e-09, "loss": 0.2544, "step": 28783 }, { "epoch": 0.9877831159917639, "grad_norm": 0.729369408644813, "learning_rate": 3.935693082687819e-09, "loss": 0.2808, "step": 28784 }, { "epoch": 0.9878174330816747, "grad_norm": 0.8287715297696889, "learning_rate": 3.9136781352838716e-09, "loss": 0.3035, "step": 28785 }, { "epoch": 0.9878517501715854, "grad_norm": 0.6811712459747556, "learning_rate": 3.89172490870171e-09, "loss": 0.2814, "step": 28786 }, { "epoch": 0.9878860672614962, "grad_norm": 0.7524421600132797, "learning_rate": 3.869833403212786e-09, "loss": 0.2846, "step": 28787 }, { "epoch": 0.987920384351407, "grad_norm": 0.7220226172533253, "learning_rate": 3.848003619086882e-09, "loss": 0.219, "step": 28788 }, { "epoch": 0.9879547014413178, "grad_norm": 0.8011324852304399, "learning_rate": 3.826235556593782e-09, "loss": 0.2859, "step": 28789 }, { "epoch": 0.9879890185312286, "grad_norm": 0.7814339472398468, "learning_rate": 3.804529216002717e-09, "loss": 0.1949, "step": 28790 }, { "epoch": 0.9880233356211393, "grad_norm": 0.7496827938152543, "learning_rate": 3.782884597581804e-09, "loss": 0.2899, "step": 28791 }, { "epoch": 0.9880576527110501, "grad_norm": 0.7697364884645888, "learning_rate": 3.7613017015980525e-09, "loss": 0.2269, "step": 28792 }, { "epoch": 0.9880919698009609, "grad_norm": 0.7549086809212159, "learning_rate": 3.739780528318471e-09, "loss": 0.261, "step": 28793 }, { "epoch": 0.9881262868908717, "grad_norm": 0.7369479986644047, "learning_rate": 3.7183210780089575e-09, "loss": 0.2658, "step": 28794 }, { "epoch": 0.9881606039807824, "grad_norm": 0.8300566434170783, "learning_rate": 3.6969233509343004e-09, "loss": 0.3489, "step": 28795 }, { "epoch": 0.9881949210706932, "grad_norm": 0.7745542741858497, "learning_rate": 3.6755873473592885e-09, "loss": 0.2889, "step": 28796 }, { "epoch": 0.9882292381606039, "grad_norm": 0.7573008278505912, "learning_rate": 3.654313067547044e-09, "loss": 0.3014, "step": 28797 }, { "epoch": 0.9882635552505148, "grad_norm": 0.8153937363330551, "learning_rate": 3.6331005117606897e-09, "loss": 0.215, "step": 28798 }, { "epoch": 0.9882978723404255, "grad_norm": 0.7702422910921967, "learning_rate": 3.611949680262239e-09, "loss": 0.2266, "step": 28799 }, { "epoch": 0.9883321894303363, "grad_norm": 0.8097055328465624, "learning_rate": 3.5908605733125932e-09, "loss": 0.2572, "step": 28800 }, { "epoch": 0.988366506520247, "grad_norm": 0.7074739509171152, "learning_rate": 3.5698331911732107e-09, "loss": 0.2853, "step": 28801 }, { "epoch": 0.9884008236101579, "grad_norm": 0.875912173914345, "learning_rate": 3.5488675341027736e-09, "loss": 0.3038, "step": 28802 }, { "epoch": 0.9884351407000687, "grad_norm": 0.7707229067904267, "learning_rate": 3.527963602361073e-09, "loss": 0.2186, "step": 28803 }, { "epoch": 0.9884694577899794, "grad_norm": 0.7762234221461396, "learning_rate": 3.5071213962056814e-09, "loss": 0.2733, "step": 28804 }, { "epoch": 0.9885037748798902, "grad_norm": 0.8761754990978968, "learning_rate": 3.486340915894726e-09, "loss": 0.2578, "step": 28805 }, { "epoch": 0.9885380919698009, "grad_norm": 0.7328781670913322, "learning_rate": 3.4656221616846674e-09, "loss": 0.2013, "step": 28806 }, { "epoch": 0.9885724090597118, "grad_norm": 0.7888039236743943, "learning_rate": 3.444965133831413e-09, "loss": 0.2265, "step": 28807 }, { "epoch": 0.9886067261496225, "grad_norm": 0.8050052766463868, "learning_rate": 3.424369832590313e-09, "loss": 0.2497, "step": 28808 }, { "epoch": 0.9886410432395333, "grad_norm": 0.7543591345556802, "learning_rate": 3.4038362582156093e-09, "loss": 0.2547, "step": 28809 }, { "epoch": 0.988675360329444, "grad_norm": 0.722947205893016, "learning_rate": 3.3833644109609876e-09, "loss": 0.2218, "step": 28810 }, { "epoch": 0.9887096774193549, "grad_norm": 0.7369087037111937, "learning_rate": 3.3629542910790237e-09, "loss": 0.252, "step": 28811 }, { "epoch": 0.9887439945092656, "grad_norm": 0.7281004194015563, "learning_rate": 3.342605898822848e-09, "loss": 0.2965, "step": 28812 }, { "epoch": 0.9887783115991764, "grad_norm": 0.9074823211394293, "learning_rate": 3.3223192344428167e-09, "loss": 0.2312, "step": 28813 }, { "epoch": 0.9888126286890871, "grad_norm": 0.8319050420549567, "learning_rate": 3.3020942981903946e-09, "loss": 0.2975, "step": 28814 }, { "epoch": 0.9888469457789979, "grad_norm": 0.8904888113176103, "learning_rate": 3.281931090314827e-09, "loss": 0.2884, "step": 28815 }, { "epoch": 0.9888812628689088, "grad_norm": 0.7328093108942879, "learning_rate": 3.2618296110653592e-09, "loss": 0.2532, "step": 28816 }, { "epoch": 0.9889155799588195, "grad_norm": 0.7700591631798184, "learning_rate": 3.2417898606901255e-09, "loss": 0.2902, "step": 28817 }, { "epoch": 0.9889498970487303, "grad_norm": 0.9311313971292265, "learning_rate": 3.221811839437261e-09, "loss": 0.2838, "step": 28818 }, { "epoch": 0.988984214138641, "grad_norm": 0.7564986038464291, "learning_rate": 3.2018955475532354e-09, "loss": 0.3113, "step": 28819 }, { "epoch": 0.9890185312285518, "grad_norm": 0.7421352805518947, "learning_rate": 3.182040985283963e-09, "loss": 0.2983, "step": 28820 }, { "epoch": 0.9890528483184626, "grad_norm": 0.7965586575253392, "learning_rate": 3.1622481528748027e-09, "loss": 0.2373, "step": 28821 }, { "epoch": 0.9890871654083734, "grad_norm": 0.7543159886358374, "learning_rate": 3.1425170505705593e-09, "loss": 0.3317, "step": 28822 }, { "epoch": 0.9891214824982841, "grad_norm": 0.9124255827566964, "learning_rate": 3.1228476786143714e-09, "loss": 0.2673, "step": 28823 }, { "epoch": 0.9891557995881949, "grad_norm": 0.7193568251668354, "learning_rate": 3.103240037249933e-09, "loss": 0.2135, "step": 28824 }, { "epoch": 0.9891901166781057, "grad_norm": 0.8935458699748704, "learning_rate": 3.083694126718717e-09, "loss": 0.2707, "step": 28825 }, { "epoch": 0.9892244337680165, "grad_norm": 0.8644228495395878, "learning_rate": 3.064209947263308e-09, "loss": 0.2518, "step": 28826 }, { "epoch": 0.9892587508579272, "grad_norm": 0.819277708558395, "learning_rate": 3.044787499122959e-09, "loss": 0.2597, "step": 28827 }, { "epoch": 0.989293067947838, "grad_norm": 0.7619056632175831, "learning_rate": 3.0254267825391427e-09, "loss": 0.254, "step": 28828 }, { "epoch": 0.9893273850377488, "grad_norm": 0.7285183569415833, "learning_rate": 3.0061277977494475e-09, "loss": 0.247, "step": 28829 }, { "epoch": 0.9893617021276596, "grad_norm": 0.7567230983200142, "learning_rate": 2.9868905449936812e-09, "loss": 0.2417, "step": 28830 }, { "epoch": 0.9893960192175704, "grad_norm": 0.7426027869609038, "learning_rate": 2.967715024508877e-09, "loss": 0.2487, "step": 28831 }, { "epoch": 0.9894303363074811, "grad_norm": 0.7696788207819495, "learning_rate": 2.9486012365320673e-09, "loss": 0.2539, "step": 28832 }, { "epoch": 0.9894646533973919, "grad_norm": 1.0130893203104983, "learning_rate": 2.9295491812991737e-09, "loss": 0.2759, "step": 28833 }, { "epoch": 0.9894989704873027, "grad_norm": 0.7560270631071709, "learning_rate": 2.9105588590455645e-09, "loss": 0.2166, "step": 28834 }, { "epoch": 0.9895332875772135, "grad_norm": 0.939786021674345, "learning_rate": 2.8916302700060517e-09, "loss": 0.3003, "step": 28835 }, { "epoch": 0.9895676046671242, "grad_norm": 0.738662252179613, "learning_rate": 2.8727634144143366e-09, "loss": 0.2056, "step": 28836 }, { "epoch": 0.989601921757035, "grad_norm": 0.7913771430717629, "learning_rate": 2.8539582925035667e-09, "loss": 0.2432, "step": 28837 }, { "epoch": 0.9896362388469457, "grad_norm": 0.6971514758637842, "learning_rate": 2.8352149045057786e-09, "loss": 0.2354, "step": 28838 }, { "epoch": 0.9896705559368566, "grad_norm": 0.8124924132880122, "learning_rate": 2.8165332506530084e-09, "loss": 0.2464, "step": 28839 }, { "epoch": 0.9897048730267674, "grad_norm": 0.8869482938493548, "learning_rate": 2.797913331176183e-09, "loss": 0.2553, "step": 28840 }, { "epoch": 0.9897391901166781, "grad_norm": 0.861923647005478, "learning_rate": 2.779355146304563e-09, "loss": 0.2706, "step": 28841 }, { "epoch": 0.9897735072065889, "grad_norm": 0.7397710971157543, "learning_rate": 2.7608586962679653e-09, "loss": 0.2945, "step": 28842 }, { "epoch": 0.9898078242964996, "grad_norm": 0.8472840981240014, "learning_rate": 2.74242398129454e-09, "loss": 0.2904, "step": 28843 }, { "epoch": 0.9898421413864105, "grad_norm": 0.7783698963839517, "learning_rate": 2.724051001612438e-09, "loss": 0.2268, "step": 28844 }, { "epoch": 0.9898764584763212, "grad_norm": 0.8530822636086696, "learning_rate": 2.705739757448145e-09, "loss": 0.2764, "step": 28845 }, { "epoch": 0.989910775566232, "grad_norm": 0.681492571054773, "learning_rate": 2.687490249028146e-09, "loss": 0.2283, "step": 28846 }, { "epoch": 0.9899450926561427, "grad_norm": 0.8051492345156439, "learning_rate": 2.6693024765783725e-09, "loss": 0.2603, "step": 28847 }, { "epoch": 0.9899794097460536, "grad_norm": 0.8226692862520413, "learning_rate": 2.6511764403230887e-09, "loss": 0.2605, "step": 28848 }, { "epoch": 0.9900137268359643, "grad_norm": 0.6853106281104813, "learning_rate": 2.63311214048545e-09, "loss": 0.2247, "step": 28849 }, { "epoch": 0.9900480439258751, "grad_norm": 0.791653512875843, "learning_rate": 2.6151095772897207e-09, "loss": 0.2811, "step": 28850 }, { "epoch": 0.9900823610157858, "grad_norm": 0.7363652560902362, "learning_rate": 2.597168750957946e-09, "loss": 0.2448, "step": 28851 }, { "epoch": 0.9901166781056966, "grad_norm": 0.6827550314148786, "learning_rate": 2.57928966171217e-09, "loss": 0.285, "step": 28852 }, { "epoch": 0.9901509951956075, "grad_norm": 0.737425008727524, "learning_rate": 2.561472309772217e-09, "loss": 0.2704, "step": 28853 }, { "epoch": 0.9901853122855182, "grad_norm": 0.7934135612492621, "learning_rate": 2.543716695359022e-09, "loss": 0.262, "step": 28854 }, { "epoch": 0.990219629375429, "grad_norm": 0.8305927678436956, "learning_rate": 2.5260228186918532e-09, "loss": 0.2579, "step": 28855 }, { "epoch": 0.9902539464653397, "grad_norm": 0.8489782381985209, "learning_rate": 2.5083906799894253e-09, "loss": 0.2507, "step": 28856 }, { "epoch": 0.9902882635552506, "grad_norm": 0.7939563917529461, "learning_rate": 2.490820279468786e-09, "loss": 0.2985, "step": 28857 }, { "epoch": 0.9903225806451613, "grad_norm": 0.7802273140754581, "learning_rate": 2.47331161734754e-09, "loss": 0.2575, "step": 28858 }, { "epoch": 0.9903568977350721, "grad_norm": 0.8865115919859273, "learning_rate": 2.4558646938421806e-09, "loss": 0.2639, "step": 28859 }, { "epoch": 0.9903912148249828, "grad_norm": 0.7506113627297892, "learning_rate": 2.43847950916809e-09, "loss": 0.2708, "step": 28860 }, { "epoch": 0.9904255319148936, "grad_norm": 1.1327036441321459, "learning_rate": 2.421156063540098e-09, "loss": 0.2973, "step": 28861 }, { "epoch": 0.9904598490048044, "grad_norm": 0.7202917835553408, "learning_rate": 2.403894357171921e-09, "loss": 0.2445, "step": 28862 }, { "epoch": 0.9904941660947152, "grad_norm": 0.8094215494528856, "learning_rate": 2.3866943902772776e-09, "loss": 0.2251, "step": 28863 }, { "epoch": 0.9905284831846259, "grad_norm": 0.8553785803795199, "learning_rate": 2.3695561630676656e-09, "loss": 0.2997, "step": 28864 }, { "epoch": 0.9905628002745367, "grad_norm": 0.8307119290117022, "learning_rate": 2.3524796757562474e-09, "loss": 0.2565, "step": 28865 }, { "epoch": 0.9905971173644474, "grad_norm": 0.7091194362457928, "learning_rate": 2.3354649285528553e-09, "loss": 0.2772, "step": 28866 }, { "epoch": 0.9906314344543583, "grad_norm": 0.8074247841218636, "learning_rate": 2.3185119216684315e-09, "loss": 0.2428, "step": 28867 }, { "epoch": 0.9906657515442691, "grad_norm": 0.7220674633865333, "learning_rate": 2.301620655311698e-09, "loss": 0.2723, "step": 28868 }, { "epoch": 0.9907000686341798, "grad_norm": 0.8953643137711516, "learning_rate": 2.284791129691932e-09, "loss": 0.2885, "step": 28869 }, { "epoch": 0.9907343857240906, "grad_norm": 0.8099828758399039, "learning_rate": 2.2680233450167456e-09, "loss": 0.282, "step": 28870 }, { "epoch": 0.9907687028140014, "grad_norm": 0.8437354263243056, "learning_rate": 2.251317301493194e-09, "loss": 0.2659, "step": 28871 }, { "epoch": 0.9908030199039122, "grad_norm": 0.7650289759293647, "learning_rate": 2.2346729993277803e-09, "loss": 0.3175, "step": 28872 }, { "epoch": 0.9908373369938229, "grad_norm": 0.8139176129806714, "learning_rate": 2.21809043872645e-09, "loss": 0.2527, "step": 28873 }, { "epoch": 0.9908716540837337, "grad_norm": 0.8355952903066491, "learning_rate": 2.201569619893484e-09, "loss": 0.3073, "step": 28874 }, { "epoch": 0.9909059711736444, "grad_norm": 0.8741437343269028, "learning_rate": 2.1851105430331642e-09, "loss": 0.3332, "step": 28875 }, { "epoch": 0.9909402882635553, "grad_norm": 0.7488737044916485, "learning_rate": 2.1687132083492156e-09, "loss": 0.2355, "step": 28876 }, { "epoch": 0.990974605353466, "grad_norm": 0.7590384602598164, "learning_rate": 2.1523776160442545e-09, "loss": 0.2615, "step": 28877 }, { "epoch": 0.9910089224433768, "grad_norm": 0.7097865381556706, "learning_rate": 2.136103766319231e-09, "loss": 0.212, "step": 28878 }, { "epoch": 0.9910432395332875, "grad_norm": 0.741495263584635, "learning_rate": 2.11989165937565e-09, "loss": 0.2709, "step": 28879 }, { "epoch": 0.9910775566231984, "grad_norm": 0.7087955861650043, "learning_rate": 2.103741295413908e-09, "loss": 0.3026, "step": 28880 }, { "epoch": 0.9911118737131092, "grad_norm": 0.7232935588852887, "learning_rate": 2.0876526746338443e-09, "loss": 0.2301, "step": 28881 }, { "epoch": 0.9911461908030199, "grad_norm": 0.9077744767673516, "learning_rate": 2.071625797233079e-09, "loss": 0.2635, "step": 28882 }, { "epoch": 0.9911805078929307, "grad_norm": 0.9358982850804347, "learning_rate": 2.0556606634108966e-09, "loss": 0.2665, "step": 28883 }, { "epoch": 0.9912148249828414, "grad_norm": 0.9471859171174382, "learning_rate": 2.0397572733638073e-09, "loss": 0.2567, "step": 28884 }, { "epoch": 0.9912491420727523, "grad_norm": 0.7780187829584123, "learning_rate": 2.023915627288875e-09, "loss": 0.2246, "step": 28885 }, { "epoch": 0.991283459162663, "grad_norm": 0.7370378266172688, "learning_rate": 2.0081357253809443e-09, "loss": 0.2614, "step": 28886 }, { "epoch": 0.9913177762525738, "grad_norm": 0.8404505498197019, "learning_rate": 1.992417567835414e-09, "loss": 0.2997, "step": 28887 }, { "epoch": 0.9913520933424845, "grad_norm": 0.7768874068053756, "learning_rate": 1.9767611548465737e-09, "loss": 0.2694, "step": 28888 }, { "epoch": 0.9913864104323953, "grad_norm": 0.7975569321747493, "learning_rate": 1.961166486607602e-09, "loss": 0.248, "step": 28889 }, { "epoch": 0.9914207275223061, "grad_norm": 0.8746691373008912, "learning_rate": 1.9456335633116773e-09, "loss": 0.2855, "step": 28890 }, { "epoch": 0.9914550446122169, "grad_norm": 0.6921757097923781, "learning_rate": 1.9301623851497586e-09, "loss": 0.2383, "step": 28891 }, { "epoch": 0.9914893617021276, "grad_norm": 0.7753086506528408, "learning_rate": 1.914752952313914e-09, "loss": 0.2705, "step": 28892 }, { "epoch": 0.9915236787920384, "grad_norm": 0.8265922026760143, "learning_rate": 1.899405264993437e-09, "loss": 0.3721, "step": 28893 }, { "epoch": 0.9915579958819493, "grad_norm": 0.8248296121815368, "learning_rate": 1.8841193233792853e-09, "loss": 0.2558, "step": 28894 }, { "epoch": 0.99159231297186, "grad_norm": 0.7782769548063079, "learning_rate": 1.8688951276596422e-09, "loss": 0.2563, "step": 28895 }, { "epoch": 0.9916266300617708, "grad_norm": 0.8596354807171619, "learning_rate": 1.8537326780221354e-09, "loss": 0.2509, "step": 28896 }, { "epoch": 0.9916609471516815, "grad_norm": 0.9234688219575472, "learning_rate": 1.8386319746543925e-09, "loss": 0.2668, "step": 28897 }, { "epoch": 0.9916952642415923, "grad_norm": 0.7274680048190533, "learning_rate": 1.8235930177434857e-09, "loss": 0.2765, "step": 28898 }, { "epoch": 0.9917295813315031, "grad_norm": 0.8395014466935308, "learning_rate": 1.8086158074742677e-09, "loss": 0.2309, "step": 28899 }, { "epoch": 0.9917638984214139, "grad_norm": 0.726663935609624, "learning_rate": 1.7937003440327005e-09, "loss": 0.2748, "step": 28900 }, { "epoch": 0.9917982155113246, "grad_norm": 0.7965737104651327, "learning_rate": 1.7788466276025262e-09, "loss": 0.2917, "step": 28901 }, { "epoch": 0.9918325326012354, "grad_norm": 0.7825705447504547, "learning_rate": 1.7640546583674867e-09, "loss": 0.2233, "step": 28902 }, { "epoch": 0.9918668496911462, "grad_norm": 0.8097237929076162, "learning_rate": 1.7493244365096585e-09, "loss": 0.2553, "step": 28903 }, { "epoch": 0.991901166781057, "grad_norm": 0.8502356961683682, "learning_rate": 1.7346559622122282e-09, "loss": 0.265, "step": 28904 }, { "epoch": 0.9919354838709677, "grad_norm": 0.8308452356434404, "learning_rate": 1.7200492356550525e-09, "loss": 0.2618, "step": 28905 }, { "epoch": 0.9919698009608785, "grad_norm": 0.845135854165508, "learning_rate": 1.7055042570196522e-09, "loss": 0.2686, "step": 28906 }, { "epoch": 0.9920041180507893, "grad_norm": 0.963447283750048, "learning_rate": 1.6910210264853289e-09, "loss": 0.307, "step": 28907 }, { "epoch": 0.9920384351407001, "grad_norm": 0.7663240549375148, "learning_rate": 1.676599544230828e-09, "loss": 0.3593, "step": 28908 }, { "epoch": 0.9920727522306109, "grad_norm": 0.7730689766299977, "learning_rate": 1.6622398104343408e-09, "loss": 0.2429, "step": 28909 }, { "epoch": 0.9921070693205216, "grad_norm": 0.8680229410687237, "learning_rate": 1.6479418252735025e-09, "loss": 0.2751, "step": 28910 }, { "epoch": 0.9921413864104324, "grad_norm": 0.8291717427944667, "learning_rate": 1.6337055889248388e-09, "loss": 0.2587, "step": 28911 }, { "epoch": 0.9921757035003431, "grad_norm": 0.8552928402755922, "learning_rate": 1.61953110156432e-09, "loss": 0.2237, "step": 28912 }, { "epoch": 0.992210020590254, "grad_norm": 0.8444011345986081, "learning_rate": 1.6054183633668064e-09, "loss": 0.3007, "step": 28913 }, { "epoch": 0.9922443376801647, "grad_norm": 0.7810663001026767, "learning_rate": 1.5913673745071578e-09, "loss": 0.2252, "step": 28914 }, { "epoch": 0.9922786547700755, "grad_norm": 0.8472135538519894, "learning_rate": 1.577378135158014e-09, "loss": 0.2602, "step": 28915 }, { "epoch": 0.9923129718599862, "grad_norm": 1.0748359263304217, "learning_rate": 1.5634506454931253e-09, "loss": 0.3098, "step": 28916 }, { "epoch": 0.9923472889498971, "grad_norm": 0.7459102611925972, "learning_rate": 1.5495849056840206e-09, "loss": 0.2376, "step": 28917 }, { "epoch": 0.9923816060398079, "grad_norm": 0.7262274392417853, "learning_rate": 1.5357809159022298e-09, "loss": 0.1989, "step": 28918 }, { "epoch": 0.9924159231297186, "grad_norm": 0.81753754646902, "learning_rate": 1.5220386763181715e-09, "loss": 0.3013, "step": 28919 }, { "epoch": 0.9924502402196294, "grad_norm": 0.7155182527103296, "learning_rate": 1.5083581871017105e-09, "loss": 0.2086, "step": 28920 }, { "epoch": 0.9924845573095401, "grad_norm": 0.7689544891111308, "learning_rate": 1.4947394484221556e-09, "loss": 0.2486, "step": 28921 }, { "epoch": 0.992518874399451, "grad_norm": 0.8132769278938169, "learning_rate": 1.4811824604465953e-09, "loss": 0.3102, "step": 28922 }, { "epoch": 0.9925531914893617, "grad_norm": 0.8964855539992084, "learning_rate": 1.4676872233443385e-09, "loss": 0.2507, "step": 28923 }, { "epoch": 0.9925875085792725, "grad_norm": 0.9582741451059916, "learning_rate": 1.4542537372802534e-09, "loss": 0.2269, "step": 28924 }, { "epoch": 0.9926218256691832, "grad_norm": 0.8624074442285699, "learning_rate": 1.4408820024214287e-09, "loss": 0.3321, "step": 28925 }, { "epoch": 0.992656142759094, "grad_norm": 0.7632199445668216, "learning_rate": 1.4275720189327325e-09, "loss": 0.269, "step": 28926 }, { "epoch": 0.9926904598490048, "grad_norm": 0.7985376606147787, "learning_rate": 1.4143237869784776e-09, "loss": 0.2771, "step": 28927 }, { "epoch": 0.9927247769389156, "grad_norm": 0.6759879775342699, "learning_rate": 1.4011373067229772e-09, "loss": 0.27, "step": 28928 }, { "epoch": 0.9927590940288263, "grad_norm": 0.7417715663113863, "learning_rate": 1.3880125783277687e-09, "loss": 0.1963, "step": 28929 }, { "epoch": 0.9927934111187371, "grad_norm": 0.8276077949382389, "learning_rate": 1.3749496019566099e-09, "loss": 0.2521, "step": 28930 }, { "epoch": 0.992827728208648, "grad_norm": 0.7122532056100896, "learning_rate": 1.3619483777699284e-09, "loss": 0.2598, "step": 28931 }, { "epoch": 0.9928620452985587, "grad_norm": 0.8325000471891528, "learning_rate": 1.3490089059281509e-09, "loss": 0.2235, "step": 28932 }, { "epoch": 0.9928963623884695, "grad_norm": 0.8974729294172531, "learning_rate": 1.3361311865917048e-09, "loss": 0.2705, "step": 28933 }, { "epoch": 0.9929306794783802, "grad_norm": 0.7468765933182762, "learning_rate": 1.3233152199199074e-09, "loss": 0.2604, "step": 28934 }, { "epoch": 0.992964996568291, "grad_norm": 0.853762531474945, "learning_rate": 1.3105610060704099e-09, "loss": 0.2291, "step": 28935 }, { "epoch": 0.9929993136582018, "grad_norm": 0.8595356709005445, "learning_rate": 1.2978685452008643e-09, "loss": 0.2695, "step": 28936 }, { "epoch": 0.9930336307481126, "grad_norm": 0.7696933858191669, "learning_rate": 1.285237837468367e-09, "loss": 0.2734, "step": 28937 }, { "epoch": 0.9930679478380233, "grad_norm": 0.8435373657571439, "learning_rate": 1.2726688830283497e-09, "loss": 0.2552, "step": 28938 }, { "epoch": 0.9931022649279341, "grad_norm": 0.8302008582041175, "learning_rate": 1.260161682037353e-09, "loss": 0.3218, "step": 28939 }, { "epoch": 0.9931365820178449, "grad_norm": 0.9332505204522782, "learning_rate": 1.2477162346485882e-09, "loss": 0.2465, "step": 28940 }, { "epoch": 0.9931708991077557, "grad_norm": 0.8842308171761376, "learning_rate": 1.2353325410163763e-09, "loss": 0.3017, "step": 28941 }, { "epoch": 0.9932052161976664, "grad_norm": 0.9766843625034554, "learning_rate": 1.2230106012933729e-09, "loss": 0.2502, "step": 28942 }, { "epoch": 0.9932395332875772, "grad_norm": 0.7331378279303422, "learning_rate": 1.2107504156327887e-09, "loss": 0.2651, "step": 28943 }, { "epoch": 0.9932738503774879, "grad_norm": 0.8504237030553096, "learning_rate": 1.198551984185059e-09, "loss": 0.2642, "step": 28944 }, { "epoch": 0.9933081674673988, "grad_norm": 0.7997315032913838, "learning_rate": 1.186415307100619e-09, "loss": 0.2348, "step": 28945 }, { "epoch": 0.9933424845573096, "grad_norm": 0.8100028361552332, "learning_rate": 1.174340384530459e-09, "loss": 0.261, "step": 28946 }, { "epoch": 0.9933768016472203, "grad_norm": 0.7082787040059556, "learning_rate": 1.162327216623349e-09, "loss": 0.2088, "step": 28947 }, { "epoch": 0.9934111187371311, "grad_norm": 0.7571710029895246, "learning_rate": 1.1503758035275037e-09, "loss": 0.2551, "step": 28948 }, { "epoch": 0.9934454358270418, "grad_norm": 0.832029236993523, "learning_rate": 1.1384861453905826e-09, "loss": 0.22, "step": 28949 }, { "epoch": 0.9934797529169527, "grad_norm": 0.7379426613295536, "learning_rate": 1.1266582423596906e-09, "loss": 0.2538, "step": 28950 }, { "epoch": 0.9935140700068634, "grad_norm": 0.7149769774016579, "learning_rate": 1.1148920945808217e-09, "loss": 0.2413, "step": 28951 }, { "epoch": 0.9935483870967742, "grad_norm": 0.7809715160450412, "learning_rate": 1.1031877021988602e-09, "loss": 0.259, "step": 28952 }, { "epoch": 0.9935827041866849, "grad_norm": 0.7849498512224785, "learning_rate": 1.0915450653598003e-09, "loss": 0.2408, "step": 28953 }, { "epoch": 0.9936170212765958, "grad_norm": 0.8350640042446993, "learning_rate": 1.0799641842057507e-09, "loss": 0.289, "step": 28954 }, { "epoch": 0.9936513383665065, "grad_norm": 0.7060927142050988, "learning_rate": 1.0684450588810402e-09, "loss": 0.2218, "step": 28955 }, { "epoch": 0.9936856554564173, "grad_norm": 0.7654936879433804, "learning_rate": 1.0569876895272224e-09, "loss": 0.2204, "step": 28956 }, { "epoch": 0.993719972546328, "grad_norm": 0.7353270918658045, "learning_rate": 1.0455920762858508e-09, "loss": 0.2445, "step": 28957 }, { "epoch": 0.9937542896362388, "grad_norm": 0.7679531361024005, "learning_rate": 1.0342582192984785e-09, "loss": 0.2526, "step": 28958 }, { "epoch": 0.9937886067261497, "grad_norm": 0.8692403217283071, "learning_rate": 1.0229861187044388e-09, "loss": 0.341, "step": 28959 }, { "epoch": 0.9938229238160604, "grad_norm": 0.7572415628839834, "learning_rate": 1.0117757746430646e-09, "loss": 0.2665, "step": 28960 }, { "epoch": 0.9938572409059712, "grad_norm": 0.8177618753457156, "learning_rate": 1.0006271872531338e-09, "loss": 0.2734, "step": 28961 }, { "epoch": 0.9938915579958819, "grad_norm": 0.7246771052423778, "learning_rate": 9.895403566717588e-10, "loss": 0.2602, "step": 28962 }, { "epoch": 0.9939258750857928, "grad_norm": 0.7674041081114387, "learning_rate": 9.785152830371625e-10, "loss": 0.2967, "step": 28963 }, { "epoch": 0.9939601921757035, "grad_norm": 1.1493315658580432, "learning_rate": 9.67551966484237e-10, "loss": 0.301, "step": 28964 }, { "epoch": 0.9939945092656143, "grad_norm": 0.7638053011681869, "learning_rate": 9.566504071484294e-10, "loss": 0.2252, "step": 28965 }, { "epoch": 0.994028826355525, "grad_norm": 0.8438731855453377, "learning_rate": 9.458106051657424e-10, "loss": 0.3352, "step": 28966 }, { "epoch": 0.9940631434454358, "grad_norm": 0.7805004434692095, "learning_rate": 9.350325606688471e-10, "loss": 0.2492, "step": 28967 }, { "epoch": 0.9940974605353466, "grad_norm": 0.7521371247552779, "learning_rate": 9.243162737909705e-10, "loss": 0.3265, "step": 28968 }, { "epoch": 0.9941317776252574, "grad_norm": 0.769043782132064, "learning_rate": 9.136617446653395e-10, "loss": 0.2681, "step": 28969 }, { "epoch": 0.9941660947151681, "grad_norm": 0.8074768461485952, "learning_rate": 9.030689734224052e-10, "loss": 0.2253, "step": 28970 }, { "epoch": 0.9942004118050789, "grad_norm": 0.8312414854516382, "learning_rate": 8.925379601942841e-10, "loss": 0.2788, "step": 28971 }, { "epoch": 0.9942347288949896, "grad_norm": 0.6905645442742167, "learning_rate": 8.820687051103172e-10, "loss": 0.2189, "step": 28972 }, { "epoch": 0.9942690459849005, "grad_norm": 0.6866011865975614, "learning_rate": 8.716612082998454e-10, "loss": 0.2247, "step": 28973 }, { "epoch": 0.9943033630748113, "grad_norm": 0.7910757387593084, "learning_rate": 8.613154698916548e-10, "loss": 0.2879, "step": 28974 }, { "epoch": 0.994337680164722, "grad_norm": 0.9019989384633456, "learning_rate": 8.510314900134209e-10, "loss": 0.2524, "step": 28975 }, { "epoch": 0.9943719972546328, "grad_norm": 0.7854269549126411, "learning_rate": 8.408092687922642e-10, "loss": 0.2494, "step": 28976 }, { "epoch": 0.9944063143445436, "grad_norm": 0.8022497421686607, "learning_rate": 8.306488063547502e-10, "loss": 0.2896, "step": 28977 }, { "epoch": 0.9944406314344544, "grad_norm": 0.7788040555303027, "learning_rate": 8.20550102825779e-10, "loss": 0.2351, "step": 28978 }, { "epoch": 0.9944749485243651, "grad_norm": 0.725601263244138, "learning_rate": 8.105131583308057e-10, "loss": 0.2915, "step": 28979 }, { "epoch": 0.9945092656142759, "grad_norm": 0.7712814847257465, "learning_rate": 8.005379729930651e-10, "loss": 0.2913, "step": 28980 }, { "epoch": 0.9945435827041866, "grad_norm": 0.7420683550139552, "learning_rate": 7.906245469363472e-10, "loss": 0.2975, "step": 28981 }, { "epoch": 0.9945778997940975, "grad_norm": 0.7559024019190603, "learning_rate": 7.807728802827763e-10, "loss": 0.2788, "step": 28982 }, { "epoch": 0.9946122168840082, "grad_norm": 0.8838263532469246, "learning_rate": 7.709829731544771e-10, "loss": 0.2826, "step": 28983 }, { "epoch": 0.994646533973919, "grad_norm": 0.7083730342147696, "learning_rate": 7.612548256719088e-10, "loss": 0.2674, "step": 28984 }, { "epoch": 0.9946808510638298, "grad_norm": 0.7033147557168197, "learning_rate": 7.515884379555305e-10, "loss": 0.2634, "step": 28985 }, { "epoch": 0.9947151681537406, "grad_norm": 0.7770676336462585, "learning_rate": 7.419838101246912e-10, "loss": 0.2191, "step": 28986 }, { "epoch": 0.9947494852436514, "grad_norm": 0.7483835862698233, "learning_rate": 7.324409422981849e-10, "loss": 0.245, "step": 28987 }, { "epoch": 0.9947838023335621, "grad_norm": 0.7119087283642364, "learning_rate": 7.22959834593695e-10, "loss": 0.2219, "step": 28988 }, { "epoch": 0.9948181194234729, "grad_norm": 0.7497690472948761, "learning_rate": 7.135404871283502e-10, "loss": 0.2501, "step": 28989 }, { "epoch": 0.9948524365133836, "grad_norm": 1.0194514887669612, "learning_rate": 7.041829000187239e-10, "loss": 0.2564, "step": 28990 }, { "epoch": 0.9948867536032945, "grad_norm": 0.7232980789065157, "learning_rate": 6.948870733808343e-10, "loss": 0.2205, "step": 28991 }, { "epoch": 0.9949210706932052, "grad_norm": 0.7393041227802026, "learning_rate": 6.856530073284795e-10, "loss": 0.263, "step": 28992 }, { "epoch": 0.994955387783116, "grad_norm": 0.7072154155648233, "learning_rate": 6.764807019760122e-10, "loss": 0.2784, "step": 28993 }, { "epoch": 0.9949897048730267, "grad_norm": 0.7383031756298462, "learning_rate": 6.673701574372304e-10, "loss": 0.2851, "step": 28994 }, { "epoch": 0.9950240219629375, "grad_norm": 0.6762906928389812, "learning_rate": 6.583213738242667e-10, "loss": 0.2485, "step": 28995 }, { "epoch": 0.9950583390528484, "grad_norm": 0.8089283991825941, "learning_rate": 6.493343512492534e-10, "loss": 0.2669, "step": 28996 }, { "epoch": 0.9950926561427591, "grad_norm": 0.84621266798224, "learning_rate": 6.404090898232129e-10, "loss": 0.3296, "step": 28997 }, { "epoch": 0.9951269732326699, "grad_norm": 0.7643016655901216, "learning_rate": 6.315455896560574e-10, "loss": 0.3101, "step": 28998 }, { "epoch": 0.9951612903225806, "grad_norm": 0.8232514098443966, "learning_rate": 6.227438508576989e-10, "loss": 0.2526, "step": 28999 }, { "epoch": 0.9951956074124915, "grad_norm": 0.7974334093392179, "learning_rate": 6.14003873536384e-10, "loss": 0.2163, "step": 29000 }, { "epoch": 0.9952299245024022, "grad_norm": 0.7840759197292532, "learning_rate": 6.053256578003597e-10, "loss": 0.2837, "step": 29001 }, { "epoch": 0.995264241592313, "grad_norm": 0.7619946991732947, "learning_rate": 5.967092037567623e-10, "loss": 0.2293, "step": 29002 }, { "epoch": 0.9952985586822237, "grad_norm": 0.8205867243036711, "learning_rate": 5.881545115121734e-10, "loss": 0.3093, "step": 29003 }, { "epoch": 0.9953328757721345, "grad_norm": 0.8074487014837398, "learning_rate": 5.796615811720641e-10, "loss": 0.2569, "step": 29004 }, { "epoch": 0.9953671928620453, "grad_norm": 0.8094222094650774, "learning_rate": 5.712304128413504e-10, "loss": 0.2866, "step": 29005 }, { "epoch": 0.9954015099519561, "grad_norm": 0.7991206325547253, "learning_rate": 5.628610066243934e-10, "loss": 0.2702, "step": 29006 }, { "epoch": 0.9954358270418668, "grad_norm": 0.7491494618950945, "learning_rate": 5.545533626249988e-10, "loss": 0.2221, "step": 29007 }, { "epoch": 0.9954701441317776, "grad_norm": 0.7939537505759552, "learning_rate": 5.463074809447521e-10, "loss": 0.3263, "step": 29008 }, { "epoch": 0.9955044612216885, "grad_norm": 0.810053377198281, "learning_rate": 5.38123361686349e-10, "loss": 0.2678, "step": 29009 }, { "epoch": 0.9955387783115992, "grad_norm": 0.805331544381548, "learning_rate": 5.300010049502646e-10, "loss": 0.2732, "step": 29010 }, { "epoch": 0.99557309540151, "grad_norm": 0.8840446475059609, "learning_rate": 5.219404108375292e-10, "loss": 0.3587, "step": 29011 }, { "epoch": 0.9956074124914207, "grad_norm": 0.8354275968268725, "learning_rate": 5.139415794475078e-10, "loss": 0.3029, "step": 29012 }, { "epoch": 0.9956417295813315, "grad_norm": 0.7852650195431096, "learning_rate": 5.06004510878455e-10, "loss": 0.2763, "step": 29013 }, { "epoch": 0.9956760466712423, "grad_norm": 0.7151710519612144, "learning_rate": 4.981292052291808e-10, "loss": 0.2101, "step": 29014 }, { "epoch": 0.9957103637611531, "grad_norm": 0.7206546267380574, "learning_rate": 4.903156625968297e-10, "loss": 0.252, "step": 29015 }, { "epoch": 0.9957446808510638, "grad_norm": 0.9247467412613154, "learning_rate": 4.82563883077436e-10, "loss": 0.2392, "step": 29016 }, { "epoch": 0.9957789979409746, "grad_norm": 0.8216011484286938, "learning_rate": 4.74873866767589e-10, "loss": 0.2626, "step": 29017 }, { "epoch": 0.9958133150308853, "grad_norm": 0.7448338356086288, "learning_rate": 4.672456137611026e-10, "loss": 0.2742, "step": 29018 }, { "epoch": 0.9958476321207962, "grad_norm": 0.7671001633892316, "learning_rate": 4.5967912415345596e-10, "loss": 0.2512, "step": 29019 }, { "epoch": 0.9958819492107069, "grad_norm": 0.7350149954540096, "learning_rate": 4.5217439803735276e-10, "loss": 0.2433, "step": 29020 }, { "epoch": 0.9959162663006177, "grad_norm": 0.7564395713615031, "learning_rate": 4.447314355054966e-10, "loss": 0.275, "step": 29021 }, { "epoch": 0.9959505833905284, "grad_norm": 0.7415265495548622, "learning_rate": 4.3735023665059105e-10, "loss": 0.2619, "step": 29022 }, { "epoch": 0.9959849004804393, "grad_norm": 0.6911181742276291, "learning_rate": 4.300308015631194e-10, "loss": 0.243, "step": 29023 }, { "epoch": 0.9960192175703501, "grad_norm": 0.7737960951780692, "learning_rate": 4.2277313033356473e-10, "loss": 0.2376, "step": 29024 }, { "epoch": 0.9960535346602608, "grad_norm": 0.7283436849860229, "learning_rate": 4.155772230513e-10, "loss": 0.3033, "step": 29025 }, { "epoch": 0.9960878517501716, "grad_norm": 0.9065489155710761, "learning_rate": 4.084430798062533e-10, "loss": 0.2681, "step": 29026 }, { "epoch": 0.9961221688400823, "grad_norm": 0.7949106222912894, "learning_rate": 4.0137070068557716e-10, "loss": 0.2527, "step": 29027 }, { "epoch": 0.9961564859299932, "grad_norm": 0.8485088620090298, "learning_rate": 3.9436008577753425e-10, "loss": 0.2103, "step": 29028 }, { "epoch": 0.9961908030199039, "grad_norm": 0.7441607736152245, "learning_rate": 3.874112351676118e-10, "loss": 0.241, "step": 29029 }, { "epoch": 0.9962251201098147, "grad_norm": 0.789170779654763, "learning_rate": 3.8052414894240717e-10, "loss": 0.2517, "step": 29030 }, { "epoch": 0.9962594371997254, "grad_norm": 0.803403599939985, "learning_rate": 3.736988271868525e-10, "loss": 0.2371, "step": 29031 }, { "epoch": 0.9962937542896363, "grad_norm": 0.7255100531791812, "learning_rate": 3.6693526998532457e-10, "loss": 0.2361, "step": 29032 }, { "epoch": 0.996328071379547, "grad_norm": 0.7777691634323017, "learning_rate": 3.6023347742109027e-10, "loss": 0.2183, "step": 29033 }, { "epoch": 0.9963623884694578, "grad_norm": 0.7723829184786001, "learning_rate": 3.5359344957741625e-10, "loss": 0.2409, "step": 29034 }, { "epoch": 0.9963967055593685, "grad_norm": 0.711378760268627, "learning_rate": 3.4701518653590396e-10, "loss": 0.2171, "step": 29035 }, { "epoch": 0.9964310226492793, "grad_norm": 0.7897295740517385, "learning_rate": 3.404986883781547e-10, "loss": 0.2496, "step": 29036 }, { "epoch": 0.9964653397391902, "grad_norm": 1.0581216035473069, "learning_rate": 3.340439551846597e-10, "loss": 0.2691, "step": 29037 }, { "epoch": 0.9964996568291009, "grad_norm": 0.7778835799001409, "learning_rate": 3.276509870342448e-10, "loss": 0.267, "step": 29038 }, { "epoch": 0.9965339739190117, "grad_norm": 0.7221373456907515, "learning_rate": 3.2131978400740116e-10, "loss": 0.2346, "step": 29039 }, { "epoch": 0.9965682910089224, "grad_norm": 0.7407467808724089, "learning_rate": 3.150503461812893e-10, "loss": 0.2345, "step": 29040 }, { "epoch": 0.9966026080988332, "grad_norm": 0.6968035603565781, "learning_rate": 3.0884267363362477e-10, "loss": 0.2712, "step": 29041 }, { "epoch": 0.996636925188744, "grad_norm": 0.8621327661941832, "learning_rate": 3.026967664415681e-10, "loss": 0.3033, "step": 29042 }, { "epoch": 0.9966712422786548, "grad_norm": 0.7719080739705454, "learning_rate": 2.9661262468005934e-10, "loss": 0.2164, "step": 29043 }, { "epoch": 0.9967055593685655, "grad_norm": 0.702310678413556, "learning_rate": 2.9059024842514883e-10, "loss": 0.2646, "step": 29044 }, { "epoch": 0.9967398764584763, "grad_norm": 0.711812655366577, "learning_rate": 2.846296377506663e-10, "loss": 0.2221, "step": 29045 }, { "epoch": 0.9967741935483871, "grad_norm": 0.7100065635690681, "learning_rate": 2.7873079273044165e-10, "loss": 0.2442, "step": 29046 }, { "epoch": 0.9968085106382979, "grad_norm": 0.7720361759653358, "learning_rate": 2.728937134377496e-10, "loss": 0.2543, "step": 29047 }, { "epoch": 0.9968428277282086, "grad_norm": 0.7133993023075801, "learning_rate": 2.6711839994419954e-10, "loss": 0.1934, "step": 29048 }, { "epoch": 0.9968771448181194, "grad_norm": 0.7856598475493074, "learning_rate": 2.614048523214008e-10, "loss": 0.2453, "step": 29049 }, { "epoch": 0.9969114619080301, "grad_norm": 0.7634967490453298, "learning_rate": 2.5575307063929744e-10, "loss": 0.2635, "step": 29050 }, { "epoch": 0.996945778997941, "grad_norm": 0.7967710370447562, "learning_rate": 2.5016305496894377e-10, "loss": 0.2784, "step": 29051 }, { "epoch": 0.9969800960878518, "grad_norm": 0.723229336634318, "learning_rate": 2.446348053780634e-10, "loss": 0.2919, "step": 29052 }, { "epoch": 0.9970144131777625, "grad_norm": 0.9068017916904691, "learning_rate": 2.391683219360452e-10, "loss": 0.2506, "step": 29053 }, { "epoch": 0.9970487302676733, "grad_norm": 0.71665730078793, "learning_rate": 2.3376360470950266e-10, "loss": 0.2393, "step": 29054 }, { "epoch": 0.9970830473575841, "grad_norm": 0.7544320631701656, "learning_rate": 2.2842065376560418e-10, "loss": 0.2269, "step": 29055 }, { "epoch": 0.9971173644474949, "grad_norm": 0.7425707668835326, "learning_rate": 2.2313946917096318e-10, "loss": 0.2874, "step": 29056 }, { "epoch": 0.9971516815374056, "grad_norm": 0.7617042506861583, "learning_rate": 2.179200509899726e-10, "loss": 0.2855, "step": 29057 }, { "epoch": 0.9971859986273164, "grad_norm": 0.7818845538179804, "learning_rate": 2.1276239928702536e-10, "loss": 0.2595, "step": 29058 }, { "epoch": 0.9972203157172271, "grad_norm": 0.8265009508292769, "learning_rate": 2.076665141270695e-10, "loss": 0.2492, "step": 29059 }, { "epoch": 0.997254632807138, "grad_norm": 0.8698103047271187, "learning_rate": 2.0263239557116733e-10, "loss": 0.2545, "step": 29060 }, { "epoch": 0.9972889498970487, "grad_norm": 0.8112090364779622, "learning_rate": 1.976600436831566e-10, "loss": 0.237, "step": 29061 }, { "epoch": 0.9973232669869595, "grad_norm": 0.7429189254010374, "learning_rate": 1.9274945852409966e-10, "loss": 0.2513, "step": 29062 }, { "epoch": 0.9973575840768703, "grad_norm": 0.7798544375495731, "learning_rate": 1.879006401539485e-10, "loss": 0.2434, "step": 29063 }, { "epoch": 0.997391901166781, "grad_norm": 0.7431610011676207, "learning_rate": 1.8311358863321028e-10, "loss": 0.2252, "step": 29064 }, { "epoch": 0.9974262182566919, "grad_norm": 0.812420307263122, "learning_rate": 1.7838830402072681e-10, "loss": 0.2634, "step": 29065 }, { "epoch": 0.9974605353466026, "grad_norm": 0.6925778832850207, "learning_rate": 1.7372478637533996e-10, "loss": 0.2607, "step": 29066 }, { "epoch": 0.9974948524365134, "grad_norm": 0.7657617869933424, "learning_rate": 1.6912303575422618e-10, "loss": 0.2221, "step": 29067 }, { "epoch": 0.9975291695264241, "grad_norm": 0.7125412756817328, "learning_rate": 1.6458305221400684e-10, "loss": 0.248, "step": 29068 }, { "epoch": 0.997563486616335, "grad_norm": 0.737404870725193, "learning_rate": 1.6010483581185844e-10, "loss": 0.2476, "step": 29069 }, { "epoch": 0.9975978037062457, "grad_norm": 0.9453870967837528, "learning_rate": 1.5568838660162678e-10, "loss": 0.2632, "step": 29070 }, { "epoch": 0.9976321207961565, "grad_norm": 0.8456348649636012, "learning_rate": 1.5133370463882303e-10, "loss": 0.2652, "step": 29071 }, { "epoch": 0.9976664378860672, "grad_norm": 0.8563857816906219, "learning_rate": 1.4704078997729298e-10, "loss": 0.2784, "step": 29072 }, { "epoch": 0.997700754975978, "grad_norm": 0.8479213927263086, "learning_rate": 1.4280964266977226e-10, "loss": 0.2521, "step": 29073 }, { "epoch": 0.9977350720658889, "grad_norm": 0.7531221285388489, "learning_rate": 1.3864026276844134e-10, "loss": 0.2251, "step": 29074 }, { "epoch": 0.9977693891557996, "grad_norm": 0.7882914623409334, "learning_rate": 1.3453265032492558e-10, "loss": 0.2467, "step": 29075 }, { "epoch": 0.9978037062457104, "grad_norm": 0.7709929310104674, "learning_rate": 1.3048680539029524e-10, "loss": 0.2733, "step": 29076 }, { "epoch": 0.9978380233356211, "grad_norm": 0.7455059712627182, "learning_rate": 1.2650272801395524e-10, "loss": 0.2638, "step": 29077 }, { "epoch": 0.997872340425532, "grad_norm": 0.8106573676913801, "learning_rate": 1.2258041824531052e-10, "loss": 0.2224, "step": 29078 }, { "epoch": 0.9979066575154427, "grad_norm": 0.8130716475689939, "learning_rate": 1.187198761326558e-10, "loss": 0.2492, "step": 29079 }, { "epoch": 0.9979409746053535, "grad_norm": 0.8873098328230786, "learning_rate": 1.1492110172428573e-10, "loss": 0.2416, "step": 29080 }, { "epoch": 0.9979752916952642, "grad_norm": 0.7970986720092448, "learning_rate": 1.111840950668297e-10, "loss": 0.2758, "step": 29081 }, { "epoch": 0.998009608785175, "grad_norm": 0.8987859857670449, "learning_rate": 1.0750885620636198e-10, "loss": 0.2925, "step": 29082 }, { "epoch": 0.9980439258750858, "grad_norm": 0.767357908320562, "learning_rate": 1.0389538518784658e-10, "loss": 0.2852, "step": 29083 }, { "epoch": 0.9980782429649966, "grad_norm": 0.8068140428775306, "learning_rate": 1.0034368205680267e-10, "loss": 0.2505, "step": 29084 }, { "epoch": 0.9981125600549073, "grad_norm": 0.7747267717649575, "learning_rate": 9.685374685652893e-11, "loss": 0.2257, "step": 29085 }, { "epoch": 0.9981468771448181, "grad_norm": 0.7617287219971559, "learning_rate": 9.342557963087917e-11, "loss": 0.2257, "step": 29086 }, { "epoch": 0.9981811942347288, "grad_norm": 0.8780227805017121, "learning_rate": 9.005918042093165e-11, "loss": 0.3078, "step": 29087 }, { "epoch": 0.9982155113246397, "grad_norm": 0.8460718658425048, "learning_rate": 8.675454926942995e-11, "loss": 0.2932, "step": 29088 }, { "epoch": 0.9982498284145505, "grad_norm": 0.7611541484784821, "learning_rate": 8.351168621634209e-11, "loss": 0.2402, "step": 29089 }, { "epoch": 0.9982841455044612, "grad_norm": 0.784895810883762, "learning_rate": 8.033059130219122e-11, "loss": 0.2129, "step": 29090 }, { "epoch": 0.998318462594372, "grad_norm": 0.7622365541152432, "learning_rate": 7.721126456639028e-11, "loss": 0.2238, "step": 29091 }, { "epoch": 0.9983527796842828, "grad_norm": 0.7421890887886491, "learning_rate": 7.415370604724193e-11, "loss": 0.2605, "step": 29092 }, { "epoch": 0.9983870967741936, "grad_norm": 0.7985887334894679, "learning_rate": 7.115791578249376e-11, "loss": 0.3221, "step": 29093 }, { "epoch": 0.9984214138641043, "grad_norm": 0.7681367799190076, "learning_rate": 6.822389380933825e-11, "loss": 0.2742, "step": 29094 }, { "epoch": 0.9984557309540151, "grad_norm": 0.8152183234860305, "learning_rate": 6.535164016385764e-11, "loss": 0.2998, "step": 29095 }, { "epoch": 0.9984900480439258, "grad_norm": 0.8216809352519405, "learning_rate": 6.254115488157908e-11, "loss": 0.2472, "step": 29096 }, { "epoch": 0.9985243651338367, "grad_norm": 0.8108446779791191, "learning_rate": 5.979243799747458e-11, "loss": 0.2965, "step": 29097 }, { "epoch": 0.9985586822237474, "grad_norm": 0.7950217329190481, "learning_rate": 5.7105489545405955e-11, "loss": 0.2362, "step": 29098 }, { "epoch": 0.9985929993136582, "grad_norm": 0.6940599242963229, "learning_rate": 5.4480309558124776e-11, "loss": 0.2367, "step": 29099 }, { "epoch": 0.9986273164035689, "grad_norm": 0.8520884427382289, "learning_rate": 5.1916898068382627e-11, "loss": 0.284, "step": 29100 }, { "epoch": 0.9986616334934798, "grad_norm": 0.7928442715490089, "learning_rate": 4.941525510782086e-11, "loss": 0.2434, "step": 29101 }, { "epoch": 0.9986959505833906, "grad_norm": 0.6996815667322038, "learning_rate": 4.697538070697061e-11, "loss": 0.2023, "step": 29102 }, { "epoch": 0.9987302676733013, "grad_norm": 0.7227926615424307, "learning_rate": 4.459727489691812e-11, "loss": 0.2244, "step": 29103 }, { "epoch": 0.9987645847632121, "grad_norm": 0.869061895588273, "learning_rate": 4.228093770652919e-11, "loss": 0.2744, "step": 29104 }, { "epoch": 0.9987989018531228, "grad_norm": 0.8118893637057166, "learning_rate": 4.002636916411451e-11, "loss": 0.2539, "step": 29105 }, { "epoch": 0.9988332189430337, "grad_norm": 0.8216516363591869, "learning_rate": 3.783356929742965e-11, "loss": 0.2593, "step": 29106 }, { "epoch": 0.9988675360329444, "grad_norm": 0.831808579188694, "learning_rate": 3.57025381347853e-11, "loss": 0.1871, "step": 29107 }, { "epoch": 0.9989018531228552, "grad_norm": 0.7707220565358286, "learning_rate": 3.363327570116148e-11, "loss": 0.2321, "step": 29108 }, { "epoch": 0.9989361702127659, "grad_norm": 0.6927789277954957, "learning_rate": 3.162578202264843e-11, "loss": 0.3169, "step": 29109 }, { "epoch": 0.9989704873026767, "grad_norm": 0.7926228325570628, "learning_rate": 2.968005712367106e-11, "loss": 0.2628, "step": 29110 }, { "epoch": 0.9990048043925875, "grad_norm": 0.7508975485617561, "learning_rate": 2.7796101029209378e-11, "loss": 0.2347, "step": 29111 }, { "epoch": 0.9990391214824983, "grad_norm": 0.8870882724071324, "learning_rate": 2.5973913761467852e-11, "loss": 0.2871, "step": 29112 }, { "epoch": 0.999073438572409, "grad_norm": 0.8869796088795857, "learning_rate": 2.421349534376116e-11, "loss": 0.315, "step": 29113 }, { "epoch": 0.9991077556623198, "grad_norm": 1.0477154568134834, "learning_rate": 2.2514845797183548e-11, "loss": 0.2691, "step": 29114 }, { "epoch": 0.9991420727522307, "grad_norm": 0.7849744784533319, "learning_rate": 2.087796514282925e-11, "loss": 0.2425, "step": 29115 }, { "epoch": 0.9991763898421414, "grad_norm": 0.7171777141022275, "learning_rate": 1.9302853401792497e-11, "loss": 0.2743, "step": 29116 }, { "epoch": 0.9992107069320522, "grad_norm": 0.9202164007999054, "learning_rate": 1.778951059239198e-11, "loss": 0.3013, "step": 29117 }, { "epoch": 0.9992450240219629, "grad_norm": 0.7252306875341407, "learning_rate": 1.6337936734056593e-11, "loss": 0.2415, "step": 29118 }, { "epoch": 0.9992793411118737, "grad_norm": 0.8478642849383088, "learning_rate": 1.4948131843994795e-11, "loss": 0.291, "step": 29119 }, { "epoch": 0.9993136582017845, "grad_norm": 0.7346676299470374, "learning_rate": 1.3620095939970157e-11, "loss": 0.2178, "step": 29120 }, { "epoch": 0.9993479752916953, "grad_norm": 0.8626480698835028, "learning_rate": 1.2353829038636023e-11, "loss": 0.2651, "step": 29121 }, { "epoch": 0.999382292381606, "grad_norm": 0.8703658663713244, "learning_rate": 1.1149331154980403e-11, "loss": 0.2626, "step": 29122 }, { "epoch": 0.9994166094715168, "grad_norm": 0.7476071908018329, "learning_rate": 1.0006602303991309e-11, "loss": 0.2269, "step": 29123 }, { "epoch": 0.9994509265614276, "grad_norm": 0.7386396347500359, "learning_rate": 8.925642500101638e-12, "loss": 0.2221, "step": 29124 }, { "epoch": 0.9994852436513384, "grad_norm": 0.6987628293160404, "learning_rate": 7.90645175663407e-12, "loss": 0.2335, "step": 29125 }, { "epoch": 0.9995195607412491, "grad_norm": 0.7480217747975785, "learning_rate": 6.949030086356168e-12, "loss": 0.2592, "step": 29126 }, { "epoch": 0.9995538778311599, "grad_norm": 0.732527794260168, "learning_rate": 6.053377500370161e-12, "loss": 0.2382, "step": 29127 }, { "epoch": 0.9995881949210706, "grad_norm": 0.8330782563840613, "learning_rate": 5.2194940103333925e-12, "loss": 0.2529, "step": 29128 }, { "epoch": 0.9996225120109815, "grad_norm": 0.7861171931519434, "learning_rate": 4.447379626237869e-12, "loss": 0.289, "step": 29129 }, { "epoch": 0.9996568291008923, "grad_norm": 0.8498074704422829, "learning_rate": 3.737034357520486e-12, "loss": 0.2725, "step": 29130 }, { "epoch": 0.999691146190803, "grad_norm": 0.6891917544262532, "learning_rate": 3.0884582136181395e-12, "loss": 0.2902, "step": 29131 }, { "epoch": 0.9997254632807138, "grad_norm": 0.7649990021331056, "learning_rate": 2.5016512017472795e-12, "loss": 0.2145, "step": 29132 }, { "epoch": 0.9997597803706245, "grad_norm": 0.818111812081239, "learning_rate": 1.9766133296794664e-12, "loss": 0.2739, "step": 29133 }, { "epoch": 0.9997940974605354, "grad_norm": 0.9470143211418857, "learning_rate": 1.513344604076039e-12, "loss": 0.2362, "step": 29134 }, { "epoch": 0.9998284145504461, "grad_norm": 0.8480659342128317, "learning_rate": 1.1118450299330007e-12, "loss": 0.314, "step": 29135 }, { "epoch": 0.9998627316403569, "grad_norm": 0.7964631413864227, "learning_rate": 7.721146128014667e-13, "loss": 0.2417, "step": 29136 }, { "epoch": 0.9998970487302676, "grad_norm": 0.8854687707745347, "learning_rate": 4.941533571223289e-13, "loss": 0.3002, "step": 29137 }, { "epoch": 0.9999313658201785, "grad_norm": 0.7566495455803719, "learning_rate": 2.779612651160335e-13, "loss": 0.2716, "step": 29138 }, { "epoch": 0.9999656829100892, "grad_norm": 0.752245986967799, "learning_rate": 1.2353834066836102e-13, "loss": 0.2507, "step": 29139 }, { "epoch": 1.0, "grad_norm": 0.6918420963720083, "learning_rate": 3.088458544464601e-14, "loss": 0.2416, "step": 29140 }, { "epoch": 1.0, "step": 29140, "total_flos": 9.205896803726131e+16, "train_loss": 0.3028370787565358, "train_runtime": 115479.966, "train_samples_per_second": 64.598, "train_steps_per_second": 0.252 } ], "logging_steps": 1.0, "max_steps": 29140, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.205896803726131e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }