{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6228, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016056518946692356, "grad_norm": 19.07044219970703, "learning_rate": 1.0000000000000002e-06, "loss": 2.2765, "step": 1 }, { "epoch": 0.0003211303789338471, "grad_norm": 17.860671997070312, "learning_rate": 2.0000000000000003e-06, "loss": 2.2325, "step": 2 }, { "epoch": 0.0004816955684007707, "grad_norm": 16.178730010986328, "learning_rate": 3e-06, "loss": 2.1528, "step": 3 }, { "epoch": 0.0006422607578676942, "grad_norm": 19.609806060791016, "learning_rate": 4.000000000000001e-06, "loss": 2.2842, "step": 4 }, { "epoch": 0.0008028259473346178, "grad_norm": 18.418621063232422, "learning_rate": 5e-06, "loss": 2.2578, "step": 5 }, { "epoch": 0.0009633911368015414, "grad_norm": 17.91242027282715, "learning_rate": 6e-06, "loss": 2.2732, "step": 6 }, { "epoch": 0.001123956326268465, "grad_norm": 19.31393814086914, "learning_rate": 7.000000000000001e-06, "loss": 2.24, "step": 7 }, { "epoch": 0.0012845215157353885, "grad_norm": 20.467914581298828, "learning_rate": 8.000000000000001e-06, "loss": 2.2559, "step": 8 }, { "epoch": 0.001445086705202312, "grad_norm": 16.672758102416992, "learning_rate": 9e-06, "loss": 2.178, "step": 9 }, { "epoch": 0.0016056518946692357, "grad_norm": 14.895868301391602, "learning_rate": 1e-05, "loss": 2.1822, "step": 10 }, { "epoch": 0.0017662170841361592, "grad_norm": 14.540231704711914, "learning_rate": 1.1000000000000001e-05, "loss": 2.1189, "step": 11 }, { "epoch": 0.0019267822736030828, "grad_norm": 13.031673431396484, "learning_rate": 1.2e-05, "loss": 2.0964, "step": 12 }, { "epoch": 0.0020873474630700066, "grad_norm": 8.03255844116211, "learning_rate": 1.3000000000000001e-05, "loss": 1.9923, "step": 13 }, { "epoch": 0.00224791265253693, "grad_norm": 11.225895881652832, "learning_rate": 1.4000000000000001e-05, "loss": 1.9474, "step": 14 }, { "epoch": 0.0024084778420038534, "grad_norm": 5.122796058654785, "learning_rate": 1.5e-05, "loss": 1.8987, "step": 15 }, { "epoch": 0.002569043031470777, "grad_norm": 7.303050518035889, "learning_rate": 1.6000000000000003e-05, "loss": 1.9861, "step": 16 }, { "epoch": 0.0027296082209377005, "grad_norm": 3.677480697631836, "learning_rate": 1.7000000000000003e-05, "loss": 1.954, "step": 17 }, { "epoch": 0.002890173410404624, "grad_norm": 2.2276861667633057, "learning_rate": 1.8e-05, "loss": 1.9711, "step": 18 }, { "epoch": 0.0030507385998715477, "grad_norm": 1.7764992713928223, "learning_rate": 1.9e-05, "loss": 1.9135, "step": 19 }, { "epoch": 0.0032113037893384713, "grad_norm": 1.8619823455810547, "learning_rate": 2e-05, "loss": 1.8812, "step": 20 }, { "epoch": 0.003371868978805395, "grad_norm": 1.7041542530059814, "learning_rate": 2.1e-05, "loss": 1.8469, "step": 21 }, { "epoch": 0.0035324341682723185, "grad_norm": 1.5247529745101929, "learning_rate": 2.2000000000000003e-05, "loss": 1.9096, "step": 22 }, { "epoch": 0.003692999357739242, "grad_norm": 1.4550870656967163, "learning_rate": 2.3000000000000003e-05, "loss": 1.8792, "step": 23 }, { "epoch": 0.0038535645472061657, "grad_norm": 1.0546276569366455, "learning_rate": 2.4e-05, "loss": 1.7961, "step": 24 }, { "epoch": 0.00401412973667309, "grad_norm": 0.8481073379516602, "learning_rate": 2.5e-05, "loss": 1.8205, "step": 25 }, { "epoch": 0.004174694926140013, "grad_norm": 0.8418527245521545, "learning_rate": 2.6000000000000002e-05, "loss": 1.8691, "step": 26 }, { "epoch": 0.004335260115606936, "grad_norm": 0.6527712941169739, "learning_rate": 2.7000000000000002e-05, "loss": 1.839, "step": 27 }, { "epoch": 0.00449582530507386, "grad_norm": 0.5822799205780029, "learning_rate": 2.8000000000000003e-05, "loss": 1.8679, "step": 28 }, { "epoch": 0.004656390494540783, "grad_norm": 0.6079473495483398, "learning_rate": 2.9e-05, "loss": 1.8828, "step": 29 }, { "epoch": 0.004816955684007707, "grad_norm": 0.6239740252494812, "learning_rate": 3e-05, "loss": 1.7649, "step": 30 }, { "epoch": 0.00497752087347463, "grad_norm": 0.5457854866981506, "learning_rate": 3.1e-05, "loss": 1.7975, "step": 31 }, { "epoch": 0.005138086062941554, "grad_norm": 0.7451772093772888, "learning_rate": 3.2000000000000005e-05, "loss": 1.8618, "step": 32 }, { "epoch": 0.0052986512524084775, "grad_norm": 0.665291428565979, "learning_rate": 3.3e-05, "loss": 1.8, "step": 33 }, { "epoch": 0.005459216441875401, "grad_norm": 0.6924399733543396, "learning_rate": 3.4000000000000007e-05, "loss": 1.8253, "step": 34 }, { "epoch": 0.005619781631342325, "grad_norm": 0.7987289428710938, "learning_rate": 3.5e-05, "loss": 1.9036, "step": 35 }, { "epoch": 0.005780346820809248, "grad_norm": 0.5785585641860962, "learning_rate": 3.6e-05, "loss": 1.8167, "step": 36 }, { "epoch": 0.005940912010276172, "grad_norm": 0.576931357383728, "learning_rate": 3.7e-05, "loss": 1.8636, "step": 37 }, { "epoch": 0.0061014771997430954, "grad_norm": 0.4122508466243744, "learning_rate": 3.8e-05, "loss": 1.7927, "step": 38 }, { "epoch": 0.006262042389210019, "grad_norm": 0.36012470722198486, "learning_rate": 3.9000000000000006e-05, "loss": 1.8464, "step": 39 }, { "epoch": 0.006422607578676943, "grad_norm": 0.37618574500083923, "learning_rate": 4e-05, "loss": 1.7824, "step": 40 }, { "epoch": 0.006583172768143866, "grad_norm": 0.32919710874557495, "learning_rate": 4.1e-05, "loss": 1.6654, "step": 41 }, { "epoch": 0.00674373795761079, "grad_norm": 0.3425315320491791, "learning_rate": 4.2e-05, "loss": 1.7902, "step": 42 }, { "epoch": 0.006904303147077713, "grad_norm": 0.4032002091407776, "learning_rate": 4.3e-05, "loss": 1.7432, "step": 43 }, { "epoch": 0.007064868336544637, "grad_norm": 0.37265583872795105, "learning_rate": 4.4000000000000006e-05, "loss": 1.8446, "step": 44 }, { "epoch": 0.0072254335260115606, "grad_norm": 0.4276624619960785, "learning_rate": 4.5e-05, "loss": 1.7643, "step": 45 }, { "epoch": 0.007385998715478484, "grad_norm": 0.39329883456230164, "learning_rate": 4.600000000000001e-05, "loss": 1.771, "step": 46 }, { "epoch": 0.007546563904945408, "grad_norm": 0.3743005394935608, "learning_rate": 4.7e-05, "loss": 1.7001, "step": 47 }, { "epoch": 0.007707129094412331, "grad_norm": 0.3529745638370514, "learning_rate": 4.8e-05, "loss": 1.8086, "step": 48 }, { "epoch": 0.007867694283879255, "grad_norm": 0.33023884892463684, "learning_rate": 4.9e-05, "loss": 1.7104, "step": 49 }, { "epoch": 0.00802825947334618, "grad_norm": 0.3542814552783966, "learning_rate": 5e-05, "loss": 1.7174, "step": 50 }, { "epoch": 0.008188824662813102, "grad_norm": 0.29632100462913513, "learning_rate": 5.1000000000000006e-05, "loss": 1.6615, "step": 51 }, { "epoch": 0.008349389852280027, "grad_norm": 0.3719535171985626, "learning_rate": 5.2000000000000004e-05, "loss": 1.7675, "step": 52 }, { "epoch": 0.00850995504174695, "grad_norm": 0.3312448263168335, "learning_rate": 5.300000000000001e-05, "loss": 1.7556, "step": 53 }, { "epoch": 0.008670520231213872, "grad_norm": 0.31105342507362366, "learning_rate": 5.4000000000000005e-05, "loss": 1.795, "step": 54 }, { "epoch": 0.008831085420680796, "grad_norm": 0.3008953928947449, "learning_rate": 5.500000000000001e-05, "loss": 1.6527, "step": 55 }, { "epoch": 0.00899165061014772, "grad_norm": 0.30014312267303467, "learning_rate": 5.6000000000000006e-05, "loss": 1.7012, "step": 56 }, { "epoch": 0.009152215799614644, "grad_norm": 0.3198379874229431, "learning_rate": 5.6999999999999996e-05, "loss": 1.7207, "step": 57 }, { "epoch": 0.009312780989081566, "grad_norm": 0.3174596428871155, "learning_rate": 5.8e-05, "loss": 1.7685, "step": 58 }, { "epoch": 0.00947334617854849, "grad_norm": 0.3041522800922394, "learning_rate": 5.9e-05, "loss": 1.6611, "step": 59 }, { "epoch": 0.009633911368015413, "grad_norm": 0.30511003732681274, "learning_rate": 6e-05, "loss": 1.7535, "step": 60 }, { "epoch": 0.009794476557482338, "grad_norm": 0.3328545093536377, "learning_rate": 6.1e-05, "loss": 1.738, "step": 61 }, { "epoch": 0.00995504174694926, "grad_norm": 0.29658642411231995, "learning_rate": 6.2e-05, "loss": 1.6807, "step": 62 }, { "epoch": 0.010115606936416185, "grad_norm": 0.29087984561920166, "learning_rate": 6.3e-05, "loss": 1.6995, "step": 63 }, { "epoch": 0.010276172125883108, "grad_norm": 0.3203093111515045, "learning_rate": 6.400000000000001e-05, "loss": 1.6552, "step": 64 }, { "epoch": 0.010436737315350032, "grad_norm": 0.3448745012283325, "learning_rate": 6.500000000000001e-05, "loss": 1.684, "step": 65 }, { "epoch": 0.010597302504816955, "grad_norm": 0.30873870849609375, "learning_rate": 6.6e-05, "loss": 1.7397, "step": 66 }, { "epoch": 0.01075786769428388, "grad_norm": 0.3079901933670044, "learning_rate": 6.7e-05, "loss": 1.757, "step": 67 }, { "epoch": 0.010918432883750802, "grad_norm": 0.30332741141319275, "learning_rate": 6.800000000000001e-05, "loss": 1.6954, "step": 68 }, { "epoch": 0.011078998073217727, "grad_norm": 0.30804821848869324, "learning_rate": 6.9e-05, "loss": 1.6608, "step": 69 }, { "epoch": 0.01123956326268465, "grad_norm": 0.33730414509773254, "learning_rate": 7e-05, "loss": 1.6891, "step": 70 }, { "epoch": 0.011400128452151574, "grad_norm": 0.3238990306854248, "learning_rate": 7.1e-05, "loss": 1.6052, "step": 71 }, { "epoch": 0.011560693641618497, "grad_norm": 0.32166120409965515, "learning_rate": 7.2e-05, "loss": 1.687, "step": 72 }, { "epoch": 0.011721258831085421, "grad_norm": 0.3208368122577667, "learning_rate": 7.3e-05, "loss": 1.6878, "step": 73 }, { "epoch": 0.011881824020552344, "grad_norm": 0.34366941452026367, "learning_rate": 7.4e-05, "loss": 1.7257, "step": 74 }, { "epoch": 0.012042389210019268, "grad_norm": 0.3077256381511688, "learning_rate": 7.500000000000001e-05, "loss": 1.6705, "step": 75 }, { "epoch": 0.012202954399486191, "grad_norm": 0.3369261622428894, "learning_rate": 7.6e-05, "loss": 1.7493, "step": 76 }, { "epoch": 0.012363519588953115, "grad_norm": 0.322565495967865, "learning_rate": 7.7e-05, "loss": 1.7214, "step": 77 }, { "epoch": 0.012524084778420038, "grad_norm": 0.30557385087013245, "learning_rate": 7.800000000000001e-05, "loss": 1.6356, "step": 78 }, { "epoch": 0.012684649967886963, "grad_norm": 0.3298359215259552, "learning_rate": 7.900000000000001e-05, "loss": 1.8063, "step": 79 }, { "epoch": 0.012845215157353885, "grad_norm": 0.3066646158695221, "learning_rate": 8e-05, "loss": 1.6539, "step": 80 }, { "epoch": 0.01300578034682081, "grad_norm": 0.29285869002342224, "learning_rate": 8.1e-05, "loss": 1.6448, "step": 81 }, { "epoch": 0.013166345536287732, "grad_norm": 0.3066501021385193, "learning_rate": 8.2e-05, "loss": 1.6828, "step": 82 }, { "epoch": 0.013326910725754657, "grad_norm": 0.31421324610710144, "learning_rate": 8.3e-05, "loss": 1.6947, "step": 83 }, { "epoch": 0.01348747591522158, "grad_norm": 0.33448484539985657, "learning_rate": 8.4e-05, "loss": 1.7222, "step": 84 }, { "epoch": 0.013648041104688504, "grad_norm": 0.3122430443763733, "learning_rate": 8.5e-05, "loss": 1.6433, "step": 85 }, { "epoch": 0.013808606294155427, "grad_norm": 0.31773683428764343, "learning_rate": 8.6e-05, "loss": 1.7606, "step": 86 }, { "epoch": 0.013969171483622351, "grad_norm": 0.29960861802101135, "learning_rate": 8.7e-05, "loss": 1.6992, "step": 87 }, { "epoch": 0.014129736673089274, "grad_norm": 0.3499071002006531, "learning_rate": 8.800000000000001e-05, "loss": 1.7152, "step": 88 }, { "epoch": 0.014290301862556198, "grad_norm": 0.3297984004020691, "learning_rate": 8.900000000000001e-05, "loss": 1.731, "step": 89 }, { "epoch": 0.014450867052023121, "grad_norm": 0.3231171667575836, "learning_rate": 9e-05, "loss": 1.6932, "step": 90 }, { "epoch": 0.014611432241490046, "grad_norm": 0.31853851675987244, "learning_rate": 9.1e-05, "loss": 1.7144, "step": 91 }, { "epoch": 0.014771997430956968, "grad_norm": 0.31341439485549927, "learning_rate": 9.200000000000001e-05, "loss": 1.6652, "step": 92 }, { "epoch": 0.014932562620423893, "grad_norm": 0.2927674353122711, "learning_rate": 9.300000000000001e-05, "loss": 1.6714, "step": 93 }, { "epoch": 0.015093127809890815, "grad_norm": 0.3104013502597809, "learning_rate": 9.4e-05, "loss": 1.65, "step": 94 }, { "epoch": 0.01525369299935774, "grad_norm": 0.3095431625843048, "learning_rate": 9.5e-05, "loss": 1.6997, "step": 95 }, { "epoch": 0.015414258188824663, "grad_norm": 0.3033742606639862, "learning_rate": 9.6e-05, "loss": 1.7215, "step": 96 }, { "epoch": 0.015574823378291587, "grad_norm": 0.3096693456172943, "learning_rate": 9.7e-05, "loss": 1.6991, "step": 97 }, { "epoch": 0.01573538856775851, "grad_norm": 0.30744585394859314, "learning_rate": 9.8e-05, "loss": 1.6688, "step": 98 }, { "epoch": 0.015895953757225433, "grad_norm": 0.30216723680496216, "learning_rate": 9.900000000000001e-05, "loss": 1.6727, "step": 99 }, { "epoch": 0.01605651894669236, "grad_norm": 0.31961628794670105, "learning_rate": 0.0001, "loss": 1.6672, "step": 100 }, { "epoch": 0.01621708413615928, "grad_norm": 0.29976239800453186, "learning_rate": 0.0001, "loss": 1.643, "step": 101 }, { "epoch": 0.016377649325626204, "grad_norm": 0.335050493478775, "learning_rate": 0.0001, "loss": 1.6817, "step": 102 }, { "epoch": 0.016538214515093127, "grad_norm": 0.31094300746917725, "learning_rate": 0.0001, "loss": 1.6277, "step": 103 }, { "epoch": 0.016698779704560053, "grad_norm": 0.32502052187919617, "learning_rate": 0.0001, "loss": 1.765, "step": 104 }, { "epoch": 0.016859344894026976, "grad_norm": 0.33868634700775146, "learning_rate": 0.0001, "loss": 1.6906, "step": 105 }, { "epoch": 0.0170199100834939, "grad_norm": 0.29988473653793335, "learning_rate": 0.0001, "loss": 1.6936, "step": 106 }, { "epoch": 0.01718047527296082, "grad_norm": 0.3123616874217987, "learning_rate": 0.0001, "loss": 1.6805, "step": 107 }, { "epoch": 0.017341040462427744, "grad_norm": 0.3224555253982544, "learning_rate": 0.0001, "loss": 1.6686, "step": 108 }, { "epoch": 0.01750160565189467, "grad_norm": 0.31106632947921753, "learning_rate": 0.0001, "loss": 1.6977, "step": 109 }, { "epoch": 0.017662170841361593, "grad_norm": 0.30861422419548035, "learning_rate": 0.0001, "loss": 1.6124, "step": 110 }, { "epoch": 0.017822736030828516, "grad_norm": 0.31756776571273804, "learning_rate": 0.0001, "loss": 1.7024, "step": 111 }, { "epoch": 0.01798330122029544, "grad_norm": 0.3358090817928314, "learning_rate": 0.0001, "loss": 1.7037, "step": 112 }, { "epoch": 0.018143866409762364, "grad_norm": 0.3067599833011627, "learning_rate": 0.0001, "loss": 1.6947, "step": 113 }, { "epoch": 0.018304431599229287, "grad_norm": 0.33327704668045044, "learning_rate": 0.0001, "loss": 1.6335, "step": 114 }, { "epoch": 0.01846499678869621, "grad_norm": 0.2796766459941864, "learning_rate": 0.0001, "loss": 1.5963, "step": 115 }, { "epoch": 0.018625561978163133, "grad_norm": 0.3339906632900238, "learning_rate": 0.0001, "loss": 1.6542, "step": 116 }, { "epoch": 0.01878612716763006, "grad_norm": 0.3105831742286682, "learning_rate": 0.0001, "loss": 1.6735, "step": 117 }, { "epoch": 0.01894669235709698, "grad_norm": 0.3182494640350342, "learning_rate": 0.0001, "loss": 1.6692, "step": 118 }, { "epoch": 0.019107257546563904, "grad_norm": 0.3525417745113373, "learning_rate": 0.0001, "loss": 1.7499, "step": 119 }, { "epoch": 0.019267822736030827, "grad_norm": 0.31575801968574524, "learning_rate": 0.0001, "loss": 1.6368, "step": 120 }, { "epoch": 0.019428387925497753, "grad_norm": 0.35428088903427124, "learning_rate": 0.0001, "loss": 1.8006, "step": 121 }, { "epoch": 0.019588953114964676, "grad_norm": 0.3589598536491394, "learning_rate": 0.0001, "loss": 1.717, "step": 122 }, { "epoch": 0.0197495183044316, "grad_norm": 0.32543036341667175, "learning_rate": 0.0001, "loss": 1.8, "step": 123 }, { "epoch": 0.01991008349389852, "grad_norm": 0.298537939786911, "learning_rate": 0.0001, "loss": 1.6333, "step": 124 }, { "epoch": 0.020070648683365448, "grad_norm": 0.3392146825790405, "learning_rate": 0.0001, "loss": 1.6314, "step": 125 }, { "epoch": 0.02023121387283237, "grad_norm": 0.29613935947418213, "learning_rate": 0.0001, "loss": 1.6209, "step": 126 }, { "epoch": 0.020391779062299293, "grad_norm": 0.3168414533138275, "learning_rate": 0.0001, "loss": 1.5678, "step": 127 }, { "epoch": 0.020552344251766216, "grad_norm": 0.2909003794193268, "learning_rate": 0.0001, "loss": 1.6471, "step": 128 }, { "epoch": 0.020712909441233142, "grad_norm": 0.3160809576511383, "learning_rate": 0.0001, "loss": 1.6526, "step": 129 }, { "epoch": 0.020873474630700065, "grad_norm": 0.296406090259552, "learning_rate": 0.0001, "loss": 1.7082, "step": 130 }, { "epoch": 0.021034039820166987, "grad_norm": 0.36247017979621887, "learning_rate": 0.0001, "loss": 1.7076, "step": 131 }, { "epoch": 0.02119460500963391, "grad_norm": 0.294392466545105, "learning_rate": 0.0001, "loss": 1.6495, "step": 132 }, { "epoch": 0.021355170199100836, "grad_norm": 0.33377623558044434, "learning_rate": 0.0001, "loss": 1.6448, "step": 133 }, { "epoch": 0.02151573538856776, "grad_norm": 0.3045656383037567, "learning_rate": 0.0001, "loss": 1.6668, "step": 134 }, { "epoch": 0.02167630057803468, "grad_norm": 0.3585946559906006, "learning_rate": 0.0001, "loss": 1.6351, "step": 135 }, { "epoch": 0.021836865767501604, "grad_norm": 0.29203692078590393, "learning_rate": 0.0001, "loss": 1.5617, "step": 136 }, { "epoch": 0.02199743095696853, "grad_norm": 0.3085068166255951, "learning_rate": 0.0001, "loss": 1.7582, "step": 137 }, { "epoch": 0.022157996146435453, "grad_norm": 0.30218175053596497, "learning_rate": 0.0001, "loss": 1.6359, "step": 138 }, { "epoch": 0.022318561335902376, "grad_norm": 0.32938602566719055, "learning_rate": 0.0001, "loss": 1.5966, "step": 139 }, { "epoch": 0.0224791265253693, "grad_norm": 0.32060807943344116, "learning_rate": 0.0001, "loss": 1.7, "step": 140 }, { "epoch": 0.022639691714836225, "grad_norm": 0.29182660579681396, "learning_rate": 0.0001, "loss": 1.6386, "step": 141 }, { "epoch": 0.022800256904303148, "grad_norm": 0.3093222677707672, "learning_rate": 0.0001, "loss": 1.6954, "step": 142 }, { "epoch": 0.02296082209377007, "grad_norm": 0.30981460213661194, "learning_rate": 0.0001, "loss": 1.6187, "step": 143 }, { "epoch": 0.023121387283236993, "grad_norm": 0.3060247004032135, "learning_rate": 0.0001, "loss": 1.6366, "step": 144 }, { "epoch": 0.02328195247270392, "grad_norm": 0.34533223509788513, "learning_rate": 0.0001, "loss": 1.6014, "step": 145 }, { "epoch": 0.023442517662170842, "grad_norm": 0.32339876890182495, "learning_rate": 0.0001, "loss": 1.7605, "step": 146 }, { "epoch": 0.023603082851637765, "grad_norm": 0.29571667313575745, "learning_rate": 0.0001, "loss": 1.6328, "step": 147 }, { "epoch": 0.023763648041104687, "grad_norm": 0.3205752670764923, "learning_rate": 0.0001, "loss": 1.6198, "step": 148 }, { "epoch": 0.023924213230571614, "grad_norm": 0.29312294721603394, "learning_rate": 0.0001, "loss": 1.6381, "step": 149 }, { "epoch": 0.024084778420038536, "grad_norm": 0.28196433186531067, "learning_rate": 0.0001, "loss": 1.6155, "step": 150 }, { "epoch": 0.02424534360950546, "grad_norm": 0.3127625286579132, "learning_rate": 0.0001, "loss": 1.7451, "step": 151 }, { "epoch": 0.024405908798972382, "grad_norm": 0.3327120244503021, "learning_rate": 0.0001, "loss": 1.7074, "step": 152 }, { "epoch": 0.024566473988439308, "grad_norm": 0.49247097969055176, "learning_rate": 0.0001, "loss": 1.74, "step": 153 }, { "epoch": 0.02472703917790623, "grad_norm": 0.3472457528114319, "learning_rate": 0.0001, "loss": 1.6715, "step": 154 }, { "epoch": 0.024887604367373153, "grad_norm": 0.3496563732624054, "learning_rate": 0.0001, "loss": 1.6577, "step": 155 }, { "epoch": 0.025048169556840076, "grad_norm": 0.335408091545105, "learning_rate": 0.0001, "loss": 1.6932, "step": 156 }, { "epoch": 0.025208734746307002, "grad_norm": 0.31248655915260315, "learning_rate": 0.0001, "loss": 1.697, "step": 157 }, { "epoch": 0.025369299935773925, "grad_norm": 0.2786000967025757, "learning_rate": 0.0001, "loss": 1.57, "step": 158 }, { "epoch": 0.025529865125240848, "grad_norm": 0.2947397530078888, "learning_rate": 0.0001, "loss": 1.6523, "step": 159 }, { "epoch": 0.02569043031470777, "grad_norm": 0.34668445587158203, "learning_rate": 0.0001, "loss": 1.6965, "step": 160 }, { "epoch": 0.025850995504174697, "grad_norm": 0.2984938621520996, "learning_rate": 0.0001, "loss": 1.6581, "step": 161 }, { "epoch": 0.02601156069364162, "grad_norm": 0.3221776783466339, "learning_rate": 0.0001, "loss": 1.604, "step": 162 }, { "epoch": 0.026172125883108542, "grad_norm": 0.28798162937164307, "learning_rate": 0.0001, "loss": 1.5953, "step": 163 }, { "epoch": 0.026332691072575465, "grad_norm": 0.32310038805007935, "learning_rate": 0.0001, "loss": 1.6954, "step": 164 }, { "epoch": 0.026493256262042388, "grad_norm": 0.2939600944519043, "learning_rate": 0.0001, "loss": 1.6698, "step": 165 }, { "epoch": 0.026653821451509314, "grad_norm": 0.2926805019378662, "learning_rate": 0.0001, "loss": 1.6898, "step": 166 }, { "epoch": 0.026814386640976236, "grad_norm": 0.3102514445781708, "learning_rate": 0.0001, "loss": 1.7258, "step": 167 }, { "epoch": 0.02697495183044316, "grad_norm": 0.28639596700668335, "learning_rate": 0.0001, "loss": 1.7035, "step": 168 }, { "epoch": 0.027135517019910082, "grad_norm": 0.34116673469543457, "learning_rate": 0.0001, "loss": 1.6692, "step": 169 }, { "epoch": 0.027296082209377008, "grad_norm": 0.28041908144950867, "learning_rate": 0.0001, "loss": 1.6578, "step": 170 }, { "epoch": 0.02745664739884393, "grad_norm": 0.3725098669528961, "learning_rate": 0.0001, "loss": 1.6975, "step": 171 }, { "epoch": 0.027617212588310854, "grad_norm": 0.31921643018722534, "learning_rate": 0.0001, "loss": 1.6892, "step": 172 }, { "epoch": 0.027777777777777776, "grad_norm": 0.3719729781150818, "learning_rate": 0.0001, "loss": 1.7543, "step": 173 }, { "epoch": 0.027938342967244702, "grad_norm": 0.32456451654434204, "learning_rate": 0.0001, "loss": 1.6563, "step": 174 }, { "epoch": 0.028098908156711625, "grad_norm": 0.3301026225090027, "learning_rate": 0.0001, "loss": 1.6022, "step": 175 }, { "epoch": 0.028259473346178548, "grad_norm": 0.3518262505531311, "learning_rate": 0.0001, "loss": 1.6914, "step": 176 }, { "epoch": 0.02842003853564547, "grad_norm": 0.27261704206466675, "learning_rate": 0.0001, "loss": 1.5933, "step": 177 }, { "epoch": 0.028580603725112397, "grad_norm": 0.33590003848075867, "learning_rate": 0.0001, "loss": 1.6827, "step": 178 }, { "epoch": 0.02874116891457932, "grad_norm": 0.3073245882987976, "learning_rate": 0.0001, "loss": 1.6588, "step": 179 }, { "epoch": 0.028901734104046242, "grad_norm": 0.2971007525920868, "learning_rate": 0.0001, "loss": 1.641, "step": 180 }, { "epoch": 0.029062299293513165, "grad_norm": 0.29228779673576355, "learning_rate": 0.0001, "loss": 1.6388, "step": 181 }, { "epoch": 0.02922286448298009, "grad_norm": 0.3131601810455322, "learning_rate": 0.0001, "loss": 1.6233, "step": 182 }, { "epoch": 0.029383429672447014, "grad_norm": 0.30442118644714355, "learning_rate": 0.0001, "loss": 1.7528, "step": 183 }, { "epoch": 0.029543994861913937, "grad_norm": 0.2892727553844452, "learning_rate": 0.0001, "loss": 1.676, "step": 184 }, { "epoch": 0.02970456005138086, "grad_norm": 0.29788944125175476, "learning_rate": 0.0001, "loss": 1.5781, "step": 185 }, { "epoch": 0.029865125240847785, "grad_norm": 0.3052654266357422, "learning_rate": 0.0001, "loss": 1.6033, "step": 186 }, { "epoch": 0.030025690430314708, "grad_norm": 0.27221453189849854, "learning_rate": 0.0001, "loss": 1.6477, "step": 187 }, { "epoch": 0.03018625561978163, "grad_norm": 0.29384908080101013, "learning_rate": 0.0001, "loss": 1.6009, "step": 188 }, { "epoch": 0.030346820809248554, "grad_norm": 0.2946358919143677, "learning_rate": 0.0001, "loss": 1.6412, "step": 189 }, { "epoch": 0.03050738599871548, "grad_norm": 0.29987457394599915, "learning_rate": 0.0001, "loss": 1.6494, "step": 190 }, { "epoch": 0.030667951188182403, "grad_norm": 0.29672113060951233, "learning_rate": 0.0001, "loss": 1.675, "step": 191 }, { "epoch": 0.030828516377649325, "grad_norm": 0.2981245815753937, "learning_rate": 0.0001, "loss": 1.5827, "step": 192 }, { "epoch": 0.030989081567116248, "grad_norm": 0.30196821689605713, "learning_rate": 0.0001, "loss": 1.6423, "step": 193 }, { "epoch": 0.031149646756583174, "grad_norm": 0.3073696494102478, "learning_rate": 0.0001, "loss": 1.7611, "step": 194 }, { "epoch": 0.0313102119460501, "grad_norm": 0.3277694284915924, "learning_rate": 0.0001, "loss": 1.7411, "step": 195 }, { "epoch": 0.03147077713551702, "grad_norm": 0.3470703363418579, "learning_rate": 0.0001, "loss": 1.7407, "step": 196 }, { "epoch": 0.03163134232498394, "grad_norm": 0.2917807400226593, "learning_rate": 0.0001, "loss": 1.6177, "step": 197 }, { "epoch": 0.031791907514450865, "grad_norm": 0.3175638020038605, "learning_rate": 0.0001, "loss": 1.582, "step": 198 }, { "epoch": 0.03195247270391779, "grad_norm": 0.2842324674129486, "learning_rate": 0.0001, "loss": 1.6694, "step": 199 }, { "epoch": 0.03211303789338472, "grad_norm": 0.29128801822662354, "learning_rate": 0.0001, "loss": 1.6467, "step": 200 }, { "epoch": 0.03227360308285164, "grad_norm": 0.28581851720809937, "learning_rate": 0.0001, "loss": 1.6484, "step": 201 }, { "epoch": 0.03243416827231856, "grad_norm": 0.28554877638816833, "learning_rate": 0.0001, "loss": 1.6476, "step": 202 }, { "epoch": 0.032594733461785486, "grad_norm": 0.4070573151111603, "learning_rate": 0.0001, "loss": 1.6423, "step": 203 }, { "epoch": 0.03275529865125241, "grad_norm": 0.2945961654186249, "learning_rate": 0.0001, "loss": 1.7596, "step": 204 }, { "epoch": 0.03291586384071933, "grad_norm": 0.3131120502948761, "learning_rate": 0.0001, "loss": 1.649, "step": 205 }, { "epoch": 0.033076429030186254, "grad_norm": 0.29569193720817566, "learning_rate": 0.0001, "loss": 1.7106, "step": 206 }, { "epoch": 0.033236994219653176, "grad_norm": 0.30611786246299744, "learning_rate": 0.0001, "loss": 1.6625, "step": 207 }, { "epoch": 0.033397559409120106, "grad_norm": 0.27818742394447327, "learning_rate": 0.0001, "loss": 1.6802, "step": 208 }, { "epoch": 0.03355812459858703, "grad_norm": 0.2986738979816437, "learning_rate": 0.0001, "loss": 1.7492, "step": 209 }, { "epoch": 0.03371868978805395, "grad_norm": 0.2596496343612671, "learning_rate": 0.0001, "loss": 1.5599, "step": 210 }, { "epoch": 0.033879254977520874, "grad_norm": 0.27559900283813477, "learning_rate": 0.0001, "loss": 1.6069, "step": 211 }, { "epoch": 0.0340398201669878, "grad_norm": 0.2860763370990753, "learning_rate": 0.0001, "loss": 1.5767, "step": 212 }, { "epoch": 0.03420038535645472, "grad_norm": 0.2946445345878601, "learning_rate": 0.0001, "loss": 1.6497, "step": 213 }, { "epoch": 0.03436095054592164, "grad_norm": 0.3171544373035431, "learning_rate": 0.0001, "loss": 1.6091, "step": 214 }, { "epoch": 0.034521515735388565, "grad_norm": 0.2965378761291504, "learning_rate": 0.0001, "loss": 1.6472, "step": 215 }, { "epoch": 0.03468208092485549, "grad_norm": 0.27333325147628784, "learning_rate": 0.0001, "loss": 1.5804, "step": 216 }, { "epoch": 0.03484264611432242, "grad_norm": 0.29446908831596375, "learning_rate": 0.0001, "loss": 1.657, "step": 217 }, { "epoch": 0.03500321130378934, "grad_norm": 0.3440585732460022, "learning_rate": 0.0001, "loss": 1.6271, "step": 218 }, { "epoch": 0.03516377649325626, "grad_norm": 0.2965490520000458, "learning_rate": 0.0001, "loss": 1.6513, "step": 219 }, { "epoch": 0.035324341682723186, "grad_norm": 0.2846764624118805, "learning_rate": 0.0001, "loss": 1.6948, "step": 220 }, { "epoch": 0.03548490687219011, "grad_norm": 0.27987420558929443, "learning_rate": 0.0001, "loss": 1.6841, "step": 221 }, { "epoch": 0.03564547206165703, "grad_norm": 0.29974129796028137, "learning_rate": 0.0001, "loss": 1.6245, "step": 222 }, { "epoch": 0.035806037251123954, "grad_norm": 0.28039097785949707, "learning_rate": 0.0001, "loss": 1.6591, "step": 223 }, { "epoch": 0.03596660244059088, "grad_norm": 0.3027125597000122, "learning_rate": 0.0001, "loss": 1.664, "step": 224 }, { "epoch": 0.036127167630057806, "grad_norm": 0.308106392621994, "learning_rate": 0.0001, "loss": 1.7404, "step": 225 }, { "epoch": 0.03628773281952473, "grad_norm": 0.2665157914161682, "learning_rate": 0.0001, "loss": 1.638, "step": 226 }, { "epoch": 0.03644829800899165, "grad_norm": 0.2794928252696991, "learning_rate": 0.0001, "loss": 1.5889, "step": 227 }, { "epoch": 0.036608863198458574, "grad_norm": 0.33470234274864197, "learning_rate": 0.0001, "loss": 1.6182, "step": 228 }, { "epoch": 0.0367694283879255, "grad_norm": 0.27957892417907715, "learning_rate": 0.0001, "loss": 1.6607, "step": 229 }, { "epoch": 0.03692999357739242, "grad_norm": 0.287702739238739, "learning_rate": 0.0001, "loss": 1.592, "step": 230 }, { "epoch": 0.03709055876685934, "grad_norm": 0.27235883474349976, "learning_rate": 0.0001, "loss": 1.5924, "step": 231 }, { "epoch": 0.037251123956326265, "grad_norm": 0.24706554412841797, "learning_rate": 0.0001, "loss": 1.4878, "step": 232 }, { "epoch": 0.037411689145793195, "grad_norm": 0.28109508752822876, "learning_rate": 0.0001, "loss": 1.66, "step": 233 }, { "epoch": 0.03757225433526012, "grad_norm": 0.301008015871048, "learning_rate": 0.0001, "loss": 1.7872, "step": 234 }, { "epoch": 0.03773281952472704, "grad_norm": 0.2830420732498169, "learning_rate": 0.0001, "loss": 1.6013, "step": 235 }, { "epoch": 0.03789338471419396, "grad_norm": 0.26912617683410645, "learning_rate": 0.0001, "loss": 1.6176, "step": 236 }, { "epoch": 0.038053949903660886, "grad_norm": 0.2907160818576813, "learning_rate": 0.0001, "loss": 1.6932, "step": 237 }, { "epoch": 0.03821451509312781, "grad_norm": 0.3068007826805115, "learning_rate": 0.0001, "loss": 1.685, "step": 238 }, { "epoch": 0.03837508028259473, "grad_norm": 0.27787286043167114, "learning_rate": 0.0001, "loss": 1.7064, "step": 239 }, { "epoch": 0.038535645472061654, "grad_norm": 0.30287325382232666, "learning_rate": 0.0001, "loss": 1.6296, "step": 240 }, { "epoch": 0.038696210661528584, "grad_norm": 0.27720868587493896, "learning_rate": 0.0001, "loss": 1.6549, "step": 241 }, { "epoch": 0.038856775850995506, "grad_norm": 0.2869109809398651, "learning_rate": 0.0001, "loss": 1.7045, "step": 242 }, { "epoch": 0.03901734104046243, "grad_norm": 0.30262935161590576, "learning_rate": 0.0001, "loss": 1.6358, "step": 243 }, { "epoch": 0.03917790622992935, "grad_norm": 0.2900177538394928, "learning_rate": 0.0001, "loss": 1.6479, "step": 244 }, { "epoch": 0.039338471419396275, "grad_norm": 0.29951053857803345, "learning_rate": 0.0001, "loss": 1.6419, "step": 245 }, { "epoch": 0.0394990366088632, "grad_norm": 0.30172184109687805, "learning_rate": 0.0001, "loss": 1.7259, "step": 246 }, { "epoch": 0.03965960179833012, "grad_norm": 0.2781960070133209, "learning_rate": 0.0001, "loss": 1.6788, "step": 247 }, { "epoch": 0.03982016698779704, "grad_norm": 0.2786746025085449, "learning_rate": 0.0001, "loss": 1.6411, "step": 248 }, { "epoch": 0.03998073217726397, "grad_norm": 0.273116797208786, "learning_rate": 0.0001, "loss": 1.6447, "step": 249 }, { "epoch": 0.040141297366730895, "grad_norm": 0.28072962164878845, "learning_rate": 0.0001, "loss": 1.6603, "step": 250 }, { "epoch": 0.04030186255619782, "grad_norm": 0.28691452741622925, "learning_rate": 0.0001, "loss": 1.6517, "step": 251 }, { "epoch": 0.04046242774566474, "grad_norm": 0.280073344707489, "learning_rate": 0.0001, "loss": 1.6652, "step": 252 }, { "epoch": 0.04062299293513166, "grad_norm": 0.28682181239128113, "learning_rate": 0.0001, "loss": 1.6401, "step": 253 }, { "epoch": 0.040783558124598586, "grad_norm": 0.2880575656890869, "learning_rate": 0.0001, "loss": 1.6264, "step": 254 }, { "epoch": 0.04094412331406551, "grad_norm": 0.2657603919506073, "learning_rate": 0.0001, "loss": 1.6107, "step": 255 }, { "epoch": 0.04110468850353243, "grad_norm": 0.3001009523868561, "learning_rate": 0.0001, "loss": 1.7025, "step": 256 }, { "epoch": 0.04126525369299936, "grad_norm": 0.2876819670200348, "learning_rate": 0.0001, "loss": 1.6628, "step": 257 }, { "epoch": 0.041425818882466284, "grad_norm": 0.27692848443984985, "learning_rate": 0.0001, "loss": 1.6402, "step": 258 }, { "epoch": 0.041586384071933206, "grad_norm": 0.27744626998901367, "learning_rate": 0.0001, "loss": 1.6806, "step": 259 }, { "epoch": 0.04174694926140013, "grad_norm": 0.27917224168777466, "learning_rate": 0.0001, "loss": 1.612, "step": 260 }, { "epoch": 0.04190751445086705, "grad_norm": 0.3135955035686493, "learning_rate": 0.0001, "loss": 1.6123, "step": 261 }, { "epoch": 0.042068079640333975, "grad_norm": 0.28715232014656067, "learning_rate": 0.0001, "loss": 1.7021, "step": 262 }, { "epoch": 0.0422286448298009, "grad_norm": 0.2720044255256653, "learning_rate": 0.0001, "loss": 1.5852, "step": 263 }, { "epoch": 0.04238921001926782, "grad_norm": 0.2853310704231262, "learning_rate": 0.0001, "loss": 1.5508, "step": 264 }, { "epoch": 0.04254977520873475, "grad_norm": 0.27966177463531494, "learning_rate": 0.0001, "loss": 1.6642, "step": 265 }, { "epoch": 0.04271034039820167, "grad_norm": 0.2797499895095825, "learning_rate": 0.0001, "loss": 1.6073, "step": 266 }, { "epoch": 0.042870905587668595, "grad_norm": 0.27873483300209045, "learning_rate": 0.0001, "loss": 1.6507, "step": 267 }, { "epoch": 0.04303147077713552, "grad_norm": 0.28432849049568176, "learning_rate": 0.0001, "loss": 1.6058, "step": 268 }, { "epoch": 0.04319203596660244, "grad_norm": 0.26404669880867004, "learning_rate": 0.0001, "loss": 1.6179, "step": 269 }, { "epoch": 0.04335260115606936, "grad_norm": 0.2764602303504944, "learning_rate": 0.0001, "loss": 1.6378, "step": 270 }, { "epoch": 0.043513166345536286, "grad_norm": 0.34712472558021545, "learning_rate": 0.0001, "loss": 1.7007, "step": 271 }, { "epoch": 0.04367373153500321, "grad_norm": 0.26928162574768066, "learning_rate": 0.0001, "loss": 1.676, "step": 272 }, { "epoch": 0.04383429672447013, "grad_norm": 0.26622453331947327, "learning_rate": 0.0001, "loss": 1.5913, "step": 273 }, { "epoch": 0.04399486191393706, "grad_norm": 0.2929988205432892, "learning_rate": 0.0001, "loss": 1.5786, "step": 274 }, { "epoch": 0.044155427103403984, "grad_norm": 0.2612575590610504, "learning_rate": 0.0001, "loss": 1.609, "step": 275 }, { "epoch": 0.04431599229287091, "grad_norm": 0.27061378955841064, "learning_rate": 0.0001, "loss": 1.6165, "step": 276 }, { "epoch": 0.04447655748233783, "grad_norm": 0.30242618918418884, "learning_rate": 0.0001, "loss": 1.6786, "step": 277 }, { "epoch": 0.04463712267180475, "grad_norm": 0.25789663195610046, "learning_rate": 0.0001, "loss": 1.5475, "step": 278 }, { "epoch": 0.044797687861271675, "grad_norm": 0.2688475251197815, "learning_rate": 0.0001, "loss": 1.6198, "step": 279 }, { "epoch": 0.0449582530507386, "grad_norm": 0.2619808316230774, "learning_rate": 0.0001, "loss": 1.6332, "step": 280 }, { "epoch": 0.04511881824020552, "grad_norm": 0.2763867676258087, "learning_rate": 0.0001, "loss": 1.7022, "step": 281 }, { "epoch": 0.04527938342967245, "grad_norm": 0.29728758335113525, "learning_rate": 0.0001, "loss": 1.6367, "step": 282 }, { "epoch": 0.04543994861913937, "grad_norm": 0.27272191643714905, "learning_rate": 0.0001, "loss": 1.6244, "step": 283 }, { "epoch": 0.045600513808606295, "grad_norm": 0.28270816802978516, "learning_rate": 0.0001, "loss": 1.6786, "step": 284 }, { "epoch": 0.04576107899807322, "grad_norm": 0.25999021530151367, "learning_rate": 0.0001, "loss": 1.6132, "step": 285 }, { "epoch": 0.04592164418754014, "grad_norm": 0.27927297353744507, "learning_rate": 0.0001, "loss": 1.5973, "step": 286 }, { "epoch": 0.04608220937700706, "grad_norm": 0.29601696133613586, "learning_rate": 0.0001, "loss": 1.6679, "step": 287 }, { "epoch": 0.046242774566473986, "grad_norm": 0.31276965141296387, "learning_rate": 0.0001, "loss": 1.6493, "step": 288 }, { "epoch": 0.04640333975594091, "grad_norm": 0.2943185865879059, "learning_rate": 0.0001, "loss": 1.6838, "step": 289 }, { "epoch": 0.04656390494540784, "grad_norm": 0.2697433531284332, "learning_rate": 0.0001, "loss": 1.5281, "step": 290 }, { "epoch": 0.04672447013487476, "grad_norm": 0.28271886706352234, "learning_rate": 0.0001, "loss": 1.6035, "step": 291 }, { "epoch": 0.046885035324341684, "grad_norm": 0.2874354124069214, "learning_rate": 0.0001, "loss": 1.6086, "step": 292 }, { "epoch": 0.04704560051380861, "grad_norm": 0.29864948987960815, "learning_rate": 0.0001, "loss": 1.6596, "step": 293 }, { "epoch": 0.04720616570327553, "grad_norm": 0.3923736810684204, "learning_rate": 0.0001, "loss": 1.6065, "step": 294 }, { "epoch": 0.04736673089274245, "grad_norm": 0.2731698453426361, "learning_rate": 0.0001, "loss": 1.5518, "step": 295 }, { "epoch": 0.047527296082209375, "grad_norm": 0.2886182367801666, "learning_rate": 0.0001, "loss": 1.7004, "step": 296 }, { "epoch": 0.0476878612716763, "grad_norm": 0.26350656151771545, "learning_rate": 0.0001, "loss": 1.5923, "step": 297 }, { "epoch": 0.04784842646114323, "grad_norm": 0.263208270072937, "learning_rate": 0.0001, "loss": 1.6354, "step": 298 }, { "epoch": 0.04800899165061015, "grad_norm": 0.2772878408432007, "learning_rate": 0.0001, "loss": 1.7047, "step": 299 }, { "epoch": 0.04816955684007707, "grad_norm": 0.2817806899547577, "learning_rate": 0.0001, "loss": 1.7032, "step": 300 }, { "epoch": 0.048330122029543995, "grad_norm": 0.26130443811416626, "learning_rate": 0.0001, "loss": 1.5879, "step": 301 }, { "epoch": 0.04849068721901092, "grad_norm": 0.28344425559043884, "learning_rate": 0.0001, "loss": 1.6826, "step": 302 }, { "epoch": 0.04865125240847784, "grad_norm": 0.2728808522224426, "learning_rate": 0.0001, "loss": 1.6019, "step": 303 }, { "epoch": 0.048811817597944764, "grad_norm": 0.2807309031486511, "learning_rate": 0.0001, "loss": 1.5904, "step": 304 }, { "epoch": 0.048972382787411686, "grad_norm": 0.2673819959163666, "learning_rate": 0.0001, "loss": 1.6582, "step": 305 }, { "epoch": 0.049132947976878616, "grad_norm": 0.307579904794693, "learning_rate": 0.0001, "loss": 1.5884, "step": 306 }, { "epoch": 0.04929351316634554, "grad_norm": 0.2933973968029022, "learning_rate": 0.0001, "loss": 1.701, "step": 307 }, { "epoch": 0.04945407835581246, "grad_norm": 0.2675729990005493, "learning_rate": 0.0001, "loss": 1.6637, "step": 308 }, { "epoch": 0.049614643545279384, "grad_norm": 0.2698867917060852, "learning_rate": 0.0001, "loss": 1.6131, "step": 309 }, { "epoch": 0.04977520873474631, "grad_norm": 0.28606680035591125, "learning_rate": 0.0001, "loss": 1.6189, "step": 310 }, { "epoch": 0.04993577392421323, "grad_norm": 0.28758710622787476, "learning_rate": 0.0001, "loss": 1.7053, "step": 311 }, { "epoch": 0.05009633911368015, "grad_norm": 0.29196128249168396, "learning_rate": 0.0001, "loss": 1.6379, "step": 312 }, { "epoch": 0.050256904303147075, "grad_norm": 0.2753434479236603, "learning_rate": 0.0001, "loss": 1.6491, "step": 313 }, { "epoch": 0.050417469492614005, "grad_norm": 0.2925824224948883, "learning_rate": 0.0001, "loss": 1.6025, "step": 314 }, { "epoch": 0.05057803468208093, "grad_norm": 0.2825222313404083, "learning_rate": 0.0001, "loss": 1.6813, "step": 315 }, { "epoch": 0.05073859987154785, "grad_norm": 0.26458314061164856, "learning_rate": 0.0001, "loss": 1.6451, "step": 316 }, { "epoch": 0.05089916506101477, "grad_norm": 0.2626109719276428, "learning_rate": 0.0001, "loss": 1.6472, "step": 317 }, { "epoch": 0.051059730250481696, "grad_norm": 0.2748810052871704, "learning_rate": 0.0001, "loss": 1.6277, "step": 318 }, { "epoch": 0.05122029543994862, "grad_norm": 0.26177430152893066, "learning_rate": 0.0001, "loss": 1.6687, "step": 319 }, { "epoch": 0.05138086062941554, "grad_norm": 0.27573367953300476, "learning_rate": 0.0001, "loss": 1.6763, "step": 320 }, { "epoch": 0.051541425818882464, "grad_norm": 0.2771402895450592, "learning_rate": 0.0001, "loss": 1.6566, "step": 321 }, { "epoch": 0.05170199100834939, "grad_norm": 0.26614296436309814, "learning_rate": 0.0001, "loss": 1.6022, "step": 322 }, { "epoch": 0.051862556197816316, "grad_norm": 0.2790323495864868, "learning_rate": 0.0001, "loss": 1.5592, "step": 323 }, { "epoch": 0.05202312138728324, "grad_norm": 0.2867019474506378, "learning_rate": 0.0001, "loss": 1.5999, "step": 324 }, { "epoch": 0.05218368657675016, "grad_norm": 0.2921207845211029, "learning_rate": 0.0001, "loss": 1.598, "step": 325 }, { "epoch": 0.052344251766217084, "grad_norm": 0.2791954278945923, "learning_rate": 0.0001, "loss": 1.5413, "step": 326 }, { "epoch": 0.05250481695568401, "grad_norm": 0.29377323389053345, "learning_rate": 0.0001, "loss": 1.6703, "step": 327 }, { "epoch": 0.05266538214515093, "grad_norm": 0.2944749593734741, "learning_rate": 0.0001, "loss": 1.6845, "step": 328 }, { "epoch": 0.05282594733461785, "grad_norm": 0.2755807042121887, "learning_rate": 0.0001, "loss": 1.6008, "step": 329 }, { "epoch": 0.052986512524084775, "grad_norm": 0.26535558700561523, "learning_rate": 0.0001, "loss": 1.6392, "step": 330 }, { "epoch": 0.053147077713551705, "grad_norm": 0.2698773443698883, "learning_rate": 0.0001, "loss": 1.6352, "step": 331 }, { "epoch": 0.05330764290301863, "grad_norm": 0.2726115584373474, "learning_rate": 0.0001, "loss": 1.6676, "step": 332 }, { "epoch": 0.05346820809248555, "grad_norm": 1.7487785816192627, "learning_rate": 0.0001, "loss": 1.5882, "step": 333 }, { "epoch": 0.05362877328195247, "grad_norm": 0.29556626081466675, "learning_rate": 0.0001, "loss": 1.5875, "step": 334 }, { "epoch": 0.053789338471419396, "grad_norm": 0.28750649094581604, "learning_rate": 0.0001, "loss": 1.6948, "step": 335 }, { "epoch": 0.05394990366088632, "grad_norm": 0.26631468534469604, "learning_rate": 0.0001, "loss": 1.6341, "step": 336 }, { "epoch": 0.05411046885035324, "grad_norm": 0.262188583612442, "learning_rate": 0.0001, "loss": 1.6069, "step": 337 }, { "epoch": 0.054271034039820164, "grad_norm": 0.3178561329841614, "learning_rate": 0.0001, "loss": 1.6833, "step": 338 }, { "epoch": 0.05443159922928709, "grad_norm": 0.28462502360343933, "learning_rate": 0.0001, "loss": 1.6664, "step": 339 }, { "epoch": 0.054592164418754016, "grad_norm": 0.2982574701309204, "learning_rate": 0.0001, "loss": 1.5992, "step": 340 }, { "epoch": 0.05475272960822094, "grad_norm": 0.26741865277290344, "learning_rate": 0.0001, "loss": 1.5997, "step": 341 }, { "epoch": 0.05491329479768786, "grad_norm": 0.28718245029449463, "learning_rate": 0.0001, "loss": 1.5932, "step": 342 }, { "epoch": 0.055073859987154784, "grad_norm": 0.28356030583381653, "learning_rate": 0.0001, "loss": 1.6303, "step": 343 }, { "epoch": 0.05523442517662171, "grad_norm": 0.2817205786705017, "learning_rate": 0.0001, "loss": 1.7115, "step": 344 }, { "epoch": 0.05539499036608863, "grad_norm": 0.26058703660964966, "learning_rate": 0.0001, "loss": 1.5112, "step": 345 }, { "epoch": 0.05555555555555555, "grad_norm": 0.2597590386867523, "learning_rate": 0.0001, "loss": 1.6047, "step": 346 }, { "epoch": 0.05571612074502248, "grad_norm": 0.3029089868068695, "learning_rate": 0.0001, "loss": 1.6968, "step": 347 }, { "epoch": 0.055876685934489405, "grad_norm": 0.27384987473487854, "learning_rate": 0.0001, "loss": 1.5922, "step": 348 }, { "epoch": 0.05603725112395633, "grad_norm": 0.2676237225532532, "learning_rate": 0.0001, "loss": 1.6603, "step": 349 }, { "epoch": 0.05619781631342325, "grad_norm": 0.25814104080200195, "learning_rate": 0.0001, "loss": 1.6006, "step": 350 }, { "epoch": 0.05635838150289017, "grad_norm": 0.27472570538520813, "learning_rate": 0.0001, "loss": 1.6377, "step": 351 }, { "epoch": 0.056518946692357096, "grad_norm": 0.2804344892501831, "learning_rate": 0.0001, "loss": 1.6267, "step": 352 }, { "epoch": 0.05667951188182402, "grad_norm": 0.29046690464019775, "learning_rate": 0.0001, "loss": 1.6689, "step": 353 }, { "epoch": 0.05684007707129094, "grad_norm": 0.3239709436893463, "learning_rate": 0.0001, "loss": 1.6366, "step": 354 }, { "epoch": 0.05700064226075787, "grad_norm": 0.3020557463169098, "learning_rate": 0.0001, "loss": 1.5986, "step": 355 }, { "epoch": 0.057161207450224794, "grad_norm": 0.27851709723472595, "learning_rate": 0.0001, "loss": 1.587, "step": 356 }, { "epoch": 0.057321772639691716, "grad_norm": 0.2869919240474701, "learning_rate": 0.0001, "loss": 1.6423, "step": 357 }, { "epoch": 0.05748233782915864, "grad_norm": 0.3175436854362488, "learning_rate": 0.0001, "loss": 1.6393, "step": 358 }, { "epoch": 0.05764290301862556, "grad_norm": 0.2586315870285034, "learning_rate": 0.0001, "loss": 1.5907, "step": 359 }, { "epoch": 0.057803468208092484, "grad_norm": 0.28906723856925964, "learning_rate": 0.0001, "loss": 1.6697, "step": 360 }, { "epoch": 0.05796403339755941, "grad_norm": 0.27379968762397766, "learning_rate": 0.0001, "loss": 1.7089, "step": 361 }, { "epoch": 0.05812459858702633, "grad_norm": 0.2720244526863098, "learning_rate": 0.0001, "loss": 1.5185, "step": 362 }, { "epoch": 0.05828516377649326, "grad_norm": 0.26641982793807983, "learning_rate": 0.0001, "loss": 1.6971, "step": 363 }, { "epoch": 0.05844572896596018, "grad_norm": 0.29224488139152527, "learning_rate": 0.0001, "loss": 1.6498, "step": 364 }, { "epoch": 0.058606294155427105, "grad_norm": 0.27155160903930664, "learning_rate": 0.0001, "loss": 1.5972, "step": 365 }, { "epoch": 0.05876685934489403, "grad_norm": 0.2602423429489136, "learning_rate": 0.0001, "loss": 1.6307, "step": 366 }, { "epoch": 0.05892742453436095, "grad_norm": 0.24640707671642303, "learning_rate": 0.0001, "loss": 1.6352, "step": 367 }, { "epoch": 0.05908798972382787, "grad_norm": 0.28325408697128296, "learning_rate": 0.0001, "loss": 1.7033, "step": 368 }, { "epoch": 0.059248554913294796, "grad_norm": 0.2639716863632202, "learning_rate": 0.0001, "loss": 1.5925, "step": 369 }, { "epoch": 0.05940912010276172, "grad_norm": 0.287239134311676, "learning_rate": 0.0001, "loss": 1.6584, "step": 370 }, { "epoch": 0.05956968529222865, "grad_norm": 0.2728608548641205, "learning_rate": 0.0001, "loss": 1.6749, "step": 371 }, { "epoch": 0.05973025048169557, "grad_norm": 0.2690257132053375, "learning_rate": 0.0001, "loss": 1.688, "step": 372 }, { "epoch": 0.059890815671162494, "grad_norm": 0.26607444882392883, "learning_rate": 0.0001, "loss": 1.6221, "step": 373 }, { "epoch": 0.060051380860629416, "grad_norm": 0.25512611865997314, "learning_rate": 0.0001, "loss": 1.6003, "step": 374 }, { "epoch": 0.06021194605009634, "grad_norm": 0.2618836760520935, "learning_rate": 0.0001, "loss": 1.6317, "step": 375 }, { "epoch": 0.06037251123956326, "grad_norm": 0.25583288073539734, "learning_rate": 0.0001, "loss": 1.6169, "step": 376 }, { "epoch": 0.060533076429030185, "grad_norm": 0.278244286775589, "learning_rate": 0.0001, "loss": 1.6447, "step": 377 }, { "epoch": 0.06069364161849711, "grad_norm": 0.2562451660633087, "learning_rate": 0.0001, "loss": 1.6416, "step": 378 }, { "epoch": 0.06085420680796403, "grad_norm": 0.3050384223461151, "learning_rate": 0.0001, "loss": 1.6582, "step": 379 }, { "epoch": 0.06101477199743096, "grad_norm": 0.30344587564468384, "learning_rate": 0.0001, "loss": 1.5562, "step": 380 }, { "epoch": 0.06117533718689788, "grad_norm": 0.2712695002555847, "learning_rate": 0.0001, "loss": 1.59, "step": 381 }, { "epoch": 0.061335902376364805, "grad_norm": 0.27521881461143494, "learning_rate": 0.0001, "loss": 1.6101, "step": 382 }, { "epoch": 0.06149646756583173, "grad_norm": 0.26694977283477783, "learning_rate": 0.0001, "loss": 1.6551, "step": 383 }, { "epoch": 0.06165703275529865, "grad_norm": 0.3174509108066559, "learning_rate": 0.0001, "loss": 1.6294, "step": 384 }, { "epoch": 0.06181759794476557, "grad_norm": 0.26048147678375244, "learning_rate": 0.0001, "loss": 1.6333, "step": 385 }, { "epoch": 0.061978163134232496, "grad_norm": 0.2684613764286041, "learning_rate": 0.0001, "loss": 1.4829, "step": 386 }, { "epoch": 0.06213872832369942, "grad_norm": 0.2893260717391968, "learning_rate": 0.0001, "loss": 1.7229, "step": 387 }, { "epoch": 0.06229929351316635, "grad_norm": 0.2736869156360626, "learning_rate": 0.0001, "loss": 1.7301, "step": 388 }, { "epoch": 0.06245985870263327, "grad_norm": 0.2580438256263733, "learning_rate": 0.0001, "loss": 1.672, "step": 389 }, { "epoch": 0.0626204238921002, "grad_norm": 0.2694301903247833, "learning_rate": 0.0001, "loss": 1.6613, "step": 390 }, { "epoch": 0.06278098908156711, "grad_norm": 0.28036239743232727, "learning_rate": 0.0001, "loss": 1.639, "step": 391 }, { "epoch": 0.06294155427103404, "grad_norm": 0.273385226726532, "learning_rate": 0.0001, "loss": 1.616, "step": 392 }, { "epoch": 0.06310211946050097, "grad_norm": 0.27833032608032227, "learning_rate": 0.0001, "loss": 1.6453, "step": 393 }, { "epoch": 0.06326268464996788, "grad_norm": 0.27655139565467834, "learning_rate": 0.0001, "loss": 1.6777, "step": 394 }, { "epoch": 0.06342324983943481, "grad_norm": 0.24965526163578033, "learning_rate": 0.0001, "loss": 1.5135, "step": 395 }, { "epoch": 0.06358381502890173, "grad_norm": 0.28703129291534424, "learning_rate": 0.0001, "loss": 1.6204, "step": 396 }, { "epoch": 0.06374438021836866, "grad_norm": 0.2821877598762512, "learning_rate": 0.0001, "loss": 1.6637, "step": 397 }, { "epoch": 0.06390494540783558, "grad_norm": 0.26222217082977295, "learning_rate": 0.0001, "loss": 1.5248, "step": 398 }, { "epoch": 0.0640655105973025, "grad_norm": 0.2721595764160156, "learning_rate": 0.0001, "loss": 1.5602, "step": 399 }, { "epoch": 0.06422607578676943, "grad_norm": 0.270107626914978, "learning_rate": 0.0001, "loss": 1.6016, "step": 400 }, { "epoch": 0.06438664097623635, "grad_norm": 0.26544928550720215, "learning_rate": 0.0001, "loss": 1.5488, "step": 401 }, { "epoch": 0.06454720616570328, "grad_norm": 0.2908627390861511, "learning_rate": 0.0001, "loss": 1.6418, "step": 402 }, { "epoch": 0.0647077713551702, "grad_norm": 0.2766053080558777, "learning_rate": 0.0001, "loss": 1.6592, "step": 403 }, { "epoch": 0.06486833654463713, "grad_norm": 0.2724528908729553, "learning_rate": 0.0001, "loss": 1.5596, "step": 404 }, { "epoch": 0.06502890173410404, "grad_norm": 0.2631203830242157, "learning_rate": 0.0001, "loss": 1.5603, "step": 405 }, { "epoch": 0.06518946692357097, "grad_norm": 0.2576027512550354, "learning_rate": 0.0001, "loss": 1.5777, "step": 406 }, { "epoch": 0.06535003211303789, "grad_norm": 0.29033124446868896, "learning_rate": 0.0001, "loss": 1.6131, "step": 407 }, { "epoch": 0.06551059730250482, "grad_norm": 0.27214476466178894, "learning_rate": 0.0001, "loss": 1.6768, "step": 408 }, { "epoch": 0.06567116249197175, "grad_norm": 0.2475825399160385, "learning_rate": 0.0001, "loss": 1.5325, "step": 409 }, { "epoch": 0.06583172768143866, "grad_norm": 0.2732248902320862, "learning_rate": 0.0001, "loss": 1.6699, "step": 410 }, { "epoch": 0.06599229287090559, "grad_norm": 0.2896178364753723, "learning_rate": 0.0001, "loss": 1.7157, "step": 411 }, { "epoch": 0.06615285806037251, "grad_norm": 0.27780890464782715, "learning_rate": 0.0001, "loss": 1.6669, "step": 412 }, { "epoch": 0.06631342324983944, "grad_norm": 0.26925742626190186, "learning_rate": 0.0001, "loss": 1.7434, "step": 413 }, { "epoch": 0.06647398843930635, "grad_norm": 0.2978026866912842, "learning_rate": 0.0001, "loss": 1.6731, "step": 414 }, { "epoch": 0.06663455362877328, "grad_norm": 0.26644179224967957, "learning_rate": 0.0001, "loss": 1.6674, "step": 415 }, { "epoch": 0.06679511881824021, "grad_norm": 0.2594255208969116, "learning_rate": 0.0001, "loss": 1.6407, "step": 416 }, { "epoch": 0.06695568400770713, "grad_norm": 0.29270321130752563, "learning_rate": 0.0001, "loss": 1.6436, "step": 417 }, { "epoch": 0.06711624919717406, "grad_norm": 0.2654949426651001, "learning_rate": 0.0001, "loss": 1.6121, "step": 418 }, { "epoch": 0.06727681438664097, "grad_norm": 0.2827872633934021, "learning_rate": 0.0001, "loss": 1.6427, "step": 419 }, { "epoch": 0.0674373795761079, "grad_norm": 0.2690976560115814, "learning_rate": 0.0001, "loss": 1.569, "step": 420 }, { "epoch": 0.06759794476557482, "grad_norm": 0.269963800907135, "learning_rate": 0.0001, "loss": 1.5509, "step": 421 }, { "epoch": 0.06775850995504175, "grad_norm": 0.2637880742549896, "learning_rate": 0.0001, "loss": 1.6754, "step": 422 }, { "epoch": 0.06791907514450866, "grad_norm": 0.28625962138175964, "learning_rate": 0.0001, "loss": 1.6278, "step": 423 }, { "epoch": 0.0680796403339756, "grad_norm": 0.2607976496219635, "learning_rate": 0.0001, "loss": 1.5234, "step": 424 }, { "epoch": 0.06824020552344252, "grad_norm": 0.2630302608013153, "learning_rate": 0.0001, "loss": 1.6054, "step": 425 }, { "epoch": 0.06840077071290944, "grad_norm": 0.26833876967430115, "learning_rate": 0.0001, "loss": 1.6376, "step": 426 }, { "epoch": 0.06856133590237637, "grad_norm": 0.2762647867202759, "learning_rate": 0.0001, "loss": 1.6134, "step": 427 }, { "epoch": 0.06872190109184328, "grad_norm": 0.27359503507614136, "learning_rate": 0.0001, "loss": 1.5636, "step": 428 }, { "epoch": 0.06888246628131021, "grad_norm": 0.24629630148410797, "learning_rate": 0.0001, "loss": 1.5002, "step": 429 }, { "epoch": 0.06904303147077713, "grad_norm": 0.2808135151863098, "learning_rate": 0.0001, "loss": 1.6491, "step": 430 }, { "epoch": 0.06920359666024406, "grad_norm": 0.2696678936481476, "learning_rate": 0.0001, "loss": 1.6919, "step": 431 }, { "epoch": 0.06936416184971098, "grad_norm": 0.31605294346809387, "learning_rate": 0.0001, "loss": 1.6632, "step": 432 }, { "epoch": 0.0695247270391779, "grad_norm": 0.2688290476799011, "learning_rate": 0.0001, "loss": 1.5792, "step": 433 }, { "epoch": 0.06968529222864484, "grad_norm": 0.2849433124065399, "learning_rate": 0.0001, "loss": 1.5376, "step": 434 }, { "epoch": 0.06984585741811175, "grad_norm": 0.26482313871383667, "learning_rate": 0.0001, "loss": 1.6478, "step": 435 }, { "epoch": 0.07000642260757868, "grad_norm": 0.2657308876514435, "learning_rate": 0.0001, "loss": 1.59, "step": 436 }, { "epoch": 0.0701669877970456, "grad_norm": 0.28563132882118225, "learning_rate": 0.0001, "loss": 1.6054, "step": 437 }, { "epoch": 0.07032755298651253, "grad_norm": 0.27213236689567566, "learning_rate": 0.0001, "loss": 1.5857, "step": 438 }, { "epoch": 0.07048811817597944, "grad_norm": 0.26817741990089417, "learning_rate": 0.0001, "loss": 1.5646, "step": 439 }, { "epoch": 0.07064868336544637, "grad_norm": 0.28620007634162903, "learning_rate": 0.0001, "loss": 1.6234, "step": 440 }, { "epoch": 0.0708092485549133, "grad_norm": 0.2723342180252075, "learning_rate": 0.0001, "loss": 1.6453, "step": 441 }, { "epoch": 0.07096981374438022, "grad_norm": 0.30084824562072754, "learning_rate": 0.0001, "loss": 1.5685, "step": 442 }, { "epoch": 0.07113037893384715, "grad_norm": 0.2717418968677521, "learning_rate": 0.0001, "loss": 1.5707, "step": 443 }, { "epoch": 0.07129094412331406, "grad_norm": 0.2876720130443573, "learning_rate": 0.0001, "loss": 1.5972, "step": 444 }, { "epoch": 0.07145150931278099, "grad_norm": 0.2772391140460968, "learning_rate": 0.0001, "loss": 1.5141, "step": 445 }, { "epoch": 0.07161207450224791, "grad_norm": 0.3008630871772766, "learning_rate": 0.0001, "loss": 1.6756, "step": 446 }, { "epoch": 0.07177263969171484, "grad_norm": 0.29428958892822266, "learning_rate": 0.0001, "loss": 1.7047, "step": 447 }, { "epoch": 0.07193320488118175, "grad_norm": 0.28210246562957764, "learning_rate": 0.0001, "loss": 1.5358, "step": 448 }, { "epoch": 0.07209377007064868, "grad_norm": 0.26412245631217957, "learning_rate": 0.0001, "loss": 1.6833, "step": 449 }, { "epoch": 0.07225433526011561, "grad_norm": 0.322248637676239, "learning_rate": 0.0001, "loss": 1.6587, "step": 450 }, { "epoch": 0.07241490044958253, "grad_norm": 0.2715447247028351, "learning_rate": 0.0001, "loss": 1.651, "step": 451 }, { "epoch": 0.07257546563904946, "grad_norm": 0.28355979919433594, "learning_rate": 0.0001, "loss": 1.6032, "step": 452 }, { "epoch": 0.07273603082851637, "grad_norm": 0.28326234221458435, "learning_rate": 0.0001, "loss": 1.536, "step": 453 }, { "epoch": 0.0728965960179833, "grad_norm": 0.27278533577919006, "learning_rate": 0.0001, "loss": 1.597, "step": 454 }, { "epoch": 0.07305716120745022, "grad_norm": 1.5439703464508057, "learning_rate": 0.0001, "loss": 1.597, "step": 455 }, { "epoch": 0.07321772639691715, "grad_norm": 0.2848699986934662, "learning_rate": 0.0001, "loss": 1.6438, "step": 456 }, { "epoch": 0.07337829158638408, "grad_norm": 0.27211204171180725, "learning_rate": 0.0001, "loss": 1.6598, "step": 457 }, { "epoch": 0.073538856775851, "grad_norm": 0.2694942355155945, "learning_rate": 0.0001, "loss": 1.5787, "step": 458 }, { "epoch": 0.07369942196531792, "grad_norm": 0.24894948303699493, "learning_rate": 0.0001, "loss": 1.5849, "step": 459 }, { "epoch": 0.07385998715478484, "grad_norm": 0.2886010408401489, "learning_rate": 0.0001, "loss": 1.6779, "step": 460 }, { "epoch": 0.07402055234425177, "grad_norm": 0.25323522090911865, "learning_rate": 0.0001, "loss": 1.613, "step": 461 }, { "epoch": 0.07418111753371869, "grad_norm": 0.2417057752609253, "learning_rate": 0.0001, "loss": 1.5457, "step": 462 }, { "epoch": 0.07434168272318561, "grad_norm": 0.25295180082321167, "learning_rate": 0.0001, "loss": 1.5663, "step": 463 }, { "epoch": 0.07450224791265253, "grad_norm": 0.27051475644111633, "learning_rate": 0.0001, "loss": 1.5979, "step": 464 }, { "epoch": 0.07466281310211946, "grad_norm": 0.26878947019577026, "learning_rate": 0.0001, "loss": 1.6463, "step": 465 }, { "epoch": 0.07482337829158639, "grad_norm": 0.25430378317832947, "learning_rate": 0.0001, "loss": 1.5731, "step": 466 }, { "epoch": 0.0749839434810533, "grad_norm": 0.27556899189949036, "learning_rate": 0.0001, "loss": 1.6599, "step": 467 }, { "epoch": 0.07514450867052024, "grad_norm": 0.28547075390815735, "learning_rate": 0.0001, "loss": 1.6284, "step": 468 }, { "epoch": 0.07530507385998715, "grad_norm": 0.2858209013938904, "learning_rate": 0.0001, "loss": 1.6093, "step": 469 }, { "epoch": 0.07546563904945408, "grad_norm": 0.2524562180042267, "learning_rate": 0.0001, "loss": 1.5948, "step": 470 }, { "epoch": 0.075626204238921, "grad_norm": 0.26532092690467834, "learning_rate": 0.0001, "loss": 1.5673, "step": 471 }, { "epoch": 0.07578676942838793, "grad_norm": 0.2942146062850952, "learning_rate": 0.0001, "loss": 1.6223, "step": 472 }, { "epoch": 0.07594733461785486, "grad_norm": 0.26330721378326416, "learning_rate": 0.0001, "loss": 1.6887, "step": 473 }, { "epoch": 0.07610789980732177, "grad_norm": 0.29492637515068054, "learning_rate": 0.0001, "loss": 1.6618, "step": 474 }, { "epoch": 0.0762684649967887, "grad_norm": 0.28351297974586487, "learning_rate": 0.0001, "loss": 1.5956, "step": 475 }, { "epoch": 0.07642903018625562, "grad_norm": 0.2659725546836853, "learning_rate": 0.0001, "loss": 1.562, "step": 476 }, { "epoch": 0.07658959537572255, "grad_norm": 0.2990523874759674, "learning_rate": 0.0001, "loss": 1.6288, "step": 477 }, { "epoch": 0.07675016056518946, "grad_norm": 0.26317718625068665, "learning_rate": 0.0001, "loss": 1.6535, "step": 478 }, { "epoch": 0.07691072575465639, "grad_norm": 0.3027506470680237, "learning_rate": 0.0001, "loss": 1.6961, "step": 479 }, { "epoch": 0.07707129094412331, "grad_norm": 0.24868576228618622, "learning_rate": 0.0001, "loss": 1.5759, "step": 480 }, { "epoch": 0.07723185613359024, "grad_norm": 0.27065804600715637, "learning_rate": 0.0001, "loss": 1.6023, "step": 481 }, { "epoch": 0.07739242132305717, "grad_norm": 0.30835163593292236, "learning_rate": 0.0001, "loss": 1.6865, "step": 482 }, { "epoch": 0.07755298651252408, "grad_norm": 0.265004962682724, "learning_rate": 0.0001, "loss": 1.6617, "step": 483 }, { "epoch": 0.07771355170199101, "grad_norm": 0.2614341974258423, "learning_rate": 0.0001, "loss": 1.6596, "step": 484 }, { "epoch": 0.07787411689145793, "grad_norm": 0.25203412771224976, "learning_rate": 0.0001, "loss": 1.5894, "step": 485 }, { "epoch": 0.07803468208092486, "grad_norm": 0.28151461482048035, "learning_rate": 0.0001, "loss": 1.703, "step": 486 }, { "epoch": 0.07819524727039177, "grad_norm": 0.2849961817264557, "learning_rate": 0.0001, "loss": 1.633, "step": 487 }, { "epoch": 0.0783558124598587, "grad_norm": 0.25807830691337585, "learning_rate": 0.0001, "loss": 1.5597, "step": 488 }, { "epoch": 0.07851637764932562, "grad_norm": 0.26951682567596436, "learning_rate": 0.0001, "loss": 1.6252, "step": 489 }, { "epoch": 0.07867694283879255, "grad_norm": 0.27103808522224426, "learning_rate": 0.0001, "loss": 1.652, "step": 490 }, { "epoch": 0.07883750802825948, "grad_norm": 0.2701008915901184, "learning_rate": 0.0001, "loss": 1.6301, "step": 491 }, { "epoch": 0.0789980732177264, "grad_norm": 0.2753893733024597, "learning_rate": 0.0001, "loss": 1.564, "step": 492 }, { "epoch": 0.07915863840719332, "grad_norm": 0.27593153715133667, "learning_rate": 0.0001, "loss": 1.7073, "step": 493 }, { "epoch": 0.07931920359666024, "grad_norm": 0.25339603424072266, "learning_rate": 0.0001, "loss": 1.5699, "step": 494 }, { "epoch": 0.07947976878612717, "grad_norm": 0.2900751233100891, "learning_rate": 0.0001, "loss": 1.6138, "step": 495 }, { "epoch": 0.07964033397559409, "grad_norm": 0.2772659361362457, "learning_rate": 0.0001, "loss": 1.6653, "step": 496 }, { "epoch": 0.07980089916506101, "grad_norm": 0.28388726711273193, "learning_rate": 0.0001, "loss": 1.6259, "step": 497 }, { "epoch": 0.07996146435452794, "grad_norm": 0.2737922966480255, "learning_rate": 0.0001, "loss": 1.594, "step": 498 }, { "epoch": 0.08012202954399486, "grad_norm": 0.27199098467826843, "learning_rate": 0.0001, "loss": 1.5972, "step": 499 }, { "epoch": 0.08028259473346179, "grad_norm": 0.29927489161491394, "learning_rate": 0.0001, "loss": 1.5758, "step": 500 }, { "epoch": 0.0804431599229287, "grad_norm": 60.146236419677734, "learning_rate": 0.0001, "loss": 2.0838, "step": 501 }, { "epoch": 0.08060372511239564, "grad_norm": 0.28392574191093445, "learning_rate": 0.0001, "loss": 1.6233, "step": 502 }, { "epoch": 0.08076429030186255, "grad_norm": 0.2903461456298828, "learning_rate": 0.0001, "loss": 1.7171, "step": 503 }, { "epoch": 0.08092485549132948, "grad_norm": 0.263322651386261, "learning_rate": 0.0001, "loss": 1.5696, "step": 504 }, { "epoch": 0.0810854206807964, "grad_norm": 0.2876776158809662, "learning_rate": 0.0001, "loss": 1.6347, "step": 505 }, { "epoch": 0.08124598587026333, "grad_norm": 0.28394991159439087, "learning_rate": 0.0001, "loss": 1.6217, "step": 506 }, { "epoch": 0.08140655105973026, "grad_norm": 0.26244935393333435, "learning_rate": 0.0001, "loss": 1.5834, "step": 507 }, { "epoch": 0.08156711624919717, "grad_norm": 0.25742456316947937, "learning_rate": 0.0001, "loss": 1.5667, "step": 508 }, { "epoch": 0.0817276814386641, "grad_norm": 0.26109588146209717, "learning_rate": 0.0001, "loss": 1.6388, "step": 509 }, { "epoch": 0.08188824662813102, "grad_norm": 0.2430950254201889, "learning_rate": 0.0001, "loss": 1.5202, "step": 510 }, { "epoch": 0.08204881181759795, "grad_norm": 0.2901354730129242, "learning_rate": 0.0001, "loss": 1.6619, "step": 511 }, { "epoch": 0.08220937700706486, "grad_norm": 0.2899554669857025, "learning_rate": 0.0001, "loss": 1.5867, "step": 512 }, { "epoch": 0.08236994219653179, "grad_norm": 0.2571408748626709, "learning_rate": 0.0001, "loss": 1.619, "step": 513 }, { "epoch": 0.08253050738599872, "grad_norm": 0.28769806027412415, "learning_rate": 0.0001, "loss": 1.6135, "step": 514 }, { "epoch": 0.08269107257546564, "grad_norm": 0.2652043104171753, "learning_rate": 0.0001, "loss": 1.559, "step": 515 }, { "epoch": 0.08285163776493257, "grad_norm": 0.2747846245765686, "learning_rate": 0.0001, "loss": 1.5978, "step": 516 }, { "epoch": 0.08301220295439948, "grad_norm": 0.2706848680973053, "learning_rate": 0.0001, "loss": 1.6333, "step": 517 }, { "epoch": 0.08317276814386641, "grad_norm": 0.28645724058151245, "learning_rate": 0.0001, "loss": 1.6845, "step": 518 }, { "epoch": 0.08333333333333333, "grad_norm": 0.2895065248012543, "learning_rate": 0.0001, "loss": 1.6928, "step": 519 }, { "epoch": 0.08349389852280026, "grad_norm": 0.2539268434047699, "learning_rate": 0.0001, "loss": 1.5785, "step": 520 }, { "epoch": 0.08365446371226717, "grad_norm": 0.2594113051891327, "learning_rate": 0.0001, "loss": 1.6307, "step": 521 }, { "epoch": 0.0838150289017341, "grad_norm": 0.26727333664894104, "learning_rate": 0.0001, "loss": 1.5499, "step": 522 }, { "epoch": 0.08397559409120103, "grad_norm": 0.258354514837265, "learning_rate": 0.0001, "loss": 1.5713, "step": 523 }, { "epoch": 0.08413615928066795, "grad_norm": 0.2712153196334839, "learning_rate": 0.0001, "loss": 1.6509, "step": 524 }, { "epoch": 0.08429672447013488, "grad_norm": 0.2564713656902313, "learning_rate": 0.0001, "loss": 1.6349, "step": 525 }, { "epoch": 0.0844572896596018, "grad_norm": 0.26457738876342773, "learning_rate": 0.0001, "loss": 1.6169, "step": 526 }, { "epoch": 0.08461785484906872, "grad_norm": 0.25640881061553955, "learning_rate": 0.0001, "loss": 1.5795, "step": 527 }, { "epoch": 0.08477842003853564, "grad_norm": 0.25883811712265015, "learning_rate": 0.0001, "loss": 1.6041, "step": 528 }, { "epoch": 0.08493898522800257, "grad_norm": 0.26522043347358704, "learning_rate": 0.0001, "loss": 1.6413, "step": 529 }, { "epoch": 0.0850995504174695, "grad_norm": 0.27930450439453125, "learning_rate": 0.0001, "loss": 1.6209, "step": 530 }, { "epoch": 0.08526011560693642, "grad_norm": 0.2752113342285156, "learning_rate": 0.0001, "loss": 1.5664, "step": 531 }, { "epoch": 0.08542068079640334, "grad_norm": 0.2911113202571869, "learning_rate": 0.0001, "loss": 1.6011, "step": 532 }, { "epoch": 0.08558124598587026, "grad_norm": 0.2742812931537628, "learning_rate": 0.0001, "loss": 1.7042, "step": 533 }, { "epoch": 0.08574181117533719, "grad_norm": 0.3127116560935974, "learning_rate": 0.0001, "loss": 1.5496, "step": 534 }, { "epoch": 0.0859023763648041, "grad_norm": 0.3009921908378601, "learning_rate": 0.0001, "loss": 1.6395, "step": 535 }, { "epoch": 0.08606294155427104, "grad_norm": 0.2412799447774887, "learning_rate": 0.0001, "loss": 1.4681, "step": 536 }, { "epoch": 0.08622350674373795, "grad_norm": 0.2689095735549927, "learning_rate": 0.0001, "loss": 1.5181, "step": 537 }, { "epoch": 0.08638407193320488, "grad_norm": 0.253313273191452, "learning_rate": 0.0001, "loss": 1.6089, "step": 538 }, { "epoch": 0.08654463712267181, "grad_norm": 0.2606997489929199, "learning_rate": 0.0001, "loss": 1.5802, "step": 539 }, { "epoch": 0.08670520231213873, "grad_norm": 0.2673456370830536, "learning_rate": 0.0001, "loss": 1.5876, "step": 540 }, { "epoch": 0.08686576750160566, "grad_norm": 0.28409141302108765, "learning_rate": 0.0001, "loss": 1.6098, "step": 541 }, { "epoch": 0.08702633269107257, "grad_norm": 0.2831609845161438, "learning_rate": 0.0001, "loss": 1.6459, "step": 542 }, { "epoch": 0.0871868978805395, "grad_norm": 0.27968084812164307, "learning_rate": 0.0001, "loss": 1.6794, "step": 543 }, { "epoch": 0.08734746307000642, "grad_norm": 0.3048776388168335, "learning_rate": 0.0001, "loss": 1.7141, "step": 544 }, { "epoch": 0.08750802825947335, "grad_norm": 0.24242153763771057, "learning_rate": 0.0001, "loss": 1.5411, "step": 545 }, { "epoch": 0.08766859344894026, "grad_norm": 0.2752143442630768, "learning_rate": 0.0001, "loss": 1.5587, "step": 546 }, { "epoch": 0.08782915863840719, "grad_norm": 0.25544866919517517, "learning_rate": 0.0001, "loss": 1.5802, "step": 547 }, { "epoch": 0.08798972382787412, "grad_norm": 0.2552921175956726, "learning_rate": 0.0001, "loss": 1.5834, "step": 548 }, { "epoch": 0.08815028901734104, "grad_norm": 0.28750985860824585, "learning_rate": 0.0001, "loss": 1.6387, "step": 549 }, { "epoch": 0.08831085420680797, "grad_norm": 0.26201680302619934, "learning_rate": 0.0001, "loss": 1.677, "step": 550 }, { "epoch": 0.08847141939627488, "grad_norm": 0.2960847020149231, "learning_rate": 0.0001, "loss": 1.6203, "step": 551 }, { "epoch": 0.08863198458574181, "grad_norm": 0.27084705233573914, "learning_rate": 0.0001, "loss": 1.6122, "step": 552 }, { "epoch": 0.08879254977520873, "grad_norm": 0.2630038261413574, "learning_rate": 0.0001, "loss": 1.6549, "step": 553 }, { "epoch": 0.08895311496467566, "grad_norm": 0.26962539553642273, "learning_rate": 0.0001, "loss": 1.6445, "step": 554 }, { "epoch": 0.08911368015414259, "grad_norm": 0.2721734642982483, "learning_rate": 0.0001, "loss": 1.6049, "step": 555 }, { "epoch": 0.0892742453436095, "grad_norm": 0.25505608320236206, "learning_rate": 0.0001, "loss": 1.5966, "step": 556 }, { "epoch": 0.08943481053307643, "grad_norm": 0.2604336142539978, "learning_rate": 0.0001, "loss": 1.6185, "step": 557 }, { "epoch": 0.08959537572254335, "grad_norm": 0.2705906629562378, "learning_rate": 0.0001, "loss": 1.5978, "step": 558 }, { "epoch": 0.08975594091201028, "grad_norm": 0.26772817969322205, "learning_rate": 0.0001, "loss": 1.5895, "step": 559 }, { "epoch": 0.0899165061014772, "grad_norm": 0.28454136848449707, "learning_rate": 0.0001, "loss": 1.6466, "step": 560 }, { "epoch": 0.09007707129094412, "grad_norm": 0.27886369824409485, "learning_rate": 0.0001, "loss": 1.6432, "step": 561 }, { "epoch": 0.09023763648041104, "grad_norm": 0.2522517144680023, "learning_rate": 0.0001, "loss": 1.5409, "step": 562 }, { "epoch": 0.09039820166987797, "grad_norm": 0.31142204999923706, "learning_rate": 0.0001, "loss": 1.6552, "step": 563 }, { "epoch": 0.0905587668593449, "grad_norm": 0.26340827345848083, "learning_rate": 0.0001, "loss": 1.7059, "step": 564 }, { "epoch": 0.09071933204881182, "grad_norm": 0.2656070291996002, "learning_rate": 0.0001, "loss": 1.5082, "step": 565 }, { "epoch": 0.09087989723827875, "grad_norm": 0.2772354483604431, "learning_rate": 0.0001, "loss": 1.5607, "step": 566 }, { "epoch": 0.09104046242774566, "grad_norm": 0.2598869502544403, "learning_rate": 0.0001, "loss": 1.6182, "step": 567 }, { "epoch": 0.09120102761721259, "grad_norm": 0.27644407749176025, "learning_rate": 0.0001, "loss": 1.6045, "step": 568 }, { "epoch": 0.0913615928066795, "grad_norm": 2.638380289077759, "learning_rate": 0.0001, "loss": 1.5733, "step": 569 }, { "epoch": 0.09152215799614644, "grad_norm": 0.2717907130718231, "learning_rate": 0.0001, "loss": 1.546, "step": 570 }, { "epoch": 0.09168272318561337, "grad_norm": 0.26251983642578125, "learning_rate": 0.0001, "loss": 1.6072, "step": 571 }, { "epoch": 0.09184328837508028, "grad_norm": 0.2579016089439392, "learning_rate": 0.0001, "loss": 1.5423, "step": 572 }, { "epoch": 0.09200385356454721, "grad_norm": 0.2622748017311096, "learning_rate": 0.0001, "loss": 1.6253, "step": 573 }, { "epoch": 0.09216441875401413, "grad_norm": 0.2696703374385834, "learning_rate": 0.0001, "loss": 1.6311, "step": 574 }, { "epoch": 0.09232498394348106, "grad_norm": 0.2684510052204132, "learning_rate": 0.0001, "loss": 1.5872, "step": 575 }, { "epoch": 0.09248554913294797, "grad_norm": 0.2625136375427246, "learning_rate": 0.0001, "loss": 1.5734, "step": 576 }, { "epoch": 0.0926461143224149, "grad_norm": 0.2556249499320984, "learning_rate": 0.0001, "loss": 1.5753, "step": 577 }, { "epoch": 0.09280667951188182, "grad_norm": 0.28377729654312134, "learning_rate": 0.0001, "loss": 1.6421, "step": 578 }, { "epoch": 0.09296724470134875, "grad_norm": 0.2608239948749542, "learning_rate": 0.0001, "loss": 1.6478, "step": 579 }, { "epoch": 0.09312780989081568, "grad_norm": 0.263975590467453, "learning_rate": 0.0001, "loss": 1.628, "step": 580 }, { "epoch": 0.09328837508028259, "grad_norm": 0.2537274956703186, "learning_rate": 0.0001, "loss": 1.6145, "step": 581 }, { "epoch": 0.09344894026974952, "grad_norm": 0.25347456336021423, "learning_rate": 0.0001, "loss": 1.5722, "step": 582 }, { "epoch": 0.09360950545921644, "grad_norm": 0.3010750710964203, "learning_rate": 0.0001, "loss": 1.5706, "step": 583 }, { "epoch": 0.09377007064868337, "grad_norm": 0.2591670751571655, "learning_rate": 0.0001, "loss": 1.5847, "step": 584 }, { "epoch": 0.09393063583815028, "grad_norm": 0.26924920082092285, "learning_rate": 0.0001, "loss": 1.577, "step": 585 }, { "epoch": 0.09409120102761721, "grad_norm": 0.275130033493042, "learning_rate": 0.0001, "loss": 1.6624, "step": 586 }, { "epoch": 0.09425176621708414, "grad_norm": 0.25509998202323914, "learning_rate": 0.0001, "loss": 1.6138, "step": 587 }, { "epoch": 0.09441233140655106, "grad_norm": 0.2571599781513214, "learning_rate": 0.0001, "loss": 1.583, "step": 588 }, { "epoch": 0.09457289659601799, "grad_norm": 0.25363945960998535, "learning_rate": 0.0001, "loss": 1.6196, "step": 589 }, { "epoch": 0.0947334617854849, "grad_norm": 0.2683764100074768, "learning_rate": 0.0001, "loss": 1.6649, "step": 590 }, { "epoch": 0.09489402697495183, "grad_norm": 0.27039188146591187, "learning_rate": 0.0001, "loss": 1.6218, "step": 591 }, { "epoch": 0.09505459216441875, "grad_norm": 0.29481080174446106, "learning_rate": 0.0001, "loss": 1.6723, "step": 592 }, { "epoch": 0.09521515735388568, "grad_norm": 0.2520233988761902, "learning_rate": 0.0001, "loss": 1.5608, "step": 593 }, { "epoch": 0.0953757225433526, "grad_norm": 0.286181777715683, "learning_rate": 0.0001, "loss": 1.6259, "step": 594 }, { "epoch": 0.09553628773281952, "grad_norm": 0.2637265920639038, "learning_rate": 0.0001, "loss": 1.6169, "step": 595 }, { "epoch": 0.09569685292228645, "grad_norm": 0.2822468876838684, "learning_rate": 0.0001, "loss": 1.6582, "step": 596 }, { "epoch": 0.09585741811175337, "grad_norm": 0.24705122411251068, "learning_rate": 0.0001, "loss": 1.6213, "step": 597 }, { "epoch": 0.0960179833012203, "grad_norm": 0.26160547137260437, "learning_rate": 0.0001, "loss": 1.6719, "step": 598 }, { "epoch": 0.09617854849068722, "grad_norm": 0.25721287727355957, "learning_rate": 0.0001, "loss": 1.63, "step": 599 }, { "epoch": 0.09633911368015415, "grad_norm": 0.26050350069999695, "learning_rate": 0.0001, "loss": 1.6177, "step": 600 }, { "epoch": 0.09649967886962106, "grad_norm": 0.27546998858451843, "learning_rate": 0.0001, "loss": 1.6334, "step": 601 }, { "epoch": 0.09666024405908799, "grad_norm": 0.256801962852478, "learning_rate": 0.0001, "loss": 1.5526, "step": 602 }, { "epoch": 0.0968208092485549, "grad_norm": 0.2656329870223999, "learning_rate": 0.0001, "loss": 1.6107, "step": 603 }, { "epoch": 0.09698137443802184, "grad_norm": 0.26477161049842834, "learning_rate": 0.0001, "loss": 1.6383, "step": 604 }, { "epoch": 0.09714193962748877, "grad_norm": 0.2643807530403137, "learning_rate": 0.0001, "loss": 1.6143, "step": 605 }, { "epoch": 0.09730250481695568, "grad_norm": 0.25192004442214966, "learning_rate": 0.0001, "loss": 1.5833, "step": 606 }, { "epoch": 0.09746307000642261, "grad_norm": 0.2630442678928375, "learning_rate": 0.0001, "loss": 1.6459, "step": 607 }, { "epoch": 0.09762363519588953, "grad_norm": 0.258197158575058, "learning_rate": 0.0001, "loss": 1.5996, "step": 608 }, { "epoch": 0.09778420038535646, "grad_norm": 0.26932215690612793, "learning_rate": 0.0001, "loss": 1.6144, "step": 609 }, { "epoch": 0.09794476557482337, "grad_norm": 0.28247031569480896, "learning_rate": 0.0001, "loss": 1.5781, "step": 610 }, { "epoch": 0.0981053307642903, "grad_norm": 0.268706738948822, "learning_rate": 0.0001, "loss": 1.618, "step": 611 }, { "epoch": 0.09826589595375723, "grad_norm": 0.2516535222530365, "learning_rate": 0.0001, "loss": 1.561, "step": 612 }, { "epoch": 0.09842646114322415, "grad_norm": 0.2579093873500824, "learning_rate": 0.0001, "loss": 1.6603, "step": 613 }, { "epoch": 0.09858702633269108, "grad_norm": 0.2705880105495453, "learning_rate": 0.0001, "loss": 1.5977, "step": 614 }, { "epoch": 0.098747591522158, "grad_norm": 0.28428101539611816, "learning_rate": 0.0001, "loss": 1.6044, "step": 615 }, { "epoch": 0.09890815671162492, "grad_norm": 0.26495474576950073, "learning_rate": 0.0001, "loss": 1.6648, "step": 616 }, { "epoch": 0.09906872190109184, "grad_norm": 0.2962563633918762, "learning_rate": 0.0001, "loss": 1.6269, "step": 617 }, { "epoch": 0.09922928709055877, "grad_norm": 0.26589927077293396, "learning_rate": 0.0001, "loss": 1.6277, "step": 618 }, { "epoch": 0.09938985228002568, "grad_norm": 0.2665221691131592, "learning_rate": 0.0001, "loss": 1.5782, "step": 619 }, { "epoch": 0.09955041746949261, "grad_norm": 0.24735838174819946, "learning_rate": 0.0001, "loss": 1.5841, "step": 620 }, { "epoch": 0.09971098265895954, "grad_norm": 0.25282835960388184, "learning_rate": 0.0001, "loss": 1.5395, "step": 621 }, { "epoch": 0.09987154784842646, "grad_norm": 0.27968940138816833, "learning_rate": 0.0001, "loss": 1.6733, "step": 622 }, { "epoch": 0.10003211303789339, "grad_norm": 0.25760510563850403, "learning_rate": 0.0001, "loss": 1.5227, "step": 623 }, { "epoch": 0.1001926782273603, "grad_norm": 0.2664651572704315, "learning_rate": 0.0001, "loss": 1.5838, "step": 624 }, { "epoch": 0.10035324341682723, "grad_norm": 0.2841992974281311, "learning_rate": 0.0001, "loss": 1.6479, "step": 625 }, { "epoch": 0.10051380860629415, "grad_norm": 0.2621322274208069, "learning_rate": 0.0001, "loss": 1.5879, "step": 626 }, { "epoch": 0.10067437379576108, "grad_norm": 0.27068841457366943, "learning_rate": 0.0001, "loss": 1.5974, "step": 627 }, { "epoch": 0.10083493898522801, "grad_norm": 0.30539098381996155, "learning_rate": 0.0001, "loss": 1.6288, "step": 628 }, { "epoch": 0.10099550417469493, "grad_norm": 0.24707889556884766, "learning_rate": 0.0001, "loss": 1.6518, "step": 629 }, { "epoch": 0.10115606936416185, "grad_norm": 0.26346689462661743, "learning_rate": 0.0001, "loss": 1.6062, "step": 630 }, { "epoch": 0.10131663455362877, "grad_norm": 0.2612040042877197, "learning_rate": 0.0001, "loss": 1.5447, "step": 631 }, { "epoch": 0.1014771997430957, "grad_norm": 0.2816939055919647, "learning_rate": 0.0001, "loss": 1.6229, "step": 632 }, { "epoch": 0.10163776493256262, "grad_norm": 0.27160507440567017, "learning_rate": 0.0001, "loss": 1.6842, "step": 633 }, { "epoch": 0.10179833012202955, "grad_norm": 0.25368353724479675, "learning_rate": 0.0001, "loss": 1.5702, "step": 634 }, { "epoch": 0.10195889531149646, "grad_norm": 0.26571977138519287, "learning_rate": 0.0001, "loss": 1.633, "step": 635 }, { "epoch": 0.10211946050096339, "grad_norm": 0.27272340655326843, "learning_rate": 0.0001, "loss": 1.6395, "step": 636 }, { "epoch": 0.10228002569043032, "grad_norm": 0.27077198028564453, "learning_rate": 0.0001, "loss": 1.6647, "step": 637 }, { "epoch": 0.10244059087989724, "grad_norm": 0.3076954185962677, "learning_rate": 0.0001, "loss": 1.5771, "step": 638 }, { "epoch": 0.10260115606936417, "grad_norm": 0.26889434456825256, "learning_rate": 0.0001, "loss": 1.6317, "step": 639 }, { "epoch": 0.10276172125883108, "grad_norm": 0.273711234331131, "learning_rate": 0.0001, "loss": 1.5547, "step": 640 }, { "epoch": 0.10292228644829801, "grad_norm": 0.25578954815864563, "learning_rate": 0.0001, "loss": 1.5703, "step": 641 }, { "epoch": 0.10308285163776493, "grad_norm": 0.2453918308019638, "learning_rate": 0.0001, "loss": 1.5894, "step": 642 }, { "epoch": 0.10324341682723186, "grad_norm": 0.2527920603752136, "learning_rate": 0.0001, "loss": 1.5275, "step": 643 }, { "epoch": 0.10340398201669879, "grad_norm": 0.2707948088645935, "learning_rate": 0.0001, "loss": 1.6847, "step": 644 }, { "epoch": 0.1035645472061657, "grad_norm": 0.2517828047275543, "learning_rate": 0.0001, "loss": 1.6775, "step": 645 }, { "epoch": 0.10372511239563263, "grad_norm": 0.2665092647075653, "learning_rate": 0.0001, "loss": 1.7126, "step": 646 }, { "epoch": 0.10388567758509955, "grad_norm": 0.2530919909477234, "learning_rate": 0.0001, "loss": 1.6325, "step": 647 }, { "epoch": 0.10404624277456648, "grad_norm": 0.25849777460098267, "learning_rate": 0.0001, "loss": 1.598, "step": 648 }, { "epoch": 0.1042068079640334, "grad_norm": 0.26646289229393005, "learning_rate": 0.0001, "loss": 1.6072, "step": 649 }, { "epoch": 0.10436737315350032, "grad_norm": 0.259625643491745, "learning_rate": 0.0001, "loss": 1.6249, "step": 650 }, { "epoch": 0.10452793834296724, "grad_norm": 0.2740899622440338, "learning_rate": 0.0001, "loss": 1.6599, "step": 651 }, { "epoch": 0.10468850353243417, "grad_norm": 0.24367207288742065, "learning_rate": 0.0001, "loss": 1.4812, "step": 652 }, { "epoch": 0.1048490687219011, "grad_norm": 0.260138601064682, "learning_rate": 0.0001, "loss": 1.6092, "step": 653 }, { "epoch": 0.10500963391136801, "grad_norm": 0.25125980377197266, "learning_rate": 0.0001, "loss": 1.5853, "step": 654 }, { "epoch": 0.10517019910083494, "grad_norm": 0.2736695110797882, "learning_rate": 0.0001, "loss": 1.593, "step": 655 }, { "epoch": 0.10533076429030186, "grad_norm": 1.329075813293457, "learning_rate": 0.0001, "loss": 1.6744, "step": 656 }, { "epoch": 0.10549132947976879, "grad_norm": 0.24662800133228302, "learning_rate": 0.0001, "loss": 1.5462, "step": 657 }, { "epoch": 0.1056518946692357, "grad_norm": 0.27666983008384705, "learning_rate": 0.0001, "loss": 1.6646, "step": 658 }, { "epoch": 0.10581245985870263, "grad_norm": 0.26717808842658997, "learning_rate": 0.0001, "loss": 1.7311, "step": 659 }, { "epoch": 0.10597302504816955, "grad_norm": 0.24117274582386017, "learning_rate": 0.0001, "loss": 1.4865, "step": 660 }, { "epoch": 0.10613359023763648, "grad_norm": 0.27742815017700195, "learning_rate": 0.0001, "loss": 1.5919, "step": 661 }, { "epoch": 0.10629415542710341, "grad_norm": 0.2660296559333801, "learning_rate": 0.0001, "loss": 1.6086, "step": 662 }, { "epoch": 0.10645472061657033, "grad_norm": 0.2646217346191406, "learning_rate": 0.0001, "loss": 1.7272, "step": 663 }, { "epoch": 0.10661528580603725, "grad_norm": 0.284201055765152, "learning_rate": 0.0001, "loss": 1.665, "step": 664 }, { "epoch": 0.10677585099550417, "grad_norm": 0.26237398386001587, "learning_rate": 0.0001, "loss": 1.6256, "step": 665 }, { "epoch": 0.1069364161849711, "grad_norm": 0.251699835062027, "learning_rate": 0.0001, "loss": 1.5611, "step": 666 }, { "epoch": 0.10709698137443802, "grad_norm": 0.26088041067123413, "learning_rate": 0.0001, "loss": 1.6255, "step": 667 }, { "epoch": 0.10725754656390495, "grad_norm": 0.2808876633644104, "learning_rate": 0.0001, "loss": 1.6421, "step": 668 }, { "epoch": 0.10741811175337188, "grad_norm": 0.25902828574180603, "learning_rate": 0.0001, "loss": 1.5471, "step": 669 }, { "epoch": 0.10757867694283879, "grad_norm": 0.27159273624420166, "learning_rate": 0.0001, "loss": 1.548, "step": 670 }, { "epoch": 0.10773924213230572, "grad_norm": 0.2766246199607849, "learning_rate": 0.0001, "loss": 1.5603, "step": 671 }, { "epoch": 0.10789980732177264, "grad_norm": 0.2636677622795105, "learning_rate": 0.0001, "loss": 1.663, "step": 672 }, { "epoch": 0.10806037251123957, "grad_norm": 0.2700072228908539, "learning_rate": 0.0001, "loss": 1.6586, "step": 673 }, { "epoch": 0.10822093770070648, "grad_norm": 0.26270678639411926, "learning_rate": 0.0001, "loss": 1.6112, "step": 674 }, { "epoch": 0.10838150289017341, "grad_norm": 0.2790539562702179, "learning_rate": 0.0001, "loss": 1.5652, "step": 675 }, { "epoch": 0.10854206807964033, "grad_norm": 0.2826235890388489, "learning_rate": 0.0001, "loss": 1.6552, "step": 676 }, { "epoch": 0.10870263326910726, "grad_norm": 0.2595786452293396, "learning_rate": 0.0001, "loss": 1.5313, "step": 677 }, { "epoch": 0.10886319845857419, "grad_norm": 0.25537219643592834, "learning_rate": 0.0001, "loss": 1.4861, "step": 678 }, { "epoch": 0.1090237636480411, "grad_norm": 0.26303696632385254, "learning_rate": 0.0001, "loss": 1.5731, "step": 679 }, { "epoch": 0.10918432883750803, "grad_norm": 0.5021303296089172, "learning_rate": 0.0001, "loss": 1.6219, "step": 680 }, { "epoch": 0.10934489402697495, "grad_norm": 0.2666873037815094, "learning_rate": 0.0001, "loss": 1.5963, "step": 681 }, { "epoch": 0.10950545921644188, "grad_norm": 0.30422693490982056, "learning_rate": 0.0001, "loss": 1.6347, "step": 682 }, { "epoch": 0.1096660244059088, "grad_norm": 0.26333922147750854, "learning_rate": 0.0001, "loss": 1.5852, "step": 683 }, { "epoch": 0.10982658959537572, "grad_norm": 0.24885666370391846, "learning_rate": 0.0001, "loss": 1.6133, "step": 684 }, { "epoch": 0.10998715478484265, "grad_norm": 0.2608605921268463, "learning_rate": 0.0001, "loss": 1.6446, "step": 685 }, { "epoch": 0.11014771997430957, "grad_norm": 0.29082754254341125, "learning_rate": 0.0001, "loss": 1.7178, "step": 686 }, { "epoch": 0.1103082851637765, "grad_norm": 0.2474960833787918, "learning_rate": 0.0001, "loss": 1.5778, "step": 687 }, { "epoch": 0.11046885035324341, "grad_norm": 0.2687987983226776, "learning_rate": 0.0001, "loss": 1.6162, "step": 688 }, { "epoch": 0.11062941554271034, "grad_norm": 0.28898414969444275, "learning_rate": 0.0001, "loss": 1.6355, "step": 689 }, { "epoch": 0.11078998073217726, "grad_norm": 0.28025883436203003, "learning_rate": 0.0001, "loss": 1.6369, "step": 690 }, { "epoch": 0.11095054592164419, "grad_norm": 0.29039886593818665, "learning_rate": 0.0001, "loss": 1.6717, "step": 691 }, { "epoch": 0.1111111111111111, "grad_norm": 0.2517712116241455, "learning_rate": 0.0001, "loss": 1.575, "step": 692 }, { "epoch": 0.11127167630057803, "grad_norm": 0.24026477336883545, "learning_rate": 0.0001, "loss": 1.5107, "step": 693 }, { "epoch": 0.11143224149004496, "grad_norm": 0.3061399459838867, "learning_rate": 0.0001, "loss": 1.5785, "step": 694 }, { "epoch": 0.11159280667951188, "grad_norm": 0.25361010432243347, "learning_rate": 0.0001, "loss": 1.5685, "step": 695 }, { "epoch": 0.11175337186897881, "grad_norm": 0.27122291922569275, "learning_rate": 0.0001, "loss": 1.547, "step": 696 }, { "epoch": 0.11191393705844573, "grad_norm": 0.2916577458381653, "learning_rate": 0.0001, "loss": 1.6699, "step": 697 }, { "epoch": 0.11207450224791266, "grad_norm": 0.2575892210006714, "learning_rate": 0.0001, "loss": 1.5471, "step": 698 }, { "epoch": 0.11223506743737957, "grad_norm": 0.27215683460235596, "learning_rate": 0.0001, "loss": 1.4966, "step": 699 }, { "epoch": 0.1123956326268465, "grad_norm": 0.2941299378871918, "learning_rate": 0.0001, "loss": 1.6237, "step": 700 }, { "epoch": 0.11255619781631342, "grad_norm": 0.26432883739471436, "learning_rate": 0.0001, "loss": 1.5561, "step": 701 }, { "epoch": 0.11271676300578035, "grad_norm": 0.2935466170310974, "learning_rate": 0.0001, "loss": 1.6628, "step": 702 }, { "epoch": 0.11287732819524728, "grad_norm": 0.283089280128479, "learning_rate": 0.0001, "loss": 1.6165, "step": 703 }, { "epoch": 0.11303789338471419, "grad_norm": 0.29502588510513306, "learning_rate": 0.0001, "loss": 1.6731, "step": 704 }, { "epoch": 0.11319845857418112, "grad_norm": 0.27801328897476196, "learning_rate": 0.0001, "loss": 1.6759, "step": 705 }, { "epoch": 0.11335902376364804, "grad_norm": 0.2630738317966461, "learning_rate": 0.0001, "loss": 1.5675, "step": 706 }, { "epoch": 0.11351958895311497, "grad_norm": 0.2872748076915741, "learning_rate": 0.0001, "loss": 1.6502, "step": 707 }, { "epoch": 0.11368015414258188, "grad_norm": 0.265083372592926, "learning_rate": 0.0001, "loss": 1.5443, "step": 708 }, { "epoch": 0.11384071933204881, "grad_norm": 0.2837506830692291, "learning_rate": 0.0001, "loss": 1.5847, "step": 709 }, { "epoch": 0.11400128452151574, "grad_norm": 0.29870733618736267, "learning_rate": 0.0001, "loss": 1.5679, "step": 710 }, { "epoch": 0.11416184971098266, "grad_norm": 0.27681732177734375, "learning_rate": 0.0001, "loss": 1.6262, "step": 711 }, { "epoch": 0.11432241490044959, "grad_norm": 0.2766120135784149, "learning_rate": 0.0001, "loss": 1.618, "step": 712 }, { "epoch": 0.1144829800899165, "grad_norm": 0.2983716130256653, "learning_rate": 0.0001, "loss": 1.6639, "step": 713 }, { "epoch": 0.11464354527938343, "grad_norm": 0.25736159086227417, "learning_rate": 0.0001, "loss": 1.5842, "step": 714 }, { "epoch": 0.11480411046885035, "grad_norm": 0.2921600043773651, "learning_rate": 0.0001, "loss": 1.6578, "step": 715 }, { "epoch": 0.11496467565831728, "grad_norm": 0.2478378266096115, "learning_rate": 0.0001, "loss": 1.5059, "step": 716 }, { "epoch": 0.1151252408477842, "grad_norm": 0.262857049703598, "learning_rate": 0.0001, "loss": 1.5717, "step": 717 }, { "epoch": 0.11528580603725112, "grad_norm": 0.2785440683364868, "learning_rate": 0.0001, "loss": 1.6867, "step": 718 }, { "epoch": 0.11544637122671805, "grad_norm": 0.2898574769496918, "learning_rate": 0.0001, "loss": 1.6168, "step": 719 }, { "epoch": 0.11560693641618497, "grad_norm": 0.2668989300727844, "learning_rate": 0.0001, "loss": 1.6193, "step": 720 }, { "epoch": 0.1157675016056519, "grad_norm": 0.2536221742630005, "learning_rate": 0.0001, "loss": 1.62, "step": 721 }, { "epoch": 0.11592806679511881, "grad_norm": 0.2669164836406708, "learning_rate": 0.0001, "loss": 1.6766, "step": 722 }, { "epoch": 0.11608863198458574, "grad_norm": 0.2606102526187897, "learning_rate": 0.0001, "loss": 1.6556, "step": 723 }, { "epoch": 0.11624919717405266, "grad_norm": 0.27430862188339233, "learning_rate": 0.0001, "loss": 1.6073, "step": 724 }, { "epoch": 0.11640976236351959, "grad_norm": 0.25970223546028137, "learning_rate": 0.0001, "loss": 1.6094, "step": 725 }, { "epoch": 0.11657032755298652, "grad_norm": 0.2728593349456787, "learning_rate": 0.0001, "loss": 1.6689, "step": 726 }, { "epoch": 0.11673089274245343, "grad_norm": 0.24974913895130157, "learning_rate": 0.0001, "loss": 1.5673, "step": 727 }, { "epoch": 0.11689145793192036, "grad_norm": 0.24918828904628754, "learning_rate": 0.0001, "loss": 1.566, "step": 728 }, { "epoch": 0.11705202312138728, "grad_norm": 0.2884836494922638, "learning_rate": 0.0001, "loss": 1.6147, "step": 729 }, { "epoch": 0.11721258831085421, "grad_norm": 0.33627474308013916, "learning_rate": 0.0001, "loss": 1.675, "step": 730 }, { "epoch": 0.11737315350032113, "grad_norm": 0.2641352415084839, "learning_rate": 0.0001, "loss": 1.6296, "step": 731 }, { "epoch": 0.11753371868978806, "grad_norm": 0.25515982508659363, "learning_rate": 0.0001, "loss": 1.6185, "step": 732 }, { "epoch": 0.11769428387925497, "grad_norm": 0.2719225585460663, "learning_rate": 0.0001, "loss": 1.6463, "step": 733 }, { "epoch": 0.1178548490687219, "grad_norm": 0.25415921211242676, "learning_rate": 0.0001, "loss": 1.606, "step": 734 }, { "epoch": 0.11801541425818883, "grad_norm": 0.24442121386528015, "learning_rate": 0.0001, "loss": 1.5864, "step": 735 }, { "epoch": 0.11817597944765575, "grad_norm": 0.25296634435653687, "learning_rate": 0.0001, "loss": 1.5527, "step": 736 }, { "epoch": 0.11833654463712268, "grad_norm": 0.2576388418674469, "learning_rate": 0.0001, "loss": 1.563, "step": 737 }, { "epoch": 0.11849710982658959, "grad_norm": 0.25560155510902405, "learning_rate": 0.0001, "loss": 1.5732, "step": 738 }, { "epoch": 0.11865767501605652, "grad_norm": 0.2664654552936554, "learning_rate": 0.0001, "loss": 1.654, "step": 739 }, { "epoch": 0.11881824020552344, "grad_norm": 0.2674829661846161, "learning_rate": 0.0001, "loss": 1.6087, "step": 740 }, { "epoch": 0.11897880539499037, "grad_norm": 0.25803428888320923, "learning_rate": 0.0001, "loss": 1.6313, "step": 741 }, { "epoch": 0.1191393705844573, "grad_norm": 0.26967284083366394, "learning_rate": 0.0001, "loss": 1.6864, "step": 742 }, { "epoch": 0.11929993577392421, "grad_norm": 0.2588898539543152, "learning_rate": 0.0001, "loss": 1.6192, "step": 743 }, { "epoch": 0.11946050096339114, "grad_norm": 0.25680235028266907, "learning_rate": 0.0001, "loss": 1.6432, "step": 744 }, { "epoch": 0.11962106615285806, "grad_norm": 0.2503674328327179, "learning_rate": 0.0001, "loss": 1.584, "step": 745 }, { "epoch": 0.11978163134232499, "grad_norm": 0.2691657245159149, "learning_rate": 0.0001, "loss": 1.6631, "step": 746 }, { "epoch": 0.1199421965317919, "grad_norm": 0.25727444887161255, "learning_rate": 0.0001, "loss": 1.611, "step": 747 }, { "epoch": 0.12010276172125883, "grad_norm": 0.2589010000228882, "learning_rate": 0.0001, "loss": 1.5458, "step": 748 }, { "epoch": 0.12026332691072575, "grad_norm": 0.2563769221305847, "learning_rate": 0.0001, "loss": 1.5711, "step": 749 }, { "epoch": 0.12042389210019268, "grad_norm": 0.27601945400238037, "learning_rate": 0.0001, "loss": 1.611, "step": 750 }, { "epoch": 0.12058445728965961, "grad_norm": 0.24667346477508545, "learning_rate": 0.0001, "loss": 1.5895, "step": 751 }, { "epoch": 0.12074502247912652, "grad_norm": 0.2612060010433197, "learning_rate": 0.0001, "loss": 1.5545, "step": 752 }, { "epoch": 0.12090558766859345, "grad_norm": 0.2722240686416626, "learning_rate": 0.0001, "loss": 1.5912, "step": 753 }, { "epoch": 0.12106615285806037, "grad_norm": 0.2581103444099426, "learning_rate": 0.0001, "loss": 1.6089, "step": 754 }, { "epoch": 0.1212267180475273, "grad_norm": 0.29264286160469055, "learning_rate": 0.0001, "loss": 1.5833, "step": 755 }, { "epoch": 0.12138728323699421, "grad_norm": 0.27083325386047363, "learning_rate": 0.0001, "loss": 1.6171, "step": 756 }, { "epoch": 0.12154784842646114, "grad_norm": 0.2839059829711914, "learning_rate": 0.0001, "loss": 1.6719, "step": 757 }, { "epoch": 0.12170841361592806, "grad_norm": 0.2692236602306366, "learning_rate": 0.0001, "loss": 1.5431, "step": 758 }, { "epoch": 0.12186897880539499, "grad_norm": 0.2881883680820465, "learning_rate": 0.0001, "loss": 1.677, "step": 759 }, { "epoch": 0.12202954399486192, "grad_norm": 0.25900858640670776, "learning_rate": 0.0001, "loss": 1.5977, "step": 760 }, { "epoch": 0.12219010918432884, "grad_norm": 0.2649819850921631, "learning_rate": 0.0001, "loss": 1.6206, "step": 761 }, { "epoch": 0.12235067437379576, "grad_norm": 0.26542365550994873, "learning_rate": 0.0001, "loss": 1.5952, "step": 762 }, { "epoch": 0.12251123956326268, "grad_norm": 0.25755128264427185, "learning_rate": 0.0001, "loss": 1.6292, "step": 763 }, { "epoch": 0.12267180475272961, "grad_norm": 0.25057458877563477, "learning_rate": 0.0001, "loss": 1.6348, "step": 764 }, { "epoch": 0.12283236994219653, "grad_norm": 0.2558939456939697, "learning_rate": 0.0001, "loss": 1.5879, "step": 765 }, { "epoch": 0.12299293513166346, "grad_norm": 0.2470790445804596, "learning_rate": 0.0001, "loss": 1.5863, "step": 766 }, { "epoch": 0.12315350032113039, "grad_norm": 0.2466525435447693, "learning_rate": 0.0001, "loss": 1.6173, "step": 767 }, { "epoch": 0.1233140655105973, "grad_norm": 0.24029389023780823, "learning_rate": 0.0001, "loss": 1.5194, "step": 768 }, { "epoch": 0.12347463070006423, "grad_norm": 0.25701624155044556, "learning_rate": 0.0001, "loss": 1.6094, "step": 769 }, { "epoch": 0.12363519588953115, "grad_norm": 0.2579065263271332, "learning_rate": 0.0001, "loss": 1.6371, "step": 770 }, { "epoch": 0.12379576107899808, "grad_norm": 0.25433599948883057, "learning_rate": 0.0001, "loss": 1.5433, "step": 771 }, { "epoch": 0.12395632626846499, "grad_norm": 0.25113752484321594, "learning_rate": 0.0001, "loss": 1.6899, "step": 772 }, { "epoch": 0.12411689145793192, "grad_norm": 0.25137612223625183, "learning_rate": 0.0001, "loss": 1.5753, "step": 773 }, { "epoch": 0.12427745664739884, "grad_norm": 0.25248846411705017, "learning_rate": 0.0001, "loss": 1.5925, "step": 774 }, { "epoch": 0.12443802183686577, "grad_norm": 0.25833266973495483, "learning_rate": 0.0001, "loss": 1.6114, "step": 775 }, { "epoch": 0.1245985870263327, "grad_norm": 0.26226380467414856, "learning_rate": 0.0001, "loss": 1.6383, "step": 776 }, { "epoch": 0.12475915221579961, "grad_norm": 0.27248066663742065, "learning_rate": 0.0001, "loss": 1.6382, "step": 777 }, { "epoch": 0.12491971740526654, "grad_norm": 0.2538253962993622, "learning_rate": 0.0001, "loss": 1.6168, "step": 778 }, { "epoch": 0.12508028259473347, "grad_norm": 0.2772725820541382, "learning_rate": 0.0001, "loss": 1.5979, "step": 779 }, { "epoch": 0.1252408477842004, "grad_norm": 0.2635462284088135, "learning_rate": 0.0001, "loss": 1.6585, "step": 780 }, { "epoch": 0.1254014129736673, "grad_norm": 0.24889424443244934, "learning_rate": 0.0001, "loss": 1.6109, "step": 781 }, { "epoch": 0.12556197816313422, "grad_norm": 0.2484816163778305, "learning_rate": 0.0001, "loss": 1.6207, "step": 782 }, { "epoch": 0.12572254335260116, "grad_norm": 0.2596292495727539, "learning_rate": 0.0001, "loss": 1.6653, "step": 783 }, { "epoch": 0.12588310854206808, "grad_norm": 0.26616355776786804, "learning_rate": 0.0001, "loss": 1.6816, "step": 784 }, { "epoch": 0.126043673731535, "grad_norm": 0.494791716337204, "learning_rate": 0.0001, "loss": 1.5498, "step": 785 }, { "epoch": 0.12620423892100194, "grad_norm": 0.2505752146244049, "learning_rate": 0.0001, "loss": 1.5923, "step": 786 }, { "epoch": 0.12636480411046885, "grad_norm": 0.26114824414253235, "learning_rate": 0.0001, "loss": 1.5349, "step": 787 }, { "epoch": 0.12652536929993577, "grad_norm": 0.2598491311073303, "learning_rate": 0.0001, "loss": 1.6301, "step": 788 }, { "epoch": 0.12668593448940269, "grad_norm": 0.2534463405609131, "learning_rate": 0.0001, "loss": 1.5631, "step": 789 }, { "epoch": 0.12684649967886963, "grad_norm": 0.27598607540130615, "learning_rate": 0.0001, "loss": 1.6233, "step": 790 }, { "epoch": 0.12700706486833654, "grad_norm": 0.2497626096010208, "learning_rate": 0.0001, "loss": 1.5669, "step": 791 }, { "epoch": 0.12716763005780346, "grad_norm": 0.2503911256790161, "learning_rate": 0.0001, "loss": 1.5778, "step": 792 }, { "epoch": 0.1273281952472704, "grad_norm": 0.2954845726490021, "learning_rate": 0.0001, "loss": 1.6333, "step": 793 }, { "epoch": 0.12748876043673732, "grad_norm": 0.2428663820028305, "learning_rate": 0.0001, "loss": 1.4572, "step": 794 }, { "epoch": 0.12764932562620424, "grad_norm": 0.2646285891532898, "learning_rate": 0.0001, "loss": 1.5702, "step": 795 }, { "epoch": 0.12780989081567115, "grad_norm": 0.25721773505210876, "learning_rate": 0.0001, "loss": 1.6594, "step": 796 }, { "epoch": 0.1279704560051381, "grad_norm": 0.2589038610458374, "learning_rate": 0.0001, "loss": 1.5998, "step": 797 }, { "epoch": 0.128131021194605, "grad_norm": 0.27920010685920715, "learning_rate": 0.0001, "loss": 1.6561, "step": 798 }, { "epoch": 0.12829158638407193, "grad_norm": 0.45557379722595215, "learning_rate": 0.0001, "loss": 1.5964, "step": 799 }, { "epoch": 0.12845215157353887, "grad_norm": 0.2536633610725403, "learning_rate": 0.0001, "loss": 1.5482, "step": 800 }, { "epoch": 0.12861271676300579, "grad_norm": 0.2707673907279968, "learning_rate": 0.0001, "loss": 1.6412, "step": 801 }, { "epoch": 0.1287732819524727, "grad_norm": 0.24133442342281342, "learning_rate": 0.0001, "loss": 1.589, "step": 802 }, { "epoch": 0.12893384714193962, "grad_norm": 0.2665124535560608, "learning_rate": 0.0001, "loss": 1.568, "step": 803 }, { "epoch": 0.12909441233140656, "grad_norm": 0.2640987038612366, "learning_rate": 0.0001, "loss": 1.565, "step": 804 }, { "epoch": 0.12925497752087348, "grad_norm": 0.2914842367172241, "learning_rate": 0.0001, "loss": 1.6363, "step": 805 }, { "epoch": 0.1294155427103404, "grad_norm": 0.25757482647895813, "learning_rate": 0.0001, "loss": 1.6507, "step": 806 }, { "epoch": 0.1295761078998073, "grad_norm": 0.24502956867218018, "learning_rate": 0.0001, "loss": 1.5228, "step": 807 }, { "epoch": 0.12973667308927425, "grad_norm": 0.25205034017562866, "learning_rate": 0.0001, "loss": 1.551, "step": 808 }, { "epoch": 0.12989723827874117, "grad_norm": 0.25903329253196716, "learning_rate": 0.0001, "loss": 1.566, "step": 809 }, { "epoch": 0.13005780346820808, "grad_norm": 0.24316738545894623, "learning_rate": 0.0001, "loss": 1.6612, "step": 810 }, { "epoch": 0.13021836865767503, "grad_norm": 0.24032652378082275, "learning_rate": 0.0001, "loss": 1.5407, "step": 811 }, { "epoch": 0.13037893384714194, "grad_norm": 0.2632538378238678, "learning_rate": 0.0001, "loss": 1.5716, "step": 812 }, { "epoch": 0.13053949903660886, "grad_norm": 0.27078595757484436, "learning_rate": 0.0001, "loss": 1.5277, "step": 813 }, { "epoch": 0.13070006422607577, "grad_norm": 0.2537182867527008, "learning_rate": 0.0001, "loss": 1.5279, "step": 814 }, { "epoch": 0.13086062941554272, "grad_norm": 0.2644798755645752, "learning_rate": 0.0001, "loss": 1.6266, "step": 815 }, { "epoch": 0.13102119460500963, "grad_norm": 0.24856843054294586, "learning_rate": 0.0001, "loss": 1.5094, "step": 816 }, { "epoch": 0.13118175979447655, "grad_norm": 0.24715793132781982, "learning_rate": 0.0001, "loss": 1.5693, "step": 817 }, { "epoch": 0.1313423249839435, "grad_norm": 0.2910032272338867, "learning_rate": 0.0001, "loss": 1.6246, "step": 818 }, { "epoch": 0.1315028901734104, "grad_norm": 0.2600892186164856, "learning_rate": 0.0001, "loss": 1.5608, "step": 819 }, { "epoch": 0.13166345536287732, "grad_norm": 0.28039804100990295, "learning_rate": 0.0001, "loss": 1.6317, "step": 820 }, { "epoch": 0.13182402055234424, "grad_norm": 0.25438904762268066, "learning_rate": 0.0001, "loss": 1.5446, "step": 821 }, { "epoch": 0.13198458574181118, "grad_norm": 0.24804502725601196, "learning_rate": 0.0001, "loss": 1.578, "step": 822 }, { "epoch": 0.1321451509312781, "grad_norm": 0.2627662122249603, "learning_rate": 0.0001, "loss": 1.6289, "step": 823 }, { "epoch": 0.13230571612074501, "grad_norm": 0.2525002062320709, "learning_rate": 0.0001, "loss": 1.6144, "step": 824 }, { "epoch": 0.13246628131021196, "grad_norm": 0.26678743958473206, "learning_rate": 0.0001, "loss": 1.5999, "step": 825 }, { "epoch": 0.13262684649967887, "grad_norm": 0.26153433322906494, "learning_rate": 0.0001, "loss": 1.6103, "step": 826 }, { "epoch": 0.1327874116891458, "grad_norm": 0.2502874732017517, "learning_rate": 0.0001, "loss": 1.5924, "step": 827 }, { "epoch": 0.1329479768786127, "grad_norm": 0.25310254096984863, "learning_rate": 0.0001, "loss": 1.6216, "step": 828 }, { "epoch": 0.13310854206807965, "grad_norm": 0.2579217851161957, "learning_rate": 0.0001, "loss": 1.6204, "step": 829 }, { "epoch": 0.13326910725754657, "grad_norm": 0.2544805109500885, "learning_rate": 0.0001, "loss": 1.5787, "step": 830 }, { "epoch": 0.13342967244701348, "grad_norm": 0.24990282952785492, "learning_rate": 0.0001, "loss": 1.5582, "step": 831 }, { "epoch": 0.13359023763648042, "grad_norm": 0.2465953677892685, "learning_rate": 0.0001, "loss": 1.5143, "step": 832 }, { "epoch": 0.13375080282594734, "grad_norm": 0.2525157928466797, "learning_rate": 0.0001, "loss": 1.6545, "step": 833 }, { "epoch": 0.13391136801541426, "grad_norm": 0.2696436643600464, "learning_rate": 0.0001, "loss": 1.5574, "step": 834 }, { "epoch": 0.13407193320488117, "grad_norm": 0.2418709248304367, "learning_rate": 0.0001, "loss": 1.5706, "step": 835 }, { "epoch": 0.13423249839434812, "grad_norm": 0.25983524322509766, "learning_rate": 0.0001, "loss": 1.5129, "step": 836 }, { "epoch": 0.13439306358381503, "grad_norm": 0.27395179867744446, "learning_rate": 0.0001, "loss": 1.6555, "step": 837 }, { "epoch": 0.13455362877328195, "grad_norm": 0.2571142315864563, "learning_rate": 0.0001, "loss": 1.5881, "step": 838 }, { "epoch": 0.13471419396274886, "grad_norm": 0.26157963275909424, "learning_rate": 0.0001, "loss": 1.5938, "step": 839 }, { "epoch": 0.1348747591522158, "grad_norm": 0.2452707439661026, "learning_rate": 0.0001, "loss": 1.5732, "step": 840 }, { "epoch": 0.13503532434168272, "grad_norm": 0.2631083130836487, "learning_rate": 0.0001, "loss": 1.5928, "step": 841 }, { "epoch": 0.13519588953114964, "grad_norm": 0.2500780522823334, "learning_rate": 0.0001, "loss": 1.5071, "step": 842 }, { "epoch": 0.13535645472061658, "grad_norm": 0.25931090116500854, "learning_rate": 0.0001, "loss": 1.6517, "step": 843 }, { "epoch": 0.1355170199100835, "grad_norm": 0.2653353214263916, "learning_rate": 0.0001, "loss": 1.5604, "step": 844 }, { "epoch": 0.1356775850995504, "grad_norm": 0.2534460127353668, "learning_rate": 0.0001, "loss": 1.5848, "step": 845 }, { "epoch": 0.13583815028901733, "grad_norm": 0.25302770733833313, "learning_rate": 0.0001, "loss": 1.6274, "step": 846 }, { "epoch": 0.13599871547848427, "grad_norm": 0.28456246852874756, "learning_rate": 0.0001, "loss": 1.6039, "step": 847 }, { "epoch": 0.1361592806679512, "grad_norm": 0.2481805682182312, "learning_rate": 0.0001, "loss": 1.5726, "step": 848 }, { "epoch": 0.1363198458574181, "grad_norm": 0.2782268226146698, "learning_rate": 0.0001, "loss": 1.6275, "step": 849 }, { "epoch": 0.13648041104688505, "grad_norm": 0.24883927404880524, "learning_rate": 0.0001, "loss": 1.5345, "step": 850 }, { "epoch": 0.13664097623635196, "grad_norm": 0.26905950903892517, "learning_rate": 0.0001, "loss": 1.5555, "step": 851 }, { "epoch": 0.13680154142581888, "grad_norm": 0.278736412525177, "learning_rate": 0.0001, "loss": 1.6644, "step": 852 }, { "epoch": 0.1369621066152858, "grad_norm": 0.2647725045681, "learning_rate": 0.0001, "loss": 1.6271, "step": 853 }, { "epoch": 0.13712267180475274, "grad_norm": 0.26414796710014343, "learning_rate": 0.0001, "loss": 1.6707, "step": 854 }, { "epoch": 0.13728323699421965, "grad_norm": 0.26472604274749756, "learning_rate": 0.0001, "loss": 1.63, "step": 855 }, { "epoch": 0.13744380218368657, "grad_norm": 0.2509171664714813, "learning_rate": 0.0001, "loss": 1.6083, "step": 856 }, { "epoch": 0.1376043673731535, "grad_norm": 0.30091553926467896, "learning_rate": 0.0001, "loss": 1.6461, "step": 857 }, { "epoch": 0.13776493256262043, "grad_norm": 0.2546672523021698, "learning_rate": 0.0001, "loss": 1.6342, "step": 858 }, { "epoch": 0.13792549775208734, "grad_norm": 0.2552092671394348, "learning_rate": 0.0001, "loss": 1.4802, "step": 859 }, { "epoch": 0.13808606294155426, "grad_norm": 0.2652498781681061, "learning_rate": 0.0001, "loss": 1.5446, "step": 860 }, { "epoch": 0.1382466281310212, "grad_norm": 0.2531950771808624, "learning_rate": 0.0001, "loss": 1.625, "step": 861 }, { "epoch": 0.13840719332048812, "grad_norm": 0.2755614221096039, "learning_rate": 0.0001, "loss": 1.6873, "step": 862 }, { "epoch": 0.13856775850995504, "grad_norm": 0.269375741481781, "learning_rate": 0.0001, "loss": 1.5331, "step": 863 }, { "epoch": 0.13872832369942195, "grad_norm": 0.26859086751937866, "learning_rate": 0.0001, "loss": 1.6087, "step": 864 }, { "epoch": 0.1388888888888889, "grad_norm": 0.2632748782634735, "learning_rate": 0.0001, "loss": 1.5878, "step": 865 }, { "epoch": 0.1390494540783558, "grad_norm": 0.2613421678543091, "learning_rate": 0.0001, "loss": 1.6823, "step": 866 }, { "epoch": 0.13921001926782273, "grad_norm": 0.2982962727546692, "learning_rate": 0.0001, "loss": 1.644, "step": 867 }, { "epoch": 0.13937058445728967, "grad_norm": 0.2554149329662323, "learning_rate": 0.0001, "loss": 1.5431, "step": 868 }, { "epoch": 0.13953114964675659, "grad_norm": 0.2654079496860504, "learning_rate": 0.0001, "loss": 1.5379, "step": 869 }, { "epoch": 0.1396917148362235, "grad_norm": 0.2743433713912964, "learning_rate": 0.0001, "loss": 1.612, "step": 870 }, { "epoch": 0.13985228002569042, "grad_norm": 0.24886168539524078, "learning_rate": 0.0001, "loss": 1.6143, "step": 871 }, { "epoch": 0.14001284521515736, "grad_norm": 0.29661887884140015, "learning_rate": 0.0001, "loss": 1.69, "step": 872 }, { "epoch": 0.14017341040462428, "grad_norm": 0.25791892409324646, "learning_rate": 0.0001, "loss": 1.5737, "step": 873 }, { "epoch": 0.1403339755940912, "grad_norm": 0.25356629490852356, "learning_rate": 0.0001, "loss": 1.6322, "step": 874 }, { "epoch": 0.14049454078355814, "grad_norm": 0.2713717520236969, "learning_rate": 0.0001, "loss": 1.5806, "step": 875 }, { "epoch": 0.14065510597302505, "grad_norm": 0.25718873739242554, "learning_rate": 0.0001, "loss": 1.6829, "step": 876 }, { "epoch": 0.14081567116249197, "grad_norm": 0.24350662529468536, "learning_rate": 0.0001, "loss": 1.544, "step": 877 }, { "epoch": 0.14097623635195888, "grad_norm": 0.2600824236869812, "learning_rate": 0.0001, "loss": 1.584, "step": 878 }, { "epoch": 0.14113680154142583, "grad_norm": 0.26631075143814087, "learning_rate": 0.0001, "loss": 1.6089, "step": 879 }, { "epoch": 0.14129736673089274, "grad_norm": 0.2674790918827057, "learning_rate": 0.0001, "loss": 1.6576, "step": 880 }, { "epoch": 0.14145793192035966, "grad_norm": 0.27016696333885193, "learning_rate": 0.0001, "loss": 1.5664, "step": 881 }, { "epoch": 0.1416184971098266, "grad_norm": 0.2517258822917938, "learning_rate": 0.0001, "loss": 1.5919, "step": 882 }, { "epoch": 0.14177906229929352, "grad_norm": 0.25702401995658875, "learning_rate": 0.0001, "loss": 1.55, "step": 883 }, { "epoch": 0.14193962748876043, "grad_norm": 0.2697002589702606, "learning_rate": 0.0001, "loss": 1.6049, "step": 884 }, { "epoch": 0.14210019267822735, "grad_norm": 0.25588372349739075, "learning_rate": 0.0001, "loss": 1.5859, "step": 885 }, { "epoch": 0.1422607578676943, "grad_norm": 0.2595311105251312, "learning_rate": 0.0001, "loss": 1.5958, "step": 886 }, { "epoch": 0.1424213230571612, "grad_norm": 0.2488589882850647, "learning_rate": 0.0001, "loss": 1.6681, "step": 887 }, { "epoch": 0.14258188824662812, "grad_norm": 0.23933641612529755, "learning_rate": 0.0001, "loss": 1.5913, "step": 888 }, { "epoch": 0.14274245343609507, "grad_norm": 0.24618078768253326, "learning_rate": 0.0001, "loss": 1.5106, "step": 889 }, { "epoch": 0.14290301862556198, "grad_norm": 0.24883906543254852, "learning_rate": 0.0001, "loss": 1.582, "step": 890 }, { "epoch": 0.1430635838150289, "grad_norm": 0.27218297123908997, "learning_rate": 0.0001, "loss": 1.5886, "step": 891 }, { "epoch": 0.14322414900449582, "grad_norm": 0.2581946849822998, "learning_rate": 0.0001, "loss": 1.5981, "step": 892 }, { "epoch": 0.14338471419396276, "grad_norm": 0.24821656942367554, "learning_rate": 0.0001, "loss": 1.5871, "step": 893 }, { "epoch": 0.14354527938342967, "grad_norm": 0.2561013996601105, "learning_rate": 0.0001, "loss": 1.6149, "step": 894 }, { "epoch": 0.1437058445728966, "grad_norm": 0.2528505325317383, "learning_rate": 0.0001, "loss": 1.6011, "step": 895 }, { "epoch": 0.1438664097623635, "grad_norm": 0.2521515190601349, "learning_rate": 0.0001, "loss": 1.5481, "step": 896 }, { "epoch": 0.14402697495183045, "grad_norm": 0.262277215719223, "learning_rate": 0.0001, "loss": 1.5968, "step": 897 }, { "epoch": 0.14418754014129737, "grad_norm": 0.2619973421096802, "learning_rate": 0.0001, "loss": 1.5859, "step": 898 }, { "epoch": 0.14434810533076428, "grad_norm": 0.2554990351200104, "learning_rate": 0.0001, "loss": 1.6384, "step": 899 }, { "epoch": 0.14450867052023122, "grad_norm": 0.25106117129325867, "learning_rate": 0.0001, "loss": 1.5078, "step": 900 }, { "epoch": 0.14466923570969814, "grad_norm": 0.2812288701534271, "learning_rate": 0.0001, "loss": 1.697, "step": 901 }, { "epoch": 0.14482980089916506, "grad_norm": 0.24550653994083405, "learning_rate": 0.0001, "loss": 1.4961, "step": 902 }, { "epoch": 0.14499036608863197, "grad_norm": 0.2630595564842224, "learning_rate": 0.0001, "loss": 1.6954, "step": 903 }, { "epoch": 0.14515093127809892, "grad_norm": 0.25216740369796753, "learning_rate": 0.0001, "loss": 1.6342, "step": 904 }, { "epoch": 0.14531149646756583, "grad_norm": 0.2595326602458954, "learning_rate": 0.0001, "loss": 1.6311, "step": 905 }, { "epoch": 0.14547206165703275, "grad_norm": 0.24139900505542755, "learning_rate": 0.0001, "loss": 1.5731, "step": 906 }, { "epoch": 0.1456326268464997, "grad_norm": 0.2659880816936493, "learning_rate": 0.0001, "loss": 1.626, "step": 907 }, { "epoch": 0.1457931920359666, "grad_norm": 0.24561461806297302, "learning_rate": 0.0001, "loss": 1.5737, "step": 908 }, { "epoch": 0.14595375722543352, "grad_norm": 0.23585672676563263, "learning_rate": 0.0001, "loss": 1.5343, "step": 909 }, { "epoch": 0.14611432241490044, "grad_norm": 0.2718529999256134, "learning_rate": 0.0001, "loss": 1.5992, "step": 910 }, { "epoch": 0.14627488760436738, "grad_norm": 0.2534932494163513, "learning_rate": 0.0001, "loss": 1.6032, "step": 911 }, { "epoch": 0.1464354527938343, "grad_norm": 0.2514909505844116, "learning_rate": 0.0001, "loss": 1.5442, "step": 912 }, { "epoch": 0.1465960179833012, "grad_norm": 0.2621341943740845, "learning_rate": 0.0001, "loss": 1.6254, "step": 913 }, { "epoch": 0.14675658317276816, "grad_norm": 0.2527523636817932, "learning_rate": 0.0001, "loss": 1.5434, "step": 914 }, { "epoch": 0.14691714836223507, "grad_norm": 0.2540346086025238, "learning_rate": 0.0001, "loss": 1.5594, "step": 915 }, { "epoch": 0.147077713551702, "grad_norm": 0.2796066701412201, "learning_rate": 0.0001, "loss": 1.6697, "step": 916 }, { "epoch": 0.1472382787411689, "grad_norm": 0.2621716558933258, "learning_rate": 0.0001, "loss": 1.6149, "step": 917 }, { "epoch": 0.14739884393063585, "grad_norm": 0.2522697448730469, "learning_rate": 0.0001, "loss": 1.5868, "step": 918 }, { "epoch": 0.14755940912010276, "grad_norm": 0.2512277662754059, "learning_rate": 0.0001, "loss": 1.5457, "step": 919 }, { "epoch": 0.14771997430956968, "grad_norm": 0.2664959728717804, "learning_rate": 0.0001, "loss": 1.609, "step": 920 }, { "epoch": 0.1478805394990366, "grad_norm": 0.2599131166934967, "learning_rate": 0.0001, "loss": 1.6409, "step": 921 }, { "epoch": 0.14804110468850354, "grad_norm": 0.2691696584224701, "learning_rate": 0.0001, "loss": 1.6161, "step": 922 }, { "epoch": 0.14820166987797045, "grad_norm": 0.26327449083328247, "learning_rate": 0.0001, "loss": 1.5896, "step": 923 }, { "epoch": 0.14836223506743737, "grad_norm": 0.25307759642601013, "learning_rate": 0.0001, "loss": 1.5517, "step": 924 }, { "epoch": 0.1485228002569043, "grad_norm": 0.27031588554382324, "learning_rate": 0.0001, "loss": 1.6378, "step": 925 }, { "epoch": 0.14868336544637123, "grad_norm": 0.24228115379810333, "learning_rate": 0.0001, "loss": 1.4805, "step": 926 }, { "epoch": 0.14884393063583815, "grad_norm": 0.25565674901008606, "learning_rate": 0.0001, "loss": 1.6749, "step": 927 }, { "epoch": 0.14900449582530506, "grad_norm": 0.2539302706718445, "learning_rate": 0.0001, "loss": 1.5772, "step": 928 }, { "epoch": 0.149165061014772, "grad_norm": 0.2562558650970459, "learning_rate": 0.0001, "loss": 1.6548, "step": 929 }, { "epoch": 0.14932562620423892, "grad_norm": 0.26420676708221436, "learning_rate": 0.0001, "loss": 1.6085, "step": 930 }, { "epoch": 0.14948619139370584, "grad_norm": 0.28227153420448303, "learning_rate": 0.0001, "loss": 1.5889, "step": 931 }, { "epoch": 0.14964675658317278, "grad_norm": 0.2469702661037445, "learning_rate": 0.0001, "loss": 1.5479, "step": 932 }, { "epoch": 0.1498073217726397, "grad_norm": 0.25898122787475586, "learning_rate": 0.0001, "loss": 1.5962, "step": 933 }, { "epoch": 0.1499678869621066, "grad_norm": 0.2539020776748657, "learning_rate": 0.0001, "loss": 1.5412, "step": 934 }, { "epoch": 0.15012845215157353, "grad_norm": 0.2724524438381195, "learning_rate": 0.0001, "loss": 1.6697, "step": 935 }, { "epoch": 0.15028901734104047, "grad_norm": 2.3618476390838623, "learning_rate": 0.0001, "loss": 1.6803, "step": 936 }, { "epoch": 0.1504495825305074, "grad_norm": 0.2667105197906494, "learning_rate": 0.0001, "loss": 1.6604, "step": 937 }, { "epoch": 0.1506101477199743, "grad_norm": 0.2506130039691925, "learning_rate": 0.0001, "loss": 1.4764, "step": 938 }, { "epoch": 0.15077071290944125, "grad_norm": 0.250129759311676, "learning_rate": 0.0001, "loss": 1.546, "step": 939 }, { "epoch": 0.15093127809890816, "grad_norm": 0.2476504147052765, "learning_rate": 0.0001, "loss": 1.5858, "step": 940 }, { "epoch": 0.15109184328837508, "grad_norm": 0.2472328394651413, "learning_rate": 0.0001, "loss": 1.5386, "step": 941 }, { "epoch": 0.151252408477842, "grad_norm": 0.24910295009613037, "learning_rate": 0.0001, "loss": 1.6534, "step": 942 }, { "epoch": 0.15141297366730894, "grad_norm": 0.2372818887233734, "learning_rate": 0.0001, "loss": 1.5527, "step": 943 }, { "epoch": 0.15157353885677585, "grad_norm": 0.2683623135089874, "learning_rate": 0.0001, "loss": 1.6166, "step": 944 }, { "epoch": 0.15173410404624277, "grad_norm": 0.24610085785388947, "learning_rate": 0.0001, "loss": 1.5643, "step": 945 }, { "epoch": 0.1518946692357097, "grad_norm": 0.2642036974430084, "learning_rate": 0.0001, "loss": 1.6591, "step": 946 }, { "epoch": 0.15205523442517663, "grad_norm": 0.25564056634902954, "learning_rate": 0.0001, "loss": 1.5975, "step": 947 }, { "epoch": 0.15221579961464354, "grad_norm": 0.25094327330589294, "learning_rate": 0.0001, "loss": 1.523, "step": 948 }, { "epoch": 0.15237636480411046, "grad_norm": 0.27053749561309814, "learning_rate": 0.0001, "loss": 1.6405, "step": 949 }, { "epoch": 0.1525369299935774, "grad_norm": 0.25663068890571594, "learning_rate": 0.0001, "loss": 1.6203, "step": 950 }, { "epoch": 0.15269749518304432, "grad_norm": 0.2562296986579895, "learning_rate": 0.0001, "loss": 1.5238, "step": 951 }, { "epoch": 0.15285806037251123, "grad_norm": 0.45618748664855957, "learning_rate": 0.0001, "loss": 1.6316, "step": 952 }, { "epoch": 0.15301862556197815, "grad_norm": 0.2593305706977844, "learning_rate": 0.0001, "loss": 1.6199, "step": 953 }, { "epoch": 0.1531791907514451, "grad_norm": 0.2535325288772583, "learning_rate": 0.0001, "loss": 1.5393, "step": 954 }, { "epoch": 0.153339755940912, "grad_norm": 0.2570097744464874, "learning_rate": 0.0001, "loss": 1.5836, "step": 955 }, { "epoch": 0.15350032113037893, "grad_norm": 0.26658475399017334, "learning_rate": 0.0001, "loss": 1.5609, "step": 956 }, { "epoch": 0.15366088631984587, "grad_norm": 0.23367327451705933, "learning_rate": 0.0001, "loss": 1.4769, "step": 957 }, { "epoch": 0.15382145150931278, "grad_norm": 0.2894875109195709, "learning_rate": 0.0001, "loss": 1.6935, "step": 958 }, { "epoch": 0.1539820166987797, "grad_norm": 0.24367184937000275, "learning_rate": 0.0001, "loss": 1.5402, "step": 959 }, { "epoch": 0.15414258188824662, "grad_norm": 0.2570075988769531, "learning_rate": 0.0001, "loss": 1.5435, "step": 960 }, { "epoch": 0.15430314707771356, "grad_norm": 0.26234692335128784, "learning_rate": 0.0001, "loss": 1.612, "step": 961 }, { "epoch": 0.15446371226718048, "grad_norm": 0.2623891830444336, "learning_rate": 0.0001, "loss": 1.5721, "step": 962 }, { "epoch": 0.1546242774566474, "grad_norm": 0.26043277978897095, "learning_rate": 0.0001, "loss": 1.6106, "step": 963 }, { "epoch": 0.15478484264611433, "grad_norm": 0.2588985860347748, "learning_rate": 0.0001, "loss": 1.5302, "step": 964 }, { "epoch": 0.15494540783558125, "grad_norm": 0.25718897581100464, "learning_rate": 0.0001, "loss": 1.5629, "step": 965 }, { "epoch": 0.15510597302504817, "grad_norm": 0.2608674466609955, "learning_rate": 0.0001, "loss": 1.5503, "step": 966 }, { "epoch": 0.15526653821451508, "grad_norm": 0.26131555438041687, "learning_rate": 0.0001, "loss": 1.5558, "step": 967 }, { "epoch": 0.15542710340398203, "grad_norm": 0.2566625475883484, "learning_rate": 0.0001, "loss": 1.6243, "step": 968 }, { "epoch": 0.15558766859344894, "grad_norm": 0.27965059876441956, "learning_rate": 0.0001, "loss": 1.5893, "step": 969 }, { "epoch": 0.15574823378291586, "grad_norm": 0.25014302134513855, "learning_rate": 0.0001, "loss": 1.5958, "step": 970 }, { "epoch": 0.1559087989723828, "grad_norm": 0.2523461580276489, "learning_rate": 0.0001, "loss": 1.5695, "step": 971 }, { "epoch": 0.15606936416184972, "grad_norm": 0.2674293518066406, "learning_rate": 0.0001, "loss": 1.5749, "step": 972 }, { "epoch": 0.15622992935131663, "grad_norm": 0.2695954144001007, "learning_rate": 0.0001, "loss": 1.723, "step": 973 }, { "epoch": 0.15639049454078355, "grad_norm": 0.25040581822395325, "learning_rate": 0.0001, "loss": 1.6021, "step": 974 }, { "epoch": 0.1565510597302505, "grad_norm": 0.252522349357605, "learning_rate": 0.0001, "loss": 1.5453, "step": 975 }, { "epoch": 0.1567116249197174, "grad_norm": 0.263759583234787, "learning_rate": 0.0001, "loss": 1.5177, "step": 976 }, { "epoch": 0.15687219010918432, "grad_norm": 0.26149657368659973, "learning_rate": 0.0001, "loss": 1.6225, "step": 977 }, { "epoch": 0.15703275529865124, "grad_norm": 0.2430988848209381, "learning_rate": 0.0001, "loss": 1.5972, "step": 978 }, { "epoch": 0.15719332048811818, "grad_norm": 0.2938035726547241, "learning_rate": 0.0001, "loss": 1.5743, "step": 979 }, { "epoch": 0.1573538856775851, "grad_norm": 0.24420037865638733, "learning_rate": 0.0001, "loss": 1.6323, "step": 980 }, { "epoch": 0.157514450867052, "grad_norm": 0.269610196352005, "learning_rate": 0.0001, "loss": 1.6412, "step": 981 }, { "epoch": 0.15767501605651896, "grad_norm": 0.25811803340911865, "learning_rate": 0.0001, "loss": 1.6129, "step": 982 }, { "epoch": 0.15783558124598587, "grad_norm": 0.2406802475452423, "learning_rate": 0.0001, "loss": 1.5603, "step": 983 }, { "epoch": 0.1579961464354528, "grad_norm": 0.2595805525779724, "learning_rate": 0.0001, "loss": 1.5083, "step": 984 }, { "epoch": 0.1581567116249197, "grad_norm": 0.260524183511734, "learning_rate": 0.0001, "loss": 1.614, "step": 985 }, { "epoch": 0.15831727681438665, "grad_norm": 0.25833332538604736, "learning_rate": 0.0001, "loss": 1.5933, "step": 986 }, { "epoch": 0.15847784200385356, "grad_norm": 0.25654345750808716, "learning_rate": 0.0001, "loss": 1.6111, "step": 987 }, { "epoch": 0.15863840719332048, "grad_norm": 0.26671066880226135, "learning_rate": 0.0001, "loss": 1.5813, "step": 988 }, { "epoch": 0.15879897238278742, "grad_norm": 0.2486371546983719, "learning_rate": 0.0001, "loss": 1.5531, "step": 989 }, { "epoch": 0.15895953757225434, "grad_norm": 0.2565583288669586, "learning_rate": 0.0001, "loss": 1.5289, "step": 990 }, { "epoch": 0.15912010276172125, "grad_norm": 0.24416203796863556, "learning_rate": 0.0001, "loss": 1.5113, "step": 991 }, { "epoch": 0.15928066795118817, "grad_norm": 0.27367034554481506, "learning_rate": 0.0001, "loss": 1.6646, "step": 992 }, { "epoch": 0.15944123314065511, "grad_norm": 0.26712900400161743, "learning_rate": 0.0001, "loss": 1.5482, "step": 993 }, { "epoch": 0.15960179833012203, "grad_norm": 0.2603553533554077, "learning_rate": 0.0001, "loss": 1.6457, "step": 994 }, { "epoch": 0.15976236351958895, "grad_norm": 0.23969604074954987, "learning_rate": 0.0001, "loss": 1.5153, "step": 995 }, { "epoch": 0.1599229287090559, "grad_norm": 0.27515581250190735, "learning_rate": 0.0001, "loss": 1.5772, "step": 996 }, { "epoch": 0.1600834938985228, "grad_norm": 0.2648704946041107, "learning_rate": 0.0001, "loss": 1.587, "step": 997 }, { "epoch": 0.16024405908798972, "grad_norm": 0.27304160594940186, "learning_rate": 0.0001, "loss": 1.6008, "step": 998 }, { "epoch": 0.16040462427745664, "grad_norm": 0.25929275155067444, "learning_rate": 0.0001, "loss": 1.594, "step": 999 }, { "epoch": 0.16056518946692358, "grad_norm": 0.25871768593788147, "learning_rate": 0.0001, "loss": 1.5781, "step": 1000 }, { "epoch": 0.1607257546563905, "grad_norm": 0.2427658885717392, "learning_rate": 0.0001, "loss": 1.5846, "step": 1001 }, { "epoch": 0.1608863198458574, "grad_norm": 0.24906103312969208, "learning_rate": 0.0001, "loss": 1.535, "step": 1002 }, { "epoch": 0.16104688503532436, "grad_norm": 0.24545247852802277, "learning_rate": 0.0001, "loss": 1.5767, "step": 1003 }, { "epoch": 0.16120745022479127, "grad_norm": 0.23803065717220306, "learning_rate": 0.0001, "loss": 1.5541, "step": 1004 }, { "epoch": 0.1613680154142582, "grad_norm": 0.23469966650009155, "learning_rate": 0.0001, "loss": 1.5283, "step": 1005 }, { "epoch": 0.1615285806037251, "grad_norm": 0.24923951923847198, "learning_rate": 0.0001, "loss": 1.5867, "step": 1006 }, { "epoch": 0.16168914579319205, "grad_norm": 0.24812714755535126, "learning_rate": 0.0001, "loss": 1.6495, "step": 1007 }, { "epoch": 0.16184971098265896, "grad_norm": 0.2443220466375351, "learning_rate": 0.0001, "loss": 1.5867, "step": 1008 }, { "epoch": 0.16201027617212588, "grad_norm": 0.2561798095703125, "learning_rate": 0.0001, "loss": 1.5966, "step": 1009 }, { "epoch": 0.1621708413615928, "grad_norm": 0.2338484227657318, "learning_rate": 0.0001, "loss": 1.5094, "step": 1010 }, { "epoch": 0.16233140655105974, "grad_norm": 0.24996957182884216, "learning_rate": 0.0001, "loss": 1.6699, "step": 1011 }, { "epoch": 0.16249197174052665, "grad_norm": 0.231418639421463, "learning_rate": 0.0001, "loss": 1.4153, "step": 1012 }, { "epoch": 0.16265253692999357, "grad_norm": 0.24210630357265472, "learning_rate": 0.0001, "loss": 1.5622, "step": 1013 }, { "epoch": 0.1628131021194605, "grad_norm": 0.239084392786026, "learning_rate": 0.0001, "loss": 1.552, "step": 1014 }, { "epoch": 0.16297366730892743, "grad_norm": 0.2544187009334564, "learning_rate": 0.0001, "loss": 1.6438, "step": 1015 }, { "epoch": 0.16313423249839434, "grad_norm": 0.249491348862648, "learning_rate": 0.0001, "loss": 1.5598, "step": 1016 }, { "epoch": 0.16329479768786126, "grad_norm": 0.24316665530204773, "learning_rate": 0.0001, "loss": 1.5345, "step": 1017 }, { "epoch": 0.1634553628773282, "grad_norm": 0.2577227056026459, "learning_rate": 0.0001, "loss": 1.582, "step": 1018 }, { "epoch": 0.16361592806679512, "grad_norm": 0.2507282793521881, "learning_rate": 0.0001, "loss": 1.5339, "step": 1019 }, { "epoch": 0.16377649325626203, "grad_norm": 0.24784612655639648, "learning_rate": 0.0001, "loss": 1.5851, "step": 1020 }, { "epoch": 0.16393705844572898, "grad_norm": 0.263863205909729, "learning_rate": 0.0001, "loss": 1.618, "step": 1021 }, { "epoch": 0.1640976236351959, "grad_norm": 0.24095465242862701, "learning_rate": 0.0001, "loss": 1.5363, "step": 1022 }, { "epoch": 0.1642581888246628, "grad_norm": 0.2621472179889679, "learning_rate": 0.0001, "loss": 1.6758, "step": 1023 }, { "epoch": 0.16441875401412973, "grad_norm": 0.26015326380729675, "learning_rate": 0.0001, "loss": 1.6516, "step": 1024 }, { "epoch": 0.16457931920359667, "grad_norm": 0.24528542160987854, "learning_rate": 0.0001, "loss": 1.6122, "step": 1025 }, { "epoch": 0.16473988439306358, "grad_norm": 0.25606489181518555, "learning_rate": 0.0001, "loss": 1.6446, "step": 1026 }, { "epoch": 0.1649004495825305, "grad_norm": 0.2509962320327759, "learning_rate": 0.0001, "loss": 1.6436, "step": 1027 }, { "epoch": 0.16506101477199744, "grad_norm": 0.26206785440444946, "learning_rate": 0.0001, "loss": 1.6145, "step": 1028 }, { "epoch": 0.16522157996146436, "grad_norm": 0.2522035539150238, "learning_rate": 0.0001, "loss": 1.6218, "step": 1029 }, { "epoch": 0.16538214515093128, "grad_norm": 1.347176432609558, "learning_rate": 0.0001, "loss": 1.6483, "step": 1030 }, { "epoch": 0.1655427103403982, "grad_norm": 0.2490011751651764, "learning_rate": 0.0001, "loss": 1.612, "step": 1031 }, { "epoch": 0.16570327552986513, "grad_norm": 0.26856088638305664, "learning_rate": 0.0001, "loss": 1.6311, "step": 1032 }, { "epoch": 0.16586384071933205, "grad_norm": 0.2517376244068146, "learning_rate": 0.0001, "loss": 1.6378, "step": 1033 }, { "epoch": 0.16602440590879897, "grad_norm": 0.25993338227272034, "learning_rate": 0.0001, "loss": 1.5903, "step": 1034 }, { "epoch": 0.16618497109826588, "grad_norm": 0.2448819875717163, "learning_rate": 0.0001, "loss": 1.5829, "step": 1035 }, { "epoch": 0.16634553628773283, "grad_norm": 0.24984216690063477, "learning_rate": 0.0001, "loss": 1.5087, "step": 1036 }, { "epoch": 0.16650610147719974, "grad_norm": 0.2472984492778778, "learning_rate": 0.0001, "loss": 1.6002, "step": 1037 }, { "epoch": 0.16666666666666666, "grad_norm": 0.2638058364391327, "learning_rate": 0.0001, "loss": 1.4944, "step": 1038 }, { "epoch": 0.1668272318561336, "grad_norm": 0.2725336253643036, "learning_rate": 0.0001, "loss": 1.6597, "step": 1039 }, { "epoch": 0.16698779704560052, "grad_norm": 0.24750974774360657, "learning_rate": 0.0001, "loss": 1.4934, "step": 1040 }, { "epoch": 0.16714836223506743, "grad_norm": 0.2630518972873688, "learning_rate": 0.0001, "loss": 1.5621, "step": 1041 }, { "epoch": 0.16730892742453435, "grad_norm": 0.2586844861507416, "learning_rate": 0.0001, "loss": 1.6803, "step": 1042 }, { "epoch": 0.1674694926140013, "grad_norm": 0.2508007287979126, "learning_rate": 0.0001, "loss": 1.5383, "step": 1043 }, { "epoch": 0.1676300578034682, "grad_norm": 0.252737432718277, "learning_rate": 0.0001, "loss": 1.5275, "step": 1044 }, { "epoch": 0.16779062299293512, "grad_norm": 0.2449912428855896, "learning_rate": 0.0001, "loss": 1.5562, "step": 1045 }, { "epoch": 0.16795118818240207, "grad_norm": 0.27893170714378357, "learning_rate": 0.0001, "loss": 1.5714, "step": 1046 }, { "epoch": 0.16811175337186898, "grad_norm": 0.2636607587337494, "learning_rate": 0.0001, "loss": 1.6262, "step": 1047 }, { "epoch": 0.1682723185613359, "grad_norm": 0.2528233826160431, "learning_rate": 0.0001, "loss": 1.5882, "step": 1048 }, { "epoch": 0.16843288375080281, "grad_norm": 0.2734587788581848, "learning_rate": 0.0001, "loss": 1.5933, "step": 1049 }, { "epoch": 0.16859344894026976, "grad_norm": 0.26790207624435425, "learning_rate": 0.0001, "loss": 1.5537, "step": 1050 }, { "epoch": 0.16875401412973667, "grad_norm": 0.27698516845703125, "learning_rate": 0.0001, "loss": 1.6398, "step": 1051 }, { "epoch": 0.1689145793192036, "grad_norm": 0.26569440960884094, "learning_rate": 0.0001, "loss": 1.6539, "step": 1052 }, { "epoch": 0.16907514450867053, "grad_norm": 0.24960124492645264, "learning_rate": 0.0001, "loss": 1.5665, "step": 1053 }, { "epoch": 0.16923570969813745, "grad_norm": 0.26630985736846924, "learning_rate": 0.0001, "loss": 1.5947, "step": 1054 }, { "epoch": 0.16939627488760436, "grad_norm": 0.24836736917495728, "learning_rate": 0.0001, "loss": 1.5871, "step": 1055 }, { "epoch": 0.16955684007707128, "grad_norm": 0.2526874244213104, "learning_rate": 0.0001, "loss": 1.5957, "step": 1056 }, { "epoch": 0.16971740526653822, "grad_norm": 0.25865864753723145, "learning_rate": 0.0001, "loss": 1.5824, "step": 1057 }, { "epoch": 0.16987797045600514, "grad_norm": 0.24482880532741547, "learning_rate": 0.0001, "loss": 1.5549, "step": 1058 }, { "epoch": 0.17003853564547206, "grad_norm": 0.3854556381702423, "learning_rate": 0.0001, "loss": 1.6224, "step": 1059 }, { "epoch": 0.170199100834939, "grad_norm": 0.25107231736183167, "learning_rate": 0.0001, "loss": 1.6952, "step": 1060 }, { "epoch": 0.17035966602440591, "grad_norm": 0.2519952654838562, "learning_rate": 0.0001, "loss": 1.5618, "step": 1061 }, { "epoch": 0.17052023121387283, "grad_norm": 0.2582648694515228, "learning_rate": 0.0001, "loss": 1.6372, "step": 1062 }, { "epoch": 0.17068079640333975, "grad_norm": 0.2552756071090698, "learning_rate": 0.0001, "loss": 1.6483, "step": 1063 }, { "epoch": 0.1708413615928067, "grad_norm": 0.261065274477005, "learning_rate": 0.0001, "loss": 1.5369, "step": 1064 }, { "epoch": 0.1710019267822736, "grad_norm": 0.26455509662628174, "learning_rate": 0.0001, "loss": 1.6229, "step": 1065 }, { "epoch": 0.17116249197174052, "grad_norm": 0.26041489839553833, "learning_rate": 0.0001, "loss": 1.642, "step": 1066 }, { "epoch": 0.17132305716120744, "grad_norm": 0.2590771019458771, "learning_rate": 0.0001, "loss": 1.5824, "step": 1067 }, { "epoch": 0.17148362235067438, "grad_norm": 0.32040154933929443, "learning_rate": 0.0001, "loss": 1.5452, "step": 1068 }, { "epoch": 0.1716441875401413, "grad_norm": 44.333099365234375, "learning_rate": 0.0001, "loss": 1.575, "step": 1069 }, { "epoch": 0.1718047527296082, "grad_norm": 0.24626383185386658, "learning_rate": 0.0001, "loss": 1.5173, "step": 1070 }, { "epoch": 0.17196531791907516, "grad_norm": 0.2662312686443329, "learning_rate": 0.0001, "loss": 1.6004, "step": 1071 }, { "epoch": 0.17212588310854207, "grad_norm": 0.31177830696105957, "learning_rate": 0.0001, "loss": 1.6923, "step": 1072 }, { "epoch": 0.172286448298009, "grad_norm": 0.2966936230659485, "learning_rate": 0.0001, "loss": 1.5856, "step": 1073 }, { "epoch": 0.1724470134874759, "grad_norm": 0.2461671680212021, "learning_rate": 0.0001, "loss": 1.553, "step": 1074 }, { "epoch": 0.17260757867694285, "grad_norm": 0.24973897635936737, "learning_rate": 0.0001, "loss": 1.6427, "step": 1075 }, { "epoch": 0.17276814386640976, "grad_norm": 1.4923425912857056, "learning_rate": 0.0001, "loss": 1.627, "step": 1076 }, { "epoch": 0.17292870905587668, "grad_norm": 0.2629109025001526, "learning_rate": 0.0001, "loss": 1.5898, "step": 1077 }, { "epoch": 0.17308927424534362, "grad_norm": 0.2483101487159729, "learning_rate": 0.0001, "loss": 1.5575, "step": 1078 }, { "epoch": 0.17324983943481054, "grad_norm": 0.2540942430496216, "learning_rate": 0.0001, "loss": 1.5033, "step": 1079 }, { "epoch": 0.17341040462427745, "grad_norm": 0.2435886114835739, "learning_rate": 0.0001, "loss": 1.5284, "step": 1080 }, { "epoch": 0.17357096981374437, "grad_norm": 0.2666841447353363, "learning_rate": 0.0001, "loss": 1.6102, "step": 1081 }, { "epoch": 0.1737315350032113, "grad_norm": 0.24547235667705536, "learning_rate": 0.0001, "loss": 1.5151, "step": 1082 }, { "epoch": 0.17389210019267823, "grad_norm": 0.6907444000244141, "learning_rate": 0.0001, "loss": 1.5529, "step": 1083 }, { "epoch": 0.17405266538214514, "grad_norm": 0.8070302605628967, "learning_rate": 0.0001, "loss": 1.6539, "step": 1084 }, { "epoch": 0.1742132305716121, "grad_norm": 0.25271305441856384, "learning_rate": 0.0001, "loss": 1.6012, "step": 1085 }, { "epoch": 0.174373795761079, "grad_norm": 0.2487424910068512, "learning_rate": 0.0001, "loss": 1.4805, "step": 1086 }, { "epoch": 0.17453436095054592, "grad_norm": 0.2643222510814667, "learning_rate": 0.0001, "loss": 1.5592, "step": 1087 }, { "epoch": 0.17469492614001284, "grad_norm": 0.2576553225517273, "learning_rate": 0.0001, "loss": 1.6074, "step": 1088 }, { "epoch": 0.17485549132947978, "grad_norm": 0.2615315318107605, "learning_rate": 0.0001, "loss": 1.5465, "step": 1089 }, { "epoch": 0.1750160565189467, "grad_norm": 0.2399469017982483, "learning_rate": 0.0001, "loss": 1.5739, "step": 1090 }, { "epoch": 0.1751766217084136, "grad_norm": 0.2683163285255432, "learning_rate": 0.0001, "loss": 1.6383, "step": 1091 }, { "epoch": 0.17533718689788053, "grad_norm": 0.2417677938938141, "learning_rate": 0.0001, "loss": 1.4673, "step": 1092 }, { "epoch": 0.17549775208734747, "grad_norm": 0.2879345118999481, "learning_rate": 0.0001, "loss": 1.6886, "step": 1093 }, { "epoch": 0.17565831727681439, "grad_norm": 0.2529812157154083, "learning_rate": 0.0001, "loss": 1.5832, "step": 1094 }, { "epoch": 0.1758188824662813, "grad_norm": 0.24131765961647034, "learning_rate": 0.0001, "loss": 1.4863, "step": 1095 }, { "epoch": 0.17597944765574824, "grad_norm": 0.25778815150260925, "learning_rate": 0.0001, "loss": 1.5694, "step": 1096 }, { "epoch": 0.17614001284521516, "grad_norm": 0.24608175456523895, "learning_rate": 0.0001, "loss": 1.6426, "step": 1097 }, { "epoch": 0.17630057803468208, "grad_norm": 0.2669673562049866, "learning_rate": 0.0001, "loss": 1.621, "step": 1098 }, { "epoch": 0.176461143224149, "grad_norm": 0.25561201572418213, "learning_rate": 0.0001, "loss": 1.5283, "step": 1099 }, { "epoch": 0.17662170841361594, "grad_norm": 0.25698336958885193, "learning_rate": 0.0001, "loss": 1.6021, "step": 1100 }, { "epoch": 0.17678227360308285, "grad_norm": 0.2444210797548294, "learning_rate": 0.0001, "loss": 1.6462, "step": 1101 }, { "epoch": 0.17694283879254977, "grad_norm": 0.26224303245544434, "learning_rate": 0.0001, "loss": 1.582, "step": 1102 }, { "epoch": 0.1771034039820167, "grad_norm": 0.25329750776290894, "learning_rate": 0.0001, "loss": 1.5771, "step": 1103 }, { "epoch": 0.17726396917148363, "grad_norm": 0.2472059726715088, "learning_rate": 0.0001, "loss": 1.5146, "step": 1104 }, { "epoch": 0.17742453436095054, "grad_norm": 0.25361812114715576, "learning_rate": 0.0001, "loss": 1.5983, "step": 1105 }, { "epoch": 0.17758509955041746, "grad_norm": 0.25120607018470764, "learning_rate": 0.0001, "loss": 1.5643, "step": 1106 }, { "epoch": 0.1777456647398844, "grad_norm": 0.2414521723985672, "learning_rate": 0.0001, "loss": 1.539, "step": 1107 }, { "epoch": 0.17790622992935132, "grad_norm": 0.25235995650291443, "learning_rate": 0.0001, "loss": 1.6059, "step": 1108 }, { "epoch": 0.17806679511881823, "grad_norm": 0.252722829580307, "learning_rate": 0.0001, "loss": 1.6828, "step": 1109 }, { "epoch": 0.17822736030828518, "grad_norm": 0.25269708037376404, "learning_rate": 0.0001, "loss": 1.4884, "step": 1110 }, { "epoch": 0.1783879254977521, "grad_norm": 0.25341561436653137, "learning_rate": 0.0001, "loss": 1.5377, "step": 1111 }, { "epoch": 0.178548490687219, "grad_norm": 0.2385052740573883, "learning_rate": 0.0001, "loss": 1.52, "step": 1112 }, { "epoch": 0.17870905587668592, "grad_norm": 0.255615234375, "learning_rate": 0.0001, "loss": 1.5391, "step": 1113 }, { "epoch": 0.17886962106615287, "grad_norm": 0.255342960357666, "learning_rate": 0.0001, "loss": 1.5574, "step": 1114 }, { "epoch": 0.17903018625561978, "grad_norm": 0.2434520423412323, "learning_rate": 0.0001, "loss": 1.5761, "step": 1115 }, { "epoch": 0.1791907514450867, "grad_norm": 0.2581034302711487, "learning_rate": 0.0001, "loss": 1.5969, "step": 1116 }, { "epoch": 0.17935131663455364, "grad_norm": 0.2473067194223404, "learning_rate": 0.0001, "loss": 1.561, "step": 1117 }, { "epoch": 0.17951188182402056, "grad_norm": 0.24469049274921417, "learning_rate": 0.0001, "loss": 1.5901, "step": 1118 }, { "epoch": 0.17967244701348747, "grad_norm": 0.25427860021591187, "learning_rate": 0.0001, "loss": 1.5611, "step": 1119 }, { "epoch": 0.1798330122029544, "grad_norm": 0.24965278804302216, "learning_rate": 0.0001, "loss": 1.5981, "step": 1120 }, { "epoch": 0.17999357739242133, "grad_norm": 0.24722306430339813, "learning_rate": 0.0001, "loss": 1.5614, "step": 1121 }, { "epoch": 0.18015414258188825, "grad_norm": 0.2540864050388336, "learning_rate": 0.0001, "loss": 1.5544, "step": 1122 }, { "epoch": 0.18031470777135516, "grad_norm": 0.2447497397661209, "learning_rate": 0.0001, "loss": 1.5944, "step": 1123 }, { "epoch": 0.18047527296082208, "grad_norm": 0.2738570272922516, "learning_rate": 0.0001, "loss": 1.6893, "step": 1124 }, { "epoch": 0.18063583815028902, "grad_norm": 0.2362639307975769, "learning_rate": 0.0001, "loss": 1.5471, "step": 1125 }, { "epoch": 0.18079640333975594, "grad_norm": 0.2534329295158386, "learning_rate": 0.0001, "loss": 1.616, "step": 1126 }, { "epoch": 0.18095696852922286, "grad_norm": 0.2539317011833191, "learning_rate": 0.0001, "loss": 1.4954, "step": 1127 }, { "epoch": 0.1811175337186898, "grad_norm": 0.24718168377876282, "learning_rate": 0.0001, "loss": 1.6153, "step": 1128 }, { "epoch": 0.18127809890815672, "grad_norm": 0.2623962461948395, "learning_rate": 0.0001, "loss": 1.6629, "step": 1129 }, { "epoch": 0.18143866409762363, "grad_norm": 0.26475292444229126, "learning_rate": 0.0001, "loss": 1.69, "step": 1130 }, { "epoch": 0.18159922928709055, "grad_norm": 0.276611864566803, "learning_rate": 0.0001, "loss": 1.6318, "step": 1131 }, { "epoch": 0.1817597944765575, "grad_norm": 0.24636362493038177, "learning_rate": 0.0001, "loss": 1.5763, "step": 1132 }, { "epoch": 0.1819203596660244, "grad_norm": 0.2588915526866913, "learning_rate": 0.0001, "loss": 1.6406, "step": 1133 }, { "epoch": 0.18208092485549132, "grad_norm": 0.2608543038368225, "learning_rate": 0.0001, "loss": 1.6187, "step": 1134 }, { "epoch": 0.18224149004495827, "grad_norm": 0.26090532541275024, "learning_rate": 0.0001, "loss": 1.5682, "step": 1135 }, { "epoch": 0.18240205523442518, "grad_norm": 0.25060582160949707, "learning_rate": 0.0001, "loss": 1.59, "step": 1136 }, { "epoch": 0.1825626204238921, "grad_norm": 0.2568562626838684, "learning_rate": 0.0001, "loss": 1.6352, "step": 1137 }, { "epoch": 0.182723185613359, "grad_norm": 0.27537429332733154, "learning_rate": 0.0001, "loss": 1.6926, "step": 1138 }, { "epoch": 0.18288375080282596, "grad_norm": 0.25080350041389465, "learning_rate": 0.0001, "loss": 1.614, "step": 1139 }, { "epoch": 0.18304431599229287, "grad_norm": 0.24731677770614624, "learning_rate": 0.0001, "loss": 1.633, "step": 1140 }, { "epoch": 0.1832048811817598, "grad_norm": 0.25230592489242554, "learning_rate": 0.0001, "loss": 1.5854, "step": 1141 }, { "epoch": 0.18336544637122673, "grad_norm": 0.25337666273117065, "learning_rate": 0.0001, "loss": 1.6147, "step": 1142 }, { "epoch": 0.18352601156069365, "grad_norm": 0.24595242738723755, "learning_rate": 0.0001, "loss": 1.5107, "step": 1143 }, { "epoch": 0.18368657675016056, "grad_norm": 0.2588496804237366, "learning_rate": 0.0001, "loss": 1.5937, "step": 1144 }, { "epoch": 0.18384714193962748, "grad_norm": 0.2558797299861908, "learning_rate": 0.0001, "loss": 1.5961, "step": 1145 }, { "epoch": 0.18400770712909442, "grad_norm": 0.25319406390190125, "learning_rate": 0.0001, "loss": 1.663, "step": 1146 }, { "epoch": 0.18416827231856134, "grad_norm": 0.2560921907424927, "learning_rate": 0.0001, "loss": 1.5537, "step": 1147 }, { "epoch": 0.18432883750802825, "grad_norm": 0.25958332419395447, "learning_rate": 0.0001, "loss": 1.6162, "step": 1148 }, { "epoch": 0.18448940269749517, "grad_norm": 0.24979592859745026, "learning_rate": 0.0001, "loss": 1.5961, "step": 1149 }, { "epoch": 0.1846499678869621, "grad_norm": 0.26627790927886963, "learning_rate": 0.0001, "loss": 1.4225, "step": 1150 }, { "epoch": 0.18481053307642903, "grad_norm": 0.25156256556510925, "learning_rate": 0.0001, "loss": 1.5706, "step": 1151 }, { "epoch": 0.18497109826589594, "grad_norm": 0.2600122392177582, "learning_rate": 0.0001, "loss": 1.5701, "step": 1152 }, { "epoch": 0.1851316634553629, "grad_norm": 0.25203970074653625, "learning_rate": 0.0001, "loss": 1.6395, "step": 1153 }, { "epoch": 0.1852922286448298, "grad_norm": 0.2482793778181076, "learning_rate": 0.0001, "loss": 1.5757, "step": 1154 }, { "epoch": 0.18545279383429672, "grad_norm": 0.2505303621292114, "learning_rate": 0.0001, "loss": 1.5482, "step": 1155 }, { "epoch": 0.18561335902376364, "grad_norm": 0.24639439582824707, "learning_rate": 0.0001, "loss": 1.5351, "step": 1156 }, { "epoch": 0.18577392421323058, "grad_norm": 0.2650209367275238, "learning_rate": 0.0001, "loss": 1.4979, "step": 1157 }, { "epoch": 0.1859344894026975, "grad_norm": 0.25111427903175354, "learning_rate": 0.0001, "loss": 1.5307, "step": 1158 }, { "epoch": 0.1860950545921644, "grad_norm": 0.2652316689491272, "learning_rate": 0.0001, "loss": 1.6746, "step": 1159 }, { "epoch": 0.18625561978163135, "grad_norm": 0.24217385053634644, "learning_rate": 0.0001, "loss": 1.5441, "step": 1160 }, { "epoch": 0.18641618497109827, "grad_norm": 0.2778951823711395, "learning_rate": 0.0001, "loss": 1.6431, "step": 1161 }, { "epoch": 0.18657675016056519, "grad_norm": 0.2490694373846054, "learning_rate": 0.0001, "loss": 1.4861, "step": 1162 }, { "epoch": 0.1867373153500321, "grad_norm": 0.2578921616077423, "learning_rate": 0.0001, "loss": 1.5689, "step": 1163 }, { "epoch": 0.18689788053949905, "grad_norm": 0.24337232112884521, "learning_rate": 0.0001, "loss": 1.5358, "step": 1164 }, { "epoch": 0.18705844572896596, "grad_norm": 0.256675660610199, "learning_rate": 0.0001, "loss": 1.5463, "step": 1165 }, { "epoch": 0.18721901091843288, "grad_norm": 0.26293712854385376, "learning_rate": 0.0001, "loss": 1.6198, "step": 1166 }, { "epoch": 0.18737957610789982, "grad_norm": 0.25028562545776367, "learning_rate": 0.0001, "loss": 1.525, "step": 1167 }, { "epoch": 0.18754014129736674, "grad_norm": 0.240972101688385, "learning_rate": 0.0001, "loss": 1.4788, "step": 1168 }, { "epoch": 0.18770070648683365, "grad_norm": 0.26054659485816956, "learning_rate": 0.0001, "loss": 1.6252, "step": 1169 }, { "epoch": 0.18786127167630057, "grad_norm": 0.263796865940094, "learning_rate": 0.0001, "loss": 1.5822, "step": 1170 }, { "epoch": 0.1880218368657675, "grad_norm": 0.2466707080602646, "learning_rate": 0.0001, "loss": 1.5136, "step": 1171 }, { "epoch": 0.18818240205523443, "grad_norm": 0.2484586089849472, "learning_rate": 0.0001, "loss": 1.6043, "step": 1172 }, { "epoch": 0.18834296724470134, "grad_norm": 0.253840833902359, "learning_rate": 0.0001, "loss": 1.6295, "step": 1173 }, { "epoch": 0.18850353243416829, "grad_norm": 0.2369823455810547, "learning_rate": 0.0001, "loss": 1.4692, "step": 1174 }, { "epoch": 0.1886640976236352, "grad_norm": 0.24843278527259827, "learning_rate": 0.0001, "loss": 1.575, "step": 1175 }, { "epoch": 0.18882466281310212, "grad_norm": 0.2453308403491974, "learning_rate": 0.0001, "loss": 1.5405, "step": 1176 }, { "epoch": 0.18898522800256903, "grad_norm": 0.25337234139442444, "learning_rate": 0.0001, "loss": 1.5851, "step": 1177 }, { "epoch": 0.18914579319203598, "grad_norm": 0.2726234197616577, "learning_rate": 0.0001, "loss": 1.6019, "step": 1178 }, { "epoch": 0.1893063583815029, "grad_norm": 0.24739961326122284, "learning_rate": 0.0001, "loss": 1.5996, "step": 1179 }, { "epoch": 0.1894669235709698, "grad_norm": 0.26011112332344055, "learning_rate": 0.0001, "loss": 1.5776, "step": 1180 }, { "epoch": 0.18962748876043672, "grad_norm": 0.267156183719635, "learning_rate": 0.0001, "loss": 1.622, "step": 1181 }, { "epoch": 0.18978805394990367, "grad_norm": 0.24337074160575867, "learning_rate": 0.0001, "loss": 1.5286, "step": 1182 }, { "epoch": 0.18994861913937058, "grad_norm": 0.2607194781303406, "learning_rate": 0.0001, "loss": 1.589, "step": 1183 }, { "epoch": 0.1901091843288375, "grad_norm": 0.25958162546157837, "learning_rate": 0.0001, "loss": 1.6048, "step": 1184 }, { "epoch": 0.19026974951830444, "grad_norm": 0.25642454624176025, "learning_rate": 0.0001, "loss": 1.618, "step": 1185 }, { "epoch": 0.19043031470777136, "grad_norm": 0.25708210468292236, "learning_rate": 0.0001, "loss": 1.6579, "step": 1186 }, { "epoch": 0.19059087989723827, "grad_norm": 0.3139567971229553, "learning_rate": 0.0001, "loss": 1.6066, "step": 1187 }, { "epoch": 0.1907514450867052, "grad_norm": 0.26446476578712463, "learning_rate": 0.0001, "loss": 1.6322, "step": 1188 }, { "epoch": 0.19091201027617213, "grad_norm": 0.25026997923851013, "learning_rate": 0.0001, "loss": 1.601, "step": 1189 }, { "epoch": 0.19107257546563905, "grad_norm": 0.2574523091316223, "learning_rate": 0.0001, "loss": 1.5596, "step": 1190 }, { "epoch": 0.19123314065510597, "grad_norm": 0.2602752149105072, "learning_rate": 0.0001, "loss": 1.5271, "step": 1191 }, { "epoch": 0.1913937058445729, "grad_norm": 0.24762839078903198, "learning_rate": 0.0001, "loss": 1.593, "step": 1192 }, { "epoch": 0.19155427103403982, "grad_norm": 0.2613926827907562, "learning_rate": 0.0001, "loss": 1.6103, "step": 1193 }, { "epoch": 0.19171483622350674, "grad_norm": 0.2612307667732239, "learning_rate": 0.0001, "loss": 1.6969, "step": 1194 }, { "epoch": 0.19187540141297366, "grad_norm": 0.25217312574386597, "learning_rate": 0.0001, "loss": 1.5758, "step": 1195 }, { "epoch": 0.1920359666024406, "grad_norm": 0.244678795337677, "learning_rate": 0.0001, "loss": 1.5781, "step": 1196 }, { "epoch": 0.19219653179190752, "grad_norm": 0.23493874073028564, "learning_rate": 0.0001, "loss": 1.5919, "step": 1197 }, { "epoch": 0.19235709698137443, "grad_norm": 0.2504349648952484, "learning_rate": 0.0001, "loss": 1.5639, "step": 1198 }, { "epoch": 0.19251766217084137, "grad_norm": 0.24955222010612488, "learning_rate": 0.0001, "loss": 1.5091, "step": 1199 }, { "epoch": 0.1926782273603083, "grad_norm": 0.23621833324432373, "learning_rate": 0.0001, "loss": 1.5365, "step": 1200 }, { "epoch": 0.1928387925497752, "grad_norm": 0.24546657502651215, "learning_rate": 0.0001, "loss": 1.5253, "step": 1201 }, { "epoch": 0.19299935773924212, "grad_norm": 0.24631303548812866, "learning_rate": 0.0001, "loss": 1.5587, "step": 1202 }, { "epoch": 0.19315992292870907, "grad_norm": 0.27041104435920715, "learning_rate": 0.0001, "loss": 1.7019, "step": 1203 }, { "epoch": 0.19332048811817598, "grad_norm": 0.2491196244955063, "learning_rate": 0.0001, "loss": 1.5807, "step": 1204 }, { "epoch": 0.1934810533076429, "grad_norm": 0.2602118253707886, "learning_rate": 0.0001, "loss": 1.5792, "step": 1205 }, { "epoch": 0.1936416184971098, "grad_norm": 0.26140204071998596, "learning_rate": 0.0001, "loss": 1.5863, "step": 1206 }, { "epoch": 0.19380218368657676, "grad_norm": 0.25287631154060364, "learning_rate": 0.0001, "loss": 1.6427, "step": 1207 }, { "epoch": 0.19396274887604367, "grad_norm": 0.24496833980083466, "learning_rate": 0.0001, "loss": 1.5007, "step": 1208 }, { "epoch": 0.1941233140655106, "grad_norm": 0.25292402505874634, "learning_rate": 0.0001, "loss": 1.5218, "step": 1209 }, { "epoch": 0.19428387925497753, "grad_norm": 0.2573296129703522, "learning_rate": 0.0001, "loss": 1.6401, "step": 1210 }, { "epoch": 0.19444444444444445, "grad_norm": 0.2588058114051819, "learning_rate": 0.0001, "loss": 1.6037, "step": 1211 }, { "epoch": 0.19460500963391136, "grad_norm": 0.2602480947971344, "learning_rate": 0.0001, "loss": 1.4993, "step": 1212 }, { "epoch": 0.19476557482337828, "grad_norm": 0.24529221653938293, "learning_rate": 0.0001, "loss": 1.606, "step": 1213 }, { "epoch": 0.19492614001284522, "grad_norm": 0.2493453323841095, "learning_rate": 0.0001, "loss": 1.581, "step": 1214 }, { "epoch": 0.19508670520231214, "grad_norm": 0.25749775767326355, "learning_rate": 0.0001, "loss": 1.648, "step": 1215 }, { "epoch": 0.19524727039177905, "grad_norm": 0.24454443156719208, "learning_rate": 0.0001, "loss": 1.5418, "step": 1216 }, { "epoch": 0.195407835581246, "grad_norm": 0.25549542903900146, "learning_rate": 0.0001, "loss": 1.5627, "step": 1217 }, { "epoch": 0.1955684007707129, "grad_norm": 0.25463199615478516, "learning_rate": 0.0001, "loss": 1.6265, "step": 1218 }, { "epoch": 0.19572896596017983, "grad_norm": 0.24837295711040497, "learning_rate": 0.0001, "loss": 1.515, "step": 1219 }, { "epoch": 0.19588953114964675, "grad_norm": 0.24428117275238037, "learning_rate": 0.0001, "loss": 1.6023, "step": 1220 }, { "epoch": 0.1960500963391137, "grad_norm": 0.24188461899757385, "learning_rate": 0.0001, "loss": 1.5239, "step": 1221 }, { "epoch": 0.1962106615285806, "grad_norm": 0.28273800015449524, "learning_rate": 0.0001, "loss": 1.628, "step": 1222 }, { "epoch": 0.19637122671804752, "grad_norm": 0.24625249207019806, "learning_rate": 0.0001, "loss": 1.5282, "step": 1223 }, { "epoch": 0.19653179190751446, "grad_norm": 0.2541029751300812, "learning_rate": 0.0001, "loss": 1.5845, "step": 1224 }, { "epoch": 0.19669235709698138, "grad_norm": 0.23067116737365723, "learning_rate": 0.0001, "loss": 1.4668, "step": 1225 }, { "epoch": 0.1968529222864483, "grad_norm": 0.25950169563293457, "learning_rate": 0.0001, "loss": 1.7031, "step": 1226 }, { "epoch": 0.1970134874759152, "grad_norm": 0.266619473695755, "learning_rate": 0.0001, "loss": 1.6598, "step": 1227 }, { "epoch": 0.19717405266538215, "grad_norm": 0.2611779272556305, "learning_rate": 0.0001, "loss": 1.4975, "step": 1228 }, { "epoch": 0.19733461785484907, "grad_norm": 0.24724836647510529, "learning_rate": 0.0001, "loss": 1.5402, "step": 1229 }, { "epoch": 0.197495183044316, "grad_norm": 0.2501607835292816, "learning_rate": 0.0001, "loss": 1.5692, "step": 1230 }, { "epoch": 0.19765574823378293, "grad_norm": 0.25537505745887756, "learning_rate": 0.0001, "loss": 1.5311, "step": 1231 }, { "epoch": 0.19781631342324985, "grad_norm": 0.265559583902359, "learning_rate": 0.0001, "loss": 1.5564, "step": 1232 }, { "epoch": 0.19797687861271676, "grad_norm": 0.25961583852767944, "learning_rate": 0.0001, "loss": 1.5156, "step": 1233 }, { "epoch": 0.19813744380218368, "grad_norm": 0.2424548864364624, "learning_rate": 0.0001, "loss": 1.4394, "step": 1234 }, { "epoch": 0.19829800899165062, "grad_norm": 0.25589460134506226, "learning_rate": 0.0001, "loss": 1.5529, "step": 1235 }, { "epoch": 0.19845857418111754, "grad_norm": 0.41593801975250244, "learning_rate": 0.0001, "loss": 1.6208, "step": 1236 }, { "epoch": 0.19861913937058445, "grad_norm": 0.2542087733745575, "learning_rate": 0.0001, "loss": 1.5614, "step": 1237 }, { "epoch": 0.19877970456005137, "grad_norm": 0.2820034623146057, "learning_rate": 0.0001, "loss": 1.6148, "step": 1238 }, { "epoch": 0.1989402697495183, "grad_norm": 0.23934297263622284, "learning_rate": 0.0001, "loss": 1.5318, "step": 1239 }, { "epoch": 0.19910083493898523, "grad_norm": 0.2777564227581024, "learning_rate": 0.0001, "loss": 1.555, "step": 1240 }, { "epoch": 0.19926140012845214, "grad_norm": 0.24646690487861633, "learning_rate": 0.0001, "loss": 1.5614, "step": 1241 }, { "epoch": 0.1994219653179191, "grad_norm": 1.28658127784729, "learning_rate": 0.0001, "loss": 1.6237, "step": 1242 }, { "epoch": 0.199582530507386, "grad_norm": 0.2608278691768646, "learning_rate": 0.0001, "loss": 1.5288, "step": 1243 }, { "epoch": 0.19974309569685292, "grad_norm": 0.25941115617752075, "learning_rate": 0.0001, "loss": 1.5484, "step": 1244 }, { "epoch": 0.19990366088631983, "grad_norm": 0.26302090287208557, "learning_rate": 0.0001, "loss": 1.6686, "step": 1245 }, { "epoch": 0.20006422607578678, "grad_norm": 0.28321418166160583, "learning_rate": 0.0001, "loss": 1.6189, "step": 1246 }, { "epoch": 0.2002247912652537, "grad_norm": 0.2480607032775879, "learning_rate": 0.0001, "loss": 1.5462, "step": 1247 }, { "epoch": 0.2003853564547206, "grad_norm": 0.2750486433506012, "learning_rate": 0.0001, "loss": 1.5818, "step": 1248 }, { "epoch": 0.20054592164418755, "grad_norm": 0.25892406702041626, "learning_rate": 0.0001, "loss": 1.4851, "step": 1249 }, { "epoch": 0.20070648683365447, "grad_norm": 0.2393769770860672, "learning_rate": 0.0001, "loss": 1.5491, "step": 1250 }, { "epoch": 0.20086705202312138, "grad_norm": 0.2535555362701416, "learning_rate": 0.0001, "loss": 1.5317, "step": 1251 }, { "epoch": 0.2010276172125883, "grad_norm": 0.25529196858406067, "learning_rate": 0.0001, "loss": 1.5794, "step": 1252 }, { "epoch": 0.20118818240205524, "grad_norm": 0.25667497515678406, "learning_rate": 0.0001, "loss": 1.6444, "step": 1253 }, { "epoch": 0.20134874759152216, "grad_norm": 0.2479962259531021, "learning_rate": 0.0001, "loss": 1.5517, "step": 1254 }, { "epoch": 0.20150931278098907, "grad_norm": 0.28191882371902466, "learning_rate": 0.0001, "loss": 1.6213, "step": 1255 }, { "epoch": 0.20166987797045602, "grad_norm": 0.27171066403388977, "learning_rate": 0.0001, "loss": 1.6614, "step": 1256 }, { "epoch": 0.20183044315992293, "grad_norm": 0.2494252473115921, "learning_rate": 0.0001, "loss": 1.5451, "step": 1257 }, { "epoch": 0.20199100834938985, "grad_norm": 0.24599884450435638, "learning_rate": 0.0001, "loss": 1.5817, "step": 1258 }, { "epoch": 0.20215157353885677, "grad_norm": 0.2512761950492859, "learning_rate": 0.0001, "loss": 1.5855, "step": 1259 }, { "epoch": 0.2023121387283237, "grad_norm": 0.24004778265953064, "learning_rate": 0.0001, "loss": 1.5994, "step": 1260 }, { "epoch": 0.20247270391779063, "grad_norm": 0.25274306535720825, "learning_rate": 0.0001, "loss": 1.5803, "step": 1261 }, { "epoch": 0.20263326910725754, "grad_norm": 0.2604442238807678, "learning_rate": 0.0001, "loss": 1.6233, "step": 1262 }, { "epoch": 0.20279383429672446, "grad_norm": 0.2535599172115326, "learning_rate": 0.0001, "loss": 1.6, "step": 1263 }, { "epoch": 0.2029543994861914, "grad_norm": 0.27341902256011963, "learning_rate": 0.0001, "loss": 1.5431, "step": 1264 }, { "epoch": 0.20311496467565832, "grad_norm": 0.24905379116535187, "learning_rate": 0.0001, "loss": 1.5125, "step": 1265 }, { "epoch": 0.20327552986512523, "grad_norm": 0.24562866985797882, "learning_rate": 0.0001, "loss": 1.5394, "step": 1266 }, { "epoch": 0.20343609505459218, "grad_norm": 0.26494455337524414, "learning_rate": 0.0001, "loss": 1.5749, "step": 1267 }, { "epoch": 0.2035966602440591, "grad_norm": 0.24077832698822021, "learning_rate": 0.0001, "loss": 1.6049, "step": 1268 }, { "epoch": 0.203757225433526, "grad_norm": 0.2567788362503052, "learning_rate": 0.0001, "loss": 1.5985, "step": 1269 }, { "epoch": 0.20391779062299292, "grad_norm": 0.23835213482379913, "learning_rate": 0.0001, "loss": 1.5971, "step": 1270 }, { "epoch": 0.20407835581245987, "grad_norm": 0.2541971802711487, "learning_rate": 0.0001, "loss": 1.5794, "step": 1271 }, { "epoch": 0.20423892100192678, "grad_norm": 0.24991245567798615, "learning_rate": 0.0001, "loss": 1.5234, "step": 1272 }, { "epoch": 0.2043994861913937, "grad_norm": 0.2575179636478424, "learning_rate": 0.0001, "loss": 1.6103, "step": 1273 }, { "epoch": 0.20456005138086064, "grad_norm": 0.2621893584728241, "learning_rate": 0.0001, "loss": 1.6919, "step": 1274 }, { "epoch": 0.20472061657032756, "grad_norm": 0.2681543827056885, "learning_rate": 0.0001, "loss": 1.5648, "step": 1275 }, { "epoch": 0.20488118175979447, "grad_norm": 0.25278764963150024, "learning_rate": 0.0001, "loss": 1.5622, "step": 1276 }, { "epoch": 0.2050417469492614, "grad_norm": 0.24915283918380737, "learning_rate": 0.0001, "loss": 1.6034, "step": 1277 }, { "epoch": 0.20520231213872833, "grad_norm": 0.2452990710735321, "learning_rate": 0.0001, "loss": 1.557, "step": 1278 }, { "epoch": 0.20536287732819525, "grad_norm": 0.2584128677845001, "learning_rate": 0.0001, "loss": 1.5505, "step": 1279 }, { "epoch": 0.20552344251766216, "grad_norm": 0.2779355049133301, "learning_rate": 0.0001, "loss": 1.6823, "step": 1280 }, { "epoch": 0.2056840077071291, "grad_norm": 0.2508663535118103, "learning_rate": 0.0001, "loss": 1.5681, "step": 1281 }, { "epoch": 0.20584457289659602, "grad_norm": 0.2578074336051941, "learning_rate": 0.0001, "loss": 1.5981, "step": 1282 }, { "epoch": 0.20600513808606294, "grad_norm": 0.2573000192642212, "learning_rate": 0.0001, "loss": 1.5426, "step": 1283 }, { "epoch": 0.20616570327552985, "grad_norm": 0.24457018077373505, "learning_rate": 0.0001, "loss": 1.5179, "step": 1284 }, { "epoch": 0.2063262684649968, "grad_norm": 0.24787230789661407, "learning_rate": 0.0001, "loss": 1.5257, "step": 1285 }, { "epoch": 0.20648683365446371, "grad_norm": 0.2664269804954529, "learning_rate": 0.0001, "loss": 1.6439, "step": 1286 }, { "epoch": 0.20664739884393063, "grad_norm": 0.2581881284713745, "learning_rate": 0.0001, "loss": 1.5825, "step": 1287 }, { "epoch": 0.20680796403339757, "grad_norm": 0.3322311043739319, "learning_rate": 0.0001, "loss": 1.5597, "step": 1288 }, { "epoch": 0.2069685292228645, "grad_norm": 0.26221877336502075, "learning_rate": 0.0001, "loss": 1.6434, "step": 1289 }, { "epoch": 0.2071290944123314, "grad_norm": 0.23221048712730408, "learning_rate": 0.0001, "loss": 1.5598, "step": 1290 }, { "epoch": 0.20728965960179832, "grad_norm": 0.2560206353664398, "learning_rate": 0.0001, "loss": 1.5869, "step": 1291 }, { "epoch": 0.20745022479126526, "grad_norm": 0.26476046442985535, "learning_rate": 0.0001, "loss": 1.5432, "step": 1292 }, { "epoch": 0.20761078998073218, "grad_norm": 0.24677133560180664, "learning_rate": 0.0001, "loss": 1.5761, "step": 1293 }, { "epoch": 0.2077713551701991, "grad_norm": 0.2672979533672333, "learning_rate": 0.0001, "loss": 1.6243, "step": 1294 }, { "epoch": 0.207931920359666, "grad_norm": 0.25498655438423157, "learning_rate": 0.0001, "loss": 1.5831, "step": 1295 }, { "epoch": 0.20809248554913296, "grad_norm": 0.27964186668395996, "learning_rate": 0.0001, "loss": 1.6878, "step": 1296 }, { "epoch": 0.20825305073859987, "grad_norm": 0.26787513494491577, "learning_rate": 0.0001, "loss": 1.601, "step": 1297 }, { "epoch": 0.2084136159280668, "grad_norm": 0.2635231912136078, "learning_rate": 0.0001, "loss": 1.6095, "step": 1298 }, { "epoch": 0.20857418111753373, "grad_norm": 0.2523629367351532, "learning_rate": 0.0001, "loss": 1.568, "step": 1299 }, { "epoch": 0.20873474630700065, "grad_norm": 0.2625834047794342, "learning_rate": 0.0001, "loss": 1.5783, "step": 1300 }, { "epoch": 0.20889531149646756, "grad_norm": 0.2797095775604248, "learning_rate": 0.0001, "loss": 1.6573, "step": 1301 }, { "epoch": 0.20905587668593448, "grad_norm": 0.25999319553375244, "learning_rate": 0.0001, "loss": 1.5702, "step": 1302 }, { "epoch": 0.20921644187540142, "grad_norm": 0.2916666865348816, "learning_rate": 0.0001, "loss": 1.5995, "step": 1303 }, { "epoch": 0.20937700706486834, "grad_norm": 0.28023776412010193, "learning_rate": 0.0001, "loss": 1.5945, "step": 1304 }, { "epoch": 0.20953757225433525, "grad_norm": 0.2535582184791565, "learning_rate": 0.0001, "loss": 1.5452, "step": 1305 }, { "epoch": 0.2096981374438022, "grad_norm": 0.2611648142337799, "learning_rate": 0.0001, "loss": 1.5214, "step": 1306 }, { "epoch": 0.2098587026332691, "grad_norm": 0.27532505989074707, "learning_rate": 0.0001, "loss": 1.6111, "step": 1307 }, { "epoch": 0.21001926782273603, "grad_norm": 0.2723951041698456, "learning_rate": 0.0001, "loss": 1.636, "step": 1308 }, { "epoch": 0.21017983301220294, "grad_norm": 0.24837726354599, "learning_rate": 0.0001, "loss": 1.6595, "step": 1309 }, { "epoch": 0.2103403982016699, "grad_norm": 0.26135581731796265, "learning_rate": 0.0001, "loss": 1.6177, "step": 1310 }, { "epoch": 0.2105009633911368, "grad_norm": 0.27552881836891174, "learning_rate": 0.0001, "loss": 1.6029, "step": 1311 }, { "epoch": 0.21066152858060372, "grad_norm": 0.7178780436515808, "learning_rate": 0.0001, "loss": 1.5322, "step": 1312 }, { "epoch": 0.21082209377007066, "grad_norm": 0.2589340806007385, "learning_rate": 0.0001, "loss": 1.6379, "step": 1313 }, { "epoch": 0.21098265895953758, "grad_norm": 0.26356926560401917, "learning_rate": 0.0001, "loss": 1.6699, "step": 1314 }, { "epoch": 0.2111432241490045, "grad_norm": 0.2632535994052887, "learning_rate": 0.0001, "loss": 1.611, "step": 1315 }, { "epoch": 0.2113037893384714, "grad_norm": 0.2623354494571686, "learning_rate": 0.0001, "loss": 1.55, "step": 1316 }, { "epoch": 0.21146435452793835, "grad_norm": 0.2578772008419037, "learning_rate": 0.0001, "loss": 1.5774, "step": 1317 }, { "epoch": 0.21162491971740527, "grad_norm": 0.26975879073143005, "learning_rate": 0.0001, "loss": 1.6416, "step": 1318 }, { "epoch": 0.21178548490687218, "grad_norm": 0.28247830271720886, "learning_rate": 0.0001, "loss": 1.6189, "step": 1319 }, { "epoch": 0.2119460500963391, "grad_norm": 0.24312029778957367, "learning_rate": 0.0001, "loss": 1.5905, "step": 1320 }, { "epoch": 0.21210661528580604, "grad_norm": 0.26209938526153564, "learning_rate": 0.0001, "loss": 1.6204, "step": 1321 }, { "epoch": 0.21226718047527296, "grad_norm": 0.262643039226532, "learning_rate": 0.0001, "loss": 1.6566, "step": 1322 }, { "epoch": 0.21242774566473988, "grad_norm": 0.25319334864616394, "learning_rate": 0.0001, "loss": 1.5506, "step": 1323 }, { "epoch": 0.21258831085420682, "grad_norm": 0.2527086138725281, "learning_rate": 0.0001, "loss": 1.5746, "step": 1324 }, { "epoch": 0.21274887604367373, "grad_norm": 0.24679316580295563, "learning_rate": 0.0001, "loss": 1.5769, "step": 1325 }, { "epoch": 0.21290944123314065, "grad_norm": 0.6807023882865906, "learning_rate": 0.0001, "loss": 1.5954, "step": 1326 }, { "epoch": 0.21307000642260757, "grad_norm": 0.25651562213897705, "learning_rate": 0.0001, "loss": 1.5962, "step": 1327 }, { "epoch": 0.2132305716120745, "grad_norm": 0.27083685994148254, "learning_rate": 0.0001, "loss": 1.5687, "step": 1328 }, { "epoch": 0.21339113680154143, "grad_norm": 0.26416775584220886, "learning_rate": 0.0001, "loss": 1.6133, "step": 1329 }, { "epoch": 0.21355170199100834, "grad_norm": 0.2596018612384796, "learning_rate": 0.0001, "loss": 1.6272, "step": 1330 }, { "epoch": 0.21371226718047528, "grad_norm": 0.2947336733341217, "learning_rate": 0.0001, "loss": 1.5521, "step": 1331 }, { "epoch": 0.2138728323699422, "grad_norm": 0.2585946023464203, "learning_rate": 0.0001, "loss": 1.5276, "step": 1332 }, { "epoch": 0.21403339755940912, "grad_norm": 0.24610991775989532, "learning_rate": 0.0001, "loss": 1.5619, "step": 1333 }, { "epoch": 0.21419396274887603, "grad_norm": 0.2546312212944031, "learning_rate": 0.0001, "loss": 1.5592, "step": 1334 }, { "epoch": 0.21435452793834298, "grad_norm": 0.25936055183410645, "learning_rate": 0.0001, "loss": 1.5805, "step": 1335 }, { "epoch": 0.2145150931278099, "grad_norm": 0.26311784982681274, "learning_rate": 0.0001, "loss": 1.6176, "step": 1336 }, { "epoch": 0.2146756583172768, "grad_norm": 0.2564358115196228, "learning_rate": 0.0001, "loss": 1.6126, "step": 1337 }, { "epoch": 0.21483622350674375, "grad_norm": 0.25600630044937134, "learning_rate": 0.0001, "loss": 1.5878, "step": 1338 }, { "epoch": 0.21499678869621067, "grad_norm": 0.24495278298854828, "learning_rate": 0.0001, "loss": 1.5802, "step": 1339 }, { "epoch": 0.21515735388567758, "grad_norm": 0.24395935237407684, "learning_rate": 0.0001, "loss": 1.578, "step": 1340 }, { "epoch": 0.2153179190751445, "grad_norm": 0.25721320509910583, "learning_rate": 0.0001, "loss": 1.6087, "step": 1341 }, { "epoch": 0.21547848426461144, "grad_norm": 0.2649543583393097, "learning_rate": 0.0001, "loss": 1.5431, "step": 1342 }, { "epoch": 0.21563904945407836, "grad_norm": 0.2546660304069519, "learning_rate": 0.0001, "loss": 1.5543, "step": 1343 }, { "epoch": 0.21579961464354527, "grad_norm": 0.26365596055984497, "learning_rate": 0.0001, "loss": 1.5639, "step": 1344 }, { "epoch": 0.2159601798330122, "grad_norm": 0.24007868766784668, "learning_rate": 0.0001, "loss": 1.5436, "step": 1345 }, { "epoch": 0.21612074502247913, "grad_norm": 0.24529314041137695, "learning_rate": 0.0001, "loss": 1.5986, "step": 1346 }, { "epoch": 0.21628131021194605, "grad_norm": 0.27236080169677734, "learning_rate": 0.0001, "loss": 1.6892, "step": 1347 }, { "epoch": 0.21644187540141296, "grad_norm": 0.26769787073135376, "learning_rate": 0.0001, "loss": 1.5531, "step": 1348 }, { "epoch": 0.2166024405908799, "grad_norm": 0.23053252696990967, "learning_rate": 0.0001, "loss": 1.5129, "step": 1349 }, { "epoch": 0.21676300578034682, "grad_norm": 0.3684787452220917, "learning_rate": 0.0001, "loss": 1.5096, "step": 1350 }, { "epoch": 0.21692357096981374, "grad_norm": 0.2830764055252075, "learning_rate": 0.0001, "loss": 1.6556, "step": 1351 }, { "epoch": 0.21708413615928066, "grad_norm": 0.2534649670124054, "learning_rate": 0.0001, "loss": 1.4971, "step": 1352 }, { "epoch": 0.2172447013487476, "grad_norm": 0.2649475336074829, "learning_rate": 0.0001, "loss": 1.5548, "step": 1353 }, { "epoch": 0.21740526653821451, "grad_norm": 0.2695215046405792, "learning_rate": 0.0001, "loss": 1.5673, "step": 1354 }, { "epoch": 0.21756583172768143, "grad_norm": 0.24033614993095398, "learning_rate": 0.0001, "loss": 1.5446, "step": 1355 }, { "epoch": 0.21772639691714837, "grad_norm": 0.2498067319393158, "learning_rate": 0.0001, "loss": 1.5808, "step": 1356 }, { "epoch": 0.2178869621066153, "grad_norm": 0.2555305063724518, "learning_rate": 0.0001, "loss": 1.5075, "step": 1357 }, { "epoch": 0.2180475272960822, "grad_norm": 0.24671803414821625, "learning_rate": 0.0001, "loss": 1.5241, "step": 1358 }, { "epoch": 0.21820809248554912, "grad_norm": 0.25955885648727417, "learning_rate": 0.0001, "loss": 1.6222, "step": 1359 }, { "epoch": 0.21836865767501606, "grad_norm": 0.2679576873779297, "learning_rate": 0.0001, "loss": 1.6631, "step": 1360 }, { "epoch": 0.21852922286448298, "grad_norm": 0.26426801085472107, "learning_rate": 0.0001, "loss": 1.516, "step": 1361 }, { "epoch": 0.2186897880539499, "grad_norm": 0.25338250398635864, "learning_rate": 0.0001, "loss": 1.6029, "step": 1362 }, { "epoch": 0.21885035324341684, "grad_norm": 0.27184170484542847, "learning_rate": 0.0001, "loss": 1.6391, "step": 1363 }, { "epoch": 0.21901091843288376, "grad_norm": 0.25309568643569946, "learning_rate": 0.0001, "loss": 1.4754, "step": 1364 }, { "epoch": 0.21917148362235067, "grad_norm": 0.38762274384498596, "learning_rate": 0.0001, "loss": 1.5477, "step": 1365 }, { "epoch": 0.2193320488118176, "grad_norm": 0.2547190189361572, "learning_rate": 0.0001, "loss": 1.5877, "step": 1366 }, { "epoch": 0.21949261400128453, "grad_norm": 0.24988453090190887, "learning_rate": 0.0001, "loss": 1.5625, "step": 1367 }, { "epoch": 0.21965317919075145, "grad_norm": 0.25075602531433105, "learning_rate": 0.0001, "loss": 1.4408, "step": 1368 }, { "epoch": 0.21981374438021836, "grad_norm": 0.25572770833969116, "learning_rate": 0.0001, "loss": 1.6669, "step": 1369 }, { "epoch": 0.2199743095696853, "grad_norm": 0.24106429517269135, "learning_rate": 0.0001, "loss": 1.4878, "step": 1370 }, { "epoch": 0.22013487475915222, "grad_norm": 0.25900036096572876, "learning_rate": 0.0001, "loss": 1.5423, "step": 1371 }, { "epoch": 0.22029543994861914, "grad_norm": 0.25564947724342346, "learning_rate": 0.0001, "loss": 1.6369, "step": 1372 }, { "epoch": 0.22045600513808605, "grad_norm": 0.24209898710250854, "learning_rate": 0.0001, "loss": 1.5761, "step": 1373 }, { "epoch": 0.220616570327553, "grad_norm": 0.2538258135318756, "learning_rate": 0.0001, "loss": 1.5493, "step": 1374 }, { "epoch": 0.2207771355170199, "grad_norm": 0.25893691182136536, "learning_rate": 0.0001, "loss": 1.5965, "step": 1375 }, { "epoch": 0.22093770070648683, "grad_norm": 0.2606137990951538, "learning_rate": 0.0001, "loss": 1.57, "step": 1376 }, { "epoch": 0.22109826589595374, "grad_norm": 0.24876049160957336, "learning_rate": 0.0001, "loss": 1.5434, "step": 1377 }, { "epoch": 0.2212588310854207, "grad_norm": 0.24675314128398895, "learning_rate": 0.0001, "loss": 1.5311, "step": 1378 }, { "epoch": 0.2214193962748876, "grad_norm": 0.2675689160823822, "learning_rate": 0.0001, "loss": 1.6377, "step": 1379 }, { "epoch": 0.22157996146435452, "grad_norm": 0.25446370244026184, "learning_rate": 0.0001, "loss": 1.5623, "step": 1380 }, { "epoch": 0.22174052665382146, "grad_norm": 0.35192978382110596, "learning_rate": 0.0001, "loss": 1.5927, "step": 1381 }, { "epoch": 0.22190109184328838, "grad_norm": 0.26166224479675293, "learning_rate": 0.0001, "loss": 1.7022, "step": 1382 }, { "epoch": 0.2220616570327553, "grad_norm": 0.24951639771461487, "learning_rate": 0.0001, "loss": 1.6084, "step": 1383 }, { "epoch": 0.2222222222222222, "grad_norm": 0.2615184783935547, "learning_rate": 0.0001, "loss": 1.5301, "step": 1384 }, { "epoch": 0.22238278741168915, "grad_norm": 0.2507842481136322, "learning_rate": 0.0001, "loss": 1.5129, "step": 1385 }, { "epoch": 0.22254335260115607, "grad_norm": 0.24396218359470367, "learning_rate": 0.0001, "loss": 1.5362, "step": 1386 }, { "epoch": 0.22270391779062298, "grad_norm": 0.25023460388183594, "learning_rate": 0.0001, "loss": 1.5818, "step": 1387 }, { "epoch": 0.22286448298008993, "grad_norm": 0.26676830649375916, "learning_rate": 0.0001, "loss": 1.5556, "step": 1388 }, { "epoch": 0.22302504816955684, "grad_norm": 0.2628074586391449, "learning_rate": 0.0001, "loss": 1.584, "step": 1389 }, { "epoch": 0.22318561335902376, "grad_norm": 0.24453000724315643, "learning_rate": 0.0001, "loss": 1.5694, "step": 1390 }, { "epoch": 0.22334617854849068, "grad_norm": 0.24808208644390106, "learning_rate": 0.0001, "loss": 1.5199, "step": 1391 }, { "epoch": 0.22350674373795762, "grad_norm": 0.25541916489601135, "learning_rate": 0.0001, "loss": 1.5991, "step": 1392 }, { "epoch": 0.22366730892742454, "grad_norm": 0.25001993775367737, "learning_rate": 0.0001, "loss": 1.5395, "step": 1393 }, { "epoch": 0.22382787411689145, "grad_norm": 0.24925601482391357, "learning_rate": 0.0001, "loss": 1.5594, "step": 1394 }, { "epoch": 0.2239884393063584, "grad_norm": 0.24612492322921753, "learning_rate": 0.0001, "loss": 1.5818, "step": 1395 }, { "epoch": 0.2241490044958253, "grad_norm": 0.25647103786468506, "learning_rate": 0.0001, "loss": 1.6169, "step": 1396 }, { "epoch": 0.22430956968529223, "grad_norm": 0.25585880875587463, "learning_rate": 0.0001, "loss": 1.5923, "step": 1397 }, { "epoch": 0.22447013487475914, "grad_norm": 0.25814953446388245, "learning_rate": 0.0001, "loss": 1.6253, "step": 1398 }, { "epoch": 0.22463070006422609, "grad_norm": 0.2706148624420166, "learning_rate": 0.0001, "loss": 1.6775, "step": 1399 }, { "epoch": 0.224791265253693, "grad_norm": 0.24915863573551178, "learning_rate": 0.0001, "loss": 1.4988, "step": 1400 }, { "epoch": 0.22495183044315992, "grad_norm": 0.25413191318511963, "learning_rate": 0.0001, "loss": 1.5438, "step": 1401 }, { "epoch": 0.22511239563262683, "grad_norm": 0.25795644521713257, "learning_rate": 0.0001, "loss": 1.6046, "step": 1402 }, { "epoch": 0.22527296082209378, "grad_norm": 0.24239666759967804, "learning_rate": 0.0001, "loss": 1.5229, "step": 1403 }, { "epoch": 0.2254335260115607, "grad_norm": 0.2521439492702484, "learning_rate": 0.0001, "loss": 1.5751, "step": 1404 }, { "epoch": 0.2255940912010276, "grad_norm": 0.25757378339767456, "learning_rate": 0.0001, "loss": 1.5828, "step": 1405 }, { "epoch": 0.22575465639049455, "grad_norm": 0.24728292226791382, "learning_rate": 0.0001, "loss": 1.5471, "step": 1406 }, { "epoch": 0.22591522157996147, "grad_norm": 0.2591513693332672, "learning_rate": 0.0001, "loss": 1.6014, "step": 1407 }, { "epoch": 0.22607578676942838, "grad_norm": 0.2557453513145447, "learning_rate": 0.0001, "loss": 1.5125, "step": 1408 }, { "epoch": 0.2262363519588953, "grad_norm": 0.24972213804721832, "learning_rate": 0.0001, "loss": 1.56, "step": 1409 }, { "epoch": 0.22639691714836224, "grad_norm": 0.2518574297428131, "learning_rate": 0.0001, "loss": 1.5866, "step": 1410 }, { "epoch": 0.22655748233782916, "grad_norm": 0.2644212245941162, "learning_rate": 0.0001, "loss": 1.505, "step": 1411 }, { "epoch": 0.22671804752729607, "grad_norm": 0.25966617465019226, "learning_rate": 0.0001, "loss": 1.6544, "step": 1412 }, { "epoch": 0.22687861271676302, "grad_norm": 0.25631988048553467, "learning_rate": 0.0001, "loss": 1.5058, "step": 1413 }, { "epoch": 0.22703917790622993, "grad_norm": 0.2735203504562378, "learning_rate": 0.0001, "loss": 1.5849, "step": 1414 }, { "epoch": 0.22719974309569685, "grad_norm": 0.24585656821727753, "learning_rate": 0.0001, "loss": 1.6229, "step": 1415 }, { "epoch": 0.22736030828516376, "grad_norm": 0.2850990891456604, "learning_rate": 0.0001, "loss": 1.6063, "step": 1416 }, { "epoch": 0.2275208734746307, "grad_norm": 0.23922717571258545, "learning_rate": 0.0001, "loss": 1.4942, "step": 1417 }, { "epoch": 0.22768143866409762, "grad_norm": 0.24667245149612427, "learning_rate": 0.0001, "loss": 1.5835, "step": 1418 }, { "epoch": 0.22784200385356454, "grad_norm": 0.2509988248348236, "learning_rate": 0.0001, "loss": 1.5977, "step": 1419 }, { "epoch": 0.22800256904303148, "grad_norm": 0.24224527180194855, "learning_rate": 0.0001, "loss": 1.5217, "step": 1420 }, { "epoch": 0.2281631342324984, "grad_norm": 0.25608593225479126, "learning_rate": 0.0001, "loss": 1.6166, "step": 1421 }, { "epoch": 0.22832369942196531, "grad_norm": 0.2535424828529358, "learning_rate": 0.0001, "loss": 1.6606, "step": 1422 }, { "epoch": 0.22848426461143223, "grad_norm": 3.90877628326416, "learning_rate": 0.0001, "loss": 1.6422, "step": 1423 }, { "epoch": 0.22864482980089917, "grad_norm": 0.24865517020225525, "learning_rate": 0.0001, "loss": 1.6414, "step": 1424 }, { "epoch": 0.2288053949903661, "grad_norm": 0.2509647607803345, "learning_rate": 0.0001, "loss": 1.5995, "step": 1425 }, { "epoch": 0.228965960179833, "grad_norm": 0.25810936093330383, "learning_rate": 0.0001, "loss": 1.6074, "step": 1426 }, { "epoch": 0.22912652536929995, "grad_norm": 0.2706359326839447, "learning_rate": 0.0001, "loss": 1.5623, "step": 1427 }, { "epoch": 0.22928709055876687, "grad_norm": 0.2858353853225708, "learning_rate": 0.0001, "loss": 1.6383, "step": 1428 }, { "epoch": 0.22944765574823378, "grad_norm": 0.26110613346099854, "learning_rate": 0.0001, "loss": 1.6075, "step": 1429 }, { "epoch": 0.2296082209377007, "grad_norm": 0.25887665152549744, "learning_rate": 0.0001, "loss": 1.5545, "step": 1430 }, { "epoch": 0.22976878612716764, "grad_norm": 0.25360432267189026, "learning_rate": 0.0001, "loss": 1.6354, "step": 1431 }, { "epoch": 0.22992935131663456, "grad_norm": 0.24859270453453064, "learning_rate": 0.0001, "loss": 1.5574, "step": 1432 }, { "epoch": 0.23008991650610147, "grad_norm": 0.24973681569099426, "learning_rate": 0.0001, "loss": 1.5883, "step": 1433 }, { "epoch": 0.2302504816955684, "grad_norm": 0.2463744878768921, "learning_rate": 0.0001, "loss": 1.5352, "step": 1434 }, { "epoch": 0.23041104688503533, "grad_norm": 0.2605551779270172, "learning_rate": 0.0001, "loss": 1.5923, "step": 1435 }, { "epoch": 0.23057161207450225, "grad_norm": 0.2524442970752716, "learning_rate": 0.0001, "loss": 1.5501, "step": 1436 }, { "epoch": 0.23073217726396916, "grad_norm": 0.26319846510887146, "learning_rate": 0.0001, "loss": 1.6097, "step": 1437 }, { "epoch": 0.2308927424534361, "grad_norm": 0.2604261636734009, "learning_rate": 0.0001, "loss": 1.6013, "step": 1438 }, { "epoch": 0.23105330764290302, "grad_norm": 0.2449166178703308, "learning_rate": 0.0001, "loss": 1.4734, "step": 1439 }, { "epoch": 0.23121387283236994, "grad_norm": 0.2389906495809555, "learning_rate": 0.0001, "loss": 1.5959, "step": 1440 }, { "epoch": 0.23137443802183685, "grad_norm": 0.26003342866897583, "learning_rate": 0.0001, "loss": 1.6533, "step": 1441 }, { "epoch": 0.2315350032113038, "grad_norm": 0.2461751103401184, "learning_rate": 0.0001, "loss": 1.5297, "step": 1442 }, { "epoch": 0.2316955684007707, "grad_norm": 0.25976449251174927, "learning_rate": 0.0001, "loss": 1.5842, "step": 1443 }, { "epoch": 0.23185613359023763, "grad_norm": 0.24590584635734558, "learning_rate": 0.0001, "loss": 1.4882, "step": 1444 }, { "epoch": 0.23201669877970457, "grad_norm": 0.25528788566589355, "learning_rate": 0.0001, "loss": 1.5965, "step": 1445 }, { "epoch": 0.2321772639691715, "grad_norm": 0.2836610674858093, "learning_rate": 0.0001, "loss": 1.6253, "step": 1446 }, { "epoch": 0.2323378291586384, "grad_norm": 0.24190182983875275, "learning_rate": 0.0001, "loss": 1.5365, "step": 1447 }, { "epoch": 0.23249839434810532, "grad_norm": 0.2515237033367157, "learning_rate": 0.0001, "loss": 1.6431, "step": 1448 }, { "epoch": 0.23265895953757226, "grad_norm": 0.24769434332847595, "learning_rate": 0.0001, "loss": 1.5132, "step": 1449 }, { "epoch": 0.23281952472703918, "grad_norm": 0.24459420144557953, "learning_rate": 0.0001, "loss": 1.5509, "step": 1450 }, { "epoch": 0.2329800899165061, "grad_norm": 0.2621827721595764, "learning_rate": 0.0001, "loss": 1.5563, "step": 1451 }, { "epoch": 0.23314065510597304, "grad_norm": 0.27197253704071045, "learning_rate": 0.0001, "loss": 1.5884, "step": 1452 }, { "epoch": 0.23330122029543995, "grad_norm": 0.2468864619731903, "learning_rate": 0.0001, "loss": 1.594, "step": 1453 }, { "epoch": 0.23346178548490687, "grad_norm": 0.25369641184806824, "learning_rate": 0.0001, "loss": 1.5619, "step": 1454 }, { "epoch": 0.23362235067437379, "grad_norm": 0.250773549079895, "learning_rate": 0.0001, "loss": 1.5888, "step": 1455 }, { "epoch": 0.23378291586384073, "grad_norm": 0.23992547392845154, "learning_rate": 0.0001, "loss": 1.5831, "step": 1456 }, { "epoch": 0.23394348105330764, "grad_norm": 0.2697206139564514, "learning_rate": 0.0001, "loss": 1.5004, "step": 1457 }, { "epoch": 0.23410404624277456, "grad_norm": 0.25312143564224243, "learning_rate": 0.0001, "loss": 1.578, "step": 1458 }, { "epoch": 0.23426461143224148, "grad_norm": 0.256674587726593, "learning_rate": 0.0001, "loss": 1.5219, "step": 1459 }, { "epoch": 0.23442517662170842, "grad_norm": 0.25566184520721436, "learning_rate": 0.0001, "loss": 1.5939, "step": 1460 }, { "epoch": 0.23458574181117534, "grad_norm": 0.24389982223510742, "learning_rate": 0.0001, "loss": 1.5649, "step": 1461 }, { "epoch": 0.23474630700064225, "grad_norm": 0.24940608441829681, "learning_rate": 0.0001, "loss": 1.5485, "step": 1462 }, { "epoch": 0.2349068721901092, "grad_norm": 0.24859265983104706, "learning_rate": 0.0001, "loss": 1.5356, "step": 1463 }, { "epoch": 0.2350674373795761, "grad_norm": 0.2573135793209076, "learning_rate": 0.0001, "loss": 1.6227, "step": 1464 }, { "epoch": 0.23522800256904303, "grad_norm": 0.24468062818050385, "learning_rate": 0.0001, "loss": 1.593, "step": 1465 }, { "epoch": 0.23538856775850994, "grad_norm": 0.27289363741874695, "learning_rate": 0.0001, "loss": 1.5822, "step": 1466 }, { "epoch": 0.23554913294797689, "grad_norm": 0.2553497850894928, "learning_rate": 0.0001, "loss": 1.5125, "step": 1467 }, { "epoch": 0.2357096981374438, "grad_norm": 0.24384398758411407, "learning_rate": 0.0001, "loss": 1.5465, "step": 1468 }, { "epoch": 0.23587026332691072, "grad_norm": 0.2581036686897278, "learning_rate": 0.0001, "loss": 1.519, "step": 1469 }, { "epoch": 0.23603082851637766, "grad_norm": 0.246010884642601, "learning_rate": 0.0001, "loss": 1.5899, "step": 1470 }, { "epoch": 0.23619139370584458, "grad_norm": 0.25303733348846436, "learning_rate": 0.0001, "loss": 1.5814, "step": 1471 }, { "epoch": 0.2363519588953115, "grad_norm": 0.25244733691215515, "learning_rate": 0.0001, "loss": 1.5473, "step": 1472 }, { "epoch": 0.2365125240847784, "grad_norm": 0.2596549391746521, "learning_rate": 0.0001, "loss": 1.5539, "step": 1473 }, { "epoch": 0.23667308927424535, "grad_norm": 0.24361246824264526, "learning_rate": 0.0001, "loss": 1.5328, "step": 1474 }, { "epoch": 0.23683365446371227, "grad_norm": 0.2471035271883011, "learning_rate": 0.0001, "loss": 1.5761, "step": 1475 }, { "epoch": 0.23699421965317918, "grad_norm": 0.25243648886680603, "learning_rate": 0.0001, "loss": 1.6034, "step": 1476 }, { "epoch": 0.23715478484264613, "grad_norm": 0.25919926166534424, "learning_rate": 0.0001, "loss": 1.5426, "step": 1477 }, { "epoch": 0.23731535003211304, "grad_norm": 0.260970801115036, "learning_rate": 0.0001, "loss": 1.531, "step": 1478 }, { "epoch": 0.23747591522157996, "grad_norm": 0.24548634886741638, "learning_rate": 0.0001, "loss": 1.5057, "step": 1479 }, { "epoch": 0.23763648041104687, "grad_norm": 0.2470322996377945, "learning_rate": 0.0001, "loss": 1.5392, "step": 1480 }, { "epoch": 0.23779704560051382, "grad_norm": 0.2650474011898041, "learning_rate": 0.0001, "loss": 1.588, "step": 1481 }, { "epoch": 0.23795761078998073, "grad_norm": 0.2452380210161209, "learning_rate": 0.0001, "loss": 1.575, "step": 1482 }, { "epoch": 0.23811817597944765, "grad_norm": 0.22939066588878632, "learning_rate": 0.0001, "loss": 1.4756, "step": 1483 }, { "epoch": 0.2382787411689146, "grad_norm": 0.2571943998336792, "learning_rate": 0.0001, "loss": 1.645, "step": 1484 }, { "epoch": 0.2384393063583815, "grad_norm": 0.2676110863685608, "learning_rate": 0.0001, "loss": 1.5951, "step": 1485 }, { "epoch": 0.23859987154784842, "grad_norm": 0.26024967432022095, "learning_rate": 0.0001, "loss": 1.5544, "step": 1486 }, { "epoch": 0.23876043673731534, "grad_norm": 0.279931902885437, "learning_rate": 0.0001, "loss": 1.5804, "step": 1487 }, { "epoch": 0.23892100192678228, "grad_norm": 0.24358607828617096, "learning_rate": 0.0001, "loss": 1.5338, "step": 1488 }, { "epoch": 0.2390815671162492, "grad_norm": 0.25573456287384033, "learning_rate": 0.0001, "loss": 1.6381, "step": 1489 }, { "epoch": 0.23924213230571612, "grad_norm": 0.24336667358875275, "learning_rate": 0.0001, "loss": 1.5297, "step": 1490 }, { "epoch": 0.23940269749518303, "grad_norm": 0.2546198070049286, "learning_rate": 0.0001, "loss": 1.5391, "step": 1491 }, { "epoch": 0.23956326268464997, "grad_norm": 0.24233075976371765, "learning_rate": 0.0001, "loss": 1.5365, "step": 1492 }, { "epoch": 0.2397238278741169, "grad_norm": 0.26909855008125305, "learning_rate": 0.0001, "loss": 1.601, "step": 1493 }, { "epoch": 0.2398843930635838, "grad_norm": 0.2723981440067291, "learning_rate": 0.0001, "loss": 1.6736, "step": 1494 }, { "epoch": 0.24004495825305075, "grad_norm": 0.26914265751838684, "learning_rate": 0.0001, "loss": 1.6543, "step": 1495 }, { "epoch": 0.24020552344251767, "grad_norm": 0.2784527838230133, "learning_rate": 0.0001, "loss": 1.6494, "step": 1496 }, { "epoch": 0.24036608863198458, "grad_norm": 0.2566381096839905, "learning_rate": 0.0001, "loss": 1.5029, "step": 1497 }, { "epoch": 0.2405266538214515, "grad_norm": 0.24081818759441376, "learning_rate": 0.0001, "loss": 1.5187, "step": 1498 }, { "epoch": 0.24068721901091844, "grad_norm": 0.25618815422058105, "learning_rate": 0.0001, "loss": 1.6156, "step": 1499 }, { "epoch": 0.24084778420038536, "grad_norm": 0.24754783511161804, "learning_rate": 0.0001, "loss": 1.5812, "step": 1500 }, { "epoch": 0.24100834938985227, "grad_norm": 0.26266035437583923, "learning_rate": 0.0001, "loss": 1.6404, "step": 1501 }, { "epoch": 0.24116891457931922, "grad_norm": 0.24704232811927795, "learning_rate": 0.0001, "loss": 1.5442, "step": 1502 }, { "epoch": 0.24132947976878613, "grad_norm": 0.25548985600471497, "learning_rate": 0.0001, "loss": 1.5383, "step": 1503 }, { "epoch": 0.24149004495825305, "grad_norm": 0.2660962641239166, "learning_rate": 0.0001, "loss": 1.5722, "step": 1504 }, { "epoch": 0.24165061014771996, "grad_norm": 0.24749712646007538, "learning_rate": 0.0001, "loss": 1.4693, "step": 1505 }, { "epoch": 0.2418111753371869, "grad_norm": 0.24952630698680878, "learning_rate": 0.0001, "loss": 1.5219, "step": 1506 }, { "epoch": 0.24197174052665382, "grad_norm": 0.2579048275947571, "learning_rate": 0.0001, "loss": 1.6029, "step": 1507 }, { "epoch": 0.24213230571612074, "grad_norm": 0.25550124049186707, "learning_rate": 0.0001, "loss": 1.6096, "step": 1508 }, { "epoch": 0.24229287090558768, "grad_norm": 0.25661399960517883, "learning_rate": 0.0001, "loss": 1.5581, "step": 1509 }, { "epoch": 0.2424534360950546, "grad_norm": 0.2509407103061676, "learning_rate": 0.0001, "loss": 1.5554, "step": 1510 }, { "epoch": 0.2426140012845215, "grad_norm": 0.25561586022377014, "learning_rate": 0.0001, "loss": 1.519, "step": 1511 }, { "epoch": 0.24277456647398843, "grad_norm": 0.2742331027984619, "learning_rate": 0.0001, "loss": 1.6084, "step": 1512 }, { "epoch": 0.24293513166345537, "grad_norm": 0.2725457549095154, "learning_rate": 0.0001, "loss": 1.5215, "step": 1513 }, { "epoch": 0.2430956968529223, "grad_norm": 0.25804951786994934, "learning_rate": 0.0001, "loss": 1.5492, "step": 1514 }, { "epoch": 0.2432562620423892, "grad_norm": 0.2545168697834015, "learning_rate": 0.0001, "loss": 1.5754, "step": 1515 }, { "epoch": 0.24341682723185612, "grad_norm": 0.25674569606781006, "learning_rate": 0.0001, "loss": 1.5144, "step": 1516 }, { "epoch": 0.24357739242132306, "grad_norm": 0.2617294192314148, "learning_rate": 0.0001, "loss": 1.6112, "step": 1517 }, { "epoch": 0.24373795761078998, "grad_norm": 0.24985378980636597, "learning_rate": 0.0001, "loss": 1.5346, "step": 1518 }, { "epoch": 0.2438985228002569, "grad_norm": 0.25484099984169006, "learning_rate": 0.0001, "loss": 1.4858, "step": 1519 }, { "epoch": 0.24405908798972384, "grad_norm": 0.2507209777832031, "learning_rate": 0.0001, "loss": 1.5587, "step": 1520 }, { "epoch": 0.24421965317919075, "grad_norm": 0.2766501009464264, "learning_rate": 0.0001, "loss": 1.6381, "step": 1521 }, { "epoch": 0.24438021836865767, "grad_norm": 0.25130683183670044, "learning_rate": 0.0001, "loss": 1.494, "step": 1522 }, { "epoch": 0.24454078355812459, "grad_norm": 0.24670766294002533, "learning_rate": 0.0001, "loss": 1.5293, "step": 1523 }, { "epoch": 0.24470134874759153, "grad_norm": 0.25187036395072937, "learning_rate": 0.0001, "loss": 1.5031, "step": 1524 }, { "epoch": 0.24486191393705845, "grad_norm": 0.24657708406448364, "learning_rate": 0.0001, "loss": 1.5075, "step": 1525 }, { "epoch": 0.24502247912652536, "grad_norm": 0.24846403300762177, "learning_rate": 0.0001, "loss": 1.5942, "step": 1526 }, { "epoch": 0.2451830443159923, "grad_norm": 0.24523480236530304, "learning_rate": 0.0001, "loss": 1.5384, "step": 1527 }, { "epoch": 0.24534360950545922, "grad_norm": 0.25702452659606934, "learning_rate": 0.0001, "loss": 1.5396, "step": 1528 }, { "epoch": 0.24550417469492614, "grad_norm": 0.2500796318054199, "learning_rate": 0.0001, "loss": 1.4692, "step": 1529 }, { "epoch": 0.24566473988439305, "grad_norm": 0.24745675921440125, "learning_rate": 0.0001, "loss": 1.5358, "step": 1530 }, { "epoch": 0.24582530507386, "grad_norm": 0.26025524735450745, "learning_rate": 0.0001, "loss": 1.5659, "step": 1531 }, { "epoch": 0.2459858702633269, "grad_norm": 0.2700679302215576, "learning_rate": 0.0001, "loss": 1.6448, "step": 1532 }, { "epoch": 0.24614643545279383, "grad_norm": 0.24460889399051666, "learning_rate": 0.0001, "loss": 1.6364, "step": 1533 }, { "epoch": 0.24630700064226077, "grad_norm": 0.24033024907112122, "learning_rate": 0.0001, "loss": 1.5412, "step": 1534 }, { "epoch": 0.2464675658317277, "grad_norm": 0.25126370787620544, "learning_rate": 0.0001, "loss": 1.6012, "step": 1535 }, { "epoch": 0.2466281310211946, "grad_norm": 0.2466874122619629, "learning_rate": 0.0001, "loss": 1.6127, "step": 1536 }, { "epoch": 0.24678869621066152, "grad_norm": 0.24531720578670502, "learning_rate": 0.0001, "loss": 1.5165, "step": 1537 }, { "epoch": 0.24694926140012846, "grad_norm": 0.2571285367012024, "learning_rate": 0.0001, "loss": 1.6267, "step": 1538 }, { "epoch": 0.24710982658959538, "grad_norm": 0.25678715109825134, "learning_rate": 0.0001, "loss": 1.5682, "step": 1539 }, { "epoch": 0.2472703917790623, "grad_norm": 0.2479630708694458, "learning_rate": 0.0001, "loss": 1.5792, "step": 1540 }, { "epoch": 0.24743095696852924, "grad_norm": 0.2584831118583679, "learning_rate": 0.0001, "loss": 1.5203, "step": 1541 }, { "epoch": 0.24759152215799615, "grad_norm": 0.2596479654312134, "learning_rate": 0.0001, "loss": 1.6176, "step": 1542 }, { "epoch": 0.24775208734746307, "grad_norm": 0.2657060921192169, "learning_rate": 0.0001, "loss": 1.6722, "step": 1543 }, { "epoch": 0.24791265253692998, "grad_norm": 0.25417783856391907, "learning_rate": 0.0001, "loss": 1.5195, "step": 1544 }, { "epoch": 0.24807321772639693, "grad_norm": 0.25777631998062134, "learning_rate": 0.0001, "loss": 1.566, "step": 1545 }, { "epoch": 0.24823378291586384, "grad_norm": 0.24382346868515015, "learning_rate": 0.0001, "loss": 1.499, "step": 1546 }, { "epoch": 0.24839434810533076, "grad_norm": 0.26882243156433105, "learning_rate": 0.0001, "loss": 1.6298, "step": 1547 }, { "epoch": 0.24855491329479767, "grad_norm": 0.25480836629867554, "learning_rate": 0.0001, "loss": 1.5445, "step": 1548 }, { "epoch": 0.24871547848426462, "grad_norm": 0.269992858171463, "learning_rate": 0.0001, "loss": 1.5671, "step": 1549 }, { "epoch": 0.24887604367373153, "grad_norm": 0.27160781621932983, "learning_rate": 0.0001, "loss": 1.563, "step": 1550 }, { "epoch": 0.24903660886319845, "grad_norm": 0.24489404261112213, "learning_rate": 0.0001, "loss": 1.5698, "step": 1551 }, { "epoch": 0.2491971740526654, "grad_norm": 0.2606049180030823, "learning_rate": 0.0001, "loss": 1.5125, "step": 1552 }, { "epoch": 0.2493577392421323, "grad_norm": 0.2516750395298004, "learning_rate": 0.0001, "loss": 1.621, "step": 1553 }, { "epoch": 0.24951830443159922, "grad_norm": 0.2516483962535858, "learning_rate": 0.0001, "loss": 1.6886, "step": 1554 }, { "epoch": 0.24967886962106614, "grad_norm": 0.24207676947116852, "learning_rate": 0.0001, "loss": 1.5208, "step": 1555 }, { "epoch": 0.24983943481053308, "grad_norm": 0.2646651864051819, "learning_rate": 0.0001, "loss": 1.5327, "step": 1556 }, { "epoch": 0.25, "grad_norm": 0.2527795433998108, "learning_rate": 0.0001, "loss": 1.6105, "step": 1557 }, { "epoch": 0.25016056518946694, "grad_norm": 0.2506919801235199, "learning_rate": 0.0001, "loss": 1.4942, "step": 1558 }, { "epoch": 0.25032113037893383, "grad_norm": 0.24822531640529633, "learning_rate": 0.0001, "loss": 1.5199, "step": 1559 }, { "epoch": 0.2504816955684008, "grad_norm": 0.2495066374540329, "learning_rate": 0.0001, "loss": 1.5926, "step": 1560 }, { "epoch": 0.2506422607578677, "grad_norm": 0.26969271898269653, "learning_rate": 0.0001, "loss": 1.6055, "step": 1561 }, { "epoch": 0.2508028259473346, "grad_norm": 0.2782869040966034, "learning_rate": 0.0001, "loss": 1.5934, "step": 1562 }, { "epoch": 0.25096339113680155, "grad_norm": 0.24785871803760529, "learning_rate": 0.0001, "loss": 1.5717, "step": 1563 }, { "epoch": 0.25112395632626844, "grad_norm": 0.257266104221344, "learning_rate": 0.0001, "loss": 1.5615, "step": 1564 }, { "epoch": 0.2512845215157354, "grad_norm": 0.24169054627418518, "learning_rate": 0.0001, "loss": 1.5768, "step": 1565 }, { "epoch": 0.2514450867052023, "grad_norm": 0.2433304488658905, "learning_rate": 0.0001, "loss": 1.5674, "step": 1566 }, { "epoch": 0.2516056518946692, "grad_norm": 0.26393190026283264, "learning_rate": 0.0001, "loss": 1.6549, "step": 1567 }, { "epoch": 0.25176621708413616, "grad_norm": 0.2528260350227356, "learning_rate": 0.0001, "loss": 1.5326, "step": 1568 }, { "epoch": 0.2519267822736031, "grad_norm": 0.2481459379196167, "learning_rate": 0.0001, "loss": 1.5466, "step": 1569 }, { "epoch": 0.25208734746307, "grad_norm": 0.2505651116371155, "learning_rate": 0.0001, "loss": 1.4812, "step": 1570 }, { "epoch": 0.25224791265253693, "grad_norm": 0.24493616819381714, "learning_rate": 0.0001, "loss": 1.548, "step": 1571 }, { "epoch": 0.2524084778420039, "grad_norm": 0.2680916488170624, "learning_rate": 0.0001, "loss": 1.5595, "step": 1572 }, { "epoch": 0.25256904303147076, "grad_norm": 0.239388108253479, "learning_rate": 0.0001, "loss": 1.5187, "step": 1573 }, { "epoch": 0.2527296082209377, "grad_norm": 0.25344353914260864, "learning_rate": 0.0001, "loss": 1.5244, "step": 1574 }, { "epoch": 0.25289017341040465, "grad_norm": 0.23965075612068176, "learning_rate": 0.0001, "loss": 1.4892, "step": 1575 }, { "epoch": 0.25305073859987154, "grad_norm": 0.2578268349170685, "learning_rate": 0.0001, "loss": 1.5644, "step": 1576 }, { "epoch": 0.2532113037893385, "grad_norm": 0.25776371359825134, "learning_rate": 0.0001, "loss": 1.5863, "step": 1577 }, { "epoch": 0.25337186897880537, "grad_norm": 0.2552551031112671, "learning_rate": 0.0001, "loss": 1.5657, "step": 1578 }, { "epoch": 0.2535324341682723, "grad_norm": 0.2545454204082489, "learning_rate": 0.0001, "loss": 1.5294, "step": 1579 }, { "epoch": 0.25369299935773926, "grad_norm": 0.24245326220989227, "learning_rate": 0.0001, "loss": 1.5041, "step": 1580 }, { "epoch": 0.25385356454720615, "grad_norm": 0.2521633207798004, "learning_rate": 0.0001, "loss": 1.5243, "step": 1581 }, { "epoch": 0.2540141297366731, "grad_norm": 0.2599114775657654, "learning_rate": 0.0001, "loss": 1.5851, "step": 1582 }, { "epoch": 0.25417469492614003, "grad_norm": 0.43263909220695496, "learning_rate": 0.0001, "loss": 1.6567, "step": 1583 }, { "epoch": 0.2543352601156069, "grad_norm": 0.24669121205806732, "learning_rate": 0.0001, "loss": 1.6013, "step": 1584 }, { "epoch": 0.25449582530507386, "grad_norm": 0.25480395555496216, "learning_rate": 0.0001, "loss": 1.5368, "step": 1585 }, { "epoch": 0.2546563904945408, "grad_norm": 0.23804716765880585, "learning_rate": 0.0001, "loss": 1.5947, "step": 1586 }, { "epoch": 0.2548169556840077, "grad_norm": 0.25204139947891235, "learning_rate": 0.0001, "loss": 1.6157, "step": 1587 }, { "epoch": 0.25497752087347464, "grad_norm": 0.25400885939598083, "learning_rate": 0.0001, "loss": 1.5967, "step": 1588 }, { "epoch": 0.2551380860629415, "grad_norm": 0.25997045636177063, "learning_rate": 0.0001, "loss": 1.5763, "step": 1589 }, { "epoch": 0.25529865125240847, "grad_norm": 0.24753816425800323, "learning_rate": 0.0001, "loss": 1.6301, "step": 1590 }, { "epoch": 0.2554592164418754, "grad_norm": 0.24728982150554657, "learning_rate": 0.0001, "loss": 1.5954, "step": 1591 }, { "epoch": 0.2556197816313423, "grad_norm": 0.25942108035087585, "learning_rate": 0.0001, "loss": 1.6172, "step": 1592 }, { "epoch": 0.25578034682080925, "grad_norm": 0.23737384378910065, "learning_rate": 0.0001, "loss": 1.5592, "step": 1593 }, { "epoch": 0.2559409120102762, "grad_norm": 0.26012396812438965, "learning_rate": 0.0001, "loss": 1.4835, "step": 1594 }, { "epoch": 0.2561014771997431, "grad_norm": 0.24980948865413666, "learning_rate": 0.0001, "loss": 1.5963, "step": 1595 }, { "epoch": 0.25626204238921, "grad_norm": 0.25194302201271057, "learning_rate": 0.0001, "loss": 1.4944, "step": 1596 }, { "epoch": 0.25642260757867696, "grad_norm": 0.3093951344490051, "learning_rate": 0.0001, "loss": 1.561, "step": 1597 }, { "epoch": 0.25658317276814385, "grad_norm": 0.25668489933013916, "learning_rate": 0.0001, "loss": 1.598, "step": 1598 }, { "epoch": 0.2567437379576108, "grad_norm": 0.25375503301620483, "learning_rate": 0.0001, "loss": 1.5625, "step": 1599 }, { "epoch": 0.25690430314707774, "grad_norm": 0.2650846540927887, "learning_rate": 0.0001, "loss": 1.5933, "step": 1600 }, { "epoch": 0.2570648683365446, "grad_norm": 0.24364729225635529, "learning_rate": 0.0001, "loss": 1.5039, "step": 1601 }, { "epoch": 0.25722543352601157, "grad_norm": 0.241117924451828, "learning_rate": 0.0001, "loss": 1.549, "step": 1602 }, { "epoch": 0.25738599871547846, "grad_norm": 0.2372046262025833, "learning_rate": 0.0001, "loss": 1.5241, "step": 1603 }, { "epoch": 0.2575465639049454, "grad_norm": 0.27673447132110596, "learning_rate": 0.0001, "loss": 1.6402, "step": 1604 }, { "epoch": 0.25770712909441235, "grad_norm": 0.24358339607715607, "learning_rate": 0.0001, "loss": 1.4948, "step": 1605 }, { "epoch": 0.25786769428387923, "grad_norm": 0.2623170018196106, "learning_rate": 0.0001, "loss": 1.6064, "step": 1606 }, { "epoch": 0.2580282594733462, "grad_norm": 0.2529672086238861, "learning_rate": 0.0001, "loss": 1.5286, "step": 1607 }, { "epoch": 0.2581888246628131, "grad_norm": 0.2579810619354248, "learning_rate": 0.0001, "loss": 1.5211, "step": 1608 }, { "epoch": 0.25834938985228, "grad_norm": 26.601112365722656, "learning_rate": 0.0001, "loss": 1.6601, "step": 1609 }, { "epoch": 0.25850995504174695, "grad_norm": 0.2715809643268585, "learning_rate": 0.0001, "loss": 1.511, "step": 1610 }, { "epoch": 0.2586705202312139, "grad_norm": 0.2676183581352234, "learning_rate": 0.0001, "loss": 1.6073, "step": 1611 }, { "epoch": 0.2588310854206808, "grad_norm": 0.3018186688423157, "learning_rate": 0.0001, "loss": 1.5862, "step": 1612 }, { "epoch": 0.25899165061014773, "grad_norm": 0.34301313757896423, "learning_rate": 0.0001, "loss": 1.5543, "step": 1613 }, { "epoch": 0.2591522157996146, "grad_norm": 0.30930280685424805, "learning_rate": 0.0001, "loss": 1.6061, "step": 1614 }, { "epoch": 0.25931278098908156, "grad_norm": 0.29000356793403625, "learning_rate": 0.0001, "loss": 1.5435, "step": 1615 }, { "epoch": 0.2594733461785485, "grad_norm": 0.2772104740142822, "learning_rate": 0.0001, "loss": 1.6181, "step": 1616 }, { "epoch": 0.2596339113680154, "grad_norm": 0.26202407479286194, "learning_rate": 0.0001, "loss": 1.6247, "step": 1617 }, { "epoch": 0.25979447655748233, "grad_norm": 0.2536071240901947, "learning_rate": 0.0001, "loss": 1.5941, "step": 1618 }, { "epoch": 0.2599550417469493, "grad_norm": 0.2397879958152771, "learning_rate": 0.0001, "loss": 1.5219, "step": 1619 }, { "epoch": 0.26011560693641617, "grad_norm": 0.2490512728691101, "learning_rate": 0.0001, "loss": 1.5562, "step": 1620 }, { "epoch": 0.2602761721258831, "grad_norm": 0.24812810122966766, "learning_rate": 0.0001, "loss": 1.5705, "step": 1621 }, { "epoch": 0.26043673731535005, "grad_norm": 0.2508559226989746, "learning_rate": 0.0001, "loss": 1.6012, "step": 1622 }, { "epoch": 0.26059730250481694, "grad_norm": 0.24309803545475006, "learning_rate": 0.0001, "loss": 1.5945, "step": 1623 }, { "epoch": 0.2607578676942839, "grad_norm": 0.2553725242614746, "learning_rate": 0.0001, "loss": 1.5472, "step": 1624 }, { "epoch": 0.26091843288375083, "grad_norm": 0.26383575797080994, "learning_rate": 0.0001, "loss": 1.5708, "step": 1625 }, { "epoch": 0.2610789980732177, "grad_norm": 0.26289352774620056, "learning_rate": 0.0001, "loss": 1.5839, "step": 1626 }, { "epoch": 0.26123956326268466, "grad_norm": 0.24623575806617737, "learning_rate": 0.0001, "loss": 1.5777, "step": 1627 }, { "epoch": 0.26140012845215155, "grad_norm": 0.2563294470310211, "learning_rate": 0.0001, "loss": 1.5699, "step": 1628 }, { "epoch": 0.2615606936416185, "grad_norm": 0.2564566135406494, "learning_rate": 0.0001, "loss": 1.5754, "step": 1629 }, { "epoch": 0.26172125883108543, "grad_norm": 0.26774367690086365, "learning_rate": 0.0001, "loss": 1.5544, "step": 1630 }, { "epoch": 0.2618818240205523, "grad_norm": 0.2636314928531647, "learning_rate": 0.0001, "loss": 1.642, "step": 1631 }, { "epoch": 0.26204238921001927, "grad_norm": 0.2636924386024475, "learning_rate": 0.0001, "loss": 1.578, "step": 1632 }, { "epoch": 0.2622029543994862, "grad_norm": 0.29126644134521484, "learning_rate": 0.0001, "loss": 1.6165, "step": 1633 }, { "epoch": 0.2623635195889531, "grad_norm": 0.24962428212165833, "learning_rate": 0.0001, "loss": 1.5517, "step": 1634 }, { "epoch": 0.26252408477842004, "grad_norm": 0.2517024278640747, "learning_rate": 0.0001, "loss": 1.5816, "step": 1635 }, { "epoch": 0.262684649967887, "grad_norm": 0.25700587034225464, "learning_rate": 0.0001, "loss": 1.5736, "step": 1636 }, { "epoch": 0.2628452151573539, "grad_norm": 0.25166627764701843, "learning_rate": 0.0001, "loss": 1.5726, "step": 1637 }, { "epoch": 0.2630057803468208, "grad_norm": 0.249893918633461, "learning_rate": 0.0001, "loss": 1.5654, "step": 1638 }, { "epoch": 0.26316634553628776, "grad_norm": 0.2570607662200928, "learning_rate": 0.0001, "loss": 1.5542, "step": 1639 }, { "epoch": 0.26332691072575465, "grad_norm": 0.26825419068336487, "learning_rate": 0.0001, "loss": 1.5636, "step": 1640 }, { "epoch": 0.2634874759152216, "grad_norm": 0.25213783979415894, "learning_rate": 0.0001, "loss": 1.4873, "step": 1641 }, { "epoch": 0.2636480411046885, "grad_norm": 0.25579217076301575, "learning_rate": 0.0001, "loss": 1.5429, "step": 1642 }, { "epoch": 0.2638086062941554, "grad_norm": 0.2491072118282318, "learning_rate": 0.0001, "loss": 1.5657, "step": 1643 }, { "epoch": 0.26396917148362237, "grad_norm": 0.24955804646015167, "learning_rate": 0.0001, "loss": 1.5761, "step": 1644 }, { "epoch": 0.26412973667308925, "grad_norm": 0.272416889667511, "learning_rate": 0.0001, "loss": 1.525, "step": 1645 }, { "epoch": 0.2642903018625562, "grad_norm": 1.6191034317016602, "learning_rate": 0.0001, "loss": 1.6203, "step": 1646 }, { "epoch": 0.26445086705202314, "grad_norm": 0.24718856811523438, "learning_rate": 0.0001, "loss": 1.5627, "step": 1647 }, { "epoch": 0.26461143224149003, "grad_norm": 0.23944930732250214, "learning_rate": 0.0001, "loss": 1.5695, "step": 1648 }, { "epoch": 0.264771997430957, "grad_norm": 0.250887006521225, "learning_rate": 0.0001, "loss": 1.6176, "step": 1649 }, { "epoch": 0.2649325626204239, "grad_norm": 0.2513943016529083, "learning_rate": 0.0001, "loss": 1.5467, "step": 1650 }, { "epoch": 0.2650931278098908, "grad_norm": 0.24110578000545502, "learning_rate": 0.0001, "loss": 1.5811, "step": 1651 }, { "epoch": 0.26525369299935775, "grad_norm": 0.24858005344867706, "learning_rate": 0.0001, "loss": 1.5724, "step": 1652 }, { "epoch": 0.26541425818882464, "grad_norm": 0.7188080549240112, "learning_rate": 0.0001, "loss": 1.5834, "step": 1653 }, { "epoch": 0.2655748233782916, "grad_norm": 0.24724483489990234, "learning_rate": 0.0001, "loss": 1.5319, "step": 1654 }, { "epoch": 0.2657353885677585, "grad_norm": 0.310080349445343, "learning_rate": 0.0001, "loss": 1.5385, "step": 1655 }, { "epoch": 0.2658959537572254, "grad_norm": 0.26888203620910645, "learning_rate": 0.0001, "loss": 1.5919, "step": 1656 }, { "epoch": 0.26605651894669236, "grad_norm": 0.2484586238861084, "learning_rate": 0.0001, "loss": 1.5613, "step": 1657 }, { "epoch": 0.2662170841361593, "grad_norm": 0.25118693709373474, "learning_rate": 0.0001, "loss": 1.5215, "step": 1658 }, { "epoch": 0.2663776493256262, "grad_norm": 0.25257745385169983, "learning_rate": 0.0001, "loss": 1.5993, "step": 1659 }, { "epoch": 0.26653821451509313, "grad_norm": 0.2566640079021454, "learning_rate": 0.0001, "loss": 1.5679, "step": 1660 }, { "epoch": 0.2666987797045601, "grad_norm": 0.26186603307724, "learning_rate": 0.0001, "loss": 1.6129, "step": 1661 }, { "epoch": 0.26685934489402696, "grad_norm": 1.3143200874328613, "learning_rate": 0.0001, "loss": 1.5932, "step": 1662 }, { "epoch": 0.2670199100834939, "grad_norm": 0.25016048550605774, "learning_rate": 0.0001, "loss": 1.6306, "step": 1663 }, { "epoch": 0.26718047527296085, "grad_norm": 0.2505226731300354, "learning_rate": 0.0001, "loss": 1.6446, "step": 1664 }, { "epoch": 0.26734104046242774, "grad_norm": 0.2522873878479004, "learning_rate": 0.0001, "loss": 1.551, "step": 1665 }, { "epoch": 0.2675016056518947, "grad_norm": 0.26987871527671814, "learning_rate": 0.0001, "loss": 1.6022, "step": 1666 }, { "epoch": 0.26766217084136157, "grad_norm": 0.25615522265434265, "learning_rate": 0.0001, "loss": 1.601, "step": 1667 }, { "epoch": 0.2678227360308285, "grad_norm": 0.25392067432403564, "learning_rate": 0.0001, "loss": 1.5457, "step": 1668 }, { "epoch": 0.26798330122029546, "grad_norm": 0.2800738513469696, "learning_rate": 0.0001, "loss": 1.5882, "step": 1669 }, { "epoch": 0.26814386640976234, "grad_norm": 0.2578084468841553, "learning_rate": 0.0001, "loss": 1.5746, "step": 1670 }, { "epoch": 0.2683044315992293, "grad_norm": 0.25805479288101196, "learning_rate": 0.0001, "loss": 1.6026, "step": 1671 }, { "epoch": 0.26846499678869623, "grad_norm": 0.24522823095321655, "learning_rate": 0.0001, "loss": 1.5259, "step": 1672 }, { "epoch": 0.2686255619781631, "grad_norm": 0.244820237159729, "learning_rate": 0.0001, "loss": 1.5385, "step": 1673 }, { "epoch": 0.26878612716763006, "grad_norm": 0.26194414496421814, "learning_rate": 0.0001, "loss": 1.5749, "step": 1674 }, { "epoch": 0.268946692357097, "grad_norm": 0.23983000218868256, "learning_rate": 0.0001, "loss": 1.5383, "step": 1675 }, { "epoch": 0.2691072575465639, "grad_norm": 0.24566598236560822, "learning_rate": 0.0001, "loss": 1.5422, "step": 1676 }, { "epoch": 0.26926782273603084, "grad_norm": 0.2699583172798157, "learning_rate": 0.0001, "loss": 1.567, "step": 1677 }, { "epoch": 0.2694283879254977, "grad_norm": 0.2636047899723053, "learning_rate": 0.0001, "loss": 1.5411, "step": 1678 }, { "epoch": 0.26958895311496467, "grad_norm": 0.2641831636428833, "learning_rate": 0.0001, "loss": 1.6095, "step": 1679 }, { "epoch": 0.2697495183044316, "grad_norm": 0.24597400426864624, "learning_rate": 0.0001, "loss": 1.5763, "step": 1680 }, { "epoch": 0.2699100834938985, "grad_norm": 0.24628034234046936, "learning_rate": 0.0001, "loss": 1.5545, "step": 1681 }, { "epoch": 0.27007064868336544, "grad_norm": 0.2375197559595108, "learning_rate": 0.0001, "loss": 1.4873, "step": 1682 }, { "epoch": 0.2702312138728324, "grad_norm": 0.23666463792324066, "learning_rate": 0.0001, "loss": 1.5143, "step": 1683 }, { "epoch": 0.2703917790622993, "grad_norm": 0.24433469772338867, "learning_rate": 0.0001, "loss": 1.4781, "step": 1684 }, { "epoch": 0.2705523442517662, "grad_norm": 0.2497817575931549, "learning_rate": 0.0001, "loss": 1.5663, "step": 1685 }, { "epoch": 0.27071290944123316, "grad_norm": 0.25570669770240784, "learning_rate": 0.0001, "loss": 1.505, "step": 1686 }, { "epoch": 0.27087347463070005, "grad_norm": 0.2590802311897278, "learning_rate": 0.0001, "loss": 1.5872, "step": 1687 }, { "epoch": 0.271034039820167, "grad_norm": 0.25787225365638733, "learning_rate": 0.0001, "loss": 1.5975, "step": 1688 }, { "epoch": 0.27119460500963394, "grad_norm": 0.2446824163198471, "learning_rate": 0.0001, "loss": 1.5587, "step": 1689 }, { "epoch": 0.2713551701991008, "grad_norm": 0.25209805369377136, "learning_rate": 0.0001, "loss": 1.6, "step": 1690 }, { "epoch": 0.27151573538856777, "grad_norm": 0.2606227397918701, "learning_rate": 0.0001, "loss": 1.5605, "step": 1691 }, { "epoch": 0.27167630057803466, "grad_norm": 0.24890798330307007, "learning_rate": 0.0001, "loss": 1.5679, "step": 1692 }, { "epoch": 0.2718368657675016, "grad_norm": 0.247292622923851, "learning_rate": 0.0001, "loss": 1.591, "step": 1693 }, { "epoch": 0.27199743095696854, "grad_norm": 0.2650787830352783, "learning_rate": 0.0001, "loss": 1.576, "step": 1694 }, { "epoch": 0.27215799614643543, "grad_norm": 0.238000750541687, "learning_rate": 0.0001, "loss": 1.4601, "step": 1695 }, { "epoch": 0.2723185613359024, "grad_norm": 0.2528303861618042, "learning_rate": 0.0001, "loss": 1.552, "step": 1696 }, { "epoch": 0.2724791265253693, "grad_norm": 0.28379806876182556, "learning_rate": 0.0001, "loss": 1.5382, "step": 1697 }, { "epoch": 0.2726396917148362, "grad_norm": 0.2615777850151062, "learning_rate": 0.0001, "loss": 1.5511, "step": 1698 }, { "epoch": 0.27280025690430315, "grad_norm": 0.6315487623214722, "learning_rate": 0.0001, "loss": 1.5045, "step": 1699 }, { "epoch": 0.2729608220937701, "grad_norm": 0.2547401487827301, "learning_rate": 0.0001, "loss": 1.5649, "step": 1700 }, { "epoch": 0.273121387283237, "grad_norm": 0.2653486132621765, "learning_rate": 0.0001, "loss": 1.5767, "step": 1701 }, { "epoch": 0.2732819524727039, "grad_norm": 0.25836852192878723, "learning_rate": 0.0001, "loss": 1.5331, "step": 1702 }, { "epoch": 0.2734425176621708, "grad_norm": 0.27013924717903137, "learning_rate": 0.0001, "loss": 1.5654, "step": 1703 }, { "epoch": 0.27360308285163776, "grad_norm": 0.2533862590789795, "learning_rate": 0.0001, "loss": 1.5766, "step": 1704 }, { "epoch": 0.2737636480411047, "grad_norm": 0.2571921646595001, "learning_rate": 0.0001, "loss": 1.4723, "step": 1705 }, { "epoch": 0.2739242132305716, "grad_norm": 0.27870211005210876, "learning_rate": 0.0001, "loss": 1.5889, "step": 1706 }, { "epoch": 0.27408477842003853, "grad_norm": 0.27164745330810547, "learning_rate": 0.0001, "loss": 1.5685, "step": 1707 }, { "epoch": 0.2742453436095055, "grad_norm": 0.2703072130680084, "learning_rate": 0.0001, "loss": 1.5222, "step": 1708 }, { "epoch": 0.27440590879897236, "grad_norm": 0.280581533908844, "learning_rate": 0.0001, "loss": 1.5795, "step": 1709 }, { "epoch": 0.2745664739884393, "grad_norm": 0.25726816058158875, "learning_rate": 0.0001, "loss": 1.5816, "step": 1710 }, { "epoch": 0.27472703917790625, "grad_norm": 0.258029043674469, "learning_rate": 0.0001, "loss": 1.5867, "step": 1711 }, { "epoch": 0.27488760436737314, "grad_norm": 0.28576207160949707, "learning_rate": 0.0001, "loss": 1.586, "step": 1712 }, { "epoch": 0.2750481695568401, "grad_norm": 0.24984319508075714, "learning_rate": 0.0001, "loss": 1.6312, "step": 1713 }, { "epoch": 0.275208734746307, "grad_norm": 0.251816987991333, "learning_rate": 0.0001, "loss": 1.548, "step": 1714 }, { "epoch": 0.2753692999357739, "grad_norm": 0.2735465168952942, "learning_rate": 0.0001, "loss": 1.6175, "step": 1715 }, { "epoch": 0.27552986512524086, "grad_norm": 0.27161598205566406, "learning_rate": 0.0001, "loss": 1.5866, "step": 1716 }, { "epoch": 0.27569043031470775, "grad_norm": 0.254117488861084, "learning_rate": 0.0001, "loss": 1.5676, "step": 1717 }, { "epoch": 0.2758509955041747, "grad_norm": 0.2731083929538727, "learning_rate": 0.0001, "loss": 1.5807, "step": 1718 }, { "epoch": 0.27601156069364163, "grad_norm": 0.25791260600090027, "learning_rate": 0.0001, "loss": 1.5614, "step": 1719 }, { "epoch": 0.2761721258831085, "grad_norm": 0.2510983347892761, "learning_rate": 0.0001, "loss": 1.5552, "step": 1720 }, { "epoch": 0.27633269107257546, "grad_norm": 0.2803308367729187, "learning_rate": 0.0001, "loss": 1.6478, "step": 1721 }, { "epoch": 0.2764932562620424, "grad_norm": 0.2563945949077606, "learning_rate": 0.0001, "loss": 1.6291, "step": 1722 }, { "epoch": 0.2766538214515093, "grad_norm": 0.24843338131904602, "learning_rate": 0.0001, "loss": 1.5558, "step": 1723 }, { "epoch": 0.27681438664097624, "grad_norm": 0.2584153115749359, "learning_rate": 0.0001, "loss": 1.5683, "step": 1724 }, { "epoch": 0.2769749518304432, "grad_norm": 0.24279606342315674, "learning_rate": 0.0001, "loss": 1.5223, "step": 1725 }, { "epoch": 0.27713551701991007, "grad_norm": 0.24455226957798004, "learning_rate": 0.0001, "loss": 1.5448, "step": 1726 }, { "epoch": 0.277296082209377, "grad_norm": 0.2676351070404053, "learning_rate": 0.0001, "loss": 1.5099, "step": 1727 }, { "epoch": 0.2774566473988439, "grad_norm": 0.26381057500839233, "learning_rate": 0.0001, "loss": 1.5533, "step": 1728 }, { "epoch": 0.27761721258831085, "grad_norm": 0.23981605470180511, "learning_rate": 0.0001, "loss": 1.5411, "step": 1729 }, { "epoch": 0.2777777777777778, "grad_norm": 0.27142682671546936, "learning_rate": 0.0001, "loss": 1.5469, "step": 1730 }, { "epoch": 0.2779383429672447, "grad_norm": 0.2526998519897461, "learning_rate": 0.0001, "loss": 1.573, "step": 1731 }, { "epoch": 0.2780989081567116, "grad_norm": 0.2599260210990906, "learning_rate": 0.0001, "loss": 1.564, "step": 1732 }, { "epoch": 0.27825947334617857, "grad_norm": 0.23529094457626343, "learning_rate": 0.0001, "loss": 1.517, "step": 1733 }, { "epoch": 0.27842003853564545, "grad_norm": 0.2527669370174408, "learning_rate": 0.0001, "loss": 1.5827, "step": 1734 }, { "epoch": 0.2785806037251124, "grad_norm": 0.24101081490516663, "learning_rate": 0.0001, "loss": 1.526, "step": 1735 }, { "epoch": 0.27874116891457934, "grad_norm": 0.25223127007484436, "learning_rate": 0.0001, "loss": 1.5617, "step": 1736 }, { "epoch": 0.27890173410404623, "grad_norm": 0.26755383610725403, "learning_rate": 0.0001, "loss": 1.6618, "step": 1737 }, { "epoch": 0.27906229929351317, "grad_norm": 0.24168036878108978, "learning_rate": 0.0001, "loss": 1.5689, "step": 1738 }, { "epoch": 0.2792228644829801, "grad_norm": 0.2531331479549408, "learning_rate": 0.0001, "loss": 1.5838, "step": 1739 }, { "epoch": 0.279383429672447, "grad_norm": 0.2503527104854584, "learning_rate": 0.0001, "loss": 1.5343, "step": 1740 }, { "epoch": 0.27954399486191395, "grad_norm": 0.2647632956504822, "learning_rate": 0.0001, "loss": 1.5654, "step": 1741 }, { "epoch": 0.27970456005138083, "grad_norm": 0.27943092584609985, "learning_rate": 0.0001, "loss": 1.6067, "step": 1742 }, { "epoch": 0.2798651252408478, "grad_norm": 0.25607460737228394, "learning_rate": 0.0001, "loss": 1.548, "step": 1743 }, { "epoch": 0.2800256904303147, "grad_norm": 0.23967596888542175, "learning_rate": 0.0001, "loss": 1.4828, "step": 1744 }, { "epoch": 0.2801862556197816, "grad_norm": 0.2720800042152405, "learning_rate": 0.0001, "loss": 1.5305, "step": 1745 }, { "epoch": 0.28034682080924855, "grad_norm": 0.23610429465770721, "learning_rate": 0.0001, "loss": 1.5107, "step": 1746 }, { "epoch": 0.2805073859987155, "grad_norm": 0.2505277395248413, "learning_rate": 0.0001, "loss": 1.525, "step": 1747 }, { "epoch": 0.2806679511881824, "grad_norm": 0.26155003905296326, "learning_rate": 0.0001, "loss": 1.5811, "step": 1748 }, { "epoch": 0.28082851637764933, "grad_norm": 0.2556462585926056, "learning_rate": 0.0001, "loss": 1.5804, "step": 1749 }, { "epoch": 0.2809890815671163, "grad_norm": 0.2547653019428253, "learning_rate": 0.0001, "loss": 1.5572, "step": 1750 }, { "epoch": 0.28114964675658316, "grad_norm": 0.2455403059720993, "learning_rate": 0.0001, "loss": 1.5749, "step": 1751 }, { "epoch": 0.2813102119460501, "grad_norm": 0.264738529920578, "learning_rate": 0.0001, "loss": 1.6097, "step": 1752 }, { "epoch": 0.28147077713551705, "grad_norm": 0.23003803193569183, "learning_rate": 0.0001, "loss": 1.4107, "step": 1753 }, { "epoch": 0.28163134232498394, "grad_norm": 0.24369120597839355, "learning_rate": 0.0001, "loss": 1.5842, "step": 1754 }, { "epoch": 0.2817919075144509, "grad_norm": 0.24253031611442566, "learning_rate": 0.0001, "loss": 1.5105, "step": 1755 }, { "epoch": 0.28195247270391777, "grad_norm": 0.2415713667869568, "learning_rate": 0.0001, "loss": 1.5624, "step": 1756 }, { "epoch": 0.2821130378933847, "grad_norm": 0.25316810607910156, "learning_rate": 0.0001, "loss": 1.5913, "step": 1757 }, { "epoch": 0.28227360308285165, "grad_norm": 0.25151219964027405, "learning_rate": 0.0001, "loss": 1.5772, "step": 1758 }, { "epoch": 0.28243416827231854, "grad_norm": 0.2585172951221466, "learning_rate": 0.0001, "loss": 1.6321, "step": 1759 }, { "epoch": 0.2825947334617855, "grad_norm": 0.2640420198440552, "learning_rate": 0.0001, "loss": 1.6788, "step": 1760 }, { "epoch": 0.28275529865125243, "grad_norm": 0.2329113632440567, "learning_rate": 0.0001, "loss": 1.5101, "step": 1761 }, { "epoch": 0.2829158638407193, "grad_norm": 0.2561537027359009, "learning_rate": 0.0001, "loss": 1.5514, "step": 1762 }, { "epoch": 0.28307642903018626, "grad_norm": 0.25183650851249695, "learning_rate": 0.0001, "loss": 1.6311, "step": 1763 }, { "epoch": 0.2832369942196532, "grad_norm": 0.23833124339580536, "learning_rate": 0.0001, "loss": 1.58, "step": 1764 }, { "epoch": 0.2833975594091201, "grad_norm": 0.24028009176254272, "learning_rate": 0.0001, "loss": 1.5065, "step": 1765 }, { "epoch": 0.28355812459858704, "grad_norm": 0.2601412534713745, "learning_rate": 0.0001, "loss": 1.5283, "step": 1766 }, { "epoch": 0.2837186897880539, "grad_norm": 0.24666845798492432, "learning_rate": 0.0001, "loss": 1.4702, "step": 1767 }, { "epoch": 0.28387925497752087, "grad_norm": 0.24733489751815796, "learning_rate": 0.0001, "loss": 1.5611, "step": 1768 }, { "epoch": 0.2840398201669878, "grad_norm": 0.24647156894207, "learning_rate": 0.0001, "loss": 1.5644, "step": 1769 }, { "epoch": 0.2842003853564547, "grad_norm": 0.25269049406051636, "learning_rate": 0.0001, "loss": 1.5607, "step": 1770 }, { "epoch": 0.28436095054592164, "grad_norm": 0.26135051250457764, "learning_rate": 0.0001, "loss": 1.618, "step": 1771 }, { "epoch": 0.2845215157353886, "grad_norm": 0.2525368928909302, "learning_rate": 0.0001, "loss": 1.4101, "step": 1772 }, { "epoch": 0.2846820809248555, "grad_norm": 0.2654610872268677, "learning_rate": 0.0001, "loss": 1.492, "step": 1773 }, { "epoch": 0.2848426461143224, "grad_norm": 0.24932320415973663, "learning_rate": 0.0001, "loss": 1.4902, "step": 1774 }, { "epoch": 0.28500321130378936, "grad_norm": 0.26075899600982666, "learning_rate": 0.0001, "loss": 1.5023, "step": 1775 }, { "epoch": 0.28516377649325625, "grad_norm": 0.2603805363178253, "learning_rate": 0.0001, "loss": 1.5619, "step": 1776 }, { "epoch": 0.2853243416827232, "grad_norm": 0.26587820053100586, "learning_rate": 0.0001, "loss": 1.5643, "step": 1777 }, { "epoch": 0.28548490687219014, "grad_norm": 0.2653835415840149, "learning_rate": 0.0001, "loss": 1.5833, "step": 1778 }, { "epoch": 0.285645472061657, "grad_norm": 0.4035285711288452, "learning_rate": 0.0001, "loss": 1.5592, "step": 1779 }, { "epoch": 0.28580603725112397, "grad_norm": 0.27328649163246155, "learning_rate": 0.0001, "loss": 1.5187, "step": 1780 }, { "epoch": 0.28596660244059086, "grad_norm": 0.2567174732685089, "learning_rate": 0.0001, "loss": 1.6116, "step": 1781 }, { "epoch": 0.2861271676300578, "grad_norm": 0.2559148967266083, "learning_rate": 0.0001, "loss": 1.5958, "step": 1782 }, { "epoch": 0.28628773281952474, "grad_norm": 0.26825404167175293, "learning_rate": 0.0001, "loss": 1.535, "step": 1783 }, { "epoch": 0.28644829800899163, "grad_norm": 0.24423590302467346, "learning_rate": 0.0001, "loss": 1.5303, "step": 1784 }, { "epoch": 0.2866088631984586, "grad_norm": 0.24644210934638977, "learning_rate": 0.0001, "loss": 1.4775, "step": 1785 }, { "epoch": 0.2867694283879255, "grad_norm": 0.2514526844024658, "learning_rate": 0.0001, "loss": 1.5425, "step": 1786 }, { "epoch": 0.2869299935773924, "grad_norm": 0.2657131254673004, "learning_rate": 0.0001, "loss": 1.5442, "step": 1787 }, { "epoch": 0.28709055876685935, "grad_norm": 0.24807108938694, "learning_rate": 0.0001, "loss": 1.5325, "step": 1788 }, { "epoch": 0.2872511239563263, "grad_norm": 0.24604395031929016, "learning_rate": 0.0001, "loss": 1.6047, "step": 1789 }, { "epoch": 0.2874116891457932, "grad_norm": 0.25090017914772034, "learning_rate": 0.0001, "loss": 1.5903, "step": 1790 }, { "epoch": 0.2875722543352601, "grad_norm": 0.2556399703025818, "learning_rate": 0.0001, "loss": 1.6333, "step": 1791 }, { "epoch": 0.287732819524727, "grad_norm": 0.250573068857193, "learning_rate": 0.0001, "loss": 1.6146, "step": 1792 }, { "epoch": 0.28789338471419396, "grad_norm": 0.25975993275642395, "learning_rate": 0.0001, "loss": 1.6196, "step": 1793 }, { "epoch": 0.2880539499036609, "grad_norm": 0.23993436992168427, "learning_rate": 0.0001, "loss": 1.5714, "step": 1794 }, { "epoch": 0.2882145150931278, "grad_norm": 0.2708066701889038, "learning_rate": 0.0001, "loss": 1.5882, "step": 1795 }, { "epoch": 0.28837508028259473, "grad_norm": 0.2533797025680542, "learning_rate": 0.0001, "loss": 1.6494, "step": 1796 }, { "epoch": 0.2885356454720617, "grad_norm": 0.24018733203411102, "learning_rate": 0.0001, "loss": 1.4955, "step": 1797 }, { "epoch": 0.28869621066152856, "grad_norm": 0.25984418392181396, "learning_rate": 0.0001, "loss": 1.5455, "step": 1798 }, { "epoch": 0.2888567758509955, "grad_norm": 0.23806238174438477, "learning_rate": 0.0001, "loss": 1.5164, "step": 1799 }, { "epoch": 0.28901734104046245, "grad_norm": 0.26575228571891785, "learning_rate": 0.0001, "loss": 1.547, "step": 1800 }, { "epoch": 0.28917790622992934, "grad_norm": 0.26070842146873474, "learning_rate": 0.0001, "loss": 1.6211, "step": 1801 }, { "epoch": 0.2893384714193963, "grad_norm": 0.24455119669437408, "learning_rate": 0.0001, "loss": 1.5171, "step": 1802 }, { "epoch": 0.2894990366088632, "grad_norm": 0.25976407527923584, "learning_rate": 0.0001, "loss": 1.6557, "step": 1803 }, { "epoch": 0.2896596017983301, "grad_norm": 0.26272279024124146, "learning_rate": 0.0001, "loss": 1.497, "step": 1804 }, { "epoch": 0.28982016698779706, "grad_norm": 0.25596457719802856, "learning_rate": 0.0001, "loss": 1.6153, "step": 1805 }, { "epoch": 0.28998073217726394, "grad_norm": 0.2553982436656952, "learning_rate": 0.0001, "loss": 1.5958, "step": 1806 }, { "epoch": 0.2901412973667309, "grad_norm": 0.24437467753887177, "learning_rate": 0.0001, "loss": 1.5436, "step": 1807 }, { "epoch": 0.29030186255619783, "grad_norm": 0.25055429339408875, "learning_rate": 0.0001, "loss": 1.5484, "step": 1808 }, { "epoch": 0.2904624277456647, "grad_norm": 0.25160881876945496, "learning_rate": 0.0001, "loss": 1.5617, "step": 1809 }, { "epoch": 0.29062299293513166, "grad_norm": 0.26008692383766174, "learning_rate": 0.0001, "loss": 1.5801, "step": 1810 }, { "epoch": 0.2907835581245986, "grad_norm": 0.2523382902145386, "learning_rate": 0.0001, "loss": 1.5315, "step": 1811 }, { "epoch": 0.2909441233140655, "grad_norm": 0.269346684217453, "learning_rate": 0.0001, "loss": 1.5889, "step": 1812 }, { "epoch": 0.29110468850353244, "grad_norm": 0.24388159811496735, "learning_rate": 0.0001, "loss": 1.4948, "step": 1813 }, { "epoch": 0.2912652536929994, "grad_norm": 0.25000280141830444, "learning_rate": 0.0001, "loss": 1.5402, "step": 1814 }, { "epoch": 0.29142581888246627, "grad_norm": 0.2693331241607666, "learning_rate": 0.0001, "loss": 1.5676, "step": 1815 }, { "epoch": 0.2915863840719332, "grad_norm": 0.25427648425102234, "learning_rate": 0.0001, "loss": 1.6353, "step": 1816 }, { "epoch": 0.2917469492614001, "grad_norm": 0.24013175070285797, "learning_rate": 0.0001, "loss": 1.5296, "step": 1817 }, { "epoch": 0.29190751445086704, "grad_norm": 0.27462080121040344, "learning_rate": 0.0001, "loss": 1.5787, "step": 1818 }, { "epoch": 0.292068079640334, "grad_norm": 0.27167797088623047, "learning_rate": 0.0001, "loss": 1.5974, "step": 1819 }, { "epoch": 0.2922286448298009, "grad_norm": 0.263680100440979, "learning_rate": 0.0001, "loss": 1.6008, "step": 1820 }, { "epoch": 0.2923892100192678, "grad_norm": 0.24112461507320404, "learning_rate": 0.0001, "loss": 1.5226, "step": 1821 }, { "epoch": 0.29254977520873476, "grad_norm": 0.2383682131767273, "learning_rate": 0.0001, "loss": 1.5073, "step": 1822 }, { "epoch": 0.29271034039820165, "grad_norm": 0.23662635684013367, "learning_rate": 0.0001, "loss": 1.4506, "step": 1823 }, { "epoch": 0.2928709055876686, "grad_norm": 0.2591504752635956, "learning_rate": 0.0001, "loss": 1.5848, "step": 1824 }, { "epoch": 0.29303147077713554, "grad_norm": 0.26476243138313293, "learning_rate": 0.0001, "loss": 1.5316, "step": 1825 }, { "epoch": 0.2931920359666024, "grad_norm": 0.2454085797071457, "learning_rate": 0.0001, "loss": 1.5379, "step": 1826 }, { "epoch": 0.29335260115606937, "grad_norm": 0.2538471817970276, "learning_rate": 0.0001, "loss": 1.6104, "step": 1827 }, { "epoch": 0.2935131663455363, "grad_norm": 0.2743769586086273, "learning_rate": 0.0001, "loss": 1.6508, "step": 1828 }, { "epoch": 0.2936737315350032, "grad_norm": 0.25146836042404175, "learning_rate": 0.0001, "loss": 1.5778, "step": 1829 }, { "epoch": 0.29383429672447015, "grad_norm": 0.2563507556915283, "learning_rate": 0.0001, "loss": 1.5977, "step": 1830 }, { "epoch": 0.29399486191393703, "grad_norm": 0.2630648612976074, "learning_rate": 0.0001, "loss": 1.5948, "step": 1831 }, { "epoch": 0.294155427103404, "grad_norm": 0.26110172271728516, "learning_rate": 0.0001, "loss": 1.5926, "step": 1832 }, { "epoch": 0.2943159922928709, "grad_norm": 0.2517088055610657, "learning_rate": 0.0001, "loss": 1.5772, "step": 1833 }, { "epoch": 0.2944765574823378, "grad_norm": 0.24928121268749237, "learning_rate": 0.0001, "loss": 1.6135, "step": 1834 }, { "epoch": 0.29463712267180475, "grad_norm": 0.23833926022052765, "learning_rate": 0.0001, "loss": 1.5628, "step": 1835 }, { "epoch": 0.2947976878612717, "grad_norm": 0.24982048571109772, "learning_rate": 0.0001, "loss": 1.491, "step": 1836 }, { "epoch": 0.2949582530507386, "grad_norm": 0.3186301290988922, "learning_rate": 0.0001, "loss": 1.6437, "step": 1837 }, { "epoch": 0.2951188182402055, "grad_norm": 0.2510720491409302, "learning_rate": 0.0001, "loss": 1.5703, "step": 1838 }, { "epoch": 0.29527938342967247, "grad_norm": 0.2615893483161926, "learning_rate": 0.0001, "loss": 1.591, "step": 1839 }, { "epoch": 0.29543994861913936, "grad_norm": 0.25472602248191833, "learning_rate": 0.0001, "loss": 1.5373, "step": 1840 }, { "epoch": 0.2956005138086063, "grad_norm": 0.2813533842563629, "learning_rate": 0.0001, "loss": 1.508, "step": 1841 }, { "epoch": 0.2957610789980732, "grad_norm": 0.25984275341033936, "learning_rate": 0.0001, "loss": 1.5794, "step": 1842 }, { "epoch": 0.29592164418754013, "grad_norm": 0.25177472829818726, "learning_rate": 0.0001, "loss": 1.5174, "step": 1843 }, { "epoch": 0.2960822093770071, "grad_norm": 0.24683226644992828, "learning_rate": 0.0001, "loss": 1.4671, "step": 1844 }, { "epoch": 0.29624277456647397, "grad_norm": 0.26041093468666077, "learning_rate": 0.0001, "loss": 1.6242, "step": 1845 }, { "epoch": 0.2964033397559409, "grad_norm": 0.24409382045269012, "learning_rate": 0.0001, "loss": 1.5268, "step": 1846 }, { "epoch": 0.29656390494540785, "grad_norm": 0.25203752517700195, "learning_rate": 0.0001, "loss": 1.5728, "step": 1847 }, { "epoch": 0.29672447013487474, "grad_norm": 0.27876532077789307, "learning_rate": 0.0001, "loss": 1.5732, "step": 1848 }, { "epoch": 0.2968850353243417, "grad_norm": 0.28675970435142517, "learning_rate": 0.0001, "loss": 1.5728, "step": 1849 }, { "epoch": 0.2970456005138086, "grad_norm": 0.2579909563064575, "learning_rate": 0.0001, "loss": 1.5657, "step": 1850 }, { "epoch": 0.2972061657032755, "grad_norm": 0.26867666840553284, "learning_rate": 0.0001, "loss": 1.5831, "step": 1851 }, { "epoch": 0.29736673089274246, "grad_norm": 0.24972176551818848, "learning_rate": 0.0001, "loss": 1.5103, "step": 1852 }, { "epoch": 0.2975272960822094, "grad_norm": 0.2798141539096832, "learning_rate": 0.0001, "loss": 1.6262, "step": 1853 }, { "epoch": 0.2976878612716763, "grad_norm": 0.27341991662979126, "learning_rate": 0.0001, "loss": 1.6127, "step": 1854 }, { "epoch": 0.29784842646114323, "grad_norm": 0.28879499435424805, "learning_rate": 0.0001, "loss": 1.5381, "step": 1855 }, { "epoch": 0.2980089916506101, "grad_norm": 0.2521066963672638, "learning_rate": 0.0001, "loss": 1.5628, "step": 1856 }, { "epoch": 0.29816955684007707, "grad_norm": 0.24854888021945953, "learning_rate": 0.0001, "loss": 1.548, "step": 1857 }, { "epoch": 0.298330122029544, "grad_norm": 0.2579093873500824, "learning_rate": 0.0001, "loss": 1.5704, "step": 1858 }, { "epoch": 0.2984906872190109, "grad_norm": 0.26755714416503906, "learning_rate": 0.0001, "loss": 1.6673, "step": 1859 }, { "epoch": 0.29865125240847784, "grad_norm": 0.2619021236896515, "learning_rate": 0.0001, "loss": 1.5102, "step": 1860 }, { "epoch": 0.2988118175979448, "grad_norm": 0.2696104347705841, "learning_rate": 0.0001, "loss": 1.4678, "step": 1861 }, { "epoch": 0.2989723827874117, "grad_norm": 0.2595376670360565, "learning_rate": 0.0001, "loss": 1.5342, "step": 1862 }, { "epoch": 0.2991329479768786, "grad_norm": 0.2659249007701874, "learning_rate": 0.0001, "loss": 1.5667, "step": 1863 }, { "epoch": 0.29929351316634556, "grad_norm": 0.27641892433166504, "learning_rate": 0.0001, "loss": 1.7052, "step": 1864 }, { "epoch": 0.29945407835581245, "grad_norm": 0.24762192368507385, "learning_rate": 0.0001, "loss": 1.5204, "step": 1865 }, { "epoch": 0.2996146435452794, "grad_norm": 0.25400760769844055, "learning_rate": 0.0001, "loss": 1.5343, "step": 1866 }, { "epoch": 0.29977520873474633, "grad_norm": 0.24175044894218445, "learning_rate": 0.0001, "loss": 1.6044, "step": 1867 }, { "epoch": 0.2999357739242132, "grad_norm": 0.24022619426250458, "learning_rate": 0.0001, "loss": 1.5563, "step": 1868 }, { "epoch": 0.30009633911368017, "grad_norm": 0.25152572989463806, "learning_rate": 0.0001, "loss": 1.5692, "step": 1869 }, { "epoch": 0.30025690430314705, "grad_norm": 0.26082396507263184, "learning_rate": 0.0001, "loss": 1.5929, "step": 1870 }, { "epoch": 0.300417469492614, "grad_norm": 0.28792721033096313, "learning_rate": 0.0001, "loss": 1.5404, "step": 1871 }, { "epoch": 0.30057803468208094, "grad_norm": 0.25064149498939514, "learning_rate": 0.0001, "loss": 1.6066, "step": 1872 }, { "epoch": 0.30073859987154783, "grad_norm": 0.2479681670665741, "learning_rate": 0.0001, "loss": 1.5462, "step": 1873 }, { "epoch": 0.3008991650610148, "grad_norm": 0.6160130500793457, "learning_rate": 0.0001, "loss": 1.5941, "step": 1874 }, { "epoch": 0.3010597302504817, "grad_norm": 0.2493082731962204, "learning_rate": 0.0001, "loss": 1.5703, "step": 1875 }, { "epoch": 0.3012202954399486, "grad_norm": 0.23921550810337067, "learning_rate": 0.0001, "loss": 1.4552, "step": 1876 }, { "epoch": 0.30138086062941555, "grad_norm": 0.2838565409183502, "learning_rate": 0.0001, "loss": 1.5857, "step": 1877 }, { "epoch": 0.3015414258188825, "grad_norm": 0.268409788608551, "learning_rate": 0.0001, "loss": 1.5909, "step": 1878 }, { "epoch": 0.3017019910083494, "grad_norm": 0.27479058504104614, "learning_rate": 0.0001, "loss": 1.5766, "step": 1879 }, { "epoch": 0.3018625561978163, "grad_norm": 0.24375787377357483, "learning_rate": 0.0001, "loss": 1.514, "step": 1880 }, { "epoch": 0.3020231213872832, "grad_norm": 0.24530580639839172, "learning_rate": 0.0001, "loss": 1.5464, "step": 1881 }, { "epoch": 0.30218368657675015, "grad_norm": 0.2485753893852234, "learning_rate": 0.0001, "loss": 1.5329, "step": 1882 }, { "epoch": 0.3023442517662171, "grad_norm": 0.24819721281528473, "learning_rate": 0.0001, "loss": 1.5531, "step": 1883 }, { "epoch": 0.302504816955684, "grad_norm": 0.27060645818710327, "learning_rate": 0.0001, "loss": 1.6018, "step": 1884 }, { "epoch": 0.30266538214515093, "grad_norm": 0.2594221532344818, "learning_rate": 0.0001, "loss": 1.4978, "step": 1885 }, { "epoch": 0.3028259473346179, "grad_norm": 0.23679520189762115, "learning_rate": 0.0001, "loss": 1.486, "step": 1886 }, { "epoch": 0.30298651252408476, "grad_norm": 0.24668680131435394, "learning_rate": 0.0001, "loss": 1.4878, "step": 1887 }, { "epoch": 0.3031470777135517, "grad_norm": 0.24873335659503937, "learning_rate": 0.0001, "loss": 1.4877, "step": 1888 }, { "epoch": 0.30330764290301865, "grad_norm": 0.25774168968200684, "learning_rate": 0.0001, "loss": 1.5324, "step": 1889 }, { "epoch": 0.30346820809248554, "grad_norm": 0.25117889046669006, "learning_rate": 0.0001, "loss": 1.5499, "step": 1890 }, { "epoch": 0.3036287732819525, "grad_norm": 0.2548123896121979, "learning_rate": 0.0001, "loss": 1.5137, "step": 1891 }, { "epoch": 0.3037893384714194, "grad_norm": 0.23779501020908356, "learning_rate": 0.0001, "loss": 1.5505, "step": 1892 }, { "epoch": 0.3039499036608863, "grad_norm": 0.24799437820911407, "learning_rate": 0.0001, "loss": 1.5272, "step": 1893 }, { "epoch": 0.30411046885035325, "grad_norm": 0.2357570379972458, "learning_rate": 0.0001, "loss": 1.5371, "step": 1894 }, { "epoch": 0.30427103403982014, "grad_norm": 0.24633687734603882, "learning_rate": 0.0001, "loss": 1.5216, "step": 1895 }, { "epoch": 0.3044315992292871, "grad_norm": 0.2573365271091461, "learning_rate": 0.0001, "loss": 1.608, "step": 1896 }, { "epoch": 0.30459216441875403, "grad_norm": 0.24100179970264435, "learning_rate": 0.0001, "loss": 1.4889, "step": 1897 }, { "epoch": 0.3047527296082209, "grad_norm": 0.24734078347682953, "learning_rate": 0.0001, "loss": 1.455, "step": 1898 }, { "epoch": 0.30491329479768786, "grad_norm": 0.25962188839912415, "learning_rate": 0.0001, "loss": 1.6056, "step": 1899 }, { "epoch": 0.3050738599871548, "grad_norm": 0.2501012682914734, "learning_rate": 0.0001, "loss": 1.538, "step": 1900 }, { "epoch": 0.3052344251766217, "grad_norm": 0.25188666582107544, "learning_rate": 0.0001, "loss": 1.612, "step": 1901 }, { "epoch": 0.30539499036608864, "grad_norm": 0.25298765301704407, "learning_rate": 0.0001, "loss": 1.6036, "step": 1902 }, { "epoch": 0.3055555555555556, "grad_norm": 0.268204003572464, "learning_rate": 0.0001, "loss": 1.6405, "step": 1903 }, { "epoch": 0.30571612074502247, "grad_norm": 1.586869239807129, "learning_rate": 0.0001, "loss": 1.573, "step": 1904 }, { "epoch": 0.3058766859344894, "grad_norm": 0.24489398300647736, "learning_rate": 0.0001, "loss": 1.5984, "step": 1905 }, { "epoch": 0.3060372511239563, "grad_norm": 0.2484893947839737, "learning_rate": 0.0001, "loss": 1.543, "step": 1906 }, { "epoch": 0.30619781631342324, "grad_norm": 0.23659148812294006, "learning_rate": 0.0001, "loss": 1.4638, "step": 1907 }, { "epoch": 0.3063583815028902, "grad_norm": 0.238989919424057, "learning_rate": 0.0001, "loss": 1.4976, "step": 1908 }, { "epoch": 0.3065189466923571, "grad_norm": 0.24608288705348969, "learning_rate": 0.0001, "loss": 1.6556, "step": 1909 }, { "epoch": 0.306679511881824, "grad_norm": 0.24990175664424896, "learning_rate": 0.0001, "loss": 1.5045, "step": 1910 }, { "epoch": 0.30684007707129096, "grad_norm": 0.2556162178516388, "learning_rate": 0.0001, "loss": 1.6116, "step": 1911 }, { "epoch": 0.30700064226075785, "grad_norm": 0.24361644685268402, "learning_rate": 0.0001, "loss": 1.5561, "step": 1912 }, { "epoch": 0.3071612074502248, "grad_norm": 0.2393457591533661, "learning_rate": 0.0001, "loss": 1.5945, "step": 1913 }, { "epoch": 0.30732177263969174, "grad_norm": 0.2425204962491989, "learning_rate": 0.0001, "loss": 1.5432, "step": 1914 }, { "epoch": 0.3074823378291586, "grad_norm": 0.25019174814224243, "learning_rate": 0.0001, "loss": 1.6036, "step": 1915 }, { "epoch": 0.30764290301862557, "grad_norm": 0.24608489871025085, "learning_rate": 0.0001, "loss": 1.5638, "step": 1916 }, { "epoch": 0.3078034682080925, "grad_norm": 0.25412517786026, "learning_rate": 0.0001, "loss": 1.5885, "step": 1917 }, { "epoch": 0.3079640333975594, "grad_norm": 0.247113436460495, "learning_rate": 0.0001, "loss": 1.4763, "step": 1918 }, { "epoch": 0.30812459858702634, "grad_norm": 0.24102932214736938, "learning_rate": 0.0001, "loss": 1.5049, "step": 1919 }, { "epoch": 0.30828516377649323, "grad_norm": 1.6683555841445923, "learning_rate": 0.0001, "loss": 1.5579, "step": 1920 }, { "epoch": 0.3084457289659602, "grad_norm": 0.2455155998468399, "learning_rate": 0.0001, "loss": 1.5239, "step": 1921 }, { "epoch": 0.3086062941554271, "grad_norm": 0.26336005330085754, "learning_rate": 0.0001, "loss": 1.4679, "step": 1922 }, { "epoch": 0.308766859344894, "grad_norm": 0.26241132616996765, "learning_rate": 0.0001, "loss": 1.5808, "step": 1923 }, { "epoch": 0.30892742453436095, "grad_norm": 0.2679413855075836, "learning_rate": 0.0001, "loss": 1.5523, "step": 1924 }, { "epoch": 0.3090879897238279, "grad_norm": 0.2441592514514923, "learning_rate": 0.0001, "loss": 1.5253, "step": 1925 }, { "epoch": 0.3092485549132948, "grad_norm": 0.2611231505870819, "learning_rate": 0.0001, "loss": 1.5624, "step": 1926 }, { "epoch": 0.3094091201027617, "grad_norm": 0.26747897267341614, "learning_rate": 0.0001, "loss": 1.5377, "step": 1927 }, { "epoch": 0.30956968529222867, "grad_norm": 0.28684693574905396, "learning_rate": 0.0001, "loss": 1.5811, "step": 1928 }, { "epoch": 0.30973025048169556, "grad_norm": 0.2699681222438812, "learning_rate": 0.0001, "loss": 1.5805, "step": 1929 }, { "epoch": 0.3098908156711625, "grad_norm": 0.2753625810146332, "learning_rate": 0.0001, "loss": 1.5864, "step": 1930 }, { "epoch": 0.3100513808606294, "grad_norm": 0.2829086482524872, "learning_rate": 0.0001, "loss": 1.6383, "step": 1931 }, { "epoch": 0.31021194605009633, "grad_norm": 0.27932077646255493, "learning_rate": 0.0001, "loss": 1.5027, "step": 1932 }, { "epoch": 0.3103725112395633, "grad_norm": 0.2526090145111084, "learning_rate": 0.0001, "loss": 1.4831, "step": 1933 }, { "epoch": 0.31053307642903016, "grad_norm": 0.27693843841552734, "learning_rate": 0.0001, "loss": 1.6333, "step": 1934 }, { "epoch": 0.3106936416184971, "grad_norm": 0.2676289677619934, "learning_rate": 0.0001, "loss": 1.5865, "step": 1935 }, { "epoch": 0.31085420680796405, "grad_norm": 0.2501024603843689, "learning_rate": 0.0001, "loss": 1.6406, "step": 1936 }, { "epoch": 0.31101477199743094, "grad_norm": 0.31236693263053894, "learning_rate": 0.0001, "loss": 1.5404, "step": 1937 }, { "epoch": 0.3111753371868979, "grad_norm": 0.2553372085094452, "learning_rate": 0.0001, "loss": 1.5008, "step": 1938 }, { "epoch": 0.3113359023763648, "grad_norm": 0.247478649020195, "learning_rate": 0.0001, "loss": 1.5338, "step": 1939 }, { "epoch": 0.3114964675658317, "grad_norm": 0.2544630467891693, "learning_rate": 0.0001, "loss": 1.5287, "step": 1940 }, { "epoch": 0.31165703275529866, "grad_norm": 0.2486809939146042, "learning_rate": 0.0001, "loss": 1.5386, "step": 1941 }, { "epoch": 0.3118175979447656, "grad_norm": 0.26548662781715393, "learning_rate": 0.0001, "loss": 1.4916, "step": 1942 }, { "epoch": 0.3119781631342325, "grad_norm": 0.26302793622016907, "learning_rate": 0.0001, "loss": 1.6307, "step": 1943 }, { "epoch": 0.31213872832369943, "grad_norm": 0.2523314952850342, "learning_rate": 0.0001, "loss": 1.5184, "step": 1944 }, { "epoch": 0.3122992935131663, "grad_norm": 0.2504449188709259, "learning_rate": 0.0001, "loss": 1.4592, "step": 1945 }, { "epoch": 0.31245985870263326, "grad_norm": 0.2648429870605469, "learning_rate": 0.0001, "loss": 1.6783, "step": 1946 }, { "epoch": 0.3126204238921002, "grad_norm": 0.27570006251335144, "learning_rate": 0.0001, "loss": 1.5943, "step": 1947 }, { "epoch": 0.3127809890815671, "grad_norm": 0.2519308924674988, "learning_rate": 0.0001, "loss": 1.544, "step": 1948 }, { "epoch": 0.31294155427103404, "grad_norm": 0.26972678303718567, "learning_rate": 0.0001, "loss": 1.5713, "step": 1949 }, { "epoch": 0.313102119460501, "grad_norm": 0.2651619017124176, "learning_rate": 0.0001, "loss": 1.4749, "step": 1950 }, { "epoch": 0.31326268464996787, "grad_norm": 0.2575755715370178, "learning_rate": 0.0001, "loss": 1.5378, "step": 1951 }, { "epoch": 0.3134232498394348, "grad_norm": 0.2738771140575409, "learning_rate": 0.0001, "loss": 1.5779, "step": 1952 }, { "epoch": 0.31358381502890176, "grad_norm": 0.2623549699783325, "learning_rate": 0.0001, "loss": 1.5892, "step": 1953 }, { "epoch": 0.31374438021836865, "grad_norm": 0.26148900389671326, "learning_rate": 0.0001, "loss": 1.4955, "step": 1954 }, { "epoch": 0.3139049454078356, "grad_norm": 0.26656344532966614, "learning_rate": 0.0001, "loss": 1.5875, "step": 1955 }, { "epoch": 0.3140655105973025, "grad_norm": 0.2636772394180298, "learning_rate": 0.0001, "loss": 1.5877, "step": 1956 }, { "epoch": 0.3142260757867694, "grad_norm": 0.26439574360847473, "learning_rate": 0.0001, "loss": 1.5937, "step": 1957 }, { "epoch": 0.31438664097623636, "grad_norm": 0.3177263140678406, "learning_rate": 0.0001, "loss": 1.4055, "step": 1958 }, { "epoch": 0.31454720616570325, "grad_norm": 0.31999045610427856, "learning_rate": 0.0001, "loss": 1.5173, "step": 1959 }, { "epoch": 0.3147077713551702, "grad_norm": 0.28947094082832336, "learning_rate": 0.0001, "loss": 1.6825, "step": 1960 }, { "epoch": 0.31486833654463714, "grad_norm": 0.26300951838493347, "learning_rate": 0.0001, "loss": 1.5699, "step": 1961 }, { "epoch": 0.315028901734104, "grad_norm": 0.31268611550331116, "learning_rate": 0.0001, "loss": 1.6053, "step": 1962 }, { "epoch": 0.31518946692357097, "grad_norm": 0.31239306926727295, "learning_rate": 0.0001, "loss": 1.4967, "step": 1963 }, { "epoch": 0.3153500321130379, "grad_norm": 0.4538760483264923, "learning_rate": 0.0001, "loss": 1.5428, "step": 1964 }, { "epoch": 0.3155105973025048, "grad_norm": 0.2723877429962158, "learning_rate": 0.0001, "loss": 1.5532, "step": 1965 }, { "epoch": 0.31567116249197175, "grad_norm": 0.273652583360672, "learning_rate": 0.0001, "loss": 1.5211, "step": 1966 }, { "epoch": 0.3158317276814387, "grad_norm": 0.2829582691192627, "learning_rate": 0.0001, "loss": 1.5397, "step": 1967 }, { "epoch": 0.3159922928709056, "grad_norm": 0.28165462613105774, "learning_rate": 0.0001, "loss": 1.6203, "step": 1968 }, { "epoch": 0.3161528580603725, "grad_norm": 0.27137845754623413, "learning_rate": 0.0001, "loss": 1.5079, "step": 1969 }, { "epoch": 0.3163134232498394, "grad_norm": 0.28451213240623474, "learning_rate": 0.0001, "loss": 1.5272, "step": 1970 }, { "epoch": 0.31647398843930635, "grad_norm": 0.2487514317035675, "learning_rate": 0.0001, "loss": 1.5149, "step": 1971 }, { "epoch": 0.3166345536287733, "grad_norm": 0.2665540277957916, "learning_rate": 0.0001, "loss": 1.584, "step": 1972 }, { "epoch": 0.3167951188182402, "grad_norm": 0.29046347737312317, "learning_rate": 0.0001, "loss": 1.579, "step": 1973 }, { "epoch": 0.31695568400770713, "grad_norm": 0.25909698009490967, "learning_rate": 0.0001, "loss": 1.6118, "step": 1974 }, { "epoch": 0.31711624919717407, "grad_norm": 0.24843347072601318, "learning_rate": 0.0001, "loss": 1.5208, "step": 1975 }, { "epoch": 0.31727681438664096, "grad_norm": 0.24475157260894775, "learning_rate": 0.0001, "loss": 1.5602, "step": 1976 }, { "epoch": 0.3174373795761079, "grad_norm": 0.2558642327785492, "learning_rate": 0.0001, "loss": 1.5627, "step": 1977 }, { "epoch": 0.31759794476557485, "grad_norm": 0.26439404487609863, "learning_rate": 0.0001, "loss": 1.5986, "step": 1978 }, { "epoch": 0.31775850995504173, "grad_norm": 0.24407227337360382, "learning_rate": 0.0001, "loss": 1.5732, "step": 1979 }, { "epoch": 0.3179190751445087, "grad_norm": 0.28595051169395447, "learning_rate": 0.0001, "loss": 1.6268, "step": 1980 }, { "epoch": 0.3180796403339756, "grad_norm": 0.26790329813957214, "learning_rate": 0.0001, "loss": 1.6188, "step": 1981 }, { "epoch": 0.3182402055234425, "grad_norm": 0.33726316690444946, "learning_rate": 0.0001, "loss": 1.5407, "step": 1982 }, { "epoch": 0.31840077071290945, "grad_norm": 0.2506847083568573, "learning_rate": 0.0001, "loss": 1.5907, "step": 1983 }, { "epoch": 0.31856133590237634, "grad_norm": 0.26663774251937866, "learning_rate": 0.0001, "loss": 1.5612, "step": 1984 }, { "epoch": 0.3187219010918433, "grad_norm": 0.2664209008216858, "learning_rate": 0.0001, "loss": 1.5583, "step": 1985 }, { "epoch": 0.31888246628131023, "grad_norm": 0.2599117159843445, "learning_rate": 0.0001, "loss": 1.5069, "step": 1986 }, { "epoch": 0.3190430314707771, "grad_norm": 0.25136682391166687, "learning_rate": 0.0001, "loss": 1.5309, "step": 1987 }, { "epoch": 0.31920359666024406, "grad_norm": 0.2607286870479584, "learning_rate": 0.0001, "loss": 1.5507, "step": 1988 }, { "epoch": 0.319364161849711, "grad_norm": 0.25644397735595703, "learning_rate": 0.0001, "loss": 1.5686, "step": 1989 }, { "epoch": 0.3195247270391779, "grad_norm": 0.25758540630340576, "learning_rate": 0.0001, "loss": 1.4948, "step": 1990 }, { "epoch": 0.31968529222864484, "grad_norm": 0.24424684047698975, "learning_rate": 0.0001, "loss": 1.4676, "step": 1991 }, { "epoch": 0.3198458574181118, "grad_norm": 2.369795560836792, "learning_rate": 0.0001, "loss": 1.6539, "step": 1992 }, { "epoch": 0.32000642260757867, "grad_norm": 0.2553209364414215, "learning_rate": 0.0001, "loss": 1.5653, "step": 1993 }, { "epoch": 0.3201669877970456, "grad_norm": 0.25574493408203125, "learning_rate": 0.0001, "loss": 1.5519, "step": 1994 }, { "epoch": 0.3203275529865125, "grad_norm": 0.24983198940753937, "learning_rate": 0.0001, "loss": 1.638, "step": 1995 }, { "epoch": 0.32048811817597944, "grad_norm": 0.24426138401031494, "learning_rate": 0.0001, "loss": 1.5146, "step": 1996 }, { "epoch": 0.3206486833654464, "grad_norm": 0.2396833598613739, "learning_rate": 0.0001, "loss": 1.5439, "step": 1997 }, { "epoch": 0.3208092485549133, "grad_norm": 0.2616952359676361, "learning_rate": 0.0001, "loss": 1.532, "step": 1998 }, { "epoch": 0.3209698137443802, "grad_norm": 0.2405225783586502, "learning_rate": 0.0001, "loss": 1.5721, "step": 1999 }, { "epoch": 0.32113037893384716, "grad_norm": 0.24469563364982605, "learning_rate": 0.0001, "loss": 1.5501, "step": 2000 }, { "epoch": 0.32129094412331405, "grad_norm": 0.24045726656913757, "learning_rate": 0.0001, "loss": 1.4864, "step": 2001 }, { "epoch": 0.321451509312781, "grad_norm": 0.23491191864013672, "learning_rate": 0.0001, "loss": 1.5456, "step": 2002 }, { "epoch": 0.32161207450224794, "grad_norm": 0.25913017988204956, "learning_rate": 0.0001, "loss": 1.5277, "step": 2003 }, { "epoch": 0.3217726396917148, "grad_norm": 0.24957261979579926, "learning_rate": 0.0001, "loss": 1.5388, "step": 2004 }, { "epoch": 0.32193320488118177, "grad_norm": 0.26871004700660706, "learning_rate": 0.0001, "loss": 1.5986, "step": 2005 }, { "epoch": 0.3220937700706487, "grad_norm": 0.25158387422561646, "learning_rate": 0.0001, "loss": 1.5362, "step": 2006 }, { "epoch": 0.3222543352601156, "grad_norm": 0.25004780292510986, "learning_rate": 0.0001, "loss": 1.5961, "step": 2007 }, { "epoch": 0.32241490044958254, "grad_norm": 0.2508402466773987, "learning_rate": 0.0001, "loss": 1.5096, "step": 2008 }, { "epoch": 0.32257546563904943, "grad_norm": 0.2602085471153259, "learning_rate": 0.0001, "loss": 1.5506, "step": 2009 }, { "epoch": 0.3227360308285164, "grad_norm": 0.24283818900585175, "learning_rate": 0.0001, "loss": 1.4731, "step": 2010 }, { "epoch": 0.3228965960179833, "grad_norm": 0.2539307177066803, "learning_rate": 0.0001, "loss": 1.551, "step": 2011 }, { "epoch": 0.3230571612074502, "grad_norm": 0.2527778148651123, "learning_rate": 0.0001, "loss": 1.6136, "step": 2012 }, { "epoch": 0.32321772639691715, "grad_norm": 0.2855991721153259, "learning_rate": 0.0001, "loss": 1.66, "step": 2013 }, { "epoch": 0.3233782915863841, "grad_norm": 0.2550317645072937, "learning_rate": 0.0001, "loss": 1.6189, "step": 2014 }, { "epoch": 0.323538856775851, "grad_norm": 0.2562001049518585, "learning_rate": 0.0001, "loss": 1.5835, "step": 2015 }, { "epoch": 0.3236994219653179, "grad_norm": 0.2569708526134491, "learning_rate": 0.0001, "loss": 1.5579, "step": 2016 }, { "epoch": 0.32385998715478487, "grad_norm": 0.25206586718559265, "learning_rate": 0.0001, "loss": 1.4494, "step": 2017 }, { "epoch": 0.32402055234425176, "grad_norm": 0.260757178068161, "learning_rate": 0.0001, "loss": 1.5939, "step": 2018 }, { "epoch": 0.3241811175337187, "grad_norm": 0.24333572387695312, "learning_rate": 0.0001, "loss": 1.5233, "step": 2019 }, { "epoch": 0.3243416827231856, "grad_norm": 0.2474576234817505, "learning_rate": 0.0001, "loss": 1.531, "step": 2020 }, { "epoch": 0.32450224791265253, "grad_norm": 0.24919936060905457, "learning_rate": 0.0001, "loss": 1.5104, "step": 2021 }, { "epoch": 0.3246628131021195, "grad_norm": 0.27407601475715637, "learning_rate": 0.0001, "loss": 1.5779, "step": 2022 }, { "epoch": 0.32482337829158636, "grad_norm": 0.2472204566001892, "learning_rate": 0.0001, "loss": 1.548, "step": 2023 }, { "epoch": 0.3249839434810533, "grad_norm": 0.2495601326227188, "learning_rate": 0.0001, "loss": 1.5391, "step": 2024 }, { "epoch": 0.32514450867052025, "grad_norm": 0.34387463331222534, "learning_rate": 0.0001, "loss": 1.477, "step": 2025 }, { "epoch": 0.32530507385998714, "grad_norm": 0.2577751874923706, "learning_rate": 0.0001, "loss": 1.5778, "step": 2026 }, { "epoch": 0.3254656390494541, "grad_norm": 0.2544448971748352, "learning_rate": 0.0001, "loss": 1.4909, "step": 2027 }, { "epoch": 0.325626204238921, "grad_norm": 0.26194360852241516, "learning_rate": 0.0001, "loss": 1.4971, "step": 2028 }, { "epoch": 0.3257867694283879, "grad_norm": 0.2775893211364746, "learning_rate": 0.0001, "loss": 1.57, "step": 2029 }, { "epoch": 0.32594733461785486, "grad_norm": 0.24274244904518127, "learning_rate": 0.0001, "loss": 1.4804, "step": 2030 }, { "epoch": 0.3261078998073218, "grad_norm": 0.25908997654914856, "learning_rate": 0.0001, "loss": 1.5899, "step": 2031 }, { "epoch": 0.3262684649967887, "grad_norm": 0.24661113321781158, "learning_rate": 0.0001, "loss": 1.533, "step": 2032 }, { "epoch": 0.32642903018625563, "grad_norm": 0.2571931481361389, "learning_rate": 0.0001, "loss": 1.5652, "step": 2033 }, { "epoch": 0.3265895953757225, "grad_norm": 0.27937066555023193, "learning_rate": 0.0001, "loss": 1.5215, "step": 2034 }, { "epoch": 0.32675016056518946, "grad_norm": 0.25084108114242554, "learning_rate": 0.0001, "loss": 1.5691, "step": 2035 }, { "epoch": 0.3269107257546564, "grad_norm": 0.2674304246902466, "learning_rate": 0.0001, "loss": 1.6143, "step": 2036 }, { "epoch": 0.3270712909441233, "grad_norm": 0.2760143578052521, "learning_rate": 0.0001, "loss": 1.6586, "step": 2037 }, { "epoch": 0.32723185613359024, "grad_norm": 0.2676118016242981, "learning_rate": 0.0001, "loss": 1.5496, "step": 2038 }, { "epoch": 0.3273924213230572, "grad_norm": 0.2591191232204437, "learning_rate": 0.0001, "loss": 1.5747, "step": 2039 }, { "epoch": 0.32755298651252407, "grad_norm": 0.24934707581996918, "learning_rate": 0.0001, "loss": 1.5478, "step": 2040 }, { "epoch": 0.327713551701991, "grad_norm": 0.24062830209732056, "learning_rate": 0.0001, "loss": 1.5283, "step": 2041 }, { "epoch": 0.32787411689145796, "grad_norm": 0.2677200138568878, "learning_rate": 0.0001, "loss": 1.5722, "step": 2042 }, { "epoch": 0.32803468208092484, "grad_norm": 0.25619253516197205, "learning_rate": 0.0001, "loss": 1.5749, "step": 2043 }, { "epoch": 0.3281952472703918, "grad_norm": 0.25234487652778625, "learning_rate": 0.0001, "loss": 1.5149, "step": 2044 }, { "epoch": 0.3283558124598587, "grad_norm": 0.24501624703407288, "learning_rate": 0.0001, "loss": 1.5608, "step": 2045 }, { "epoch": 0.3285163776493256, "grad_norm": 0.2601943910121918, "learning_rate": 0.0001, "loss": 1.5929, "step": 2046 }, { "epoch": 0.32867694283879256, "grad_norm": 0.2691039443016052, "learning_rate": 0.0001, "loss": 1.6367, "step": 2047 }, { "epoch": 0.32883750802825945, "grad_norm": 0.2641131281852722, "learning_rate": 0.0001, "loss": 1.5494, "step": 2048 }, { "epoch": 0.3289980732177264, "grad_norm": 0.24564731121063232, "learning_rate": 0.0001, "loss": 1.5653, "step": 2049 }, { "epoch": 0.32915863840719334, "grad_norm": 0.24584510922431946, "learning_rate": 0.0001, "loss": 1.6153, "step": 2050 }, { "epoch": 0.3293192035966602, "grad_norm": 0.25819748640060425, "learning_rate": 0.0001, "loss": 1.4515, "step": 2051 }, { "epoch": 0.32947976878612717, "grad_norm": 0.25013402104377747, "learning_rate": 0.0001, "loss": 1.5023, "step": 2052 }, { "epoch": 0.3296403339755941, "grad_norm": 0.2627883553504944, "learning_rate": 0.0001, "loss": 1.5698, "step": 2053 }, { "epoch": 0.329800899165061, "grad_norm": 0.2595508098602295, "learning_rate": 0.0001, "loss": 1.5887, "step": 2054 }, { "epoch": 0.32996146435452794, "grad_norm": 0.2511318027973175, "learning_rate": 0.0001, "loss": 1.5807, "step": 2055 }, { "epoch": 0.3301220295439949, "grad_norm": 0.28229302167892456, "learning_rate": 0.0001, "loss": 1.5293, "step": 2056 }, { "epoch": 0.3302825947334618, "grad_norm": 0.2682824730873108, "learning_rate": 0.0001, "loss": 1.5524, "step": 2057 }, { "epoch": 0.3304431599229287, "grad_norm": 0.2473607063293457, "learning_rate": 0.0001, "loss": 1.5423, "step": 2058 }, { "epoch": 0.3306037251123956, "grad_norm": 0.26974809169769287, "learning_rate": 0.0001, "loss": 1.6149, "step": 2059 }, { "epoch": 0.33076429030186255, "grad_norm": 0.26088446378707886, "learning_rate": 0.0001, "loss": 1.5512, "step": 2060 }, { "epoch": 0.3309248554913295, "grad_norm": 0.42597144842147827, "learning_rate": 0.0001, "loss": 1.5373, "step": 2061 }, { "epoch": 0.3310854206807964, "grad_norm": 0.25390321016311646, "learning_rate": 0.0001, "loss": 1.5602, "step": 2062 }, { "epoch": 0.3312459858702633, "grad_norm": 0.2506279945373535, "learning_rate": 0.0001, "loss": 1.4774, "step": 2063 }, { "epoch": 0.33140655105973027, "grad_norm": 0.2644400894641876, "learning_rate": 0.0001, "loss": 1.5258, "step": 2064 }, { "epoch": 0.33156711624919716, "grad_norm": 0.27889135479927063, "learning_rate": 0.0001, "loss": 1.4989, "step": 2065 }, { "epoch": 0.3317276814386641, "grad_norm": 0.2611287832260132, "learning_rate": 0.0001, "loss": 1.5797, "step": 2066 }, { "epoch": 0.33188824662813105, "grad_norm": 0.26141950488090515, "learning_rate": 0.0001, "loss": 1.5697, "step": 2067 }, { "epoch": 0.33204881181759793, "grad_norm": 0.2811838984489441, "learning_rate": 0.0001, "loss": 1.5889, "step": 2068 }, { "epoch": 0.3322093770070649, "grad_norm": 0.2627982497215271, "learning_rate": 0.0001, "loss": 1.598, "step": 2069 }, { "epoch": 0.33236994219653176, "grad_norm": 0.24998469650745392, "learning_rate": 0.0001, "loss": 1.5854, "step": 2070 }, { "epoch": 0.3325305073859987, "grad_norm": 0.260353147983551, "learning_rate": 0.0001, "loss": 1.618, "step": 2071 }, { "epoch": 0.33269107257546565, "grad_norm": 0.250887393951416, "learning_rate": 0.0001, "loss": 1.4957, "step": 2072 }, { "epoch": 0.33285163776493254, "grad_norm": 0.2872818112373352, "learning_rate": 0.0001, "loss": 1.5865, "step": 2073 }, { "epoch": 0.3330122029543995, "grad_norm": 0.24831518530845642, "learning_rate": 0.0001, "loss": 1.4169, "step": 2074 }, { "epoch": 0.3331727681438664, "grad_norm": 0.2671929597854614, "learning_rate": 0.0001, "loss": 1.5842, "step": 2075 }, { "epoch": 0.3333333333333333, "grad_norm": 0.2549666166305542, "learning_rate": 0.0001, "loss": 1.4667, "step": 2076 }, { "epoch": 0.33349389852280026, "grad_norm": 0.25841936469078064, "learning_rate": 0.0001, "loss": 1.5821, "step": 2077 }, { "epoch": 0.3336544637122672, "grad_norm": 0.2738978862762451, "learning_rate": 0.0001, "loss": 1.6261, "step": 2078 }, { "epoch": 0.3338150289017341, "grad_norm": 0.2430199831724167, "learning_rate": 0.0001, "loss": 1.5608, "step": 2079 }, { "epoch": 0.33397559409120103, "grad_norm": 0.24916157126426697, "learning_rate": 0.0001, "loss": 1.5206, "step": 2080 }, { "epoch": 0.334136159280668, "grad_norm": 0.2479800283908844, "learning_rate": 0.0001, "loss": 1.6286, "step": 2081 }, { "epoch": 0.33429672447013487, "grad_norm": 0.25704678893089294, "learning_rate": 0.0001, "loss": 1.5186, "step": 2082 }, { "epoch": 0.3344572896596018, "grad_norm": 0.24137870967388153, "learning_rate": 0.0001, "loss": 1.6058, "step": 2083 }, { "epoch": 0.3346178548490687, "grad_norm": 0.26002371311187744, "learning_rate": 0.0001, "loss": 1.6203, "step": 2084 }, { "epoch": 0.33477842003853564, "grad_norm": 0.2356281876564026, "learning_rate": 0.0001, "loss": 1.5415, "step": 2085 }, { "epoch": 0.3349389852280026, "grad_norm": 0.26551684737205505, "learning_rate": 0.0001, "loss": 1.6309, "step": 2086 }, { "epoch": 0.33509955041746947, "grad_norm": 0.250840425491333, "learning_rate": 0.0001, "loss": 1.5145, "step": 2087 }, { "epoch": 0.3352601156069364, "grad_norm": 0.25762513279914856, "learning_rate": 0.0001, "loss": 1.5706, "step": 2088 }, { "epoch": 0.33542068079640336, "grad_norm": 0.24373485147953033, "learning_rate": 0.0001, "loss": 1.5207, "step": 2089 }, { "epoch": 0.33558124598587025, "grad_norm": 0.3432878851890564, "learning_rate": 0.0001, "loss": 1.6484, "step": 2090 }, { "epoch": 0.3357418111753372, "grad_norm": 0.25250113010406494, "learning_rate": 0.0001, "loss": 1.6051, "step": 2091 }, { "epoch": 0.33590237636480413, "grad_norm": 0.24866685271263123, "learning_rate": 0.0001, "loss": 1.5427, "step": 2092 }, { "epoch": 0.336062941554271, "grad_norm": 0.2524840235710144, "learning_rate": 0.0001, "loss": 1.5751, "step": 2093 }, { "epoch": 0.33622350674373797, "grad_norm": 0.2584114670753479, "learning_rate": 0.0001, "loss": 1.6, "step": 2094 }, { "epoch": 0.33638407193320485, "grad_norm": 0.2565653920173645, "learning_rate": 0.0001, "loss": 1.5776, "step": 2095 }, { "epoch": 0.3365446371226718, "grad_norm": 0.26194992661476135, "learning_rate": 0.0001, "loss": 1.5842, "step": 2096 }, { "epoch": 0.33670520231213874, "grad_norm": 0.25302010774612427, "learning_rate": 0.0001, "loss": 1.5818, "step": 2097 }, { "epoch": 0.33686576750160563, "grad_norm": 0.2598724365234375, "learning_rate": 0.0001, "loss": 1.6093, "step": 2098 }, { "epoch": 0.33702633269107257, "grad_norm": 0.2545692026615143, "learning_rate": 0.0001, "loss": 1.4404, "step": 2099 }, { "epoch": 0.3371868978805395, "grad_norm": 0.24554964900016785, "learning_rate": 0.0001, "loss": 1.4851, "step": 2100 }, { "epoch": 0.3373474630700064, "grad_norm": 0.25128766894340515, "learning_rate": 0.0001, "loss": 1.5647, "step": 2101 }, { "epoch": 0.33750802825947335, "grad_norm": 0.2542661428451538, "learning_rate": 0.0001, "loss": 1.5352, "step": 2102 }, { "epoch": 0.3376685934489403, "grad_norm": 0.26056137681007385, "learning_rate": 0.0001, "loss": 1.6635, "step": 2103 }, { "epoch": 0.3378291586384072, "grad_norm": 0.2550312280654907, "learning_rate": 0.0001, "loss": 1.5557, "step": 2104 }, { "epoch": 0.3379897238278741, "grad_norm": 0.2502883970737457, "learning_rate": 0.0001, "loss": 1.5526, "step": 2105 }, { "epoch": 0.33815028901734107, "grad_norm": 0.25496160984039307, "learning_rate": 0.0001, "loss": 1.5514, "step": 2106 }, { "epoch": 0.33831085420680795, "grad_norm": 0.2540586292743683, "learning_rate": 0.0001, "loss": 1.5898, "step": 2107 }, { "epoch": 0.3384714193962749, "grad_norm": 0.25860700011253357, "learning_rate": 0.0001, "loss": 1.5184, "step": 2108 }, { "epoch": 0.3386319845857418, "grad_norm": 0.2346867173910141, "learning_rate": 0.0001, "loss": 1.4983, "step": 2109 }, { "epoch": 0.33879254977520873, "grad_norm": 0.2642350196838379, "learning_rate": 0.0001, "loss": 1.6796, "step": 2110 }, { "epoch": 0.3389531149646757, "grad_norm": 0.23771946132183075, "learning_rate": 0.0001, "loss": 1.5507, "step": 2111 }, { "epoch": 0.33911368015414256, "grad_norm": 0.24657626450061798, "learning_rate": 0.0001, "loss": 1.5134, "step": 2112 }, { "epoch": 0.3392742453436095, "grad_norm": 0.27198708057403564, "learning_rate": 0.0001, "loss": 1.5508, "step": 2113 }, { "epoch": 0.33943481053307645, "grad_norm": 0.2621020972728729, "learning_rate": 0.0001, "loss": 1.6011, "step": 2114 }, { "epoch": 0.33959537572254334, "grad_norm": 0.27582138776779175, "learning_rate": 0.0001, "loss": 1.6013, "step": 2115 }, { "epoch": 0.3397559409120103, "grad_norm": 0.2563203275203705, "learning_rate": 0.0001, "loss": 1.4894, "step": 2116 }, { "epoch": 0.3399165061014772, "grad_norm": 0.2562795877456665, "learning_rate": 0.0001, "loss": 1.581, "step": 2117 }, { "epoch": 0.3400770712909441, "grad_norm": 0.26509010791778564, "learning_rate": 0.0001, "loss": 1.5237, "step": 2118 }, { "epoch": 0.34023763648041105, "grad_norm": 0.25600680708885193, "learning_rate": 0.0001, "loss": 1.6093, "step": 2119 }, { "epoch": 0.340398201669878, "grad_norm": 0.2511206269264221, "learning_rate": 0.0001, "loss": 1.5343, "step": 2120 }, { "epoch": 0.3405587668593449, "grad_norm": 0.24978840351104736, "learning_rate": 0.0001, "loss": 1.5208, "step": 2121 }, { "epoch": 0.34071933204881183, "grad_norm": 0.25395306944847107, "learning_rate": 0.0001, "loss": 1.6241, "step": 2122 }, { "epoch": 0.3408798972382787, "grad_norm": 0.26245689392089844, "learning_rate": 0.0001, "loss": 1.5836, "step": 2123 }, { "epoch": 0.34104046242774566, "grad_norm": 0.25019681453704834, "learning_rate": 0.0001, "loss": 1.5727, "step": 2124 }, { "epoch": 0.3412010276172126, "grad_norm": 0.2606225609779358, "learning_rate": 0.0001, "loss": 1.5617, "step": 2125 }, { "epoch": 0.3413615928066795, "grad_norm": 0.24390485882759094, "learning_rate": 0.0001, "loss": 1.5801, "step": 2126 }, { "epoch": 0.34152215799614644, "grad_norm": 0.24285078048706055, "learning_rate": 0.0001, "loss": 1.5468, "step": 2127 }, { "epoch": 0.3416827231856134, "grad_norm": 0.5630396008491516, "learning_rate": 0.0001, "loss": 1.5057, "step": 2128 }, { "epoch": 0.34184328837508027, "grad_norm": 0.2508193552494049, "learning_rate": 0.0001, "loss": 1.5735, "step": 2129 }, { "epoch": 0.3420038535645472, "grad_norm": 0.24452221393585205, "learning_rate": 0.0001, "loss": 1.5448, "step": 2130 }, { "epoch": 0.34216441875401415, "grad_norm": 0.24207033216953278, "learning_rate": 0.0001, "loss": 1.5064, "step": 2131 }, { "epoch": 0.34232498394348104, "grad_norm": 0.2509983777999878, "learning_rate": 0.0001, "loss": 1.5166, "step": 2132 }, { "epoch": 0.342485549132948, "grad_norm": 0.33120840787887573, "learning_rate": 0.0001, "loss": 1.5876, "step": 2133 }, { "epoch": 0.3426461143224149, "grad_norm": 0.26178523898124695, "learning_rate": 0.0001, "loss": 1.6726, "step": 2134 }, { "epoch": 0.3428066795118818, "grad_norm": 0.2442718893289566, "learning_rate": 0.0001, "loss": 1.5507, "step": 2135 }, { "epoch": 0.34296724470134876, "grad_norm": 0.2566917836666107, "learning_rate": 0.0001, "loss": 1.5711, "step": 2136 }, { "epoch": 0.34312780989081565, "grad_norm": 0.26134663820266724, "learning_rate": 0.0001, "loss": 1.5366, "step": 2137 }, { "epoch": 0.3432883750802826, "grad_norm": 0.2464238554239273, "learning_rate": 0.0001, "loss": 1.5535, "step": 2138 }, { "epoch": 0.34344894026974954, "grad_norm": 0.26522767543792725, "learning_rate": 0.0001, "loss": 1.519, "step": 2139 }, { "epoch": 0.3436095054592164, "grad_norm": 0.25250810384750366, "learning_rate": 0.0001, "loss": 1.5288, "step": 2140 }, { "epoch": 0.34377007064868337, "grad_norm": 0.2520618736743927, "learning_rate": 0.0001, "loss": 1.5671, "step": 2141 }, { "epoch": 0.3439306358381503, "grad_norm": 0.24800851941108704, "learning_rate": 0.0001, "loss": 1.5362, "step": 2142 }, { "epoch": 0.3440912010276172, "grad_norm": 0.24773597717285156, "learning_rate": 0.0001, "loss": 1.4873, "step": 2143 }, { "epoch": 0.34425176621708414, "grad_norm": 0.25473371148109436, "learning_rate": 0.0001, "loss": 1.5516, "step": 2144 }, { "epoch": 0.3444123314065511, "grad_norm": 0.24132798612117767, "learning_rate": 0.0001, "loss": 1.507, "step": 2145 }, { "epoch": 0.344572896596018, "grad_norm": 0.24645288288593292, "learning_rate": 0.0001, "loss": 1.6099, "step": 2146 }, { "epoch": 0.3447334617854849, "grad_norm": 0.24814775586128235, "learning_rate": 0.0001, "loss": 1.5343, "step": 2147 }, { "epoch": 0.3448940269749518, "grad_norm": 0.25247395038604736, "learning_rate": 0.0001, "loss": 1.5251, "step": 2148 }, { "epoch": 0.34505459216441875, "grad_norm": 0.26199835538864136, "learning_rate": 0.0001, "loss": 1.5467, "step": 2149 }, { "epoch": 0.3452151573538857, "grad_norm": 0.2571370601654053, "learning_rate": 0.0001, "loss": 1.564, "step": 2150 }, { "epoch": 0.3453757225433526, "grad_norm": 0.27830779552459717, "learning_rate": 0.0001, "loss": 1.5887, "step": 2151 }, { "epoch": 0.3455362877328195, "grad_norm": 0.2565908432006836, "learning_rate": 0.0001, "loss": 1.574, "step": 2152 }, { "epoch": 0.34569685292228647, "grad_norm": 0.2541201412677765, "learning_rate": 0.0001, "loss": 1.4921, "step": 2153 }, { "epoch": 0.34585741811175336, "grad_norm": 0.25625211000442505, "learning_rate": 0.0001, "loss": 1.5355, "step": 2154 }, { "epoch": 0.3460179833012203, "grad_norm": 0.24762476980686188, "learning_rate": 0.0001, "loss": 1.5887, "step": 2155 }, { "epoch": 0.34617854849068724, "grad_norm": 0.25672975182533264, "learning_rate": 0.0001, "loss": 1.5681, "step": 2156 }, { "epoch": 0.34633911368015413, "grad_norm": 0.24759668111801147, "learning_rate": 0.0001, "loss": 1.5086, "step": 2157 }, { "epoch": 0.3464996788696211, "grad_norm": 0.2625550627708435, "learning_rate": 0.0001, "loss": 1.5708, "step": 2158 }, { "epoch": 0.34666024405908796, "grad_norm": 0.25980886816978455, "learning_rate": 0.0001, "loss": 1.5546, "step": 2159 }, { "epoch": 0.3468208092485549, "grad_norm": 0.2516760230064392, "learning_rate": 0.0001, "loss": 1.5672, "step": 2160 }, { "epoch": 0.34698137443802185, "grad_norm": 0.2552073299884796, "learning_rate": 0.0001, "loss": 1.5915, "step": 2161 }, { "epoch": 0.34714193962748874, "grad_norm": 0.26119092106819153, "learning_rate": 0.0001, "loss": 1.5021, "step": 2162 }, { "epoch": 0.3473025048169557, "grad_norm": 0.2548002302646637, "learning_rate": 0.0001, "loss": 1.5178, "step": 2163 }, { "epoch": 0.3474630700064226, "grad_norm": 0.24852673709392548, "learning_rate": 0.0001, "loss": 1.5639, "step": 2164 }, { "epoch": 0.3476236351958895, "grad_norm": 0.24551789462566376, "learning_rate": 0.0001, "loss": 1.6229, "step": 2165 }, { "epoch": 0.34778420038535646, "grad_norm": 0.2583712935447693, "learning_rate": 0.0001, "loss": 1.5728, "step": 2166 }, { "epoch": 0.3479447655748234, "grad_norm": 0.26149067282676697, "learning_rate": 0.0001, "loss": 1.4892, "step": 2167 }, { "epoch": 0.3481053307642903, "grad_norm": 0.2641371786594391, "learning_rate": 0.0001, "loss": 1.5411, "step": 2168 }, { "epoch": 0.34826589595375723, "grad_norm": 0.25248417258262634, "learning_rate": 0.0001, "loss": 1.5704, "step": 2169 }, { "epoch": 0.3484264611432242, "grad_norm": 0.250577837228775, "learning_rate": 0.0001, "loss": 1.5639, "step": 2170 }, { "epoch": 0.34858702633269106, "grad_norm": 0.2572253346443176, "learning_rate": 0.0001, "loss": 1.5327, "step": 2171 }, { "epoch": 0.348747591522158, "grad_norm": 0.26197028160095215, "learning_rate": 0.0001, "loss": 1.594, "step": 2172 }, { "epoch": 0.3489081567116249, "grad_norm": 0.2460302859544754, "learning_rate": 0.0001, "loss": 1.5568, "step": 2173 }, { "epoch": 0.34906872190109184, "grad_norm": 0.24514052271842957, "learning_rate": 0.0001, "loss": 1.4846, "step": 2174 }, { "epoch": 0.3492292870905588, "grad_norm": 0.2499423325061798, "learning_rate": 0.0001, "loss": 1.4849, "step": 2175 }, { "epoch": 0.34938985228002567, "grad_norm": 0.2529575526714325, "learning_rate": 0.0001, "loss": 1.5793, "step": 2176 }, { "epoch": 0.3495504174694926, "grad_norm": 0.2539237439632416, "learning_rate": 0.0001, "loss": 1.4962, "step": 2177 }, { "epoch": 0.34971098265895956, "grad_norm": 0.2468840479850769, "learning_rate": 0.0001, "loss": 1.5231, "step": 2178 }, { "epoch": 0.34987154784842645, "grad_norm": 0.2504441738128662, "learning_rate": 0.0001, "loss": 1.589, "step": 2179 }, { "epoch": 0.3500321130378934, "grad_norm": 0.2503768503665924, "learning_rate": 0.0001, "loss": 1.5372, "step": 2180 }, { "epoch": 0.35019267822736033, "grad_norm": 0.2639516294002533, "learning_rate": 0.0001, "loss": 1.4467, "step": 2181 }, { "epoch": 0.3503532434168272, "grad_norm": 0.27165842056274414, "learning_rate": 0.0001, "loss": 1.5509, "step": 2182 }, { "epoch": 0.35051380860629416, "grad_norm": 0.25772613286972046, "learning_rate": 0.0001, "loss": 1.472, "step": 2183 }, { "epoch": 0.35067437379576105, "grad_norm": 0.25208979845046997, "learning_rate": 0.0001, "loss": 1.5356, "step": 2184 }, { "epoch": 0.350834938985228, "grad_norm": 0.2547012269496918, "learning_rate": 0.0001, "loss": 1.5456, "step": 2185 }, { "epoch": 0.35099550417469494, "grad_norm": 0.27544358372688293, "learning_rate": 0.0001, "loss": 1.6191, "step": 2186 }, { "epoch": 0.3511560693641618, "grad_norm": 0.2565270960330963, "learning_rate": 0.0001, "loss": 1.5449, "step": 2187 }, { "epoch": 0.35131663455362877, "grad_norm": 0.2597314119338989, "learning_rate": 0.0001, "loss": 1.6189, "step": 2188 }, { "epoch": 0.3514771997430957, "grad_norm": 0.2584989666938782, "learning_rate": 0.0001, "loss": 1.5808, "step": 2189 }, { "epoch": 0.3516377649325626, "grad_norm": 0.2660825252532959, "learning_rate": 0.0001, "loss": 1.659, "step": 2190 }, { "epoch": 0.35179833012202955, "grad_norm": 0.25640127062797546, "learning_rate": 0.0001, "loss": 1.5751, "step": 2191 }, { "epoch": 0.3519588953114965, "grad_norm": 1.5304124355316162, "learning_rate": 0.0001, "loss": 1.562, "step": 2192 }, { "epoch": 0.3521194605009634, "grad_norm": 0.26782211661338806, "learning_rate": 0.0001, "loss": 1.688, "step": 2193 }, { "epoch": 0.3522800256904303, "grad_norm": 0.238906130194664, "learning_rate": 0.0001, "loss": 1.4125, "step": 2194 }, { "epoch": 0.35244059087989726, "grad_norm": 0.2628589868545532, "learning_rate": 0.0001, "loss": 1.5951, "step": 2195 }, { "epoch": 0.35260115606936415, "grad_norm": 0.24343790113925934, "learning_rate": 0.0001, "loss": 1.4637, "step": 2196 }, { "epoch": 0.3527617212588311, "grad_norm": 0.2564511299133301, "learning_rate": 0.0001, "loss": 1.5038, "step": 2197 }, { "epoch": 0.352922286448298, "grad_norm": 0.24965737760066986, "learning_rate": 0.0001, "loss": 1.5078, "step": 2198 }, { "epoch": 0.3530828516377649, "grad_norm": 0.288682222366333, "learning_rate": 0.0001, "loss": 1.5988, "step": 2199 }, { "epoch": 0.35324341682723187, "grad_norm": 0.23998339474201202, "learning_rate": 0.0001, "loss": 1.5456, "step": 2200 }, { "epoch": 0.35340398201669876, "grad_norm": 0.25241750478744507, "learning_rate": 0.0001, "loss": 1.5322, "step": 2201 }, { "epoch": 0.3535645472061657, "grad_norm": 0.2497698813676834, "learning_rate": 0.0001, "loss": 1.5637, "step": 2202 }, { "epoch": 0.35372511239563265, "grad_norm": 0.25241801142692566, "learning_rate": 0.0001, "loss": 1.5509, "step": 2203 }, { "epoch": 0.35388567758509953, "grad_norm": 0.2644217908382416, "learning_rate": 0.0001, "loss": 1.6523, "step": 2204 }, { "epoch": 0.3540462427745665, "grad_norm": 0.25793546438217163, "learning_rate": 0.0001, "loss": 1.5697, "step": 2205 }, { "epoch": 0.3542068079640334, "grad_norm": 0.24560660123825073, "learning_rate": 0.0001, "loss": 1.4703, "step": 2206 }, { "epoch": 0.3543673731535003, "grad_norm": 0.26065948605537415, "learning_rate": 0.0001, "loss": 1.5204, "step": 2207 }, { "epoch": 0.35452793834296725, "grad_norm": 0.250878244638443, "learning_rate": 0.0001, "loss": 1.5404, "step": 2208 }, { "epoch": 0.35468850353243414, "grad_norm": 0.2677420675754547, "learning_rate": 0.0001, "loss": 1.6749, "step": 2209 }, { "epoch": 0.3548490687219011, "grad_norm": 0.2563774883747101, "learning_rate": 0.0001, "loss": 1.5647, "step": 2210 }, { "epoch": 0.355009633911368, "grad_norm": 0.2509433925151825, "learning_rate": 0.0001, "loss": 1.5338, "step": 2211 }, { "epoch": 0.3551701991008349, "grad_norm": 0.24012061953544617, "learning_rate": 0.0001, "loss": 1.5578, "step": 2212 }, { "epoch": 0.35533076429030186, "grad_norm": 0.23800039291381836, "learning_rate": 0.0001, "loss": 1.4734, "step": 2213 }, { "epoch": 0.3554913294797688, "grad_norm": 0.24657626450061798, "learning_rate": 0.0001, "loss": 1.5262, "step": 2214 }, { "epoch": 0.3556518946692357, "grad_norm": 0.24189968407154083, "learning_rate": 0.0001, "loss": 1.5417, "step": 2215 }, { "epoch": 0.35581245985870263, "grad_norm": 0.25544416904449463, "learning_rate": 0.0001, "loss": 1.6089, "step": 2216 }, { "epoch": 0.3559730250481696, "grad_norm": 0.266414999961853, "learning_rate": 0.0001, "loss": 1.5198, "step": 2217 }, { "epoch": 0.35613359023763647, "grad_norm": 0.2565564811229706, "learning_rate": 0.0001, "loss": 1.5381, "step": 2218 }, { "epoch": 0.3562941554271034, "grad_norm": 0.26174500584602356, "learning_rate": 0.0001, "loss": 1.596, "step": 2219 }, { "epoch": 0.35645472061657035, "grad_norm": 0.24598649144172668, "learning_rate": 0.0001, "loss": 1.502, "step": 2220 }, { "epoch": 0.35661528580603724, "grad_norm": 0.25279855728149414, "learning_rate": 0.0001, "loss": 1.5968, "step": 2221 }, { "epoch": 0.3567758509955042, "grad_norm": 0.25305691361427307, "learning_rate": 0.0001, "loss": 1.5453, "step": 2222 }, { "epoch": 0.3569364161849711, "grad_norm": 0.25044116377830505, "learning_rate": 0.0001, "loss": 1.5798, "step": 2223 }, { "epoch": 0.357096981374438, "grad_norm": 0.25510168075561523, "learning_rate": 0.0001, "loss": 1.5435, "step": 2224 }, { "epoch": 0.35725754656390496, "grad_norm": 0.25853925943374634, "learning_rate": 0.0001, "loss": 1.4952, "step": 2225 }, { "epoch": 0.35741811175337185, "grad_norm": 0.278916597366333, "learning_rate": 0.0001, "loss": 1.5436, "step": 2226 }, { "epoch": 0.3575786769428388, "grad_norm": 0.26227280497550964, "learning_rate": 0.0001, "loss": 1.5987, "step": 2227 }, { "epoch": 0.35773924213230573, "grad_norm": 0.2559322416782379, "learning_rate": 0.0001, "loss": 1.5715, "step": 2228 }, { "epoch": 0.3578998073217726, "grad_norm": 0.26765576004981995, "learning_rate": 0.0001, "loss": 1.5298, "step": 2229 }, { "epoch": 0.35806037251123957, "grad_norm": 0.25036296248435974, "learning_rate": 0.0001, "loss": 1.5902, "step": 2230 }, { "epoch": 0.3582209377007065, "grad_norm": 0.2417023926973343, "learning_rate": 0.0001, "loss": 1.4898, "step": 2231 }, { "epoch": 0.3583815028901734, "grad_norm": 0.2664749026298523, "learning_rate": 0.0001, "loss": 1.521, "step": 2232 }, { "epoch": 0.35854206807964034, "grad_norm": 0.2519276738166809, "learning_rate": 0.0001, "loss": 1.5354, "step": 2233 }, { "epoch": 0.3587026332691073, "grad_norm": 0.27917492389678955, "learning_rate": 0.0001, "loss": 1.6605, "step": 2234 }, { "epoch": 0.3588631984585742, "grad_norm": 0.25916269421577454, "learning_rate": 0.0001, "loss": 1.6011, "step": 2235 }, { "epoch": 0.3590237636480411, "grad_norm": 0.27903980016708374, "learning_rate": 0.0001, "loss": 1.5656, "step": 2236 }, { "epoch": 0.359184328837508, "grad_norm": 0.26287680864334106, "learning_rate": 0.0001, "loss": 1.5848, "step": 2237 }, { "epoch": 0.35934489402697495, "grad_norm": 0.255217969417572, "learning_rate": 0.0001, "loss": 1.6421, "step": 2238 }, { "epoch": 0.3595054592164419, "grad_norm": 0.2615929841995239, "learning_rate": 0.0001, "loss": 1.5013, "step": 2239 }, { "epoch": 0.3596660244059088, "grad_norm": 0.25425776839256287, "learning_rate": 0.0001, "loss": 1.4706, "step": 2240 }, { "epoch": 0.3598265895953757, "grad_norm": 0.26095038652420044, "learning_rate": 0.0001, "loss": 1.5401, "step": 2241 }, { "epoch": 0.35998715478484267, "grad_norm": 0.2528751790523529, "learning_rate": 0.0001, "loss": 1.5341, "step": 2242 }, { "epoch": 0.36014771997430955, "grad_norm": 0.2808654308319092, "learning_rate": 0.0001, "loss": 1.5915, "step": 2243 }, { "epoch": 0.3603082851637765, "grad_norm": 0.27228716015815735, "learning_rate": 0.0001, "loss": 1.5292, "step": 2244 }, { "epoch": 0.36046885035324344, "grad_norm": 0.24799329042434692, "learning_rate": 0.0001, "loss": 1.4773, "step": 2245 }, { "epoch": 0.36062941554271033, "grad_norm": 0.26087716221809387, "learning_rate": 0.0001, "loss": 1.4816, "step": 2246 }, { "epoch": 0.3607899807321773, "grad_norm": 0.29650628566741943, "learning_rate": 0.0001, "loss": 1.5467, "step": 2247 }, { "epoch": 0.36095054592164416, "grad_norm": 0.27107101678848267, "learning_rate": 0.0001, "loss": 1.6258, "step": 2248 }, { "epoch": 0.3611111111111111, "grad_norm": 0.2658062279224396, "learning_rate": 0.0001, "loss": 1.5173, "step": 2249 }, { "epoch": 0.36127167630057805, "grad_norm": 0.2656562030315399, "learning_rate": 0.0001, "loss": 1.5613, "step": 2250 }, { "epoch": 0.36143224149004494, "grad_norm": 0.254713773727417, "learning_rate": 0.0001, "loss": 1.59, "step": 2251 }, { "epoch": 0.3615928066795119, "grad_norm": 0.2695602476596832, "learning_rate": 0.0001, "loss": 1.5835, "step": 2252 }, { "epoch": 0.3617533718689788, "grad_norm": 0.2540896534919739, "learning_rate": 0.0001, "loss": 1.6294, "step": 2253 }, { "epoch": 0.3619139370584457, "grad_norm": 0.24553243815898895, "learning_rate": 0.0001, "loss": 1.4728, "step": 2254 }, { "epoch": 0.36207450224791266, "grad_norm": 0.25581425428390503, "learning_rate": 0.0001, "loss": 1.5225, "step": 2255 }, { "epoch": 0.3622350674373796, "grad_norm": 0.25077083706855774, "learning_rate": 0.0001, "loss": 1.4769, "step": 2256 }, { "epoch": 0.3623956326268465, "grad_norm": 0.2618795931339264, "learning_rate": 0.0001, "loss": 1.4968, "step": 2257 }, { "epoch": 0.36255619781631343, "grad_norm": 0.24708011746406555, "learning_rate": 0.0001, "loss": 1.4932, "step": 2258 }, { "epoch": 0.3627167630057804, "grad_norm": 0.26081109046936035, "learning_rate": 0.0001, "loss": 1.5161, "step": 2259 }, { "epoch": 0.36287732819524726, "grad_norm": 0.26388704776763916, "learning_rate": 0.0001, "loss": 1.5432, "step": 2260 }, { "epoch": 0.3630378933847142, "grad_norm": 0.2524031698703766, "learning_rate": 0.0001, "loss": 1.5497, "step": 2261 }, { "epoch": 0.3631984585741811, "grad_norm": 0.25277817249298096, "learning_rate": 0.0001, "loss": 1.4921, "step": 2262 }, { "epoch": 0.36335902376364804, "grad_norm": 0.24862059950828552, "learning_rate": 0.0001, "loss": 1.5437, "step": 2263 }, { "epoch": 0.363519588953115, "grad_norm": 0.26767322421073914, "learning_rate": 0.0001, "loss": 1.5406, "step": 2264 }, { "epoch": 0.36368015414258187, "grad_norm": 0.2560683488845825, "learning_rate": 0.0001, "loss": 1.5773, "step": 2265 }, { "epoch": 0.3638407193320488, "grad_norm": 0.27597951889038086, "learning_rate": 0.0001, "loss": 1.609, "step": 2266 }, { "epoch": 0.36400128452151576, "grad_norm": 0.2604365348815918, "learning_rate": 0.0001, "loss": 1.5812, "step": 2267 }, { "epoch": 0.36416184971098264, "grad_norm": 0.268728643655777, "learning_rate": 0.0001, "loss": 1.5985, "step": 2268 }, { "epoch": 0.3643224149004496, "grad_norm": 0.2544846832752228, "learning_rate": 0.0001, "loss": 1.5059, "step": 2269 }, { "epoch": 0.36448298008991653, "grad_norm": 0.2506101727485657, "learning_rate": 0.0001, "loss": 1.5233, "step": 2270 }, { "epoch": 0.3646435452793834, "grad_norm": 0.2539709508419037, "learning_rate": 0.0001, "loss": 1.5356, "step": 2271 }, { "epoch": 0.36480411046885036, "grad_norm": 0.2580517530441284, "learning_rate": 0.0001, "loss": 1.4624, "step": 2272 }, { "epoch": 0.36496467565831725, "grad_norm": 0.24341896176338196, "learning_rate": 0.0001, "loss": 1.4838, "step": 2273 }, { "epoch": 0.3651252408477842, "grad_norm": 0.3055890202522278, "learning_rate": 0.0001, "loss": 1.5315, "step": 2274 }, { "epoch": 0.36528580603725114, "grad_norm": 0.2585761547088623, "learning_rate": 0.0001, "loss": 1.5925, "step": 2275 }, { "epoch": 0.365446371226718, "grad_norm": 0.255572646856308, "learning_rate": 0.0001, "loss": 1.487, "step": 2276 }, { "epoch": 0.36560693641618497, "grad_norm": 0.2550049424171448, "learning_rate": 0.0001, "loss": 1.549, "step": 2277 }, { "epoch": 0.3657675016056519, "grad_norm": 0.2758522927761078, "learning_rate": 0.0001, "loss": 1.5406, "step": 2278 }, { "epoch": 0.3659280667951188, "grad_norm": 0.27849021553993225, "learning_rate": 0.0001, "loss": 1.659, "step": 2279 }, { "epoch": 0.36608863198458574, "grad_norm": 0.2738516330718994, "learning_rate": 0.0001, "loss": 1.5025, "step": 2280 }, { "epoch": 0.3662491971740527, "grad_norm": 0.27679434418678284, "learning_rate": 0.0001, "loss": 1.5515, "step": 2281 }, { "epoch": 0.3664097623635196, "grad_norm": 0.24797074496746063, "learning_rate": 0.0001, "loss": 1.5051, "step": 2282 }, { "epoch": 0.3665703275529865, "grad_norm": 0.2453446090221405, "learning_rate": 0.0001, "loss": 1.6077, "step": 2283 }, { "epoch": 0.36673089274245346, "grad_norm": 0.2575076222419739, "learning_rate": 0.0001, "loss": 1.569, "step": 2284 }, { "epoch": 0.36689145793192035, "grad_norm": 0.2688547372817993, "learning_rate": 0.0001, "loss": 1.5406, "step": 2285 }, { "epoch": 0.3670520231213873, "grad_norm": 0.2662636637687683, "learning_rate": 0.0001, "loss": 1.6246, "step": 2286 }, { "epoch": 0.3672125883108542, "grad_norm": 0.2721993923187256, "learning_rate": 0.0001, "loss": 1.5124, "step": 2287 }, { "epoch": 0.3673731535003211, "grad_norm": 0.26898816227912903, "learning_rate": 0.0001, "loss": 1.5695, "step": 2288 }, { "epoch": 0.36753371868978807, "grad_norm": 0.2678947150707245, "learning_rate": 0.0001, "loss": 1.5957, "step": 2289 }, { "epoch": 0.36769428387925496, "grad_norm": 0.2912648618221283, "learning_rate": 0.0001, "loss": 1.5751, "step": 2290 }, { "epoch": 0.3678548490687219, "grad_norm": 0.2662580907344818, "learning_rate": 0.0001, "loss": 1.6528, "step": 2291 }, { "epoch": 0.36801541425818884, "grad_norm": 0.24180811643600464, "learning_rate": 0.0001, "loss": 1.493, "step": 2292 }, { "epoch": 0.36817597944765573, "grad_norm": 0.26568982005119324, "learning_rate": 0.0001, "loss": 1.4992, "step": 2293 }, { "epoch": 0.3683365446371227, "grad_norm": 0.2533663511276245, "learning_rate": 0.0001, "loss": 1.5462, "step": 2294 }, { "epoch": 0.3684971098265896, "grad_norm": 0.2525925040245056, "learning_rate": 0.0001, "loss": 1.5829, "step": 2295 }, { "epoch": 0.3686576750160565, "grad_norm": 0.2568332552909851, "learning_rate": 0.0001, "loss": 1.5286, "step": 2296 }, { "epoch": 0.36881824020552345, "grad_norm": 0.2500928044319153, "learning_rate": 0.0001, "loss": 1.4974, "step": 2297 }, { "epoch": 0.36897880539499034, "grad_norm": 0.24780963361263275, "learning_rate": 0.0001, "loss": 1.5155, "step": 2298 }, { "epoch": 0.3691393705844573, "grad_norm": 0.2531459927558899, "learning_rate": 0.0001, "loss": 1.5436, "step": 2299 }, { "epoch": 0.3692999357739242, "grad_norm": 0.2483982890844345, "learning_rate": 0.0001, "loss": 1.4811, "step": 2300 }, { "epoch": 0.3694605009633911, "grad_norm": 0.24589164555072784, "learning_rate": 0.0001, "loss": 1.5705, "step": 2301 }, { "epoch": 0.36962106615285806, "grad_norm": 0.24893982708454132, "learning_rate": 0.0001, "loss": 1.5017, "step": 2302 }, { "epoch": 0.369781631342325, "grad_norm": 0.24600011110305786, "learning_rate": 0.0001, "loss": 1.4987, "step": 2303 }, { "epoch": 0.3699421965317919, "grad_norm": 0.24123330414295197, "learning_rate": 0.0001, "loss": 1.5392, "step": 2304 }, { "epoch": 0.37010276172125883, "grad_norm": 0.26374539732933044, "learning_rate": 0.0001, "loss": 1.5464, "step": 2305 }, { "epoch": 0.3702633269107258, "grad_norm": 0.238440603017807, "learning_rate": 0.0001, "loss": 1.4842, "step": 2306 }, { "epoch": 0.37042389210019266, "grad_norm": 0.25389161705970764, "learning_rate": 0.0001, "loss": 1.5679, "step": 2307 }, { "epoch": 0.3705844572896596, "grad_norm": 0.24614334106445312, "learning_rate": 0.0001, "loss": 1.4467, "step": 2308 }, { "epoch": 0.37074502247912655, "grad_norm": 0.2794525623321533, "learning_rate": 0.0001, "loss": 1.4848, "step": 2309 }, { "epoch": 0.37090558766859344, "grad_norm": 0.2474374771118164, "learning_rate": 0.0001, "loss": 1.5284, "step": 2310 }, { "epoch": 0.3710661528580604, "grad_norm": 0.25746452808380127, "learning_rate": 0.0001, "loss": 1.6253, "step": 2311 }, { "epoch": 0.37122671804752727, "grad_norm": 0.26093387603759766, "learning_rate": 0.0001, "loss": 1.5352, "step": 2312 }, { "epoch": 0.3713872832369942, "grad_norm": 1.198448657989502, "learning_rate": 0.0001, "loss": 1.4913, "step": 2313 }, { "epoch": 0.37154784842646116, "grad_norm": 0.24500393867492676, "learning_rate": 0.0001, "loss": 1.4841, "step": 2314 }, { "epoch": 0.37170841361592805, "grad_norm": 0.2583453953266144, "learning_rate": 0.0001, "loss": 1.5428, "step": 2315 }, { "epoch": 0.371868978805395, "grad_norm": 0.2561831474304199, "learning_rate": 0.0001, "loss": 1.5289, "step": 2316 }, { "epoch": 0.37202954399486193, "grad_norm": 0.29225724935531616, "learning_rate": 0.0001, "loss": 1.5268, "step": 2317 }, { "epoch": 0.3721901091843288, "grad_norm": 0.26211458444595337, "learning_rate": 0.0001, "loss": 1.5352, "step": 2318 }, { "epoch": 0.37235067437379576, "grad_norm": 0.2648836672306061, "learning_rate": 0.0001, "loss": 1.5129, "step": 2319 }, { "epoch": 0.3725112395632627, "grad_norm": 0.2531552016735077, "learning_rate": 0.0001, "loss": 1.518, "step": 2320 }, { "epoch": 0.3726718047527296, "grad_norm": 0.2425978034734726, "learning_rate": 0.0001, "loss": 1.4766, "step": 2321 }, { "epoch": 0.37283236994219654, "grad_norm": 0.25794047117233276, "learning_rate": 0.0001, "loss": 1.5844, "step": 2322 }, { "epoch": 0.37299293513166343, "grad_norm": 0.2569916546344757, "learning_rate": 0.0001, "loss": 1.5871, "step": 2323 }, { "epoch": 0.37315350032113037, "grad_norm": 0.2629062235355377, "learning_rate": 0.0001, "loss": 1.4939, "step": 2324 }, { "epoch": 0.3733140655105973, "grad_norm": 0.24914056062698364, "learning_rate": 0.0001, "loss": 1.5539, "step": 2325 }, { "epoch": 0.3734746307000642, "grad_norm": 0.255267471075058, "learning_rate": 0.0001, "loss": 1.4701, "step": 2326 }, { "epoch": 0.37363519588953115, "grad_norm": 0.24833789467811584, "learning_rate": 0.0001, "loss": 1.5282, "step": 2327 }, { "epoch": 0.3737957610789981, "grad_norm": 0.24636401236057281, "learning_rate": 0.0001, "loss": 1.5551, "step": 2328 }, { "epoch": 0.373956326268465, "grad_norm": 0.24946527183055878, "learning_rate": 0.0001, "loss": 1.5193, "step": 2329 }, { "epoch": 0.3741168914579319, "grad_norm": 0.253368616104126, "learning_rate": 0.0001, "loss": 1.4774, "step": 2330 }, { "epoch": 0.37427745664739887, "grad_norm": 0.2617979943752289, "learning_rate": 0.0001, "loss": 1.5071, "step": 2331 }, { "epoch": 0.37443802183686575, "grad_norm": 0.264935165643692, "learning_rate": 0.0001, "loss": 1.5944, "step": 2332 }, { "epoch": 0.3745985870263327, "grad_norm": 0.2448675036430359, "learning_rate": 0.0001, "loss": 1.5315, "step": 2333 }, { "epoch": 0.37475915221579964, "grad_norm": 0.2725590169429779, "learning_rate": 0.0001, "loss": 1.5681, "step": 2334 }, { "epoch": 0.37491971740526653, "grad_norm": 0.25366848707199097, "learning_rate": 0.0001, "loss": 1.3628, "step": 2335 }, { "epoch": 0.37508028259473347, "grad_norm": 0.2598473131656647, "learning_rate": 0.0001, "loss": 1.482, "step": 2336 }, { "epoch": 0.37524084778420036, "grad_norm": 0.2570243775844574, "learning_rate": 0.0001, "loss": 1.5799, "step": 2337 }, { "epoch": 0.3754014129736673, "grad_norm": 0.2450980842113495, "learning_rate": 0.0001, "loss": 1.5059, "step": 2338 }, { "epoch": 0.37556197816313425, "grad_norm": 0.24217240512371063, "learning_rate": 0.0001, "loss": 1.5302, "step": 2339 }, { "epoch": 0.37572254335260113, "grad_norm": 0.2534436583518982, "learning_rate": 0.0001, "loss": 1.5605, "step": 2340 }, { "epoch": 0.3758831085420681, "grad_norm": 0.24209095537662506, "learning_rate": 0.0001, "loss": 1.4772, "step": 2341 }, { "epoch": 0.376043673731535, "grad_norm": 0.25760748982429504, "learning_rate": 0.0001, "loss": 1.521, "step": 2342 }, { "epoch": 0.3762042389210019, "grad_norm": 0.2506316900253296, "learning_rate": 0.0001, "loss": 1.5185, "step": 2343 }, { "epoch": 0.37636480411046885, "grad_norm": 0.2690396308898926, "learning_rate": 0.0001, "loss": 1.5056, "step": 2344 }, { "epoch": 0.3765253692999358, "grad_norm": 0.2650214731693268, "learning_rate": 0.0001, "loss": 1.5884, "step": 2345 }, { "epoch": 0.3766859344894027, "grad_norm": 0.2703990638256073, "learning_rate": 0.0001, "loss": 1.6113, "step": 2346 }, { "epoch": 0.37684649967886963, "grad_norm": 0.24998964369297028, "learning_rate": 0.0001, "loss": 1.5366, "step": 2347 }, { "epoch": 0.37700706486833657, "grad_norm": 0.2509383261203766, "learning_rate": 0.0001, "loss": 1.5678, "step": 2348 }, { "epoch": 0.37716763005780346, "grad_norm": 0.2529858648777008, "learning_rate": 0.0001, "loss": 1.4946, "step": 2349 }, { "epoch": 0.3773281952472704, "grad_norm": 0.26368749141693115, "learning_rate": 0.0001, "loss": 1.5901, "step": 2350 }, { "epoch": 0.3774887604367373, "grad_norm": 0.25796085596084595, "learning_rate": 0.0001, "loss": 1.5278, "step": 2351 }, { "epoch": 0.37764932562620424, "grad_norm": 0.24968422949314117, "learning_rate": 0.0001, "loss": 1.4562, "step": 2352 }, { "epoch": 0.3778098908156712, "grad_norm": 0.24780969321727753, "learning_rate": 0.0001, "loss": 1.5084, "step": 2353 }, { "epoch": 0.37797045600513807, "grad_norm": 0.2629697620868683, "learning_rate": 0.0001, "loss": 1.583, "step": 2354 }, { "epoch": 0.378131021194605, "grad_norm": 0.2600143551826477, "learning_rate": 0.0001, "loss": 1.5817, "step": 2355 }, { "epoch": 0.37829158638407195, "grad_norm": 0.3038232624530792, "learning_rate": 0.0001, "loss": 1.4952, "step": 2356 }, { "epoch": 0.37845215157353884, "grad_norm": 0.25345587730407715, "learning_rate": 0.0001, "loss": 1.6067, "step": 2357 }, { "epoch": 0.3786127167630058, "grad_norm": 0.26497361063957214, "learning_rate": 0.0001, "loss": 1.5539, "step": 2358 }, { "epoch": 0.37877328195247273, "grad_norm": 0.26825860142707825, "learning_rate": 0.0001, "loss": 1.5312, "step": 2359 }, { "epoch": 0.3789338471419396, "grad_norm": 0.2717742323875427, "learning_rate": 0.0001, "loss": 1.509, "step": 2360 }, { "epoch": 0.37909441233140656, "grad_norm": 0.24727971851825714, "learning_rate": 0.0001, "loss": 1.5388, "step": 2361 }, { "epoch": 0.37925497752087345, "grad_norm": 0.29400545358657837, "learning_rate": 0.0001, "loss": 1.5218, "step": 2362 }, { "epoch": 0.3794155427103404, "grad_norm": 0.2577107548713684, "learning_rate": 0.0001, "loss": 1.6147, "step": 2363 }, { "epoch": 0.37957610789980734, "grad_norm": 0.27200621366500854, "learning_rate": 0.0001, "loss": 1.6134, "step": 2364 }, { "epoch": 0.3797366730892742, "grad_norm": 0.2652324438095093, "learning_rate": 0.0001, "loss": 1.5501, "step": 2365 }, { "epoch": 0.37989723827874117, "grad_norm": 0.2527463436126709, "learning_rate": 0.0001, "loss": 1.5035, "step": 2366 }, { "epoch": 0.3800578034682081, "grad_norm": 0.24923215806484222, "learning_rate": 0.0001, "loss": 1.4147, "step": 2367 }, { "epoch": 0.380218368657675, "grad_norm": 0.2674259543418884, "learning_rate": 0.0001, "loss": 1.5684, "step": 2368 }, { "epoch": 0.38037893384714194, "grad_norm": 0.25354260206222534, "learning_rate": 0.0001, "loss": 1.6004, "step": 2369 }, { "epoch": 0.3805394990366089, "grad_norm": 0.24319404363632202, "learning_rate": 0.0001, "loss": 1.4466, "step": 2370 }, { "epoch": 0.3807000642260758, "grad_norm": 0.24842718243598938, "learning_rate": 0.0001, "loss": 1.4414, "step": 2371 }, { "epoch": 0.3808606294155427, "grad_norm": 0.25003471970558167, "learning_rate": 0.0001, "loss": 1.5588, "step": 2372 }, { "epoch": 0.38102119460500966, "grad_norm": 0.26346343755722046, "learning_rate": 0.0001, "loss": 1.5043, "step": 2373 }, { "epoch": 0.38118175979447655, "grad_norm": 0.2605985105037689, "learning_rate": 0.0001, "loss": 1.4751, "step": 2374 }, { "epoch": 0.3813423249839435, "grad_norm": 0.2635630965232849, "learning_rate": 0.0001, "loss": 1.5325, "step": 2375 }, { "epoch": 0.3815028901734104, "grad_norm": 0.2483413815498352, "learning_rate": 0.0001, "loss": 1.5835, "step": 2376 }, { "epoch": 0.3816634553628773, "grad_norm": 0.2556459307670593, "learning_rate": 0.0001, "loss": 1.5943, "step": 2377 }, { "epoch": 0.38182402055234427, "grad_norm": 0.28714779019355774, "learning_rate": 0.0001, "loss": 1.5772, "step": 2378 }, { "epoch": 0.38198458574181116, "grad_norm": 0.24681034684181213, "learning_rate": 0.0001, "loss": 1.5301, "step": 2379 }, { "epoch": 0.3821451509312781, "grad_norm": 0.24625767767429352, "learning_rate": 0.0001, "loss": 1.5651, "step": 2380 }, { "epoch": 0.38230571612074504, "grad_norm": 0.2521355450153351, "learning_rate": 0.0001, "loss": 1.5356, "step": 2381 }, { "epoch": 0.38246628131021193, "grad_norm": 0.2519856095314026, "learning_rate": 0.0001, "loss": 1.5701, "step": 2382 }, { "epoch": 0.3826268464996789, "grad_norm": 0.2626766562461853, "learning_rate": 0.0001, "loss": 1.5292, "step": 2383 }, { "epoch": 0.3827874116891458, "grad_norm": 0.25843772292137146, "learning_rate": 0.0001, "loss": 1.5953, "step": 2384 }, { "epoch": 0.3829479768786127, "grad_norm": 0.25508567690849304, "learning_rate": 0.0001, "loss": 1.5657, "step": 2385 }, { "epoch": 0.38310854206807965, "grad_norm": 0.2541292607784271, "learning_rate": 0.0001, "loss": 1.5553, "step": 2386 }, { "epoch": 0.38326910725754654, "grad_norm": 0.26325348019599915, "learning_rate": 0.0001, "loss": 1.5455, "step": 2387 }, { "epoch": 0.3834296724470135, "grad_norm": 0.259388267993927, "learning_rate": 0.0001, "loss": 1.5504, "step": 2388 }, { "epoch": 0.3835902376364804, "grad_norm": 0.2743922472000122, "learning_rate": 0.0001, "loss": 1.5492, "step": 2389 }, { "epoch": 0.3837508028259473, "grad_norm": 0.261371374130249, "learning_rate": 0.0001, "loss": 1.5414, "step": 2390 }, { "epoch": 0.38391136801541426, "grad_norm": 0.24848809838294983, "learning_rate": 0.0001, "loss": 1.4902, "step": 2391 }, { "epoch": 0.3840719332048812, "grad_norm": 0.2744009494781494, "learning_rate": 0.0001, "loss": 1.4709, "step": 2392 }, { "epoch": 0.3842324983943481, "grad_norm": 0.2709813714027405, "learning_rate": 0.0001, "loss": 1.5535, "step": 2393 }, { "epoch": 0.38439306358381503, "grad_norm": 0.24891509115695953, "learning_rate": 0.0001, "loss": 1.5542, "step": 2394 }, { "epoch": 0.384553628773282, "grad_norm": 0.2544114589691162, "learning_rate": 0.0001, "loss": 1.5439, "step": 2395 }, { "epoch": 0.38471419396274886, "grad_norm": 0.2500731348991394, "learning_rate": 0.0001, "loss": 1.5878, "step": 2396 }, { "epoch": 0.3848747591522158, "grad_norm": 0.2746654748916626, "learning_rate": 0.0001, "loss": 1.5404, "step": 2397 }, { "epoch": 0.38503532434168275, "grad_norm": 0.25532686710357666, "learning_rate": 0.0001, "loss": 1.6097, "step": 2398 }, { "epoch": 0.38519588953114964, "grad_norm": 0.2480587363243103, "learning_rate": 0.0001, "loss": 1.599, "step": 2399 }, { "epoch": 0.3853564547206166, "grad_norm": 0.2571102976799011, "learning_rate": 0.0001, "loss": 1.5527, "step": 2400 }, { "epoch": 0.38551701991008347, "grad_norm": 0.24474580585956573, "learning_rate": 0.0001, "loss": 1.6144, "step": 2401 }, { "epoch": 0.3856775850995504, "grad_norm": 0.2899627089500427, "learning_rate": 0.0001, "loss": 1.5268, "step": 2402 }, { "epoch": 0.38583815028901736, "grad_norm": 0.2516287863254547, "learning_rate": 0.0001, "loss": 1.527, "step": 2403 }, { "epoch": 0.38599871547848424, "grad_norm": 0.2463321089744568, "learning_rate": 0.0001, "loss": 1.5011, "step": 2404 }, { "epoch": 0.3861592806679512, "grad_norm": 0.26257675886154175, "learning_rate": 0.0001, "loss": 1.5527, "step": 2405 }, { "epoch": 0.38631984585741813, "grad_norm": 0.2730129063129425, "learning_rate": 0.0001, "loss": 1.5943, "step": 2406 }, { "epoch": 0.386480411046885, "grad_norm": 0.2407064586877823, "learning_rate": 0.0001, "loss": 1.4716, "step": 2407 }, { "epoch": 0.38664097623635196, "grad_norm": 0.25162702798843384, "learning_rate": 0.0001, "loss": 1.5346, "step": 2408 }, { "epoch": 0.3868015414258189, "grad_norm": 0.26848462224006653, "learning_rate": 0.0001, "loss": 1.5723, "step": 2409 }, { "epoch": 0.3869621066152858, "grad_norm": 0.34027060866355896, "learning_rate": 0.0001, "loss": 1.4739, "step": 2410 }, { "epoch": 0.38712267180475274, "grad_norm": 0.26073455810546875, "learning_rate": 0.0001, "loss": 1.5585, "step": 2411 }, { "epoch": 0.3872832369942196, "grad_norm": 0.25451356172561646, "learning_rate": 0.0001, "loss": 1.5711, "step": 2412 }, { "epoch": 0.38744380218368657, "grad_norm": 0.2457560896873474, "learning_rate": 0.0001, "loss": 1.5013, "step": 2413 }, { "epoch": 0.3876043673731535, "grad_norm": 0.2544652819633484, "learning_rate": 0.0001, "loss": 1.6145, "step": 2414 }, { "epoch": 0.3877649325626204, "grad_norm": 0.2672406733036041, "learning_rate": 0.0001, "loss": 1.5614, "step": 2415 }, { "epoch": 0.38792549775208734, "grad_norm": 0.2651098072528839, "learning_rate": 0.0001, "loss": 1.5902, "step": 2416 }, { "epoch": 0.3880860629415543, "grad_norm": 0.3771659731864929, "learning_rate": 0.0001, "loss": 1.5871, "step": 2417 }, { "epoch": 0.3882466281310212, "grad_norm": 0.24673216044902802, "learning_rate": 0.0001, "loss": 1.4673, "step": 2418 }, { "epoch": 0.3884071933204881, "grad_norm": 0.25624924898147583, "learning_rate": 0.0001, "loss": 1.6001, "step": 2419 }, { "epoch": 0.38856775850995506, "grad_norm": 0.2547285556793213, "learning_rate": 0.0001, "loss": 1.4866, "step": 2420 }, { "epoch": 0.38872832369942195, "grad_norm": 0.5639746785163879, "learning_rate": 0.0001, "loss": 1.4773, "step": 2421 }, { "epoch": 0.3888888888888889, "grad_norm": 0.2534834146499634, "learning_rate": 0.0001, "loss": 1.5176, "step": 2422 }, { "epoch": 0.38904945407835584, "grad_norm": 0.26092657446861267, "learning_rate": 0.0001, "loss": 1.6083, "step": 2423 }, { "epoch": 0.3892100192678227, "grad_norm": 0.2513125240802765, "learning_rate": 0.0001, "loss": 1.5747, "step": 2424 }, { "epoch": 0.38937058445728967, "grad_norm": 0.2617529034614563, "learning_rate": 0.0001, "loss": 1.5298, "step": 2425 }, { "epoch": 0.38953114964675656, "grad_norm": 0.26412928104400635, "learning_rate": 0.0001, "loss": 1.5612, "step": 2426 }, { "epoch": 0.3896917148362235, "grad_norm": 0.2545403242111206, "learning_rate": 0.0001, "loss": 1.5585, "step": 2427 }, { "epoch": 0.38985228002569045, "grad_norm": 0.2545302212238312, "learning_rate": 0.0001, "loss": 1.5066, "step": 2428 }, { "epoch": 0.39001284521515733, "grad_norm": 0.25672441720962524, "learning_rate": 0.0001, "loss": 1.5193, "step": 2429 }, { "epoch": 0.3901734104046243, "grad_norm": 0.26069778203964233, "learning_rate": 0.0001, "loss": 1.5505, "step": 2430 }, { "epoch": 0.3903339755940912, "grad_norm": 0.2753305733203888, "learning_rate": 0.0001, "loss": 1.5965, "step": 2431 }, { "epoch": 0.3904945407835581, "grad_norm": 0.2471272051334381, "learning_rate": 0.0001, "loss": 1.449, "step": 2432 }, { "epoch": 0.39065510597302505, "grad_norm": 0.27465078234672546, "learning_rate": 0.0001, "loss": 1.5436, "step": 2433 }, { "epoch": 0.390815671162492, "grad_norm": 0.2718292772769928, "learning_rate": 0.0001, "loss": 1.528, "step": 2434 }, { "epoch": 0.3909762363519589, "grad_norm": 0.2727717161178589, "learning_rate": 0.0001, "loss": 1.5999, "step": 2435 }, { "epoch": 0.3911368015414258, "grad_norm": 0.2534559965133667, "learning_rate": 0.0001, "loss": 1.6012, "step": 2436 }, { "epoch": 0.3912973667308927, "grad_norm": 0.24747711420059204, "learning_rate": 0.0001, "loss": 1.5206, "step": 2437 }, { "epoch": 0.39145793192035966, "grad_norm": 0.26092955470085144, "learning_rate": 0.0001, "loss": 1.5382, "step": 2438 }, { "epoch": 0.3916184971098266, "grad_norm": 0.25769567489624023, "learning_rate": 0.0001, "loss": 1.5279, "step": 2439 }, { "epoch": 0.3917790622992935, "grad_norm": 0.25086402893066406, "learning_rate": 0.0001, "loss": 1.5302, "step": 2440 }, { "epoch": 0.39193962748876043, "grad_norm": 0.26384875178337097, "learning_rate": 0.0001, "loss": 1.458, "step": 2441 }, { "epoch": 0.3921001926782274, "grad_norm": 0.27071601152420044, "learning_rate": 0.0001, "loss": 1.5745, "step": 2442 }, { "epoch": 0.39226075786769427, "grad_norm": 0.24402453005313873, "learning_rate": 0.0001, "loss": 1.5489, "step": 2443 }, { "epoch": 0.3924213230571612, "grad_norm": 0.25266405940055847, "learning_rate": 0.0001, "loss": 1.5293, "step": 2444 }, { "epoch": 0.39258188824662815, "grad_norm": 0.2891020178794861, "learning_rate": 0.0001, "loss": 1.5921, "step": 2445 }, { "epoch": 0.39274245343609504, "grad_norm": 0.2697277069091797, "learning_rate": 0.0001, "loss": 1.5985, "step": 2446 }, { "epoch": 0.392903018625562, "grad_norm": 0.2564629316329956, "learning_rate": 0.0001, "loss": 1.5235, "step": 2447 }, { "epoch": 0.3930635838150289, "grad_norm": 0.27220186591148376, "learning_rate": 0.0001, "loss": 1.5511, "step": 2448 }, { "epoch": 0.3932241490044958, "grad_norm": 0.2559594213962555, "learning_rate": 0.0001, "loss": 1.5893, "step": 2449 }, { "epoch": 0.39338471419396276, "grad_norm": 0.2827349305152893, "learning_rate": 0.0001, "loss": 1.4945, "step": 2450 }, { "epoch": 0.39354527938342965, "grad_norm": 0.2591390311717987, "learning_rate": 0.0001, "loss": 1.5236, "step": 2451 }, { "epoch": 0.3937058445728966, "grad_norm": 0.2526845932006836, "learning_rate": 0.0001, "loss": 1.5154, "step": 2452 }, { "epoch": 0.39386640976236353, "grad_norm": 0.2741335928440094, "learning_rate": 0.0001, "loss": 1.5649, "step": 2453 }, { "epoch": 0.3940269749518304, "grad_norm": 0.26153987646102905, "learning_rate": 0.0001, "loss": 1.5225, "step": 2454 }, { "epoch": 0.39418754014129737, "grad_norm": 0.2582678496837616, "learning_rate": 0.0001, "loss": 1.5703, "step": 2455 }, { "epoch": 0.3943481053307643, "grad_norm": 0.25584661960601807, "learning_rate": 0.0001, "loss": 1.4649, "step": 2456 }, { "epoch": 0.3945086705202312, "grad_norm": 0.2764172852039337, "learning_rate": 0.0001, "loss": 1.58, "step": 2457 }, { "epoch": 0.39466923570969814, "grad_norm": 0.2539321184158325, "learning_rate": 0.0001, "loss": 1.5412, "step": 2458 }, { "epoch": 0.3948298008991651, "grad_norm": 0.2499200999736786, "learning_rate": 0.0001, "loss": 1.5115, "step": 2459 }, { "epoch": 0.394990366088632, "grad_norm": 0.27872729301452637, "learning_rate": 0.0001, "loss": 1.542, "step": 2460 }, { "epoch": 0.3951509312780989, "grad_norm": 0.2624555826187134, "learning_rate": 0.0001, "loss": 1.6081, "step": 2461 }, { "epoch": 0.39531149646756586, "grad_norm": 0.24843905866146088, "learning_rate": 0.0001, "loss": 1.5184, "step": 2462 }, { "epoch": 0.39547206165703275, "grad_norm": 2.4374990463256836, "learning_rate": 0.0001, "loss": 1.5083, "step": 2463 }, { "epoch": 0.3956326268464997, "grad_norm": 0.27892616391181946, "learning_rate": 0.0001, "loss": 1.4696, "step": 2464 }, { "epoch": 0.3957931920359666, "grad_norm": 0.2818267345428467, "learning_rate": 0.0001, "loss": 1.4486, "step": 2465 }, { "epoch": 0.3959537572254335, "grad_norm": 0.3038250207901001, "learning_rate": 0.0001, "loss": 1.6259, "step": 2466 }, { "epoch": 0.39611432241490047, "grad_norm": 0.31338241696357727, "learning_rate": 0.0001, "loss": 1.5498, "step": 2467 }, { "epoch": 0.39627488760436735, "grad_norm": 0.3117619752883911, "learning_rate": 0.0001, "loss": 1.6265, "step": 2468 }, { "epoch": 0.3964354527938343, "grad_norm": 0.3030302822589874, "learning_rate": 0.0001, "loss": 1.4984, "step": 2469 }, { "epoch": 0.39659601798330124, "grad_norm": 0.30077430605888367, "learning_rate": 0.0001, "loss": 1.5468, "step": 2470 }, { "epoch": 0.39675658317276813, "grad_norm": 0.2581309676170349, "learning_rate": 0.0001, "loss": 1.4842, "step": 2471 }, { "epoch": 0.3969171483622351, "grad_norm": 0.26717257499694824, "learning_rate": 0.0001, "loss": 1.4888, "step": 2472 }, { "epoch": 0.397077713551702, "grad_norm": 0.274960458278656, "learning_rate": 0.0001, "loss": 1.6078, "step": 2473 }, { "epoch": 0.3972382787411689, "grad_norm": 0.2550688683986664, "learning_rate": 0.0001, "loss": 1.5199, "step": 2474 }, { "epoch": 0.39739884393063585, "grad_norm": 0.26114949584007263, "learning_rate": 0.0001, "loss": 1.5089, "step": 2475 }, { "epoch": 0.39755940912010274, "grad_norm": 0.2530492842197418, "learning_rate": 0.0001, "loss": 1.4758, "step": 2476 }, { "epoch": 0.3977199743095697, "grad_norm": 0.2503330409526825, "learning_rate": 0.0001, "loss": 1.4575, "step": 2477 }, { "epoch": 0.3978805394990366, "grad_norm": 0.2625015079975128, "learning_rate": 0.0001, "loss": 1.5725, "step": 2478 }, { "epoch": 0.3980411046885035, "grad_norm": 0.25151559710502625, "learning_rate": 0.0001, "loss": 1.524, "step": 2479 }, { "epoch": 0.39820166987797045, "grad_norm": 0.2679506838321686, "learning_rate": 0.0001, "loss": 1.5126, "step": 2480 }, { "epoch": 0.3983622350674374, "grad_norm": 0.2590963840484619, "learning_rate": 0.0001, "loss": 1.5943, "step": 2481 }, { "epoch": 0.3985228002569043, "grad_norm": 0.24560026824474335, "learning_rate": 0.0001, "loss": 1.5301, "step": 2482 }, { "epoch": 0.39868336544637123, "grad_norm": 0.2637335956096649, "learning_rate": 0.0001, "loss": 1.5418, "step": 2483 }, { "epoch": 0.3988439306358382, "grad_norm": 0.2577708661556244, "learning_rate": 0.0001, "loss": 1.543, "step": 2484 }, { "epoch": 0.39900449582530506, "grad_norm": 0.2670186460018158, "learning_rate": 0.0001, "loss": 1.5635, "step": 2485 }, { "epoch": 0.399165061014772, "grad_norm": 0.2714400887489319, "learning_rate": 0.0001, "loss": 1.5687, "step": 2486 }, { "epoch": 0.39932562620423895, "grad_norm": 0.26190483570098877, "learning_rate": 0.0001, "loss": 1.5756, "step": 2487 }, { "epoch": 0.39948619139370584, "grad_norm": 0.24080729484558105, "learning_rate": 0.0001, "loss": 1.4703, "step": 2488 }, { "epoch": 0.3996467565831728, "grad_norm": 0.25955361127853394, "learning_rate": 0.0001, "loss": 1.6049, "step": 2489 }, { "epoch": 0.39980732177263967, "grad_norm": 0.2651372253894806, "learning_rate": 0.0001, "loss": 1.6211, "step": 2490 }, { "epoch": 0.3999678869621066, "grad_norm": 0.24466051161289215, "learning_rate": 0.0001, "loss": 1.5628, "step": 2491 }, { "epoch": 0.40012845215157355, "grad_norm": 0.25570055842399597, "learning_rate": 0.0001, "loss": 1.5573, "step": 2492 }, { "epoch": 0.40028901734104044, "grad_norm": 0.2497667521238327, "learning_rate": 0.0001, "loss": 1.5025, "step": 2493 }, { "epoch": 0.4004495825305074, "grad_norm": 0.26511573791503906, "learning_rate": 0.0001, "loss": 1.5379, "step": 2494 }, { "epoch": 0.40061014771997433, "grad_norm": 0.2591298222541809, "learning_rate": 0.0001, "loss": 1.4809, "step": 2495 }, { "epoch": 0.4007707129094412, "grad_norm": 0.2511709928512573, "learning_rate": 0.0001, "loss": 1.4905, "step": 2496 }, { "epoch": 0.40093127809890816, "grad_norm": 0.2514796555042267, "learning_rate": 0.0001, "loss": 1.5073, "step": 2497 }, { "epoch": 0.4010918432883751, "grad_norm": 0.2433425784111023, "learning_rate": 0.0001, "loss": 1.5256, "step": 2498 }, { "epoch": 0.401252408477842, "grad_norm": 0.25819021463394165, "learning_rate": 0.0001, "loss": 1.5117, "step": 2499 }, { "epoch": 0.40141297366730894, "grad_norm": 0.2464933842420578, "learning_rate": 0.0001, "loss": 1.5366, "step": 2500 }, { "epoch": 0.4015735388567758, "grad_norm": 0.30560988187789917, "learning_rate": 0.0001, "loss": 1.5413, "step": 2501 }, { "epoch": 0.40173410404624277, "grad_norm": 0.2743026316165924, "learning_rate": 0.0001, "loss": 1.5325, "step": 2502 }, { "epoch": 0.4018946692357097, "grad_norm": 0.24112781882286072, "learning_rate": 0.0001, "loss": 1.5483, "step": 2503 }, { "epoch": 0.4020552344251766, "grad_norm": 0.26679348945617676, "learning_rate": 0.0001, "loss": 1.5957, "step": 2504 }, { "epoch": 0.40221579961464354, "grad_norm": 0.2718173563480377, "learning_rate": 0.0001, "loss": 1.5665, "step": 2505 }, { "epoch": 0.4023763648041105, "grad_norm": 0.2669573426246643, "learning_rate": 0.0001, "loss": 1.5665, "step": 2506 }, { "epoch": 0.4025369299935774, "grad_norm": 0.24345548450946808, "learning_rate": 0.0001, "loss": 1.5037, "step": 2507 }, { "epoch": 0.4026974951830443, "grad_norm": 0.2644270360469818, "learning_rate": 0.0001, "loss": 1.5585, "step": 2508 }, { "epoch": 0.40285806037251126, "grad_norm": 0.2604409456253052, "learning_rate": 0.0001, "loss": 1.4758, "step": 2509 }, { "epoch": 0.40301862556197815, "grad_norm": 0.24744658172130585, "learning_rate": 0.0001, "loss": 1.5103, "step": 2510 }, { "epoch": 0.4031791907514451, "grad_norm": 0.25284886360168457, "learning_rate": 0.0001, "loss": 1.5241, "step": 2511 }, { "epoch": 0.40333975594091204, "grad_norm": 0.2449856996536255, "learning_rate": 0.0001, "loss": 1.5397, "step": 2512 }, { "epoch": 0.4035003211303789, "grad_norm": 0.24185772240161896, "learning_rate": 0.0001, "loss": 1.4647, "step": 2513 }, { "epoch": 0.40366088631984587, "grad_norm": 0.24948136508464813, "learning_rate": 0.0001, "loss": 1.4474, "step": 2514 }, { "epoch": 0.40382145150931276, "grad_norm": 0.23663552105426788, "learning_rate": 0.0001, "loss": 1.5678, "step": 2515 }, { "epoch": 0.4039820166987797, "grad_norm": 0.2513238489627838, "learning_rate": 0.0001, "loss": 1.5092, "step": 2516 }, { "epoch": 0.40414258188824664, "grad_norm": 0.2486760914325714, "learning_rate": 0.0001, "loss": 1.4202, "step": 2517 }, { "epoch": 0.40430314707771353, "grad_norm": 0.27109813690185547, "learning_rate": 0.0001, "loss": 1.5239, "step": 2518 }, { "epoch": 0.4044637122671805, "grad_norm": 0.23884856700897217, "learning_rate": 0.0001, "loss": 1.4775, "step": 2519 }, { "epoch": 0.4046242774566474, "grad_norm": 0.25711917877197266, "learning_rate": 0.0001, "loss": 1.5313, "step": 2520 }, { "epoch": 0.4047848426461143, "grad_norm": 0.2518276572227478, "learning_rate": 0.0001, "loss": 1.5513, "step": 2521 }, { "epoch": 0.40494540783558125, "grad_norm": 0.2606884241104126, "learning_rate": 0.0001, "loss": 1.5232, "step": 2522 }, { "epoch": 0.4051059730250482, "grad_norm": 0.25613394379615784, "learning_rate": 0.0001, "loss": 1.5433, "step": 2523 }, { "epoch": 0.4052665382145151, "grad_norm": 0.24913008511066437, "learning_rate": 0.0001, "loss": 1.462, "step": 2524 }, { "epoch": 0.405427103403982, "grad_norm": 0.25905099511146545, "learning_rate": 0.0001, "loss": 1.6206, "step": 2525 }, { "epoch": 0.4055876685934489, "grad_norm": 0.24587513506412506, "learning_rate": 0.0001, "loss": 1.4632, "step": 2526 }, { "epoch": 0.40574823378291586, "grad_norm": 0.23414070904254913, "learning_rate": 0.0001, "loss": 1.4336, "step": 2527 }, { "epoch": 0.4059087989723828, "grad_norm": 0.24958279728889465, "learning_rate": 0.0001, "loss": 1.5619, "step": 2528 }, { "epoch": 0.4060693641618497, "grad_norm": 0.26070019602775574, "learning_rate": 0.0001, "loss": 1.5057, "step": 2529 }, { "epoch": 0.40622992935131663, "grad_norm": 0.24881942570209503, "learning_rate": 0.0001, "loss": 1.591, "step": 2530 }, { "epoch": 0.4063904945407836, "grad_norm": 0.26656574010849, "learning_rate": 0.0001, "loss": 1.5512, "step": 2531 }, { "epoch": 0.40655105973025046, "grad_norm": 0.2782878875732422, "learning_rate": 0.0001, "loss": 1.5695, "step": 2532 }, { "epoch": 0.4067116249197174, "grad_norm": 0.25589877367019653, "learning_rate": 0.0001, "loss": 1.5471, "step": 2533 }, { "epoch": 0.40687219010918435, "grad_norm": 0.26112449169158936, "learning_rate": 0.0001, "loss": 1.5348, "step": 2534 }, { "epoch": 0.40703275529865124, "grad_norm": 0.2865256667137146, "learning_rate": 0.0001, "loss": 1.5444, "step": 2535 }, { "epoch": 0.4071933204881182, "grad_norm": 0.2590349316596985, "learning_rate": 0.0001, "loss": 1.4691, "step": 2536 }, { "epoch": 0.4073538856775851, "grad_norm": 0.25301647186279297, "learning_rate": 0.0001, "loss": 1.5163, "step": 2537 }, { "epoch": 0.407514450867052, "grad_norm": 0.24199478328227997, "learning_rate": 0.0001, "loss": 1.4866, "step": 2538 }, { "epoch": 0.40767501605651896, "grad_norm": 0.26721832156181335, "learning_rate": 0.0001, "loss": 1.5946, "step": 2539 }, { "epoch": 0.40783558124598585, "grad_norm": 0.2533133327960968, "learning_rate": 0.0001, "loss": 1.5306, "step": 2540 }, { "epoch": 0.4079961464354528, "grad_norm": 0.2587657570838928, "learning_rate": 0.0001, "loss": 1.5496, "step": 2541 }, { "epoch": 0.40815671162491973, "grad_norm": 0.2488706260919571, "learning_rate": 0.0001, "loss": 1.5212, "step": 2542 }, { "epoch": 0.4083172768143866, "grad_norm": 0.26686549186706543, "learning_rate": 0.0001, "loss": 1.5308, "step": 2543 }, { "epoch": 0.40847784200385356, "grad_norm": 0.27136510610580444, "learning_rate": 0.0001, "loss": 1.5257, "step": 2544 }, { "epoch": 0.4086384071933205, "grad_norm": 0.2445228099822998, "learning_rate": 0.0001, "loss": 1.5685, "step": 2545 }, { "epoch": 0.4087989723827874, "grad_norm": 0.24528813362121582, "learning_rate": 0.0001, "loss": 1.5103, "step": 2546 }, { "epoch": 0.40895953757225434, "grad_norm": 0.2633833885192871, "learning_rate": 0.0001, "loss": 1.5417, "step": 2547 }, { "epoch": 0.4091201027617213, "grad_norm": 0.25173237919807434, "learning_rate": 0.0001, "loss": 1.5045, "step": 2548 }, { "epoch": 0.40928066795118817, "grad_norm": 0.2596408426761627, "learning_rate": 0.0001, "loss": 1.5999, "step": 2549 }, { "epoch": 0.4094412331406551, "grad_norm": 0.27274206280708313, "learning_rate": 0.0001, "loss": 1.5617, "step": 2550 }, { "epoch": 0.409601798330122, "grad_norm": 0.2476007491350174, "learning_rate": 0.0001, "loss": 1.5232, "step": 2551 }, { "epoch": 0.40976236351958895, "grad_norm": 0.25796622037887573, "learning_rate": 0.0001, "loss": 1.5114, "step": 2552 }, { "epoch": 0.4099229287090559, "grad_norm": 0.24175529181957245, "learning_rate": 0.0001, "loss": 1.4924, "step": 2553 }, { "epoch": 0.4100834938985228, "grad_norm": 0.2576192617416382, "learning_rate": 0.0001, "loss": 1.5433, "step": 2554 }, { "epoch": 0.4102440590879897, "grad_norm": 0.2633851170539856, "learning_rate": 0.0001, "loss": 1.6007, "step": 2555 }, { "epoch": 0.41040462427745666, "grad_norm": 0.2631693482398987, "learning_rate": 0.0001, "loss": 1.527, "step": 2556 }, { "epoch": 0.41056518946692355, "grad_norm": 0.26312610507011414, "learning_rate": 0.0001, "loss": 1.6563, "step": 2557 }, { "epoch": 0.4107257546563905, "grad_norm": 0.25023066997528076, "learning_rate": 0.0001, "loss": 1.5059, "step": 2558 }, { "epoch": 0.41088631984585744, "grad_norm": 0.2595348060131073, "learning_rate": 0.0001, "loss": 1.6379, "step": 2559 }, { "epoch": 0.4110468850353243, "grad_norm": 0.2501184642314911, "learning_rate": 0.0001, "loss": 1.5306, "step": 2560 }, { "epoch": 0.41120745022479127, "grad_norm": 0.3003421425819397, "learning_rate": 0.0001, "loss": 1.5155, "step": 2561 }, { "epoch": 0.4113680154142582, "grad_norm": 0.2603636384010315, "learning_rate": 0.0001, "loss": 1.5076, "step": 2562 }, { "epoch": 0.4115285806037251, "grad_norm": 0.2558700442314148, "learning_rate": 0.0001, "loss": 1.5355, "step": 2563 }, { "epoch": 0.41168914579319205, "grad_norm": 0.24567385017871857, "learning_rate": 0.0001, "loss": 1.523, "step": 2564 }, { "epoch": 0.41184971098265893, "grad_norm": 0.26885974407196045, "learning_rate": 0.0001, "loss": 1.5623, "step": 2565 }, { "epoch": 0.4120102761721259, "grad_norm": 0.2442621886730194, "learning_rate": 0.0001, "loss": 1.5341, "step": 2566 }, { "epoch": 0.4121708413615928, "grad_norm": 0.2417394071817398, "learning_rate": 0.0001, "loss": 1.4942, "step": 2567 }, { "epoch": 0.4123314065510597, "grad_norm": 0.2529677450656891, "learning_rate": 0.0001, "loss": 1.5544, "step": 2568 }, { "epoch": 0.41249197174052665, "grad_norm": 0.26576799154281616, "learning_rate": 0.0001, "loss": 1.5618, "step": 2569 }, { "epoch": 0.4126525369299936, "grad_norm": 0.25639644265174866, "learning_rate": 0.0001, "loss": 1.6553, "step": 2570 }, { "epoch": 0.4128131021194605, "grad_norm": 0.24996112287044525, "learning_rate": 0.0001, "loss": 1.5138, "step": 2571 }, { "epoch": 0.41297366730892743, "grad_norm": 0.25390946865081787, "learning_rate": 0.0001, "loss": 1.605, "step": 2572 }, { "epoch": 0.41313423249839437, "grad_norm": 0.23816630244255066, "learning_rate": 0.0001, "loss": 1.5376, "step": 2573 }, { "epoch": 0.41329479768786126, "grad_norm": 0.2712462842464447, "learning_rate": 0.0001, "loss": 1.5503, "step": 2574 }, { "epoch": 0.4134553628773282, "grad_norm": 0.24352729320526123, "learning_rate": 0.0001, "loss": 1.5102, "step": 2575 }, { "epoch": 0.41361592806679515, "grad_norm": 0.2582276463508606, "learning_rate": 0.0001, "loss": 1.5741, "step": 2576 }, { "epoch": 0.41377649325626203, "grad_norm": 0.2590008080005646, "learning_rate": 0.0001, "loss": 1.4903, "step": 2577 }, { "epoch": 0.413937058445729, "grad_norm": 0.2693990468978882, "learning_rate": 0.0001, "loss": 1.5606, "step": 2578 }, { "epoch": 0.41409762363519587, "grad_norm": 0.24303196370601654, "learning_rate": 0.0001, "loss": 1.5016, "step": 2579 }, { "epoch": 0.4142581888246628, "grad_norm": 0.25497758388519287, "learning_rate": 0.0001, "loss": 1.5817, "step": 2580 }, { "epoch": 0.41441875401412975, "grad_norm": 0.2549613118171692, "learning_rate": 0.0001, "loss": 1.6054, "step": 2581 }, { "epoch": 0.41457931920359664, "grad_norm": 0.24005141854286194, "learning_rate": 0.0001, "loss": 1.5124, "step": 2582 }, { "epoch": 0.4147398843930636, "grad_norm": 0.2643565833568573, "learning_rate": 0.0001, "loss": 1.6373, "step": 2583 }, { "epoch": 0.41490044958253053, "grad_norm": 0.25584590435028076, "learning_rate": 0.0001, "loss": 1.6176, "step": 2584 }, { "epoch": 0.4150610147719974, "grad_norm": 0.25806379318237305, "learning_rate": 0.0001, "loss": 1.5022, "step": 2585 }, { "epoch": 0.41522157996146436, "grad_norm": 0.2439253330230713, "learning_rate": 0.0001, "loss": 1.572, "step": 2586 }, { "epoch": 0.4153821451509313, "grad_norm": 0.24600747227668762, "learning_rate": 0.0001, "loss": 1.5545, "step": 2587 }, { "epoch": 0.4155427103403982, "grad_norm": 0.23955826461315155, "learning_rate": 0.0001, "loss": 1.5198, "step": 2588 }, { "epoch": 0.41570327552986513, "grad_norm": 0.25872573256492615, "learning_rate": 0.0001, "loss": 1.607, "step": 2589 }, { "epoch": 0.415863840719332, "grad_norm": 0.25654417276382446, "learning_rate": 0.0001, "loss": 1.5204, "step": 2590 }, { "epoch": 0.41602440590879897, "grad_norm": 0.25154048204421997, "learning_rate": 0.0001, "loss": 1.4452, "step": 2591 }, { "epoch": 0.4161849710982659, "grad_norm": 0.25746750831604004, "learning_rate": 0.0001, "loss": 1.5712, "step": 2592 }, { "epoch": 0.4163455362877328, "grad_norm": 0.26046082377433777, "learning_rate": 0.0001, "loss": 1.5369, "step": 2593 }, { "epoch": 0.41650610147719974, "grad_norm": 0.2493816763162613, "learning_rate": 0.0001, "loss": 1.4769, "step": 2594 }, { "epoch": 0.4166666666666667, "grad_norm": 0.28193199634552, "learning_rate": 0.0001, "loss": 1.5421, "step": 2595 }, { "epoch": 0.4168272318561336, "grad_norm": 0.24330681562423706, "learning_rate": 0.0001, "loss": 1.489, "step": 2596 }, { "epoch": 0.4169877970456005, "grad_norm": 0.2504135072231293, "learning_rate": 0.0001, "loss": 1.5631, "step": 2597 }, { "epoch": 0.41714836223506746, "grad_norm": 0.2549284100532532, "learning_rate": 0.0001, "loss": 1.5078, "step": 2598 }, { "epoch": 0.41730892742453435, "grad_norm": 0.2654156982898712, "learning_rate": 0.0001, "loss": 1.4762, "step": 2599 }, { "epoch": 0.4174694926140013, "grad_norm": 0.23926936089992523, "learning_rate": 0.0001, "loss": 1.5948, "step": 2600 }, { "epoch": 0.41763005780346824, "grad_norm": 0.25596749782562256, "learning_rate": 0.0001, "loss": 1.5494, "step": 2601 }, { "epoch": 0.4177906229929351, "grad_norm": 0.2619558870792389, "learning_rate": 0.0001, "loss": 1.4978, "step": 2602 }, { "epoch": 0.41795118818240207, "grad_norm": 0.26040348410606384, "learning_rate": 0.0001, "loss": 1.5119, "step": 2603 }, { "epoch": 0.41811175337186895, "grad_norm": 0.25314196944236755, "learning_rate": 0.0001, "loss": 1.4858, "step": 2604 }, { "epoch": 0.4182723185613359, "grad_norm": 0.24116294085979462, "learning_rate": 0.0001, "loss": 1.4512, "step": 2605 }, { "epoch": 0.41843288375080284, "grad_norm": 0.26536011695861816, "learning_rate": 0.0001, "loss": 1.4884, "step": 2606 }, { "epoch": 0.41859344894026973, "grad_norm": 0.26319968700408936, "learning_rate": 0.0001, "loss": 1.5866, "step": 2607 }, { "epoch": 0.4187540141297367, "grad_norm": 0.2550050616264343, "learning_rate": 0.0001, "loss": 1.5778, "step": 2608 }, { "epoch": 0.4189145793192036, "grad_norm": 0.2522720992565155, "learning_rate": 0.0001, "loss": 1.485, "step": 2609 }, { "epoch": 0.4190751445086705, "grad_norm": 0.2624843418598175, "learning_rate": 0.0001, "loss": 1.5883, "step": 2610 }, { "epoch": 0.41923570969813745, "grad_norm": 0.26206061244010925, "learning_rate": 0.0001, "loss": 1.5232, "step": 2611 }, { "epoch": 0.4193962748876044, "grad_norm": 0.2567644417285919, "learning_rate": 0.0001, "loss": 1.4272, "step": 2612 }, { "epoch": 0.4195568400770713, "grad_norm": 0.29025423526763916, "learning_rate": 0.0001, "loss": 1.5642, "step": 2613 }, { "epoch": 0.4197174052665382, "grad_norm": 0.2984831929206848, "learning_rate": 0.0001, "loss": 1.5887, "step": 2614 }, { "epoch": 0.4198779704560051, "grad_norm": 0.24703791737556458, "learning_rate": 0.0001, "loss": 1.5293, "step": 2615 }, { "epoch": 0.42003853564547206, "grad_norm": 0.26482588052749634, "learning_rate": 0.0001, "loss": 1.5698, "step": 2616 }, { "epoch": 0.420199100834939, "grad_norm": 0.26263001561164856, "learning_rate": 0.0001, "loss": 1.6209, "step": 2617 }, { "epoch": 0.4203596660244059, "grad_norm": 0.24634204804897308, "learning_rate": 0.0001, "loss": 1.5267, "step": 2618 }, { "epoch": 0.42052023121387283, "grad_norm": 0.2785063087940216, "learning_rate": 0.0001, "loss": 1.5986, "step": 2619 }, { "epoch": 0.4206807964033398, "grad_norm": 0.26866859197616577, "learning_rate": 0.0001, "loss": 1.4961, "step": 2620 }, { "epoch": 0.42084136159280666, "grad_norm": 0.272344708442688, "learning_rate": 0.0001, "loss": 1.5206, "step": 2621 }, { "epoch": 0.4210019267822736, "grad_norm": 0.27001920342445374, "learning_rate": 0.0001, "loss": 1.6008, "step": 2622 }, { "epoch": 0.42116249197174055, "grad_norm": 0.2562403380870819, "learning_rate": 0.0001, "loss": 1.5091, "step": 2623 }, { "epoch": 0.42132305716120744, "grad_norm": 0.2639353573322296, "learning_rate": 0.0001, "loss": 1.5608, "step": 2624 }, { "epoch": 0.4214836223506744, "grad_norm": 0.28386738896369934, "learning_rate": 0.0001, "loss": 1.6232, "step": 2625 }, { "epoch": 0.4216441875401413, "grad_norm": 0.2597806453704834, "learning_rate": 0.0001, "loss": 1.5924, "step": 2626 }, { "epoch": 0.4218047527296082, "grad_norm": 0.26284340023994446, "learning_rate": 0.0001, "loss": 1.5389, "step": 2627 }, { "epoch": 0.42196531791907516, "grad_norm": 0.24359674751758575, "learning_rate": 0.0001, "loss": 1.4012, "step": 2628 }, { "epoch": 0.42212588310854204, "grad_norm": 0.25141337513923645, "learning_rate": 0.0001, "loss": 1.5518, "step": 2629 }, { "epoch": 0.422286448298009, "grad_norm": 0.2479681670665741, "learning_rate": 0.0001, "loss": 1.5493, "step": 2630 }, { "epoch": 0.42244701348747593, "grad_norm": 0.24880971014499664, "learning_rate": 0.0001, "loss": 1.5023, "step": 2631 }, { "epoch": 0.4226075786769428, "grad_norm": 0.25287386775016785, "learning_rate": 0.0001, "loss": 1.5166, "step": 2632 }, { "epoch": 0.42276814386640976, "grad_norm": 0.2639290690422058, "learning_rate": 0.0001, "loss": 1.5861, "step": 2633 }, { "epoch": 0.4229287090558767, "grad_norm": 0.2502775490283966, "learning_rate": 0.0001, "loss": 1.5268, "step": 2634 }, { "epoch": 0.4230892742453436, "grad_norm": 0.24484850466251373, "learning_rate": 0.0001, "loss": 1.4464, "step": 2635 }, { "epoch": 0.42324983943481054, "grad_norm": 0.2664753198623657, "learning_rate": 0.0001, "loss": 1.594, "step": 2636 }, { "epoch": 0.4234104046242775, "grad_norm": 0.2515271306037903, "learning_rate": 0.0001, "loss": 1.5276, "step": 2637 }, { "epoch": 0.42357096981374437, "grad_norm": 0.25124526023864746, "learning_rate": 0.0001, "loss": 1.52, "step": 2638 }, { "epoch": 0.4237315350032113, "grad_norm": 0.2585409879684448, "learning_rate": 0.0001, "loss": 1.5634, "step": 2639 }, { "epoch": 0.4238921001926782, "grad_norm": 0.25445839762687683, "learning_rate": 0.0001, "loss": 1.5547, "step": 2640 }, { "epoch": 0.42405266538214514, "grad_norm": 0.26580333709716797, "learning_rate": 0.0001, "loss": 1.4956, "step": 2641 }, { "epoch": 0.4242132305716121, "grad_norm": 0.25759628415107727, "learning_rate": 0.0001, "loss": 1.5288, "step": 2642 }, { "epoch": 0.424373795761079, "grad_norm": 0.25335782766342163, "learning_rate": 0.0001, "loss": 1.4992, "step": 2643 }, { "epoch": 0.4245343609505459, "grad_norm": 0.26285186409950256, "learning_rate": 0.0001, "loss": 1.5565, "step": 2644 }, { "epoch": 0.42469492614001286, "grad_norm": 0.2520076632499695, "learning_rate": 0.0001, "loss": 1.5347, "step": 2645 }, { "epoch": 0.42485549132947975, "grad_norm": 0.2650809586048126, "learning_rate": 0.0001, "loss": 1.5181, "step": 2646 }, { "epoch": 0.4250160565189467, "grad_norm": 0.2627808451652527, "learning_rate": 0.0001, "loss": 1.4876, "step": 2647 }, { "epoch": 0.42517662170841364, "grad_norm": 0.2433909773826599, "learning_rate": 0.0001, "loss": 1.5079, "step": 2648 }, { "epoch": 0.4253371868978805, "grad_norm": 0.25884318351745605, "learning_rate": 0.0001, "loss": 1.583, "step": 2649 }, { "epoch": 0.42549775208734747, "grad_norm": 0.2662484347820282, "learning_rate": 0.0001, "loss": 1.5267, "step": 2650 }, { "epoch": 0.4256583172768144, "grad_norm": 0.2550729215145111, "learning_rate": 0.0001, "loss": 1.6167, "step": 2651 }, { "epoch": 0.4258188824662813, "grad_norm": 0.25947099924087524, "learning_rate": 0.0001, "loss": 1.5475, "step": 2652 }, { "epoch": 0.42597944765574824, "grad_norm": 0.35818055272102356, "learning_rate": 0.0001, "loss": 1.577, "step": 2653 }, { "epoch": 0.42614001284521513, "grad_norm": 0.27459511160850525, "learning_rate": 0.0001, "loss": 1.5577, "step": 2654 }, { "epoch": 0.4263005780346821, "grad_norm": 0.25434303283691406, "learning_rate": 0.0001, "loss": 1.5585, "step": 2655 }, { "epoch": 0.426461143224149, "grad_norm": 0.2576950490474701, "learning_rate": 0.0001, "loss": 1.5593, "step": 2656 }, { "epoch": 0.4266217084136159, "grad_norm": 0.2581554055213928, "learning_rate": 0.0001, "loss": 1.5572, "step": 2657 }, { "epoch": 0.42678227360308285, "grad_norm": 0.25048211216926575, "learning_rate": 0.0001, "loss": 1.4973, "step": 2658 }, { "epoch": 0.4269428387925498, "grad_norm": 0.246945321559906, "learning_rate": 0.0001, "loss": 1.5544, "step": 2659 }, { "epoch": 0.4271034039820167, "grad_norm": 0.2460125982761383, "learning_rate": 0.0001, "loss": 1.5221, "step": 2660 }, { "epoch": 0.4272639691714836, "grad_norm": 0.25332316756248474, "learning_rate": 0.0001, "loss": 1.4938, "step": 2661 }, { "epoch": 0.42742453436095057, "grad_norm": 0.2497895061969757, "learning_rate": 0.0001, "loss": 1.494, "step": 2662 }, { "epoch": 0.42758509955041746, "grad_norm": 0.2499072551727295, "learning_rate": 0.0001, "loss": 1.5508, "step": 2663 }, { "epoch": 0.4277456647398844, "grad_norm": 0.2507265508174896, "learning_rate": 0.0001, "loss": 1.4885, "step": 2664 }, { "epoch": 0.4279062299293513, "grad_norm": 0.25569257140159607, "learning_rate": 0.0001, "loss": 1.5684, "step": 2665 }, { "epoch": 0.42806679511881823, "grad_norm": 2.385692596435547, "learning_rate": 0.0001, "loss": 1.4908, "step": 2666 }, { "epoch": 0.4282273603082852, "grad_norm": 0.2744636535644531, "learning_rate": 0.0001, "loss": 1.6281, "step": 2667 }, { "epoch": 0.42838792549775206, "grad_norm": 0.2834700644016266, "learning_rate": 0.0001, "loss": 1.5577, "step": 2668 }, { "epoch": 0.428548490687219, "grad_norm": 0.2911142110824585, "learning_rate": 0.0001, "loss": 1.5719, "step": 2669 }, { "epoch": 0.42870905587668595, "grad_norm": 0.25528252124786377, "learning_rate": 0.0001, "loss": 1.4831, "step": 2670 }, { "epoch": 0.42886962106615284, "grad_norm": 0.2466645985841751, "learning_rate": 0.0001, "loss": 1.5427, "step": 2671 }, { "epoch": 0.4290301862556198, "grad_norm": 0.27905088663101196, "learning_rate": 0.0001, "loss": 1.5257, "step": 2672 }, { "epoch": 0.4291907514450867, "grad_norm": 0.25568363070487976, "learning_rate": 0.0001, "loss": 1.4708, "step": 2673 }, { "epoch": 0.4293513166345536, "grad_norm": 0.25385648012161255, "learning_rate": 0.0001, "loss": 1.5574, "step": 2674 }, { "epoch": 0.42951188182402056, "grad_norm": 0.2757319509983063, "learning_rate": 0.0001, "loss": 1.5304, "step": 2675 }, { "epoch": 0.4296724470134875, "grad_norm": 0.2604694366455078, "learning_rate": 0.0001, "loss": 1.4783, "step": 2676 }, { "epoch": 0.4298330122029544, "grad_norm": 0.27133870124816895, "learning_rate": 0.0001, "loss": 1.503, "step": 2677 }, { "epoch": 0.42999357739242133, "grad_norm": 0.25154414772987366, "learning_rate": 0.0001, "loss": 1.4647, "step": 2678 }, { "epoch": 0.4301541425818882, "grad_norm": 0.24950821697711945, "learning_rate": 0.0001, "loss": 1.5182, "step": 2679 }, { "epoch": 0.43031470777135516, "grad_norm": 0.2645113468170166, "learning_rate": 0.0001, "loss": 1.5368, "step": 2680 }, { "epoch": 0.4304752729608221, "grad_norm": 0.25628626346588135, "learning_rate": 0.0001, "loss": 1.5494, "step": 2681 }, { "epoch": 0.430635838150289, "grad_norm": 0.25732356309890747, "learning_rate": 0.0001, "loss": 1.5515, "step": 2682 }, { "epoch": 0.43079640333975594, "grad_norm": 0.2506778836250305, "learning_rate": 0.0001, "loss": 1.6141, "step": 2683 }, { "epoch": 0.4309569685292229, "grad_norm": 0.2596699893474579, "learning_rate": 0.0001, "loss": 1.5307, "step": 2684 }, { "epoch": 0.43111753371868977, "grad_norm": 0.24951109290122986, "learning_rate": 0.0001, "loss": 1.5074, "step": 2685 }, { "epoch": 0.4312780989081567, "grad_norm": 0.26219576597213745, "learning_rate": 0.0001, "loss": 1.4332, "step": 2686 }, { "epoch": 0.43143866409762366, "grad_norm": 0.254175066947937, "learning_rate": 0.0001, "loss": 1.6227, "step": 2687 }, { "epoch": 0.43159922928709055, "grad_norm": 0.25088366866111755, "learning_rate": 0.0001, "loss": 1.5545, "step": 2688 }, { "epoch": 0.4317597944765575, "grad_norm": 0.2951388657093048, "learning_rate": 0.0001, "loss": 1.6021, "step": 2689 }, { "epoch": 0.4319203596660244, "grad_norm": 0.28665709495544434, "learning_rate": 0.0001, "loss": 1.5506, "step": 2690 }, { "epoch": 0.4320809248554913, "grad_norm": 0.25719329714775085, "learning_rate": 0.0001, "loss": 1.5669, "step": 2691 }, { "epoch": 0.43224149004495827, "grad_norm": 0.27239304780960083, "learning_rate": 0.0001, "loss": 1.5778, "step": 2692 }, { "epoch": 0.43240205523442515, "grad_norm": 0.24096353352069855, "learning_rate": 0.0001, "loss": 1.4177, "step": 2693 }, { "epoch": 0.4325626204238921, "grad_norm": 0.25921630859375, "learning_rate": 0.0001, "loss": 1.5777, "step": 2694 }, { "epoch": 0.43272318561335904, "grad_norm": 0.2616073191165924, "learning_rate": 0.0001, "loss": 1.5244, "step": 2695 }, { "epoch": 0.43288375080282593, "grad_norm": 0.24035929143428802, "learning_rate": 0.0001, "loss": 1.4818, "step": 2696 }, { "epoch": 0.43304431599229287, "grad_norm": 0.2576081454753876, "learning_rate": 0.0001, "loss": 1.6338, "step": 2697 }, { "epoch": 0.4332048811817598, "grad_norm": 0.26582103967666626, "learning_rate": 0.0001, "loss": 1.5621, "step": 2698 }, { "epoch": 0.4333654463712267, "grad_norm": 0.2568143308162689, "learning_rate": 0.0001, "loss": 1.554, "step": 2699 }, { "epoch": 0.43352601156069365, "grad_norm": 0.2558077573776245, "learning_rate": 0.0001, "loss": 1.5883, "step": 2700 }, { "epoch": 0.4336865767501606, "grad_norm": 0.2493770569562912, "learning_rate": 0.0001, "loss": 1.5059, "step": 2701 }, { "epoch": 0.4338471419396275, "grad_norm": 0.24986302852630615, "learning_rate": 0.0001, "loss": 1.4514, "step": 2702 }, { "epoch": 0.4340077071290944, "grad_norm": 0.2509508728981018, "learning_rate": 0.0001, "loss": 1.6067, "step": 2703 }, { "epoch": 0.4341682723185613, "grad_norm": 0.2587379217147827, "learning_rate": 0.0001, "loss": 1.5094, "step": 2704 }, { "epoch": 0.43432883750802825, "grad_norm": 0.25891369581222534, "learning_rate": 0.0001, "loss": 1.5213, "step": 2705 }, { "epoch": 0.4344894026974952, "grad_norm": 0.2458716183900833, "learning_rate": 0.0001, "loss": 1.5257, "step": 2706 }, { "epoch": 0.4346499678869621, "grad_norm": 0.24901600182056427, "learning_rate": 0.0001, "loss": 1.5352, "step": 2707 }, { "epoch": 0.43481053307642903, "grad_norm": 0.24522405862808228, "learning_rate": 0.0001, "loss": 1.5502, "step": 2708 }, { "epoch": 0.434971098265896, "grad_norm": 0.25697633624076843, "learning_rate": 0.0001, "loss": 1.5078, "step": 2709 }, { "epoch": 0.43513166345536286, "grad_norm": 0.25953882932662964, "learning_rate": 0.0001, "loss": 1.4886, "step": 2710 }, { "epoch": 0.4352922286448298, "grad_norm": 0.26224443316459656, "learning_rate": 0.0001, "loss": 1.5709, "step": 2711 }, { "epoch": 0.43545279383429675, "grad_norm": 0.2708311378955841, "learning_rate": 0.0001, "loss": 1.5595, "step": 2712 }, { "epoch": 0.43561335902376364, "grad_norm": 0.24821874499320984, "learning_rate": 0.0001, "loss": 1.499, "step": 2713 }, { "epoch": 0.4357739242132306, "grad_norm": 0.35547691583633423, "learning_rate": 0.0001, "loss": 1.5083, "step": 2714 }, { "epoch": 0.4359344894026975, "grad_norm": 0.24064689874649048, "learning_rate": 0.0001, "loss": 1.5219, "step": 2715 }, { "epoch": 0.4360950545921644, "grad_norm": 0.26905253529548645, "learning_rate": 0.0001, "loss": 1.6141, "step": 2716 }, { "epoch": 0.43625561978163135, "grad_norm": 0.2711796462535858, "learning_rate": 0.0001, "loss": 1.573, "step": 2717 }, { "epoch": 0.43641618497109824, "grad_norm": 0.2584587037563324, "learning_rate": 0.0001, "loss": 1.5267, "step": 2718 }, { "epoch": 0.4365767501605652, "grad_norm": 0.25472304224967957, "learning_rate": 0.0001, "loss": 1.5016, "step": 2719 }, { "epoch": 0.43673731535003213, "grad_norm": 0.257657527923584, "learning_rate": 0.0001, "loss": 1.5757, "step": 2720 }, { "epoch": 0.436897880539499, "grad_norm": 0.25497204065322876, "learning_rate": 0.0001, "loss": 1.4792, "step": 2721 }, { "epoch": 0.43705844572896596, "grad_norm": 0.2629372775554657, "learning_rate": 0.0001, "loss": 1.5797, "step": 2722 }, { "epoch": 0.4372190109184329, "grad_norm": 0.40733203291893005, "learning_rate": 0.0001, "loss": 1.5442, "step": 2723 }, { "epoch": 0.4373795761078998, "grad_norm": 0.2550099194049835, "learning_rate": 0.0001, "loss": 1.5965, "step": 2724 }, { "epoch": 0.43754014129736674, "grad_norm": 0.2658746540546417, "learning_rate": 0.0001, "loss": 1.4625, "step": 2725 }, { "epoch": 0.4377007064868337, "grad_norm": 0.25851795077323914, "learning_rate": 0.0001, "loss": 1.6022, "step": 2726 }, { "epoch": 0.43786127167630057, "grad_norm": 0.27039971947669983, "learning_rate": 0.0001, "loss": 1.5828, "step": 2727 }, { "epoch": 0.4380218368657675, "grad_norm": 0.25005993247032166, "learning_rate": 0.0001, "loss": 1.4932, "step": 2728 }, { "epoch": 0.4381824020552344, "grad_norm": 0.24069570004940033, "learning_rate": 0.0001, "loss": 1.5304, "step": 2729 }, { "epoch": 0.43834296724470134, "grad_norm": 0.23995202779769897, "learning_rate": 0.0001, "loss": 1.5175, "step": 2730 }, { "epoch": 0.4385035324341683, "grad_norm": 0.26718127727508545, "learning_rate": 0.0001, "loss": 1.534, "step": 2731 }, { "epoch": 0.4386640976236352, "grad_norm": 0.2617626190185547, "learning_rate": 0.0001, "loss": 1.6382, "step": 2732 }, { "epoch": 0.4388246628131021, "grad_norm": 0.2636631429195404, "learning_rate": 0.0001, "loss": 1.5521, "step": 2733 }, { "epoch": 0.43898522800256906, "grad_norm": 0.26319217681884766, "learning_rate": 0.0001, "loss": 1.5235, "step": 2734 }, { "epoch": 0.43914579319203595, "grad_norm": 0.2425624132156372, "learning_rate": 0.0001, "loss": 1.5166, "step": 2735 }, { "epoch": 0.4393063583815029, "grad_norm": 0.2700788080692291, "learning_rate": 0.0001, "loss": 1.6358, "step": 2736 }, { "epoch": 0.43946692357096984, "grad_norm": 0.25637128949165344, "learning_rate": 0.0001, "loss": 1.5037, "step": 2737 }, { "epoch": 0.4396274887604367, "grad_norm": 0.27559953927993774, "learning_rate": 0.0001, "loss": 1.5561, "step": 2738 }, { "epoch": 0.43978805394990367, "grad_norm": 0.25348690152168274, "learning_rate": 0.0001, "loss": 1.4787, "step": 2739 }, { "epoch": 0.4399486191393706, "grad_norm": 0.2682728171348572, "learning_rate": 0.0001, "loss": 1.476, "step": 2740 }, { "epoch": 0.4401091843288375, "grad_norm": 0.2554301619529724, "learning_rate": 0.0001, "loss": 1.514, "step": 2741 }, { "epoch": 0.44026974951830444, "grad_norm": 0.2548758387565613, "learning_rate": 0.0001, "loss": 1.5879, "step": 2742 }, { "epoch": 0.44043031470777133, "grad_norm": 0.2555524408817291, "learning_rate": 0.0001, "loss": 1.6307, "step": 2743 }, { "epoch": 0.4405908798972383, "grad_norm": 0.2641182243824005, "learning_rate": 0.0001, "loss": 1.5381, "step": 2744 }, { "epoch": 0.4407514450867052, "grad_norm": 0.25536489486694336, "learning_rate": 0.0001, "loss": 1.4782, "step": 2745 }, { "epoch": 0.4409120102761721, "grad_norm": 0.26611775159835815, "learning_rate": 0.0001, "loss": 1.5721, "step": 2746 }, { "epoch": 0.44107257546563905, "grad_norm": 0.25221577286720276, "learning_rate": 0.0001, "loss": 1.5091, "step": 2747 }, { "epoch": 0.441233140655106, "grad_norm": 0.25327831506729126, "learning_rate": 0.0001, "loss": 1.5007, "step": 2748 }, { "epoch": 0.4413937058445729, "grad_norm": 0.26875635981559753, "learning_rate": 0.0001, "loss": 1.558, "step": 2749 }, { "epoch": 0.4415542710340398, "grad_norm": 0.2573522925376892, "learning_rate": 0.0001, "loss": 1.4633, "step": 2750 }, { "epoch": 0.44171483622350677, "grad_norm": 0.2467297464609146, "learning_rate": 0.0001, "loss": 1.4585, "step": 2751 }, { "epoch": 0.44187540141297366, "grad_norm": 0.26516193151474, "learning_rate": 0.0001, "loss": 1.5107, "step": 2752 }, { "epoch": 0.4420359666024406, "grad_norm": 0.24327106773853302, "learning_rate": 0.0001, "loss": 1.4813, "step": 2753 }, { "epoch": 0.4421965317919075, "grad_norm": 0.24396276473999023, "learning_rate": 0.0001, "loss": 1.4599, "step": 2754 }, { "epoch": 0.44235709698137443, "grad_norm": 0.2524784505367279, "learning_rate": 0.0001, "loss": 1.4842, "step": 2755 }, { "epoch": 0.4425176621708414, "grad_norm": 0.2601543962955475, "learning_rate": 0.0001, "loss": 1.5943, "step": 2756 }, { "epoch": 0.44267822736030826, "grad_norm": 0.28155869245529175, "learning_rate": 0.0001, "loss": 1.4808, "step": 2757 }, { "epoch": 0.4428387925497752, "grad_norm": 0.2583642899990082, "learning_rate": 0.0001, "loss": 1.486, "step": 2758 }, { "epoch": 0.44299935773924215, "grad_norm": 0.2680513858795166, "learning_rate": 0.0001, "loss": 1.6168, "step": 2759 }, { "epoch": 0.44315992292870904, "grad_norm": 0.24765296280384064, "learning_rate": 0.0001, "loss": 1.5884, "step": 2760 }, { "epoch": 0.443320488118176, "grad_norm": 0.24888968467712402, "learning_rate": 0.0001, "loss": 1.4023, "step": 2761 }, { "epoch": 0.4434810533076429, "grad_norm": 0.2736065685749054, "learning_rate": 0.0001, "loss": 1.5299, "step": 2762 }, { "epoch": 0.4436416184971098, "grad_norm": 0.26058515906333923, "learning_rate": 0.0001, "loss": 1.5805, "step": 2763 }, { "epoch": 0.44380218368657676, "grad_norm": 0.2461216151714325, "learning_rate": 0.0001, "loss": 1.5148, "step": 2764 }, { "epoch": 0.4439627488760437, "grad_norm": 0.27771174907684326, "learning_rate": 0.0001, "loss": 1.5703, "step": 2765 }, { "epoch": 0.4441233140655106, "grad_norm": 0.24980218708515167, "learning_rate": 0.0001, "loss": 1.4626, "step": 2766 }, { "epoch": 0.44428387925497753, "grad_norm": 0.24767200648784637, "learning_rate": 0.0001, "loss": 1.4348, "step": 2767 }, { "epoch": 0.4444444444444444, "grad_norm": 0.26564276218414307, "learning_rate": 0.0001, "loss": 1.5237, "step": 2768 }, { "epoch": 0.44460500963391136, "grad_norm": 0.2671497166156769, "learning_rate": 0.0001, "loss": 1.5847, "step": 2769 }, { "epoch": 0.4447655748233783, "grad_norm": 0.2535061240196228, "learning_rate": 0.0001, "loss": 1.5423, "step": 2770 }, { "epoch": 0.4449261400128452, "grad_norm": 0.2804764211177826, "learning_rate": 0.0001, "loss": 1.5995, "step": 2771 }, { "epoch": 0.44508670520231214, "grad_norm": 0.2646520137786865, "learning_rate": 0.0001, "loss": 1.557, "step": 2772 }, { "epoch": 0.4452472703917791, "grad_norm": 0.2630246877670288, "learning_rate": 0.0001, "loss": 1.5388, "step": 2773 }, { "epoch": 0.44540783558124597, "grad_norm": 0.25226303935050964, "learning_rate": 0.0001, "loss": 1.5313, "step": 2774 }, { "epoch": 0.4455684007707129, "grad_norm": 0.2842601239681244, "learning_rate": 0.0001, "loss": 1.556, "step": 2775 }, { "epoch": 0.44572896596017986, "grad_norm": 0.2518823742866516, "learning_rate": 0.0001, "loss": 1.5384, "step": 2776 }, { "epoch": 0.44588953114964675, "grad_norm": 0.27817302942276, "learning_rate": 0.0001, "loss": 1.6704, "step": 2777 }, { "epoch": 0.4460500963391137, "grad_norm": 0.26197052001953125, "learning_rate": 0.0001, "loss": 1.6203, "step": 2778 }, { "epoch": 0.4462106615285806, "grad_norm": 0.24848100543022156, "learning_rate": 0.0001, "loss": 1.502, "step": 2779 }, { "epoch": 0.4463712267180475, "grad_norm": 0.2504656910896301, "learning_rate": 0.0001, "loss": 1.5106, "step": 2780 }, { "epoch": 0.44653179190751446, "grad_norm": 0.24837298691272736, "learning_rate": 0.0001, "loss": 1.5564, "step": 2781 }, { "epoch": 0.44669235709698135, "grad_norm": 0.2552327811717987, "learning_rate": 0.0001, "loss": 1.5696, "step": 2782 }, { "epoch": 0.4468529222864483, "grad_norm": 0.26277005672454834, "learning_rate": 0.0001, "loss": 1.5916, "step": 2783 }, { "epoch": 0.44701348747591524, "grad_norm": 0.24569441378116608, "learning_rate": 0.0001, "loss": 1.4613, "step": 2784 }, { "epoch": 0.4471740526653821, "grad_norm": 0.24393264949321747, "learning_rate": 0.0001, "loss": 1.5542, "step": 2785 }, { "epoch": 0.44733461785484907, "grad_norm": 0.2537849247455597, "learning_rate": 0.0001, "loss": 1.5466, "step": 2786 }, { "epoch": 0.447495183044316, "grad_norm": 0.245890811085701, "learning_rate": 0.0001, "loss": 1.5347, "step": 2787 }, { "epoch": 0.4476557482337829, "grad_norm": 0.2544203996658325, "learning_rate": 0.0001, "loss": 1.4941, "step": 2788 }, { "epoch": 0.44781631342324985, "grad_norm": 0.2503323256969452, "learning_rate": 0.0001, "loss": 1.5464, "step": 2789 }, { "epoch": 0.4479768786127168, "grad_norm": 0.5950185656547546, "learning_rate": 0.0001, "loss": 1.5325, "step": 2790 }, { "epoch": 0.4481374438021837, "grad_norm": 0.25244295597076416, "learning_rate": 0.0001, "loss": 1.5653, "step": 2791 }, { "epoch": 0.4482980089916506, "grad_norm": 0.2557685673236847, "learning_rate": 0.0001, "loss": 1.4407, "step": 2792 }, { "epoch": 0.4484585741811175, "grad_norm": 0.2686178684234619, "learning_rate": 0.0001, "loss": 1.6164, "step": 2793 }, { "epoch": 0.44861913937058445, "grad_norm": 0.2816196382045746, "learning_rate": 0.0001, "loss": 1.5574, "step": 2794 }, { "epoch": 0.4487797045600514, "grad_norm": 0.27569353580474854, "learning_rate": 0.0001, "loss": 1.5461, "step": 2795 }, { "epoch": 0.4489402697495183, "grad_norm": 0.27293121814727783, "learning_rate": 0.0001, "loss": 1.5787, "step": 2796 }, { "epoch": 0.4491008349389852, "grad_norm": 0.24752502143383026, "learning_rate": 0.0001, "loss": 1.5157, "step": 2797 }, { "epoch": 0.44926140012845217, "grad_norm": 0.2575712203979492, "learning_rate": 0.0001, "loss": 1.5448, "step": 2798 }, { "epoch": 0.44942196531791906, "grad_norm": 0.2574111223220825, "learning_rate": 0.0001, "loss": 1.4557, "step": 2799 }, { "epoch": 0.449582530507386, "grad_norm": 0.6211435198783875, "learning_rate": 0.0001, "loss": 1.5214, "step": 2800 }, { "epoch": 0.44974309569685295, "grad_norm": 0.2662184536457062, "learning_rate": 0.0001, "loss": 1.6333, "step": 2801 }, { "epoch": 0.44990366088631983, "grad_norm": 0.2594231069087982, "learning_rate": 0.0001, "loss": 1.5146, "step": 2802 }, { "epoch": 0.4500642260757868, "grad_norm": 0.33451321721076965, "learning_rate": 0.0001, "loss": 1.5315, "step": 2803 }, { "epoch": 0.45022479126525367, "grad_norm": 0.26403456926345825, "learning_rate": 0.0001, "loss": 1.5518, "step": 2804 }, { "epoch": 0.4503853564547206, "grad_norm": 0.24283012747764587, "learning_rate": 0.0001, "loss": 1.5375, "step": 2805 }, { "epoch": 0.45054592164418755, "grad_norm": 0.28998836874961853, "learning_rate": 0.0001, "loss": 1.5779, "step": 2806 }, { "epoch": 0.45070648683365444, "grad_norm": 0.2587253451347351, "learning_rate": 0.0001, "loss": 1.4139, "step": 2807 }, { "epoch": 0.4508670520231214, "grad_norm": 0.24802348017692566, "learning_rate": 0.0001, "loss": 1.5288, "step": 2808 }, { "epoch": 0.4510276172125883, "grad_norm": 0.2477966547012329, "learning_rate": 0.0001, "loss": 1.5519, "step": 2809 }, { "epoch": 0.4511881824020552, "grad_norm": 0.33115845918655396, "learning_rate": 0.0001, "loss": 1.4968, "step": 2810 }, { "epoch": 0.45134874759152216, "grad_norm": 0.2585131824016571, "learning_rate": 0.0001, "loss": 1.4975, "step": 2811 }, { "epoch": 0.4515093127809891, "grad_norm": 0.2601938843727112, "learning_rate": 0.0001, "loss": 1.4967, "step": 2812 }, { "epoch": 0.451669877970456, "grad_norm": 0.2451581507921219, "learning_rate": 0.0001, "loss": 1.4733, "step": 2813 }, { "epoch": 0.45183044315992293, "grad_norm": 0.2767625153064728, "learning_rate": 0.0001, "loss": 1.5122, "step": 2814 }, { "epoch": 0.4519910083493899, "grad_norm": 0.2543317973613739, "learning_rate": 0.0001, "loss": 1.5517, "step": 2815 }, { "epoch": 0.45215157353885677, "grad_norm": 0.26612985134124756, "learning_rate": 0.0001, "loss": 1.5807, "step": 2816 }, { "epoch": 0.4523121387283237, "grad_norm": 0.25472018122673035, "learning_rate": 0.0001, "loss": 1.5078, "step": 2817 }, { "epoch": 0.4524727039177906, "grad_norm": 0.24495795369148254, "learning_rate": 0.0001, "loss": 1.5455, "step": 2818 }, { "epoch": 0.45263326910725754, "grad_norm": 0.24734511971473694, "learning_rate": 0.0001, "loss": 1.464, "step": 2819 }, { "epoch": 0.4527938342967245, "grad_norm": 0.25817036628723145, "learning_rate": 0.0001, "loss": 1.5298, "step": 2820 }, { "epoch": 0.4529543994861914, "grad_norm": 0.5361108779907227, "learning_rate": 0.0001, "loss": 1.4777, "step": 2821 }, { "epoch": 0.4531149646756583, "grad_norm": 0.24240370094776154, "learning_rate": 0.0001, "loss": 1.5158, "step": 2822 }, { "epoch": 0.45327552986512526, "grad_norm": 0.24924057722091675, "learning_rate": 0.0001, "loss": 1.5477, "step": 2823 }, { "epoch": 0.45343609505459215, "grad_norm": 0.2728411853313446, "learning_rate": 0.0001, "loss": 1.6117, "step": 2824 }, { "epoch": 0.4535966602440591, "grad_norm": 0.2432524412870407, "learning_rate": 0.0001, "loss": 1.5073, "step": 2825 }, { "epoch": 0.45375722543352603, "grad_norm": 0.2530824840068817, "learning_rate": 0.0001, "loss": 1.5505, "step": 2826 }, { "epoch": 0.4539177906229929, "grad_norm": 0.24826349318027496, "learning_rate": 0.0001, "loss": 1.5805, "step": 2827 }, { "epoch": 0.45407835581245987, "grad_norm": 0.26144877076148987, "learning_rate": 0.0001, "loss": 1.5111, "step": 2828 }, { "epoch": 0.4542389210019268, "grad_norm": 0.2444932460784912, "learning_rate": 0.0001, "loss": 1.5571, "step": 2829 }, { "epoch": 0.4543994861913937, "grad_norm": 0.25554490089416504, "learning_rate": 0.0001, "loss": 1.5486, "step": 2830 }, { "epoch": 0.45456005138086064, "grad_norm": 0.25208333134651184, "learning_rate": 0.0001, "loss": 1.4875, "step": 2831 }, { "epoch": 0.45472061657032753, "grad_norm": 0.25124403834342957, "learning_rate": 0.0001, "loss": 1.4813, "step": 2832 }, { "epoch": 0.4548811817597945, "grad_norm": 0.26822763681411743, "learning_rate": 0.0001, "loss": 1.5502, "step": 2833 }, { "epoch": 0.4550417469492614, "grad_norm": 0.24606886506080627, "learning_rate": 0.0001, "loss": 1.5009, "step": 2834 }, { "epoch": 0.4552023121387283, "grad_norm": 1.08027982711792, "learning_rate": 0.0001, "loss": 1.4697, "step": 2835 }, { "epoch": 0.45536287732819525, "grad_norm": 0.2689116597175598, "learning_rate": 0.0001, "loss": 1.5069, "step": 2836 }, { "epoch": 0.4555234425176622, "grad_norm": 0.24950948357582092, "learning_rate": 0.0001, "loss": 1.4455, "step": 2837 }, { "epoch": 0.4556840077071291, "grad_norm": 0.2667357921600342, "learning_rate": 0.0001, "loss": 1.4858, "step": 2838 }, { "epoch": 0.455844572896596, "grad_norm": 0.2685289978981018, "learning_rate": 0.0001, "loss": 1.4995, "step": 2839 }, { "epoch": 0.45600513808606297, "grad_norm": 0.25204527378082275, "learning_rate": 0.0001, "loss": 1.5202, "step": 2840 }, { "epoch": 0.45616570327552985, "grad_norm": 0.24358847737312317, "learning_rate": 0.0001, "loss": 1.4787, "step": 2841 }, { "epoch": 0.4563262684649968, "grad_norm": 0.2502761781215668, "learning_rate": 0.0001, "loss": 1.5346, "step": 2842 }, { "epoch": 0.4564868336544637, "grad_norm": 0.25408902764320374, "learning_rate": 0.0001, "loss": 1.5318, "step": 2843 }, { "epoch": 0.45664739884393063, "grad_norm": 0.24858152866363525, "learning_rate": 0.0001, "loss": 1.4884, "step": 2844 }, { "epoch": 0.4568079640333976, "grad_norm": 0.2565762996673584, "learning_rate": 0.0001, "loss": 1.493, "step": 2845 }, { "epoch": 0.45696852922286446, "grad_norm": 0.25907644629478455, "learning_rate": 0.0001, "loss": 1.521, "step": 2846 }, { "epoch": 0.4571290944123314, "grad_norm": 0.278012216091156, "learning_rate": 0.0001, "loss": 1.6365, "step": 2847 }, { "epoch": 0.45728965960179835, "grad_norm": 0.25443723797798157, "learning_rate": 0.0001, "loss": 1.4585, "step": 2848 }, { "epoch": 0.45745022479126524, "grad_norm": 0.2635557949542999, "learning_rate": 0.0001, "loss": 1.516, "step": 2849 }, { "epoch": 0.4576107899807322, "grad_norm": 0.2510010898113251, "learning_rate": 0.0001, "loss": 1.5257, "step": 2850 }, { "epoch": 0.4577713551701991, "grad_norm": 0.2531384527683258, "learning_rate": 0.0001, "loss": 1.5146, "step": 2851 }, { "epoch": 0.457931920359666, "grad_norm": 0.2632436454296112, "learning_rate": 0.0001, "loss": 1.6984, "step": 2852 }, { "epoch": 0.45809248554913296, "grad_norm": 0.25483858585357666, "learning_rate": 0.0001, "loss": 1.4538, "step": 2853 }, { "epoch": 0.4582530507385999, "grad_norm": 0.25285083055496216, "learning_rate": 0.0001, "loss": 1.5308, "step": 2854 }, { "epoch": 0.4584136159280668, "grad_norm": 0.26279085874557495, "learning_rate": 0.0001, "loss": 1.5755, "step": 2855 }, { "epoch": 0.45857418111753373, "grad_norm": 0.24838216602802277, "learning_rate": 0.0001, "loss": 1.5077, "step": 2856 }, { "epoch": 0.4587347463070006, "grad_norm": 0.2317473590373993, "learning_rate": 0.0001, "loss": 1.4682, "step": 2857 }, { "epoch": 0.45889531149646756, "grad_norm": 0.24912071228027344, "learning_rate": 0.0001, "loss": 1.528, "step": 2858 }, { "epoch": 0.4590558766859345, "grad_norm": 0.25990068912506104, "learning_rate": 0.0001, "loss": 1.5177, "step": 2859 }, { "epoch": 0.4592164418754014, "grad_norm": 0.26524385809898376, "learning_rate": 0.0001, "loss": 1.6028, "step": 2860 }, { "epoch": 0.45937700706486834, "grad_norm": 0.2510650157928467, "learning_rate": 0.0001, "loss": 1.4484, "step": 2861 }, { "epoch": 0.4595375722543353, "grad_norm": 0.24385692179203033, "learning_rate": 0.0001, "loss": 1.5092, "step": 2862 }, { "epoch": 0.45969813744380217, "grad_norm": 0.26305243372917175, "learning_rate": 0.0001, "loss": 1.4875, "step": 2863 }, { "epoch": 0.4598587026332691, "grad_norm": 0.24691642820835114, "learning_rate": 0.0001, "loss": 1.5286, "step": 2864 }, { "epoch": 0.46001926782273606, "grad_norm": 0.24307402968406677, "learning_rate": 0.0001, "loss": 1.4736, "step": 2865 }, { "epoch": 0.46017983301220294, "grad_norm": 0.2772844433784485, "learning_rate": 0.0001, "loss": 1.6063, "step": 2866 }, { "epoch": 0.4603403982016699, "grad_norm": 0.2490212619304657, "learning_rate": 0.0001, "loss": 1.4737, "step": 2867 }, { "epoch": 0.4605009633911368, "grad_norm": 0.27187564969062805, "learning_rate": 0.0001, "loss": 1.5311, "step": 2868 }, { "epoch": 0.4606615285806037, "grad_norm": 0.25558462738990784, "learning_rate": 0.0001, "loss": 1.4869, "step": 2869 }, { "epoch": 0.46082209377007066, "grad_norm": 0.2559339702129364, "learning_rate": 0.0001, "loss": 1.5277, "step": 2870 }, { "epoch": 0.46098265895953755, "grad_norm": 0.2592107951641083, "learning_rate": 0.0001, "loss": 1.4847, "step": 2871 }, { "epoch": 0.4611432241490045, "grad_norm": 0.2574782073497772, "learning_rate": 0.0001, "loss": 1.5467, "step": 2872 }, { "epoch": 0.46130378933847144, "grad_norm": 0.25029680132865906, "learning_rate": 0.0001, "loss": 1.5419, "step": 2873 }, { "epoch": 0.4614643545279383, "grad_norm": 0.9509021639823914, "learning_rate": 0.0001, "loss": 1.5162, "step": 2874 }, { "epoch": 0.46162491971740527, "grad_norm": 0.2531764805316925, "learning_rate": 0.0001, "loss": 1.5323, "step": 2875 }, { "epoch": 0.4617854849068722, "grad_norm": 0.245522141456604, "learning_rate": 0.0001, "loss": 1.5567, "step": 2876 }, { "epoch": 0.4619460500963391, "grad_norm": 0.26436519622802734, "learning_rate": 0.0001, "loss": 1.5695, "step": 2877 }, { "epoch": 0.46210661528580604, "grad_norm": 0.2703956663608551, "learning_rate": 0.0001, "loss": 1.5229, "step": 2878 }, { "epoch": 0.462267180475273, "grad_norm": 0.26570844650268555, "learning_rate": 0.0001, "loss": 1.5541, "step": 2879 }, { "epoch": 0.4624277456647399, "grad_norm": 0.26499056816101074, "learning_rate": 0.0001, "loss": 1.5506, "step": 2880 }, { "epoch": 0.4625883108542068, "grad_norm": 0.27163347601890564, "learning_rate": 0.0001, "loss": 1.5466, "step": 2881 }, { "epoch": 0.4627488760436737, "grad_norm": 0.26002606749534607, "learning_rate": 0.0001, "loss": 1.517, "step": 2882 }, { "epoch": 0.46290944123314065, "grad_norm": 0.24343660473823547, "learning_rate": 0.0001, "loss": 1.514, "step": 2883 }, { "epoch": 0.4630700064226076, "grad_norm": 0.2543664574623108, "learning_rate": 0.0001, "loss": 1.4526, "step": 2884 }, { "epoch": 0.4632305716120745, "grad_norm": 0.2580462396144867, "learning_rate": 0.0001, "loss": 1.5423, "step": 2885 }, { "epoch": 0.4633911368015414, "grad_norm": 0.259204626083374, "learning_rate": 0.0001, "loss": 1.5541, "step": 2886 }, { "epoch": 0.46355170199100837, "grad_norm": 0.26318368315696716, "learning_rate": 0.0001, "loss": 1.5519, "step": 2887 }, { "epoch": 0.46371226718047526, "grad_norm": 0.26448196172714233, "learning_rate": 0.0001, "loss": 1.5723, "step": 2888 }, { "epoch": 0.4638728323699422, "grad_norm": 0.25672727823257446, "learning_rate": 0.0001, "loss": 1.5213, "step": 2889 }, { "epoch": 0.46403339755940914, "grad_norm": 0.2626258134841919, "learning_rate": 0.0001, "loss": 1.6652, "step": 2890 }, { "epoch": 0.46419396274887603, "grad_norm": 0.255079060792923, "learning_rate": 0.0001, "loss": 1.5671, "step": 2891 }, { "epoch": 0.464354527938343, "grad_norm": 0.2678382694721222, "learning_rate": 0.0001, "loss": 1.5555, "step": 2892 }, { "epoch": 0.46451509312780986, "grad_norm": 0.2653570771217346, "learning_rate": 0.0001, "loss": 1.5375, "step": 2893 }, { "epoch": 0.4646756583172768, "grad_norm": 0.2625160813331604, "learning_rate": 0.0001, "loss": 1.5035, "step": 2894 }, { "epoch": 0.46483622350674375, "grad_norm": 0.2426673173904419, "learning_rate": 0.0001, "loss": 1.4554, "step": 2895 }, { "epoch": 0.46499678869621064, "grad_norm": 0.25273504853248596, "learning_rate": 0.0001, "loss": 1.4972, "step": 2896 }, { "epoch": 0.4651573538856776, "grad_norm": 0.2501536011695862, "learning_rate": 0.0001, "loss": 1.4182, "step": 2897 }, { "epoch": 0.4653179190751445, "grad_norm": 0.25146669149398804, "learning_rate": 0.0001, "loss": 1.5331, "step": 2898 }, { "epoch": 0.4654784842646114, "grad_norm": 0.25322243571281433, "learning_rate": 0.0001, "loss": 1.5151, "step": 2899 }, { "epoch": 0.46563904945407836, "grad_norm": 0.26858463883399963, "learning_rate": 0.0001, "loss": 1.6399, "step": 2900 }, { "epoch": 0.4657996146435453, "grad_norm": 0.2575323283672333, "learning_rate": 0.0001, "loss": 1.5164, "step": 2901 }, { "epoch": 0.4659601798330122, "grad_norm": 0.2752355635166168, "learning_rate": 0.0001, "loss": 1.5762, "step": 2902 }, { "epoch": 0.46612074502247913, "grad_norm": 0.2613184452056885, "learning_rate": 0.0001, "loss": 1.585, "step": 2903 }, { "epoch": 0.4662813102119461, "grad_norm": 0.2515537142753601, "learning_rate": 0.0001, "loss": 1.5035, "step": 2904 }, { "epoch": 0.46644187540141296, "grad_norm": 0.2640692889690399, "learning_rate": 0.0001, "loss": 1.6099, "step": 2905 }, { "epoch": 0.4666024405908799, "grad_norm": 0.26171159744262695, "learning_rate": 0.0001, "loss": 1.5364, "step": 2906 }, { "epoch": 0.4667630057803468, "grad_norm": 0.25064030289649963, "learning_rate": 0.0001, "loss": 1.5213, "step": 2907 }, { "epoch": 0.46692357096981374, "grad_norm": 0.26227661967277527, "learning_rate": 0.0001, "loss": 1.6114, "step": 2908 }, { "epoch": 0.4670841361592807, "grad_norm": 0.24718691408634186, "learning_rate": 0.0001, "loss": 1.5096, "step": 2909 }, { "epoch": 0.46724470134874757, "grad_norm": 0.251468300819397, "learning_rate": 0.0001, "loss": 1.5939, "step": 2910 }, { "epoch": 0.4674052665382145, "grad_norm": 0.24605131149291992, "learning_rate": 0.0001, "loss": 1.448, "step": 2911 }, { "epoch": 0.46756583172768146, "grad_norm": 0.25557634234428406, "learning_rate": 0.0001, "loss": 1.4951, "step": 2912 }, { "epoch": 0.46772639691714835, "grad_norm": 0.2542295455932617, "learning_rate": 0.0001, "loss": 1.4564, "step": 2913 }, { "epoch": 0.4678869621066153, "grad_norm": 0.25903230905532837, "learning_rate": 0.0001, "loss": 1.4722, "step": 2914 }, { "epoch": 0.46804752729608223, "grad_norm": 0.2592344880104065, "learning_rate": 0.0001, "loss": 1.5616, "step": 2915 }, { "epoch": 0.4682080924855491, "grad_norm": 0.25056353211402893, "learning_rate": 0.0001, "loss": 1.4715, "step": 2916 }, { "epoch": 0.46836865767501606, "grad_norm": 0.26791608333587646, "learning_rate": 0.0001, "loss": 1.623, "step": 2917 }, { "epoch": 0.46852922286448295, "grad_norm": 0.25067391991615295, "learning_rate": 0.0001, "loss": 1.5268, "step": 2918 }, { "epoch": 0.4686897880539499, "grad_norm": 0.25362953543663025, "learning_rate": 0.0001, "loss": 1.5774, "step": 2919 }, { "epoch": 0.46885035324341684, "grad_norm": 0.24526727199554443, "learning_rate": 0.0001, "loss": 1.5023, "step": 2920 }, { "epoch": 0.4690109184328837, "grad_norm": 0.2462223917245865, "learning_rate": 0.0001, "loss": 1.4341, "step": 2921 }, { "epoch": 0.46917148362235067, "grad_norm": 0.24873018264770508, "learning_rate": 0.0001, "loss": 1.5341, "step": 2922 }, { "epoch": 0.4693320488118176, "grad_norm": 0.24852591753005981, "learning_rate": 0.0001, "loss": 1.538, "step": 2923 }, { "epoch": 0.4694926140012845, "grad_norm": 0.24487623572349548, "learning_rate": 0.0001, "loss": 1.4673, "step": 2924 }, { "epoch": 0.46965317919075145, "grad_norm": 0.25096890330314636, "learning_rate": 0.0001, "loss": 1.4821, "step": 2925 }, { "epoch": 0.4698137443802184, "grad_norm": 0.2560618817806244, "learning_rate": 0.0001, "loss": 1.5734, "step": 2926 }, { "epoch": 0.4699743095696853, "grad_norm": 0.24424079060554504, "learning_rate": 0.0001, "loss": 1.5163, "step": 2927 }, { "epoch": 0.4701348747591522, "grad_norm": 0.2547134459018707, "learning_rate": 0.0001, "loss": 1.5492, "step": 2928 }, { "epoch": 0.47029543994861917, "grad_norm": 0.25469645857810974, "learning_rate": 0.0001, "loss": 1.6081, "step": 2929 }, { "epoch": 0.47045600513808605, "grad_norm": 0.2547605335712433, "learning_rate": 0.0001, "loss": 1.5611, "step": 2930 }, { "epoch": 0.470616570327553, "grad_norm": 0.2461174577474594, "learning_rate": 0.0001, "loss": 1.5565, "step": 2931 }, { "epoch": 0.4707771355170199, "grad_norm": 0.26163119077682495, "learning_rate": 0.0001, "loss": 1.5466, "step": 2932 }, { "epoch": 0.47093770070648683, "grad_norm": 0.24534977972507477, "learning_rate": 0.0001, "loss": 1.4996, "step": 2933 }, { "epoch": 0.47109826589595377, "grad_norm": 0.25218233466148376, "learning_rate": 0.0001, "loss": 1.4699, "step": 2934 }, { "epoch": 0.47125883108542066, "grad_norm": 0.24794015288352966, "learning_rate": 0.0001, "loss": 1.4439, "step": 2935 }, { "epoch": 0.4714193962748876, "grad_norm": 0.2545740008354187, "learning_rate": 0.0001, "loss": 1.4699, "step": 2936 }, { "epoch": 0.47157996146435455, "grad_norm": 0.2666550874710083, "learning_rate": 0.0001, "loss": 1.634, "step": 2937 }, { "epoch": 0.47174052665382143, "grad_norm": 0.2550573945045471, "learning_rate": 0.0001, "loss": 1.5672, "step": 2938 }, { "epoch": 0.4719010918432884, "grad_norm": 0.2551001310348511, "learning_rate": 0.0001, "loss": 1.5257, "step": 2939 }, { "epoch": 0.4720616570327553, "grad_norm": 0.24900071322917938, "learning_rate": 0.0001, "loss": 1.4897, "step": 2940 }, { "epoch": 0.4722222222222222, "grad_norm": 0.2513026297092438, "learning_rate": 0.0001, "loss": 1.5128, "step": 2941 }, { "epoch": 0.47238278741168915, "grad_norm": 0.2582988739013672, "learning_rate": 0.0001, "loss": 1.5086, "step": 2942 }, { "epoch": 0.4725433526011561, "grad_norm": 0.24734970927238464, "learning_rate": 0.0001, "loss": 1.5136, "step": 2943 }, { "epoch": 0.472703917790623, "grad_norm": 1.768375277519226, "learning_rate": 0.0001, "loss": 1.4875, "step": 2944 }, { "epoch": 0.47286448298008993, "grad_norm": 0.25934040546417236, "learning_rate": 0.0001, "loss": 1.5262, "step": 2945 }, { "epoch": 0.4730250481695568, "grad_norm": 0.2954626977443695, "learning_rate": 0.0001, "loss": 1.5517, "step": 2946 }, { "epoch": 0.47318561335902376, "grad_norm": 0.29073503613471985, "learning_rate": 0.0001, "loss": 1.5996, "step": 2947 }, { "epoch": 0.4733461785484907, "grad_norm": 0.2698410153388977, "learning_rate": 0.0001, "loss": 1.5954, "step": 2948 }, { "epoch": 0.4735067437379576, "grad_norm": 0.24963271617889404, "learning_rate": 0.0001, "loss": 1.4816, "step": 2949 }, { "epoch": 0.47366730892742454, "grad_norm": 0.2844211161136627, "learning_rate": 0.0001, "loss": 1.564, "step": 2950 }, { "epoch": 0.4738278741168915, "grad_norm": 0.2843918800354004, "learning_rate": 0.0001, "loss": 1.5153, "step": 2951 }, { "epoch": 0.47398843930635837, "grad_norm": 0.27777576446533203, "learning_rate": 0.0001, "loss": 1.5159, "step": 2952 }, { "epoch": 0.4741490044958253, "grad_norm": 0.3000965118408203, "learning_rate": 0.0001, "loss": 1.5341, "step": 2953 }, { "epoch": 0.47430956968529225, "grad_norm": 0.3191080093383789, "learning_rate": 0.0001, "loss": 1.5554, "step": 2954 }, { "epoch": 0.47447013487475914, "grad_norm": 0.2943609058856964, "learning_rate": 0.0001, "loss": 1.5191, "step": 2955 }, { "epoch": 0.4746307000642261, "grad_norm": 0.267706036567688, "learning_rate": 0.0001, "loss": 1.5717, "step": 2956 }, { "epoch": 0.474791265253693, "grad_norm": 0.2766854763031006, "learning_rate": 0.0001, "loss": 1.5028, "step": 2957 }, { "epoch": 0.4749518304431599, "grad_norm": 0.2598714232444763, "learning_rate": 0.0001, "loss": 1.4789, "step": 2958 }, { "epoch": 0.47511239563262686, "grad_norm": 0.31471943855285645, "learning_rate": 0.0001, "loss": 1.4731, "step": 2959 }, { "epoch": 0.47527296082209375, "grad_norm": 0.26923859119415283, "learning_rate": 0.0001, "loss": 1.5163, "step": 2960 }, { "epoch": 0.4754335260115607, "grad_norm": 0.3559715151786804, "learning_rate": 0.0001, "loss": 1.5698, "step": 2961 }, { "epoch": 0.47559409120102764, "grad_norm": 0.32424938678741455, "learning_rate": 0.0001, "loss": 1.5003, "step": 2962 }, { "epoch": 0.4757546563904945, "grad_norm": 0.2876241207122803, "learning_rate": 0.0001, "loss": 1.5818, "step": 2963 }, { "epoch": 0.47591522157996147, "grad_norm": 0.2469119131565094, "learning_rate": 0.0001, "loss": 1.5285, "step": 2964 }, { "epoch": 0.4760757867694284, "grad_norm": 0.26568013429641724, "learning_rate": 0.0001, "loss": 1.5193, "step": 2965 }, { "epoch": 0.4762363519588953, "grad_norm": 0.2633431553840637, "learning_rate": 0.0001, "loss": 1.5456, "step": 2966 }, { "epoch": 0.47639691714836224, "grad_norm": 0.2920028269290924, "learning_rate": 0.0001, "loss": 1.5056, "step": 2967 }, { "epoch": 0.4765574823378292, "grad_norm": 0.2750384211540222, "learning_rate": 0.0001, "loss": 1.4879, "step": 2968 }, { "epoch": 0.4767180475272961, "grad_norm": 0.25811445713043213, "learning_rate": 0.0001, "loss": 1.5274, "step": 2969 }, { "epoch": 0.476878612716763, "grad_norm": 0.2579619586467743, "learning_rate": 0.0001, "loss": 1.5256, "step": 2970 }, { "epoch": 0.4770391779062299, "grad_norm": 0.2626616060733795, "learning_rate": 0.0001, "loss": 1.4944, "step": 2971 }, { "epoch": 0.47719974309569685, "grad_norm": 0.28014877438545227, "learning_rate": 0.0001, "loss": 1.5417, "step": 2972 }, { "epoch": 0.4773603082851638, "grad_norm": 0.25855451822280884, "learning_rate": 0.0001, "loss": 1.4902, "step": 2973 }, { "epoch": 0.4775208734746307, "grad_norm": 0.26190412044525146, "learning_rate": 0.0001, "loss": 1.5015, "step": 2974 }, { "epoch": 0.4776814386640976, "grad_norm": 0.264337420463562, "learning_rate": 0.0001, "loss": 1.4599, "step": 2975 }, { "epoch": 0.47784200385356457, "grad_norm": 0.25439706444740295, "learning_rate": 0.0001, "loss": 1.489, "step": 2976 }, { "epoch": 0.47800256904303146, "grad_norm": 0.2698615491390228, "learning_rate": 0.0001, "loss": 1.6834, "step": 2977 }, { "epoch": 0.4781631342324984, "grad_norm": 0.25165149569511414, "learning_rate": 0.0001, "loss": 1.4425, "step": 2978 }, { "epoch": 0.47832369942196534, "grad_norm": 0.2794475853443146, "learning_rate": 0.0001, "loss": 1.6159, "step": 2979 }, { "epoch": 0.47848426461143223, "grad_norm": 0.23459677398204803, "learning_rate": 0.0001, "loss": 1.4629, "step": 2980 }, { "epoch": 0.4786448298008992, "grad_norm": 0.24268727004528046, "learning_rate": 0.0001, "loss": 1.5017, "step": 2981 }, { "epoch": 0.47880539499036606, "grad_norm": 0.2444038689136505, "learning_rate": 0.0001, "loss": 1.4956, "step": 2982 }, { "epoch": 0.478965960179833, "grad_norm": 0.263205885887146, "learning_rate": 0.0001, "loss": 1.57, "step": 2983 }, { "epoch": 0.47912652536929995, "grad_norm": 0.26336947083473206, "learning_rate": 0.0001, "loss": 1.5825, "step": 2984 }, { "epoch": 0.47928709055876684, "grad_norm": 0.25302061438560486, "learning_rate": 0.0001, "loss": 1.4827, "step": 2985 }, { "epoch": 0.4794476557482338, "grad_norm": 0.26255595684051514, "learning_rate": 0.0001, "loss": 1.5722, "step": 2986 }, { "epoch": 0.4796082209377007, "grad_norm": 0.26336580514907837, "learning_rate": 0.0001, "loss": 1.5097, "step": 2987 }, { "epoch": 0.4797687861271676, "grad_norm": 0.24177514016628265, "learning_rate": 0.0001, "loss": 1.409, "step": 2988 }, { "epoch": 0.47992935131663456, "grad_norm": 0.24892064929008484, "learning_rate": 0.0001, "loss": 1.4953, "step": 2989 }, { "epoch": 0.4800899165061015, "grad_norm": 0.2624559700489044, "learning_rate": 0.0001, "loss": 1.542, "step": 2990 }, { "epoch": 0.4802504816955684, "grad_norm": 0.27438074350357056, "learning_rate": 0.0001, "loss": 1.6751, "step": 2991 }, { "epoch": 0.48041104688503533, "grad_norm": 0.26733386516571045, "learning_rate": 0.0001, "loss": 1.6132, "step": 2992 }, { "epoch": 0.4805716120745023, "grad_norm": 0.2562370002269745, "learning_rate": 0.0001, "loss": 1.5572, "step": 2993 }, { "epoch": 0.48073217726396916, "grad_norm": 0.25975048542022705, "learning_rate": 0.0001, "loss": 1.6142, "step": 2994 }, { "epoch": 0.4808927424534361, "grad_norm": 0.2624019384384155, "learning_rate": 0.0001, "loss": 1.5079, "step": 2995 }, { "epoch": 0.481053307642903, "grad_norm": 0.25996801257133484, "learning_rate": 0.0001, "loss": 1.5772, "step": 2996 }, { "epoch": 0.48121387283236994, "grad_norm": 0.2643839418888092, "learning_rate": 0.0001, "loss": 1.57, "step": 2997 }, { "epoch": 0.4813744380218369, "grad_norm": 0.25774019956588745, "learning_rate": 0.0001, "loss": 1.5286, "step": 2998 }, { "epoch": 0.48153500321130377, "grad_norm": 0.2576081156730652, "learning_rate": 0.0001, "loss": 1.5145, "step": 2999 }, { "epoch": 0.4816955684007707, "grad_norm": 0.25839635729789734, "learning_rate": 0.0001, "loss": 1.5477, "step": 3000 }, { "epoch": 0.48185613359023766, "grad_norm": 0.2544023096561432, "learning_rate": 0.0001, "loss": 1.556, "step": 3001 }, { "epoch": 0.48201669877970454, "grad_norm": 0.2556130588054657, "learning_rate": 0.0001, "loss": 1.5974, "step": 3002 }, { "epoch": 0.4821772639691715, "grad_norm": 0.2694602608680725, "learning_rate": 0.0001, "loss": 1.4956, "step": 3003 }, { "epoch": 0.48233782915863843, "grad_norm": 0.24750924110412598, "learning_rate": 0.0001, "loss": 1.4942, "step": 3004 }, { "epoch": 0.4824983943481053, "grad_norm": 0.25541990995407104, "learning_rate": 0.0001, "loss": 1.4511, "step": 3005 }, { "epoch": 0.48265895953757226, "grad_norm": 0.2663719058036804, "learning_rate": 0.0001, "loss": 1.578, "step": 3006 }, { "epoch": 0.48281952472703915, "grad_norm": 0.26437413692474365, "learning_rate": 0.0001, "loss": 1.5265, "step": 3007 }, { "epoch": 0.4829800899165061, "grad_norm": 0.24333199858665466, "learning_rate": 0.0001, "loss": 1.4902, "step": 3008 }, { "epoch": 0.48314065510597304, "grad_norm": 0.2512253224849701, "learning_rate": 0.0001, "loss": 1.4467, "step": 3009 }, { "epoch": 0.4833012202954399, "grad_norm": 0.2544972002506256, "learning_rate": 0.0001, "loss": 1.547, "step": 3010 }, { "epoch": 0.48346178548490687, "grad_norm": 0.2563701868057251, "learning_rate": 0.0001, "loss": 1.5568, "step": 3011 }, { "epoch": 0.4836223506743738, "grad_norm": 0.2565440535545349, "learning_rate": 0.0001, "loss": 1.456, "step": 3012 }, { "epoch": 0.4837829158638407, "grad_norm": 0.27084702253341675, "learning_rate": 0.0001, "loss": 1.542, "step": 3013 }, { "epoch": 0.48394348105330764, "grad_norm": 0.27865681052207947, "learning_rate": 0.0001, "loss": 1.5371, "step": 3014 }, { "epoch": 0.4841040462427746, "grad_norm": 0.25021904706954956, "learning_rate": 0.0001, "loss": 1.5203, "step": 3015 }, { "epoch": 0.4842646114322415, "grad_norm": 0.27153530716896057, "learning_rate": 0.0001, "loss": 1.5202, "step": 3016 }, { "epoch": 0.4844251766217084, "grad_norm": 0.28725340962409973, "learning_rate": 0.0001, "loss": 1.534, "step": 3017 }, { "epoch": 0.48458574181117536, "grad_norm": 0.2531295716762543, "learning_rate": 0.0001, "loss": 1.5382, "step": 3018 }, { "epoch": 0.48474630700064225, "grad_norm": 0.26707106828689575, "learning_rate": 0.0001, "loss": 1.481, "step": 3019 }, { "epoch": 0.4849068721901092, "grad_norm": 0.2601153254508972, "learning_rate": 0.0001, "loss": 1.4935, "step": 3020 }, { "epoch": 0.4850674373795761, "grad_norm": 0.23785848915576935, "learning_rate": 0.0001, "loss": 1.4438, "step": 3021 }, { "epoch": 0.485228002569043, "grad_norm": 0.2714046239852905, "learning_rate": 0.0001, "loss": 1.5395, "step": 3022 }, { "epoch": 0.48538856775850997, "grad_norm": 0.2642495334148407, "learning_rate": 0.0001, "loss": 1.5503, "step": 3023 }, { "epoch": 0.48554913294797686, "grad_norm": 0.2607245147228241, "learning_rate": 0.0001, "loss": 1.4963, "step": 3024 }, { "epoch": 0.4857096981374438, "grad_norm": 0.26534876227378845, "learning_rate": 0.0001, "loss": 1.5146, "step": 3025 }, { "epoch": 0.48587026332691075, "grad_norm": 0.2559542655944824, "learning_rate": 0.0001, "loss": 1.4475, "step": 3026 }, { "epoch": 0.48603082851637763, "grad_norm": 0.2548435628414154, "learning_rate": 0.0001, "loss": 1.6015, "step": 3027 }, { "epoch": 0.4861913937058446, "grad_norm": 0.2658431828022003, "learning_rate": 0.0001, "loss": 1.5076, "step": 3028 }, { "epoch": 0.4863519588953115, "grad_norm": 0.28509068489074707, "learning_rate": 0.0001, "loss": 1.505, "step": 3029 }, { "epoch": 0.4865125240847784, "grad_norm": 0.2549109160900116, "learning_rate": 0.0001, "loss": 1.5436, "step": 3030 }, { "epoch": 0.48667308927424535, "grad_norm": 0.41223037242889404, "learning_rate": 0.0001, "loss": 1.5205, "step": 3031 }, { "epoch": 0.48683365446371224, "grad_norm": 0.2568729519844055, "learning_rate": 0.0001, "loss": 1.4396, "step": 3032 }, { "epoch": 0.4869942196531792, "grad_norm": 0.24964162707328796, "learning_rate": 0.0001, "loss": 1.4121, "step": 3033 }, { "epoch": 0.4871547848426461, "grad_norm": 0.2588632106781006, "learning_rate": 0.0001, "loss": 1.5287, "step": 3034 }, { "epoch": 0.487315350032113, "grad_norm": 0.25589656829833984, "learning_rate": 0.0001, "loss": 1.5095, "step": 3035 }, { "epoch": 0.48747591522157996, "grad_norm": 0.2549636960029602, "learning_rate": 0.0001, "loss": 1.5844, "step": 3036 }, { "epoch": 0.4876364804110469, "grad_norm": 0.24072504043579102, "learning_rate": 0.0001, "loss": 1.4383, "step": 3037 }, { "epoch": 0.4877970456005138, "grad_norm": 0.24526798725128174, "learning_rate": 0.0001, "loss": 1.4131, "step": 3038 }, { "epoch": 0.48795761078998073, "grad_norm": 0.2538144290447235, "learning_rate": 0.0001, "loss": 1.5171, "step": 3039 }, { "epoch": 0.4881181759794477, "grad_norm": 0.2606353163719177, "learning_rate": 0.0001, "loss": 1.5911, "step": 3040 }, { "epoch": 0.48827874116891457, "grad_norm": 0.25958704948425293, "learning_rate": 0.0001, "loss": 1.5363, "step": 3041 }, { "epoch": 0.4884393063583815, "grad_norm": 0.2852924168109894, "learning_rate": 0.0001, "loss": 1.5482, "step": 3042 }, { "epoch": 0.48859987154784845, "grad_norm": 0.25527581572532654, "learning_rate": 0.0001, "loss": 1.5255, "step": 3043 }, { "epoch": 0.48876043673731534, "grad_norm": 0.2534925937652588, "learning_rate": 0.0001, "loss": 1.4663, "step": 3044 }, { "epoch": 0.4889210019267823, "grad_norm": 0.27186769247055054, "learning_rate": 0.0001, "loss": 1.4537, "step": 3045 }, { "epoch": 0.48908156711624917, "grad_norm": 0.28862160444259644, "learning_rate": 0.0001, "loss": 1.5088, "step": 3046 }, { "epoch": 0.4892421323057161, "grad_norm": 0.270006388425827, "learning_rate": 0.0001, "loss": 1.5399, "step": 3047 }, { "epoch": 0.48940269749518306, "grad_norm": 0.26035580039024353, "learning_rate": 0.0001, "loss": 1.5279, "step": 3048 }, { "epoch": 0.48956326268464995, "grad_norm": 0.3059391677379608, "learning_rate": 0.0001, "loss": 1.6101, "step": 3049 }, { "epoch": 0.4897238278741169, "grad_norm": 0.2891250252723694, "learning_rate": 0.0001, "loss": 1.524, "step": 3050 }, { "epoch": 0.48988439306358383, "grad_norm": 0.2657817304134369, "learning_rate": 0.0001, "loss": 1.473, "step": 3051 }, { "epoch": 0.4900449582530507, "grad_norm": 0.27785712480545044, "learning_rate": 0.0001, "loss": 1.5539, "step": 3052 }, { "epoch": 0.49020552344251767, "grad_norm": 0.23967069387435913, "learning_rate": 0.0001, "loss": 1.5034, "step": 3053 }, { "epoch": 0.4903660886319846, "grad_norm": 0.2672518491744995, "learning_rate": 0.0001, "loss": 1.4786, "step": 3054 }, { "epoch": 0.4905266538214515, "grad_norm": 0.25132396817207336, "learning_rate": 0.0001, "loss": 1.4989, "step": 3055 }, { "epoch": 0.49068721901091844, "grad_norm": 0.2554181218147278, "learning_rate": 0.0001, "loss": 1.5257, "step": 3056 }, { "epoch": 0.4908477842003854, "grad_norm": 0.24807313084602356, "learning_rate": 0.0001, "loss": 1.4963, "step": 3057 }, { "epoch": 0.49100834938985227, "grad_norm": 0.2602437138557434, "learning_rate": 0.0001, "loss": 1.5575, "step": 3058 }, { "epoch": 0.4911689145793192, "grad_norm": 0.26603472232818604, "learning_rate": 0.0001, "loss": 1.5486, "step": 3059 }, { "epoch": 0.4913294797687861, "grad_norm": 0.2473047375679016, "learning_rate": 0.0001, "loss": 1.5221, "step": 3060 }, { "epoch": 0.49149004495825305, "grad_norm": 0.2520563006401062, "learning_rate": 0.0001, "loss": 1.4736, "step": 3061 }, { "epoch": 0.49165061014772, "grad_norm": 0.24585667252540588, "learning_rate": 0.0001, "loss": 1.5442, "step": 3062 }, { "epoch": 0.4918111753371869, "grad_norm": 0.29285928606987, "learning_rate": 0.0001, "loss": 1.6057, "step": 3063 }, { "epoch": 0.4919717405266538, "grad_norm": 0.24709568917751312, "learning_rate": 0.0001, "loss": 1.5296, "step": 3064 }, { "epoch": 0.49213230571612077, "grad_norm": 0.2599603235721588, "learning_rate": 0.0001, "loss": 1.5214, "step": 3065 }, { "epoch": 0.49229287090558765, "grad_norm": 0.25023606419563293, "learning_rate": 0.0001, "loss": 1.5966, "step": 3066 }, { "epoch": 0.4924534360950546, "grad_norm": 0.26718711853027344, "learning_rate": 0.0001, "loss": 1.554, "step": 3067 }, { "epoch": 0.49261400128452154, "grad_norm": 0.2554135322570801, "learning_rate": 0.0001, "loss": 1.4141, "step": 3068 }, { "epoch": 0.49277456647398843, "grad_norm": 0.2615140974521637, "learning_rate": 0.0001, "loss": 1.5525, "step": 3069 }, { "epoch": 0.4929351316634554, "grad_norm": 0.25347504019737244, "learning_rate": 0.0001, "loss": 1.5742, "step": 3070 }, { "epoch": 0.49309569685292226, "grad_norm": 0.2668316662311554, "learning_rate": 0.0001, "loss": 1.5778, "step": 3071 }, { "epoch": 0.4932562620423892, "grad_norm": 0.2649231553077698, "learning_rate": 0.0001, "loss": 1.4718, "step": 3072 }, { "epoch": 0.49341682723185615, "grad_norm": 0.2498873621225357, "learning_rate": 0.0001, "loss": 1.4198, "step": 3073 }, { "epoch": 0.49357739242132304, "grad_norm": 0.2470768839120865, "learning_rate": 0.0001, "loss": 1.567, "step": 3074 }, { "epoch": 0.49373795761079, "grad_norm": 0.2564491331577301, "learning_rate": 0.0001, "loss": 1.4942, "step": 3075 }, { "epoch": 0.4938985228002569, "grad_norm": 0.2566014528274536, "learning_rate": 0.0001, "loss": 1.5559, "step": 3076 }, { "epoch": 0.4940590879897238, "grad_norm": 0.24887849390506744, "learning_rate": 0.0001, "loss": 1.4746, "step": 3077 }, { "epoch": 0.49421965317919075, "grad_norm": 0.25308987498283386, "learning_rate": 0.0001, "loss": 1.4983, "step": 3078 }, { "epoch": 0.4943802183686577, "grad_norm": 0.2783001661300659, "learning_rate": 0.0001, "loss": 1.579, "step": 3079 }, { "epoch": 0.4945407835581246, "grad_norm": 0.25055360794067383, "learning_rate": 0.0001, "loss": 1.5075, "step": 3080 }, { "epoch": 0.49470134874759153, "grad_norm": 0.27468374371528625, "learning_rate": 0.0001, "loss": 1.4941, "step": 3081 }, { "epoch": 0.4948619139370585, "grad_norm": 0.2519631087779999, "learning_rate": 0.0001, "loss": 1.5096, "step": 3082 }, { "epoch": 0.49502247912652536, "grad_norm": 0.271618127822876, "learning_rate": 0.0001, "loss": 1.4984, "step": 3083 }, { "epoch": 0.4951830443159923, "grad_norm": 0.24738885462284088, "learning_rate": 0.0001, "loss": 1.559, "step": 3084 }, { "epoch": 0.4953436095054592, "grad_norm": 0.24938714504241943, "learning_rate": 0.0001, "loss": 1.5353, "step": 3085 }, { "epoch": 0.49550417469492614, "grad_norm": 0.24343474209308624, "learning_rate": 0.0001, "loss": 1.5173, "step": 3086 }, { "epoch": 0.4956647398843931, "grad_norm": 0.2473086416721344, "learning_rate": 0.0001, "loss": 1.5159, "step": 3087 }, { "epoch": 0.49582530507385997, "grad_norm": 0.2625449597835541, "learning_rate": 0.0001, "loss": 1.5636, "step": 3088 }, { "epoch": 0.4959858702633269, "grad_norm": 0.2572121322154999, "learning_rate": 0.0001, "loss": 1.5225, "step": 3089 }, { "epoch": 0.49614643545279385, "grad_norm": 0.25577324628829956, "learning_rate": 0.0001, "loss": 1.52, "step": 3090 }, { "epoch": 0.49630700064226074, "grad_norm": 0.250839501619339, "learning_rate": 0.0001, "loss": 1.4678, "step": 3091 }, { "epoch": 0.4964675658317277, "grad_norm": 0.25855231285095215, "learning_rate": 0.0001, "loss": 1.5609, "step": 3092 }, { "epoch": 0.49662813102119463, "grad_norm": 0.26504799723625183, "learning_rate": 0.0001, "loss": 1.4408, "step": 3093 }, { "epoch": 0.4967886962106615, "grad_norm": 0.2471844106912613, "learning_rate": 0.0001, "loss": 1.5146, "step": 3094 }, { "epoch": 0.49694926140012846, "grad_norm": 0.24830299615859985, "learning_rate": 0.0001, "loss": 1.4682, "step": 3095 }, { "epoch": 0.49710982658959535, "grad_norm": 0.25124719738960266, "learning_rate": 0.0001, "loss": 1.5363, "step": 3096 }, { "epoch": 0.4972703917790623, "grad_norm": 0.26362356543540955, "learning_rate": 0.0001, "loss": 1.553, "step": 3097 }, { "epoch": 0.49743095696852924, "grad_norm": 0.2732217609882355, "learning_rate": 0.0001, "loss": 1.4665, "step": 3098 }, { "epoch": 0.4975915221579961, "grad_norm": 0.25540000200271606, "learning_rate": 0.0001, "loss": 1.5296, "step": 3099 }, { "epoch": 0.49775208734746307, "grad_norm": 0.25769975781440735, "learning_rate": 0.0001, "loss": 1.5072, "step": 3100 }, { "epoch": 0.49791265253693, "grad_norm": 0.266641229391098, "learning_rate": 0.0001, "loss": 1.5302, "step": 3101 }, { "epoch": 0.4980732177263969, "grad_norm": 0.2833288013935089, "learning_rate": 0.0001, "loss": 1.4988, "step": 3102 }, { "epoch": 0.49823378291586384, "grad_norm": 0.252909779548645, "learning_rate": 0.0001, "loss": 1.4831, "step": 3103 }, { "epoch": 0.4983943481053308, "grad_norm": 0.2659388780593872, "learning_rate": 0.0001, "loss": 1.5238, "step": 3104 }, { "epoch": 0.4985549132947977, "grad_norm": 0.2778969407081604, "learning_rate": 0.0001, "loss": 1.4491, "step": 3105 }, { "epoch": 0.4987154784842646, "grad_norm": 0.24570304155349731, "learning_rate": 0.0001, "loss": 1.5025, "step": 3106 }, { "epoch": 0.49887604367373156, "grad_norm": 0.24787116050720215, "learning_rate": 0.0001, "loss": 1.5433, "step": 3107 }, { "epoch": 0.49903660886319845, "grad_norm": 0.28059741854667664, "learning_rate": 0.0001, "loss": 1.4792, "step": 3108 }, { "epoch": 0.4991971740526654, "grad_norm": 0.24759264290332794, "learning_rate": 0.0001, "loss": 1.5283, "step": 3109 }, { "epoch": 0.4993577392421323, "grad_norm": 0.2525692582130432, "learning_rate": 0.0001, "loss": 1.5304, "step": 3110 }, { "epoch": 0.4995183044315992, "grad_norm": 0.2865564525127411, "learning_rate": 0.0001, "loss": 1.5287, "step": 3111 }, { "epoch": 0.49967886962106617, "grad_norm": 0.2567136585712433, "learning_rate": 0.0001, "loss": 1.5395, "step": 3112 }, { "epoch": 0.49983943481053306, "grad_norm": 0.24678722023963928, "learning_rate": 0.0001, "loss": 1.481, "step": 3113 }, { "epoch": 0.5, "grad_norm": 0.2739090919494629, "learning_rate": 0.0001, "loss": 1.5642, "step": 3114 }, { "epoch": 0.5001605651894669, "grad_norm": 0.2589796781539917, "learning_rate": 0.0001, "loss": 1.4718, "step": 3115 }, { "epoch": 0.5003211303789339, "grad_norm": 0.2633964419364929, "learning_rate": 0.0001, "loss": 1.4272, "step": 3116 }, { "epoch": 0.5004816955684007, "grad_norm": 0.2695305049419403, "learning_rate": 0.0001, "loss": 1.593, "step": 3117 }, { "epoch": 0.5006422607578677, "grad_norm": 0.29884904623031616, "learning_rate": 0.0001, "loss": 1.5662, "step": 3118 }, { "epoch": 0.5008028259473346, "grad_norm": 0.26133275032043457, "learning_rate": 0.0001, "loss": 1.5149, "step": 3119 }, { "epoch": 0.5009633911368016, "grad_norm": 0.25415998697280884, "learning_rate": 0.0001, "loss": 1.5085, "step": 3120 }, { "epoch": 0.5011239563262685, "grad_norm": 0.23989804089069366, "learning_rate": 0.0001, "loss": 1.4415, "step": 3121 }, { "epoch": 0.5012845215157354, "grad_norm": 0.25352632999420166, "learning_rate": 0.0001, "loss": 1.518, "step": 3122 }, { "epoch": 0.5014450867052023, "grad_norm": 0.245565265417099, "learning_rate": 0.0001, "loss": 1.4381, "step": 3123 }, { "epoch": 0.5016056518946692, "grad_norm": 0.25735533237457275, "learning_rate": 0.0001, "loss": 1.5262, "step": 3124 }, { "epoch": 0.5017662170841362, "grad_norm": 0.2700367867946625, "learning_rate": 0.0001, "loss": 1.5335, "step": 3125 }, { "epoch": 0.5019267822736031, "grad_norm": 0.2597363293170929, "learning_rate": 0.0001, "loss": 1.5915, "step": 3126 }, { "epoch": 0.50208734746307, "grad_norm": 0.246708482503891, "learning_rate": 0.0001, "loss": 1.566, "step": 3127 }, { "epoch": 0.5022479126525369, "grad_norm": 0.2523314356803894, "learning_rate": 0.0001, "loss": 1.5981, "step": 3128 }, { "epoch": 0.5024084778420038, "grad_norm": 0.26154765486717224, "learning_rate": 0.0001, "loss": 1.5659, "step": 3129 }, { "epoch": 0.5025690430314708, "grad_norm": 0.27075454592704773, "learning_rate": 0.0001, "loss": 1.5471, "step": 3130 }, { "epoch": 0.5027296082209377, "grad_norm": 0.2411806732416153, "learning_rate": 0.0001, "loss": 1.4735, "step": 3131 }, { "epoch": 0.5028901734104047, "grad_norm": 0.2575620710849762, "learning_rate": 0.0001, "loss": 1.5578, "step": 3132 }, { "epoch": 0.5030507385998716, "grad_norm": 0.2657145857810974, "learning_rate": 0.0001, "loss": 1.5552, "step": 3133 }, { "epoch": 0.5032113037893384, "grad_norm": 0.2549566626548767, "learning_rate": 0.0001, "loss": 1.5077, "step": 3134 }, { "epoch": 0.5033718689788054, "grad_norm": 0.2419261783361435, "learning_rate": 0.0001, "loss": 1.4125, "step": 3135 }, { "epoch": 0.5035324341682723, "grad_norm": 0.2513786256313324, "learning_rate": 0.0001, "loss": 1.5263, "step": 3136 }, { "epoch": 0.5036929993577393, "grad_norm": 0.47951874136924744, "learning_rate": 0.0001, "loss": 1.5759, "step": 3137 }, { "epoch": 0.5038535645472062, "grad_norm": 0.2813974916934967, "learning_rate": 0.0001, "loss": 1.5249, "step": 3138 }, { "epoch": 0.5040141297366731, "grad_norm": 0.2584213614463806, "learning_rate": 0.0001, "loss": 1.4307, "step": 3139 }, { "epoch": 0.50417469492614, "grad_norm": 0.2642471194267273, "learning_rate": 0.0001, "loss": 1.4891, "step": 3140 }, { "epoch": 0.5043352601156069, "grad_norm": 0.2796146869659424, "learning_rate": 0.0001, "loss": 1.5579, "step": 3141 }, { "epoch": 0.5044958253050739, "grad_norm": 0.27717068791389465, "learning_rate": 0.0001, "loss": 1.5323, "step": 3142 }, { "epoch": 0.5046563904945408, "grad_norm": 0.26738211512565613, "learning_rate": 0.0001, "loss": 1.5566, "step": 3143 }, { "epoch": 0.5048169556840078, "grad_norm": 0.2563300132751465, "learning_rate": 0.0001, "loss": 1.5415, "step": 3144 }, { "epoch": 0.5049775208734746, "grad_norm": 0.261578232049942, "learning_rate": 0.0001, "loss": 1.5141, "step": 3145 }, { "epoch": 0.5051380860629415, "grad_norm": 0.24926215410232544, "learning_rate": 0.0001, "loss": 1.4775, "step": 3146 }, { "epoch": 0.5052986512524085, "grad_norm": 0.26072317361831665, "learning_rate": 0.0001, "loss": 1.5386, "step": 3147 }, { "epoch": 0.5054592164418754, "grad_norm": 0.26383572816848755, "learning_rate": 0.0001, "loss": 1.5988, "step": 3148 }, { "epoch": 0.5056197816313424, "grad_norm": 0.2610394358634949, "learning_rate": 0.0001, "loss": 1.5487, "step": 3149 }, { "epoch": 0.5057803468208093, "grad_norm": 0.25946468114852905, "learning_rate": 0.0001, "loss": 1.6254, "step": 3150 }, { "epoch": 0.5059409120102761, "grad_norm": 0.2548661530017853, "learning_rate": 0.0001, "loss": 1.5542, "step": 3151 }, { "epoch": 0.5061014771997431, "grad_norm": 0.2439573109149933, "learning_rate": 0.0001, "loss": 1.499, "step": 3152 }, { "epoch": 0.50626204238921, "grad_norm": 0.2605626583099365, "learning_rate": 0.0001, "loss": 1.4585, "step": 3153 }, { "epoch": 0.506422607578677, "grad_norm": 0.25361889600753784, "learning_rate": 0.0001, "loss": 1.5259, "step": 3154 }, { "epoch": 0.5065831727681439, "grad_norm": 0.26460692286491394, "learning_rate": 0.0001, "loss": 1.5612, "step": 3155 }, { "epoch": 0.5067437379576107, "grad_norm": 0.26998046040534973, "learning_rate": 0.0001, "loss": 1.5473, "step": 3156 }, { "epoch": 0.5069043031470777, "grad_norm": 0.2783830165863037, "learning_rate": 0.0001, "loss": 1.5556, "step": 3157 }, { "epoch": 0.5070648683365446, "grad_norm": 0.2519178092479706, "learning_rate": 0.0001, "loss": 1.5053, "step": 3158 }, { "epoch": 0.5072254335260116, "grad_norm": 0.2751830816268921, "learning_rate": 0.0001, "loss": 1.6179, "step": 3159 }, { "epoch": 0.5073859987154785, "grad_norm": 0.2519207000732422, "learning_rate": 0.0001, "loss": 1.4305, "step": 3160 }, { "epoch": 0.5075465639049455, "grad_norm": 0.2452932894229889, "learning_rate": 0.0001, "loss": 1.4406, "step": 3161 }, { "epoch": 0.5077071290944123, "grad_norm": 0.24579176306724548, "learning_rate": 0.0001, "loss": 1.4543, "step": 3162 }, { "epoch": 0.5078676942838792, "grad_norm": 0.24985000491142273, "learning_rate": 0.0001, "loss": 1.5073, "step": 3163 }, { "epoch": 0.5080282594733462, "grad_norm": 0.26153042912483215, "learning_rate": 0.0001, "loss": 1.4945, "step": 3164 }, { "epoch": 0.5081888246628131, "grad_norm": 0.2678162753582001, "learning_rate": 0.0001, "loss": 1.4675, "step": 3165 }, { "epoch": 0.5083493898522801, "grad_norm": 0.2512088716030121, "learning_rate": 0.0001, "loss": 1.5458, "step": 3166 }, { "epoch": 0.5085099550417469, "grad_norm": 0.25522008538246155, "learning_rate": 0.0001, "loss": 1.5105, "step": 3167 }, { "epoch": 0.5086705202312138, "grad_norm": 0.26836958527565, "learning_rate": 0.0001, "loss": 1.5729, "step": 3168 }, { "epoch": 0.5088310854206808, "grad_norm": 0.2670387625694275, "learning_rate": 0.0001, "loss": 1.5368, "step": 3169 }, { "epoch": 0.5089916506101477, "grad_norm": 0.2495080679655075, "learning_rate": 0.0001, "loss": 1.5713, "step": 3170 }, { "epoch": 0.5091522157996147, "grad_norm": 0.26266956329345703, "learning_rate": 0.0001, "loss": 1.5738, "step": 3171 }, { "epoch": 0.5093127809890816, "grad_norm": 0.2562830448150635, "learning_rate": 0.0001, "loss": 1.5904, "step": 3172 }, { "epoch": 0.5094733461785484, "grad_norm": 0.24958689510822296, "learning_rate": 0.0001, "loss": 1.4577, "step": 3173 }, { "epoch": 0.5096339113680154, "grad_norm": 0.24641390144824982, "learning_rate": 0.0001, "loss": 1.5625, "step": 3174 }, { "epoch": 0.5097944765574823, "grad_norm": 0.2717821002006531, "learning_rate": 0.0001, "loss": 1.499, "step": 3175 }, { "epoch": 0.5099550417469493, "grad_norm": 0.28674250841140747, "learning_rate": 0.0001, "loss": 1.545, "step": 3176 }, { "epoch": 0.5101156069364162, "grad_norm": 0.2587761878967285, "learning_rate": 0.0001, "loss": 1.5364, "step": 3177 }, { "epoch": 0.510276172125883, "grad_norm": 0.27704596519470215, "learning_rate": 0.0001, "loss": 1.5522, "step": 3178 }, { "epoch": 0.51043673731535, "grad_norm": 0.26586246490478516, "learning_rate": 0.0001, "loss": 1.5379, "step": 3179 }, { "epoch": 0.5105973025048169, "grad_norm": 0.2559185028076172, "learning_rate": 0.0001, "loss": 1.5399, "step": 3180 }, { "epoch": 0.5107578676942839, "grad_norm": 0.26495662331581116, "learning_rate": 0.0001, "loss": 1.5068, "step": 3181 }, { "epoch": 0.5109184328837508, "grad_norm": 0.2635836899280548, "learning_rate": 0.0001, "loss": 1.5815, "step": 3182 }, { "epoch": 0.5110789980732178, "grad_norm": 0.27640485763549805, "learning_rate": 0.0001, "loss": 1.5914, "step": 3183 }, { "epoch": 0.5112395632626846, "grad_norm": 0.25381994247436523, "learning_rate": 0.0001, "loss": 1.5241, "step": 3184 }, { "epoch": 0.5114001284521515, "grad_norm": 0.2735559046268463, "learning_rate": 0.0001, "loss": 1.6167, "step": 3185 }, { "epoch": 0.5115606936416185, "grad_norm": 0.2589644193649292, "learning_rate": 0.0001, "loss": 1.5265, "step": 3186 }, { "epoch": 0.5117212588310854, "grad_norm": 0.2958916425704956, "learning_rate": 0.0001, "loss": 1.5123, "step": 3187 }, { "epoch": 0.5118818240205524, "grad_norm": 0.2542179524898529, "learning_rate": 0.0001, "loss": 1.5355, "step": 3188 }, { "epoch": 0.5120423892100193, "grad_norm": 0.26672956347465515, "learning_rate": 0.0001, "loss": 1.5225, "step": 3189 }, { "epoch": 0.5122029543994862, "grad_norm": 0.2564774453639984, "learning_rate": 0.0001, "loss": 1.4778, "step": 3190 }, { "epoch": 0.5123635195889531, "grad_norm": 0.2652626633644104, "learning_rate": 0.0001, "loss": 1.525, "step": 3191 }, { "epoch": 0.51252408477842, "grad_norm": 0.24786992371082306, "learning_rate": 0.0001, "loss": 1.4826, "step": 3192 }, { "epoch": 0.512684649967887, "grad_norm": 0.2644241154193878, "learning_rate": 0.0001, "loss": 1.47, "step": 3193 }, { "epoch": 0.5128452151573539, "grad_norm": 0.2515641152858734, "learning_rate": 0.0001, "loss": 1.4372, "step": 3194 }, { "epoch": 0.5130057803468208, "grad_norm": 0.25682637095451355, "learning_rate": 0.0001, "loss": 1.449, "step": 3195 }, { "epoch": 0.5131663455362877, "grad_norm": 0.2886198163032532, "learning_rate": 0.0001, "loss": 1.6067, "step": 3196 }, { "epoch": 0.5133269107257546, "grad_norm": 0.2510821521282196, "learning_rate": 0.0001, "loss": 1.4527, "step": 3197 }, { "epoch": 0.5134874759152216, "grad_norm": 0.25297778844833374, "learning_rate": 0.0001, "loss": 1.5021, "step": 3198 }, { "epoch": 0.5136480411046885, "grad_norm": 0.2658636271953583, "learning_rate": 0.0001, "loss": 1.5454, "step": 3199 }, { "epoch": 0.5138086062941555, "grad_norm": 0.24878738820552826, "learning_rate": 0.0001, "loss": 1.491, "step": 3200 }, { "epoch": 0.5139691714836223, "grad_norm": 0.27713683247566223, "learning_rate": 0.0001, "loss": 1.4908, "step": 3201 }, { "epoch": 0.5141297366730893, "grad_norm": 0.26166701316833496, "learning_rate": 0.0001, "loss": 1.5481, "step": 3202 }, { "epoch": 0.5142903018625562, "grad_norm": 0.2589658796787262, "learning_rate": 0.0001, "loss": 1.5556, "step": 3203 }, { "epoch": 0.5144508670520231, "grad_norm": 0.2689337432384491, "learning_rate": 0.0001, "loss": 1.5803, "step": 3204 }, { "epoch": 0.5146114322414901, "grad_norm": 0.2718493342399597, "learning_rate": 0.0001, "loss": 1.5135, "step": 3205 }, { "epoch": 0.5147719974309569, "grad_norm": 0.2672770619392395, "learning_rate": 0.0001, "loss": 1.534, "step": 3206 }, { "epoch": 0.5149325626204239, "grad_norm": 0.25379711389541626, "learning_rate": 0.0001, "loss": 1.5178, "step": 3207 }, { "epoch": 0.5150931278098908, "grad_norm": 0.24569812417030334, "learning_rate": 0.0001, "loss": 1.4503, "step": 3208 }, { "epoch": 0.5152536929993577, "grad_norm": 0.259088933467865, "learning_rate": 0.0001, "loss": 1.5196, "step": 3209 }, { "epoch": 0.5154142581888247, "grad_norm": 0.2550090551376343, "learning_rate": 0.0001, "loss": 1.4543, "step": 3210 }, { "epoch": 0.5155748233782916, "grad_norm": 0.2598012089729309, "learning_rate": 0.0001, "loss": 1.4943, "step": 3211 }, { "epoch": 0.5157353885677585, "grad_norm": 0.2577979266643524, "learning_rate": 0.0001, "loss": 1.4927, "step": 3212 }, { "epoch": 0.5158959537572254, "grad_norm": 0.25564226508140564, "learning_rate": 0.0001, "loss": 1.4854, "step": 3213 }, { "epoch": 0.5160565189466924, "grad_norm": 0.25783076882362366, "learning_rate": 0.0001, "loss": 1.535, "step": 3214 }, { "epoch": 0.5162170841361593, "grad_norm": 0.23981228470802307, "learning_rate": 0.0001, "loss": 1.4536, "step": 3215 }, { "epoch": 0.5163776493256262, "grad_norm": 0.28006672859191895, "learning_rate": 0.0001, "loss": 1.489, "step": 3216 }, { "epoch": 0.5165382145150931, "grad_norm": 0.2745996415615082, "learning_rate": 0.0001, "loss": 1.5246, "step": 3217 }, { "epoch": 0.51669877970456, "grad_norm": 0.24766501784324646, "learning_rate": 0.0001, "loss": 1.4865, "step": 3218 }, { "epoch": 0.516859344894027, "grad_norm": 0.24522282183170319, "learning_rate": 0.0001, "loss": 1.4773, "step": 3219 }, { "epoch": 0.5170199100834939, "grad_norm": 0.26986628770828247, "learning_rate": 0.0001, "loss": 1.5138, "step": 3220 }, { "epoch": 0.5171804752729608, "grad_norm": 0.2706376910209656, "learning_rate": 0.0001, "loss": 1.5568, "step": 3221 }, { "epoch": 0.5173410404624278, "grad_norm": 0.27984869480133057, "learning_rate": 0.0001, "loss": 1.5308, "step": 3222 }, { "epoch": 0.5175016056518946, "grad_norm": 0.2790790796279907, "learning_rate": 0.0001, "loss": 1.5512, "step": 3223 }, { "epoch": 0.5176621708413616, "grad_norm": 0.2629150450229645, "learning_rate": 0.0001, "loss": 1.5213, "step": 3224 }, { "epoch": 0.5178227360308285, "grad_norm": 0.2627519965171814, "learning_rate": 0.0001, "loss": 1.4908, "step": 3225 }, { "epoch": 0.5179833012202955, "grad_norm": 0.2655399441719055, "learning_rate": 0.0001, "loss": 1.5327, "step": 3226 }, { "epoch": 0.5181438664097624, "grad_norm": 0.25235995650291443, "learning_rate": 0.0001, "loss": 1.4728, "step": 3227 }, { "epoch": 0.5183044315992292, "grad_norm": 0.2675730586051941, "learning_rate": 0.0001, "loss": 1.5385, "step": 3228 }, { "epoch": 0.5184649967886962, "grad_norm": 0.2632604241371155, "learning_rate": 0.0001, "loss": 1.4902, "step": 3229 }, { "epoch": 0.5186255619781631, "grad_norm": 0.2656300663948059, "learning_rate": 0.0001, "loss": 1.5251, "step": 3230 }, { "epoch": 0.5187861271676301, "grad_norm": 0.2696046531200409, "learning_rate": 0.0001, "loss": 1.533, "step": 3231 }, { "epoch": 0.518946692357097, "grad_norm": 0.2575431168079376, "learning_rate": 0.0001, "loss": 1.4346, "step": 3232 }, { "epoch": 0.519107257546564, "grad_norm": 0.2561548352241516, "learning_rate": 0.0001, "loss": 1.5137, "step": 3233 }, { "epoch": 0.5192678227360308, "grad_norm": 0.25344690680503845, "learning_rate": 0.0001, "loss": 1.473, "step": 3234 }, { "epoch": 0.5194283879254977, "grad_norm": 0.2702086269855499, "learning_rate": 0.0001, "loss": 1.5989, "step": 3235 }, { "epoch": 0.5195889531149647, "grad_norm": 0.26273682713508606, "learning_rate": 0.0001, "loss": 1.4993, "step": 3236 }, { "epoch": 0.5197495183044316, "grad_norm": 0.2502880096435547, "learning_rate": 0.0001, "loss": 1.5172, "step": 3237 }, { "epoch": 0.5199100834938986, "grad_norm": 0.2783799171447754, "learning_rate": 0.0001, "loss": 1.5978, "step": 3238 }, { "epoch": 0.5200706486833655, "grad_norm": 0.26468271017074585, "learning_rate": 0.0001, "loss": 1.478, "step": 3239 }, { "epoch": 0.5202312138728323, "grad_norm": 0.24852678179740906, "learning_rate": 0.0001, "loss": 1.5157, "step": 3240 }, { "epoch": 0.5203917790622993, "grad_norm": 0.254946231842041, "learning_rate": 0.0001, "loss": 1.4678, "step": 3241 }, { "epoch": 0.5205523442517662, "grad_norm": 0.24661694467067719, "learning_rate": 0.0001, "loss": 1.4429, "step": 3242 }, { "epoch": 0.5207129094412332, "grad_norm": 0.2673511803150177, "learning_rate": 0.0001, "loss": 1.5049, "step": 3243 }, { "epoch": 0.5208734746307001, "grad_norm": 0.2501184940338135, "learning_rate": 0.0001, "loss": 1.5195, "step": 3244 }, { "epoch": 0.5210340398201669, "grad_norm": 0.32538872957229614, "learning_rate": 0.0001, "loss": 1.6076, "step": 3245 }, { "epoch": 0.5211946050096339, "grad_norm": 0.25452202558517456, "learning_rate": 0.0001, "loss": 1.5015, "step": 3246 }, { "epoch": 0.5213551701991008, "grad_norm": 0.24114397168159485, "learning_rate": 0.0001, "loss": 1.4725, "step": 3247 }, { "epoch": 0.5215157353885678, "grad_norm": 0.2677294611930847, "learning_rate": 0.0001, "loss": 1.4785, "step": 3248 }, { "epoch": 0.5216763005780347, "grad_norm": 0.2708767354488373, "learning_rate": 0.0001, "loss": 1.5621, "step": 3249 }, { "epoch": 0.5218368657675017, "grad_norm": 0.2655431032180786, "learning_rate": 0.0001, "loss": 1.5123, "step": 3250 }, { "epoch": 0.5219974309569685, "grad_norm": 0.25391721725463867, "learning_rate": 0.0001, "loss": 1.4559, "step": 3251 }, { "epoch": 0.5221579961464354, "grad_norm": 0.2556287944316864, "learning_rate": 0.0001, "loss": 1.4503, "step": 3252 }, { "epoch": 0.5223185613359024, "grad_norm": 0.281301349401474, "learning_rate": 0.0001, "loss": 1.5309, "step": 3253 }, { "epoch": 0.5224791265253693, "grad_norm": 0.26665544509887695, "learning_rate": 0.0001, "loss": 1.5339, "step": 3254 }, { "epoch": 0.5226396917148363, "grad_norm": 0.24834078550338745, "learning_rate": 0.0001, "loss": 1.4675, "step": 3255 }, { "epoch": 0.5228002569043031, "grad_norm": 0.2413569688796997, "learning_rate": 0.0001, "loss": 1.3731, "step": 3256 }, { "epoch": 0.52296082209377, "grad_norm": 0.2564075291156769, "learning_rate": 0.0001, "loss": 1.4544, "step": 3257 }, { "epoch": 0.523121387283237, "grad_norm": 0.24661500751972198, "learning_rate": 0.0001, "loss": 1.4722, "step": 3258 }, { "epoch": 0.5232819524727039, "grad_norm": 0.24206063151359558, "learning_rate": 0.0001, "loss": 1.4261, "step": 3259 }, { "epoch": 0.5234425176621709, "grad_norm": 0.27224233746528625, "learning_rate": 0.0001, "loss": 1.5751, "step": 3260 }, { "epoch": 0.5236030828516378, "grad_norm": 0.2451929748058319, "learning_rate": 0.0001, "loss": 1.4378, "step": 3261 }, { "epoch": 0.5237636480411046, "grad_norm": 0.26787272095680237, "learning_rate": 0.0001, "loss": 1.5243, "step": 3262 }, { "epoch": 0.5239242132305716, "grad_norm": 0.25678402185440063, "learning_rate": 0.0001, "loss": 1.5373, "step": 3263 }, { "epoch": 0.5240847784200385, "grad_norm": 0.2668248414993286, "learning_rate": 0.0001, "loss": 1.5171, "step": 3264 }, { "epoch": 0.5242453436095055, "grad_norm": 0.2636510133743286, "learning_rate": 0.0001, "loss": 1.5734, "step": 3265 }, { "epoch": 0.5244059087989724, "grad_norm": 0.2673064172267914, "learning_rate": 0.0001, "loss": 1.5351, "step": 3266 }, { "epoch": 0.5245664739884393, "grad_norm": 0.24744990468025208, "learning_rate": 0.0001, "loss": 1.4201, "step": 3267 }, { "epoch": 0.5247270391779062, "grad_norm": 0.2554832100868225, "learning_rate": 0.0001, "loss": 1.5381, "step": 3268 }, { "epoch": 0.5248876043673731, "grad_norm": 0.25067007541656494, "learning_rate": 0.0001, "loss": 1.4835, "step": 3269 }, { "epoch": 0.5250481695568401, "grad_norm": 0.2619760036468506, "learning_rate": 0.0001, "loss": 1.63, "step": 3270 }, { "epoch": 0.525208734746307, "grad_norm": 0.2762417793273926, "learning_rate": 0.0001, "loss": 1.5615, "step": 3271 }, { "epoch": 0.525369299935774, "grad_norm": 0.2523985207080841, "learning_rate": 0.0001, "loss": 1.4261, "step": 3272 }, { "epoch": 0.5255298651252408, "grad_norm": 0.25158926844596863, "learning_rate": 0.0001, "loss": 1.4379, "step": 3273 }, { "epoch": 0.5256904303147077, "grad_norm": 0.2579212486743927, "learning_rate": 0.0001, "loss": 1.5615, "step": 3274 }, { "epoch": 0.5258509955041747, "grad_norm": 0.25403761863708496, "learning_rate": 0.0001, "loss": 1.5598, "step": 3275 }, { "epoch": 0.5260115606936416, "grad_norm": 0.2513054311275482, "learning_rate": 0.0001, "loss": 1.5315, "step": 3276 }, { "epoch": 0.5261721258831086, "grad_norm": 0.24935093522071838, "learning_rate": 0.0001, "loss": 1.5597, "step": 3277 }, { "epoch": 0.5263326910725755, "grad_norm": 0.2529204189777374, "learning_rate": 0.0001, "loss": 1.5671, "step": 3278 }, { "epoch": 0.5264932562620424, "grad_norm": 0.253094345331192, "learning_rate": 0.0001, "loss": 1.4921, "step": 3279 }, { "epoch": 0.5266538214515093, "grad_norm": 0.26417168974876404, "learning_rate": 0.0001, "loss": 1.5414, "step": 3280 }, { "epoch": 0.5268143866409762, "grad_norm": 0.24993251264095306, "learning_rate": 0.0001, "loss": 1.4082, "step": 3281 }, { "epoch": 0.5269749518304432, "grad_norm": 0.2519819438457489, "learning_rate": 0.0001, "loss": 1.5673, "step": 3282 }, { "epoch": 0.5271355170199101, "grad_norm": 0.27065905928611755, "learning_rate": 0.0001, "loss": 1.5241, "step": 3283 }, { "epoch": 0.527296082209377, "grad_norm": 0.2681165039539337, "learning_rate": 0.0001, "loss": 1.5656, "step": 3284 }, { "epoch": 0.5274566473988439, "grad_norm": 0.26704514026641846, "learning_rate": 0.0001, "loss": 1.4909, "step": 3285 }, { "epoch": 0.5276172125883108, "grad_norm": 0.2553468346595764, "learning_rate": 0.0001, "loss": 1.5596, "step": 3286 }, { "epoch": 0.5277777777777778, "grad_norm": 0.2722179591655731, "learning_rate": 0.0001, "loss": 1.523, "step": 3287 }, { "epoch": 0.5279383429672447, "grad_norm": 0.26517441868782043, "learning_rate": 0.0001, "loss": 1.5728, "step": 3288 }, { "epoch": 0.5280989081567117, "grad_norm": 0.3017769455909729, "learning_rate": 0.0001, "loss": 1.5826, "step": 3289 }, { "epoch": 0.5282594733461785, "grad_norm": 0.25480830669403076, "learning_rate": 0.0001, "loss": 1.5278, "step": 3290 }, { "epoch": 0.5284200385356455, "grad_norm": 0.25403088331222534, "learning_rate": 0.0001, "loss": 1.5433, "step": 3291 }, { "epoch": 0.5285806037251124, "grad_norm": 0.2752486765384674, "learning_rate": 0.0001, "loss": 1.533, "step": 3292 }, { "epoch": 0.5287411689145793, "grad_norm": 0.2497430443763733, "learning_rate": 0.0001, "loss": 1.4959, "step": 3293 }, { "epoch": 0.5289017341040463, "grad_norm": 0.2565641701221466, "learning_rate": 0.0001, "loss": 1.5025, "step": 3294 }, { "epoch": 0.5290622992935131, "grad_norm": 0.2425820529460907, "learning_rate": 0.0001, "loss": 1.5128, "step": 3295 }, { "epoch": 0.5292228644829801, "grad_norm": 0.23783937096595764, "learning_rate": 0.0001, "loss": 1.4898, "step": 3296 }, { "epoch": 0.529383429672447, "grad_norm": 0.2435399293899536, "learning_rate": 0.0001, "loss": 1.4828, "step": 3297 }, { "epoch": 0.529543994861914, "grad_norm": 0.2555021047592163, "learning_rate": 0.0001, "loss": 1.4554, "step": 3298 }, { "epoch": 0.5297045600513809, "grad_norm": 0.2563108801841736, "learning_rate": 0.0001, "loss": 1.5174, "step": 3299 }, { "epoch": 0.5298651252408478, "grad_norm": 0.24869240820407867, "learning_rate": 0.0001, "loss": 1.5288, "step": 3300 }, { "epoch": 0.5300256904303147, "grad_norm": 0.2502351999282837, "learning_rate": 0.0001, "loss": 1.5042, "step": 3301 }, { "epoch": 0.5301862556197816, "grad_norm": 0.2555674910545349, "learning_rate": 0.0001, "loss": 1.4952, "step": 3302 }, { "epoch": 0.5303468208092486, "grad_norm": 0.2545374929904938, "learning_rate": 0.0001, "loss": 1.5459, "step": 3303 }, { "epoch": 0.5305073859987155, "grad_norm": 0.2690221667289734, "learning_rate": 0.0001, "loss": 1.5089, "step": 3304 }, { "epoch": 0.5306679511881824, "grad_norm": 0.24777981638908386, "learning_rate": 0.0001, "loss": 1.487, "step": 3305 }, { "epoch": 0.5308285163776493, "grad_norm": 0.2689094841480255, "learning_rate": 0.0001, "loss": 1.5838, "step": 3306 }, { "epoch": 0.5309890815671162, "grad_norm": 0.41664373874664307, "learning_rate": 0.0001, "loss": 1.4959, "step": 3307 }, { "epoch": 0.5311496467565832, "grad_norm": 0.25525885820388794, "learning_rate": 0.0001, "loss": 1.5469, "step": 3308 }, { "epoch": 0.5313102119460501, "grad_norm": 0.2596135139465332, "learning_rate": 0.0001, "loss": 1.506, "step": 3309 }, { "epoch": 0.531470777135517, "grad_norm": 0.25523683428764343, "learning_rate": 0.0001, "loss": 1.525, "step": 3310 }, { "epoch": 0.531631342324984, "grad_norm": 0.23948585987091064, "learning_rate": 0.0001, "loss": 1.3896, "step": 3311 }, { "epoch": 0.5317919075144508, "grad_norm": 0.25522205233573914, "learning_rate": 0.0001, "loss": 1.4894, "step": 3312 }, { "epoch": 0.5319524727039178, "grad_norm": 0.2610253691673279, "learning_rate": 0.0001, "loss": 1.523, "step": 3313 }, { "epoch": 0.5321130378933847, "grad_norm": 0.2702656090259552, "learning_rate": 0.0001, "loss": 1.5766, "step": 3314 }, { "epoch": 0.5322736030828517, "grad_norm": 0.25961098074913025, "learning_rate": 0.0001, "loss": 1.5311, "step": 3315 }, { "epoch": 0.5324341682723186, "grad_norm": 0.2641583979129791, "learning_rate": 0.0001, "loss": 1.4961, "step": 3316 }, { "epoch": 0.5325947334617854, "grad_norm": 0.25887444615364075, "learning_rate": 0.0001, "loss": 1.4627, "step": 3317 }, { "epoch": 0.5327552986512524, "grad_norm": 0.2602037489414215, "learning_rate": 0.0001, "loss": 1.5018, "step": 3318 }, { "epoch": 0.5329158638407193, "grad_norm": 0.2509191930294037, "learning_rate": 0.0001, "loss": 1.5104, "step": 3319 }, { "epoch": 0.5330764290301863, "grad_norm": 0.25888267159461975, "learning_rate": 0.0001, "loss": 1.5678, "step": 3320 }, { "epoch": 0.5332369942196532, "grad_norm": 0.27837073802948, "learning_rate": 0.0001, "loss": 1.4706, "step": 3321 }, { "epoch": 0.5333975594091201, "grad_norm": 0.28042373061180115, "learning_rate": 0.0001, "loss": 1.589, "step": 3322 }, { "epoch": 0.533558124598587, "grad_norm": 0.2434910088777542, "learning_rate": 0.0001, "loss": 1.5362, "step": 3323 }, { "epoch": 0.5337186897880539, "grad_norm": 0.2693328857421875, "learning_rate": 0.0001, "loss": 1.4727, "step": 3324 }, { "epoch": 0.5338792549775209, "grad_norm": 0.25696882605552673, "learning_rate": 0.0001, "loss": 1.487, "step": 3325 }, { "epoch": 0.5340398201669878, "grad_norm": 0.25650927424430847, "learning_rate": 0.0001, "loss": 1.4711, "step": 3326 }, { "epoch": 0.5342003853564548, "grad_norm": 0.2530691623687744, "learning_rate": 0.0001, "loss": 1.5132, "step": 3327 }, { "epoch": 0.5343609505459217, "grad_norm": 0.25183364748954773, "learning_rate": 0.0001, "loss": 1.5159, "step": 3328 }, { "epoch": 0.5345215157353885, "grad_norm": 0.24335777759552002, "learning_rate": 0.0001, "loss": 1.4891, "step": 3329 }, { "epoch": 0.5346820809248555, "grad_norm": 0.25641438364982605, "learning_rate": 0.0001, "loss": 1.4974, "step": 3330 }, { "epoch": 0.5348426461143224, "grad_norm": 0.25543588399887085, "learning_rate": 0.0001, "loss": 1.614, "step": 3331 }, { "epoch": 0.5350032113037894, "grad_norm": 0.23903098702430725, "learning_rate": 0.0001, "loss": 1.4576, "step": 3332 }, { "epoch": 0.5351637764932563, "grad_norm": 0.25110888481140137, "learning_rate": 0.0001, "loss": 1.5597, "step": 3333 }, { "epoch": 0.5353243416827231, "grad_norm": 0.24475426971912384, "learning_rate": 0.0001, "loss": 1.4571, "step": 3334 }, { "epoch": 0.5354849068721901, "grad_norm": 0.2556982934474945, "learning_rate": 0.0001, "loss": 1.5103, "step": 3335 }, { "epoch": 0.535645472061657, "grad_norm": 0.2632392942905426, "learning_rate": 0.0001, "loss": 1.5196, "step": 3336 }, { "epoch": 0.535806037251124, "grad_norm": 0.27056071162223816, "learning_rate": 0.0001, "loss": 1.5133, "step": 3337 }, { "epoch": 0.5359666024405909, "grad_norm": 0.2630089223384857, "learning_rate": 0.0001, "loss": 1.5428, "step": 3338 }, { "epoch": 0.5361271676300579, "grad_norm": 0.2578960061073303, "learning_rate": 0.0001, "loss": 1.4556, "step": 3339 }, { "epoch": 0.5362877328195247, "grad_norm": 0.2513989806175232, "learning_rate": 0.0001, "loss": 1.4638, "step": 3340 }, { "epoch": 0.5364482980089916, "grad_norm": 0.26084649562835693, "learning_rate": 0.0001, "loss": 1.5662, "step": 3341 }, { "epoch": 0.5366088631984586, "grad_norm": 0.25264763832092285, "learning_rate": 0.0001, "loss": 1.4682, "step": 3342 }, { "epoch": 0.5367694283879255, "grad_norm": 0.2598627209663391, "learning_rate": 0.0001, "loss": 1.4693, "step": 3343 }, { "epoch": 0.5369299935773925, "grad_norm": 0.24570077657699585, "learning_rate": 0.0001, "loss": 1.5695, "step": 3344 }, { "epoch": 0.5370905587668593, "grad_norm": 0.2574644982814789, "learning_rate": 0.0001, "loss": 1.5303, "step": 3345 }, { "epoch": 0.5372511239563262, "grad_norm": 0.27043068408966064, "learning_rate": 0.0001, "loss": 1.5588, "step": 3346 }, { "epoch": 0.5374116891457932, "grad_norm": 0.2574154734611511, "learning_rate": 0.0001, "loss": 1.5076, "step": 3347 }, { "epoch": 0.5375722543352601, "grad_norm": 0.24641282856464386, "learning_rate": 0.0001, "loss": 1.5372, "step": 3348 }, { "epoch": 0.5377328195247271, "grad_norm": 0.2506469488143921, "learning_rate": 0.0001, "loss": 1.3844, "step": 3349 }, { "epoch": 0.537893384714194, "grad_norm": 0.26251789927482605, "learning_rate": 0.0001, "loss": 1.451, "step": 3350 }, { "epoch": 0.5380539499036608, "grad_norm": 0.2515432834625244, "learning_rate": 0.0001, "loss": 1.405, "step": 3351 }, { "epoch": 0.5382145150931278, "grad_norm": 0.2543710768222809, "learning_rate": 0.0001, "loss": 1.4897, "step": 3352 }, { "epoch": 0.5383750802825947, "grad_norm": 0.2588856816291809, "learning_rate": 0.0001, "loss": 1.5267, "step": 3353 }, { "epoch": 0.5385356454720617, "grad_norm": 0.2631353735923767, "learning_rate": 0.0001, "loss": 1.4741, "step": 3354 }, { "epoch": 0.5386962106615286, "grad_norm": 0.25693097710609436, "learning_rate": 0.0001, "loss": 1.523, "step": 3355 }, { "epoch": 0.5388567758509955, "grad_norm": 0.25500455498695374, "learning_rate": 0.0001, "loss": 1.4464, "step": 3356 }, { "epoch": 0.5390173410404624, "grad_norm": 0.26337122917175293, "learning_rate": 0.0001, "loss": 1.5926, "step": 3357 }, { "epoch": 0.5391779062299293, "grad_norm": 0.2575652599334717, "learning_rate": 0.0001, "loss": 1.5151, "step": 3358 }, { "epoch": 0.5393384714193963, "grad_norm": 0.25726866722106934, "learning_rate": 0.0001, "loss": 1.456, "step": 3359 }, { "epoch": 0.5394990366088632, "grad_norm": 0.25708848237991333, "learning_rate": 0.0001, "loss": 1.4669, "step": 3360 }, { "epoch": 0.5396596017983302, "grad_norm": 0.2644226551055908, "learning_rate": 0.0001, "loss": 1.5217, "step": 3361 }, { "epoch": 0.539820166987797, "grad_norm": 0.26429903507232666, "learning_rate": 0.0001, "loss": 1.5444, "step": 3362 }, { "epoch": 0.539980732177264, "grad_norm": 0.25670334696769714, "learning_rate": 0.0001, "loss": 1.5248, "step": 3363 }, { "epoch": 0.5401412973667309, "grad_norm": 0.2515736520290375, "learning_rate": 0.0001, "loss": 1.5046, "step": 3364 }, { "epoch": 0.5403018625561978, "grad_norm": 0.26216983795166016, "learning_rate": 0.0001, "loss": 1.6001, "step": 3365 }, { "epoch": 0.5404624277456648, "grad_norm": 0.25578591227531433, "learning_rate": 0.0001, "loss": 1.5838, "step": 3366 }, { "epoch": 0.5406229929351317, "grad_norm": 0.25333574414253235, "learning_rate": 0.0001, "loss": 1.6201, "step": 3367 }, { "epoch": 0.5407835581245986, "grad_norm": 0.25093314051628113, "learning_rate": 0.0001, "loss": 1.5405, "step": 3368 }, { "epoch": 0.5409441233140655, "grad_norm": 0.26136481761932373, "learning_rate": 0.0001, "loss": 1.5464, "step": 3369 }, { "epoch": 0.5411046885035324, "grad_norm": 0.2559725344181061, "learning_rate": 0.0001, "loss": 1.4596, "step": 3370 }, { "epoch": 0.5412652536929994, "grad_norm": 0.24507717788219452, "learning_rate": 0.0001, "loss": 1.5425, "step": 3371 }, { "epoch": 0.5414258188824663, "grad_norm": 0.2500024139881134, "learning_rate": 0.0001, "loss": 1.5271, "step": 3372 }, { "epoch": 0.5415863840719332, "grad_norm": 0.26483073830604553, "learning_rate": 0.0001, "loss": 1.5551, "step": 3373 }, { "epoch": 0.5417469492614001, "grad_norm": 0.2631481885910034, "learning_rate": 0.0001, "loss": 1.5341, "step": 3374 }, { "epoch": 0.541907514450867, "grad_norm": 0.25673601031303406, "learning_rate": 0.0001, "loss": 1.5737, "step": 3375 }, { "epoch": 0.542068079640334, "grad_norm": 0.2587435841560364, "learning_rate": 0.0001, "loss": 1.5832, "step": 3376 }, { "epoch": 0.5422286448298009, "grad_norm": 0.28487128019332886, "learning_rate": 0.0001, "loss": 1.5452, "step": 3377 }, { "epoch": 0.5423892100192679, "grad_norm": 0.24453434348106384, "learning_rate": 0.0001, "loss": 1.4557, "step": 3378 }, { "epoch": 0.5425497752087347, "grad_norm": 0.2701871395111084, "learning_rate": 0.0001, "loss": 1.54, "step": 3379 }, { "epoch": 0.5427103403982017, "grad_norm": 0.25957581400871277, "learning_rate": 0.0001, "loss": 1.5388, "step": 3380 }, { "epoch": 0.5428709055876686, "grad_norm": 0.27372631430625916, "learning_rate": 0.0001, "loss": 1.5081, "step": 3381 }, { "epoch": 0.5430314707771355, "grad_norm": 0.2520945072174072, "learning_rate": 0.0001, "loss": 1.5461, "step": 3382 }, { "epoch": 0.5431920359666025, "grad_norm": 0.25508081912994385, "learning_rate": 0.0001, "loss": 1.4849, "step": 3383 }, { "epoch": 0.5433526011560693, "grad_norm": 0.25025564432144165, "learning_rate": 0.0001, "loss": 1.5892, "step": 3384 }, { "epoch": 0.5435131663455363, "grad_norm": 0.26551491022109985, "learning_rate": 0.0001, "loss": 1.5201, "step": 3385 }, { "epoch": 0.5436737315350032, "grad_norm": 0.2558436393737793, "learning_rate": 0.0001, "loss": 1.4543, "step": 3386 }, { "epoch": 0.5438342967244701, "grad_norm": 0.25744447112083435, "learning_rate": 0.0001, "loss": 1.4921, "step": 3387 }, { "epoch": 0.5439948619139371, "grad_norm": 0.2569776177406311, "learning_rate": 0.0001, "loss": 1.4524, "step": 3388 }, { "epoch": 0.544155427103404, "grad_norm": 0.26785245537757874, "learning_rate": 0.0001, "loss": 1.5942, "step": 3389 }, { "epoch": 0.5443159922928709, "grad_norm": 0.26799994707107544, "learning_rate": 0.0001, "loss": 1.6123, "step": 3390 }, { "epoch": 0.5444765574823378, "grad_norm": 0.27217844128608704, "learning_rate": 0.0001, "loss": 1.5506, "step": 3391 }, { "epoch": 0.5446371226718048, "grad_norm": 0.24782425165176392, "learning_rate": 0.0001, "loss": 1.4203, "step": 3392 }, { "epoch": 0.5447976878612717, "grad_norm": 0.25451624393463135, "learning_rate": 0.0001, "loss": 1.5459, "step": 3393 }, { "epoch": 0.5449582530507386, "grad_norm": 0.3037620186805725, "learning_rate": 0.0001, "loss": 1.5618, "step": 3394 }, { "epoch": 0.5451188182402055, "grad_norm": 0.263279527425766, "learning_rate": 0.0001, "loss": 1.531, "step": 3395 }, { "epoch": 0.5452793834296724, "grad_norm": 0.27302125096321106, "learning_rate": 0.0001, "loss": 1.5634, "step": 3396 }, { "epoch": 0.5454399486191394, "grad_norm": 0.25892695784568787, "learning_rate": 0.0001, "loss": 1.5196, "step": 3397 }, { "epoch": 0.5456005138086063, "grad_norm": 0.25457724928855896, "learning_rate": 0.0001, "loss": 1.4876, "step": 3398 }, { "epoch": 0.5457610789980732, "grad_norm": 0.26115864515304565, "learning_rate": 0.0001, "loss": 1.5297, "step": 3399 }, { "epoch": 0.5459216441875402, "grad_norm": 0.24479591846466064, "learning_rate": 0.0001, "loss": 1.4309, "step": 3400 }, { "epoch": 0.546082209377007, "grad_norm": 0.2485695779323578, "learning_rate": 0.0001, "loss": 1.5039, "step": 3401 }, { "epoch": 0.546242774566474, "grad_norm": 0.25134915113449097, "learning_rate": 0.0001, "loss": 1.5014, "step": 3402 }, { "epoch": 0.5464033397559409, "grad_norm": 0.24269607663154602, "learning_rate": 0.0001, "loss": 1.5118, "step": 3403 }, { "epoch": 0.5465639049454079, "grad_norm": 0.2524523138999939, "learning_rate": 0.0001, "loss": 1.5586, "step": 3404 }, { "epoch": 0.5467244701348748, "grad_norm": 0.256293386220932, "learning_rate": 0.0001, "loss": 1.5002, "step": 3405 }, { "epoch": 0.5468850353243416, "grad_norm": 0.2573462128639221, "learning_rate": 0.0001, "loss": 1.5701, "step": 3406 }, { "epoch": 0.5470456005138086, "grad_norm": 0.2703557014465332, "learning_rate": 0.0001, "loss": 1.5144, "step": 3407 }, { "epoch": 0.5472061657032755, "grad_norm": 0.2504549026489258, "learning_rate": 0.0001, "loss": 1.4945, "step": 3408 }, { "epoch": 0.5473667308927425, "grad_norm": 0.25042614340782166, "learning_rate": 0.0001, "loss": 1.5555, "step": 3409 }, { "epoch": 0.5475272960822094, "grad_norm": 0.282416969537735, "learning_rate": 0.0001, "loss": 1.5334, "step": 3410 }, { "epoch": 0.5476878612716763, "grad_norm": 0.27234387397766113, "learning_rate": 0.0001, "loss": 1.4861, "step": 3411 }, { "epoch": 0.5478484264611432, "grad_norm": 0.25599533319473267, "learning_rate": 0.0001, "loss": 1.571, "step": 3412 }, { "epoch": 0.5480089916506101, "grad_norm": 0.26688531041145325, "learning_rate": 0.0001, "loss": 1.453, "step": 3413 }, { "epoch": 0.5481695568400771, "grad_norm": 0.2721960246562958, "learning_rate": 0.0001, "loss": 1.5289, "step": 3414 }, { "epoch": 0.548330122029544, "grad_norm": 0.24172380566596985, "learning_rate": 0.0001, "loss": 1.4282, "step": 3415 }, { "epoch": 0.548490687219011, "grad_norm": 0.25997138023376465, "learning_rate": 0.0001, "loss": 1.5497, "step": 3416 }, { "epoch": 0.5486512524084779, "grad_norm": 0.2492331862449646, "learning_rate": 0.0001, "loss": 1.5111, "step": 3417 }, { "epoch": 0.5488118175979447, "grad_norm": 0.26237958669662476, "learning_rate": 0.0001, "loss": 1.5827, "step": 3418 }, { "epoch": 0.5489723827874117, "grad_norm": 0.25716063380241394, "learning_rate": 0.0001, "loss": 1.4674, "step": 3419 }, { "epoch": 0.5491329479768786, "grad_norm": 0.25188320875167847, "learning_rate": 0.0001, "loss": 1.522, "step": 3420 }, { "epoch": 0.5492935131663456, "grad_norm": 0.2570692002773285, "learning_rate": 0.0001, "loss": 1.5166, "step": 3421 }, { "epoch": 0.5494540783558125, "grad_norm": 0.2607956826686859, "learning_rate": 0.0001, "loss": 1.5785, "step": 3422 }, { "epoch": 0.5496146435452793, "grad_norm": 0.2597101628780365, "learning_rate": 0.0001, "loss": 1.5615, "step": 3423 }, { "epoch": 0.5497752087347463, "grad_norm": 0.26716485619544983, "learning_rate": 0.0001, "loss": 1.5188, "step": 3424 }, { "epoch": 0.5499357739242132, "grad_norm": 0.25290799140930176, "learning_rate": 0.0001, "loss": 1.5207, "step": 3425 }, { "epoch": 0.5500963391136802, "grad_norm": 0.24632811546325684, "learning_rate": 0.0001, "loss": 1.3944, "step": 3426 }, { "epoch": 0.5502569043031471, "grad_norm": 0.27936041355133057, "learning_rate": 0.0001, "loss": 1.4912, "step": 3427 }, { "epoch": 0.550417469492614, "grad_norm": 0.2731547951698303, "learning_rate": 0.0001, "loss": 1.5761, "step": 3428 }, { "epoch": 0.5505780346820809, "grad_norm": 0.26861652731895447, "learning_rate": 0.0001, "loss": 1.4822, "step": 3429 }, { "epoch": 0.5507385998715478, "grad_norm": 0.2558331787586212, "learning_rate": 0.0001, "loss": 1.5092, "step": 3430 }, { "epoch": 0.5508991650610148, "grad_norm": 0.2508247196674347, "learning_rate": 0.0001, "loss": 1.5146, "step": 3431 }, { "epoch": 0.5510597302504817, "grad_norm": 0.24907223880290985, "learning_rate": 0.0001, "loss": 1.4887, "step": 3432 }, { "epoch": 0.5512202954399487, "grad_norm": 0.2866308391094208, "learning_rate": 0.0001, "loss": 1.5711, "step": 3433 }, { "epoch": 0.5513808606294155, "grad_norm": 0.27295202016830444, "learning_rate": 0.0001, "loss": 1.5576, "step": 3434 }, { "epoch": 0.5515414258188824, "grad_norm": 0.26600003242492676, "learning_rate": 0.0001, "loss": 1.5331, "step": 3435 }, { "epoch": 0.5517019910083494, "grad_norm": 0.26780498027801514, "learning_rate": 0.0001, "loss": 1.527, "step": 3436 }, { "epoch": 0.5518625561978163, "grad_norm": 0.26983973383903503, "learning_rate": 0.0001, "loss": 1.5242, "step": 3437 }, { "epoch": 0.5520231213872833, "grad_norm": 0.2618176341056824, "learning_rate": 0.0001, "loss": 1.6295, "step": 3438 }, { "epoch": 0.5521836865767502, "grad_norm": 0.24899901449680328, "learning_rate": 0.0001, "loss": 1.5081, "step": 3439 }, { "epoch": 0.552344251766217, "grad_norm": 0.2552049160003662, "learning_rate": 0.0001, "loss": 1.516, "step": 3440 }, { "epoch": 0.552504816955684, "grad_norm": 0.25595688819885254, "learning_rate": 0.0001, "loss": 1.5933, "step": 3441 }, { "epoch": 0.5526653821451509, "grad_norm": 0.2632392346858978, "learning_rate": 0.0001, "loss": 1.5124, "step": 3442 }, { "epoch": 0.5528259473346179, "grad_norm": 0.26812928915023804, "learning_rate": 0.0001, "loss": 1.5507, "step": 3443 }, { "epoch": 0.5529865125240848, "grad_norm": 3.5970778465270996, "learning_rate": 0.0001, "loss": 1.5527, "step": 3444 }, { "epoch": 0.5531470777135516, "grad_norm": 0.249466210603714, "learning_rate": 0.0001, "loss": 1.4624, "step": 3445 }, { "epoch": 0.5533076429030186, "grad_norm": 0.2638276517391205, "learning_rate": 0.0001, "loss": 1.4815, "step": 3446 }, { "epoch": 0.5534682080924855, "grad_norm": 0.23985153436660767, "learning_rate": 0.0001, "loss": 1.4399, "step": 3447 }, { "epoch": 0.5536287732819525, "grad_norm": 0.25133323669433594, "learning_rate": 0.0001, "loss": 1.473, "step": 3448 }, { "epoch": 0.5537893384714194, "grad_norm": 0.25573378801345825, "learning_rate": 0.0001, "loss": 1.4564, "step": 3449 }, { "epoch": 0.5539499036608864, "grad_norm": 0.2694820463657379, "learning_rate": 0.0001, "loss": 1.5474, "step": 3450 }, { "epoch": 0.5541104688503532, "grad_norm": 0.2726207375526428, "learning_rate": 0.0001, "loss": 1.6114, "step": 3451 }, { "epoch": 0.5542710340398201, "grad_norm": 0.2577306032180786, "learning_rate": 0.0001, "loss": 1.4994, "step": 3452 }, { "epoch": 0.5544315992292871, "grad_norm": 0.2638789117336273, "learning_rate": 0.0001, "loss": 1.5959, "step": 3453 }, { "epoch": 0.554592164418754, "grad_norm": 0.2736872434616089, "learning_rate": 0.0001, "loss": 1.5324, "step": 3454 }, { "epoch": 0.554752729608221, "grad_norm": 0.24926017224788666, "learning_rate": 0.0001, "loss": 1.4686, "step": 3455 }, { "epoch": 0.5549132947976878, "grad_norm": 0.23989923298358917, "learning_rate": 0.0001, "loss": 1.5231, "step": 3456 }, { "epoch": 0.5550738599871547, "grad_norm": 0.26691344380378723, "learning_rate": 0.0001, "loss": 1.4484, "step": 3457 }, { "epoch": 0.5552344251766217, "grad_norm": 0.251097708940506, "learning_rate": 0.0001, "loss": 1.4741, "step": 3458 }, { "epoch": 0.5553949903660886, "grad_norm": 0.25313010811805725, "learning_rate": 0.0001, "loss": 1.5321, "step": 3459 }, { "epoch": 0.5555555555555556, "grad_norm": 0.2603638172149658, "learning_rate": 0.0001, "loss": 1.565, "step": 3460 }, { "epoch": 0.5557161207450225, "grad_norm": 0.2619098126888275, "learning_rate": 0.0001, "loss": 1.5085, "step": 3461 }, { "epoch": 0.5558766859344894, "grad_norm": 0.26237279176712036, "learning_rate": 0.0001, "loss": 1.5434, "step": 3462 }, { "epoch": 0.5560372511239563, "grad_norm": 0.2590477764606476, "learning_rate": 0.0001, "loss": 1.5638, "step": 3463 }, { "epoch": 0.5561978163134232, "grad_norm": 0.2514772117137909, "learning_rate": 0.0001, "loss": 1.4436, "step": 3464 }, { "epoch": 0.5563583815028902, "grad_norm": 0.27150002121925354, "learning_rate": 0.0001, "loss": 1.5903, "step": 3465 }, { "epoch": 0.5565189466923571, "grad_norm": 0.2557409405708313, "learning_rate": 0.0001, "loss": 1.5477, "step": 3466 }, { "epoch": 0.5566795118818241, "grad_norm": 0.2467564195394516, "learning_rate": 0.0001, "loss": 1.452, "step": 3467 }, { "epoch": 0.5568400770712909, "grad_norm": 0.24928201735019684, "learning_rate": 0.0001, "loss": 1.492, "step": 3468 }, { "epoch": 0.5570006422607579, "grad_norm": 0.26520484685897827, "learning_rate": 0.0001, "loss": 1.5598, "step": 3469 }, { "epoch": 0.5571612074502248, "grad_norm": 0.2560626268386841, "learning_rate": 0.0001, "loss": 1.5434, "step": 3470 }, { "epoch": 0.5573217726396917, "grad_norm": 0.25910642743110657, "learning_rate": 0.0001, "loss": 1.6065, "step": 3471 }, { "epoch": 0.5574823378291587, "grad_norm": 0.2590223252773285, "learning_rate": 0.0001, "loss": 1.5274, "step": 3472 }, { "epoch": 0.5576429030186255, "grad_norm": 0.26514342427253723, "learning_rate": 0.0001, "loss": 1.5426, "step": 3473 }, { "epoch": 0.5578034682080925, "grad_norm": 0.253199964761734, "learning_rate": 0.0001, "loss": 1.4766, "step": 3474 }, { "epoch": 0.5579640333975594, "grad_norm": 0.24293287098407745, "learning_rate": 0.0001, "loss": 1.4573, "step": 3475 }, { "epoch": 0.5581245985870263, "grad_norm": 0.262442946434021, "learning_rate": 0.0001, "loss": 1.5558, "step": 3476 }, { "epoch": 0.5582851637764933, "grad_norm": 0.2672119736671448, "learning_rate": 0.0001, "loss": 1.5199, "step": 3477 }, { "epoch": 0.5584457289659602, "grad_norm": 0.2569417953491211, "learning_rate": 0.0001, "loss": 1.4778, "step": 3478 }, { "epoch": 0.5586062941554271, "grad_norm": 0.2576897144317627, "learning_rate": 0.0001, "loss": 1.4639, "step": 3479 }, { "epoch": 0.558766859344894, "grad_norm": 0.2620057463645935, "learning_rate": 0.0001, "loss": 1.5513, "step": 3480 }, { "epoch": 0.558927424534361, "grad_norm": 0.267472505569458, "learning_rate": 0.0001, "loss": 1.5409, "step": 3481 }, { "epoch": 0.5590879897238279, "grad_norm": 0.2704296410083771, "learning_rate": 0.0001, "loss": 1.5411, "step": 3482 }, { "epoch": 0.5592485549132948, "grad_norm": 0.2500203549861908, "learning_rate": 0.0001, "loss": 1.5201, "step": 3483 }, { "epoch": 0.5594091201027617, "grad_norm": 0.24683894217014313, "learning_rate": 0.0001, "loss": 1.4956, "step": 3484 }, { "epoch": 0.5595696852922286, "grad_norm": 0.2554965913295746, "learning_rate": 0.0001, "loss": 1.468, "step": 3485 }, { "epoch": 0.5597302504816956, "grad_norm": 0.26204511523246765, "learning_rate": 0.0001, "loss": 1.4225, "step": 3486 }, { "epoch": 0.5598908156711625, "grad_norm": 0.25450706481933594, "learning_rate": 0.0001, "loss": 1.5129, "step": 3487 }, { "epoch": 0.5600513808606294, "grad_norm": 0.2704123556613922, "learning_rate": 0.0001, "loss": 1.5706, "step": 3488 }, { "epoch": 0.5602119460500964, "grad_norm": 0.2498333752155304, "learning_rate": 0.0001, "loss": 1.5064, "step": 3489 }, { "epoch": 0.5603725112395632, "grad_norm": 0.2551574409008026, "learning_rate": 0.0001, "loss": 1.4952, "step": 3490 }, { "epoch": 0.5605330764290302, "grad_norm": 0.25675302743911743, "learning_rate": 0.0001, "loss": 1.5415, "step": 3491 }, { "epoch": 0.5606936416184971, "grad_norm": 0.2501094341278076, "learning_rate": 0.0001, "loss": 1.4979, "step": 3492 }, { "epoch": 0.560854206807964, "grad_norm": 0.24903394281864166, "learning_rate": 0.0001, "loss": 1.5462, "step": 3493 }, { "epoch": 0.561014771997431, "grad_norm": 0.24453727900981903, "learning_rate": 0.0001, "loss": 1.4863, "step": 3494 }, { "epoch": 0.5611753371868978, "grad_norm": 0.26887333393096924, "learning_rate": 0.0001, "loss": 1.449, "step": 3495 }, { "epoch": 0.5613359023763648, "grad_norm": 0.2653504014015198, "learning_rate": 0.0001, "loss": 1.6348, "step": 3496 }, { "epoch": 0.5614964675658317, "grad_norm": 0.2897603511810303, "learning_rate": 0.0001, "loss": 1.5136, "step": 3497 }, { "epoch": 0.5616570327552987, "grad_norm": 0.2508017420768738, "learning_rate": 0.0001, "loss": 1.4904, "step": 3498 }, { "epoch": 0.5618175979447656, "grad_norm": 0.2683207094669342, "learning_rate": 0.0001, "loss": 1.5295, "step": 3499 }, { "epoch": 0.5619781631342325, "grad_norm": 0.2680720388889313, "learning_rate": 0.0001, "loss": 1.5504, "step": 3500 }, { "epoch": 0.5621387283236994, "grad_norm": 0.24851787090301514, "learning_rate": 0.0001, "loss": 1.4618, "step": 3501 }, { "epoch": 0.5622992935131663, "grad_norm": 0.27713721990585327, "learning_rate": 0.0001, "loss": 1.5878, "step": 3502 }, { "epoch": 0.5624598587026333, "grad_norm": 0.2540768086910248, "learning_rate": 0.0001, "loss": 1.4579, "step": 3503 }, { "epoch": 0.5626204238921002, "grad_norm": 0.26961416006088257, "learning_rate": 0.0001, "loss": 1.5374, "step": 3504 }, { "epoch": 0.5627809890815672, "grad_norm": 0.2740367650985718, "learning_rate": 0.0001, "loss": 1.5216, "step": 3505 }, { "epoch": 0.5629415542710341, "grad_norm": 0.26434916257858276, "learning_rate": 0.0001, "loss": 1.593, "step": 3506 }, { "epoch": 0.5631021194605009, "grad_norm": 0.25455477833747864, "learning_rate": 0.0001, "loss": 1.473, "step": 3507 }, { "epoch": 0.5632626846499679, "grad_norm": 0.27761974930763245, "learning_rate": 0.0001, "loss": 1.4984, "step": 3508 }, { "epoch": 0.5634232498394348, "grad_norm": 0.2448902279138565, "learning_rate": 0.0001, "loss": 1.4101, "step": 3509 }, { "epoch": 0.5635838150289018, "grad_norm": 0.3218258023262024, "learning_rate": 0.0001, "loss": 1.3785, "step": 3510 }, { "epoch": 0.5637443802183687, "grad_norm": 0.2583906650543213, "learning_rate": 0.0001, "loss": 1.5542, "step": 3511 }, { "epoch": 0.5639049454078355, "grad_norm": 0.2744387686252594, "learning_rate": 0.0001, "loss": 1.5306, "step": 3512 }, { "epoch": 0.5640655105973025, "grad_norm": 0.2542562484741211, "learning_rate": 0.0001, "loss": 1.5573, "step": 3513 }, { "epoch": 0.5642260757867694, "grad_norm": 0.27191752195358276, "learning_rate": 0.0001, "loss": 1.5455, "step": 3514 }, { "epoch": 0.5643866409762364, "grad_norm": 0.2717880308628082, "learning_rate": 0.0001, "loss": 1.4949, "step": 3515 }, { "epoch": 0.5645472061657033, "grad_norm": 0.2628335654735565, "learning_rate": 0.0001, "loss": 1.4825, "step": 3516 }, { "epoch": 0.5647077713551703, "grad_norm": 0.2621069550514221, "learning_rate": 0.0001, "loss": 1.6146, "step": 3517 }, { "epoch": 0.5648683365446371, "grad_norm": 0.28826311230659485, "learning_rate": 0.0001, "loss": 1.5447, "step": 3518 }, { "epoch": 0.565028901734104, "grad_norm": 0.2646731436252594, "learning_rate": 0.0001, "loss": 1.5986, "step": 3519 }, { "epoch": 0.565189466923571, "grad_norm": 0.2607734501361847, "learning_rate": 0.0001, "loss": 1.4956, "step": 3520 }, { "epoch": 0.5653500321130379, "grad_norm": 0.25392746925354004, "learning_rate": 0.0001, "loss": 1.4504, "step": 3521 }, { "epoch": 0.5655105973025049, "grad_norm": 0.2602335810661316, "learning_rate": 0.0001, "loss": 1.4503, "step": 3522 }, { "epoch": 0.5656711624919717, "grad_norm": 0.2631988525390625, "learning_rate": 0.0001, "loss": 1.4503, "step": 3523 }, { "epoch": 0.5658317276814386, "grad_norm": 0.27291157841682434, "learning_rate": 0.0001, "loss": 1.5659, "step": 3524 }, { "epoch": 0.5659922928709056, "grad_norm": 0.25426504015922546, "learning_rate": 0.0001, "loss": 1.5112, "step": 3525 }, { "epoch": 0.5661528580603725, "grad_norm": 0.2615494430065155, "learning_rate": 0.0001, "loss": 1.4765, "step": 3526 }, { "epoch": 0.5663134232498395, "grad_norm": 0.25024959444999695, "learning_rate": 0.0001, "loss": 1.4416, "step": 3527 }, { "epoch": 0.5664739884393064, "grad_norm": 0.2591741979122162, "learning_rate": 0.0001, "loss": 1.4866, "step": 3528 }, { "epoch": 0.5666345536287732, "grad_norm": 0.26453763246536255, "learning_rate": 0.0001, "loss": 1.5254, "step": 3529 }, { "epoch": 0.5667951188182402, "grad_norm": 0.24958562850952148, "learning_rate": 0.0001, "loss": 1.4718, "step": 3530 }, { "epoch": 0.5669556840077071, "grad_norm": 0.2647601068019867, "learning_rate": 0.0001, "loss": 1.4356, "step": 3531 }, { "epoch": 0.5671162491971741, "grad_norm": 0.28071776032447815, "learning_rate": 0.0001, "loss": 1.4949, "step": 3532 }, { "epoch": 0.567276814386641, "grad_norm": 0.25910112261772156, "learning_rate": 0.0001, "loss": 1.4674, "step": 3533 }, { "epoch": 0.5674373795761078, "grad_norm": 0.2666882276535034, "learning_rate": 0.0001, "loss": 1.5594, "step": 3534 }, { "epoch": 0.5675979447655748, "grad_norm": 0.27435198426246643, "learning_rate": 0.0001, "loss": 1.4877, "step": 3535 }, { "epoch": 0.5677585099550417, "grad_norm": 0.2550223469734192, "learning_rate": 0.0001, "loss": 1.4289, "step": 3536 }, { "epoch": 0.5679190751445087, "grad_norm": 0.2811153531074524, "learning_rate": 0.0001, "loss": 1.5937, "step": 3537 }, { "epoch": 0.5680796403339756, "grad_norm": 0.27584466338157654, "learning_rate": 0.0001, "loss": 1.508, "step": 3538 }, { "epoch": 0.5682402055234426, "grad_norm": 0.2671346962451935, "learning_rate": 0.0001, "loss": 1.5133, "step": 3539 }, { "epoch": 0.5684007707129094, "grad_norm": 0.26532262563705444, "learning_rate": 0.0001, "loss": 1.5286, "step": 3540 }, { "epoch": 0.5685613359023763, "grad_norm": 0.2532363831996918, "learning_rate": 0.0001, "loss": 1.5986, "step": 3541 }, { "epoch": 0.5687219010918433, "grad_norm": 0.2529861032962799, "learning_rate": 0.0001, "loss": 1.5459, "step": 3542 }, { "epoch": 0.5688824662813102, "grad_norm": 0.26175493001937866, "learning_rate": 0.0001, "loss": 1.6012, "step": 3543 }, { "epoch": 0.5690430314707772, "grad_norm": 0.25633907318115234, "learning_rate": 0.0001, "loss": 1.5074, "step": 3544 }, { "epoch": 0.569203596660244, "grad_norm": 0.2499510496854782, "learning_rate": 0.0001, "loss": 1.4577, "step": 3545 }, { "epoch": 0.569364161849711, "grad_norm": 0.269633024930954, "learning_rate": 0.0001, "loss": 1.5499, "step": 3546 }, { "epoch": 0.5695247270391779, "grad_norm": 25.48210906982422, "learning_rate": 0.0001, "loss": 1.6888, "step": 3547 }, { "epoch": 0.5696852922286448, "grad_norm": 0.29812732338905334, "learning_rate": 0.0001, "loss": 1.4714, "step": 3548 }, { "epoch": 0.5698458574181118, "grad_norm": 0.30050480365753174, "learning_rate": 0.0001, "loss": 1.5308, "step": 3549 }, { "epoch": 0.5700064226075787, "grad_norm": 0.30164986848831177, "learning_rate": 0.0001, "loss": 1.5135, "step": 3550 }, { "epoch": 0.5701669877970456, "grad_norm": 0.29812827706336975, "learning_rate": 0.0001, "loss": 1.6057, "step": 3551 }, { "epoch": 0.5703275529865125, "grad_norm": 0.3121807873249054, "learning_rate": 0.0001, "loss": 1.4885, "step": 3552 }, { "epoch": 0.5704881181759794, "grad_norm": 0.28077369928359985, "learning_rate": 0.0001, "loss": 1.4681, "step": 3553 }, { "epoch": 0.5706486833654464, "grad_norm": 0.27972835302352905, "learning_rate": 0.0001, "loss": 1.4775, "step": 3554 }, { "epoch": 0.5708092485549133, "grad_norm": 0.2922600507736206, "learning_rate": 0.0001, "loss": 1.446, "step": 3555 }, { "epoch": 0.5709698137443803, "grad_norm": 0.27353715896606445, "learning_rate": 0.0001, "loss": 1.5218, "step": 3556 }, { "epoch": 0.5711303789338471, "grad_norm": 0.2593901455402374, "learning_rate": 0.0001, "loss": 1.4743, "step": 3557 }, { "epoch": 0.571290944123314, "grad_norm": 0.2923367917537689, "learning_rate": 0.0001, "loss": 1.5646, "step": 3558 }, { "epoch": 0.571451509312781, "grad_norm": 0.34674420952796936, "learning_rate": 0.0001, "loss": 1.5701, "step": 3559 }, { "epoch": 0.5716120745022479, "grad_norm": 0.3112606108188629, "learning_rate": 0.0001, "loss": 1.4606, "step": 3560 }, { "epoch": 0.5717726396917149, "grad_norm": 0.2782287299633026, "learning_rate": 0.0001, "loss": 1.4974, "step": 3561 }, { "epoch": 0.5719332048811817, "grad_norm": 0.2519095838069916, "learning_rate": 0.0001, "loss": 1.5007, "step": 3562 }, { "epoch": 0.5720937700706487, "grad_norm": 0.24846331775188446, "learning_rate": 0.0001, "loss": 1.4191, "step": 3563 }, { "epoch": 0.5722543352601156, "grad_norm": 0.2899773418903351, "learning_rate": 0.0001, "loss": 1.5408, "step": 3564 }, { "epoch": 0.5724149004495825, "grad_norm": 0.25818878412246704, "learning_rate": 0.0001, "loss": 1.5233, "step": 3565 }, { "epoch": 0.5725754656390495, "grad_norm": 0.30965518951416016, "learning_rate": 0.0001, "loss": 1.48, "step": 3566 }, { "epoch": 0.5727360308285164, "grad_norm": 0.26094964146614075, "learning_rate": 0.0001, "loss": 1.536, "step": 3567 }, { "epoch": 0.5728965960179833, "grad_norm": 0.2866499722003937, "learning_rate": 0.0001, "loss": 1.5216, "step": 3568 }, { "epoch": 0.5730571612074502, "grad_norm": 0.28775933384895325, "learning_rate": 0.0001, "loss": 1.5449, "step": 3569 }, { "epoch": 0.5732177263969171, "grad_norm": 0.2771734893321991, "learning_rate": 0.0001, "loss": 1.4515, "step": 3570 }, { "epoch": 0.5733782915863841, "grad_norm": 0.2711351215839386, "learning_rate": 0.0001, "loss": 1.4363, "step": 3571 }, { "epoch": 0.573538856775851, "grad_norm": 0.2581285238265991, "learning_rate": 0.0001, "loss": 1.4205, "step": 3572 }, { "epoch": 0.5736994219653179, "grad_norm": 0.27857813239097595, "learning_rate": 0.0001, "loss": 1.5103, "step": 3573 }, { "epoch": 0.5738599871547848, "grad_norm": 0.2940189242362976, "learning_rate": 0.0001, "loss": 1.4527, "step": 3574 }, { "epoch": 0.5740205523442518, "grad_norm": 0.2706471085548401, "learning_rate": 0.0001, "loss": 1.5351, "step": 3575 }, { "epoch": 0.5741811175337187, "grad_norm": 0.26701080799102783, "learning_rate": 0.0001, "loss": 1.4732, "step": 3576 }, { "epoch": 0.5743416827231856, "grad_norm": 0.27279427647590637, "learning_rate": 0.0001, "loss": 1.4732, "step": 3577 }, { "epoch": 0.5745022479126526, "grad_norm": 0.3203839659690857, "learning_rate": 0.0001, "loss": 1.4648, "step": 3578 }, { "epoch": 0.5746628131021194, "grad_norm": 0.25967317819595337, "learning_rate": 0.0001, "loss": 1.5407, "step": 3579 }, { "epoch": 0.5748233782915864, "grad_norm": 0.2614003121852875, "learning_rate": 0.0001, "loss": 1.5071, "step": 3580 }, { "epoch": 0.5749839434810533, "grad_norm": 0.2658310830593109, "learning_rate": 0.0001, "loss": 1.4692, "step": 3581 }, { "epoch": 0.5751445086705202, "grad_norm": 0.2863481938838959, "learning_rate": 0.0001, "loss": 1.4733, "step": 3582 }, { "epoch": 0.5753050738599872, "grad_norm": 0.2631807327270508, "learning_rate": 0.0001, "loss": 1.471, "step": 3583 }, { "epoch": 0.575465639049454, "grad_norm": 0.24985304474830627, "learning_rate": 0.0001, "loss": 1.5507, "step": 3584 }, { "epoch": 0.575626204238921, "grad_norm": 0.26440948247909546, "learning_rate": 0.0001, "loss": 1.5232, "step": 3585 }, { "epoch": 0.5757867694283879, "grad_norm": 0.2813529372215271, "learning_rate": 0.0001, "loss": 1.6416, "step": 3586 }, { "epoch": 0.5759473346178549, "grad_norm": 0.27158108353614807, "learning_rate": 0.0001, "loss": 1.4755, "step": 3587 }, { "epoch": 0.5761078998073218, "grad_norm": 0.26029500365257263, "learning_rate": 0.0001, "loss": 1.5513, "step": 3588 }, { "epoch": 0.5762684649967887, "grad_norm": 0.2807159125804901, "learning_rate": 0.0001, "loss": 1.5234, "step": 3589 }, { "epoch": 0.5764290301862556, "grad_norm": 0.25395241379737854, "learning_rate": 0.0001, "loss": 1.4321, "step": 3590 }, { "epoch": 0.5765895953757225, "grad_norm": 0.2527233362197876, "learning_rate": 0.0001, "loss": 1.5193, "step": 3591 }, { "epoch": 0.5767501605651895, "grad_norm": 0.27952840924263, "learning_rate": 0.0001, "loss": 1.4721, "step": 3592 }, { "epoch": 0.5769107257546564, "grad_norm": 0.26070594787597656, "learning_rate": 0.0001, "loss": 1.5733, "step": 3593 }, { "epoch": 0.5770712909441233, "grad_norm": 0.25981488823890686, "learning_rate": 0.0001, "loss": 1.5009, "step": 3594 }, { "epoch": 0.5772318561335903, "grad_norm": 0.2579481303691864, "learning_rate": 0.0001, "loss": 1.5066, "step": 3595 }, { "epoch": 0.5773924213230571, "grad_norm": 0.2575138807296753, "learning_rate": 0.0001, "loss": 1.548, "step": 3596 }, { "epoch": 0.5775529865125241, "grad_norm": 0.2819560766220093, "learning_rate": 0.0001, "loss": 1.4838, "step": 3597 }, { "epoch": 0.577713551701991, "grad_norm": 0.28816813230514526, "learning_rate": 0.0001, "loss": 1.5963, "step": 3598 }, { "epoch": 0.577874116891458, "grad_norm": 0.2554091215133667, "learning_rate": 0.0001, "loss": 1.4762, "step": 3599 }, { "epoch": 0.5780346820809249, "grad_norm": 0.2506262958049774, "learning_rate": 0.0001, "loss": 1.4941, "step": 3600 }, { "epoch": 0.5781952472703917, "grad_norm": 0.27438631653785706, "learning_rate": 0.0001, "loss": 1.4969, "step": 3601 }, { "epoch": 0.5783558124598587, "grad_norm": 0.30739980936050415, "learning_rate": 0.0001, "loss": 1.5721, "step": 3602 }, { "epoch": 0.5785163776493256, "grad_norm": 0.2560059726238251, "learning_rate": 0.0001, "loss": 1.6132, "step": 3603 }, { "epoch": 0.5786769428387926, "grad_norm": 0.26068899035453796, "learning_rate": 0.0001, "loss": 1.429, "step": 3604 }, { "epoch": 0.5788375080282595, "grad_norm": 0.26843971014022827, "learning_rate": 0.0001, "loss": 1.4722, "step": 3605 }, { "epoch": 0.5789980732177264, "grad_norm": 0.2547934353351593, "learning_rate": 0.0001, "loss": 1.4931, "step": 3606 }, { "epoch": 0.5791586384071933, "grad_norm": 0.2655860185623169, "learning_rate": 0.0001, "loss": 1.5597, "step": 3607 }, { "epoch": 0.5793192035966602, "grad_norm": 0.2529540956020355, "learning_rate": 0.0001, "loss": 1.4938, "step": 3608 }, { "epoch": 0.5794797687861272, "grad_norm": 0.2818930447101593, "learning_rate": 0.0001, "loss": 1.554, "step": 3609 }, { "epoch": 0.5796403339755941, "grad_norm": 0.25783148407936096, "learning_rate": 0.0001, "loss": 1.4835, "step": 3610 }, { "epoch": 0.5798008991650611, "grad_norm": 0.26304492354393005, "learning_rate": 0.0001, "loss": 1.4554, "step": 3611 }, { "epoch": 0.5799614643545279, "grad_norm": 0.3086816668510437, "learning_rate": 0.0001, "loss": 1.4979, "step": 3612 }, { "epoch": 0.5801220295439948, "grad_norm": 0.2533327341079712, "learning_rate": 0.0001, "loss": 1.5208, "step": 3613 }, { "epoch": 0.5802825947334618, "grad_norm": 0.2684653103351593, "learning_rate": 0.0001, "loss": 1.6161, "step": 3614 }, { "epoch": 0.5804431599229287, "grad_norm": 0.2483733743429184, "learning_rate": 0.0001, "loss": 1.49, "step": 3615 }, { "epoch": 0.5806037251123957, "grad_norm": 0.26590898633003235, "learning_rate": 0.0001, "loss": 1.4978, "step": 3616 }, { "epoch": 0.5807642903018626, "grad_norm": 0.25449588894844055, "learning_rate": 0.0001, "loss": 1.5236, "step": 3617 }, { "epoch": 0.5809248554913294, "grad_norm": 0.25207141041755676, "learning_rate": 0.0001, "loss": 1.4599, "step": 3618 }, { "epoch": 0.5810854206807964, "grad_norm": 0.2689812481403351, "learning_rate": 0.0001, "loss": 1.5096, "step": 3619 }, { "epoch": 0.5812459858702633, "grad_norm": 0.2550770044326782, "learning_rate": 0.0001, "loss": 1.5115, "step": 3620 }, { "epoch": 0.5814065510597303, "grad_norm": 0.2674384117126465, "learning_rate": 0.0001, "loss": 1.5326, "step": 3621 }, { "epoch": 0.5815671162491972, "grad_norm": 0.2624462842941284, "learning_rate": 0.0001, "loss": 1.5497, "step": 3622 }, { "epoch": 0.581727681438664, "grad_norm": 0.2592432200908661, "learning_rate": 0.0001, "loss": 1.5431, "step": 3623 }, { "epoch": 0.581888246628131, "grad_norm": 0.25049206614494324, "learning_rate": 0.0001, "loss": 1.4347, "step": 3624 }, { "epoch": 0.5820488118175979, "grad_norm": 0.2545790374279022, "learning_rate": 0.0001, "loss": 1.538, "step": 3625 }, { "epoch": 0.5822093770070649, "grad_norm": 0.2624324560165405, "learning_rate": 0.0001, "loss": 1.5579, "step": 3626 }, { "epoch": 0.5823699421965318, "grad_norm": 0.26768431067466736, "learning_rate": 0.0001, "loss": 1.526, "step": 3627 }, { "epoch": 0.5825305073859988, "grad_norm": 0.25618383288383484, "learning_rate": 0.0001, "loss": 1.5025, "step": 3628 }, { "epoch": 0.5826910725754656, "grad_norm": 0.2567412257194519, "learning_rate": 0.0001, "loss": 1.5235, "step": 3629 }, { "epoch": 0.5828516377649325, "grad_norm": 0.2692475914955139, "learning_rate": 0.0001, "loss": 1.5209, "step": 3630 }, { "epoch": 0.5830122029543995, "grad_norm": 0.2622545659542084, "learning_rate": 0.0001, "loss": 1.4982, "step": 3631 }, { "epoch": 0.5831727681438664, "grad_norm": 0.27413806319236755, "learning_rate": 0.0001, "loss": 1.5426, "step": 3632 }, { "epoch": 0.5833333333333334, "grad_norm": 0.2643025815486908, "learning_rate": 0.0001, "loss": 1.5348, "step": 3633 }, { "epoch": 0.5834938985228002, "grad_norm": 0.2692074477672577, "learning_rate": 0.0001, "loss": 1.5558, "step": 3634 }, { "epoch": 0.5836544637122671, "grad_norm": 0.25885412096977234, "learning_rate": 0.0001, "loss": 1.5358, "step": 3635 }, { "epoch": 0.5838150289017341, "grad_norm": 0.25478604435920715, "learning_rate": 0.0001, "loss": 1.5737, "step": 3636 }, { "epoch": 0.583975594091201, "grad_norm": 0.24968627095222473, "learning_rate": 0.0001, "loss": 1.471, "step": 3637 }, { "epoch": 0.584136159280668, "grad_norm": 0.24788379669189453, "learning_rate": 0.0001, "loss": 1.5014, "step": 3638 }, { "epoch": 0.5842967244701349, "grad_norm": 0.2518269121646881, "learning_rate": 0.0001, "loss": 1.5161, "step": 3639 }, { "epoch": 0.5844572896596018, "grad_norm": 0.25715354084968567, "learning_rate": 0.0001, "loss": 1.3978, "step": 3640 }, { "epoch": 0.5846178548490687, "grad_norm": 0.2593884766101837, "learning_rate": 0.0001, "loss": 1.4308, "step": 3641 }, { "epoch": 0.5847784200385356, "grad_norm": 0.27028846740722656, "learning_rate": 0.0001, "loss": 1.5397, "step": 3642 }, { "epoch": 0.5849389852280026, "grad_norm": 0.2675887942314148, "learning_rate": 0.0001, "loss": 1.4675, "step": 3643 }, { "epoch": 0.5850995504174695, "grad_norm": 0.262739360332489, "learning_rate": 0.0001, "loss": 1.4573, "step": 3644 }, { "epoch": 0.5852601156069365, "grad_norm": 0.2562864422798157, "learning_rate": 0.0001, "loss": 1.5833, "step": 3645 }, { "epoch": 0.5854206807964033, "grad_norm": 0.2761216163635254, "learning_rate": 0.0001, "loss": 1.6049, "step": 3646 }, { "epoch": 0.5855812459858702, "grad_norm": 0.26393917202949524, "learning_rate": 0.0001, "loss": 1.5133, "step": 3647 }, { "epoch": 0.5857418111753372, "grad_norm": 0.26334771513938904, "learning_rate": 0.0001, "loss": 1.5213, "step": 3648 }, { "epoch": 0.5859023763648041, "grad_norm": 0.24718475341796875, "learning_rate": 0.0001, "loss": 1.4571, "step": 3649 }, { "epoch": 0.5860629415542711, "grad_norm": 0.27074918150901794, "learning_rate": 0.0001, "loss": 1.5857, "step": 3650 }, { "epoch": 0.5862235067437379, "grad_norm": 0.25103697180747986, "learning_rate": 0.0001, "loss": 1.5143, "step": 3651 }, { "epoch": 0.5863840719332049, "grad_norm": 0.2570626139640808, "learning_rate": 0.0001, "loss": 1.5127, "step": 3652 }, { "epoch": 0.5865446371226718, "grad_norm": 0.2511909306049347, "learning_rate": 0.0001, "loss": 1.5365, "step": 3653 }, { "epoch": 0.5867052023121387, "grad_norm": 0.27300527691841125, "learning_rate": 0.0001, "loss": 1.4084, "step": 3654 }, { "epoch": 0.5868657675016057, "grad_norm": 0.28094717860221863, "learning_rate": 0.0001, "loss": 1.4748, "step": 3655 }, { "epoch": 0.5870263326910726, "grad_norm": 0.2525530159473419, "learning_rate": 0.0001, "loss": 1.4885, "step": 3656 }, { "epoch": 0.5871868978805395, "grad_norm": 0.25379708409309387, "learning_rate": 0.0001, "loss": 1.4142, "step": 3657 }, { "epoch": 0.5873474630700064, "grad_norm": 0.2683212161064148, "learning_rate": 0.0001, "loss": 1.5485, "step": 3658 }, { "epoch": 0.5875080282594733, "grad_norm": 0.25514858961105347, "learning_rate": 0.0001, "loss": 1.524, "step": 3659 }, { "epoch": 0.5876685934489403, "grad_norm": 0.2825925350189209, "learning_rate": 0.0001, "loss": 1.5755, "step": 3660 }, { "epoch": 0.5878291586384072, "grad_norm": 0.26648077368736267, "learning_rate": 0.0001, "loss": 1.5302, "step": 3661 }, { "epoch": 0.5879897238278741, "grad_norm": 0.3023388385772705, "learning_rate": 0.0001, "loss": 1.5183, "step": 3662 }, { "epoch": 0.588150289017341, "grad_norm": 0.2562459707260132, "learning_rate": 0.0001, "loss": 1.4898, "step": 3663 }, { "epoch": 0.588310854206808, "grad_norm": 0.27271050214767456, "learning_rate": 0.0001, "loss": 1.4787, "step": 3664 }, { "epoch": 0.5884714193962749, "grad_norm": 0.2668638825416565, "learning_rate": 0.0001, "loss": 1.5099, "step": 3665 }, { "epoch": 0.5886319845857418, "grad_norm": 0.2641138732433319, "learning_rate": 0.0001, "loss": 1.5078, "step": 3666 }, { "epoch": 0.5887925497752088, "grad_norm": 0.2596166133880615, "learning_rate": 0.0001, "loss": 1.5392, "step": 3667 }, { "epoch": 0.5889531149646756, "grad_norm": 0.2693968117237091, "learning_rate": 0.0001, "loss": 1.5343, "step": 3668 }, { "epoch": 0.5891136801541426, "grad_norm": 0.2611267864704132, "learning_rate": 0.0001, "loss": 1.4792, "step": 3669 }, { "epoch": 0.5892742453436095, "grad_norm": 0.3592695891857147, "learning_rate": 0.0001, "loss": 1.526, "step": 3670 }, { "epoch": 0.5894348105330764, "grad_norm": 0.2588188648223877, "learning_rate": 0.0001, "loss": 1.5063, "step": 3671 }, { "epoch": 0.5895953757225434, "grad_norm": 0.3175065517425537, "learning_rate": 0.0001, "loss": 1.5471, "step": 3672 }, { "epoch": 0.5897559409120102, "grad_norm": 0.2606745660305023, "learning_rate": 0.0001, "loss": 1.5067, "step": 3673 }, { "epoch": 0.5899165061014772, "grad_norm": 0.2673262655735016, "learning_rate": 0.0001, "loss": 1.5013, "step": 3674 }, { "epoch": 0.5900770712909441, "grad_norm": 0.24372580647468567, "learning_rate": 0.0001, "loss": 1.4876, "step": 3675 }, { "epoch": 0.590237636480411, "grad_norm": 0.25752654671669006, "learning_rate": 0.0001, "loss": 1.4871, "step": 3676 }, { "epoch": 0.590398201669878, "grad_norm": 0.25043997168540955, "learning_rate": 0.0001, "loss": 1.4646, "step": 3677 }, { "epoch": 0.5905587668593449, "grad_norm": 0.2570090889930725, "learning_rate": 0.0001, "loss": 1.5276, "step": 3678 }, { "epoch": 0.5907193320488118, "grad_norm": 0.2523142993450165, "learning_rate": 0.0001, "loss": 1.5158, "step": 3679 }, { "epoch": 0.5908798972382787, "grad_norm": 0.27723002433776855, "learning_rate": 0.0001, "loss": 1.5198, "step": 3680 }, { "epoch": 0.5910404624277457, "grad_norm": 0.2671026587486267, "learning_rate": 0.0001, "loss": 1.5323, "step": 3681 }, { "epoch": 0.5912010276172126, "grad_norm": 0.2731771171092987, "learning_rate": 0.0001, "loss": 1.5666, "step": 3682 }, { "epoch": 0.5913615928066795, "grad_norm": 0.2718448042869568, "learning_rate": 0.0001, "loss": 1.5432, "step": 3683 }, { "epoch": 0.5915221579961464, "grad_norm": 0.26460906863212585, "learning_rate": 0.0001, "loss": 1.4593, "step": 3684 }, { "epoch": 0.5916827231856133, "grad_norm": 0.248262420296669, "learning_rate": 0.0001, "loss": 1.4438, "step": 3685 }, { "epoch": 0.5918432883750803, "grad_norm": 0.3607558310031891, "learning_rate": 0.0001, "loss": 1.5388, "step": 3686 }, { "epoch": 0.5920038535645472, "grad_norm": 0.27468305826187134, "learning_rate": 0.0001, "loss": 1.5937, "step": 3687 }, { "epoch": 0.5921644187540142, "grad_norm": 0.26270800828933716, "learning_rate": 0.0001, "loss": 1.5876, "step": 3688 }, { "epoch": 0.5923249839434811, "grad_norm": 0.261650413274765, "learning_rate": 0.0001, "loss": 1.4658, "step": 3689 }, { "epoch": 0.5924855491329479, "grad_norm": 0.2564741373062134, "learning_rate": 0.0001, "loss": 1.5001, "step": 3690 }, { "epoch": 0.5926461143224149, "grad_norm": 0.2611472010612488, "learning_rate": 0.0001, "loss": 1.449, "step": 3691 }, { "epoch": 0.5928066795118818, "grad_norm": 0.25770461559295654, "learning_rate": 0.0001, "loss": 1.4628, "step": 3692 }, { "epoch": 0.5929672447013488, "grad_norm": 0.28395816683769226, "learning_rate": 0.0001, "loss": 1.4928, "step": 3693 }, { "epoch": 0.5931278098908157, "grad_norm": 0.2604933977127075, "learning_rate": 0.0001, "loss": 1.5971, "step": 3694 }, { "epoch": 0.5932883750802826, "grad_norm": 0.2781031131744385, "learning_rate": 0.0001, "loss": 1.5112, "step": 3695 }, { "epoch": 0.5934489402697495, "grad_norm": 0.2766863703727722, "learning_rate": 0.0001, "loss": 1.5517, "step": 3696 }, { "epoch": 0.5936095054592164, "grad_norm": 0.24513736367225647, "learning_rate": 0.0001, "loss": 1.4801, "step": 3697 }, { "epoch": 0.5937700706486834, "grad_norm": 0.2590049207210541, "learning_rate": 0.0001, "loss": 1.5126, "step": 3698 }, { "epoch": 0.5939306358381503, "grad_norm": 2.0267324447631836, "learning_rate": 0.0001, "loss": 1.4867, "step": 3699 }, { "epoch": 0.5940912010276173, "grad_norm": 0.27024027705192566, "learning_rate": 0.0001, "loss": 1.5394, "step": 3700 }, { "epoch": 0.5942517662170841, "grad_norm": 0.2798246443271637, "learning_rate": 0.0001, "loss": 1.5858, "step": 3701 }, { "epoch": 0.594412331406551, "grad_norm": 0.25654903054237366, "learning_rate": 0.0001, "loss": 1.5108, "step": 3702 }, { "epoch": 0.594572896596018, "grad_norm": 0.2551288902759552, "learning_rate": 0.0001, "loss": 1.4992, "step": 3703 }, { "epoch": 0.5947334617854849, "grad_norm": 0.24642886221408844, "learning_rate": 0.0001, "loss": 1.5106, "step": 3704 }, { "epoch": 0.5948940269749519, "grad_norm": 0.2912113070487976, "learning_rate": 0.0001, "loss": 1.509, "step": 3705 }, { "epoch": 0.5950545921644188, "grad_norm": 0.2692941725254059, "learning_rate": 0.0001, "loss": 1.4771, "step": 3706 }, { "epoch": 0.5952151573538856, "grad_norm": 0.267769992351532, "learning_rate": 0.0001, "loss": 1.5248, "step": 3707 }, { "epoch": 0.5953757225433526, "grad_norm": 0.2733960449695587, "learning_rate": 0.0001, "loss": 1.5027, "step": 3708 }, { "epoch": 0.5955362877328195, "grad_norm": 0.2550022304058075, "learning_rate": 0.0001, "loss": 1.5546, "step": 3709 }, { "epoch": 0.5956968529222865, "grad_norm": 0.2601030170917511, "learning_rate": 0.0001, "loss": 1.5325, "step": 3710 }, { "epoch": 0.5958574181117534, "grad_norm": 0.252185195684433, "learning_rate": 0.0001, "loss": 1.5049, "step": 3711 }, { "epoch": 0.5960179833012202, "grad_norm": 0.2696683704853058, "learning_rate": 0.0001, "loss": 1.4869, "step": 3712 }, { "epoch": 0.5961785484906872, "grad_norm": 0.25885897874832153, "learning_rate": 0.0001, "loss": 1.4866, "step": 3713 }, { "epoch": 0.5963391136801541, "grad_norm": 0.26752132177352905, "learning_rate": 0.0001, "loss": 1.5446, "step": 3714 }, { "epoch": 0.5964996788696211, "grad_norm": 0.2513367831707001, "learning_rate": 0.0001, "loss": 1.4674, "step": 3715 }, { "epoch": 0.596660244059088, "grad_norm": 0.25783655047416687, "learning_rate": 0.0001, "loss": 1.4558, "step": 3716 }, { "epoch": 0.596820809248555, "grad_norm": 0.2807951271533966, "learning_rate": 0.0001, "loss": 1.4932, "step": 3717 }, { "epoch": 0.5969813744380218, "grad_norm": 0.25926467776298523, "learning_rate": 0.0001, "loss": 1.572, "step": 3718 }, { "epoch": 0.5971419396274887, "grad_norm": 0.2520737648010254, "learning_rate": 0.0001, "loss": 1.5898, "step": 3719 }, { "epoch": 0.5973025048169557, "grad_norm": 0.24815239012241364, "learning_rate": 0.0001, "loss": 1.5617, "step": 3720 }, { "epoch": 0.5974630700064226, "grad_norm": 0.2779528498649597, "learning_rate": 0.0001, "loss": 1.5554, "step": 3721 }, { "epoch": 0.5976236351958896, "grad_norm": 0.26077648997306824, "learning_rate": 0.0001, "loss": 1.5922, "step": 3722 }, { "epoch": 0.5977842003853564, "grad_norm": 0.2544933557510376, "learning_rate": 0.0001, "loss": 1.5108, "step": 3723 }, { "epoch": 0.5979447655748233, "grad_norm": 0.2520194947719574, "learning_rate": 0.0001, "loss": 1.4948, "step": 3724 }, { "epoch": 0.5981053307642903, "grad_norm": 0.264365553855896, "learning_rate": 0.0001, "loss": 1.4446, "step": 3725 }, { "epoch": 0.5982658959537572, "grad_norm": 0.2670818269252777, "learning_rate": 0.0001, "loss": 1.4515, "step": 3726 }, { "epoch": 0.5984264611432242, "grad_norm": 0.2703360319137573, "learning_rate": 0.0001, "loss": 1.4897, "step": 3727 }, { "epoch": 0.5985870263326911, "grad_norm": 0.27654823660850525, "learning_rate": 0.0001, "loss": 1.4311, "step": 3728 }, { "epoch": 0.598747591522158, "grad_norm": 0.26018691062927246, "learning_rate": 0.0001, "loss": 1.5499, "step": 3729 }, { "epoch": 0.5989081567116249, "grad_norm": 0.26952722668647766, "learning_rate": 0.0001, "loss": 1.5381, "step": 3730 }, { "epoch": 0.5990687219010918, "grad_norm": 0.26965275406837463, "learning_rate": 0.0001, "loss": 1.52, "step": 3731 }, { "epoch": 0.5992292870905588, "grad_norm": 0.2653871178627014, "learning_rate": 0.0001, "loss": 1.5566, "step": 3732 }, { "epoch": 0.5993898522800257, "grad_norm": 0.2541314959526062, "learning_rate": 0.0001, "loss": 1.5139, "step": 3733 }, { "epoch": 0.5995504174694927, "grad_norm": 0.2657106816768646, "learning_rate": 0.0001, "loss": 1.4952, "step": 3734 }, { "epoch": 0.5997109826589595, "grad_norm": 0.25480684638023376, "learning_rate": 0.0001, "loss": 1.5435, "step": 3735 }, { "epoch": 0.5998715478484264, "grad_norm": 0.2721002995967865, "learning_rate": 0.0001, "loss": 1.5409, "step": 3736 }, { "epoch": 0.6000321130378934, "grad_norm": 0.2616802155971527, "learning_rate": 0.0001, "loss": 1.5084, "step": 3737 }, { "epoch": 0.6001926782273603, "grad_norm": 0.26311367750167847, "learning_rate": 0.0001, "loss": 1.5424, "step": 3738 }, { "epoch": 0.6003532434168273, "grad_norm": 0.2507343888282776, "learning_rate": 0.0001, "loss": 1.4231, "step": 3739 }, { "epoch": 0.6005138086062941, "grad_norm": 0.26450052857398987, "learning_rate": 0.0001, "loss": 1.4702, "step": 3740 }, { "epoch": 0.600674373795761, "grad_norm": 0.2669855058193207, "learning_rate": 0.0001, "loss": 1.4722, "step": 3741 }, { "epoch": 0.600834938985228, "grad_norm": 0.2633204460144043, "learning_rate": 0.0001, "loss": 1.5088, "step": 3742 }, { "epoch": 0.6009955041746949, "grad_norm": 0.2775106728076935, "learning_rate": 0.0001, "loss": 1.4919, "step": 3743 }, { "epoch": 0.6011560693641619, "grad_norm": 0.25679922103881836, "learning_rate": 0.0001, "loss": 1.4259, "step": 3744 }, { "epoch": 0.6013166345536288, "grad_norm": 0.2631145119667053, "learning_rate": 0.0001, "loss": 1.5751, "step": 3745 }, { "epoch": 0.6014771997430957, "grad_norm": 0.28631600737571716, "learning_rate": 0.0001, "loss": 1.5594, "step": 3746 }, { "epoch": 0.6016377649325626, "grad_norm": 0.27606093883514404, "learning_rate": 0.0001, "loss": 1.4945, "step": 3747 }, { "epoch": 0.6017983301220295, "grad_norm": 0.28245851397514343, "learning_rate": 0.0001, "loss": 1.4792, "step": 3748 }, { "epoch": 0.6019588953114965, "grad_norm": 0.26298055052757263, "learning_rate": 0.0001, "loss": 1.5375, "step": 3749 }, { "epoch": 0.6021194605009634, "grad_norm": 0.27240198850631714, "learning_rate": 0.0001, "loss": 1.5082, "step": 3750 }, { "epoch": 0.6022800256904303, "grad_norm": 0.25478267669677734, "learning_rate": 0.0001, "loss": 1.5485, "step": 3751 }, { "epoch": 0.6024405908798972, "grad_norm": 0.2596980929374695, "learning_rate": 0.0001, "loss": 1.5212, "step": 3752 }, { "epoch": 0.6026011560693642, "grad_norm": 0.2807830572128296, "learning_rate": 0.0001, "loss": 1.4283, "step": 3753 }, { "epoch": 0.6027617212588311, "grad_norm": 0.2505226135253906, "learning_rate": 0.0001, "loss": 1.4848, "step": 3754 }, { "epoch": 0.602922286448298, "grad_norm": 0.2531312108039856, "learning_rate": 0.0001, "loss": 1.4272, "step": 3755 }, { "epoch": 0.603082851637765, "grad_norm": 0.27030253410339355, "learning_rate": 0.0001, "loss": 1.4415, "step": 3756 }, { "epoch": 0.6032434168272318, "grad_norm": 0.26890572905540466, "learning_rate": 0.0001, "loss": 1.5336, "step": 3757 }, { "epoch": 0.6034039820166988, "grad_norm": 0.28463757038116455, "learning_rate": 0.0001, "loss": 1.4643, "step": 3758 }, { "epoch": 0.6035645472061657, "grad_norm": 0.28352299332618713, "learning_rate": 0.0001, "loss": 1.5305, "step": 3759 }, { "epoch": 0.6037251123956326, "grad_norm": 0.27299073338508606, "learning_rate": 0.0001, "loss": 1.4717, "step": 3760 }, { "epoch": 0.6038856775850996, "grad_norm": 0.28827208280563354, "learning_rate": 0.0001, "loss": 1.4754, "step": 3761 }, { "epoch": 0.6040462427745664, "grad_norm": 0.2631355822086334, "learning_rate": 0.0001, "loss": 1.5013, "step": 3762 }, { "epoch": 0.6042068079640334, "grad_norm": 0.2724611163139343, "learning_rate": 0.0001, "loss": 1.514, "step": 3763 }, { "epoch": 0.6043673731535003, "grad_norm": 0.26698440313339233, "learning_rate": 0.0001, "loss": 1.5561, "step": 3764 }, { "epoch": 0.6045279383429673, "grad_norm": 0.2523908019065857, "learning_rate": 0.0001, "loss": 1.5108, "step": 3765 }, { "epoch": 0.6046885035324342, "grad_norm": 0.28086769580841064, "learning_rate": 0.0001, "loss": 1.554, "step": 3766 }, { "epoch": 0.6048490687219011, "grad_norm": 0.255831778049469, "learning_rate": 0.0001, "loss": 1.4552, "step": 3767 }, { "epoch": 0.605009633911368, "grad_norm": 0.24505458772182465, "learning_rate": 0.0001, "loss": 1.5219, "step": 3768 }, { "epoch": 0.6051701991008349, "grad_norm": 0.2664952278137207, "learning_rate": 0.0001, "loss": 1.5656, "step": 3769 }, { "epoch": 0.6053307642903019, "grad_norm": 0.24774408340454102, "learning_rate": 0.0001, "loss": 1.5415, "step": 3770 }, { "epoch": 0.6054913294797688, "grad_norm": 0.2702226936817169, "learning_rate": 0.0001, "loss": 1.4686, "step": 3771 }, { "epoch": 0.6056518946692357, "grad_norm": 0.25096145272254944, "learning_rate": 0.0001, "loss": 1.4664, "step": 3772 }, { "epoch": 0.6058124598587026, "grad_norm": 0.25611796975135803, "learning_rate": 0.0001, "loss": 1.5748, "step": 3773 }, { "epoch": 0.6059730250481695, "grad_norm": 0.2698650062084198, "learning_rate": 0.0001, "loss": 1.5064, "step": 3774 }, { "epoch": 0.6061335902376365, "grad_norm": 0.26493504643440247, "learning_rate": 0.0001, "loss": 1.5543, "step": 3775 }, { "epoch": 0.6062941554271034, "grad_norm": 0.2588040232658386, "learning_rate": 0.0001, "loss": 1.5167, "step": 3776 }, { "epoch": 0.6064547206165704, "grad_norm": 0.27098220586776733, "learning_rate": 0.0001, "loss": 1.456, "step": 3777 }, { "epoch": 0.6066152858060373, "grad_norm": 0.26216307282447815, "learning_rate": 0.0001, "loss": 1.454, "step": 3778 }, { "epoch": 0.6067758509955041, "grad_norm": 0.2592635452747345, "learning_rate": 0.0001, "loss": 1.4926, "step": 3779 }, { "epoch": 0.6069364161849711, "grad_norm": 0.2716052830219269, "learning_rate": 0.0001, "loss": 1.6326, "step": 3780 }, { "epoch": 0.607096981374438, "grad_norm": 0.2565079629421234, "learning_rate": 0.0001, "loss": 1.444, "step": 3781 }, { "epoch": 0.607257546563905, "grad_norm": 0.2609427869319916, "learning_rate": 0.0001, "loss": 1.5621, "step": 3782 }, { "epoch": 0.6074181117533719, "grad_norm": 0.2507987916469574, "learning_rate": 0.0001, "loss": 1.3649, "step": 3783 }, { "epoch": 0.6075786769428388, "grad_norm": 0.23893386125564575, "learning_rate": 0.0001, "loss": 1.4768, "step": 3784 }, { "epoch": 0.6077392421323057, "grad_norm": 0.2619777321815491, "learning_rate": 0.0001, "loss": 1.5338, "step": 3785 }, { "epoch": 0.6078998073217726, "grad_norm": 0.26731613278388977, "learning_rate": 0.0001, "loss": 1.5302, "step": 3786 }, { "epoch": 0.6080603725112396, "grad_norm": 0.25718340277671814, "learning_rate": 0.0001, "loss": 1.4965, "step": 3787 }, { "epoch": 0.6082209377007065, "grad_norm": 0.25129830837249756, "learning_rate": 0.0001, "loss": 1.4393, "step": 3788 }, { "epoch": 0.6083815028901735, "grad_norm": 0.27007824182510376, "learning_rate": 0.0001, "loss": 1.5635, "step": 3789 }, { "epoch": 0.6085420680796403, "grad_norm": 0.26350027322769165, "learning_rate": 0.0001, "loss": 1.4737, "step": 3790 }, { "epoch": 0.6087026332691072, "grad_norm": 0.2519645690917969, "learning_rate": 0.0001, "loss": 1.4655, "step": 3791 }, { "epoch": 0.6088631984585742, "grad_norm": 0.25365468859672546, "learning_rate": 0.0001, "loss": 1.4729, "step": 3792 }, { "epoch": 0.6090237636480411, "grad_norm": 0.26774725317955017, "learning_rate": 0.0001, "loss": 1.6337, "step": 3793 }, { "epoch": 0.6091843288375081, "grad_norm": 0.25014322996139526, "learning_rate": 0.0001, "loss": 1.4466, "step": 3794 }, { "epoch": 0.609344894026975, "grad_norm": 0.26495805382728577, "learning_rate": 0.0001, "loss": 1.5456, "step": 3795 }, { "epoch": 0.6095054592164418, "grad_norm": 0.2657485604286194, "learning_rate": 0.0001, "loss": 1.5112, "step": 3796 }, { "epoch": 0.6096660244059088, "grad_norm": 0.25781241059303284, "learning_rate": 0.0001, "loss": 1.5289, "step": 3797 }, { "epoch": 0.6098265895953757, "grad_norm": 0.2627398371696472, "learning_rate": 0.0001, "loss": 1.542, "step": 3798 }, { "epoch": 0.6099871547848427, "grad_norm": 0.26378151774406433, "learning_rate": 0.0001, "loss": 1.3881, "step": 3799 }, { "epoch": 0.6101477199743096, "grad_norm": 0.26991933584213257, "learning_rate": 0.0001, "loss": 1.5015, "step": 3800 }, { "epoch": 0.6103082851637764, "grad_norm": 0.2511008083820343, "learning_rate": 0.0001, "loss": 1.4836, "step": 3801 }, { "epoch": 0.6104688503532434, "grad_norm": 0.2760944068431854, "learning_rate": 0.0001, "loss": 1.4911, "step": 3802 }, { "epoch": 0.6106294155427103, "grad_norm": 0.2546599507331848, "learning_rate": 0.0001, "loss": 1.4023, "step": 3803 }, { "epoch": 0.6107899807321773, "grad_norm": 0.2641827464103699, "learning_rate": 0.0001, "loss": 1.5429, "step": 3804 }, { "epoch": 0.6109505459216442, "grad_norm": 0.30749645829200745, "learning_rate": 0.0001, "loss": 1.4374, "step": 3805 }, { "epoch": 0.6111111111111112, "grad_norm": 0.2694147527217865, "learning_rate": 0.0001, "loss": 1.5099, "step": 3806 }, { "epoch": 0.611271676300578, "grad_norm": 0.26679447293281555, "learning_rate": 0.0001, "loss": 1.5684, "step": 3807 }, { "epoch": 0.6114322414900449, "grad_norm": 0.2818480134010315, "learning_rate": 0.0001, "loss": 1.497, "step": 3808 }, { "epoch": 0.6115928066795119, "grad_norm": 0.28491002321243286, "learning_rate": 0.0001, "loss": 1.5879, "step": 3809 }, { "epoch": 0.6117533718689788, "grad_norm": 0.25981539487838745, "learning_rate": 0.0001, "loss": 1.5756, "step": 3810 }, { "epoch": 0.6119139370584458, "grad_norm": 0.26610076427459717, "learning_rate": 0.0001, "loss": 1.5041, "step": 3811 }, { "epoch": 0.6120745022479126, "grad_norm": 0.25697603821754456, "learning_rate": 0.0001, "loss": 1.428, "step": 3812 }, { "epoch": 0.6122350674373795, "grad_norm": 0.26291266083717346, "learning_rate": 0.0001, "loss": 1.5169, "step": 3813 }, { "epoch": 0.6123956326268465, "grad_norm": 0.26881441473960876, "learning_rate": 0.0001, "loss": 1.6022, "step": 3814 }, { "epoch": 0.6125561978163134, "grad_norm": 0.25986117124557495, "learning_rate": 0.0001, "loss": 1.5053, "step": 3815 }, { "epoch": 0.6127167630057804, "grad_norm": 0.2613845467567444, "learning_rate": 0.0001, "loss": 1.5652, "step": 3816 }, { "epoch": 0.6128773281952473, "grad_norm": 0.24931935966014862, "learning_rate": 0.0001, "loss": 1.4748, "step": 3817 }, { "epoch": 0.6130378933847141, "grad_norm": 0.2685846984386444, "learning_rate": 0.0001, "loss": 1.5155, "step": 3818 }, { "epoch": 0.6131984585741811, "grad_norm": 0.3231850564479828, "learning_rate": 0.0001, "loss": 1.5149, "step": 3819 }, { "epoch": 0.613359023763648, "grad_norm": 0.24862191081047058, "learning_rate": 0.0001, "loss": 1.4452, "step": 3820 }, { "epoch": 0.613519588953115, "grad_norm": 0.26581764221191406, "learning_rate": 0.0001, "loss": 1.4571, "step": 3821 }, { "epoch": 0.6136801541425819, "grad_norm": 0.27062392234802246, "learning_rate": 0.0001, "loss": 1.475, "step": 3822 }, { "epoch": 0.6138407193320488, "grad_norm": 0.27251020073890686, "learning_rate": 0.0001, "loss": 1.5969, "step": 3823 }, { "epoch": 0.6140012845215157, "grad_norm": 0.262570321559906, "learning_rate": 0.0001, "loss": 1.4885, "step": 3824 }, { "epoch": 0.6141618497109826, "grad_norm": 0.262735515832901, "learning_rate": 0.0001, "loss": 1.4949, "step": 3825 }, { "epoch": 0.6143224149004496, "grad_norm": 0.26099810004234314, "learning_rate": 0.0001, "loss": 1.567, "step": 3826 }, { "epoch": 0.6144829800899165, "grad_norm": 0.25535207986831665, "learning_rate": 0.0001, "loss": 1.5153, "step": 3827 }, { "epoch": 0.6146435452793835, "grad_norm": 0.2533794045448303, "learning_rate": 0.0001, "loss": 1.4156, "step": 3828 }, { "epoch": 0.6148041104688503, "grad_norm": 0.26601144671440125, "learning_rate": 0.0001, "loss": 1.488, "step": 3829 }, { "epoch": 0.6149646756583173, "grad_norm": 0.2557826340198517, "learning_rate": 0.0001, "loss": 1.4828, "step": 3830 }, { "epoch": 0.6151252408477842, "grad_norm": 0.28637704253196716, "learning_rate": 0.0001, "loss": 1.5266, "step": 3831 }, { "epoch": 0.6152858060372511, "grad_norm": 0.2602996826171875, "learning_rate": 0.0001, "loss": 1.49, "step": 3832 }, { "epoch": 0.6154463712267181, "grad_norm": 0.2608982026576996, "learning_rate": 0.0001, "loss": 1.5477, "step": 3833 }, { "epoch": 0.615606936416185, "grad_norm": 0.2602182626724243, "learning_rate": 0.0001, "loss": 1.5409, "step": 3834 }, { "epoch": 0.6157675016056519, "grad_norm": 0.2599102258682251, "learning_rate": 0.0001, "loss": 1.5139, "step": 3835 }, { "epoch": 0.6159280667951188, "grad_norm": 0.24739331007003784, "learning_rate": 0.0001, "loss": 1.4262, "step": 3836 }, { "epoch": 0.6160886319845857, "grad_norm": 0.25765302777290344, "learning_rate": 0.0001, "loss": 1.5392, "step": 3837 }, { "epoch": 0.6162491971740527, "grad_norm": 0.27270618081092834, "learning_rate": 0.0001, "loss": 1.5475, "step": 3838 }, { "epoch": 0.6164097623635196, "grad_norm": 0.25871002674102783, "learning_rate": 0.0001, "loss": 1.4366, "step": 3839 }, { "epoch": 0.6165703275529865, "grad_norm": 0.24694572389125824, "learning_rate": 0.0001, "loss": 1.4313, "step": 3840 }, { "epoch": 0.6167308927424534, "grad_norm": 0.27282777428627014, "learning_rate": 0.0001, "loss": 1.5298, "step": 3841 }, { "epoch": 0.6168914579319204, "grad_norm": 0.25624415278434753, "learning_rate": 0.0001, "loss": 1.4922, "step": 3842 }, { "epoch": 0.6170520231213873, "grad_norm": 0.25390180945396423, "learning_rate": 0.0001, "loss": 1.5232, "step": 3843 }, { "epoch": 0.6172125883108542, "grad_norm": 0.26919737458229065, "learning_rate": 0.0001, "loss": 1.444, "step": 3844 }, { "epoch": 0.6173731535003212, "grad_norm": 0.2601894438266754, "learning_rate": 0.0001, "loss": 1.5319, "step": 3845 }, { "epoch": 0.617533718689788, "grad_norm": 0.25561174750328064, "learning_rate": 0.0001, "loss": 1.5097, "step": 3846 }, { "epoch": 0.617694283879255, "grad_norm": 0.24631574749946594, "learning_rate": 0.0001, "loss": 1.4197, "step": 3847 }, { "epoch": 0.6178548490687219, "grad_norm": 0.25146305561065674, "learning_rate": 0.0001, "loss": 1.5236, "step": 3848 }, { "epoch": 0.6180154142581888, "grad_norm": 0.2760447561740875, "learning_rate": 0.0001, "loss": 1.5684, "step": 3849 }, { "epoch": 0.6181759794476558, "grad_norm": 0.25651994347572327, "learning_rate": 0.0001, "loss": 1.5364, "step": 3850 }, { "epoch": 0.6183365446371226, "grad_norm": 0.2498057633638382, "learning_rate": 0.0001, "loss": 1.4898, "step": 3851 }, { "epoch": 0.6184971098265896, "grad_norm": 0.2638412415981293, "learning_rate": 0.0001, "loss": 1.5045, "step": 3852 }, { "epoch": 0.6186576750160565, "grad_norm": 0.24950148165225983, "learning_rate": 0.0001, "loss": 1.4867, "step": 3853 }, { "epoch": 0.6188182402055235, "grad_norm": 0.24246224761009216, "learning_rate": 0.0001, "loss": 1.4704, "step": 3854 }, { "epoch": 0.6189788053949904, "grad_norm": 0.24784599244594574, "learning_rate": 0.0001, "loss": 1.4354, "step": 3855 }, { "epoch": 0.6191393705844573, "grad_norm": 0.2517155110836029, "learning_rate": 0.0001, "loss": 1.5132, "step": 3856 }, { "epoch": 0.6192999357739242, "grad_norm": 0.2677137553691864, "learning_rate": 0.0001, "loss": 1.5903, "step": 3857 }, { "epoch": 0.6194605009633911, "grad_norm": 0.2763158082962036, "learning_rate": 0.0001, "loss": 1.5194, "step": 3858 }, { "epoch": 0.6196210661528581, "grad_norm": 0.28667938709259033, "learning_rate": 0.0001, "loss": 1.4989, "step": 3859 }, { "epoch": 0.619781631342325, "grad_norm": 0.2558166980743408, "learning_rate": 0.0001, "loss": 1.5101, "step": 3860 }, { "epoch": 0.619942196531792, "grad_norm": 0.25873079895973206, "learning_rate": 0.0001, "loss": 1.473, "step": 3861 }, { "epoch": 0.6201027617212588, "grad_norm": 0.28501585125923157, "learning_rate": 0.0001, "loss": 1.5765, "step": 3862 }, { "epoch": 0.6202633269107257, "grad_norm": 0.2709580957889557, "learning_rate": 0.0001, "loss": 1.5434, "step": 3863 }, { "epoch": 0.6204238921001927, "grad_norm": 0.31828853487968445, "learning_rate": 0.0001, "loss": 1.4774, "step": 3864 }, { "epoch": 0.6205844572896596, "grad_norm": 0.2685384154319763, "learning_rate": 0.0001, "loss": 1.5567, "step": 3865 }, { "epoch": 0.6207450224791266, "grad_norm": 0.24856871366500854, "learning_rate": 0.0001, "loss": 1.4992, "step": 3866 }, { "epoch": 0.6209055876685935, "grad_norm": 0.2948998212814331, "learning_rate": 0.0001, "loss": 1.5635, "step": 3867 }, { "epoch": 0.6210661528580603, "grad_norm": 0.27943724393844604, "learning_rate": 0.0001, "loss": 1.544, "step": 3868 }, { "epoch": 0.6212267180475273, "grad_norm": 0.2594679892063141, "learning_rate": 0.0001, "loss": 1.5091, "step": 3869 }, { "epoch": 0.6213872832369942, "grad_norm": 0.26492151618003845, "learning_rate": 0.0001, "loss": 1.5173, "step": 3870 }, { "epoch": 0.6215478484264612, "grad_norm": 0.26779136061668396, "learning_rate": 0.0001, "loss": 1.4597, "step": 3871 }, { "epoch": 0.6217084136159281, "grad_norm": 0.2853734791278839, "learning_rate": 0.0001, "loss": 1.5688, "step": 3872 }, { "epoch": 0.621868978805395, "grad_norm": 0.2937188744544983, "learning_rate": 0.0001, "loss": 1.518, "step": 3873 }, { "epoch": 0.6220295439948619, "grad_norm": 0.27518853545188904, "learning_rate": 0.0001, "loss": 1.4543, "step": 3874 }, { "epoch": 0.6221901091843288, "grad_norm": 0.2555117905139923, "learning_rate": 0.0001, "loss": 1.4583, "step": 3875 }, { "epoch": 0.6223506743737958, "grad_norm": 0.2831689119338989, "learning_rate": 0.0001, "loss": 1.4717, "step": 3876 }, { "epoch": 0.6225112395632627, "grad_norm": 0.27565649151802063, "learning_rate": 0.0001, "loss": 1.5472, "step": 3877 }, { "epoch": 0.6226718047527297, "grad_norm": 0.28797581791877747, "learning_rate": 0.0001, "loss": 1.4364, "step": 3878 }, { "epoch": 0.6228323699421965, "grad_norm": 0.255044162273407, "learning_rate": 0.0001, "loss": 1.4476, "step": 3879 }, { "epoch": 0.6229929351316634, "grad_norm": 0.27069318294525146, "learning_rate": 0.0001, "loss": 1.5265, "step": 3880 }, { "epoch": 0.6231535003211304, "grad_norm": 0.26840701699256897, "learning_rate": 0.0001, "loss": 1.4822, "step": 3881 }, { "epoch": 0.6233140655105973, "grad_norm": 0.2857365012168884, "learning_rate": 0.0001, "loss": 1.5621, "step": 3882 }, { "epoch": 0.6234746307000643, "grad_norm": 0.26349303126335144, "learning_rate": 0.0001, "loss": 1.4404, "step": 3883 }, { "epoch": 0.6236351958895312, "grad_norm": 0.25878918170928955, "learning_rate": 0.0001, "loss": 1.4413, "step": 3884 }, { "epoch": 0.623795761078998, "grad_norm": 0.2577318847179413, "learning_rate": 0.0001, "loss": 1.4586, "step": 3885 }, { "epoch": 0.623956326268465, "grad_norm": 0.2663488984107971, "learning_rate": 0.0001, "loss": 1.4783, "step": 3886 }, { "epoch": 0.6241168914579319, "grad_norm": 0.38730761408805847, "learning_rate": 0.0001, "loss": 1.5137, "step": 3887 }, { "epoch": 0.6242774566473989, "grad_norm": 0.2595405578613281, "learning_rate": 0.0001, "loss": 1.5058, "step": 3888 }, { "epoch": 0.6244380218368658, "grad_norm": 0.25100287795066833, "learning_rate": 0.0001, "loss": 1.5188, "step": 3889 }, { "epoch": 0.6245985870263326, "grad_norm": 0.39545443654060364, "learning_rate": 0.0001, "loss": 1.5598, "step": 3890 }, { "epoch": 0.6247591522157996, "grad_norm": 0.2625770568847656, "learning_rate": 0.0001, "loss": 1.4418, "step": 3891 }, { "epoch": 0.6249197174052665, "grad_norm": 0.25303763151168823, "learning_rate": 0.0001, "loss": 1.3447, "step": 3892 }, { "epoch": 0.6250802825947335, "grad_norm": 0.26952648162841797, "learning_rate": 0.0001, "loss": 1.5335, "step": 3893 }, { "epoch": 0.6252408477842004, "grad_norm": 0.2523767650127411, "learning_rate": 0.0001, "loss": 1.4401, "step": 3894 }, { "epoch": 0.6254014129736674, "grad_norm": 0.2689189314842224, "learning_rate": 0.0001, "loss": 1.4875, "step": 3895 }, { "epoch": 0.6255619781631342, "grad_norm": 0.2631898522377014, "learning_rate": 0.0001, "loss": 1.4369, "step": 3896 }, { "epoch": 0.6257225433526011, "grad_norm": 0.28323471546173096, "learning_rate": 0.0001, "loss": 1.5534, "step": 3897 }, { "epoch": 0.6258831085420681, "grad_norm": 0.4033949673175812, "learning_rate": 0.0001, "loss": 1.4326, "step": 3898 }, { "epoch": 0.626043673731535, "grad_norm": 0.24338938295841217, "learning_rate": 0.0001, "loss": 1.3999, "step": 3899 }, { "epoch": 0.626204238921002, "grad_norm": 0.2539018988609314, "learning_rate": 0.0001, "loss": 1.5075, "step": 3900 }, { "epoch": 0.6263648041104688, "grad_norm": 0.2713395357131958, "learning_rate": 0.0001, "loss": 1.493, "step": 3901 }, { "epoch": 0.6265253692999357, "grad_norm": 0.26039496064186096, "learning_rate": 0.0001, "loss": 1.5543, "step": 3902 }, { "epoch": 0.6266859344894027, "grad_norm": 0.25155508518218994, "learning_rate": 0.0001, "loss": 1.4714, "step": 3903 }, { "epoch": 0.6268464996788696, "grad_norm": 0.26837146282196045, "learning_rate": 0.0001, "loss": 1.5416, "step": 3904 }, { "epoch": 0.6270070648683366, "grad_norm": 0.26188722252845764, "learning_rate": 0.0001, "loss": 1.5835, "step": 3905 }, { "epoch": 0.6271676300578035, "grad_norm": 0.27130991220474243, "learning_rate": 0.0001, "loss": 1.525, "step": 3906 }, { "epoch": 0.6273281952472703, "grad_norm": 0.25837934017181396, "learning_rate": 0.0001, "loss": 1.4969, "step": 3907 }, { "epoch": 0.6274887604367373, "grad_norm": 0.26127636432647705, "learning_rate": 0.0001, "loss": 1.6004, "step": 3908 }, { "epoch": 0.6276493256262042, "grad_norm": 0.2639945447444916, "learning_rate": 0.0001, "loss": 1.4771, "step": 3909 }, { "epoch": 0.6278098908156712, "grad_norm": 0.2688761353492737, "learning_rate": 0.0001, "loss": 1.4476, "step": 3910 }, { "epoch": 0.6279704560051381, "grad_norm": 0.25851279497146606, "learning_rate": 0.0001, "loss": 1.4896, "step": 3911 }, { "epoch": 0.628131021194605, "grad_norm": 0.2678278684616089, "learning_rate": 0.0001, "loss": 1.4605, "step": 3912 }, { "epoch": 0.6282915863840719, "grad_norm": 0.2609288692474365, "learning_rate": 0.0001, "loss": 1.4632, "step": 3913 }, { "epoch": 0.6284521515735388, "grad_norm": 0.24951615929603577, "learning_rate": 0.0001, "loss": 1.4165, "step": 3914 }, { "epoch": 0.6286127167630058, "grad_norm": 0.2530744671821594, "learning_rate": 0.0001, "loss": 1.5278, "step": 3915 }, { "epoch": 0.6287732819524727, "grad_norm": 0.2548890709877014, "learning_rate": 0.0001, "loss": 1.5422, "step": 3916 }, { "epoch": 0.6289338471419397, "grad_norm": 0.2568867802619934, "learning_rate": 0.0001, "loss": 1.4971, "step": 3917 }, { "epoch": 0.6290944123314065, "grad_norm": 0.26970985531806946, "learning_rate": 0.0001, "loss": 1.5194, "step": 3918 }, { "epoch": 0.6292549775208734, "grad_norm": 0.2785082161426544, "learning_rate": 0.0001, "loss": 1.4597, "step": 3919 }, { "epoch": 0.6294155427103404, "grad_norm": 0.2592335343360901, "learning_rate": 0.0001, "loss": 1.5084, "step": 3920 }, { "epoch": 0.6295761078998073, "grad_norm": 0.29501408338546753, "learning_rate": 0.0001, "loss": 1.526, "step": 3921 }, { "epoch": 0.6297366730892743, "grad_norm": 0.27205076813697815, "learning_rate": 0.0001, "loss": 1.5303, "step": 3922 }, { "epoch": 0.6298972382787412, "grad_norm": 0.27818426489830017, "learning_rate": 0.0001, "loss": 1.4993, "step": 3923 }, { "epoch": 0.630057803468208, "grad_norm": 0.25199609994888306, "learning_rate": 0.0001, "loss": 1.4921, "step": 3924 }, { "epoch": 0.630218368657675, "grad_norm": 0.259134441614151, "learning_rate": 0.0001, "loss": 1.5125, "step": 3925 }, { "epoch": 0.6303789338471419, "grad_norm": 0.2574685215950012, "learning_rate": 0.0001, "loss": 1.4471, "step": 3926 }, { "epoch": 0.6305394990366089, "grad_norm": 0.2670515477657318, "learning_rate": 0.0001, "loss": 1.5431, "step": 3927 }, { "epoch": 0.6307000642260758, "grad_norm": 0.2595072388648987, "learning_rate": 0.0001, "loss": 1.4905, "step": 3928 }, { "epoch": 0.6308606294155427, "grad_norm": 0.2716316282749176, "learning_rate": 0.0001, "loss": 1.4877, "step": 3929 }, { "epoch": 0.6310211946050096, "grad_norm": 0.26863327622413635, "learning_rate": 0.0001, "loss": 1.4627, "step": 3930 }, { "epoch": 0.6311817597944765, "grad_norm": 0.25725260376930237, "learning_rate": 0.0001, "loss": 1.4557, "step": 3931 }, { "epoch": 0.6313423249839435, "grad_norm": 0.2594561278820038, "learning_rate": 0.0001, "loss": 1.5729, "step": 3932 }, { "epoch": 0.6315028901734104, "grad_norm": 0.2631740868091583, "learning_rate": 0.0001, "loss": 1.4892, "step": 3933 }, { "epoch": 0.6316634553628774, "grad_norm": 0.270300030708313, "learning_rate": 0.0001, "loss": 1.5202, "step": 3934 }, { "epoch": 0.6318240205523442, "grad_norm": 0.2681209146976471, "learning_rate": 0.0001, "loss": 1.5378, "step": 3935 }, { "epoch": 0.6319845857418112, "grad_norm": 0.2748728394508362, "learning_rate": 0.0001, "loss": 1.6086, "step": 3936 }, { "epoch": 0.6321451509312781, "grad_norm": 0.28140565752983093, "learning_rate": 0.0001, "loss": 1.532, "step": 3937 }, { "epoch": 0.632305716120745, "grad_norm": 0.25414136052131653, "learning_rate": 0.0001, "loss": 1.5435, "step": 3938 }, { "epoch": 0.632466281310212, "grad_norm": 0.535088300704956, "learning_rate": 0.0001, "loss": 1.5236, "step": 3939 }, { "epoch": 0.6326268464996788, "grad_norm": 0.25747591257095337, "learning_rate": 0.0001, "loss": 1.406, "step": 3940 }, { "epoch": 0.6327874116891458, "grad_norm": 0.2462492734193802, "learning_rate": 0.0001, "loss": 1.4842, "step": 3941 }, { "epoch": 0.6329479768786127, "grad_norm": 0.2716800570487976, "learning_rate": 0.0001, "loss": 1.465, "step": 3942 }, { "epoch": 0.6331085420680796, "grad_norm": 0.26844802498817444, "learning_rate": 0.0001, "loss": 1.5626, "step": 3943 }, { "epoch": 0.6332691072575466, "grad_norm": 0.25362297892570496, "learning_rate": 0.0001, "loss": 1.534, "step": 3944 }, { "epoch": 0.6334296724470135, "grad_norm": 0.2911030650138855, "learning_rate": 0.0001, "loss": 1.5393, "step": 3945 }, { "epoch": 0.6335902376364804, "grad_norm": 0.24949724972248077, "learning_rate": 0.0001, "loss": 1.5277, "step": 3946 }, { "epoch": 0.6337508028259473, "grad_norm": 0.25194117426872253, "learning_rate": 0.0001, "loss": 1.5009, "step": 3947 }, { "epoch": 0.6339113680154143, "grad_norm": 0.25425615906715393, "learning_rate": 0.0001, "loss": 1.5279, "step": 3948 }, { "epoch": 0.6340719332048812, "grad_norm": 0.26466798782348633, "learning_rate": 0.0001, "loss": 1.5686, "step": 3949 }, { "epoch": 0.6342324983943481, "grad_norm": 0.3454229533672333, "learning_rate": 0.0001, "loss": 1.5514, "step": 3950 }, { "epoch": 0.634393063583815, "grad_norm": 0.24445679783821106, "learning_rate": 0.0001, "loss": 1.455, "step": 3951 }, { "epoch": 0.6345536287732819, "grad_norm": 0.27483898401260376, "learning_rate": 0.0001, "loss": 1.511, "step": 3952 }, { "epoch": 0.6347141939627489, "grad_norm": 0.25458669662475586, "learning_rate": 0.0001, "loss": 1.5065, "step": 3953 }, { "epoch": 0.6348747591522158, "grad_norm": 0.258373886346817, "learning_rate": 0.0001, "loss": 1.5748, "step": 3954 }, { "epoch": 0.6350353243416827, "grad_norm": 0.2767835557460785, "learning_rate": 0.0001, "loss": 1.5348, "step": 3955 }, { "epoch": 0.6351958895311497, "grad_norm": 0.25013065338134766, "learning_rate": 0.0001, "loss": 1.5059, "step": 3956 }, { "epoch": 0.6353564547206165, "grad_norm": 0.2646697461605072, "learning_rate": 0.0001, "loss": 1.5392, "step": 3957 }, { "epoch": 0.6355170199100835, "grad_norm": 0.280038058757782, "learning_rate": 0.0001, "loss": 1.5206, "step": 3958 }, { "epoch": 0.6356775850995504, "grad_norm": 0.2392078936100006, "learning_rate": 0.0001, "loss": 1.3745, "step": 3959 }, { "epoch": 0.6358381502890174, "grad_norm": 0.27248793840408325, "learning_rate": 0.0001, "loss": 1.4943, "step": 3960 }, { "epoch": 0.6359987154784843, "grad_norm": 0.2702663242816925, "learning_rate": 0.0001, "loss": 1.4945, "step": 3961 }, { "epoch": 0.6361592806679512, "grad_norm": 0.26339977979660034, "learning_rate": 0.0001, "loss": 1.5032, "step": 3962 }, { "epoch": 0.6363198458574181, "grad_norm": 0.27651360630989075, "learning_rate": 0.0001, "loss": 1.5513, "step": 3963 }, { "epoch": 0.636480411046885, "grad_norm": 0.25699666142463684, "learning_rate": 0.0001, "loss": 1.368, "step": 3964 }, { "epoch": 0.636640976236352, "grad_norm": 0.2467186003923416, "learning_rate": 0.0001, "loss": 1.5225, "step": 3965 }, { "epoch": 0.6368015414258189, "grad_norm": 0.26869550347328186, "learning_rate": 0.0001, "loss": 1.4593, "step": 3966 }, { "epoch": 0.6369621066152859, "grad_norm": 0.2678242027759552, "learning_rate": 0.0001, "loss": 1.5605, "step": 3967 }, { "epoch": 0.6371226718047527, "grad_norm": 0.2536470890045166, "learning_rate": 0.0001, "loss": 1.4875, "step": 3968 }, { "epoch": 0.6372832369942196, "grad_norm": 0.25990384817123413, "learning_rate": 0.0001, "loss": 1.4988, "step": 3969 }, { "epoch": 0.6374438021836866, "grad_norm": 0.24920576810836792, "learning_rate": 0.0001, "loss": 1.4493, "step": 3970 }, { "epoch": 0.6376043673731535, "grad_norm": 0.25677353143692017, "learning_rate": 0.0001, "loss": 1.5054, "step": 3971 }, { "epoch": 0.6377649325626205, "grad_norm": 0.2507385015487671, "learning_rate": 0.0001, "loss": 1.492, "step": 3972 }, { "epoch": 0.6379254977520874, "grad_norm": 0.2684652805328369, "learning_rate": 0.0001, "loss": 1.544, "step": 3973 }, { "epoch": 0.6380860629415542, "grad_norm": 0.24495548009872437, "learning_rate": 0.0001, "loss": 1.5038, "step": 3974 }, { "epoch": 0.6382466281310212, "grad_norm": 0.2707032561302185, "learning_rate": 0.0001, "loss": 1.5115, "step": 3975 }, { "epoch": 0.6384071933204881, "grad_norm": 0.27481770515441895, "learning_rate": 0.0001, "loss": 1.5197, "step": 3976 }, { "epoch": 0.6385677585099551, "grad_norm": 0.26256996393203735, "learning_rate": 0.0001, "loss": 1.517, "step": 3977 }, { "epoch": 0.638728323699422, "grad_norm": 0.28677448630332947, "learning_rate": 0.0001, "loss": 1.5337, "step": 3978 }, { "epoch": 0.6388888888888888, "grad_norm": 0.2571727931499481, "learning_rate": 0.0001, "loss": 1.4849, "step": 3979 }, { "epoch": 0.6390494540783558, "grad_norm": 0.25264930725097656, "learning_rate": 0.0001, "loss": 1.4524, "step": 3980 }, { "epoch": 0.6392100192678227, "grad_norm": 0.2521830201148987, "learning_rate": 0.0001, "loss": 1.521, "step": 3981 }, { "epoch": 0.6393705844572897, "grad_norm": 0.27169087529182434, "learning_rate": 0.0001, "loss": 1.5291, "step": 3982 }, { "epoch": 0.6395311496467566, "grad_norm": 0.2885984480381012, "learning_rate": 0.0001, "loss": 1.4407, "step": 3983 }, { "epoch": 0.6396917148362236, "grad_norm": 0.2694244980812073, "learning_rate": 0.0001, "loss": 1.4951, "step": 3984 }, { "epoch": 0.6398522800256904, "grad_norm": 0.26871436834335327, "learning_rate": 0.0001, "loss": 1.54, "step": 3985 }, { "epoch": 0.6400128452151573, "grad_norm": 0.27839118242263794, "learning_rate": 0.0001, "loss": 1.4836, "step": 3986 }, { "epoch": 0.6401734104046243, "grad_norm": 0.2824634909629822, "learning_rate": 0.0001, "loss": 1.4963, "step": 3987 }, { "epoch": 0.6403339755940912, "grad_norm": 0.2501073479652405, "learning_rate": 0.0001, "loss": 1.5297, "step": 3988 }, { "epoch": 0.6404945407835582, "grad_norm": 0.2556857764720917, "learning_rate": 0.0001, "loss": 1.5161, "step": 3989 }, { "epoch": 0.640655105973025, "grad_norm": 0.24514375627040863, "learning_rate": 0.0001, "loss": 1.3465, "step": 3990 }, { "epoch": 0.6408156711624919, "grad_norm": 0.2614043354988098, "learning_rate": 0.0001, "loss": 1.5163, "step": 3991 }, { "epoch": 0.6409762363519589, "grad_norm": 0.27069714665412903, "learning_rate": 0.0001, "loss": 1.5546, "step": 3992 }, { "epoch": 0.6411368015414258, "grad_norm": 0.2559158205986023, "learning_rate": 0.0001, "loss": 1.43, "step": 3993 }, { "epoch": 0.6412973667308928, "grad_norm": 0.26587527990341187, "learning_rate": 0.0001, "loss": 1.5224, "step": 3994 }, { "epoch": 0.6414579319203597, "grad_norm": 0.2705190181732178, "learning_rate": 0.0001, "loss": 1.5092, "step": 3995 }, { "epoch": 0.6416184971098265, "grad_norm": 0.2716139256954193, "learning_rate": 0.0001, "loss": 1.5611, "step": 3996 }, { "epoch": 0.6417790622992935, "grad_norm": 0.30606609582901, "learning_rate": 0.0001, "loss": 1.5154, "step": 3997 }, { "epoch": 0.6419396274887604, "grad_norm": 0.2739136219024658, "learning_rate": 0.0001, "loss": 1.5407, "step": 3998 }, { "epoch": 0.6421001926782274, "grad_norm": 0.2726215422153473, "learning_rate": 0.0001, "loss": 1.5111, "step": 3999 }, { "epoch": 0.6422607578676943, "grad_norm": 0.2681017816066742, "learning_rate": 0.0001, "loss": 1.4121, "step": 4000 }, { "epoch": 0.6424213230571612, "grad_norm": 0.2679270803928375, "learning_rate": 0.0001, "loss": 1.5198, "step": 4001 }, { "epoch": 0.6425818882466281, "grad_norm": 0.2869030237197876, "learning_rate": 0.0001, "loss": 1.512, "step": 4002 }, { "epoch": 0.642742453436095, "grad_norm": 0.2672863304615021, "learning_rate": 0.0001, "loss": 1.5422, "step": 4003 }, { "epoch": 0.642903018625562, "grad_norm": 0.2606004476547241, "learning_rate": 0.0001, "loss": 1.4536, "step": 4004 }, { "epoch": 0.6430635838150289, "grad_norm": 0.26094046235084534, "learning_rate": 0.0001, "loss": 1.424, "step": 4005 }, { "epoch": 0.6432241490044959, "grad_norm": 0.2781563401222229, "learning_rate": 0.0001, "loss": 1.4617, "step": 4006 }, { "epoch": 0.6433847141939627, "grad_norm": 0.25793081521987915, "learning_rate": 0.0001, "loss": 1.4748, "step": 4007 }, { "epoch": 0.6435452793834296, "grad_norm": 0.2908867299556732, "learning_rate": 0.0001, "loss": 1.5213, "step": 4008 }, { "epoch": 0.6437058445728966, "grad_norm": 0.26451578736305237, "learning_rate": 0.0001, "loss": 1.5057, "step": 4009 }, { "epoch": 0.6438664097623635, "grad_norm": 0.2684488594532013, "learning_rate": 0.0001, "loss": 1.4973, "step": 4010 }, { "epoch": 0.6440269749518305, "grad_norm": 0.2536855638027191, "learning_rate": 0.0001, "loss": 1.4982, "step": 4011 }, { "epoch": 0.6441875401412974, "grad_norm": 0.27478665113449097, "learning_rate": 0.0001, "loss": 1.5806, "step": 4012 }, { "epoch": 0.6443481053307643, "grad_norm": 0.2817869186401367, "learning_rate": 0.0001, "loss": 1.551, "step": 4013 }, { "epoch": 0.6445086705202312, "grad_norm": 0.2698647677898407, "learning_rate": 0.0001, "loss": 1.4447, "step": 4014 }, { "epoch": 0.6446692357096981, "grad_norm": 0.26216360926628113, "learning_rate": 0.0001, "loss": 1.5068, "step": 4015 }, { "epoch": 0.6448298008991651, "grad_norm": 0.2640087306499481, "learning_rate": 0.0001, "loss": 1.4907, "step": 4016 }, { "epoch": 0.644990366088632, "grad_norm": 0.2662026286125183, "learning_rate": 0.0001, "loss": 1.4667, "step": 4017 }, { "epoch": 0.6451509312780989, "grad_norm": 0.25925353169441223, "learning_rate": 0.0001, "loss": 1.4734, "step": 4018 }, { "epoch": 0.6453114964675658, "grad_norm": 0.25807052850723267, "learning_rate": 0.0001, "loss": 1.5146, "step": 4019 }, { "epoch": 0.6454720616570327, "grad_norm": 0.25807955861091614, "learning_rate": 0.0001, "loss": 1.4413, "step": 4020 }, { "epoch": 0.6456326268464997, "grad_norm": 0.25325387716293335, "learning_rate": 0.0001, "loss": 1.4866, "step": 4021 }, { "epoch": 0.6457931920359666, "grad_norm": 0.27402958273887634, "learning_rate": 0.0001, "loss": 1.489, "step": 4022 }, { "epoch": 0.6459537572254336, "grad_norm": 0.25539112091064453, "learning_rate": 0.0001, "loss": 1.4931, "step": 4023 }, { "epoch": 0.6461143224149004, "grad_norm": 0.2613573372364044, "learning_rate": 0.0001, "loss": 1.5178, "step": 4024 }, { "epoch": 0.6462748876043674, "grad_norm": 0.2629983425140381, "learning_rate": 0.0001, "loss": 1.4594, "step": 4025 }, { "epoch": 0.6464354527938343, "grad_norm": 0.27503475546836853, "learning_rate": 0.0001, "loss": 1.4597, "step": 4026 }, { "epoch": 0.6465960179833012, "grad_norm": 0.254550576210022, "learning_rate": 0.0001, "loss": 1.4578, "step": 4027 }, { "epoch": 0.6467565831727682, "grad_norm": 0.2574964761734009, "learning_rate": 0.0001, "loss": 1.3582, "step": 4028 }, { "epoch": 0.646917148362235, "grad_norm": 0.2585909366607666, "learning_rate": 0.0001, "loss": 1.4848, "step": 4029 }, { "epoch": 0.647077713551702, "grad_norm": 0.2635752558708191, "learning_rate": 0.0001, "loss": 1.5383, "step": 4030 }, { "epoch": 0.6472382787411689, "grad_norm": 0.2808038592338562, "learning_rate": 0.0001, "loss": 1.5712, "step": 4031 }, { "epoch": 0.6473988439306358, "grad_norm": 0.27354416251182556, "learning_rate": 0.0001, "loss": 1.5538, "step": 4032 }, { "epoch": 0.6475594091201028, "grad_norm": 0.2562684416770935, "learning_rate": 0.0001, "loss": 1.4289, "step": 4033 }, { "epoch": 0.6477199743095697, "grad_norm": 0.27846622467041016, "learning_rate": 0.0001, "loss": 1.5365, "step": 4034 }, { "epoch": 0.6478805394990366, "grad_norm": 0.26496443152427673, "learning_rate": 0.0001, "loss": 1.5764, "step": 4035 }, { "epoch": 0.6480411046885035, "grad_norm": 0.25747424364089966, "learning_rate": 0.0001, "loss": 1.5748, "step": 4036 }, { "epoch": 0.6482016698779705, "grad_norm": 0.2745968699455261, "learning_rate": 0.0001, "loss": 1.5252, "step": 4037 }, { "epoch": 0.6483622350674374, "grad_norm": 0.2700631320476532, "learning_rate": 0.0001, "loss": 1.506, "step": 4038 }, { "epoch": 0.6485228002569043, "grad_norm": 0.26957178115844727, "learning_rate": 0.0001, "loss": 1.5065, "step": 4039 }, { "epoch": 0.6486833654463712, "grad_norm": 0.25552108883857727, "learning_rate": 0.0001, "loss": 1.4456, "step": 4040 }, { "epoch": 0.6488439306358381, "grad_norm": 0.2736758589744568, "learning_rate": 0.0001, "loss": 1.5181, "step": 4041 }, { "epoch": 0.6490044958253051, "grad_norm": 0.2582840621471405, "learning_rate": 0.0001, "loss": 1.4804, "step": 4042 }, { "epoch": 0.649165061014772, "grad_norm": 0.25374460220336914, "learning_rate": 0.0001, "loss": 1.5333, "step": 4043 }, { "epoch": 0.649325626204239, "grad_norm": 0.2682127058506012, "learning_rate": 0.0001, "loss": 1.4528, "step": 4044 }, { "epoch": 0.6494861913937059, "grad_norm": 0.26875752210617065, "learning_rate": 0.0001, "loss": 1.5214, "step": 4045 }, { "epoch": 0.6496467565831727, "grad_norm": 0.2548571825027466, "learning_rate": 0.0001, "loss": 1.5197, "step": 4046 }, { "epoch": 0.6498073217726397, "grad_norm": 0.24765118956565857, "learning_rate": 0.0001, "loss": 1.4662, "step": 4047 }, { "epoch": 0.6499678869621066, "grad_norm": 0.26132071018218994, "learning_rate": 0.0001, "loss": 1.4996, "step": 4048 }, { "epoch": 0.6501284521515736, "grad_norm": 0.25638461112976074, "learning_rate": 0.0001, "loss": 1.4963, "step": 4049 }, { "epoch": 0.6502890173410405, "grad_norm": 0.25048828125, "learning_rate": 0.0001, "loss": 1.3668, "step": 4050 }, { "epoch": 0.6504495825305073, "grad_norm": 0.2597469091415405, "learning_rate": 0.0001, "loss": 1.4992, "step": 4051 }, { "epoch": 0.6506101477199743, "grad_norm": 0.25522956252098083, "learning_rate": 0.0001, "loss": 1.5644, "step": 4052 }, { "epoch": 0.6507707129094412, "grad_norm": 0.351707398891449, "learning_rate": 0.0001, "loss": 1.4995, "step": 4053 }, { "epoch": 0.6509312780989082, "grad_norm": 0.2668572664260864, "learning_rate": 0.0001, "loss": 1.5145, "step": 4054 }, { "epoch": 0.6510918432883751, "grad_norm": 0.24314342439174652, "learning_rate": 0.0001, "loss": 1.4478, "step": 4055 }, { "epoch": 0.651252408477842, "grad_norm": 0.2775131165981293, "learning_rate": 0.0001, "loss": 1.5582, "step": 4056 }, { "epoch": 0.6514129736673089, "grad_norm": 0.24780496954917908, "learning_rate": 0.0001, "loss": 1.4818, "step": 4057 }, { "epoch": 0.6515735388567758, "grad_norm": 0.2634061574935913, "learning_rate": 0.0001, "loss": 1.5423, "step": 4058 }, { "epoch": 0.6517341040462428, "grad_norm": 0.2591491639614105, "learning_rate": 0.0001, "loss": 1.5061, "step": 4059 }, { "epoch": 0.6518946692357097, "grad_norm": 0.2679141163825989, "learning_rate": 0.0001, "loss": 1.5486, "step": 4060 }, { "epoch": 0.6520552344251767, "grad_norm": 0.27372705936431885, "learning_rate": 0.0001, "loss": 1.6082, "step": 4061 }, { "epoch": 0.6522157996146436, "grad_norm": 0.2574971914291382, "learning_rate": 0.0001, "loss": 1.643, "step": 4062 }, { "epoch": 0.6523763648041104, "grad_norm": 0.26333364844322205, "learning_rate": 0.0001, "loss": 1.5374, "step": 4063 }, { "epoch": 0.6525369299935774, "grad_norm": 0.2890571057796478, "learning_rate": 0.0001, "loss": 1.5404, "step": 4064 }, { "epoch": 0.6526974951830443, "grad_norm": 0.26847440004348755, "learning_rate": 0.0001, "loss": 1.4936, "step": 4065 }, { "epoch": 0.6528580603725113, "grad_norm": 0.2642154097557068, "learning_rate": 0.0001, "loss": 1.538, "step": 4066 }, { "epoch": 0.6530186255619782, "grad_norm": 0.29670676589012146, "learning_rate": 0.0001, "loss": 1.5223, "step": 4067 }, { "epoch": 0.653179190751445, "grad_norm": 0.2784339487552643, "learning_rate": 0.0001, "loss": 1.553, "step": 4068 }, { "epoch": 0.653339755940912, "grad_norm": 0.2552869915962219, "learning_rate": 0.0001, "loss": 1.3798, "step": 4069 }, { "epoch": 0.6535003211303789, "grad_norm": 0.26420438289642334, "learning_rate": 0.0001, "loss": 1.4712, "step": 4070 }, { "epoch": 0.6536608863198459, "grad_norm": 0.26188531517982483, "learning_rate": 0.0001, "loss": 1.5179, "step": 4071 }, { "epoch": 0.6538214515093128, "grad_norm": 0.25568708777427673, "learning_rate": 0.0001, "loss": 1.4788, "step": 4072 }, { "epoch": 0.6539820166987798, "grad_norm": 0.2482534795999527, "learning_rate": 0.0001, "loss": 1.4883, "step": 4073 }, { "epoch": 0.6541425818882466, "grad_norm": 0.269436240196228, "learning_rate": 0.0001, "loss": 1.4757, "step": 4074 }, { "epoch": 0.6543031470777135, "grad_norm": 0.2498292624950409, "learning_rate": 0.0001, "loss": 1.4839, "step": 4075 }, { "epoch": 0.6544637122671805, "grad_norm": 0.2830914258956909, "learning_rate": 0.0001, "loss": 1.4721, "step": 4076 }, { "epoch": 0.6546242774566474, "grad_norm": 0.28456372022628784, "learning_rate": 0.0001, "loss": 1.5467, "step": 4077 }, { "epoch": 0.6547848426461144, "grad_norm": 0.27140548825263977, "learning_rate": 0.0001, "loss": 1.6219, "step": 4078 }, { "epoch": 0.6549454078355812, "grad_norm": 0.26949694752693176, "learning_rate": 0.0001, "loss": 1.5581, "step": 4079 }, { "epoch": 0.6551059730250481, "grad_norm": 0.27604708075523376, "learning_rate": 0.0001, "loss": 1.5329, "step": 4080 }, { "epoch": 0.6552665382145151, "grad_norm": 0.2518842816352844, "learning_rate": 0.0001, "loss": 1.4373, "step": 4081 }, { "epoch": 0.655427103403982, "grad_norm": 0.26696524024009705, "learning_rate": 0.0001, "loss": 1.3639, "step": 4082 }, { "epoch": 0.655587668593449, "grad_norm": 0.2709606885910034, "learning_rate": 0.0001, "loss": 1.445, "step": 4083 }, { "epoch": 0.6557482337829159, "grad_norm": 0.2692444324493408, "learning_rate": 0.0001, "loss": 1.4507, "step": 4084 }, { "epoch": 0.6559087989723827, "grad_norm": 0.2680546045303345, "learning_rate": 0.0001, "loss": 1.5252, "step": 4085 }, { "epoch": 0.6560693641618497, "grad_norm": 0.278359055519104, "learning_rate": 0.0001, "loss": 1.4262, "step": 4086 }, { "epoch": 0.6562299293513166, "grad_norm": 0.27649831771850586, "learning_rate": 0.0001, "loss": 1.5494, "step": 4087 }, { "epoch": 0.6563904945407836, "grad_norm": 0.2656194865703583, "learning_rate": 0.0001, "loss": 1.5439, "step": 4088 }, { "epoch": 0.6565510597302505, "grad_norm": 0.26811110973358154, "learning_rate": 0.0001, "loss": 1.4896, "step": 4089 }, { "epoch": 0.6567116249197174, "grad_norm": 0.2669956386089325, "learning_rate": 0.0001, "loss": 1.4648, "step": 4090 }, { "epoch": 0.6568721901091843, "grad_norm": 0.2634719908237457, "learning_rate": 0.0001, "loss": 1.4921, "step": 4091 }, { "epoch": 0.6570327552986512, "grad_norm": 0.2633553445339203, "learning_rate": 0.0001, "loss": 1.5851, "step": 4092 }, { "epoch": 0.6571933204881182, "grad_norm": 0.25439122319221497, "learning_rate": 0.0001, "loss": 1.471, "step": 4093 }, { "epoch": 0.6573538856775851, "grad_norm": 0.28257036209106445, "learning_rate": 0.0001, "loss": 1.5814, "step": 4094 }, { "epoch": 0.6575144508670521, "grad_norm": 0.26080578565597534, "learning_rate": 0.0001, "loss": 1.484, "step": 4095 }, { "epoch": 0.6576750160565189, "grad_norm": 0.28057047724723816, "learning_rate": 0.0001, "loss": 1.5782, "step": 4096 }, { "epoch": 0.6578355812459858, "grad_norm": 0.25944429636001587, "learning_rate": 0.0001, "loss": 1.5081, "step": 4097 }, { "epoch": 0.6579961464354528, "grad_norm": 0.2557280361652374, "learning_rate": 0.0001, "loss": 1.5102, "step": 4098 }, { "epoch": 0.6581567116249197, "grad_norm": 0.2652299702167511, "learning_rate": 0.0001, "loss": 1.5274, "step": 4099 }, { "epoch": 0.6583172768143867, "grad_norm": 0.26031339168548584, "learning_rate": 0.0001, "loss": 1.5459, "step": 4100 }, { "epoch": 0.6584778420038536, "grad_norm": 0.2873890995979309, "learning_rate": 0.0001, "loss": 1.5146, "step": 4101 }, { "epoch": 0.6586384071933205, "grad_norm": 0.2638516128063202, "learning_rate": 0.0001, "loss": 1.5402, "step": 4102 }, { "epoch": 0.6587989723827874, "grad_norm": 0.2758546471595764, "learning_rate": 0.0001, "loss": 1.5576, "step": 4103 }, { "epoch": 0.6589595375722543, "grad_norm": 0.25617489218711853, "learning_rate": 0.0001, "loss": 1.5061, "step": 4104 }, { "epoch": 0.6591201027617213, "grad_norm": 0.27788490056991577, "learning_rate": 0.0001, "loss": 1.5287, "step": 4105 }, { "epoch": 0.6592806679511882, "grad_norm": 0.27403321862220764, "learning_rate": 0.0001, "loss": 1.5544, "step": 4106 }, { "epoch": 0.6594412331406551, "grad_norm": 0.26027989387512207, "learning_rate": 0.0001, "loss": 1.61, "step": 4107 }, { "epoch": 0.659601798330122, "grad_norm": 0.266972154378891, "learning_rate": 0.0001, "loss": 1.5504, "step": 4108 }, { "epoch": 0.659762363519589, "grad_norm": 0.2704896628856659, "learning_rate": 0.0001, "loss": 1.547, "step": 4109 }, { "epoch": 0.6599229287090559, "grad_norm": 0.2757876217365265, "learning_rate": 0.0001, "loss": 1.4804, "step": 4110 }, { "epoch": 0.6600834938985228, "grad_norm": 0.2564221918582916, "learning_rate": 0.0001, "loss": 1.5409, "step": 4111 }, { "epoch": 0.6602440590879898, "grad_norm": 0.27693742513656616, "learning_rate": 0.0001, "loss": 1.4758, "step": 4112 }, { "epoch": 0.6604046242774566, "grad_norm": 0.2597733736038208, "learning_rate": 0.0001, "loss": 1.4343, "step": 4113 }, { "epoch": 0.6605651894669236, "grad_norm": 0.26988744735717773, "learning_rate": 0.0001, "loss": 1.522, "step": 4114 }, { "epoch": 0.6607257546563905, "grad_norm": 0.2563435137271881, "learning_rate": 0.0001, "loss": 1.451, "step": 4115 }, { "epoch": 0.6608863198458574, "grad_norm": 0.2942071557044983, "learning_rate": 0.0001, "loss": 1.5037, "step": 4116 }, { "epoch": 0.6610468850353244, "grad_norm": 0.2602013945579529, "learning_rate": 0.0001, "loss": 1.4699, "step": 4117 }, { "epoch": 0.6612074502247912, "grad_norm": 0.28853535652160645, "learning_rate": 0.0001, "loss": 1.417, "step": 4118 }, { "epoch": 0.6613680154142582, "grad_norm": 0.2661252021789551, "learning_rate": 0.0001, "loss": 1.4368, "step": 4119 }, { "epoch": 0.6615285806037251, "grad_norm": 0.27435213327407837, "learning_rate": 0.0001, "loss": 1.4582, "step": 4120 }, { "epoch": 0.661689145793192, "grad_norm": 0.2826497554779053, "learning_rate": 0.0001, "loss": 1.593, "step": 4121 }, { "epoch": 0.661849710982659, "grad_norm": 0.25823888182640076, "learning_rate": 0.0001, "loss": 1.4628, "step": 4122 }, { "epoch": 0.6620102761721259, "grad_norm": 0.27879124879837036, "learning_rate": 0.0001, "loss": 1.4943, "step": 4123 }, { "epoch": 0.6621708413615928, "grad_norm": 0.25891903042793274, "learning_rate": 0.0001, "loss": 1.4956, "step": 4124 }, { "epoch": 0.6623314065510597, "grad_norm": 0.261674165725708, "learning_rate": 0.0001, "loss": 1.4694, "step": 4125 }, { "epoch": 0.6624919717405267, "grad_norm": 0.2857045531272888, "learning_rate": 0.0001, "loss": 1.4331, "step": 4126 }, { "epoch": 0.6626525369299936, "grad_norm": 0.273318350315094, "learning_rate": 0.0001, "loss": 1.4926, "step": 4127 }, { "epoch": 0.6628131021194605, "grad_norm": 0.2702943682670593, "learning_rate": 0.0001, "loss": 1.5398, "step": 4128 }, { "epoch": 0.6629736673089274, "grad_norm": 0.2519720196723938, "learning_rate": 0.0001, "loss": 1.4458, "step": 4129 }, { "epoch": 0.6631342324983943, "grad_norm": 0.2591875493526459, "learning_rate": 0.0001, "loss": 1.4549, "step": 4130 }, { "epoch": 0.6632947976878613, "grad_norm": 0.263841837644577, "learning_rate": 0.0001, "loss": 1.5494, "step": 4131 }, { "epoch": 0.6634553628773282, "grad_norm": 0.26048779487609863, "learning_rate": 0.0001, "loss": 1.5098, "step": 4132 }, { "epoch": 0.6636159280667951, "grad_norm": 0.26579147577285767, "learning_rate": 0.0001, "loss": 1.5549, "step": 4133 }, { "epoch": 0.6637764932562621, "grad_norm": 0.262335866689682, "learning_rate": 0.0001, "loss": 1.5588, "step": 4134 }, { "epoch": 0.6639370584457289, "grad_norm": 0.26614880561828613, "learning_rate": 0.0001, "loss": 1.4807, "step": 4135 }, { "epoch": 0.6640976236351959, "grad_norm": 0.2592613101005554, "learning_rate": 0.0001, "loss": 1.4901, "step": 4136 }, { "epoch": 0.6642581888246628, "grad_norm": 0.25582200288772583, "learning_rate": 0.0001, "loss": 1.4766, "step": 4137 }, { "epoch": 0.6644187540141298, "grad_norm": 0.26614314317703247, "learning_rate": 0.0001, "loss": 1.5446, "step": 4138 }, { "epoch": 0.6645793192035967, "grad_norm": 0.2539105713367462, "learning_rate": 0.0001, "loss": 1.5006, "step": 4139 }, { "epoch": 0.6647398843930635, "grad_norm": 0.2665300667285919, "learning_rate": 0.0001, "loss": 1.5861, "step": 4140 }, { "epoch": 0.6649004495825305, "grad_norm": 0.2647922933101654, "learning_rate": 0.0001, "loss": 1.5404, "step": 4141 }, { "epoch": 0.6650610147719974, "grad_norm": 0.25334295630455017, "learning_rate": 0.0001, "loss": 1.4352, "step": 4142 }, { "epoch": 0.6652215799614644, "grad_norm": 0.27983757853507996, "learning_rate": 0.0001, "loss": 1.5223, "step": 4143 }, { "epoch": 0.6653821451509313, "grad_norm": 0.2596900761127472, "learning_rate": 0.0001, "loss": 1.5112, "step": 4144 }, { "epoch": 0.6655427103403982, "grad_norm": 0.26293647289276123, "learning_rate": 0.0001, "loss": 1.5191, "step": 4145 }, { "epoch": 0.6657032755298651, "grad_norm": 0.46495234966278076, "learning_rate": 0.0001, "loss": 1.4792, "step": 4146 }, { "epoch": 0.665863840719332, "grad_norm": 0.26407575607299805, "learning_rate": 0.0001, "loss": 1.481, "step": 4147 }, { "epoch": 0.666024405908799, "grad_norm": 0.46885931491851807, "learning_rate": 0.0001, "loss": 1.5224, "step": 4148 }, { "epoch": 0.6661849710982659, "grad_norm": 0.268364280462265, "learning_rate": 0.0001, "loss": 1.4719, "step": 4149 }, { "epoch": 0.6663455362877329, "grad_norm": 0.2675420045852661, "learning_rate": 0.0001, "loss": 1.4783, "step": 4150 }, { "epoch": 0.6665061014771998, "grad_norm": 0.26824304461479187, "learning_rate": 0.0001, "loss": 1.5892, "step": 4151 }, { "epoch": 0.6666666666666666, "grad_norm": 0.25549522042274475, "learning_rate": 0.0001, "loss": 1.5058, "step": 4152 }, { "epoch": 0.6668272318561336, "grad_norm": 0.24786025285720825, "learning_rate": 0.0001, "loss": 1.5099, "step": 4153 }, { "epoch": 0.6669877970456005, "grad_norm": 0.2666029930114746, "learning_rate": 0.0001, "loss": 1.5409, "step": 4154 }, { "epoch": 0.6671483622350675, "grad_norm": 0.28225189447402954, "learning_rate": 0.0001, "loss": 1.5035, "step": 4155 }, { "epoch": 0.6673089274245344, "grad_norm": 0.2664307653903961, "learning_rate": 0.0001, "loss": 1.5109, "step": 4156 }, { "epoch": 0.6674694926140012, "grad_norm": 0.2636362910270691, "learning_rate": 0.0001, "loss": 1.5063, "step": 4157 }, { "epoch": 0.6676300578034682, "grad_norm": 0.27478745579719543, "learning_rate": 0.0001, "loss": 1.5109, "step": 4158 }, { "epoch": 0.6677906229929351, "grad_norm": 0.28328198194503784, "learning_rate": 0.0001, "loss": 1.4975, "step": 4159 }, { "epoch": 0.6679511881824021, "grad_norm": 0.2634706199169159, "learning_rate": 0.0001, "loss": 1.5536, "step": 4160 }, { "epoch": 0.668111753371869, "grad_norm": 0.26545238494873047, "learning_rate": 0.0001, "loss": 1.448, "step": 4161 }, { "epoch": 0.668272318561336, "grad_norm": 0.2772989869117737, "learning_rate": 0.0001, "loss": 1.5411, "step": 4162 }, { "epoch": 0.6684328837508028, "grad_norm": 0.26372072100639343, "learning_rate": 0.0001, "loss": 1.5296, "step": 4163 }, { "epoch": 0.6685934489402697, "grad_norm": 0.2692832052707672, "learning_rate": 0.0001, "loss": 1.4802, "step": 4164 }, { "epoch": 0.6687540141297367, "grad_norm": 0.2728932499885559, "learning_rate": 0.0001, "loss": 1.5573, "step": 4165 }, { "epoch": 0.6689145793192036, "grad_norm": 0.25267428159713745, "learning_rate": 0.0001, "loss": 1.5656, "step": 4166 }, { "epoch": 0.6690751445086706, "grad_norm": 0.25965332984924316, "learning_rate": 0.0001, "loss": 1.5448, "step": 4167 }, { "epoch": 0.6692357096981374, "grad_norm": 0.2715737819671631, "learning_rate": 0.0001, "loss": 1.4931, "step": 4168 }, { "epoch": 0.6693962748876043, "grad_norm": 0.263933390378952, "learning_rate": 0.0001, "loss": 1.4785, "step": 4169 }, { "epoch": 0.6695568400770713, "grad_norm": 0.26766473054885864, "learning_rate": 0.0001, "loss": 1.4657, "step": 4170 }, { "epoch": 0.6697174052665382, "grad_norm": 0.2798439860343933, "learning_rate": 0.0001, "loss": 1.5415, "step": 4171 }, { "epoch": 0.6698779704560052, "grad_norm": 0.2672432065010071, "learning_rate": 0.0001, "loss": 1.5437, "step": 4172 }, { "epoch": 0.6700385356454721, "grad_norm": 0.2547382116317749, "learning_rate": 0.0001, "loss": 1.4333, "step": 4173 }, { "epoch": 0.6701991008349389, "grad_norm": 0.26243576407432556, "learning_rate": 0.0001, "loss": 1.5311, "step": 4174 }, { "epoch": 0.6703596660244059, "grad_norm": 0.27144116163253784, "learning_rate": 0.0001, "loss": 1.5626, "step": 4175 }, { "epoch": 0.6705202312138728, "grad_norm": 0.27108368277549744, "learning_rate": 0.0001, "loss": 1.4498, "step": 4176 }, { "epoch": 0.6706807964033398, "grad_norm": 0.272928923368454, "learning_rate": 0.0001, "loss": 1.5726, "step": 4177 }, { "epoch": 0.6708413615928067, "grad_norm": 0.25330835580825806, "learning_rate": 0.0001, "loss": 1.4942, "step": 4178 }, { "epoch": 0.6710019267822736, "grad_norm": 0.25305670499801636, "learning_rate": 0.0001, "loss": 1.4918, "step": 4179 }, { "epoch": 0.6711624919717405, "grad_norm": 0.2702277600765228, "learning_rate": 0.0001, "loss": 1.5377, "step": 4180 }, { "epoch": 0.6713230571612074, "grad_norm": 0.2533726990222931, "learning_rate": 0.0001, "loss": 1.4688, "step": 4181 }, { "epoch": 0.6714836223506744, "grad_norm": 0.2574719488620758, "learning_rate": 0.0001, "loss": 1.5209, "step": 4182 }, { "epoch": 0.6716441875401413, "grad_norm": 0.2595611810684204, "learning_rate": 0.0001, "loss": 1.4995, "step": 4183 }, { "epoch": 0.6718047527296083, "grad_norm": 0.25201043486595154, "learning_rate": 0.0001, "loss": 1.471, "step": 4184 }, { "epoch": 0.6719653179190751, "grad_norm": 0.2750900089740753, "learning_rate": 0.0001, "loss": 1.4705, "step": 4185 }, { "epoch": 0.672125883108542, "grad_norm": 0.26094889640808105, "learning_rate": 0.0001, "loss": 1.5288, "step": 4186 }, { "epoch": 0.672286448298009, "grad_norm": 0.25764408707618713, "learning_rate": 0.0001, "loss": 1.4275, "step": 4187 }, { "epoch": 0.6724470134874759, "grad_norm": 0.25645217299461365, "learning_rate": 0.0001, "loss": 1.4675, "step": 4188 }, { "epoch": 0.6726075786769429, "grad_norm": 0.2957596778869629, "learning_rate": 0.0001, "loss": 1.4583, "step": 4189 }, { "epoch": 0.6727681438664097, "grad_norm": 0.2649129629135132, "learning_rate": 0.0001, "loss": 1.5384, "step": 4190 }, { "epoch": 0.6729287090558767, "grad_norm": 0.2660101652145386, "learning_rate": 0.0001, "loss": 1.389, "step": 4191 }, { "epoch": 0.6730892742453436, "grad_norm": 0.2686706483364105, "learning_rate": 0.0001, "loss": 1.5432, "step": 4192 }, { "epoch": 0.6732498394348105, "grad_norm": 0.2836068272590637, "learning_rate": 0.0001, "loss": 1.4721, "step": 4193 }, { "epoch": 0.6734104046242775, "grad_norm": 0.25428712368011475, "learning_rate": 0.0001, "loss": 1.4336, "step": 4194 }, { "epoch": 0.6735709698137444, "grad_norm": 0.2774694859981537, "learning_rate": 0.0001, "loss": 1.4709, "step": 4195 }, { "epoch": 0.6737315350032113, "grad_norm": 0.2620084583759308, "learning_rate": 0.0001, "loss": 1.4472, "step": 4196 }, { "epoch": 0.6738921001926782, "grad_norm": 0.2742082476615906, "learning_rate": 0.0001, "loss": 1.6028, "step": 4197 }, { "epoch": 0.6740526653821451, "grad_norm": 0.2725132405757904, "learning_rate": 0.0001, "loss": 1.4722, "step": 4198 }, { "epoch": 0.6742132305716121, "grad_norm": 0.26817259192466736, "learning_rate": 0.0001, "loss": 1.5051, "step": 4199 }, { "epoch": 0.674373795761079, "grad_norm": 0.27134665846824646, "learning_rate": 0.0001, "loss": 1.4977, "step": 4200 }, { "epoch": 0.674534360950546, "grad_norm": 0.2610231935977936, "learning_rate": 0.0001, "loss": 1.5038, "step": 4201 }, { "epoch": 0.6746949261400128, "grad_norm": 0.2780536413192749, "learning_rate": 0.0001, "loss": 1.5502, "step": 4202 }, { "epoch": 0.6748554913294798, "grad_norm": 0.26236626505851746, "learning_rate": 0.0001, "loss": 1.539, "step": 4203 }, { "epoch": 0.6750160565189467, "grad_norm": 0.2582312226295471, "learning_rate": 0.0001, "loss": 1.4549, "step": 4204 }, { "epoch": 0.6751766217084136, "grad_norm": 0.26440122723579407, "learning_rate": 0.0001, "loss": 1.5539, "step": 4205 }, { "epoch": 0.6753371868978806, "grad_norm": 0.26772627234458923, "learning_rate": 0.0001, "loss": 1.4856, "step": 4206 }, { "epoch": 0.6754977520873474, "grad_norm": 0.25626301765441895, "learning_rate": 0.0001, "loss": 1.5426, "step": 4207 }, { "epoch": 0.6756583172768144, "grad_norm": 0.2645321190357208, "learning_rate": 0.0001, "loss": 1.459, "step": 4208 }, { "epoch": 0.6758188824662813, "grad_norm": 0.2824168801307678, "learning_rate": 0.0001, "loss": 1.4617, "step": 4209 }, { "epoch": 0.6759794476557482, "grad_norm": 0.25180715322494507, "learning_rate": 0.0001, "loss": 1.4945, "step": 4210 }, { "epoch": 0.6761400128452152, "grad_norm": 0.27988308668136597, "learning_rate": 0.0001, "loss": 1.4962, "step": 4211 }, { "epoch": 0.6763005780346821, "grad_norm": 0.2720843553543091, "learning_rate": 0.0001, "loss": 1.4862, "step": 4212 }, { "epoch": 0.676461143224149, "grad_norm": 0.24799558520317078, "learning_rate": 0.0001, "loss": 1.4484, "step": 4213 }, { "epoch": 0.6766217084136159, "grad_norm": 0.27907493710517883, "learning_rate": 0.0001, "loss": 1.5056, "step": 4214 }, { "epoch": 0.6767822736030829, "grad_norm": 0.26135680079460144, "learning_rate": 0.0001, "loss": 1.4847, "step": 4215 }, { "epoch": 0.6769428387925498, "grad_norm": 0.25625544786453247, "learning_rate": 0.0001, "loss": 1.5233, "step": 4216 }, { "epoch": 0.6771034039820167, "grad_norm": 0.2576616108417511, "learning_rate": 0.0001, "loss": 1.54, "step": 4217 }, { "epoch": 0.6772639691714836, "grad_norm": 0.2574950158596039, "learning_rate": 0.0001, "loss": 1.4897, "step": 4218 }, { "epoch": 0.6774245343609505, "grad_norm": 0.2586347460746765, "learning_rate": 0.0001, "loss": 1.447, "step": 4219 }, { "epoch": 0.6775850995504175, "grad_norm": 0.24843011796474457, "learning_rate": 0.0001, "loss": 1.525, "step": 4220 }, { "epoch": 0.6777456647398844, "grad_norm": 0.2628840208053589, "learning_rate": 0.0001, "loss": 1.5484, "step": 4221 }, { "epoch": 0.6779062299293513, "grad_norm": 0.2488044947385788, "learning_rate": 0.0001, "loss": 1.4058, "step": 4222 }, { "epoch": 0.6780667951188183, "grad_norm": 0.27336618304252625, "learning_rate": 0.0001, "loss": 1.554, "step": 4223 }, { "epoch": 0.6782273603082851, "grad_norm": 0.2552751898765564, "learning_rate": 0.0001, "loss": 1.4675, "step": 4224 }, { "epoch": 0.6783879254977521, "grad_norm": 0.26962193846702576, "learning_rate": 0.0001, "loss": 1.5626, "step": 4225 }, { "epoch": 0.678548490687219, "grad_norm": 0.2638053297996521, "learning_rate": 0.0001, "loss": 1.4879, "step": 4226 }, { "epoch": 0.678709055876686, "grad_norm": 0.2734527289867401, "learning_rate": 0.0001, "loss": 1.4906, "step": 4227 }, { "epoch": 0.6788696210661529, "grad_norm": 0.2864174544811249, "learning_rate": 0.0001, "loss": 1.5056, "step": 4228 }, { "epoch": 0.6790301862556197, "grad_norm": 0.24866554141044617, "learning_rate": 0.0001, "loss": 1.5035, "step": 4229 }, { "epoch": 0.6791907514450867, "grad_norm": 0.2756553292274475, "learning_rate": 0.0001, "loss": 1.5121, "step": 4230 }, { "epoch": 0.6793513166345536, "grad_norm": 0.2636953294277191, "learning_rate": 0.0001, "loss": 1.5706, "step": 4231 }, { "epoch": 0.6795118818240206, "grad_norm": 0.2574693262577057, "learning_rate": 0.0001, "loss": 1.5137, "step": 4232 }, { "epoch": 0.6796724470134875, "grad_norm": 0.25836923718452454, "learning_rate": 0.0001, "loss": 1.5287, "step": 4233 }, { "epoch": 0.6798330122029544, "grad_norm": 0.2573105990886688, "learning_rate": 0.0001, "loss": 1.4541, "step": 4234 }, { "epoch": 0.6799935773924213, "grad_norm": 0.2674020230770111, "learning_rate": 0.0001, "loss": 1.5424, "step": 4235 }, { "epoch": 0.6801541425818882, "grad_norm": 0.269796222448349, "learning_rate": 0.0001, "loss": 1.6454, "step": 4236 }, { "epoch": 0.6803147077713552, "grad_norm": 0.2537638247013092, "learning_rate": 0.0001, "loss": 1.4866, "step": 4237 }, { "epoch": 0.6804752729608221, "grad_norm": 0.2629987299442291, "learning_rate": 0.0001, "loss": 1.4895, "step": 4238 }, { "epoch": 0.680635838150289, "grad_norm": 0.2534637749195099, "learning_rate": 0.0001, "loss": 1.4874, "step": 4239 }, { "epoch": 0.680796403339756, "grad_norm": 0.26694411039352417, "learning_rate": 0.0001, "loss": 1.5057, "step": 4240 }, { "epoch": 0.6809569685292228, "grad_norm": 0.25374630093574524, "learning_rate": 0.0001, "loss": 1.4128, "step": 4241 }, { "epoch": 0.6811175337186898, "grad_norm": 0.258924663066864, "learning_rate": 0.0001, "loss": 1.4962, "step": 4242 }, { "epoch": 0.6812780989081567, "grad_norm": 0.2512001693248749, "learning_rate": 0.0001, "loss": 1.4155, "step": 4243 }, { "epoch": 0.6814386640976237, "grad_norm": 0.2707739472389221, "learning_rate": 0.0001, "loss": 1.5236, "step": 4244 }, { "epoch": 0.6815992292870906, "grad_norm": 0.2508571445941925, "learning_rate": 0.0001, "loss": 1.5075, "step": 4245 }, { "epoch": 0.6817597944765574, "grad_norm": 0.2570149898529053, "learning_rate": 0.0001, "loss": 1.4935, "step": 4246 }, { "epoch": 0.6819203596660244, "grad_norm": 0.25217339396476746, "learning_rate": 0.0001, "loss": 1.4927, "step": 4247 }, { "epoch": 0.6820809248554913, "grad_norm": 0.26150375604629517, "learning_rate": 0.0001, "loss": 1.4548, "step": 4248 }, { "epoch": 0.6822414900449583, "grad_norm": 0.25464996695518494, "learning_rate": 0.0001, "loss": 1.4027, "step": 4249 }, { "epoch": 0.6824020552344252, "grad_norm": 0.2840500771999359, "learning_rate": 0.0001, "loss": 1.5993, "step": 4250 }, { "epoch": 0.6825626204238922, "grad_norm": 0.2636156678199768, "learning_rate": 0.0001, "loss": 1.4286, "step": 4251 }, { "epoch": 0.682723185613359, "grad_norm": 0.26296964287757874, "learning_rate": 0.0001, "loss": 1.4855, "step": 4252 }, { "epoch": 0.6828837508028259, "grad_norm": 0.2587028443813324, "learning_rate": 0.0001, "loss": 1.4446, "step": 4253 }, { "epoch": 0.6830443159922929, "grad_norm": 0.2679729759693146, "learning_rate": 0.0001, "loss": 1.5407, "step": 4254 }, { "epoch": 0.6832048811817598, "grad_norm": 0.25425592064857483, "learning_rate": 0.0001, "loss": 1.4615, "step": 4255 }, { "epoch": 0.6833654463712268, "grad_norm": 0.33618855476379395, "learning_rate": 0.0001, "loss": 1.4126, "step": 4256 }, { "epoch": 0.6835260115606936, "grad_norm": 0.5344606637954712, "learning_rate": 0.0001, "loss": 1.4545, "step": 4257 }, { "epoch": 0.6836865767501605, "grad_norm": 0.2503321170806885, "learning_rate": 0.0001, "loss": 1.4864, "step": 4258 }, { "epoch": 0.6838471419396275, "grad_norm": 0.27189552783966064, "learning_rate": 0.0001, "loss": 1.5107, "step": 4259 }, { "epoch": 0.6840077071290944, "grad_norm": 0.26606759428977966, "learning_rate": 0.0001, "loss": 1.54, "step": 4260 }, { "epoch": 0.6841682723185614, "grad_norm": 0.2644661068916321, "learning_rate": 0.0001, "loss": 1.5584, "step": 4261 }, { "epoch": 0.6843288375080283, "grad_norm": 0.26754721999168396, "learning_rate": 0.0001, "loss": 1.5246, "step": 4262 }, { "epoch": 0.6844894026974951, "grad_norm": 0.26102858781814575, "learning_rate": 0.0001, "loss": 1.4957, "step": 4263 }, { "epoch": 0.6846499678869621, "grad_norm": 0.25447434186935425, "learning_rate": 0.0001, "loss": 1.4867, "step": 4264 }, { "epoch": 0.684810533076429, "grad_norm": 0.2528344392776489, "learning_rate": 0.0001, "loss": 1.4459, "step": 4265 }, { "epoch": 0.684971098265896, "grad_norm": 0.2751440405845642, "learning_rate": 0.0001, "loss": 1.5573, "step": 4266 }, { "epoch": 0.6851316634553629, "grad_norm": 0.34374743700027466, "learning_rate": 0.0001, "loss": 1.4634, "step": 4267 }, { "epoch": 0.6852922286448297, "grad_norm": 0.4083437919616699, "learning_rate": 0.0001, "loss": 1.4515, "step": 4268 }, { "epoch": 0.6854527938342967, "grad_norm": 0.2677876651287079, "learning_rate": 0.0001, "loss": 1.573, "step": 4269 }, { "epoch": 0.6856133590237636, "grad_norm": 0.2757302224636078, "learning_rate": 0.0001, "loss": 1.6004, "step": 4270 }, { "epoch": 0.6857739242132306, "grad_norm": 0.2541894018650055, "learning_rate": 0.0001, "loss": 1.5002, "step": 4271 }, { "epoch": 0.6859344894026975, "grad_norm": 0.2496742159128189, "learning_rate": 0.0001, "loss": 1.4454, "step": 4272 }, { "epoch": 0.6860950545921645, "grad_norm": 0.27230650186538696, "learning_rate": 0.0001, "loss": 1.5389, "step": 4273 }, { "epoch": 0.6862556197816313, "grad_norm": 0.27860227227211, "learning_rate": 0.0001, "loss": 1.5486, "step": 4274 }, { "epoch": 0.6864161849710982, "grad_norm": 0.25846415758132935, "learning_rate": 0.0001, "loss": 1.427, "step": 4275 }, { "epoch": 0.6865767501605652, "grad_norm": 0.26338714361190796, "learning_rate": 0.0001, "loss": 1.5178, "step": 4276 }, { "epoch": 0.6867373153500321, "grad_norm": 0.26182571053504944, "learning_rate": 0.0001, "loss": 1.5063, "step": 4277 }, { "epoch": 0.6868978805394991, "grad_norm": 0.26924988627433777, "learning_rate": 0.0001, "loss": 1.5526, "step": 4278 }, { "epoch": 0.6870584457289659, "grad_norm": 0.26171842217445374, "learning_rate": 0.0001, "loss": 1.4071, "step": 4279 }, { "epoch": 0.6872190109184328, "grad_norm": 0.28873947262763977, "learning_rate": 0.0001, "loss": 1.5053, "step": 4280 }, { "epoch": 0.6873795761078998, "grad_norm": 0.26880842447280884, "learning_rate": 0.0001, "loss": 1.4269, "step": 4281 }, { "epoch": 0.6875401412973667, "grad_norm": 0.274006187915802, "learning_rate": 0.0001, "loss": 1.4469, "step": 4282 }, { "epoch": 0.6877007064868337, "grad_norm": 0.27234819531440735, "learning_rate": 0.0001, "loss": 1.4983, "step": 4283 }, { "epoch": 0.6878612716763006, "grad_norm": 0.2691795229911804, "learning_rate": 0.0001, "loss": 1.5159, "step": 4284 }, { "epoch": 0.6880218368657675, "grad_norm": 0.25503280758857727, "learning_rate": 0.0001, "loss": 1.5067, "step": 4285 }, { "epoch": 0.6881824020552344, "grad_norm": 0.264271080493927, "learning_rate": 0.0001, "loss": 1.5302, "step": 4286 }, { "epoch": 0.6883429672447013, "grad_norm": 0.26026618480682373, "learning_rate": 0.0001, "loss": 1.5167, "step": 4287 }, { "epoch": 0.6885035324341683, "grad_norm": 0.2942197918891907, "learning_rate": 0.0001, "loss": 1.5174, "step": 4288 }, { "epoch": 0.6886640976236352, "grad_norm": 0.26720893383026123, "learning_rate": 0.0001, "loss": 1.4347, "step": 4289 }, { "epoch": 0.6888246628131022, "grad_norm": 0.26280948519706726, "learning_rate": 0.0001, "loss": 1.4533, "step": 4290 }, { "epoch": 0.688985228002569, "grad_norm": 0.27599456906318665, "learning_rate": 0.0001, "loss": 1.5484, "step": 4291 }, { "epoch": 0.689145793192036, "grad_norm": 0.27223873138427734, "learning_rate": 0.0001, "loss": 1.5116, "step": 4292 }, { "epoch": 0.6893063583815029, "grad_norm": 0.26796194911003113, "learning_rate": 0.0001, "loss": 1.4582, "step": 4293 }, { "epoch": 0.6894669235709698, "grad_norm": 0.2743043303489685, "learning_rate": 0.0001, "loss": 1.4849, "step": 4294 }, { "epoch": 0.6896274887604368, "grad_norm": 0.2695828974246979, "learning_rate": 0.0001, "loss": 1.5353, "step": 4295 }, { "epoch": 0.6897880539499036, "grad_norm": 0.27552083134651184, "learning_rate": 0.0001, "loss": 1.4595, "step": 4296 }, { "epoch": 0.6899486191393706, "grad_norm": 0.2738676369190216, "learning_rate": 0.0001, "loss": 1.5096, "step": 4297 }, { "epoch": 0.6901091843288375, "grad_norm": 0.42212623357772827, "learning_rate": 0.0001, "loss": 1.4909, "step": 4298 }, { "epoch": 0.6902697495183044, "grad_norm": 0.25751927495002747, "learning_rate": 0.0001, "loss": 1.5218, "step": 4299 }, { "epoch": 0.6904303147077714, "grad_norm": 0.291474848985672, "learning_rate": 0.0001, "loss": 1.5342, "step": 4300 }, { "epoch": 0.6905908798972383, "grad_norm": 0.26581260561943054, "learning_rate": 0.0001, "loss": 1.4447, "step": 4301 }, { "epoch": 0.6907514450867052, "grad_norm": 0.27196231484413147, "learning_rate": 0.0001, "loss": 1.4482, "step": 4302 }, { "epoch": 0.6909120102761721, "grad_norm": 0.26516249775886536, "learning_rate": 0.0001, "loss": 1.4661, "step": 4303 }, { "epoch": 0.691072575465639, "grad_norm": 0.2661803960800171, "learning_rate": 0.0001, "loss": 1.4621, "step": 4304 }, { "epoch": 0.691233140655106, "grad_norm": 0.26079392433166504, "learning_rate": 0.0001, "loss": 1.5063, "step": 4305 }, { "epoch": 0.6913937058445729, "grad_norm": 0.27813488245010376, "learning_rate": 0.0001, "loss": 1.5284, "step": 4306 }, { "epoch": 0.6915542710340398, "grad_norm": 0.26817786693573, "learning_rate": 0.0001, "loss": 1.5603, "step": 4307 }, { "epoch": 0.6917148362235067, "grad_norm": 0.26582929491996765, "learning_rate": 0.0001, "loss": 1.4259, "step": 4308 }, { "epoch": 0.6918754014129737, "grad_norm": 0.26686498522758484, "learning_rate": 0.0001, "loss": 1.5148, "step": 4309 }, { "epoch": 0.6920359666024406, "grad_norm": 0.25720271468162537, "learning_rate": 0.0001, "loss": 1.4958, "step": 4310 }, { "epoch": 0.6921965317919075, "grad_norm": 0.2693347632884979, "learning_rate": 0.0001, "loss": 1.4241, "step": 4311 }, { "epoch": 0.6923570969813745, "grad_norm": 0.26887163519859314, "learning_rate": 0.0001, "loss": 1.5274, "step": 4312 }, { "epoch": 0.6925176621708413, "grad_norm": 0.2574673891067505, "learning_rate": 0.0001, "loss": 1.5347, "step": 4313 }, { "epoch": 0.6926782273603083, "grad_norm": 0.2512687146663666, "learning_rate": 0.0001, "loss": 1.3923, "step": 4314 }, { "epoch": 0.6928387925497752, "grad_norm": 0.288910835981369, "learning_rate": 0.0001, "loss": 1.4973, "step": 4315 }, { "epoch": 0.6929993577392421, "grad_norm": 0.27544349431991577, "learning_rate": 0.0001, "loss": 1.532, "step": 4316 }, { "epoch": 0.6931599229287091, "grad_norm": 0.25817129015922546, "learning_rate": 0.0001, "loss": 1.5234, "step": 4317 }, { "epoch": 0.6933204881181759, "grad_norm": 0.2737163007259369, "learning_rate": 0.0001, "loss": 1.4919, "step": 4318 }, { "epoch": 0.6934810533076429, "grad_norm": 0.26792851090431213, "learning_rate": 0.0001, "loss": 1.4809, "step": 4319 }, { "epoch": 0.6936416184971098, "grad_norm": 0.26003244519233704, "learning_rate": 0.0001, "loss": 1.4815, "step": 4320 }, { "epoch": 0.6938021836865768, "grad_norm": 0.25619208812713623, "learning_rate": 0.0001, "loss": 1.4865, "step": 4321 }, { "epoch": 0.6939627488760437, "grad_norm": 0.26232337951660156, "learning_rate": 0.0001, "loss": 1.5054, "step": 4322 }, { "epoch": 0.6941233140655106, "grad_norm": 0.2903430461883545, "learning_rate": 0.0001, "loss": 1.429, "step": 4323 }, { "epoch": 0.6942838792549775, "grad_norm": 0.2574857473373413, "learning_rate": 0.0001, "loss": 1.4722, "step": 4324 }, { "epoch": 0.6944444444444444, "grad_norm": 0.2702174782752991, "learning_rate": 0.0001, "loss": 1.4795, "step": 4325 }, { "epoch": 0.6946050096339114, "grad_norm": 0.2641509771347046, "learning_rate": 0.0001, "loss": 1.4928, "step": 4326 }, { "epoch": 0.6947655748233783, "grad_norm": 0.2596450448036194, "learning_rate": 0.0001, "loss": 1.5512, "step": 4327 }, { "epoch": 0.6949261400128453, "grad_norm": 0.2672803997993469, "learning_rate": 0.0001, "loss": 1.5115, "step": 4328 }, { "epoch": 0.6950867052023122, "grad_norm": 0.27464133501052856, "learning_rate": 0.0001, "loss": 1.5322, "step": 4329 }, { "epoch": 0.695247270391779, "grad_norm": 0.26365870237350464, "learning_rate": 0.0001, "loss": 1.5631, "step": 4330 }, { "epoch": 0.695407835581246, "grad_norm": 0.2679399847984314, "learning_rate": 0.0001, "loss": 1.5365, "step": 4331 }, { "epoch": 0.6955684007707129, "grad_norm": 0.2631140649318695, "learning_rate": 0.0001, "loss": 1.4632, "step": 4332 }, { "epoch": 0.6957289659601799, "grad_norm": 0.2667805850505829, "learning_rate": 0.0001, "loss": 1.5183, "step": 4333 }, { "epoch": 0.6958895311496468, "grad_norm": 0.25944942235946655, "learning_rate": 0.0001, "loss": 1.4965, "step": 4334 }, { "epoch": 0.6960500963391136, "grad_norm": 0.25462979078292847, "learning_rate": 0.0001, "loss": 1.5036, "step": 4335 }, { "epoch": 0.6962106615285806, "grad_norm": 0.26245835423469543, "learning_rate": 0.0001, "loss": 1.4849, "step": 4336 }, { "epoch": 0.6963712267180475, "grad_norm": 0.26964670419692993, "learning_rate": 0.0001, "loss": 1.5723, "step": 4337 }, { "epoch": 0.6965317919075145, "grad_norm": 0.2651227116584778, "learning_rate": 0.0001, "loss": 1.5487, "step": 4338 }, { "epoch": 0.6966923570969814, "grad_norm": 0.30446404218673706, "learning_rate": 0.0001, "loss": 1.5096, "step": 4339 }, { "epoch": 0.6968529222864484, "grad_norm": 0.27518704533576965, "learning_rate": 0.0001, "loss": 1.5842, "step": 4340 }, { "epoch": 0.6970134874759152, "grad_norm": 0.25260788202285767, "learning_rate": 0.0001, "loss": 1.4444, "step": 4341 }, { "epoch": 0.6971740526653821, "grad_norm": 0.25623124837875366, "learning_rate": 0.0001, "loss": 1.3767, "step": 4342 }, { "epoch": 0.6973346178548491, "grad_norm": 0.2668977975845337, "learning_rate": 0.0001, "loss": 1.5001, "step": 4343 }, { "epoch": 0.697495183044316, "grad_norm": 0.27028948068618774, "learning_rate": 0.0001, "loss": 1.4352, "step": 4344 }, { "epoch": 0.697655748233783, "grad_norm": 0.27070972323417664, "learning_rate": 0.0001, "loss": 1.6113, "step": 4345 }, { "epoch": 0.6978163134232498, "grad_norm": 0.27184128761291504, "learning_rate": 0.0001, "loss": 1.4989, "step": 4346 }, { "epoch": 0.6979768786127167, "grad_norm": 0.2648637294769287, "learning_rate": 0.0001, "loss": 1.5043, "step": 4347 }, { "epoch": 0.6981374438021837, "grad_norm": 0.2720525562763214, "learning_rate": 0.0001, "loss": 1.5435, "step": 4348 }, { "epoch": 0.6982980089916506, "grad_norm": 0.25799763202667236, "learning_rate": 0.0001, "loss": 1.4338, "step": 4349 }, { "epoch": 0.6984585741811176, "grad_norm": 0.2532768249511719, "learning_rate": 0.0001, "loss": 1.4077, "step": 4350 }, { "epoch": 0.6986191393705845, "grad_norm": 0.2617965042591095, "learning_rate": 0.0001, "loss": 1.503, "step": 4351 }, { "epoch": 0.6987797045600513, "grad_norm": 0.27117711305618286, "learning_rate": 0.0001, "loss": 1.5566, "step": 4352 }, { "epoch": 0.6989402697495183, "grad_norm": 0.26747074723243713, "learning_rate": 0.0001, "loss": 1.4427, "step": 4353 }, { "epoch": 0.6991008349389852, "grad_norm": 0.2636476159095764, "learning_rate": 0.0001, "loss": 1.5027, "step": 4354 }, { "epoch": 0.6992614001284522, "grad_norm": 0.2568819522857666, "learning_rate": 0.0001, "loss": 1.4629, "step": 4355 }, { "epoch": 0.6994219653179191, "grad_norm": 0.2751426100730896, "learning_rate": 0.0001, "loss": 1.5391, "step": 4356 }, { "epoch": 0.699582530507386, "grad_norm": 0.2704501152038574, "learning_rate": 0.0001, "loss": 1.579, "step": 4357 }, { "epoch": 0.6997430956968529, "grad_norm": 0.2700333595275879, "learning_rate": 0.0001, "loss": 1.616, "step": 4358 }, { "epoch": 0.6999036608863198, "grad_norm": 0.268250435590744, "learning_rate": 0.0001, "loss": 1.5191, "step": 4359 }, { "epoch": 0.7000642260757868, "grad_norm": 0.26120874285697937, "learning_rate": 0.0001, "loss": 1.4953, "step": 4360 }, { "epoch": 0.7002247912652537, "grad_norm": 0.25803521275520325, "learning_rate": 0.0001, "loss": 1.4986, "step": 4361 }, { "epoch": 0.7003853564547207, "grad_norm": 0.2690806984901428, "learning_rate": 0.0001, "loss": 1.5414, "step": 4362 }, { "epoch": 0.7005459216441875, "grad_norm": 0.25062671303749084, "learning_rate": 0.0001, "loss": 1.4683, "step": 4363 }, { "epoch": 0.7007064868336544, "grad_norm": 0.265217661857605, "learning_rate": 0.0001, "loss": 1.5582, "step": 4364 }, { "epoch": 0.7008670520231214, "grad_norm": 0.2658994495868683, "learning_rate": 0.0001, "loss": 1.396, "step": 4365 }, { "epoch": 0.7010276172125883, "grad_norm": 0.2500857710838318, "learning_rate": 0.0001, "loss": 1.4334, "step": 4366 }, { "epoch": 0.7011881824020553, "grad_norm": 0.26173529028892517, "learning_rate": 0.0001, "loss": 1.48, "step": 4367 }, { "epoch": 0.7013487475915221, "grad_norm": 0.2551463544368744, "learning_rate": 0.0001, "loss": 1.4782, "step": 4368 }, { "epoch": 0.701509312780989, "grad_norm": 0.2488209754228592, "learning_rate": 0.0001, "loss": 1.3831, "step": 4369 }, { "epoch": 0.701669877970456, "grad_norm": 0.25642555952072144, "learning_rate": 0.0001, "loss": 1.5492, "step": 4370 }, { "epoch": 0.7018304431599229, "grad_norm": 0.2563942074775696, "learning_rate": 0.0001, "loss": 1.5025, "step": 4371 }, { "epoch": 0.7019910083493899, "grad_norm": 0.26586371660232544, "learning_rate": 0.0001, "loss": 1.4865, "step": 4372 }, { "epoch": 0.7021515735388568, "grad_norm": 0.25640198588371277, "learning_rate": 0.0001, "loss": 1.4257, "step": 4373 }, { "epoch": 0.7023121387283237, "grad_norm": 0.30186402797698975, "learning_rate": 0.0001, "loss": 1.515, "step": 4374 }, { "epoch": 0.7024727039177906, "grad_norm": 0.26809120178222656, "learning_rate": 0.0001, "loss": 1.4723, "step": 4375 }, { "epoch": 0.7026332691072575, "grad_norm": 0.2669029235839844, "learning_rate": 0.0001, "loss": 1.459, "step": 4376 }, { "epoch": 0.7027938342967245, "grad_norm": 0.2582818269729614, "learning_rate": 0.0001, "loss": 1.5551, "step": 4377 }, { "epoch": 0.7029543994861914, "grad_norm": 0.26471009850502014, "learning_rate": 0.0001, "loss": 1.4531, "step": 4378 }, { "epoch": 0.7031149646756584, "grad_norm": 0.25546589493751526, "learning_rate": 0.0001, "loss": 1.5049, "step": 4379 }, { "epoch": 0.7032755298651252, "grad_norm": 0.268154501914978, "learning_rate": 0.0001, "loss": 1.4526, "step": 4380 }, { "epoch": 0.7034360950545921, "grad_norm": 0.25864431262016296, "learning_rate": 0.0001, "loss": 1.531, "step": 4381 }, { "epoch": 0.7035966602440591, "grad_norm": 0.2717253863811493, "learning_rate": 0.0001, "loss": 1.5211, "step": 4382 }, { "epoch": 0.703757225433526, "grad_norm": 0.2777341902256012, "learning_rate": 0.0001, "loss": 1.5082, "step": 4383 }, { "epoch": 0.703917790622993, "grad_norm": 0.26059722900390625, "learning_rate": 0.0001, "loss": 1.4994, "step": 4384 }, { "epoch": 0.7040783558124598, "grad_norm": 0.27167803049087524, "learning_rate": 0.0001, "loss": 1.551, "step": 4385 }, { "epoch": 0.7042389210019268, "grad_norm": 0.26073744893074036, "learning_rate": 0.0001, "loss": 1.4914, "step": 4386 }, { "epoch": 0.7043994861913937, "grad_norm": 0.2469278872013092, "learning_rate": 0.0001, "loss": 1.5015, "step": 4387 }, { "epoch": 0.7045600513808606, "grad_norm": 0.26496291160583496, "learning_rate": 0.0001, "loss": 1.5747, "step": 4388 }, { "epoch": 0.7047206165703276, "grad_norm": 0.2571849524974823, "learning_rate": 0.0001, "loss": 1.5013, "step": 4389 }, { "epoch": 0.7048811817597945, "grad_norm": 0.2778785824775696, "learning_rate": 0.0001, "loss": 1.4337, "step": 4390 }, { "epoch": 0.7050417469492614, "grad_norm": 0.2539641559123993, "learning_rate": 0.0001, "loss": 1.537, "step": 4391 }, { "epoch": 0.7052023121387283, "grad_norm": 0.27053964138031006, "learning_rate": 0.0001, "loss": 1.3811, "step": 4392 }, { "epoch": 0.7053628773281952, "grad_norm": 0.2752392292022705, "learning_rate": 0.0001, "loss": 1.4962, "step": 4393 }, { "epoch": 0.7055234425176622, "grad_norm": 0.25827983021736145, "learning_rate": 0.0001, "loss": 1.5302, "step": 4394 }, { "epoch": 0.7056840077071291, "grad_norm": 0.2535262405872345, "learning_rate": 0.0001, "loss": 1.4865, "step": 4395 }, { "epoch": 0.705844572896596, "grad_norm": 0.2754910886287689, "learning_rate": 0.0001, "loss": 1.4842, "step": 4396 }, { "epoch": 0.7060051380860629, "grad_norm": 0.26365748047828674, "learning_rate": 0.0001, "loss": 1.4433, "step": 4397 }, { "epoch": 0.7061657032755299, "grad_norm": 0.2513587176799774, "learning_rate": 0.0001, "loss": 1.4889, "step": 4398 }, { "epoch": 0.7063262684649968, "grad_norm": 0.25660595297813416, "learning_rate": 0.0001, "loss": 1.4839, "step": 4399 }, { "epoch": 0.7064868336544637, "grad_norm": 0.2599651515483856, "learning_rate": 0.0001, "loss": 1.4655, "step": 4400 }, { "epoch": 0.7066473988439307, "grad_norm": 0.27303239703178406, "learning_rate": 0.0001, "loss": 1.5043, "step": 4401 }, { "epoch": 0.7068079640333975, "grad_norm": 0.27233800292015076, "learning_rate": 0.0001, "loss": 1.4356, "step": 4402 }, { "epoch": 0.7069685292228645, "grad_norm": 0.25332629680633545, "learning_rate": 0.0001, "loss": 1.4662, "step": 4403 }, { "epoch": 0.7071290944123314, "grad_norm": 0.2634056806564331, "learning_rate": 0.0001, "loss": 1.5181, "step": 4404 }, { "epoch": 0.7072896596017983, "grad_norm": 0.26729610562324524, "learning_rate": 0.0001, "loss": 1.5289, "step": 4405 }, { "epoch": 0.7074502247912653, "grad_norm": 0.27977415919303894, "learning_rate": 0.0001, "loss": 1.58, "step": 4406 }, { "epoch": 0.7076107899807321, "grad_norm": 0.25852587819099426, "learning_rate": 0.0001, "loss": 1.4558, "step": 4407 }, { "epoch": 0.7077713551701991, "grad_norm": 0.2548057734966278, "learning_rate": 0.0001, "loss": 1.4733, "step": 4408 }, { "epoch": 0.707931920359666, "grad_norm": 0.26761743426322937, "learning_rate": 0.0001, "loss": 1.4502, "step": 4409 }, { "epoch": 0.708092485549133, "grad_norm": 0.2621440589427948, "learning_rate": 0.0001, "loss": 1.5589, "step": 4410 }, { "epoch": 0.7082530507385999, "grad_norm": 0.25833815336227417, "learning_rate": 0.0001, "loss": 1.4941, "step": 4411 }, { "epoch": 0.7084136159280668, "grad_norm": 0.25731223821640015, "learning_rate": 0.0001, "loss": 1.5585, "step": 4412 }, { "epoch": 0.7085741811175337, "grad_norm": 0.26889559626579285, "learning_rate": 0.0001, "loss": 1.5231, "step": 4413 }, { "epoch": 0.7087347463070006, "grad_norm": 0.25878679752349854, "learning_rate": 0.0001, "loss": 1.5026, "step": 4414 }, { "epoch": 0.7088953114964676, "grad_norm": 0.2717954218387604, "learning_rate": 0.0001, "loss": 1.5714, "step": 4415 }, { "epoch": 0.7090558766859345, "grad_norm": 0.26209917664527893, "learning_rate": 0.0001, "loss": 1.4316, "step": 4416 }, { "epoch": 0.7092164418754014, "grad_norm": 0.27343231439590454, "learning_rate": 0.0001, "loss": 1.5084, "step": 4417 }, { "epoch": 0.7093770070648683, "grad_norm": 0.27332833409309387, "learning_rate": 0.0001, "loss": 1.5473, "step": 4418 }, { "epoch": 0.7095375722543352, "grad_norm": 0.2737707495689392, "learning_rate": 0.0001, "loss": 1.4728, "step": 4419 }, { "epoch": 0.7096981374438022, "grad_norm": 0.2730090618133545, "learning_rate": 0.0001, "loss": 1.5087, "step": 4420 }, { "epoch": 0.7098587026332691, "grad_norm": 0.27479568123817444, "learning_rate": 0.0001, "loss": 1.5424, "step": 4421 }, { "epoch": 0.710019267822736, "grad_norm": 0.2649615406990051, "learning_rate": 0.0001, "loss": 1.449, "step": 4422 }, { "epoch": 0.710179833012203, "grad_norm": 0.28063979744911194, "learning_rate": 0.0001, "loss": 1.5263, "step": 4423 }, { "epoch": 0.7103403982016698, "grad_norm": 0.27927061915397644, "learning_rate": 0.0001, "loss": 1.4998, "step": 4424 }, { "epoch": 0.7105009633911368, "grad_norm": 0.25157079100608826, "learning_rate": 0.0001, "loss": 1.4429, "step": 4425 }, { "epoch": 0.7106615285806037, "grad_norm": 0.3160429894924164, "learning_rate": 0.0001, "loss": 1.5159, "step": 4426 }, { "epoch": 0.7108220937700707, "grad_norm": 0.28867828845977783, "learning_rate": 0.0001, "loss": 1.4464, "step": 4427 }, { "epoch": 0.7109826589595376, "grad_norm": 0.2587195038795471, "learning_rate": 0.0001, "loss": 1.4863, "step": 4428 }, { "epoch": 0.7111432241490045, "grad_norm": 0.2741629183292389, "learning_rate": 0.0001, "loss": 1.4089, "step": 4429 }, { "epoch": 0.7113037893384714, "grad_norm": 0.26618364453315735, "learning_rate": 0.0001, "loss": 1.5135, "step": 4430 }, { "epoch": 0.7114643545279383, "grad_norm": 0.2516707479953766, "learning_rate": 0.0001, "loss": 1.4103, "step": 4431 }, { "epoch": 0.7116249197174053, "grad_norm": 0.265982985496521, "learning_rate": 0.0001, "loss": 1.5198, "step": 4432 }, { "epoch": 0.7117854849068722, "grad_norm": 0.26953351497650146, "learning_rate": 0.0001, "loss": 1.4967, "step": 4433 }, { "epoch": 0.7119460500963392, "grad_norm": 0.271270751953125, "learning_rate": 0.0001, "loss": 1.4891, "step": 4434 }, { "epoch": 0.712106615285806, "grad_norm": 0.2619115710258484, "learning_rate": 0.0001, "loss": 1.5203, "step": 4435 }, { "epoch": 0.7122671804752729, "grad_norm": 0.26357564330101013, "learning_rate": 0.0001, "loss": 1.4419, "step": 4436 }, { "epoch": 0.7124277456647399, "grad_norm": 0.27811411023139954, "learning_rate": 0.0001, "loss": 1.5185, "step": 4437 }, { "epoch": 0.7125883108542068, "grad_norm": 0.27106788754463196, "learning_rate": 0.0001, "loss": 1.4602, "step": 4438 }, { "epoch": 0.7127488760436738, "grad_norm": 0.27602043747901917, "learning_rate": 0.0001, "loss": 1.4671, "step": 4439 }, { "epoch": 0.7129094412331407, "grad_norm": 0.28530940413475037, "learning_rate": 0.0001, "loss": 1.5047, "step": 4440 }, { "epoch": 0.7130700064226075, "grad_norm": 0.2758553624153137, "learning_rate": 0.0001, "loss": 1.5065, "step": 4441 }, { "epoch": 0.7132305716120745, "grad_norm": 0.2505316436290741, "learning_rate": 0.0001, "loss": 1.3736, "step": 4442 }, { "epoch": 0.7133911368015414, "grad_norm": 0.25257694721221924, "learning_rate": 0.0001, "loss": 1.5111, "step": 4443 }, { "epoch": 0.7135517019910084, "grad_norm": 0.3961699306964874, "learning_rate": 0.0001, "loss": 1.4508, "step": 4444 }, { "epoch": 0.7137122671804753, "grad_norm": 0.25373756885528564, "learning_rate": 0.0001, "loss": 1.4998, "step": 4445 }, { "epoch": 0.7138728323699421, "grad_norm": 0.2606360614299774, "learning_rate": 0.0001, "loss": 1.5664, "step": 4446 }, { "epoch": 0.7140333975594091, "grad_norm": 0.26831522583961487, "learning_rate": 0.0001, "loss": 1.511, "step": 4447 }, { "epoch": 0.714193962748876, "grad_norm": 0.2633364498615265, "learning_rate": 0.0001, "loss": 1.4681, "step": 4448 }, { "epoch": 0.714354527938343, "grad_norm": 0.27485334873199463, "learning_rate": 0.0001, "loss": 1.5337, "step": 4449 }, { "epoch": 0.7145150931278099, "grad_norm": 0.2693502604961395, "learning_rate": 0.0001, "loss": 1.4955, "step": 4450 }, { "epoch": 0.7146756583172769, "grad_norm": 0.2679779529571533, "learning_rate": 0.0001, "loss": 1.42, "step": 4451 }, { "epoch": 0.7148362235067437, "grad_norm": 0.2692188024520874, "learning_rate": 0.0001, "loss": 1.4819, "step": 4452 }, { "epoch": 0.7149967886962106, "grad_norm": 0.24806596338748932, "learning_rate": 0.0001, "loss": 1.4835, "step": 4453 }, { "epoch": 0.7151573538856776, "grad_norm": 0.2935852110385895, "learning_rate": 0.0001, "loss": 1.5861, "step": 4454 }, { "epoch": 0.7153179190751445, "grad_norm": 0.26009488105773926, "learning_rate": 0.0001, "loss": 1.5451, "step": 4455 }, { "epoch": 0.7154784842646115, "grad_norm": 0.2731396555900574, "learning_rate": 0.0001, "loss": 1.5699, "step": 4456 }, { "epoch": 0.7156390494540783, "grad_norm": 0.28009775280952454, "learning_rate": 0.0001, "loss": 1.4371, "step": 4457 }, { "epoch": 0.7157996146435452, "grad_norm": 0.26409244537353516, "learning_rate": 0.0001, "loss": 1.5191, "step": 4458 }, { "epoch": 0.7159601798330122, "grad_norm": 0.2744426429271698, "learning_rate": 0.0001, "loss": 1.5256, "step": 4459 }, { "epoch": 0.7161207450224791, "grad_norm": 0.2576165795326233, "learning_rate": 0.0001, "loss": 1.4554, "step": 4460 }, { "epoch": 0.7162813102119461, "grad_norm": 0.26500216126441956, "learning_rate": 0.0001, "loss": 1.5102, "step": 4461 }, { "epoch": 0.716441875401413, "grad_norm": 0.29779383540153503, "learning_rate": 0.0001, "loss": 1.5347, "step": 4462 }, { "epoch": 0.7166024405908799, "grad_norm": 0.2547025680541992, "learning_rate": 0.0001, "loss": 1.5063, "step": 4463 }, { "epoch": 0.7167630057803468, "grad_norm": 0.298776239156723, "learning_rate": 0.0001, "loss": 1.5235, "step": 4464 }, { "epoch": 0.7169235709698137, "grad_norm": 0.2800496816635132, "learning_rate": 0.0001, "loss": 1.4428, "step": 4465 }, { "epoch": 0.7170841361592807, "grad_norm": 0.2626544237136841, "learning_rate": 0.0001, "loss": 1.4819, "step": 4466 }, { "epoch": 0.7172447013487476, "grad_norm": 0.25592851638793945, "learning_rate": 0.0001, "loss": 1.49, "step": 4467 }, { "epoch": 0.7174052665382146, "grad_norm": 0.2628372609615326, "learning_rate": 0.0001, "loss": 1.3694, "step": 4468 }, { "epoch": 0.7175658317276814, "grad_norm": 0.2570657730102539, "learning_rate": 0.0001, "loss": 1.4635, "step": 4469 }, { "epoch": 0.7177263969171483, "grad_norm": 0.25978749990463257, "learning_rate": 0.0001, "loss": 1.5157, "step": 4470 }, { "epoch": 0.7178869621066153, "grad_norm": 0.259258896112442, "learning_rate": 0.0001, "loss": 1.5179, "step": 4471 }, { "epoch": 0.7180475272960822, "grad_norm": 0.25281277298927307, "learning_rate": 0.0001, "loss": 1.4334, "step": 4472 }, { "epoch": 0.7182080924855492, "grad_norm": 0.2575565278530121, "learning_rate": 0.0001, "loss": 1.433, "step": 4473 }, { "epoch": 0.718368657675016, "grad_norm": 0.2693718373775482, "learning_rate": 0.0001, "loss": 1.3869, "step": 4474 }, { "epoch": 0.718529222864483, "grad_norm": 0.2633496820926666, "learning_rate": 0.0001, "loss": 1.5227, "step": 4475 }, { "epoch": 0.7186897880539499, "grad_norm": 0.2422046959400177, "learning_rate": 0.0001, "loss": 1.3841, "step": 4476 }, { "epoch": 0.7188503532434168, "grad_norm": 0.2973977029323578, "learning_rate": 0.0001, "loss": 1.5567, "step": 4477 }, { "epoch": 0.7190109184328838, "grad_norm": 0.2742076516151428, "learning_rate": 0.0001, "loss": 1.4491, "step": 4478 }, { "epoch": 0.7191714836223507, "grad_norm": 0.2550565302371979, "learning_rate": 0.0001, "loss": 1.4604, "step": 4479 }, { "epoch": 0.7193320488118176, "grad_norm": 0.2700750231742859, "learning_rate": 0.0001, "loss": 1.5673, "step": 4480 }, { "epoch": 0.7194926140012845, "grad_norm": 0.2693113386631012, "learning_rate": 0.0001, "loss": 1.5352, "step": 4481 }, { "epoch": 0.7196531791907514, "grad_norm": 0.2644595801830292, "learning_rate": 0.0001, "loss": 1.5251, "step": 4482 }, { "epoch": 0.7198137443802184, "grad_norm": 0.25771331787109375, "learning_rate": 0.0001, "loss": 1.5294, "step": 4483 }, { "epoch": 0.7199743095696853, "grad_norm": 0.2605074942111969, "learning_rate": 0.0001, "loss": 1.4934, "step": 4484 }, { "epoch": 0.7201348747591522, "grad_norm": 0.25524941086769104, "learning_rate": 0.0001, "loss": 1.5205, "step": 4485 }, { "epoch": 0.7202954399486191, "grad_norm": 0.2571198344230652, "learning_rate": 0.0001, "loss": 1.4342, "step": 4486 }, { "epoch": 0.720456005138086, "grad_norm": 0.282124787569046, "learning_rate": 0.0001, "loss": 1.5009, "step": 4487 }, { "epoch": 0.720616570327553, "grad_norm": 0.27443233132362366, "learning_rate": 0.0001, "loss": 1.4511, "step": 4488 }, { "epoch": 0.7207771355170199, "grad_norm": 0.2842823565006256, "learning_rate": 0.0001, "loss": 1.4834, "step": 4489 }, { "epoch": 0.7209377007064869, "grad_norm": 0.2627255320549011, "learning_rate": 0.0001, "loss": 1.5147, "step": 4490 }, { "epoch": 0.7210982658959537, "grad_norm": 0.25318068265914917, "learning_rate": 0.0001, "loss": 1.5308, "step": 4491 }, { "epoch": 0.7212588310854207, "grad_norm": 0.25988972187042236, "learning_rate": 0.0001, "loss": 1.4123, "step": 4492 }, { "epoch": 0.7214193962748876, "grad_norm": 0.2656155228614807, "learning_rate": 0.0001, "loss": 1.4676, "step": 4493 }, { "epoch": 0.7215799614643545, "grad_norm": 0.2692951261997223, "learning_rate": 0.0001, "loss": 1.4891, "step": 4494 }, { "epoch": 0.7217405266538215, "grad_norm": 0.267822802066803, "learning_rate": 0.0001, "loss": 1.5172, "step": 4495 }, { "epoch": 0.7219010918432883, "grad_norm": 0.26387912034988403, "learning_rate": 0.0001, "loss": 1.4045, "step": 4496 }, { "epoch": 0.7220616570327553, "grad_norm": 0.2647508978843689, "learning_rate": 0.0001, "loss": 1.4078, "step": 4497 }, { "epoch": 0.7222222222222222, "grad_norm": 0.27734246850013733, "learning_rate": 0.0001, "loss": 1.4583, "step": 4498 }, { "epoch": 0.7223827874116892, "grad_norm": 0.28359514474868774, "learning_rate": 0.0001, "loss": 1.5409, "step": 4499 }, { "epoch": 0.7225433526011561, "grad_norm": 0.26455989480018616, "learning_rate": 0.0001, "loss": 1.4627, "step": 4500 }, { "epoch": 0.722703917790623, "grad_norm": 0.28599298000335693, "learning_rate": 0.0001, "loss": 1.5766, "step": 4501 }, { "epoch": 0.7228644829800899, "grad_norm": 0.2679843604564667, "learning_rate": 0.0001, "loss": 1.5495, "step": 4502 }, { "epoch": 0.7230250481695568, "grad_norm": 0.25494423508644104, "learning_rate": 0.0001, "loss": 1.531, "step": 4503 }, { "epoch": 0.7231856133590238, "grad_norm": 0.2747451961040497, "learning_rate": 0.0001, "loss": 1.4802, "step": 4504 }, { "epoch": 0.7233461785484907, "grad_norm": 0.2605855464935303, "learning_rate": 0.0001, "loss": 1.5682, "step": 4505 }, { "epoch": 0.7235067437379576, "grad_norm": 0.27038636803627014, "learning_rate": 0.0001, "loss": 1.5644, "step": 4506 }, { "epoch": 0.7236673089274245, "grad_norm": 0.28356048464775085, "learning_rate": 0.0001, "loss": 1.5086, "step": 4507 }, { "epoch": 0.7238278741168914, "grad_norm": 0.2688811421394348, "learning_rate": 0.0001, "loss": 1.4767, "step": 4508 }, { "epoch": 0.7239884393063584, "grad_norm": 0.2893103063106537, "learning_rate": 0.0001, "loss": 1.5314, "step": 4509 }, { "epoch": 0.7241490044958253, "grad_norm": 0.27945476770401, "learning_rate": 0.0001, "loss": 1.5808, "step": 4510 }, { "epoch": 0.7243095696852923, "grad_norm": 0.2593180537223816, "learning_rate": 0.0001, "loss": 1.4916, "step": 4511 }, { "epoch": 0.7244701348747592, "grad_norm": 0.2758706212043762, "learning_rate": 0.0001, "loss": 1.4831, "step": 4512 }, { "epoch": 0.724630700064226, "grad_norm": 0.25680720806121826, "learning_rate": 0.0001, "loss": 1.4713, "step": 4513 }, { "epoch": 0.724791265253693, "grad_norm": 0.27224671840667725, "learning_rate": 0.0001, "loss": 1.4702, "step": 4514 }, { "epoch": 0.7249518304431599, "grad_norm": 0.3044149577617645, "learning_rate": 0.0001, "loss": 1.5064, "step": 4515 }, { "epoch": 0.7251123956326269, "grad_norm": 0.27164554595947266, "learning_rate": 0.0001, "loss": 1.5005, "step": 4516 }, { "epoch": 0.7252729608220938, "grad_norm": 0.276862770318985, "learning_rate": 0.0001, "loss": 1.4302, "step": 4517 }, { "epoch": 0.7254335260115607, "grad_norm": 0.267896831035614, "learning_rate": 0.0001, "loss": 1.5807, "step": 4518 }, { "epoch": 0.7255940912010276, "grad_norm": 0.28572964668273926, "learning_rate": 0.0001, "loss": 1.4622, "step": 4519 }, { "epoch": 0.7257546563904945, "grad_norm": 0.266260027885437, "learning_rate": 0.0001, "loss": 1.5402, "step": 4520 }, { "epoch": 0.7259152215799615, "grad_norm": 0.2602228820323944, "learning_rate": 0.0001, "loss": 1.4577, "step": 4521 }, { "epoch": 0.7260757867694284, "grad_norm": 0.25212356448173523, "learning_rate": 0.0001, "loss": 1.4946, "step": 4522 }, { "epoch": 0.7262363519588954, "grad_norm": 0.25058719515800476, "learning_rate": 0.0001, "loss": 1.5004, "step": 4523 }, { "epoch": 0.7263969171483622, "grad_norm": 0.25646036863327026, "learning_rate": 0.0001, "loss": 1.4732, "step": 4524 }, { "epoch": 0.7265574823378291, "grad_norm": 0.25635862350463867, "learning_rate": 0.0001, "loss": 1.4925, "step": 4525 }, { "epoch": 0.7267180475272961, "grad_norm": 0.4270424544811249, "learning_rate": 0.0001, "loss": 1.4069, "step": 4526 }, { "epoch": 0.726878612716763, "grad_norm": 0.26464807987213135, "learning_rate": 0.0001, "loss": 1.4664, "step": 4527 }, { "epoch": 0.72703917790623, "grad_norm": 0.2629585564136505, "learning_rate": 0.0001, "loss": 1.4861, "step": 4528 }, { "epoch": 0.7271997430956969, "grad_norm": 0.2564154267311096, "learning_rate": 0.0001, "loss": 1.4986, "step": 4529 }, { "epoch": 0.7273603082851637, "grad_norm": 0.28767871856689453, "learning_rate": 0.0001, "loss": 1.471, "step": 4530 }, { "epoch": 0.7275208734746307, "grad_norm": 0.2589711546897888, "learning_rate": 0.0001, "loss": 1.497, "step": 4531 }, { "epoch": 0.7276814386640976, "grad_norm": 0.2619282603263855, "learning_rate": 0.0001, "loss": 1.5319, "step": 4532 }, { "epoch": 0.7278420038535646, "grad_norm": 0.28460222482681274, "learning_rate": 0.0001, "loss": 1.534, "step": 4533 }, { "epoch": 0.7280025690430315, "grad_norm": 0.26082509756088257, "learning_rate": 0.0001, "loss": 1.5575, "step": 4534 }, { "epoch": 0.7281631342324983, "grad_norm": 0.2523138225078583, "learning_rate": 0.0001, "loss": 1.5014, "step": 4535 }, { "epoch": 0.7283236994219653, "grad_norm": 0.2843577563762665, "learning_rate": 0.0001, "loss": 1.5753, "step": 4536 }, { "epoch": 0.7284842646114322, "grad_norm": 0.26808345317840576, "learning_rate": 0.0001, "loss": 1.5482, "step": 4537 }, { "epoch": 0.7286448298008992, "grad_norm": 0.29650044441223145, "learning_rate": 0.0001, "loss": 1.5501, "step": 4538 }, { "epoch": 0.7288053949903661, "grad_norm": 0.30034783482551575, "learning_rate": 0.0001, "loss": 1.4617, "step": 4539 }, { "epoch": 0.7289659601798331, "grad_norm": 0.2697218358516693, "learning_rate": 0.0001, "loss": 1.5538, "step": 4540 }, { "epoch": 0.7291265253692999, "grad_norm": 0.26011794805526733, "learning_rate": 0.0001, "loss": 1.5113, "step": 4541 }, { "epoch": 0.7292870905587668, "grad_norm": 0.28395283222198486, "learning_rate": 0.0001, "loss": 1.5384, "step": 4542 }, { "epoch": 0.7294476557482338, "grad_norm": 0.3622431755065918, "learning_rate": 0.0001, "loss": 1.5581, "step": 4543 }, { "epoch": 0.7296082209377007, "grad_norm": 0.26604852080345154, "learning_rate": 0.0001, "loss": 1.4882, "step": 4544 }, { "epoch": 0.7297687861271677, "grad_norm": 0.2625166177749634, "learning_rate": 0.0001, "loss": 1.4636, "step": 4545 }, { "epoch": 0.7299293513166345, "grad_norm": 0.2692174017429352, "learning_rate": 0.0001, "loss": 1.4913, "step": 4546 }, { "epoch": 0.7300899165061014, "grad_norm": 0.2643943727016449, "learning_rate": 0.0001, "loss": 1.4963, "step": 4547 }, { "epoch": 0.7302504816955684, "grad_norm": 0.27789101004600525, "learning_rate": 0.0001, "loss": 1.4675, "step": 4548 }, { "epoch": 0.7304110468850353, "grad_norm": 0.26574602723121643, "learning_rate": 0.0001, "loss": 1.4863, "step": 4549 }, { "epoch": 0.7305716120745023, "grad_norm": 0.28030118346214294, "learning_rate": 0.0001, "loss": 1.4317, "step": 4550 }, { "epoch": 0.7307321772639692, "grad_norm": 0.2659422755241394, "learning_rate": 0.0001, "loss": 1.4541, "step": 4551 }, { "epoch": 0.730892742453436, "grad_norm": 0.2769496738910675, "learning_rate": 0.0001, "loss": 1.5108, "step": 4552 }, { "epoch": 0.731053307642903, "grad_norm": 0.3044896125793457, "learning_rate": 0.0001, "loss": 1.501, "step": 4553 }, { "epoch": 0.7312138728323699, "grad_norm": 0.2771703004837036, "learning_rate": 0.0001, "loss": 1.5176, "step": 4554 }, { "epoch": 0.7313744380218369, "grad_norm": 0.27319344878196716, "learning_rate": 0.0001, "loss": 1.5388, "step": 4555 }, { "epoch": 0.7315350032113038, "grad_norm": 0.2738259732723236, "learning_rate": 0.0001, "loss": 1.48, "step": 4556 }, { "epoch": 0.7316955684007708, "grad_norm": 0.2523276209831238, "learning_rate": 0.0001, "loss": 1.3931, "step": 4557 }, { "epoch": 0.7318561335902376, "grad_norm": 0.2789495289325714, "learning_rate": 0.0001, "loss": 1.5377, "step": 4558 }, { "epoch": 0.7320166987797045, "grad_norm": 0.27878350019454956, "learning_rate": 0.0001, "loss": 1.452, "step": 4559 }, { "epoch": 0.7321772639691715, "grad_norm": 0.25896358489990234, "learning_rate": 0.0001, "loss": 1.3775, "step": 4560 }, { "epoch": 0.7323378291586384, "grad_norm": 0.27563977241516113, "learning_rate": 0.0001, "loss": 1.5585, "step": 4561 }, { "epoch": 0.7324983943481054, "grad_norm": 0.27830901741981506, "learning_rate": 0.0001, "loss": 1.5122, "step": 4562 }, { "epoch": 0.7326589595375722, "grad_norm": 0.28074386715888977, "learning_rate": 0.0001, "loss": 1.5732, "step": 4563 }, { "epoch": 0.7328195247270392, "grad_norm": 0.31096115708351135, "learning_rate": 0.0001, "loss": 1.5196, "step": 4564 }, { "epoch": 0.7329800899165061, "grad_norm": 0.2652338743209839, "learning_rate": 0.0001, "loss": 1.5123, "step": 4565 }, { "epoch": 0.733140655105973, "grad_norm": 0.255057692527771, "learning_rate": 0.0001, "loss": 1.4401, "step": 4566 }, { "epoch": 0.73330122029544, "grad_norm": 0.27116659283638, "learning_rate": 0.0001, "loss": 1.5022, "step": 4567 }, { "epoch": 0.7334617854849069, "grad_norm": 0.26373663544654846, "learning_rate": 0.0001, "loss": 1.5582, "step": 4568 }, { "epoch": 0.7336223506743738, "grad_norm": 0.2734716534614563, "learning_rate": 0.0001, "loss": 1.4851, "step": 4569 }, { "epoch": 0.7337829158638407, "grad_norm": 0.26862290501594543, "learning_rate": 0.0001, "loss": 1.4386, "step": 4570 }, { "epoch": 0.7339434810533076, "grad_norm": 0.2610526382923126, "learning_rate": 0.0001, "loss": 1.5247, "step": 4571 }, { "epoch": 0.7341040462427746, "grad_norm": 0.26719582080841064, "learning_rate": 0.0001, "loss": 1.4941, "step": 4572 }, { "epoch": 0.7342646114322415, "grad_norm": 0.25229886174201965, "learning_rate": 0.0001, "loss": 1.4897, "step": 4573 }, { "epoch": 0.7344251766217084, "grad_norm": 0.2586591839790344, "learning_rate": 0.0001, "loss": 1.5035, "step": 4574 }, { "epoch": 0.7345857418111753, "grad_norm": 0.26860129833221436, "learning_rate": 0.0001, "loss": 1.4665, "step": 4575 }, { "epoch": 0.7347463070006423, "grad_norm": 0.2788375914096832, "learning_rate": 0.0001, "loss": 1.5063, "step": 4576 }, { "epoch": 0.7349068721901092, "grad_norm": 0.2794533967971802, "learning_rate": 0.0001, "loss": 1.5398, "step": 4577 }, { "epoch": 0.7350674373795761, "grad_norm": 0.2631220817565918, "learning_rate": 0.0001, "loss": 1.5352, "step": 4578 }, { "epoch": 0.7352280025690431, "grad_norm": 0.27125927805900574, "learning_rate": 0.0001, "loss": 1.5272, "step": 4579 }, { "epoch": 0.7353885677585099, "grad_norm": 0.26662683486938477, "learning_rate": 0.0001, "loss": 1.4715, "step": 4580 }, { "epoch": 0.7355491329479769, "grad_norm": 0.2543308138847351, "learning_rate": 0.0001, "loss": 1.4585, "step": 4581 }, { "epoch": 0.7357096981374438, "grad_norm": 0.2568158805370331, "learning_rate": 0.0001, "loss": 1.5014, "step": 4582 }, { "epoch": 0.7358702633269107, "grad_norm": 0.26792454719543457, "learning_rate": 0.0001, "loss": 1.533, "step": 4583 }, { "epoch": 0.7360308285163777, "grad_norm": 0.26723480224609375, "learning_rate": 0.0001, "loss": 1.5555, "step": 4584 }, { "epoch": 0.7361913937058445, "grad_norm": 0.24441014230251312, "learning_rate": 0.0001, "loss": 1.4151, "step": 4585 }, { "epoch": 0.7363519588953115, "grad_norm": 0.26682451367378235, "learning_rate": 0.0001, "loss": 1.579, "step": 4586 }, { "epoch": 0.7365125240847784, "grad_norm": 0.26250365376472473, "learning_rate": 0.0001, "loss": 1.5039, "step": 4587 }, { "epoch": 0.7366730892742454, "grad_norm": 0.2574078142642975, "learning_rate": 0.0001, "loss": 1.4819, "step": 4588 }, { "epoch": 0.7368336544637123, "grad_norm": 0.2646680474281311, "learning_rate": 0.0001, "loss": 1.474, "step": 4589 }, { "epoch": 0.7369942196531792, "grad_norm": 0.27785542607307434, "learning_rate": 0.0001, "loss": 1.5371, "step": 4590 }, { "epoch": 0.7371547848426461, "grad_norm": 0.2574950158596039, "learning_rate": 0.0001, "loss": 1.4823, "step": 4591 }, { "epoch": 0.737315350032113, "grad_norm": 0.2878223955631256, "learning_rate": 0.0001, "loss": 1.5013, "step": 4592 }, { "epoch": 0.73747591522158, "grad_norm": 0.269226998090744, "learning_rate": 0.0001, "loss": 1.4697, "step": 4593 }, { "epoch": 0.7376364804110469, "grad_norm": 0.2553509473800659, "learning_rate": 0.0001, "loss": 1.4206, "step": 4594 }, { "epoch": 0.7377970456005138, "grad_norm": 0.2620447278022766, "learning_rate": 0.0001, "loss": 1.4771, "step": 4595 }, { "epoch": 0.7379576107899807, "grad_norm": 0.2647787034511566, "learning_rate": 0.0001, "loss": 1.5159, "step": 4596 }, { "epoch": 0.7381181759794476, "grad_norm": 0.2749190032482147, "learning_rate": 0.0001, "loss": 1.4306, "step": 4597 }, { "epoch": 0.7382787411689146, "grad_norm": 0.26950743794441223, "learning_rate": 0.0001, "loss": 1.5582, "step": 4598 }, { "epoch": 0.7384393063583815, "grad_norm": 0.2633620500564575, "learning_rate": 0.0001, "loss": 1.492, "step": 4599 }, { "epoch": 0.7385998715478485, "grad_norm": 0.2540116310119629, "learning_rate": 0.0001, "loss": 1.4603, "step": 4600 }, { "epoch": 0.7387604367373154, "grad_norm": 0.26467224955558777, "learning_rate": 0.0001, "loss": 1.4895, "step": 4601 }, { "epoch": 0.7389210019267822, "grad_norm": 0.27076637744903564, "learning_rate": 0.0001, "loss": 1.4849, "step": 4602 }, { "epoch": 0.7390815671162492, "grad_norm": 0.26632288098335266, "learning_rate": 0.0001, "loss": 1.4634, "step": 4603 }, { "epoch": 0.7392421323057161, "grad_norm": 0.26025721430778503, "learning_rate": 0.0001, "loss": 1.5347, "step": 4604 }, { "epoch": 0.7394026974951831, "grad_norm": 0.4053424894809723, "learning_rate": 0.0001, "loss": 1.5655, "step": 4605 }, { "epoch": 0.73956326268465, "grad_norm": 0.27238404750823975, "learning_rate": 0.0001, "loss": 1.5377, "step": 4606 }, { "epoch": 0.739723827874117, "grad_norm": 0.26540979743003845, "learning_rate": 0.0001, "loss": 1.571, "step": 4607 }, { "epoch": 0.7398843930635838, "grad_norm": 0.2828616201877594, "learning_rate": 0.0001, "loss": 1.4741, "step": 4608 }, { "epoch": 0.7400449582530507, "grad_norm": 0.2742636203765869, "learning_rate": 0.0001, "loss": 1.5357, "step": 4609 }, { "epoch": 0.7402055234425177, "grad_norm": 0.2712089717388153, "learning_rate": 0.0001, "loss": 1.4794, "step": 4610 }, { "epoch": 0.7403660886319846, "grad_norm": 0.2567654848098755, "learning_rate": 0.0001, "loss": 1.432, "step": 4611 }, { "epoch": 0.7405266538214516, "grad_norm": 0.2555319368839264, "learning_rate": 0.0001, "loss": 1.5154, "step": 4612 }, { "epoch": 0.7406872190109184, "grad_norm": 0.29317590594291687, "learning_rate": 0.0001, "loss": 1.5601, "step": 4613 }, { "epoch": 0.7408477842003853, "grad_norm": 0.2835467457771301, "learning_rate": 0.0001, "loss": 1.5262, "step": 4614 }, { "epoch": 0.7410083493898523, "grad_norm": 0.26752805709838867, "learning_rate": 0.0001, "loss": 1.4735, "step": 4615 }, { "epoch": 0.7411689145793192, "grad_norm": 0.2731216847896576, "learning_rate": 0.0001, "loss": 1.4841, "step": 4616 }, { "epoch": 0.7413294797687862, "grad_norm": 0.2601814866065979, "learning_rate": 0.0001, "loss": 1.4958, "step": 4617 }, { "epoch": 0.7414900449582531, "grad_norm": 0.272246778011322, "learning_rate": 0.0001, "loss": 1.4808, "step": 4618 }, { "epoch": 0.7416506101477199, "grad_norm": 0.2681845724582672, "learning_rate": 0.0001, "loss": 1.4677, "step": 4619 }, { "epoch": 0.7418111753371869, "grad_norm": 0.2591359317302704, "learning_rate": 0.0001, "loss": 1.5269, "step": 4620 }, { "epoch": 0.7419717405266538, "grad_norm": 0.2780703902244568, "learning_rate": 0.0001, "loss": 1.4674, "step": 4621 }, { "epoch": 0.7421323057161208, "grad_norm": 0.2710312008857727, "learning_rate": 0.0001, "loss": 1.4628, "step": 4622 }, { "epoch": 0.7422928709055877, "grad_norm": 0.27332204580307007, "learning_rate": 0.0001, "loss": 1.4587, "step": 4623 }, { "epoch": 0.7424534360950545, "grad_norm": 0.2555524408817291, "learning_rate": 0.0001, "loss": 1.4517, "step": 4624 }, { "epoch": 0.7426140012845215, "grad_norm": 0.26889708638191223, "learning_rate": 0.0001, "loss": 1.4995, "step": 4625 }, { "epoch": 0.7427745664739884, "grad_norm": 0.26449641585350037, "learning_rate": 0.0001, "loss": 1.4794, "step": 4626 }, { "epoch": 0.7429351316634554, "grad_norm": 0.2725636959075928, "learning_rate": 0.0001, "loss": 1.4984, "step": 4627 }, { "epoch": 0.7430956968529223, "grad_norm": 0.2693058252334595, "learning_rate": 0.0001, "loss": 1.5372, "step": 4628 }, { "epoch": 0.7432562620423893, "grad_norm": 0.26393017172813416, "learning_rate": 0.0001, "loss": 1.4811, "step": 4629 }, { "epoch": 0.7434168272318561, "grad_norm": 0.2784920334815979, "learning_rate": 0.0001, "loss": 1.4104, "step": 4630 }, { "epoch": 0.743577392421323, "grad_norm": 0.25803062319755554, "learning_rate": 0.0001, "loss": 1.5376, "step": 4631 }, { "epoch": 0.74373795761079, "grad_norm": 0.25867918133735657, "learning_rate": 0.0001, "loss": 1.4729, "step": 4632 }, { "epoch": 0.7438985228002569, "grad_norm": 0.27000972628593445, "learning_rate": 0.0001, "loss": 1.4882, "step": 4633 }, { "epoch": 0.7440590879897239, "grad_norm": 0.28312546014785767, "learning_rate": 0.0001, "loss": 1.5758, "step": 4634 }, { "epoch": 0.7442196531791907, "grad_norm": 0.27580639719963074, "learning_rate": 0.0001, "loss": 1.4736, "step": 4635 }, { "epoch": 0.7443802183686576, "grad_norm": 0.270631343126297, "learning_rate": 0.0001, "loss": 1.4746, "step": 4636 }, { "epoch": 0.7445407835581246, "grad_norm": 0.2756424844264984, "learning_rate": 0.0001, "loss": 1.5598, "step": 4637 }, { "epoch": 0.7447013487475915, "grad_norm": 0.2685732841491699, "learning_rate": 0.0001, "loss": 1.5022, "step": 4638 }, { "epoch": 0.7448619139370585, "grad_norm": 0.2652704119682312, "learning_rate": 0.0001, "loss": 1.4959, "step": 4639 }, { "epoch": 0.7450224791265254, "grad_norm": 0.26038676500320435, "learning_rate": 0.0001, "loss": 1.4565, "step": 4640 }, { "epoch": 0.7451830443159922, "grad_norm": 0.2651382386684418, "learning_rate": 0.0001, "loss": 1.4948, "step": 4641 }, { "epoch": 0.7453436095054592, "grad_norm": 0.24904988706111908, "learning_rate": 0.0001, "loss": 1.4551, "step": 4642 }, { "epoch": 0.7455041746949261, "grad_norm": 0.26152366399765015, "learning_rate": 0.0001, "loss": 1.5302, "step": 4643 }, { "epoch": 0.7456647398843931, "grad_norm": 0.2725069224834442, "learning_rate": 0.0001, "loss": 1.5288, "step": 4644 }, { "epoch": 0.74582530507386, "grad_norm": 0.26113831996917725, "learning_rate": 0.0001, "loss": 1.5031, "step": 4645 }, { "epoch": 0.7459858702633269, "grad_norm": 0.2562262713909149, "learning_rate": 0.0001, "loss": 1.5174, "step": 4646 }, { "epoch": 0.7461464354527938, "grad_norm": 0.27277621626853943, "learning_rate": 0.0001, "loss": 1.4447, "step": 4647 }, { "epoch": 0.7463070006422607, "grad_norm": 0.27036160230636597, "learning_rate": 0.0001, "loss": 1.4749, "step": 4648 }, { "epoch": 0.7464675658317277, "grad_norm": 0.27666088938713074, "learning_rate": 0.0001, "loss": 1.5419, "step": 4649 }, { "epoch": 0.7466281310211946, "grad_norm": 0.2988976538181305, "learning_rate": 0.0001, "loss": 1.4219, "step": 4650 }, { "epoch": 0.7467886962106616, "grad_norm": 0.30231142044067383, "learning_rate": 0.0001, "loss": 1.4745, "step": 4651 }, { "epoch": 0.7469492614001284, "grad_norm": 0.279981404542923, "learning_rate": 0.0001, "loss": 1.521, "step": 4652 }, { "epoch": 0.7471098265895953, "grad_norm": 0.26500624418258667, "learning_rate": 0.0001, "loss": 1.5127, "step": 4653 }, { "epoch": 0.7472703917790623, "grad_norm": 0.26284492015838623, "learning_rate": 0.0001, "loss": 1.4263, "step": 4654 }, { "epoch": 0.7474309569685292, "grad_norm": 0.27375173568725586, "learning_rate": 0.0001, "loss": 1.4665, "step": 4655 }, { "epoch": 0.7475915221579962, "grad_norm": 0.2888254225254059, "learning_rate": 0.0001, "loss": 1.5209, "step": 4656 }, { "epoch": 0.7477520873474631, "grad_norm": 0.31308513879776, "learning_rate": 0.0001, "loss": 1.5274, "step": 4657 }, { "epoch": 0.74791265253693, "grad_norm": 0.267828106880188, "learning_rate": 0.0001, "loss": 1.5114, "step": 4658 }, { "epoch": 0.7480732177263969, "grad_norm": 0.2816733121871948, "learning_rate": 0.0001, "loss": 1.5678, "step": 4659 }, { "epoch": 0.7482337829158638, "grad_norm": 0.26341959834098816, "learning_rate": 0.0001, "loss": 1.4596, "step": 4660 }, { "epoch": 0.7483943481053308, "grad_norm": 0.2673376798629761, "learning_rate": 0.0001, "loss": 1.4437, "step": 4661 }, { "epoch": 0.7485549132947977, "grad_norm": 0.2731376588344574, "learning_rate": 0.0001, "loss": 1.4575, "step": 4662 }, { "epoch": 0.7487154784842646, "grad_norm": 0.2772185206413269, "learning_rate": 0.0001, "loss": 1.568, "step": 4663 }, { "epoch": 0.7488760436737315, "grad_norm": 0.27636203169822693, "learning_rate": 0.0001, "loss": 1.4845, "step": 4664 }, { "epoch": 0.7490366088631984, "grad_norm": 0.28590911626815796, "learning_rate": 0.0001, "loss": 1.5398, "step": 4665 }, { "epoch": 0.7491971740526654, "grad_norm": 0.26970577239990234, "learning_rate": 0.0001, "loss": 1.4664, "step": 4666 }, { "epoch": 0.7493577392421323, "grad_norm": 0.2564913332462311, "learning_rate": 0.0001, "loss": 1.41, "step": 4667 }, { "epoch": 0.7495183044315993, "grad_norm": 0.2824428081512451, "learning_rate": 0.0001, "loss": 1.5264, "step": 4668 }, { "epoch": 0.7496788696210661, "grad_norm": 0.2598324120044708, "learning_rate": 0.0001, "loss": 1.4318, "step": 4669 }, { "epoch": 0.7498394348105331, "grad_norm": 0.2638610899448395, "learning_rate": 0.0001, "loss": 1.5312, "step": 4670 }, { "epoch": 0.75, "grad_norm": 0.26793932914733887, "learning_rate": 0.0001, "loss": 1.5179, "step": 4671 }, { "epoch": 0.7501605651894669, "grad_norm": 0.2577224671840668, "learning_rate": 0.0001, "loss": 1.4373, "step": 4672 }, { "epoch": 0.7503211303789339, "grad_norm": 0.27305158972740173, "learning_rate": 0.0001, "loss": 1.4649, "step": 4673 }, { "epoch": 0.7504816955684007, "grad_norm": 0.24940225481987, "learning_rate": 0.0001, "loss": 1.4178, "step": 4674 }, { "epoch": 0.7506422607578677, "grad_norm": 0.2555713355541229, "learning_rate": 0.0001, "loss": 1.5347, "step": 4675 }, { "epoch": 0.7508028259473346, "grad_norm": 0.2697537839412689, "learning_rate": 0.0001, "loss": 1.499, "step": 4676 }, { "epoch": 0.7509633911368016, "grad_norm": 0.2856290638446808, "learning_rate": 0.0001, "loss": 1.4792, "step": 4677 }, { "epoch": 0.7511239563262685, "grad_norm": 0.2914230227470398, "learning_rate": 0.0001, "loss": 1.5277, "step": 4678 }, { "epoch": 0.7512845215157354, "grad_norm": 0.2725703716278076, "learning_rate": 0.0001, "loss": 1.5056, "step": 4679 }, { "epoch": 0.7514450867052023, "grad_norm": 0.26528775691986084, "learning_rate": 0.0001, "loss": 1.5179, "step": 4680 }, { "epoch": 0.7516056518946692, "grad_norm": 0.27045440673828125, "learning_rate": 0.0001, "loss": 1.5296, "step": 4681 }, { "epoch": 0.7517662170841362, "grad_norm": 0.2855928838253021, "learning_rate": 0.0001, "loss": 1.481, "step": 4682 }, { "epoch": 0.7519267822736031, "grad_norm": 0.2561173141002655, "learning_rate": 0.0001, "loss": 1.3798, "step": 4683 }, { "epoch": 0.75208734746307, "grad_norm": 0.2532760202884674, "learning_rate": 0.0001, "loss": 1.4825, "step": 4684 }, { "epoch": 0.7522479126525369, "grad_norm": 0.27161532640457153, "learning_rate": 0.0001, "loss": 1.4858, "step": 4685 }, { "epoch": 0.7524084778420038, "grad_norm": 0.2532717287540436, "learning_rate": 0.0001, "loss": 1.4916, "step": 4686 }, { "epoch": 0.7525690430314708, "grad_norm": 0.257660835981369, "learning_rate": 0.0001, "loss": 1.4006, "step": 4687 }, { "epoch": 0.7527296082209377, "grad_norm": 0.26825979351997375, "learning_rate": 0.0001, "loss": 1.5558, "step": 4688 }, { "epoch": 0.7528901734104047, "grad_norm": 0.2663351595401764, "learning_rate": 0.0001, "loss": 1.5599, "step": 4689 }, { "epoch": 0.7530507385998716, "grad_norm": 0.269704133272171, "learning_rate": 0.0001, "loss": 1.4263, "step": 4690 }, { "epoch": 0.7532113037893384, "grad_norm": 0.27241092920303345, "learning_rate": 0.0001, "loss": 1.5113, "step": 4691 }, { "epoch": 0.7533718689788054, "grad_norm": 0.2649691700935364, "learning_rate": 0.0001, "loss": 1.5441, "step": 4692 }, { "epoch": 0.7535324341682723, "grad_norm": 0.27136510610580444, "learning_rate": 0.0001, "loss": 1.515, "step": 4693 }, { "epoch": 0.7536929993577393, "grad_norm": 0.2539619207382202, "learning_rate": 0.0001, "loss": 1.4025, "step": 4694 }, { "epoch": 0.7538535645472062, "grad_norm": 0.27253806591033936, "learning_rate": 0.0001, "loss": 1.5135, "step": 4695 }, { "epoch": 0.7540141297366731, "grad_norm": 0.2730512320995331, "learning_rate": 0.0001, "loss": 1.5218, "step": 4696 }, { "epoch": 0.75417469492614, "grad_norm": 0.26921844482421875, "learning_rate": 0.0001, "loss": 1.3778, "step": 4697 }, { "epoch": 0.7543352601156069, "grad_norm": 0.264752596616745, "learning_rate": 0.0001, "loss": 1.4958, "step": 4698 }, { "epoch": 0.7544958253050739, "grad_norm": 0.27301615476608276, "learning_rate": 0.0001, "loss": 1.5358, "step": 4699 }, { "epoch": 0.7546563904945408, "grad_norm": 0.2675076425075531, "learning_rate": 0.0001, "loss": 1.4918, "step": 4700 }, { "epoch": 0.7548169556840078, "grad_norm": 0.2580350637435913, "learning_rate": 0.0001, "loss": 1.4876, "step": 4701 }, { "epoch": 0.7549775208734746, "grad_norm": 0.2589607536792755, "learning_rate": 0.0001, "loss": 1.4948, "step": 4702 }, { "epoch": 0.7551380860629415, "grad_norm": 0.26058530807495117, "learning_rate": 0.0001, "loss": 1.3502, "step": 4703 }, { "epoch": 0.7552986512524085, "grad_norm": 0.2683979570865631, "learning_rate": 0.0001, "loss": 1.5257, "step": 4704 }, { "epoch": 0.7554592164418754, "grad_norm": 0.26223766803741455, "learning_rate": 0.0001, "loss": 1.5359, "step": 4705 }, { "epoch": 0.7556197816313424, "grad_norm": 0.3043115735054016, "learning_rate": 0.0001, "loss": 1.4526, "step": 4706 }, { "epoch": 0.7557803468208093, "grad_norm": 0.2684251666069031, "learning_rate": 0.0001, "loss": 1.5332, "step": 4707 }, { "epoch": 0.7559409120102761, "grad_norm": 0.25287577509880066, "learning_rate": 0.0001, "loss": 1.3995, "step": 4708 }, { "epoch": 0.7561014771997431, "grad_norm": 0.25544285774230957, "learning_rate": 0.0001, "loss": 1.5004, "step": 4709 }, { "epoch": 0.75626204238921, "grad_norm": 0.25161290168762207, "learning_rate": 0.0001, "loss": 1.4749, "step": 4710 }, { "epoch": 0.756422607578677, "grad_norm": 0.280152291059494, "learning_rate": 0.0001, "loss": 1.4558, "step": 4711 }, { "epoch": 0.7565831727681439, "grad_norm": 0.2701803743839264, "learning_rate": 0.0001, "loss": 1.4781, "step": 4712 }, { "epoch": 0.7567437379576107, "grad_norm": 0.254133015871048, "learning_rate": 0.0001, "loss": 1.4657, "step": 4713 }, { "epoch": 0.7569043031470777, "grad_norm": 0.2725158929824829, "learning_rate": 0.0001, "loss": 1.5022, "step": 4714 }, { "epoch": 0.7570648683365446, "grad_norm": 0.2598293721675873, "learning_rate": 0.0001, "loss": 1.446, "step": 4715 }, { "epoch": 0.7572254335260116, "grad_norm": 0.26696163415908813, "learning_rate": 0.0001, "loss": 1.5129, "step": 4716 }, { "epoch": 0.7573859987154785, "grad_norm": 0.26862427592277527, "learning_rate": 0.0001, "loss": 1.4589, "step": 4717 }, { "epoch": 0.7575465639049455, "grad_norm": 0.2665650248527527, "learning_rate": 0.0001, "loss": 1.5172, "step": 4718 }, { "epoch": 0.7577071290944123, "grad_norm": 0.2625967860221863, "learning_rate": 0.0001, "loss": 1.4693, "step": 4719 }, { "epoch": 0.7578676942838792, "grad_norm": 0.25631463527679443, "learning_rate": 0.0001, "loss": 1.5017, "step": 4720 }, { "epoch": 0.7580282594733462, "grad_norm": 0.2516089081764221, "learning_rate": 0.0001, "loss": 1.4522, "step": 4721 }, { "epoch": 0.7581888246628131, "grad_norm": 0.25942090153694153, "learning_rate": 0.0001, "loss": 1.5062, "step": 4722 }, { "epoch": 0.7583493898522801, "grad_norm": 0.2715856432914734, "learning_rate": 0.0001, "loss": 1.455, "step": 4723 }, { "epoch": 0.7585099550417469, "grad_norm": 0.24603420495986938, "learning_rate": 0.0001, "loss": 1.4145, "step": 4724 }, { "epoch": 0.7586705202312138, "grad_norm": 0.2652493715286255, "learning_rate": 0.0001, "loss": 1.5064, "step": 4725 }, { "epoch": 0.7588310854206808, "grad_norm": 0.2675178647041321, "learning_rate": 0.0001, "loss": 1.4618, "step": 4726 }, { "epoch": 0.7589916506101477, "grad_norm": 0.25946810841560364, "learning_rate": 0.0001, "loss": 1.4265, "step": 4727 }, { "epoch": 0.7591522157996147, "grad_norm": 0.3071325421333313, "learning_rate": 0.0001, "loss": 1.6179, "step": 4728 }, { "epoch": 0.7593127809890816, "grad_norm": 0.2836286425590515, "learning_rate": 0.0001, "loss": 1.5251, "step": 4729 }, { "epoch": 0.7594733461785484, "grad_norm": 0.2590748369693756, "learning_rate": 0.0001, "loss": 1.4962, "step": 4730 }, { "epoch": 0.7596339113680154, "grad_norm": 0.2758234143257141, "learning_rate": 0.0001, "loss": 1.505, "step": 4731 }, { "epoch": 0.7597944765574823, "grad_norm": 0.2708079218864441, "learning_rate": 0.0001, "loss": 1.5904, "step": 4732 }, { "epoch": 0.7599550417469493, "grad_norm": 0.27538663148880005, "learning_rate": 0.0001, "loss": 1.5742, "step": 4733 }, { "epoch": 0.7601156069364162, "grad_norm": 0.2765510380268097, "learning_rate": 0.0001, "loss": 1.4783, "step": 4734 }, { "epoch": 0.760276172125883, "grad_norm": 0.25619640946388245, "learning_rate": 0.0001, "loss": 1.4826, "step": 4735 }, { "epoch": 0.76043673731535, "grad_norm": 0.2517492473125458, "learning_rate": 0.0001, "loss": 1.4504, "step": 4736 }, { "epoch": 0.7605973025048169, "grad_norm": 0.25144222378730774, "learning_rate": 0.0001, "loss": 1.4623, "step": 4737 }, { "epoch": 0.7607578676942839, "grad_norm": 0.26716700196266174, "learning_rate": 0.0001, "loss": 1.5116, "step": 4738 }, { "epoch": 0.7609184328837508, "grad_norm": 0.2455921471118927, "learning_rate": 0.0001, "loss": 1.4347, "step": 4739 }, { "epoch": 0.7610789980732178, "grad_norm": 0.2558843791484833, "learning_rate": 0.0001, "loss": 1.4003, "step": 4740 }, { "epoch": 0.7612395632626846, "grad_norm": 0.2663254141807556, "learning_rate": 0.0001, "loss": 1.4465, "step": 4741 }, { "epoch": 0.7614001284521515, "grad_norm": 0.2517986595630646, "learning_rate": 0.0001, "loss": 1.4526, "step": 4742 }, { "epoch": 0.7615606936416185, "grad_norm": 0.2688898742198944, "learning_rate": 0.0001, "loss": 1.5246, "step": 4743 }, { "epoch": 0.7617212588310854, "grad_norm": 0.2682409882545471, "learning_rate": 0.0001, "loss": 1.5623, "step": 4744 }, { "epoch": 0.7618818240205524, "grad_norm": 0.2704203128814697, "learning_rate": 0.0001, "loss": 1.4919, "step": 4745 }, { "epoch": 0.7620423892100193, "grad_norm": 0.27167192101478577, "learning_rate": 0.0001, "loss": 1.5474, "step": 4746 }, { "epoch": 0.7622029543994862, "grad_norm": 0.27285847067832947, "learning_rate": 0.0001, "loss": 1.5328, "step": 4747 }, { "epoch": 0.7623635195889531, "grad_norm": 0.25148075819015503, "learning_rate": 0.0001, "loss": 1.4122, "step": 4748 }, { "epoch": 0.76252408477842, "grad_norm": 0.31446388363838196, "learning_rate": 0.0001, "loss": 1.4981, "step": 4749 }, { "epoch": 0.762684649967887, "grad_norm": 0.2945290803909302, "learning_rate": 0.0001, "loss": 1.5408, "step": 4750 }, { "epoch": 0.7628452151573539, "grad_norm": 0.25229719281196594, "learning_rate": 0.0001, "loss": 1.4355, "step": 4751 }, { "epoch": 0.7630057803468208, "grad_norm": 0.2849136292934418, "learning_rate": 0.0001, "loss": 1.4678, "step": 4752 }, { "epoch": 0.7631663455362877, "grad_norm": 0.27459242939949036, "learning_rate": 0.0001, "loss": 1.4002, "step": 4753 }, { "epoch": 0.7633269107257546, "grad_norm": 0.2552124857902527, "learning_rate": 0.0001, "loss": 1.545, "step": 4754 }, { "epoch": 0.7634874759152216, "grad_norm": 0.2512931525707245, "learning_rate": 0.0001, "loss": 1.4921, "step": 4755 }, { "epoch": 0.7636480411046885, "grad_norm": 0.2796391248703003, "learning_rate": 0.0001, "loss": 1.5396, "step": 4756 }, { "epoch": 0.7638086062941555, "grad_norm": 0.2868058383464813, "learning_rate": 0.0001, "loss": 1.5936, "step": 4757 }, { "epoch": 0.7639691714836223, "grad_norm": 0.25818485021591187, "learning_rate": 0.0001, "loss": 1.4779, "step": 4758 }, { "epoch": 0.7641297366730893, "grad_norm": 0.2648962736129761, "learning_rate": 0.0001, "loss": 1.5517, "step": 4759 }, { "epoch": 0.7642903018625562, "grad_norm": 0.305493026971817, "learning_rate": 0.0001, "loss": 1.5399, "step": 4760 }, { "epoch": 0.7644508670520231, "grad_norm": 0.2696012854576111, "learning_rate": 0.0001, "loss": 1.4914, "step": 4761 }, { "epoch": 0.7646114322414901, "grad_norm": 0.26010653376579285, "learning_rate": 0.0001, "loss": 1.3876, "step": 4762 }, { "epoch": 0.7647719974309569, "grad_norm": 0.27666372060775757, "learning_rate": 0.0001, "loss": 1.494, "step": 4763 }, { "epoch": 0.7649325626204239, "grad_norm": 0.27314937114715576, "learning_rate": 0.0001, "loss": 1.4817, "step": 4764 }, { "epoch": 0.7650931278098908, "grad_norm": 0.2704566419124603, "learning_rate": 0.0001, "loss": 1.4727, "step": 4765 }, { "epoch": 0.7652536929993577, "grad_norm": 0.2436332404613495, "learning_rate": 0.0001, "loss": 1.4332, "step": 4766 }, { "epoch": 0.7654142581888247, "grad_norm": 0.28788483142852783, "learning_rate": 0.0001, "loss": 1.6135, "step": 4767 }, { "epoch": 0.7655748233782916, "grad_norm": 0.26671111583709717, "learning_rate": 0.0001, "loss": 1.4954, "step": 4768 }, { "epoch": 0.7657353885677585, "grad_norm": 0.2638619840145111, "learning_rate": 0.0001, "loss": 1.4693, "step": 4769 }, { "epoch": 0.7658959537572254, "grad_norm": 0.26721131801605225, "learning_rate": 0.0001, "loss": 1.4429, "step": 4770 }, { "epoch": 0.7660565189466924, "grad_norm": 0.2585608661174774, "learning_rate": 0.0001, "loss": 1.4793, "step": 4771 }, { "epoch": 0.7662170841361593, "grad_norm": 0.2614308297634125, "learning_rate": 0.0001, "loss": 1.5581, "step": 4772 }, { "epoch": 0.7663776493256262, "grad_norm": 0.3514977693557739, "learning_rate": 0.0001, "loss": 1.4327, "step": 4773 }, { "epoch": 0.7665382145150931, "grad_norm": 0.26077356934547424, "learning_rate": 0.0001, "loss": 1.4794, "step": 4774 }, { "epoch": 0.76669877970456, "grad_norm": 0.2752145826816559, "learning_rate": 0.0001, "loss": 1.4869, "step": 4775 }, { "epoch": 0.766859344894027, "grad_norm": 0.2619808316230774, "learning_rate": 0.0001, "loss": 1.49, "step": 4776 }, { "epoch": 0.7670199100834939, "grad_norm": 0.2664008140563965, "learning_rate": 0.0001, "loss": 1.524, "step": 4777 }, { "epoch": 0.7671804752729608, "grad_norm": 0.2534385323524475, "learning_rate": 0.0001, "loss": 1.4437, "step": 4778 }, { "epoch": 0.7673410404624278, "grad_norm": 0.26758888363838196, "learning_rate": 0.0001, "loss": 1.4656, "step": 4779 }, { "epoch": 0.7675016056518946, "grad_norm": 0.26431041955947876, "learning_rate": 0.0001, "loss": 1.4692, "step": 4780 }, { "epoch": 0.7676621708413616, "grad_norm": 0.27445584535598755, "learning_rate": 0.0001, "loss": 1.5469, "step": 4781 }, { "epoch": 0.7678227360308285, "grad_norm": 0.24833647906780243, "learning_rate": 0.0001, "loss": 1.4113, "step": 4782 }, { "epoch": 0.7679833012202955, "grad_norm": 0.2731981873512268, "learning_rate": 0.0001, "loss": 1.5062, "step": 4783 }, { "epoch": 0.7681438664097624, "grad_norm": 0.2720390856266022, "learning_rate": 0.0001, "loss": 1.5158, "step": 4784 }, { "epoch": 0.7683044315992292, "grad_norm": 0.2624323070049286, "learning_rate": 0.0001, "loss": 1.5222, "step": 4785 }, { "epoch": 0.7684649967886962, "grad_norm": 0.26187559962272644, "learning_rate": 0.0001, "loss": 1.4671, "step": 4786 }, { "epoch": 0.7686255619781631, "grad_norm": 0.2950212061405182, "learning_rate": 0.0001, "loss": 1.5074, "step": 4787 }, { "epoch": 0.7687861271676301, "grad_norm": 0.26819872856140137, "learning_rate": 0.0001, "loss": 1.5208, "step": 4788 }, { "epoch": 0.768946692357097, "grad_norm": 0.2585736811161041, "learning_rate": 0.0001, "loss": 1.3985, "step": 4789 }, { "epoch": 0.769107257546564, "grad_norm": 0.2602596580982208, "learning_rate": 0.0001, "loss": 1.5587, "step": 4790 }, { "epoch": 0.7692678227360308, "grad_norm": 0.2900967597961426, "learning_rate": 0.0001, "loss": 1.4779, "step": 4791 }, { "epoch": 0.7694283879254977, "grad_norm": 0.2687471807003021, "learning_rate": 0.0001, "loss": 1.4822, "step": 4792 }, { "epoch": 0.7695889531149647, "grad_norm": 0.286593496799469, "learning_rate": 0.0001, "loss": 1.4339, "step": 4793 }, { "epoch": 0.7697495183044316, "grad_norm": 0.2533605694770813, "learning_rate": 0.0001, "loss": 1.4495, "step": 4794 }, { "epoch": 0.7699100834938986, "grad_norm": 0.25842908024787903, "learning_rate": 0.0001, "loss": 1.5387, "step": 4795 }, { "epoch": 0.7700706486833655, "grad_norm": 0.2921748161315918, "learning_rate": 0.0001, "loss": 1.5241, "step": 4796 }, { "epoch": 0.7702312138728323, "grad_norm": 0.2845020592212677, "learning_rate": 0.0001, "loss": 1.5323, "step": 4797 }, { "epoch": 0.7703917790622993, "grad_norm": 0.2742638885974884, "learning_rate": 0.0001, "loss": 1.4803, "step": 4798 }, { "epoch": 0.7705523442517662, "grad_norm": 0.2830826938152313, "learning_rate": 0.0001, "loss": 1.4818, "step": 4799 }, { "epoch": 0.7707129094412332, "grad_norm": 0.27695101499557495, "learning_rate": 0.0001, "loss": 1.5127, "step": 4800 }, { "epoch": 0.7708734746307001, "grad_norm": 0.2770475447177887, "learning_rate": 0.0001, "loss": 1.4154, "step": 4801 }, { "epoch": 0.7710340398201669, "grad_norm": 0.26153096556663513, "learning_rate": 0.0001, "loss": 1.4582, "step": 4802 }, { "epoch": 0.7711946050096339, "grad_norm": 0.26818645000457764, "learning_rate": 0.0001, "loss": 1.4709, "step": 4803 }, { "epoch": 0.7713551701991008, "grad_norm": 0.27246400713920593, "learning_rate": 0.0001, "loss": 1.441, "step": 4804 }, { "epoch": 0.7715157353885678, "grad_norm": 0.26476332545280457, "learning_rate": 0.0001, "loss": 1.5333, "step": 4805 }, { "epoch": 0.7716763005780347, "grad_norm": 0.26698577404022217, "learning_rate": 0.0001, "loss": 1.5364, "step": 4806 }, { "epoch": 0.7718368657675017, "grad_norm": 0.2695167660713196, "learning_rate": 0.0001, "loss": 1.4789, "step": 4807 }, { "epoch": 0.7719974309569685, "grad_norm": 0.2772916853427887, "learning_rate": 0.0001, "loss": 1.502, "step": 4808 }, { "epoch": 0.7721579961464354, "grad_norm": 0.2715173363685608, "learning_rate": 0.0001, "loss": 1.4884, "step": 4809 }, { "epoch": 0.7723185613359024, "grad_norm": 0.27370476722717285, "learning_rate": 0.0001, "loss": 1.4717, "step": 4810 }, { "epoch": 0.7724791265253693, "grad_norm": 0.26012903451919556, "learning_rate": 0.0001, "loss": 1.5177, "step": 4811 }, { "epoch": 0.7726396917148363, "grad_norm": 0.2607390582561493, "learning_rate": 0.0001, "loss": 1.4756, "step": 4812 }, { "epoch": 0.7728002569043031, "grad_norm": 0.28200775384902954, "learning_rate": 0.0001, "loss": 1.524, "step": 4813 }, { "epoch": 0.77296082209377, "grad_norm": 0.2612951695919037, "learning_rate": 0.0001, "loss": 1.4588, "step": 4814 }, { "epoch": 0.773121387283237, "grad_norm": 0.26932767033576965, "learning_rate": 0.0001, "loss": 1.4984, "step": 4815 }, { "epoch": 0.7732819524727039, "grad_norm": 0.26361799240112305, "learning_rate": 0.0001, "loss": 1.4578, "step": 4816 }, { "epoch": 0.7734425176621709, "grad_norm": 0.2659071385860443, "learning_rate": 0.0001, "loss": 1.5742, "step": 4817 }, { "epoch": 0.7736030828516378, "grad_norm": 0.26427409052848816, "learning_rate": 0.0001, "loss": 1.5407, "step": 4818 }, { "epoch": 0.7737636480411046, "grad_norm": 0.2671090066432953, "learning_rate": 0.0001, "loss": 1.5054, "step": 4819 }, { "epoch": 0.7739242132305716, "grad_norm": 0.26824119687080383, "learning_rate": 0.0001, "loss": 1.527, "step": 4820 }, { "epoch": 0.7740847784200385, "grad_norm": 0.25563180446624756, "learning_rate": 0.0001, "loss": 1.4901, "step": 4821 }, { "epoch": 0.7742453436095055, "grad_norm": 0.25880300998687744, "learning_rate": 0.0001, "loss": 1.4817, "step": 4822 }, { "epoch": 0.7744059087989724, "grad_norm": 0.2709629237651825, "learning_rate": 0.0001, "loss": 1.6155, "step": 4823 }, { "epoch": 0.7745664739884393, "grad_norm": 0.26997342705726624, "learning_rate": 0.0001, "loss": 1.5066, "step": 4824 }, { "epoch": 0.7747270391779062, "grad_norm": 0.26577097177505493, "learning_rate": 0.0001, "loss": 1.5907, "step": 4825 }, { "epoch": 0.7748876043673731, "grad_norm": 0.26206445693969727, "learning_rate": 0.0001, "loss": 1.5525, "step": 4826 }, { "epoch": 0.7750481695568401, "grad_norm": 0.26891064643859863, "learning_rate": 0.0001, "loss": 1.585, "step": 4827 }, { "epoch": 0.775208734746307, "grad_norm": 0.2602813243865967, "learning_rate": 0.0001, "loss": 1.5347, "step": 4828 }, { "epoch": 0.775369299935774, "grad_norm": 0.2580989897251129, "learning_rate": 0.0001, "loss": 1.4591, "step": 4829 }, { "epoch": 0.7755298651252408, "grad_norm": 0.2754908800125122, "learning_rate": 0.0001, "loss": 1.5025, "step": 4830 }, { "epoch": 0.7756904303147077, "grad_norm": 0.4796976149082184, "learning_rate": 0.0001, "loss": 1.5579, "step": 4831 }, { "epoch": 0.7758509955041747, "grad_norm": 0.2524743378162384, "learning_rate": 0.0001, "loss": 1.3753, "step": 4832 }, { "epoch": 0.7760115606936416, "grad_norm": 0.25471341609954834, "learning_rate": 0.0001, "loss": 1.4611, "step": 4833 }, { "epoch": 0.7761721258831086, "grad_norm": 0.27099505066871643, "learning_rate": 0.0001, "loss": 1.5078, "step": 4834 }, { "epoch": 0.7763326910725755, "grad_norm": 0.28913718461990356, "learning_rate": 0.0001, "loss": 1.5626, "step": 4835 }, { "epoch": 0.7764932562620424, "grad_norm": 0.29221460223197937, "learning_rate": 0.0001, "loss": 1.5956, "step": 4836 }, { "epoch": 0.7766538214515093, "grad_norm": 0.26697152853012085, "learning_rate": 0.0001, "loss": 1.5113, "step": 4837 }, { "epoch": 0.7768143866409762, "grad_norm": 0.26228007674217224, "learning_rate": 0.0001, "loss": 1.4794, "step": 4838 }, { "epoch": 0.7769749518304432, "grad_norm": 0.2753364145755768, "learning_rate": 0.0001, "loss": 1.5021, "step": 4839 }, { "epoch": 0.7771355170199101, "grad_norm": 0.27867865562438965, "learning_rate": 0.0001, "loss": 1.4644, "step": 4840 }, { "epoch": 0.777296082209377, "grad_norm": 0.276676207780838, "learning_rate": 0.0001, "loss": 1.4687, "step": 4841 }, { "epoch": 0.7774566473988439, "grad_norm": 0.26723018288612366, "learning_rate": 0.0001, "loss": 1.4774, "step": 4842 }, { "epoch": 0.7776172125883108, "grad_norm": 0.2919076383113861, "learning_rate": 0.0001, "loss": 1.4366, "step": 4843 }, { "epoch": 0.7777777777777778, "grad_norm": 0.257845014333725, "learning_rate": 0.0001, "loss": 1.4358, "step": 4844 }, { "epoch": 0.7779383429672447, "grad_norm": 0.2613871395587921, "learning_rate": 0.0001, "loss": 1.4362, "step": 4845 }, { "epoch": 0.7780989081567117, "grad_norm": 0.2684071362018585, "learning_rate": 0.0001, "loss": 1.4661, "step": 4846 }, { "epoch": 0.7782594733461785, "grad_norm": 0.2790563702583313, "learning_rate": 0.0001, "loss": 1.5563, "step": 4847 }, { "epoch": 0.7784200385356455, "grad_norm": 0.27308058738708496, "learning_rate": 0.0001, "loss": 1.4427, "step": 4848 }, { "epoch": 0.7785806037251124, "grad_norm": 0.2813565135002136, "learning_rate": 0.0001, "loss": 1.544, "step": 4849 }, { "epoch": 0.7787411689145793, "grad_norm": 0.27245306968688965, "learning_rate": 0.0001, "loss": 1.5118, "step": 4850 }, { "epoch": 0.7789017341040463, "grad_norm": 0.2604306936264038, "learning_rate": 0.0001, "loss": 1.4451, "step": 4851 }, { "epoch": 0.7790622992935131, "grad_norm": 0.24787943065166473, "learning_rate": 0.0001, "loss": 1.4135, "step": 4852 }, { "epoch": 0.7792228644829801, "grad_norm": 0.26028093695640564, "learning_rate": 0.0001, "loss": 1.4898, "step": 4853 }, { "epoch": 0.779383429672447, "grad_norm": 0.2668739855289459, "learning_rate": 0.0001, "loss": 1.4557, "step": 4854 }, { "epoch": 0.779543994861914, "grad_norm": 0.2671746015548706, "learning_rate": 0.0001, "loss": 1.5064, "step": 4855 }, { "epoch": 0.7797045600513809, "grad_norm": 0.25071027874946594, "learning_rate": 0.0001, "loss": 1.4033, "step": 4856 }, { "epoch": 0.7798651252408478, "grad_norm": 0.27966752648353577, "learning_rate": 0.0001, "loss": 1.5315, "step": 4857 }, { "epoch": 0.7800256904303147, "grad_norm": 0.264328271150589, "learning_rate": 0.0001, "loss": 1.4493, "step": 4858 }, { "epoch": 0.7801862556197816, "grad_norm": 0.2697535753250122, "learning_rate": 0.0001, "loss": 1.4789, "step": 4859 }, { "epoch": 0.7803468208092486, "grad_norm": 0.2685675024986267, "learning_rate": 0.0001, "loss": 1.4747, "step": 4860 }, { "epoch": 0.7805073859987155, "grad_norm": 0.25597450137138367, "learning_rate": 0.0001, "loss": 1.5201, "step": 4861 }, { "epoch": 0.7806679511881824, "grad_norm": 0.2750207483768463, "learning_rate": 0.0001, "loss": 1.4902, "step": 4862 }, { "epoch": 0.7808285163776493, "grad_norm": 0.2637827694416046, "learning_rate": 0.0001, "loss": 1.534, "step": 4863 }, { "epoch": 0.7809890815671162, "grad_norm": 0.2612398564815521, "learning_rate": 0.0001, "loss": 1.4627, "step": 4864 }, { "epoch": 0.7811496467565832, "grad_norm": 0.25394272804260254, "learning_rate": 0.0001, "loss": 1.452, "step": 4865 }, { "epoch": 0.7813102119460501, "grad_norm": 0.2608264684677124, "learning_rate": 0.0001, "loss": 1.4986, "step": 4866 }, { "epoch": 0.781470777135517, "grad_norm": 0.2770000398159027, "learning_rate": 0.0001, "loss": 1.5841, "step": 4867 }, { "epoch": 0.781631342324984, "grad_norm": 0.2652257978916168, "learning_rate": 0.0001, "loss": 1.5138, "step": 4868 }, { "epoch": 0.7817919075144508, "grad_norm": 0.2749718129634857, "learning_rate": 0.0001, "loss": 1.4816, "step": 4869 }, { "epoch": 0.7819524727039178, "grad_norm": 0.25634610652923584, "learning_rate": 0.0001, "loss": 1.5285, "step": 4870 }, { "epoch": 0.7821130378933847, "grad_norm": 0.26548346877098083, "learning_rate": 0.0001, "loss": 1.4286, "step": 4871 }, { "epoch": 0.7822736030828517, "grad_norm": 0.2843562066555023, "learning_rate": 0.0001, "loss": 1.5951, "step": 4872 }, { "epoch": 0.7824341682723186, "grad_norm": 0.2538413107395172, "learning_rate": 0.0001, "loss": 1.4865, "step": 4873 }, { "epoch": 0.7825947334617854, "grad_norm": 0.2846985161304474, "learning_rate": 0.0001, "loss": 1.5546, "step": 4874 }, { "epoch": 0.7827552986512524, "grad_norm": 0.27610915899276733, "learning_rate": 0.0001, "loss": 1.5497, "step": 4875 }, { "epoch": 0.7829158638407193, "grad_norm": 0.27627769112586975, "learning_rate": 0.0001, "loss": 1.5147, "step": 4876 }, { "epoch": 0.7830764290301863, "grad_norm": 0.2938385307788849, "learning_rate": 0.0001, "loss": 1.4364, "step": 4877 }, { "epoch": 0.7832369942196532, "grad_norm": 0.2528034746646881, "learning_rate": 0.0001, "loss": 1.4407, "step": 4878 }, { "epoch": 0.7833975594091201, "grad_norm": 0.2692616581916809, "learning_rate": 0.0001, "loss": 1.4386, "step": 4879 }, { "epoch": 0.783558124598587, "grad_norm": 0.26693812012672424, "learning_rate": 0.0001, "loss": 1.5488, "step": 4880 }, { "epoch": 0.7837186897880539, "grad_norm": 0.3015109598636627, "learning_rate": 0.0001, "loss": 1.4796, "step": 4881 }, { "epoch": 0.7838792549775209, "grad_norm": 0.29318591952323914, "learning_rate": 0.0001, "loss": 1.5622, "step": 4882 }, { "epoch": 0.7840398201669878, "grad_norm": 0.2597436308860779, "learning_rate": 0.0001, "loss": 1.4494, "step": 4883 }, { "epoch": 0.7842003853564548, "grad_norm": 0.25885215401649475, "learning_rate": 0.0001, "loss": 1.5154, "step": 4884 }, { "epoch": 0.7843609505459217, "grad_norm": 0.2527919411659241, "learning_rate": 0.0001, "loss": 1.4266, "step": 4885 }, { "epoch": 0.7845215157353885, "grad_norm": 0.288007527589798, "learning_rate": 0.0001, "loss": 1.5183, "step": 4886 }, { "epoch": 0.7846820809248555, "grad_norm": 0.2749107778072357, "learning_rate": 0.0001, "loss": 1.4981, "step": 4887 }, { "epoch": 0.7848426461143224, "grad_norm": 0.2630927264690399, "learning_rate": 0.0001, "loss": 1.4514, "step": 4888 }, { "epoch": 0.7850032113037894, "grad_norm": 0.27400052547454834, "learning_rate": 0.0001, "loss": 1.5732, "step": 4889 }, { "epoch": 0.7851637764932563, "grad_norm": 0.2656361758708954, "learning_rate": 0.0001, "loss": 1.4284, "step": 4890 }, { "epoch": 0.7853243416827231, "grad_norm": 0.26750198006629944, "learning_rate": 0.0001, "loss": 1.4855, "step": 4891 }, { "epoch": 0.7854849068721901, "grad_norm": 0.2690584361553192, "learning_rate": 0.0001, "loss": 1.5387, "step": 4892 }, { "epoch": 0.785645472061657, "grad_norm": 0.2951248586177826, "learning_rate": 0.0001, "loss": 1.4459, "step": 4893 }, { "epoch": 0.785806037251124, "grad_norm": 0.2853166162967682, "learning_rate": 0.0001, "loss": 1.5257, "step": 4894 }, { "epoch": 0.7859666024405909, "grad_norm": 0.25854942202568054, "learning_rate": 0.0001, "loss": 1.3991, "step": 4895 }, { "epoch": 0.7861271676300579, "grad_norm": 0.27628210186958313, "learning_rate": 0.0001, "loss": 1.5019, "step": 4896 }, { "epoch": 0.7862877328195247, "grad_norm": 0.2692117989063263, "learning_rate": 0.0001, "loss": 1.453, "step": 4897 }, { "epoch": 0.7864482980089916, "grad_norm": 0.2619384527206421, "learning_rate": 0.0001, "loss": 1.474, "step": 4898 }, { "epoch": 0.7866088631984586, "grad_norm": 0.26570191979408264, "learning_rate": 0.0001, "loss": 1.4745, "step": 4899 }, { "epoch": 0.7867694283879255, "grad_norm": 0.2800072729587555, "learning_rate": 0.0001, "loss": 1.5106, "step": 4900 }, { "epoch": 0.7869299935773925, "grad_norm": 0.2808997631072998, "learning_rate": 0.0001, "loss": 1.526, "step": 4901 }, { "epoch": 0.7870905587668593, "grad_norm": 0.2560919523239136, "learning_rate": 0.0001, "loss": 1.4728, "step": 4902 }, { "epoch": 0.7872511239563262, "grad_norm": 0.27413082122802734, "learning_rate": 0.0001, "loss": 1.5347, "step": 4903 }, { "epoch": 0.7874116891457932, "grad_norm": 0.26692163944244385, "learning_rate": 0.0001, "loss": 1.4403, "step": 4904 }, { "epoch": 0.7875722543352601, "grad_norm": 0.28223931789398193, "learning_rate": 0.0001, "loss": 1.4638, "step": 4905 }, { "epoch": 0.7877328195247271, "grad_norm": 0.2553137242794037, "learning_rate": 0.0001, "loss": 1.4612, "step": 4906 }, { "epoch": 0.787893384714194, "grad_norm": 0.30425265431404114, "learning_rate": 0.0001, "loss": 1.5459, "step": 4907 }, { "epoch": 0.7880539499036608, "grad_norm": 0.25780966877937317, "learning_rate": 0.0001, "loss": 1.3951, "step": 4908 }, { "epoch": 0.7882145150931278, "grad_norm": 0.27276623249053955, "learning_rate": 0.0001, "loss": 1.5178, "step": 4909 }, { "epoch": 0.7883750802825947, "grad_norm": 0.27824667096138, "learning_rate": 0.0001, "loss": 1.4709, "step": 4910 }, { "epoch": 0.7885356454720617, "grad_norm": 0.27756956219673157, "learning_rate": 0.0001, "loss": 1.53, "step": 4911 }, { "epoch": 0.7886962106615286, "grad_norm": 0.2589087188243866, "learning_rate": 0.0001, "loss": 1.42, "step": 4912 }, { "epoch": 0.7888567758509955, "grad_norm": 0.26565444469451904, "learning_rate": 0.0001, "loss": 1.4952, "step": 4913 }, { "epoch": 0.7890173410404624, "grad_norm": 0.272074431180954, "learning_rate": 0.0001, "loss": 1.4637, "step": 4914 }, { "epoch": 0.7891779062299293, "grad_norm": 0.257182240486145, "learning_rate": 0.0001, "loss": 1.5132, "step": 4915 }, { "epoch": 0.7893384714193963, "grad_norm": 0.2661198079586029, "learning_rate": 0.0001, "loss": 1.4332, "step": 4916 }, { "epoch": 0.7894990366088632, "grad_norm": 0.3132728040218353, "learning_rate": 0.0001, "loss": 1.4547, "step": 4917 }, { "epoch": 0.7896596017983302, "grad_norm": 0.27426013350486755, "learning_rate": 0.0001, "loss": 1.4991, "step": 4918 }, { "epoch": 0.789820166987797, "grad_norm": 0.27901703119277954, "learning_rate": 0.0001, "loss": 1.5383, "step": 4919 }, { "epoch": 0.789980732177264, "grad_norm": 0.25175678730010986, "learning_rate": 0.0001, "loss": 1.5175, "step": 4920 }, { "epoch": 0.7901412973667309, "grad_norm": 0.2794884145259857, "learning_rate": 0.0001, "loss": 1.5227, "step": 4921 }, { "epoch": 0.7903018625561978, "grad_norm": 0.25591138005256653, "learning_rate": 0.0001, "loss": 1.4893, "step": 4922 }, { "epoch": 0.7904624277456648, "grad_norm": 0.26282110810279846, "learning_rate": 0.0001, "loss": 1.4878, "step": 4923 }, { "epoch": 0.7906229929351317, "grad_norm": 0.3139071464538574, "learning_rate": 0.0001, "loss": 1.4877, "step": 4924 }, { "epoch": 0.7907835581245986, "grad_norm": 0.2633921504020691, "learning_rate": 0.0001, "loss": 1.3999, "step": 4925 }, { "epoch": 0.7909441233140655, "grad_norm": 0.2790989577770233, "learning_rate": 0.0001, "loss": 1.4996, "step": 4926 }, { "epoch": 0.7911046885035324, "grad_norm": 0.26268425583839417, "learning_rate": 0.0001, "loss": 1.4982, "step": 4927 }, { "epoch": 0.7912652536929994, "grad_norm": 0.27156296372413635, "learning_rate": 0.0001, "loss": 1.445, "step": 4928 }, { "epoch": 0.7914258188824663, "grad_norm": 0.250273734331131, "learning_rate": 0.0001, "loss": 1.4499, "step": 4929 }, { "epoch": 0.7915863840719332, "grad_norm": 0.2585732042789459, "learning_rate": 0.0001, "loss": 1.419, "step": 4930 }, { "epoch": 0.7917469492614001, "grad_norm": 0.2718723714351654, "learning_rate": 0.0001, "loss": 1.4687, "step": 4931 }, { "epoch": 0.791907514450867, "grad_norm": 0.26102644205093384, "learning_rate": 0.0001, "loss": 1.4524, "step": 4932 }, { "epoch": 0.792068079640334, "grad_norm": 0.2890603840351105, "learning_rate": 0.0001, "loss": 1.4379, "step": 4933 }, { "epoch": 0.7922286448298009, "grad_norm": 0.28953224420547485, "learning_rate": 0.0001, "loss": 1.5449, "step": 4934 }, { "epoch": 0.7923892100192679, "grad_norm": 0.26467910408973694, "learning_rate": 0.0001, "loss": 1.4644, "step": 4935 }, { "epoch": 0.7925497752087347, "grad_norm": 0.610118567943573, "learning_rate": 0.0001, "loss": 1.4814, "step": 4936 }, { "epoch": 0.7927103403982017, "grad_norm": 0.2836211323738098, "learning_rate": 0.0001, "loss": 1.5604, "step": 4937 }, { "epoch": 0.7928709055876686, "grad_norm": 0.26038748025894165, "learning_rate": 0.0001, "loss": 1.4935, "step": 4938 }, { "epoch": 0.7930314707771355, "grad_norm": 0.2619526982307434, "learning_rate": 0.0001, "loss": 1.5362, "step": 4939 }, { "epoch": 0.7931920359666025, "grad_norm": 0.2593645751476288, "learning_rate": 0.0001, "loss": 1.3833, "step": 4940 }, { "epoch": 0.7933526011560693, "grad_norm": 0.27356091141700745, "learning_rate": 0.0001, "loss": 1.4723, "step": 4941 }, { "epoch": 0.7935131663455363, "grad_norm": 0.3041076362133026, "learning_rate": 0.0001, "loss": 1.4685, "step": 4942 }, { "epoch": 0.7936737315350032, "grad_norm": 0.2665919065475464, "learning_rate": 0.0001, "loss": 1.4729, "step": 4943 }, { "epoch": 0.7938342967244701, "grad_norm": 0.27540159225463867, "learning_rate": 0.0001, "loss": 1.4914, "step": 4944 }, { "epoch": 0.7939948619139371, "grad_norm": 0.2577289938926697, "learning_rate": 0.0001, "loss": 1.4944, "step": 4945 }, { "epoch": 0.794155427103404, "grad_norm": 0.261925607919693, "learning_rate": 0.0001, "loss": 1.451, "step": 4946 }, { "epoch": 0.7943159922928709, "grad_norm": 0.2669730484485626, "learning_rate": 0.0001, "loss": 1.5532, "step": 4947 }, { "epoch": 0.7944765574823378, "grad_norm": 0.2583950459957123, "learning_rate": 0.0001, "loss": 1.4655, "step": 4948 }, { "epoch": 0.7946371226718048, "grad_norm": 0.2625668942928314, "learning_rate": 0.0001, "loss": 1.4777, "step": 4949 }, { "epoch": 0.7947976878612717, "grad_norm": 0.2661846876144409, "learning_rate": 0.0001, "loss": 1.5237, "step": 4950 }, { "epoch": 0.7949582530507386, "grad_norm": 0.2708057463169098, "learning_rate": 0.0001, "loss": 1.5202, "step": 4951 }, { "epoch": 0.7951188182402055, "grad_norm": 0.26769402623176575, "learning_rate": 0.0001, "loss": 1.5222, "step": 4952 }, { "epoch": 0.7952793834296724, "grad_norm": 0.267253577709198, "learning_rate": 0.0001, "loss": 1.4605, "step": 4953 }, { "epoch": 0.7954399486191394, "grad_norm": 0.2620091438293457, "learning_rate": 0.0001, "loss": 1.4502, "step": 4954 }, { "epoch": 0.7956005138086063, "grad_norm": 0.27369511127471924, "learning_rate": 0.0001, "loss": 1.4489, "step": 4955 }, { "epoch": 0.7957610789980732, "grad_norm": 0.2532361149787903, "learning_rate": 0.0001, "loss": 1.4876, "step": 4956 }, { "epoch": 0.7959216441875402, "grad_norm": 0.258556991815567, "learning_rate": 0.0001, "loss": 1.4314, "step": 4957 }, { "epoch": 0.796082209377007, "grad_norm": 0.2675907611846924, "learning_rate": 0.0001, "loss": 1.4406, "step": 4958 }, { "epoch": 0.796242774566474, "grad_norm": 0.2844993770122528, "learning_rate": 0.0001, "loss": 1.4963, "step": 4959 }, { "epoch": 0.7964033397559409, "grad_norm": 0.2730778455734253, "learning_rate": 0.0001, "loss": 1.5229, "step": 4960 }, { "epoch": 0.7965639049454079, "grad_norm": 0.29573535919189453, "learning_rate": 0.0001, "loss": 1.5178, "step": 4961 }, { "epoch": 0.7967244701348748, "grad_norm": 0.27346470952033997, "learning_rate": 0.0001, "loss": 1.4969, "step": 4962 }, { "epoch": 0.7968850353243416, "grad_norm": 0.29766732454299927, "learning_rate": 0.0001, "loss": 1.4557, "step": 4963 }, { "epoch": 0.7970456005138086, "grad_norm": 0.262014776468277, "learning_rate": 0.0001, "loss": 1.4524, "step": 4964 }, { "epoch": 0.7972061657032755, "grad_norm": 0.2678148150444031, "learning_rate": 0.0001, "loss": 1.4671, "step": 4965 }, { "epoch": 0.7973667308927425, "grad_norm": 0.2627437114715576, "learning_rate": 0.0001, "loss": 1.4819, "step": 4966 }, { "epoch": 0.7975272960822094, "grad_norm": 0.2854844927787781, "learning_rate": 0.0001, "loss": 1.4952, "step": 4967 }, { "epoch": 0.7976878612716763, "grad_norm": 0.26114368438720703, "learning_rate": 0.0001, "loss": 1.4042, "step": 4968 }, { "epoch": 0.7978484264611432, "grad_norm": 0.2642273008823395, "learning_rate": 0.0001, "loss": 1.5096, "step": 4969 }, { "epoch": 0.7980089916506101, "grad_norm": 0.25962576270103455, "learning_rate": 0.0001, "loss": 1.3862, "step": 4970 }, { "epoch": 0.7981695568400771, "grad_norm": 0.27187272906303406, "learning_rate": 0.0001, "loss": 1.4273, "step": 4971 }, { "epoch": 0.798330122029544, "grad_norm": 0.26994588971138, "learning_rate": 0.0001, "loss": 1.5165, "step": 4972 }, { "epoch": 0.798490687219011, "grad_norm": 0.2657373249530792, "learning_rate": 0.0001, "loss": 1.5219, "step": 4973 }, { "epoch": 0.7986512524084779, "grad_norm": 0.2619072496891022, "learning_rate": 0.0001, "loss": 1.4555, "step": 4974 }, { "epoch": 0.7988118175979447, "grad_norm": 0.283348947763443, "learning_rate": 0.0001, "loss": 1.5939, "step": 4975 }, { "epoch": 0.7989723827874117, "grad_norm": 0.2702791392803192, "learning_rate": 0.0001, "loss": 1.484, "step": 4976 }, { "epoch": 0.7991329479768786, "grad_norm": 0.26904091238975525, "learning_rate": 0.0001, "loss": 1.4473, "step": 4977 }, { "epoch": 0.7992935131663456, "grad_norm": 0.2811717987060547, "learning_rate": 0.0001, "loss": 1.445, "step": 4978 }, { "epoch": 0.7994540783558125, "grad_norm": 0.28999173641204834, "learning_rate": 0.0001, "loss": 1.5023, "step": 4979 }, { "epoch": 0.7996146435452793, "grad_norm": 0.2871098518371582, "learning_rate": 0.0001, "loss": 1.4955, "step": 4980 }, { "epoch": 0.7997752087347463, "grad_norm": 0.2720760107040405, "learning_rate": 0.0001, "loss": 1.4889, "step": 4981 }, { "epoch": 0.7999357739242132, "grad_norm": 0.25334689021110535, "learning_rate": 0.0001, "loss": 1.3643, "step": 4982 }, { "epoch": 0.8000963391136802, "grad_norm": 0.2747771441936493, "learning_rate": 0.0001, "loss": 1.4334, "step": 4983 }, { "epoch": 0.8002569043031471, "grad_norm": 0.2666880190372467, "learning_rate": 0.0001, "loss": 1.578, "step": 4984 }, { "epoch": 0.800417469492614, "grad_norm": 0.279110848903656, "learning_rate": 0.0001, "loss": 1.4749, "step": 4985 }, { "epoch": 0.8005780346820809, "grad_norm": 0.27905863523483276, "learning_rate": 0.0001, "loss": 1.4997, "step": 4986 }, { "epoch": 0.8007385998715478, "grad_norm": 0.2762279212474823, "learning_rate": 0.0001, "loss": 1.5345, "step": 4987 }, { "epoch": 0.8008991650610148, "grad_norm": 0.2663026750087738, "learning_rate": 0.0001, "loss": 1.4433, "step": 4988 }, { "epoch": 0.8010597302504817, "grad_norm": 0.275191068649292, "learning_rate": 0.0001, "loss": 1.5455, "step": 4989 }, { "epoch": 0.8012202954399487, "grad_norm": 0.2972000241279602, "learning_rate": 0.0001, "loss": 1.5177, "step": 4990 }, { "epoch": 0.8013808606294155, "grad_norm": 0.2799358367919922, "learning_rate": 0.0001, "loss": 1.582, "step": 4991 }, { "epoch": 0.8015414258188824, "grad_norm": 0.28580302000045776, "learning_rate": 0.0001, "loss": 1.5184, "step": 4992 }, { "epoch": 0.8017019910083494, "grad_norm": 0.26291462779045105, "learning_rate": 0.0001, "loss": 1.4824, "step": 4993 }, { "epoch": 0.8018625561978163, "grad_norm": 0.29605501890182495, "learning_rate": 0.0001, "loss": 1.5611, "step": 4994 }, { "epoch": 0.8020231213872833, "grad_norm": 0.2709025740623474, "learning_rate": 0.0001, "loss": 1.5195, "step": 4995 }, { "epoch": 0.8021836865767502, "grad_norm": 0.26770129799842834, "learning_rate": 0.0001, "loss": 1.4864, "step": 4996 }, { "epoch": 0.802344251766217, "grad_norm": 0.26983755826950073, "learning_rate": 0.0001, "loss": 1.511, "step": 4997 }, { "epoch": 0.802504816955684, "grad_norm": 0.2597654163837433, "learning_rate": 0.0001, "loss": 1.5056, "step": 4998 }, { "epoch": 0.8026653821451509, "grad_norm": 0.274734765291214, "learning_rate": 0.0001, "loss": 1.5336, "step": 4999 }, { "epoch": 0.8028259473346179, "grad_norm": 0.2811043858528137, "learning_rate": 0.0001, "loss": 1.4991, "step": 5000 }, { "epoch": 0.8029865125240848, "grad_norm": 0.26507386565208435, "learning_rate": 0.0001, "loss": 1.3735, "step": 5001 }, { "epoch": 0.8031470777135516, "grad_norm": 0.27205589413642883, "learning_rate": 0.0001, "loss": 1.4867, "step": 5002 }, { "epoch": 0.8033076429030186, "grad_norm": 0.2646946609020233, "learning_rate": 0.0001, "loss": 1.505, "step": 5003 }, { "epoch": 0.8034682080924855, "grad_norm": 0.27561819553375244, "learning_rate": 0.0001, "loss": 1.522, "step": 5004 }, { "epoch": 0.8036287732819525, "grad_norm": 0.25112950801849365, "learning_rate": 0.0001, "loss": 1.4336, "step": 5005 }, { "epoch": 0.8037893384714194, "grad_norm": 0.2693318724632263, "learning_rate": 0.0001, "loss": 1.4009, "step": 5006 }, { "epoch": 0.8039499036608864, "grad_norm": 0.2844517230987549, "learning_rate": 0.0001, "loss": 1.5084, "step": 5007 }, { "epoch": 0.8041104688503532, "grad_norm": 2.04303240776062, "learning_rate": 0.0001, "loss": 1.4859, "step": 5008 }, { "epoch": 0.8042710340398201, "grad_norm": 0.2620633542537689, "learning_rate": 0.0001, "loss": 1.5667, "step": 5009 }, { "epoch": 0.8044315992292871, "grad_norm": 0.28380200266838074, "learning_rate": 0.0001, "loss": 1.4166, "step": 5010 }, { "epoch": 0.804592164418754, "grad_norm": 0.31242579221725464, "learning_rate": 0.0001, "loss": 1.5084, "step": 5011 }, { "epoch": 0.804752729608221, "grad_norm": 0.28257498145103455, "learning_rate": 0.0001, "loss": 1.4796, "step": 5012 }, { "epoch": 0.8049132947976878, "grad_norm": 0.3021131455898285, "learning_rate": 0.0001, "loss": 1.4714, "step": 5013 }, { "epoch": 0.8050738599871547, "grad_norm": 0.2672417461872101, "learning_rate": 0.0001, "loss": 1.4839, "step": 5014 }, { "epoch": 0.8052344251766217, "grad_norm": 0.27186620235443115, "learning_rate": 0.0001, "loss": 1.4964, "step": 5015 }, { "epoch": 0.8053949903660886, "grad_norm": 0.26172634959220886, "learning_rate": 0.0001, "loss": 1.4633, "step": 5016 }, { "epoch": 0.8055555555555556, "grad_norm": 0.2712869346141815, "learning_rate": 0.0001, "loss": 1.4988, "step": 5017 }, { "epoch": 0.8057161207450225, "grad_norm": 0.2918786108493805, "learning_rate": 0.0001, "loss": 1.5346, "step": 5018 }, { "epoch": 0.8058766859344894, "grad_norm": 0.28895458579063416, "learning_rate": 0.0001, "loss": 1.4888, "step": 5019 }, { "epoch": 0.8060372511239563, "grad_norm": 0.27583470940589905, "learning_rate": 0.0001, "loss": 1.4827, "step": 5020 }, { "epoch": 0.8061978163134232, "grad_norm": 0.26342907547950745, "learning_rate": 0.0001, "loss": 1.4998, "step": 5021 }, { "epoch": 0.8063583815028902, "grad_norm": 0.2677418887615204, "learning_rate": 0.0001, "loss": 1.4933, "step": 5022 }, { "epoch": 0.8065189466923571, "grad_norm": 0.289798378944397, "learning_rate": 0.0001, "loss": 1.5267, "step": 5023 }, { "epoch": 0.8066795118818241, "grad_norm": 0.28941789269447327, "learning_rate": 0.0001, "loss": 1.5593, "step": 5024 }, { "epoch": 0.8068400770712909, "grad_norm": 0.270546019077301, "learning_rate": 0.0001, "loss": 1.5388, "step": 5025 }, { "epoch": 0.8070006422607579, "grad_norm": 0.2650315463542938, "learning_rate": 0.0001, "loss": 1.4661, "step": 5026 }, { "epoch": 0.8071612074502248, "grad_norm": 0.289795458316803, "learning_rate": 0.0001, "loss": 1.5281, "step": 5027 }, { "epoch": 0.8073217726396917, "grad_norm": 0.28381696343421936, "learning_rate": 0.0001, "loss": 1.5533, "step": 5028 }, { "epoch": 0.8074823378291587, "grad_norm": 0.2664477527141571, "learning_rate": 0.0001, "loss": 1.4749, "step": 5029 }, { "epoch": 0.8076429030186255, "grad_norm": 0.26828449964523315, "learning_rate": 0.0001, "loss": 1.5559, "step": 5030 }, { "epoch": 0.8078034682080925, "grad_norm": 0.28125303983688354, "learning_rate": 0.0001, "loss": 1.4843, "step": 5031 }, { "epoch": 0.8079640333975594, "grad_norm": 0.32039839029312134, "learning_rate": 0.0001, "loss": 1.4856, "step": 5032 }, { "epoch": 0.8081245985870263, "grad_norm": 0.26907145977020264, "learning_rate": 0.0001, "loss": 1.4669, "step": 5033 }, { "epoch": 0.8082851637764933, "grad_norm": 0.258063405752182, "learning_rate": 0.0001, "loss": 1.5126, "step": 5034 }, { "epoch": 0.8084457289659602, "grad_norm": 0.2588548958301544, "learning_rate": 0.0001, "loss": 1.465, "step": 5035 }, { "epoch": 0.8086062941554271, "grad_norm": 0.255706787109375, "learning_rate": 0.0001, "loss": 1.433, "step": 5036 }, { "epoch": 0.808766859344894, "grad_norm": 0.27706262469291687, "learning_rate": 0.0001, "loss": 1.5994, "step": 5037 }, { "epoch": 0.808927424534361, "grad_norm": 0.31538891792297363, "learning_rate": 0.0001, "loss": 1.4693, "step": 5038 }, { "epoch": 0.8090879897238279, "grad_norm": 0.27659207582473755, "learning_rate": 0.0001, "loss": 1.4528, "step": 5039 }, { "epoch": 0.8092485549132948, "grad_norm": 0.2625981569290161, "learning_rate": 0.0001, "loss": 1.5124, "step": 5040 }, { "epoch": 0.8094091201027617, "grad_norm": 0.2831464111804962, "learning_rate": 0.0001, "loss": 1.6326, "step": 5041 }, { "epoch": 0.8095696852922286, "grad_norm": 0.29719650745391846, "learning_rate": 0.0001, "loss": 1.4677, "step": 5042 }, { "epoch": 0.8097302504816956, "grad_norm": 0.33954355120658875, "learning_rate": 0.0001, "loss": 1.5095, "step": 5043 }, { "epoch": 0.8098908156711625, "grad_norm": 0.2682896852493286, "learning_rate": 0.0001, "loss": 1.4545, "step": 5044 }, { "epoch": 0.8100513808606294, "grad_norm": 0.2693544030189514, "learning_rate": 0.0001, "loss": 1.4792, "step": 5045 }, { "epoch": 0.8102119460500964, "grad_norm": 0.31356045603752136, "learning_rate": 0.0001, "loss": 1.4621, "step": 5046 }, { "epoch": 0.8103725112395632, "grad_norm": 0.2878251075744629, "learning_rate": 0.0001, "loss": 1.5351, "step": 5047 }, { "epoch": 0.8105330764290302, "grad_norm": 0.27377381920814514, "learning_rate": 0.0001, "loss": 1.4388, "step": 5048 }, { "epoch": 0.8106936416184971, "grad_norm": 0.27631136775016785, "learning_rate": 0.0001, "loss": 1.5438, "step": 5049 }, { "epoch": 0.810854206807964, "grad_norm": 0.2838387191295624, "learning_rate": 0.0001, "loss": 1.4786, "step": 5050 }, { "epoch": 0.811014771997431, "grad_norm": 0.2612224817276001, "learning_rate": 0.0001, "loss": 1.5201, "step": 5051 }, { "epoch": 0.8111753371868978, "grad_norm": 0.26157259941101074, "learning_rate": 0.0001, "loss": 1.5067, "step": 5052 }, { "epoch": 0.8113359023763648, "grad_norm": 0.278354287147522, "learning_rate": 0.0001, "loss": 1.5035, "step": 5053 }, { "epoch": 0.8114964675658317, "grad_norm": 0.2769748270511627, "learning_rate": 0.0001, "loss": 1.4643, "step": 5054 }, { "epoch": 0.8116570327552987, "grad_norm": 0.2752477824687958, "learning_rate": 0.0001, "loss": 1.5277, "step": 5055 }, { "epoch": 0.8118175979447656, "grad_norm": 0.2681583762168884, "learning_rate": 0.0001, "loss": 1.5103, "step": 5056 }, { "epoch": 0.8119781631342325, "grad_norm": 0.26612240076065063, "learning_rate": 0.0001, "loss": 1.5354, "step": 5057 }, { "epoch": 0.8121387283236994, "grad_norm": 0.2747218608856201, "learning_rate": 0.0001, "loss": 1.4617, "step": 5058 }, { "epoch": 0.8122992935131663, "grad_norm": 0.24961914122104645, "learning_rate": 0.0001, "loss": 1.2948, "step": 5059 }, { "epoch": 0.8124598587026333, "grad_norm": 0.2838800847530365, "learning_rate": 0.0001, "loss": 1.5084, "step": 5060 }, { "epoch": 0.8126204238921002, "grad_norm": 0.2648646831512451, "learning_rate": 0.0001, "loss": 1.506, "step": 5061 }, { "epoch": 0.8127809890815672, "grad_norm": 0.2676868438720703, "learning_rate": 0.0001, "loss": 1.4899, "step": 5062 }, { "epoch": 0.8129415542710341, "grad_norm": 0.24974873661994934, "learning_rate": 0.0001, "loss": 1.4207, "step": 5063 }, { "epoch": 0.8131021194605009, "grad_norm": 0.2762000262737274, "learning_rate": 0.0001, "loss": 1.4827, "step": 5064 }, { "epoch": 0.8132626846499679, "grad_norm": 0.2552899420261383, "learning_rate": 0.0001, "loss": 1.5017, "step": 5065 }, { "epoch": 0.8134232498394348, "grad_norm": 0.2649138867855072, "learning_rate": 0.0001, "loss": 1.375, "step": 5066 }, { "epoch": 0.8135838150289018, "grad_norm": 0.26335644721984863, "learning_rate": 0.0001, "loss": 1.4779, "step": 5067 }, { "epoch": 0.8137443802183687, "grad_norm": 0.26281577348709106, "learning_rate": 0.0001, "loss": 1.4712, "step": 5068 }, { "epoch": 0.8139049454078355, "grad_norm": 0.26597362756729126, "learning_rate": 0.0001, "loss": 1.4837, "step": 5069 }, { "epoch": 0.8140655105973025, "grad_norm": 0.27353256940841675, "learning_rate": 0.0001, "loss": 1.5319, "step": 5070 }, { "epoch": 0.8142260757867694, "grad_norm": 0.2876351773738861, "learning_rate": 0.0001, "loss": 1.4816, "step": 5071 }, { "epoch": 0.8143866409762364, "grad_norm": 0.25910821557044983, "learning_rate": 0.0001, "loss": 1.4715, "step": 5072 }, { "epoch": 0.8145472061657033, "grad_norm": 0.2710203528404236, "learning_rate": 0.0001, "loss": 1.4432, "step": 5073 }, { "epoch": 0.8147077713551703, "grad_norm": 0.2655072808265686, "learning_rate": 0.0001, "loss": 1.5376, "step": 5074 }, { "epoch": 0.8148683365446371, "grad_norm": 0.2728174030780792, "learning_rate": 0.0001, "loss": 1.5714, "step": 5075 }, { "epoch": 0.815028901734104, "grad_norm": 0.26975101232528687, "learning_rate": 0.0001, "loss": 1.4301, "step": 5076 }, { "epoch": 0.815189466923571, "grad_norm": 0.25748392939567566, "learning_rate": 0.0001, "loss": 1.5084, "step": 5077 }, { "epoch": 0.8153500321130379, "grad_norm": 0.25959932804107666, "learning_rate": 0.0001, "loss": 1.5004, "step": 5078 }, { "epoch": 0.8155105973025049, "grad_norm": 0.25693467259407043, "learning_rate": 0.0001, "loss": 1.4671, "step": 5079 }, { "epoch": 0.8156711624919717, "grad_norm": 0.26165667176246643, "learning_rate": 0.0001, "loss": 1.3918, "step": 5080 }, { "epoch": 0.8158317276814386, "grad_norm": 0.27073153853416443, "learning_rate": 0.0001, "loss": 1.5717, "step": 5081 }, { "epoch": 0.8159922928709056, "grad_norm": 0.27127423882484436, "learning_rate": 0.0001, "loss": 1.5349, "step": 5082 }, { "epoch": 0.8161528580603725, "grad_norm": 0.26971322298049927, "learning_rate": 0.0001, "loss": 1.454, "step": 5083 }, { "epoch": 0.8163134232498395, "grad_norm": 0.26631149649620056, "learning_rate": 0.0001, "loss": 1.5591, "step": 5084 }, { "epoch": 0.8164739884393064, "grad_norm": 0.25626811385154724, "learning_rate": 0.0001, "loss": 1.4841, "step": 5085 }, { "epoch": 0.8166345536287732, "grad_norm": 0.274842768907547, "learning_rate": 0.0001, "loss": 1.4183, "step": 5086 }, { "epoch": 0.8167951188182402, "grad_norm": 0.2677263021469116, "learning_rate": 0.0001, "loss": 1.4909, "step": 5087 }, { "epoch": 0.8169556840077071, "grad_norm": 0.26319411396980286, "learning_rate": 0.0001, "loss": 1.4075, "step": 5088 }, { "epoch": 0.8171162491971741, "grad_norm": 0.2668224573135376, "learning_rate": 0.0001, "loss": 1.4753, "step": 5089 }, { "epoch": 0.817276814386641, "grad_norm": 0.27852606773376465, "learning_rate": 0.0001, "loss": 1.5219, "step": 5090 }, { "epoch": 0.8174373795761078, "grad_norm": 0.26831531524658203, "learning_rate": 0.0001, "loss": 1.5035, "step": 5091 }, { "epoch": 0.8175979447655748, "grad_norm": 0.29036858677864075, "learning_rate": 0.0001, "loss": 1.4808, "step": 5092 }, { "epoch": 0.8177585099550417, "grad_norm": 0.2693397104740143, "learning_rate": 0.0001, "loss": 1.4563, "step": 5093 }, { "epoch": 0.8179190751445087, "grad_norm": 0.26656654477119446, "learning_rate": 0.0001, "loss": 1.459, "step": 5094 }, { "epoch": 0.8180796403339756, "grad_norm": 0.4419085681438446, "learning_rate": 0.0001, "loss": 1.6211, "step": 5095 }, { "epoch": 0.8182402055234426, "grad_norm": 0.246019184589386, "learning_rate": 0.0001, "loss": 1.3657, "step": 5096 }, { "epoch": 0.8184007707129094, "grad_norm": 0.2562299966812134, "learning_rate": 0.0001, "loss": 1.4701, "step": 5097 }, { "epoch": 0.8185613359023763, "grad_norm": 0.2575107514858246, "learning_rate": 0.0001, "loss": 1.5029, "step": 5098 }, { "epoch": 0.8187219010918433, "grad_norm": 0.2756263017654419, "learning_rate": 0.0001, "loss": 1.5079, "step": 5099 }, { "epoch": 0.8188824662813102, "grad_norm": 0.2649906873703003, "learning_rate": 0.0001, "loss": 1.5112, "step": 5100 }, { "epoch": 0.8190430314707772, "grad_norm": 0.2766910195350647, "learning_rate": 0.0001, "loss": 1.4806, "step": 5101 }, { "epoch": 0.819203596660244, "grad_norm": 0.2611122131347656, "learning_rate": 0.0001, "loss": 1.4494, "step": 5102 }, { "epoch": 0.819364161849711, "grad_norm": 0.2742558717727661, "learning_rate": 0.0001, "loss": 1.5182, "step": 5103 }, { "epoch": 0.8195247270391779, "grad_norm": 0.272408127784729, "learning_rate": 0.0001, "loss": 1.5638, "step": 5104 }, { "epoch": 0.8196852922286448, "grad_norm": 0.25343772768974304, "learning_rate": 0.0001, "loss": 1.4802, "step": 5105 }, { "epoch": 0.8198458574181118, "grad_norm": 0.29670315980911255, "learning_rate": 0.0001, "loss": 1.3951, "step": 5106 }, { "epoch": 0.8200064226075787, "grad_norm": 0.2548818290233612, "learning_rate": 0.0001, "loss": 1.4713, "step": 5107 }, { "epoch": 0.8201669877970456, "grad_norm": 0.26189446449279785, "learning_rate": 0.0001, "loss": 1.4966, "step": 5108 }, { "epoch": 0.8203275529865125, "grad_norm": 0.8882119059562683, "learning_rate": 0.0001, "loss": 1.4813, "step": 5109 }, { "epoch": 0.8204881181759794, "grad_norm": 0.29879793524742126, "learning_rate": 0.0001, "loss": 1.5289, "step": 5110 }, { "epoch": 0.8206486833654464, "grad_norm": 0.27714061737060547, "learning_rate": 0.0001, "loss": 1.5195, "step": 5111 }, { "epoch": 0.8208092485549133, "grad_norm": 0.2755475342273712, "learning_rate": 0.0001, "loss": 1.5512, "step": 5112 }, { "epoch": 0.8209698137443803, "grad_norm": 0.2726532518863678, "learning_rate": 0.0001, "loss": 1.517, "step": 5113 }, { "epoch": 0.8211303789338471, "grad_norm": 0.2651592791080475, "learning_rate": 0.0001, "loss": 1.4032, "step": 5114 }, { "epoch": 0.821290944123314, "grad_norm": 0.2701057195663452, "learning_rate": 0.0001, "loss": 1.4587, "step": 5115 }, { "epoch": 0.821451509312781, "grad_norm": 0.2695329487323761, "learning_rate": 0.0001, "loss": 1.4754, "step": 5116 }, { "epoch": 0.8216120745022479, "grad_norm": 0.2873031795024872, "learning_rate": 0.0001, "loss": 1.4787, "step": 5117 }, { "epoch": 0.8217726396917149, "grad_norm": 0.29290664196014404, "learning_rate": 0.0001, "loss": 1.4805, "step": 5118 }, { "epoch": 0.8219332048811817, "grad_norm": 0.270831823348999, "learning_rate": 0.0001, "loss": 1.3993, "step": 5119 }, { "epoch": 0.8220937700706487, "grad_norm": 0.2933433949947357, "learning_rate": 0.0001, "loss": 1.5612, "step": 5120 }, { "epoch": 0.8222543352601156, "grad_norm": 0.2748069763183594, "learning_rate": 0.0001, "loss": 1.5219, "step": 5121 }, { "epoch": 0.8224149004495825, "grad_norm": 0.2588675022125244, "learning_rate": 0.0001, "loss": 1.4801, "step": 5122 }, { "epoch": 0.8225754656390495, "grad_norm": 0.2799936830997467, "learning_rate": 0.0001, "loss": 1.438, "step": 5123 }, { "epoch": 0.8227360308285164, "grad_norm": 0.27927446365356445, "learning_rate": 0.0001, "loss": 1.4703, "step": 5124 }, { "epoch": 0.8228965960179833, "grad_norm": 0.27305909991264343, "learning_rate": 0.0001, "loss": 1.5344, "step": 5125 }, { "epoch": 0.8230571612074502, "grad_norm": 0.2809090316295624, "learning_rate": 0.0001, "loss": 1.4558, "step": 5126 }, { "epoch": 0.8232177263969171, "grad_norm": 0.29485854506492615, "learning_rate": 0.0001, "loss": 1.5036, "step": 5127 }, { "epoch": 0.8233782915863841, "grad_norm": 0.2724483013153076, "learning_rate": 0.0001, "loss": 1.462, "step": 5128 }, { "epoch": 0.823538856775851, "grad_norm": 0.26400211453437805, "learning_rate": 0.0001, "loss": 1.5174, "step": 5129 }, { "epoch": 0.8236994219653179, "grad_norm": 0.26416003704071045, "learning_rate": 0.0001, "loss": 1.4405, "step": 5130 }, { "epoch": 0.8238599871547848, "grad_norm": 0.2855091094970703, "learning_rate": 0.0001, "loss": 1.4165, "step": 5131 }, { "epoch": 0.8240205523442518, "grad_norm": 0.27835193276405334, "learning_rate": 0.0001, "loss": 1.4652, "step": 5132 }, { "epoch": 0.8241811175337187, "grad_norm": 0.258139044046402, "learning_rate": 0.0001, "loss": 1.4526, "step": 5133 }, { "epoch": 0.8243416827231856, "grad_norm": 0.2563215494155884, "learning_rate": 0.0001, "loss": 1.4405, "step": 5134 }, { "epoch": 0.8245022479126526, "grad_norm": 0.2658300995826721, "learning_rate": 0.0001, "loss": 1.4993, "step": 5135 }, { "epoch": 0.8246628131021194, "grad_norm": 0.2655284106731415, "learning_rate": 0.0001, "loss": 1.4788, "step": 5136 }, { "epoch": 0.8248233782915864, "grad_norm": 0.27811136841773987, "learning_rate": 0.0001, "loss": 1.5086, "step": 5137 }, { "epoch": 0.8249839434810533, "grad_norm": 0.28676706552505493, "learning_rate": 0.0001, "loss": 1.4531, "step": 5138 }, { "epoch": 0.8251445086705202, "grad_norm": 0.2639085650444031, "learning_rate": 0.0001, "loss": 1.4882, "step": 5139 }, { "epoch": 0.8253050738599872, "grad_norm": 0.28511327505111694, "learning_rate": 0.0001, "loss": 1.4999, "step": 5140 }, { "epoch": 0.825465639049454, "grad_norm": 0.26077866554260254, "learning_rate": 0.0001, "loss": 1.4877, "step": 5141 }, { "epoch": 0.825626204238921, "grad_norm": 0.28479114174842834, "learning_rate": 0.0001, "loss": 1.596, "step": 5142 }, { "epoch": 0.8257867694283879, "grad_norm": 0.2742884159088135, "learning_rate": 0.0001, "loss": 1.4773, "step": 5143 }, { "epoch": 0.8259473346178549, "grad_norm": 0.26783105731010437, "learning_rate": 0.0001, "loss": 1.4657, "step": 5144 }, { "epoch": 0.8261078998073218, "grad_norm": 0.2745811939239502, "learning_rate": 0.0001, "loss": 1.5226, "step": 5145 }, { "epoch": 0.8262684649967887, "grad_norm": 0.2664661705493927, "learning_rate": 0.0001, "loss": 1.4311, "step": 5146 }, { "epoch": 0.8264290301862556, "grad_norm": 0.2732374370098114, "learning_rate": 0.0001, "loss": 1.3745, "step": 5147 }, { "epoch": 0.8265895953757225, "grad_norm": 0.2661466896533966, "learning_rate": 0.0001, "loss": 1.3327, "step": 5148 }, { "epoch": 0.8267501605651895, "grad_norm": 0.26395609974861145, "learning_rate": 0.0001, "loss": 1.474, "step": 5149 }, { "epoch": 0.8269107257546564, "grad_norm": 0.2627910375595093, "learning_rate": 0.0001, "loss": 1.5181, "step": 5150 }, { "epoch": 0.8270712909441233, "grad_norm": 0.2805642783641815, "learning_rate": 0.0001, "loss": 1.5115, "step": 5151 }, { "epoch": 0.8272318561335903, "grad_norm": 0.26596057415008545, "learning_rate": 0.0001, "loss": 1.4544, "step": 5152 }, { "epoch": 0.8273924213230571, "grad_norm": 0.2646350860595703, "learning_rate": 0.0001, "loss": 1.519, "step": 5153 }, { "epoch": 0.8275529865125241, "grad_norm": 0.2681911289691925, "learning_rate": 0.0001, "loss": 1.5043, "step": 5154 }, { "epoch": 0.827713551701991, "grad_norm": 0.2620398998260498, "learning_rate": 0.0001, "loss": 1.5099, "step": 5155 }, { "epoch": 0.827874116891458, "grad_norm": 0.26036378741264343, "learning_rate": 0.0001, "loss": 1.4766, "step": 5156 }, { "epoch": 0.8280346820809249, "grad_norm": 0.2933289408683777, "learning_rate": 0.0001, "loss": 1.5055, "step": 5157 }, { "epoch": 0.8281952472703917, "grad_norm": 0.26431766152381897, "learning_rate": 0.0001, "loss": 1.4785, "step": 5158 }, { "epoch": 0.8283558124598587, "grad_norm": 0.27542543411254883, "learning_rate": 0.0001, "loss": 1.4591, "step": 5159 }, { "epoch": 0.8285163776493256, "grad_norm": 0.26881033182144165, "learning_rate": 0.0001, "loss": 1.4321, "step": 5160 }, { "epoch": 0.8286769428387926, "grad_norm": 0.27814653515815735, "learning_rate": 0.0001, "loss": 1.4129, "step": 5161 }, { "epoch": 0.8288375080282595, "grad_norm": 0.259012371301651, "learning_rate": 0.0001, "loss": 1.4539, "step": 5162 }, { "epoch": 0.8289980732177264, "grad_norm": 0.2711510956287384, "learning_rate": 0.0001, "loss": 1.4343, "step": 5163 }, { "epoch": 0.8291586384071933, "grad_norm": 0.27185365557670593, "learning_rate": 0.0001, "loss": 1.4643, "step": 5164 }, { "epoch": 0.8293192035966602, "grad_norm": 0.25620895624160767, "learning_rate": 0.0001, "loss": 1.4095, "step": 5165 }, { "epoch": 0.8294797687861272, "grad_norm": 0.2644282281398773, "learning_rate": 0.0001, "loss": 1.5839, "step": 5166 }, { "epoch": 0.8296403339755941, "grad_norm": 0.2591506838798523, "learning_rate": 0.0001, "loss": 1.4084, "step": 5167 }, { "epoch": 0.8298008991650611, "grad_norm": 0.27087241411209106, "learning_rate": 0.0001, "loss": 1.5111, "step": 5168 }, { "epoch": 0.8299614643545279, "grad_norm": 0.2518240213394165, "learning_rate": 0.0001, "loss": 1.4937, "step": 5169 }, { "epoch": 0.8301220295439948, "grad_norm": 0.260097473859787, "learning_rate": 0.0001, "loss": 1.452, "step": 5170 }, { "epoch": 0.8302825947334618, "grad_norm": 0.292864590883255, "learning_rate": 0.0001, "loss": 1.4754, "step": 5171 }, { "epoch": 0.8304431599229287, "grad_norm": 0.2785932123661041, "learning_rate": 0.0001, "loss": 1.4306, "step": 5172 }, { "epoch": 0.8306037251123957, "grad_norm": 0.2712307572364807, "learning_rate": 0.0001, "loss": 1.4387, "step": 5173 }, { "epoch": 0.8307642903018626, "grad_norm": 0.26541319489479065, "learning_rate": 0.0001, "loss": 1.47, "step": 5174 }, { "epoch": 0.8309248554913294, "grad_norm": 0.2870188355445862, "learning_rate": 0.0001, "loss": 1.4839, "step": 5175 }, { "epoch": 0.8310854206807964, "grad_norm": 0.2913323938846588, "learning_rate": 0.0001, "loss": 1.4898, "step": 5176 }, { "epoch": 0.8312459858702633, "grad_norm": 0.2722484767436981, "learning_rate": 0.0001, "loss": 1.445, "step": 5177 }, { "epoch": 0.8314065510597303, "grad_norm": 0.2657018005847931, "learning_rate": 0.0001, "loss": 1.4855, "step": 5178 }, { "epoch": 0.8315671162491972, "grad_norm": 0.2667604684829712, "learning_rate": 0.0001, "loss": 1.5824, "step": 5179 }, { "epoch": 0.831727681438664, "grad_norm": 0.2654268741607666, "learning_rate": 0.0001, "loss": 1.5505, "step": 5180 }, { "epoch": 0.831888246628131, "grad_norm": 0.2547430992126465, "learning_rate": 0.0001, "loss": 1.4161, "step": 5181 }, { "epoch": 0.8320488118175979, "grad_norm": 0.2565046548843384, "learning_rate": 0.0001, "loss": 1.4418, "step": 5182 }, { "epoch": 0.8322093770070649, "grad_norm": 0.26058104634284973, "learning_rate": 0.0001, "loss": 1.545, "step": 5183 }, { "epoch": 0.8323699421965318, "grad_norm": 0.2802268862724304, "learning_rate": 0.0001, "loss": 1.399, "step": 5184 }, { "epoch": 0.8325305073859988, "grad_norm": 0.25977227091789246, "learning_rate": 0.0001, "loss": 1.5027, "step": 5185 }, { "epoch": 0.8326910725754656, "grad_norm": 0.2635708451271057, "learning_rate": 0.0001, "loss": 1.4115, "step": 5186 }, { "epoch": 0.8328516377649325, "grad_norm": 0.2661842405796051, "learning_rate": 0.0001, "loss": 1.4521, "step": 5187 }, { "epoch": 0.8330122029543995, "grad_norm": 0.266492635011673, "learning_rate": 0.0001, "loss": 1.4381, "step": 5188 }, { "epoch": 0.8331727681438664, "grad_norm": 0.2667500376701355, "learning_rate": 0.0001, "loss": 1.4577, "step": 5189 }, { "epoch": 0.8333333333333334, "grad_norm": 0.27786850929260254, "learning_rate": 0.0001, "loss": 1.5013, "step": 5190 }, { "epoch": 0.8334938985228002, "grad_norm": 0.28187161684036255, "learning_rate": 0.0001, "loss": 1.4082, "step": 5191 }, { "epoch": 0.8336544637122671, "grad_norm": 0.2588532865047455, "learning_rate": 0.0001, "loss": 1.4118, "step": 5192 }, { "epoch": 0.8338150289017341, "grad_norm": 0.2951449751853943, "learning_rate": 0.0001, "loss": 1.5414, "step": 5193 }, { "epoch": 0.833975594091201, "grad_norm": 0.2797166705131531, "learning_rate": 0.0001, "loss": 1.4807, "step": 5194 }, { "epoch": 0.834136159280668, "grad_norm": 0.2833901643753052, "learning_rate": 0.0001, "loss": 1.4815, "step": 5195 }, { "epoch": 0.8342967244701349, "grad_norm": 0.28295475244522095, "learning_rate": 0.0001, "loss": 1.4543, "step": 5196 }, { "epoch": 0.8344572896596018, "grad_norm": 0.268713116645813, "learning_rate": 0.0001, "loss": 1.4507, "step": 5197 }, { "epoch": 0.8346178548490687, "grad_norm": 0.2791979908943176, "learning_rate": 0.0001, "loss": 1.4525, "step": 5198 }, { "epoch": 0.8347784200385356, "grad_norm": 0.2836192846298218, "learning_rate": 0.0001, "loss": 1.5645, "step": 5199 }, { "epoch": 0.8349389852280026, "grad_norm": 0.2725848853588104, "learning_rate": 0.0001, "loss": 1.4823, "step": 5200 }, { "epoch": 0.8350995504174695, "grad_norm": 0.2682066559791565, "learning_rate": 0.0001, "loss": 1.5213, "step": 5201 }, { "epoch": 0.8352601156069365, "grad_norm": 0.2783905863761902, "learning_rate": 0.0001, "loss": 1.555, "step": 5202 }, { "epoch": 0.8354206807964033, "grad_norm": 0.26786595582962036, "learning_rate": 0.0001, "loss": 1.4646, "step": 5203 }, { "epoch": 0.8355812459858702, "grad_norm": 0.2640950083732605, "learning_rate": 0.0001, "loss": 1.4267, "step": 5204 }, { "epoch": 0.8357418111753372, "grad_norm": 0.27672556042671204, "learning_rate": 0.0001, "loss": 1.4804, "step": 5205 }, { "epoch": 0.8359023763648041, "grad_norm": 0.27699586749076843, "learning_rate": 0.0001, "loss": 1.4971, "step": 5206 }, { "epoch": 0.8360629415542711, "grad_norm": 0.26214927434921265, "learning_rate": 0.0001, "loss": 1.4213, "step": 5207 }, { "epoch": 0.8362235067437379, "grad_norm": 0.28681671619415283, "learning_rate": 0.0001, "loss": 1.4586, "step": 5208 }, { "epoch": 0.8363840719332049, "grad_norm": 0.2791830599308014, "learning_rate": 0.0001, "loss": 1.477, "step": 5209 }, { "epoch": 0.8365446371226718, "grad_norm": 0.2742231786251068, "learning_rate": 0.0001, "loss": 1.5096, "step": 5210 }, { "epoch": 0.8367052023121387, "grad_norm": 0.2677064836025238, "learning_rate": 0.0001, "loss": 1.4526, "step": 5211 }, { "epoch": 0.8368657675016057, "grad_norm": 0.28335240483283997, "learning_rate": 0.0001, "loss": 1.5283, "step": 5212 }, { "epoch": 0.8370263326910726, "grad_norm": 0.2656855583190918, "learning_rate": 0.0001, "loss": 1.4812, "step": 5213 }, { "epoch": 0.8371868978805395, "grad_norm": 0.281790167093277, "learning_rate": 0.0001, "loss": 1.5169, "step": 5214 }, { "epoch": 0.8373474630700064, "grad_norm": 0.277771919965744, "learning_rate": 0.0001, "loss": 1.471, "step": 5215 }, { "epoch": 0.8375080282594733, "grad_norm": 0.29074445366859436, "learning_rate": 0.0001, "loss": 1.5112, "step": 5216 }, { "epoch": 0.8376685934489403, "grad_norm": 0.2728709876537323, "learning_rate": 0.0001, "loss": 1.4522, "step": 5217 }, { "epoch": 0.8378291586384072, "grad_norm": 0.2697131037712097, "learning_rate": 0.0001, "loss": 1.5388, "step": 5218 }, { "epoch": 0.8379897238278741, "grad_norm": 0.26410043239593506, "learning_rate": 0.0001, "loss": 1.4333, "step": 5219 }, { "epoch": 0.838150289017341, "grad_norm": 0.27733278274536133, "learning_rate": 0.0001, "loss": 1.5169, "step": 5220 }, { "epoch": 0.838310854206808, "grad_norm": 0.26385006308555603, "learning_rate": 0.0001, "loss": 1.4538, "step": 5221 }, { "epoch": 0.8384714193962749, "grad_norm": 0.2629423439502716, "learning_rate": 0.0001, "loss": 1.5075, "step": 5222 }, { "epoch": 0.8386319845857418, "grad_norm": 0.27457037568092346, "learning_rate": 0.0001, "loss": 1.4637, "step": 5223 }, { "epoch": 0.8387925497752088, "grad_norm": 0.2441031038761139, "learning_rate": 0.0001, "loss": 1.4459, "step": 5224 }, { "epoch": 0.8389531149646756, "grad_norm": 0.2537875473499298, "learning_rate": 0.0001, "loss": 1.4015, "step": 5225 }, { "epoch": 0.8391136801541426, "grad_norm": 0.26629117131233215, "learning_rate": 0.0001, "loss": 1.4603, "step": 5226 }, { "epoch": 0.8392742453436095, "grad_norm": 0.26998355984687805, "learning_rate": 0.0001, "loss": 1.4563, "step": 5227 }, { "epoch": 0.8394348105330764, "grad_norm": 0.2816150188446045, "learning_rate": 0.0001, "loss": 1.6008, "step": 5228 }, { "epoch": 0.8395953757225434, "grad_norm": 0.2685852348804474, "learning_rate": 0.0001, "loss": 1.5131, "step": 5229 }, { "epoch": 0.8397559409120102, "grad_norm": 0.27722010016441345, "learning_rate": 0.0001, "loss": 1.5362, "step": 5230 }, { "epoch": 0.8399165061014772, "grad_norm": 0.2766026258468628, "learning_rate": 0.0001, "loss": 1.5552, "step": 5231 }, { "epoch": 0.8400770712909441, "grad_norm": 0.2711658477783203, "learning_rate": 0.0001, "loss": 1.5026, "step": 5232 }, { "epoch": 0.840237636480411, "grad_norm": 0.27401483058929443, "learning_rate": 0.0001, "loss": 1.4661, "step": 5233 }, { "epoch": 0.840398201669878, "grad_norm": 0.2823639512062073, "learning_rate": 0.0001, "loss": 1.475, "step": 5234 }, { "epoch": 0.8405587668593449, "grad_norm": 0.256363183259964, "learning_rate": 0.0001, "loss": 1.485, "step": 5235 }, { "epoch": 0.8407193320488118, "grad_norm": 0.2676762640476227, "learning_rate": 0.0001, "loss": 1.4965, "step": 5236 }, { "epoch": 0.8408798972382787, "grad_norm": 0.25752776861190796, "learning_rate": 0.0001, "loss": 1.4888, "step": 5237 }, { "epoch": 0.8410404624277457, "grad_norm": 0.2785666882991791, "learning_rate": 0.0001, "loss": 1.4931, "step": 5238 }, { "epoch": 0.8412010276172126, "grad_norm": 0.303597092628479, "learning_rate": 0.0001, "loss": 1.5779, "step": 5239 }, { "epoch": 0.8413615928066795, "grad_norm": 0.2613723874092102, "learning_rate": 0.0001, "loss": 1.4444, "step": 5240 }, { "epoch": 0.8415221579961464, "grad_norm": 0.25317010283470154, "learning_rate": 0.0001, "loss": 1.4211, "step": 5241 }, { "epoch": 0.8416827231856133, "grad_norm": 0.2762047052383423, "learning_rate": 0.0001, "loss": 1.5074, "step": 5242 }, { "epoch": 0.8418432883750803, "grad_norm": 0.38027575612068176, "learning_rate": 0.0001, "loss": 1.4986, "step": 5243 }, { "epoch": 0.8420038535645472, "grad_norm": 0.28426018357276917, "learning_rate": 0.0001, "loss": 1.5045, "step": 5244 }, { "epoch": 0.8421644187540142, "grad_norm": 0.26807907223701477, "learning_rate": 0.0001, "loss": 1.508, "step": 5245 }, { "epoch": 0.8423249839434811, "grad_norm": 0.26778993010520935, "learning_rate": 0.0001, "loss": 1.5063, "step": 5246 }, { "epoch": 0.8424855491329479, "grad_norm": 0.26153409481048584, "learning_rate": 0.0001, "loss": 1.51, "step": 5247 }, { "epoch": 0.8426461143224149, "grad_norm": 0.26222196221351624, "learning_rate": 0.0001, "loss": 1.5181, "step": 5248 }, { "epoch": 0.8428066795118818, "grad_norm": 0.26973864436149597, "learning_rate": 0.0001, "loss": 1.4557, "step": 5249 }, { "epoch": 0.8429672447013488, "grad_norm": 0.27253398299217224, "learning_rate": 0.0001, "loss": 1.4572, "step": 5250 }, { "epoch": 0.8431278098908157, "grad_norm": 0.26086390018463135, "learning_rate": 0.0001, "loss": 1.4947, "step": 5251 }, { "epoch": 0.8432883750802826, "grad_norm": 0.25629332661628723, "learning_rate": 0.0001, "loss": 1.4801, "step": 5252 }, { "epoch": 0.8434489402697495, "grad_norm": 0.27789345383644104, "learning_rate": 0.0001, "loss": 1.5344, "step": 5253 }, { "epoch": 0.8436095054592164, "grad_norm": 0.25398340821266174, "learning_rate": 0.0001, "loss": 1.5206, "step": 5254 }, { "epoch": 0.8437700706486834, "grad_norm": 0.2797142565250397, "learning_rate": 0.0001, "loss": 1.4989, "step": 5255 }, { "epoch": 0.8439306358381503, "grad_norm": 0.2748796343803406, "learning_rate": 0.0001, "loss": 1.5764, "step": 5256 }, { "epoch": 0.8440912010276173, "grad_norm": 0.255555123090744, "learning_rate": 0.0001, "loss": 1.4444, "step": 5257 }, { "epoch": 0.8442517662170841, "grad_norm": 0.27899670600891113, "learning_rate": 0.0001, "loss": 1.4988, "step": 5258 }, { "epoch": 0.844412331406551, "grad_norm": 0.2732127904891968, "learning_rate": 0.0001, "loss": 1.4811, "step": 5259 }, { "epoch": 0.844572896596018, "grad_norm": 0.2523353099822998, "learning_rate": 0.0001, "loss": 1.4327, "step": 5260 }, { "epoch": 0.8447334617854849, "grad_norm": 0.26799678802490234, "learning_rate": 0.0001, "loss": 1.419, "step": 5261 }, { "epoch": 0.8448940269749519, "grad_norm": 0.26532357931137085, "learning_rate": 0.0001, "loss": 1.4337, "step": 5262 }, { "epoch": 0.8450545921644188, "grad_norm": 0.2633708119392395, "learning_rate": 0.0001, "loss": 1.559, "step": 5263 }, { "epoch": 0.8452151573538856, "grad_norm": 0.27879685163497925, "learning_rate": 0.0001, "loss": 1.4835, "step": 5264 }, { "epoch": 0.8453757225433526, "grad_norm": 0.27665314078330994, "learning_rate": 0.0001, "loss": 1.4895, "step": 5265 }, { "epoch": 0.8455362877328195, "grad_norm": 0.2681369483470917, "learning_rate": 0.0001, "loss": 1.5189, "step": 5266 }, { "epoch": 0.8456968529222865, "grad_norm": 0.27890172600746155, "learning_rate": 0.0001, "loss": 1.4846, "step": 5267 }, { "epoch": 0.8458574181117534, "grad_norm": 0.2770402729511261, "learning_rate": 0.0001, "loss": 1.4708, "step": 5268 }, { "epoch": 0.8460179833012202, "grad_norm": 0.28067901730537415, "learning_rate": 0.0001, "loss": 1.493, "step": 5269 }, { "epoch": 0.8461785484906872, "grad_norm": 0.2720443308353424, "learning_rate": 0.0001, "loss": 1.465, "step": 5270 }, { "epoch": 0.8463391136801541, "grad_norm": 0.2602487802505493, "learning_rate": 0.0001, "loss": 1.4784, "step": 5271 }, { "epoch": 0.8464996788696211, "grad_norm": 0.2525050938129425, "learning_rate": 0.0001, "loss": 1.383, "step": 5272 }, { "epoch": 0.846660244059088, "grad_norm": 0.28399643301963806, "learning_rate": 0.0001, "loss": 1.5532, "step": 5273 }, { "epoch": 0.846820809248555, "grad_norm": 0.27307626605033875, "learning_rate": 0.0001, "loss": 1.5325, "step": 5274 }, { "epoch": 0.8469813744380218, "grad_norm": 0.26894059777259827, "learning_rate": 0.0001, "loss": 1.4124, "step": 5275 }, { "epoch": 0.8471419396274887, "grad_norm": 0.2619190514087677, "learning_rate": 0.0001, "loss": 1.5041, "step": 5276 }, { "epoch": 0.8473025048169557, "grad_norm": 0.25521716475486755, "learning_rate": 0.0001, "loss": 1.492, "step": 5277 }, { "epoch": 0.8474630700064226, "grad_norm": 0.25996091961860657, "learning_rate": 0.0001, "loss": 1.4228, "step": 5278 }, { "epoch": 0.8476236351958896, "grad_norm": 0.2658037841320038, "learning_rate": 0.0001, "loss": 1.5182, "step": 5279 }, { "epoch": 0.8477842003853564, "grad_norm": 0.2619093060493469, "learning_rate": 0.0001, "loss": 1.4209, "step": 5280 }, { "epoch": 0.8479447655748233, "grad_norm": 0.2617737054824829, "learning_rate": 0.0001, "loss": 1.4443, "step": 5281 }, { "epoch": 0.8481053307642903, "grad_norm": 0.2692483365535736, "learning_rate": 0.0001, "loss": 1.5543, "step": 5282 }, { "epoch": 0.8482658959537572, "grad_norm": 0.2643255293369293, "learning_rate": 0.0001, "loss": 1.4965, "step": 5283 }, { "epoch": 0.8484264611432242, "grad_norm": 0.2760925889015198, "learning_rate": 0.0001, "loss": 1.4917, "step": 5284 }, { "epoch": 0.8485870263326911, "grad_norm": 0.26432475447654724, "learning_rate": 0.0001, "loss": 1.5258, "step": 5285 }, { "epoch": 0.848747591522158, "grad_norm": 0.2962948679924011, "learning_rate": 0.0001, "loss": 1.5083, "step": 5286 }, { "epoch": 0.8489081567116249, "grad_norm": 0.2693207859992981, "learning_rate": 0.0001, "loss": 1.4983, "step": 5287 }, { "epoch": 0.8490687219010918, "grad_norm": 0.27385634183883667, "learning_rate": 0.0001, "loss": 1.4977, "step": 5288 }, { "epoch": 0.8492292870905588, "grad_norm": 0.2681661546230316, "learning_rate": 0.0001, "loss": 1.4905, "step": 5289 }, { "epoch": 0.8493898522800257, "grad_norm": 0.26129087805747986, "learning_rate": 0.0001, "loss": 1.4372, "step": 5290 }, { "epoch": 0.8495504174694927, "grad_norm": 0.2743181586265564, "learning_rate": 0.0001, "loss": 1.4226, "step": 5291 }, { "epoch": 0.8497109826589595, "grad_norm": 0.2775701880455017, "learning_rate": 0.0001, "loss": 1.5435, "step": 5292 }, { "epoch": 0.8498715478484264, "grad_norm": 0.2661895751953125, "learning_rate": 0.0001, "loss": 1.522, "step": 5293 }, { "epoch": 0.8500321130378934, "grad_norm": 0.27059099078178406, "learning_rate": 0.0001, "loss": 1.5029, "step": 5294 }, { "epoch": 0.8501926782273603, "grad_norm": 0.2673824727535248, "learning_rate": 0.0001, "loss": 1.4741, "step": 5295 }, { "epoch": 0.8503532434168273, "grad_norm": 0.25712454319000244, "learning_rate": 0.0001, "loss": 1.4721, "step": 5296 }, { "epoch": 0.8505138086062941, "grad_norm": 0.28328362107276917, "learning_rate": 0.0001, "loss": 1.4103, "step": 5297 }, { "epoch": 0.850674373795761, "grad_norm": 0.26156899333000183, "learning_rate": 0.0001, "loss": 1.5011, "step": 5298 }, { "epoch": 0.850834938985228, "grad_norm": 0.27011993527412415, "learning_rate": 0.0001, "loss": 1.4274, "step": 5299 }, { "epoch": 0.8509955041746949, "grad_norm": 0.26966166496276855, "learning_rate": 0.0001, "loss": 1.5654, "step": 5300 }, { "epoch": 0.8511560693641619, "grad_norm": 0.25330469012260437, "learning_rate": 0.0001, "loss": 1.4416, "step": 5301 }, { "epoch": 0.8513166345536288, "grad_norm": 0.2578624188899994, "learning_rate": 0.0001, "loss": 1.4339, "step": 5302 }, { "epoch": 0.8514771997430957, "grad_norm": 0.268004834651947, "learning_rate": 0.0001, "loss": 1.5447, "step": 5303 }, { "epoch": 0.8516377649325626, "grad_norm": 0.2628016173839569, "learning_rate": 0.0001, "loss": 1.4418, "step": 5304 }, { "epoch": 0.8517983301220295, "grad_norm": 0.26189881563186646, "learning_rate": 0.0001, "loss": 1.5414, "step": 5305 }, { "epoch": 0.8519588953114965, "grad_norm": 0.26972198486328125, "learning_rate": 0.0001, "loss": 1.5235, "step": 5306 }, { "epoch": 0.8521194605009634, "grad_norm": 0.27024611830711365, "learning_rate": 0.0001, "loss": 1.4779, "step": 5307 }, { "epoch": 0.8522800256904303, "grad_norm": 0.2719064950942993, "learning_rate": 0.0001, "loss": 1.5013, "step": 5308 }, { "epoch": 0.8524405908798972, "grad_norm": 0.27928489446640015, "learning_rate": 0.0001, "loss": 1.4162, "step": 5309 }, { "epoch": 0.8526011560693642, "grad_norm": 0.2659405767917633, "learning_rate": 0.0001, "loss": 1.4961, "step": 5310 }, { "epoch": 0.8527617212588311, "grad_norm": 0.28146472573280334, "learning_rate": 0.0001, "loss": 1.4631, "step": 5311 }, { "epoch": 0.852922286448298, "grad_norm": 0.2521761655807495, "learning_rate": 0.0001, "loss": 1.4895, "step": 5312 }, { "epoch": 0.853082851637765, "grad_norm": 0.26773086190223694, "learning_rate": 0.0001, "loss": 1.4861, "step": 5313 }, { "epoch": 0.8532434168272318, "grad_norm": 0.28108856081962585, "learning_rate": 0.0001, "loss": 1.4696, "step": 5314 }, { "epoch": 0.8534039820166988, "grad_norm": 0.277148574590683, "learning_rate": 0.0001, "loss": 1.4788, "step": 5315 }, { "epoch": 0.8535645472061657, "grad_norm": 0.2859897017478943, "learning_rate": 0.0001, "loss": 1.5683, "step": 5316 }, { "epoch": 0.8537251123956326, "grad_norm": 0.27081987261772156, "learning_rate": 0.0001, "loss": 1.4866, "step": 5317 }, { "epoch": 0.8538856775850996, "grad_norm": 0.2703607678413391, "learning_rate": 0.0001, "loss": 1.5545, "step": 5318 }, { "epoch": 0.8540462427745664, "grad_norm": 0.29761916399002075, "learning_rate": 0.0001, "loss": 1.4591, "step": 5319 }, { "epoch": 0.8542068079640334, "grad_norm": 0.27993783354759216, "learning_rate": 0.0001, "loss": 1.5114, "step": 5320 }, { "epoch": 0.8543673731535003, "grad_norm": 0.29130029678344727, "learning_rate": 0.0001, "loss": 1.4765, "step": 5321 }, { "epoch": 0.8545279383429673, "grad_norm": 0.2789912223815918, "learning_rate": 0.0001, "loss": 1.4779, "step": 5322 }, { "epoch": 0.8546885035324342, "grad_norm": 0.26253095269203186, "learning_rate": 0.0001, "loss": 1.4685, "step": 5323 }, { "epoch": 0.8548490687219011, "grad_norm": 0.26710742712020874, "learning_rate": 0.0001, "loss": 1.4584, "step": 5324 }, { "epoch": 0.855009633911368, "grad_norm": 0.25740182399749756, "learning_rate": 0.0001, "loss": 1.4335, "step": 5325 }, { "epoch": 0.8551701991008349, "grad_norm": 0.2696593403816223, "learning_rate": 0.0001, "loss": 1.4675, "step": 5326 }, { "epoch": 0.8553307642903019, "grad_norm": 0.2718164920806885, "learning_rate": 0.0001, "loss": 1.5228, "step": 5327 }, { "epoch": 0.8554913294797688, "grad_norm": 0.2829359173774719, "learning_rate": 0.0001, "loss": 1.5784, "step": 5328 }, { "epoch": 0.8556518946692357, "grad_norm": 0.2718700170516968, "learning_rate": 0.0001, "loss": 1.5205, "step": 5329 }, { "epoch": 0.8558124598587026, "grad_norm": 0.2533983886241913, "learning_rate": 0.0001, "loss": 1.468, "step": 5330 }, { "epoch": 0.8559730250481695, "grad_norm": 0.25820839405059814, "learning_rate": 0.0001, "loss": 1.4253, "step": 5331 }, { "epoch": 0.8561335902376365, "grad_norm": 0.2800899147987366, "learning_rate": 0.0001, "loss": 1.5077, "step": 5332 }, { "epoch": 0.8562941554271034, "grad_norm": 0.26723790168762207, "learning_rate": 0.0001, "loss": 1.4986, "step": 5333 }, { "epoch": 0.8564547206165704, "grad_norm": 0.34934258460998535, "learning_rate": 0.0001, "loss": 1.4808, "step": 5334 }, { "epoch": 0.8566152858060373, "grad_norm": 0.26037484407424927, "learning_rate": 0.0001, "loss": 1.4963, "step": 5335 }, { "epoch": 0.8567758509955041, "grad_norm": 0.26629379391670227, "learning_rate": 0.0001, "loss": 1.4993, "step": 5336 }, { "epoch": 0.8569364161849711, "grad_norm": 0.28581124544143677, "learning_rate": 0.0001, "loss": 1.4839, "step": 5337 }, { "epoch": 0.857096981374438, "grad_norm": 0.26524630188941956, "learning_rate": 0.0001, "loss": 1.4362, "step": 5338 }, { "epoch": 0.857257546563905, "grad_norm": 0.2664186954498291, "learning_rate": 0.0001, "loss": 1.5323, "step": 5339 }, { "epoch": 0.8574181117533719, "grad_norm": 0.26289859414100647, "learning_rate": 0.0001, "loss": 1.529, "step": 5340 }, { "epoch": 0.8575786769428388, "grad_norm": 0.2632129490375519, "learning_rate": 0.0001, "loss": 1.5224, "step": 5341 }, { "epoch": 0.8577392421323057, "grad_norm": 0.26697301864624023, "learning_rate": 0.0001, "loss": 1.4813, "step": 5342 }, { "epoch": 0.8578998073217726, "grad_norm": 0.2633684575557709, "learning_rate": 0.0001, "loss": 1.5293, "step": 5343 }, { "epoch": 0.8580603725112396, "grad_norm": 0.26969778537750244, "learning_rate": 0.0001, "loss": 1.5256, "step": 5344 }, { "epoch": 0.8582209377007065, "grad_norm": 0.28679952025413513, "learning_rate": 0.0001, "loss": 1.4575, "step": 5345 }, { "epoch": 0.8583815028901735, "grad_norm": 0.2709237039089203, "learning_rate": 0.0001, "loss": 1.4327, "step": 5346 }, { "epoch": 0.8585420680796403, "grad_norm": 0.2711273431777954, "learning_rate": 0.0001, "loss": 1.4518, "step": 5347 }, { "epoch": 0.8587026332691072, "grad_norm": 0.2717254161834717, "learning_rate": 0.0001, "loss": 1.5657, "step": 5348 }, { "epoch": 0.8588631984585742, "grad_norm": 0.2722921073436737, "learning_rate": 0.0001, "loss": 1.4986, "step": 5349 }, { "epoch": 0.8590237636480411, "grad_norm": 0.27039068937301636, "learning_rate": 0.0001, "loss": 1.5133, "step": 5350 }, { "epoch": 0.8591843288375081, "grad_norm": 0.2701614201068878, "learning_rate": 0.0001, "loss": 1.5577, "step": 5351 }, { "epoch": 0.859344894026975, "grad_norm": 0.3063983619213104, "learning_rate": 0.0001, "loss": 1.4986, "step": 5352 }, { "epoch": 0.8595054592164418, "grad_norm": 0.2711946666240692, "learning_rate": 0.0001, "loss": 1.4582, "step": 5353 }, { "epoch": 0.8596660244059088, "grad_norm": 0.2693926990032196, "learning_rate": 0.0001, "loss": 1.477, "step": 5354 }, { "epoch": 0.8598265895953757, "grad_norm": 0.2748611867427826, "learning_rate": 0.0001, "loss": 1.5143, "step": 5355 }, { "epoch": 0.8599871547848427, "grad_norm": 0.2937244474887848, "learning_rate": 0.0001, "loss": 1.4983, "step": 5356 }, { "epoch": 0.8601477199743096, "grad_norm": 0.26753324270248413, "learning_rate": 0.0001, "loss": 1.5532, "step": 5357 }, { "epoch": 0.8603082851637764, "grad_norm": 0.2588178813457489, "learning_rate": 0.0001, "loss": 1.4297, "step": 5358 }, { "epoch": 0.8604688503532434, "grad_norm": 0.27366507053375244, "learning_rate": 0.0001, "loss": 1.438, "step": 5359 }, { "epoch": 0.8606294155427103, "grad_norm": 0.26366257667541504, "learning_rate": 0.0001, "loss": 1.437, "step": 5360 }, { "epoch": 0.8607899807321773, "grad_norm": 0.2775055468082428, "learning_rate": 0.0001, "loss": 1.4391, "step": 5361 }, { "epoch": 0.8609505459216442, "grad_norm": 0.27710622549057007, "learning_rate": 0.0001, "loss": 1.5381, "step": 5362 }, { "epoch": 0.8611111111111112, "grad_norm": 0.258648544549942, "learning_rate": 0.0001, "loss": 1.4236, "step": 5363 }, { "epoch": 0.861271676300578, "grad_norm": 0.2793954312801361, "learning_rate": 0.0001, "loss": 1.5147, "step": 5364 }, { "epoch": 0.8614322414900449, "grad_norm": 0.2777060568332672, "learning_rate": 0.0001, "loss": 1.4437, "step": 5365 }, { "epoch": 0.8615928066795119, "grad_norm": 0.28187084197998047, "learning_rate": 0.0001, "loss": 1.5089, "step": 5366 }, { "epoch": 0.8617533718689788, "grad_norm": 0.26416015625, "learning_rate": 0.0001, "loss": 1.4458, "step": 5367 }, { "epoch": 0.8619139370584458, "grad_norm": 0.2784457802772522, "learning_rate": 0.0001, "loss": 1.5126, "step": 5368 }, { "epoch": 0.8620745022479126, "grad_norm": 0.2705570161342621, "learning_rate": 0.0001, "loss": 1.4383, "step": 5369 }, { "epoch": 0.8622350674373795, "grad_norm": 0.2800073027610779, "learning_rate": 0.0001, "loss": 1.5053, "step": 5370 }, { "epoch": 0.8623956326268465, "grad_norm": 0.30694282054901123, "learning_rate": 0.0001, "loss": 1.5594, "step": 5371 }, { "epoch": 0.8625561978163134, "grad_norm": 0.25870153307914734, "learning_rate": 0.0001, "loss": 1.424, "step": 5372 }, { "epoch": 0.8627167630057804, "grad_norm": 0.29331105947494507, "learning_rate": 0.0001, "loss": 1.5723, "step": 5373 }, { "epoch": 0.8628773281952473, "grad_norm": 0.3343738615512848, "learning_rate": 0.0001, "loss": 1.5026, "step": 5374 }, { "epoch": 0.8630378933847141, "grad_norm": 0.2702012360095978, "learning_rate": 0.0001, "loss": 1.5063, "step": 5375 }, { "epoch": 0.8631984585741811, "grad_norm": 0.28902357816696167, "learning_rate": 0.0001, "loss": 1.5046, "step": 5376 }, { "epoch": 0.863359023763648, "grad_norm": 0.2942296266555786, "learning_rate": 0.0001, "loss": 1.5046, "step": 5377 }, { "epoch": 0.863519588953115, "grad_norm": 0.28400853276252747, "learning_rate": 0.0001, "loss": 1.51, "step": 5378 }, { "epoch": 0.8636801541425819, "grad_norm": 0.2927720546722412, "learning_rate": 0.0001, "loss": 1.4818, "step": 5379 }, { "epoch": 0.8638407193320488, "grad_norm": 0.26653483510017395, "learning_rate": 0.0001, "loss": 1.4673, "step": 5380 }, { "epoch": 0.8640012845215157, "grad_norm": 0.2643788456916809, "learning_rate": 0.0001, "loss": 1.523, "step": 5381 }, { "epoch": 0.8641618497109826, "grad_norm": 0.26809826493263245, "learning_rate": 0.0001, "loss": 1.4062, "step": 5382 }, { "epoch": 0.8643224149004496, "grad_norm": 0.26230669021606445, "learning_rate": 0.0001, "loss": 1.5291, "step": 5383 }, { "epoch": 0.8644829800899165, "grad_norm": 0.2660991847515106, "learning_rate": 0.0001, "loss": 1.4952, "step": 5384 }, { "epoch": 0.8646435452793835, "grad_norm": 0.27956071496009827, "learning_rate": 0.0001, "loss": 1.4282, "step": 5385 }, { "epoch": 0.8648041104688503, "grad_norm": 0.2823096513748169, "learning_rate": 0.0001, "loss": 1.543, "step": 5386 }, { "epoch": 0.8649646756583173, "grad_norm": 0.2795092463493347, "learning_rate": 0.0001, "loss": 1.4906, "step": 5387 }, { "epoch": 0.8651252408477842, "grad_norm": 0.26812344789505005, "learning_rate": 0.0001, "loss": 1.5251, "step": 5388 }, { "epoch": 0.8652858060372511, "grad_norm": 0.2653723359107971, "learning_rate": 0.0001, "loss": 1.4801, "step": 5389 }, { "epoch": 0.8654463712267181, "grad_norm": 0.272517591714859, "learning_rate": 0.0001, "loss": 1.4916, "step": 5390 }, { "epoch": 0.865606936416185, "grad_norm": 0.28307634592056274, "learning_rate": 0.0001, "loss": 1.4491, "step": 5391 }, { "epoch": 0.8657675016056519, "grad_norm": 0.26914727687835693, "learning_rate": 0.0001, "loss": 1.4699, "step": 5392 }, { "epoch": 0.8659280667951188, "grad_norm": 0.29564759135246277, "learning_rate": 0.0001, "loss": 1.4507, "step": 5393 }, { "epoch": 0.8660886319845857, "grad_norm": 0.2670200765132904, "learning_rate": 0.0001, "loss": 1.4429, "step": 5394 }, { "epoch": 0.8662491971740527, "grad_norm": 0.2828080654144287, "learning_rate": 0.0001, "loss": 1.4994, "step": 5395 }, { "epoch": 0.8664097623635196, "grad_norm": 0.27270907163619995, "learning_rate": 0.0001, "loss": 1.5513, "step": 5396 }, { "epoch": 0.8665703275529865, "grad_norm": 0.2735515534877777, "learning_rate": 0.0001, "loss": 1.4291, "step": 5397 }, { "epoch": 0.8667308927424534, "grad_norm": 0.2732189893722534, "learning_rate": 0.0001, "loss": 1.4891, "step": 5398 }, { "epoch": 0.8668914579319204, "grad_norm": 0.26425090432167053, "learning_rate": 0.0001, "loss": 1.5195, "step": 5399 }, { "epoch": 0.8670520231213873, "grad_norm": 0.2832198441028595, "learning_rate": 0.0001, "loss": 1.4958, "step": 5400 }, { "epoch": 0.8672125883108542, "grad_norm": 0.2647020220756531, "learning_rate": 0.0001, "loss": 1.5135, "step": 5401 }, { "epoch": 0.8673731535003212, "grad_norm": 0.26475292444229126, "learning_rate": 0.0001, "loss": 1.4592, "step": 5402 }, { "epoch": 0.867533718689788, "grad_norm": 0.2587742209434509, "learning_rate": 0.0001, "loss": 1.5061, "step": 5403 }, { "epoch": 0.867694283879255, "grad_norm": 0.2691299617290497, "learning_rate": 0.0001, "loss": 1.4102, "step": 5404 }, { "epoch": 0.8678548490687219, "grad_norm": 0.26208651065826416, "learning_rate": 0.0001, "loss": 1.4109, "step": 5405 }, { "epoch": 0.8680154142581888, "grad_norm": 0.2596539556980133, "learning_rate": 0.0001, "loss": 1.4853, "step": 5406 }, { "epoch": 0.8681759794476558, "grad_norm": 0.27161863446235657, "learning_rate": 0.0001, "loss": 1.4881, "step": 5407 }, { "epoch": 0.8683365446371226, "grad_norm": 0.2674539089202881, "learning_rate": 0.0001, "loss": 1.4426, "step": 5408 }, { "epoch": 0.8684971098265896, "grad_norm": 0.26671645045280457, "learning_rate": 0.0001, "loss": 1.4391, "step": 5409 }, { "epoch": 0.8686576750160565, "grad_norm": 0.3180280923843384, "learning_rate": 0.0001, "loss": 1.5427, "step": 5410 }, { "epoch": 0.8688182402055235, "grad_norm": 0.2686658799648285, "learning_rate": 0.0001, "loss": 1.4433, "step": 5411 }, { "epoch": 0.8689788053949904, "grad_norm": 0.2700963020324707, "learning_rate": 0.0001, "loss": 1.4939, "step": 5412 }, { "epoch": 0.8691393705844573, "grad_norm": 0.2653445601463318, "learning_rate": 0.0001, "loss": 1.524, "step": 5413 }, { "epoch": 0.8692999357739242, "grad_norm": 0.25825613737106323, "learning_rate": 0.0001, "loss": 1.4503, "step": 5414 }, { "epoch": 0.8694605009633911, "grad_norm": 0.2721810042858124, "learning_rate": 0.0001, "loss": 1.3788, "step": 5415 }, { "epoch": 0.8696210661528581, "grad_norm": 0.26655474305152893, "learning_rate": 0.0001, "loss": 1.5133, "step": 5416 }, { "epoch": 0.869781631342325, "grad_norm": 0.2792733907699585, "learning_rate": 0.0001, "loss": 1.4712, "step": 5417 }, { "epoch": 0.869942196531792, "grad_norm": 0.2694145143032074, "learning_rate": 0.0001, "loss": 1.4402, "step": 5418 }, { "epoch": 0.8701027617212588, "grad_norm": 0.28874000906944275, "learning_rate": 0.0001, "loss": 1.4661, "step": 5419 }, { "epoch": 0.8702633269107257, "grad_norm": 0.2771264314651489, "learning_rate": 0.0001, "loss": 1.5002, "step": 5420 }, { "epoch": 0.8704238921001927, "grad_norm": 0.2902165353298187, "learning_rate": 0.0001, "loss": 1.4846, "step": 5421 }, { "epoch": 0.8705844572896596, "grad_norm": 0.28441616892814636, "learning_rate": 0.0001, "loss": 1.4281, "step": 5422 }, { "epoch": 0.8707450224791266, "grad_norm": 0.4823160767555237, "learning_rate": 0.0001, "loss": 1.4394, "step": 5423 }, { "epoch": 0.8709055876685935, "grad_norm": 0.269285649061203, "learning_rate": 0.0001, "loss": 1.474, "step": 5424 }, { "epoch": 0.8710661528580603, "grad_norm": 0.26998424530029297, "learning_rate": 0.0001, "loss": 1.3978, "step": 5425 }, { "epoch": 0.8712267180475273, "grad_norm": 0.28708550333976746, "learning_rate": 0.0001, "loss": 1.4277, "step": 5426 }, { "epoch": 0.8713872832369942, "grad_norm": 0.2990915775299072, "learning_rate": 0.0001, "loss": 1.4551, "step": 5427 }, { "epoch": 0.8715478484264612, "grad_norm": 0.32246556878089905, "learning_rate": 0.0001, "loss": 1.4494, "step": 5428 }, { "epoch": 0.8717084136159281, "grad_norm": 0.26776349544525146, "learning_rate": 0.0001, "loss": 1.4335, "step": 5429 }, { "epoch": 0.871868978805395, "grad_norm": 0.2630974054336548, "learning_rate": 0.0001, "loss": 1.3887, "step": 5430 }, { "epoch": 0.8720295439948619, "grad_norm": 0.29397687315940857, "learning_rate": 0.0001, "loss": 1.5027, "step": 5431 }, { "epoch": 0.8721901091843288, "grad_norm": 0.264883428812027, "learning_rate": 0.0001, "loss": 1.5002, "step": 5432 }, { "epoch": 0.8723506743737958, "grad_norm": 0.2784595489501953, "learning_rate": 0.0001, "loss": 1.4356, "step": 5433 }, { "epoch": 0.8725112395632627, "grad_norm": 0.2815721929073334, "learning_rate": 0.0001, "loss": 1.4899, "step": 5434 }, { "epoch": 0.8726718047527297, "grad_norm": 0.27264702320098877, "learning_rate": 0.0001, "loss": 1.495, "step": 5435 }, { "epoch": 0.8728323699421965, "grad_norm": 0.38980332016944885, "learning_rate": 0.0001, "loss": 1.3819, "step": 5436 }, { "epoch": 0.8729929351316634, "grad_norm": 0.2716798186302185, "learning_rate": 0.0001, "loss": 1.3688, "step": 5437 }, { "epoch": 0.8731535003211304, "grad_norm": 0.2638702094554901, "learning_rate": 0.0001, "loss": 1.4922, "step": 5438 }, { "epoch": 0.8733140655105973, "grad_norm": 0.27662479877471924, "learning_rate": 0.0001, "loss": 1.4759, "step": 5439 }, { "epoch": 0.8734746307000643, "grad_norm": 0.27404460310935974, "learning_rate": 0.0001, "loss": 1.4806, "step": 5440 }, { "epoch": 0.8736351958895312, "grad_norm": 0.29231128096580505, "learning_rate": 0.0001, "loss": 1.5648, "step": 5441 }, { "epoch": 0.873795761078998, "grad_norm": 0.26786231994628906, "learning_rate": 0.0001, "loss": 1.4983, "step": 5442 }, { "epoch": 0.873956326268465, "grad_norm": 0.2640931010246277, "learning_rate": 0.0001, "loss": 1.4557, "step": 5443 }, { "epoch": 0.8741168914579319, "grad_norm": 0.27399834990501404, "learning_rate": 0.0001, "loss": 1.4503, "step": 5444 }, { "epoch": 0.8742774566473989, "grad_norm": 0.28965431451797485, "learning_rate": 0.0001, "loss": 1.5404, "step": 5445 }, { "epoch": 0.8744380218368658, "grad_norm": 0.34253400564193726, "learning_rate": 0.0001, "loss": 1.445, "step": 5446 }, { "epoch": 0.8745985870263326, "grad_norm": 0.2716148793697357, "learning_rate": 0.0001, "loss": 1.4432, "step": 5447 }, { "epoch": 0.8747591522157996, "grad_norm": 0.2725996673107147, "learning_rate": 0.0001, "loss": 1.4203, "step": 5448 }, { "epoch": 0.8749197174052665, "grad_norm": 0.26596102118492126, "learning_rate": 0.0001, "loss": 1.4237, "step": 5449 }, { "epoch": 0.8750802825947335, "grad_norm": 0.279045432806015, "learning_rate": 0.0001, "loss": 1.4495, "step": 5450 }, { "epoch": 0.8752408477842004, "grad_norm": 0.25917115807533264, "learning_rate": 0.0001, "loss": 1.4089, "step": 5451 }, { "epoch": 0.8754014129736674, "grad_norm": 0.25699543952941895, "learning_rate": 0.0001, "loss": 1.438, "step": 5452 }, { "epoch": 0.8755619781631342, "grad_norm": 0.28414681553840637, "learning_rate": 0.0001, "loss": 1.4592, "step": 5453 }, { "epoch": 0.8757225433526011, "grad_norm": 0.2951044738292694, "learning_rate": 0.0001, "loss": 1.4188, "step": 5454 }, { "epoch": 0.8758831085420681, "grad_norm": 0.3004177510738373, "learning_rate": 0.0001, "loss": 1.4602, "step": 5455 }, { "epoch": 0.876043673731535, "grad_norm": 0.274126261472702, "learning_rate": 0.0001, "loss": 1.5185, "step": 5456 }, { "epoch": 0.876204238921002, "grad_norm": 0.2812749743461609, "learning_rate": 0.0001, "loss": 1.4698, "step": 5457 }, { "epoch": 0.8763648041104688, "grad_norm": 0.26154085993766785, "learning_rate": 0.0001, "loss": 1.3933, "step": 5458 }, { "epoch": 0.8765253692999357, "grad_norm": 0.2749265432357788, "learning_rate": 0.0001, "loss": 1.4817, "step": 5459 }, { "epoch": 0.8766859344894027, "grad_norm": 0.2600361406803131, "learning_rate": 0.0001, "loss": 1.405, "step": 5460 }, { "epoch": 0.8768464996788696, "grad_norm": 0.29384899139404297, "learning_rate": 0.0001, "loss": 1.5376, "step": 5461 }, { "epoch": 0.8770070648683366, "grad_norm": 0.2780305743217468, "learning_rate": 0.0001, "loss": 1.4352, "step": 5462 }, { "epoch": 0.8771676300578035, "grad_norm": 0.2738387882709503, "learning_rate": 0.0001, "loss": 1.5633, "step": 5463 }, { "epoch": 0.8773281952472703, "grad_norm": 0.27933788299560547, "learning_rate": 0.0001, "loss": 1.54, "step": 5464 }, { "epoch": 0.8774887604367373, "grad_norm": 0.26793208718299866, "learning_rate": 0.0001, "loss": 1.5069, "step": 5465 }, { "epoch": 0.8776493256262042, "grad_norm": 0.2673221528530121, "learning_rate": 0.0001, "loss": 1.4992, "step": 5466 }, { "epoch": 0.8778098908156712, "grad_norm": 0.25794437527656555, "learning_rate": 0.0001, "loss": 1.3249, "step": 5467 }, { "epoch": 0.8779704560051381, "grad_norm": 0.2692946493625641, "learning_rate": 0.0001, "loss": 1.4745, "step": 5468 }, { "epoch": 0.878131021194605, "grad_norm": 0.27269116044044495, "learning_rate": 0.0001, "loss": 1.4185, "step": 5469 }, { "epoch": 0.8782915863840719, "grad_norm": 0.27077004313468933, "learning_rate": 0.0001, "loss": 1.4149, "step": 5470 }, { "epoch": 0.8784521515735388, "grad_norm": 0.27809661626815796, "learning_rate": 0.0001, "loss": 1.5599, "step": 5471 }, { "epoch": 0.8786127167630058, "grad_norm": 0.2573157250881195, "learning_rate": 0.0001, "loss": 1.4652, "step": 5472 }, { "epoch": 0.8787732819524727, "grad_norm": 0.27147430181503296, "learning_rate": 0.0001, "loss": 1.468, "step": 5473 }, { "epoch": 0.8789338471419397, "grad_norm": 0.27140012383461, "learning_rate": 0.0001, "loss": 1.4872, "step": 5474 }, { "epoch": 0.8790944123314065, "grad_norm": 0.2830095589160919, "learning_rate": 0.0001, "loss": 1.5283, "step": 5475 }, { "epoch": 0.8792549775208734, "grad_norm": 0.2714841663837433, "learning_rate": 0.0001, "loss": 1.4509, "step": 5476 }, { "epoch": 0.8794155427103404, "grad_norm": 0.27208662033081055, "learning_rate": 0.0001, "loss": 1.4991, "step": 5477 }, { "epoch": 0.8795761078998073, "grad_norm": 0.268839031457901, "learning_rate": 0.0001, "loss": 1.4898, "step": 5478 }, { "epoch": 0.8797366730892743, "grad_norm": 0.2627919018268585, "learning_rate": 0.0001, "loss": 1.4628, "step": 5479 }, { "epoch": 0.8798972382787412, "grad_norm": 0.2893525958061218, "learning_rate": 0.0001, "loss": 1.5424, "step": 5480 }, { "epoch": 0.880057803468208, "grad_norm": 0.27234259247779846, "learning_rate": 0.0001, "loss": 1.3436, "step": 5481 }, { "epoch": 0.880218368657675, "grad_norm": 0.265053927898407, "learning_rate": 0.0001, "loss": 1.4102, "step": 5482 }, { "epoch": 0.8803789338471419, "grad_norm": 0.27132099866867065, "learning_rate": 0.0001, "loss": 1.4566, "step": 5483 }, { "epoch": 0.8805394990366089, "grad_norm": 0.26990145444869995, "learning_rate": 0.0001, "loss": 1.4016, "step": 5484 }, { "epoch": 0.8807000642260758, "grad_norm": 0.26688551902770996, "learning_rate": 0.0001, "loss": 1.4271, "step": 5485 }, { "epoch": 0.8808606294155427, "grad_norm": 0.2790434956550598, "learning_rate": 0.0001, "loss": 1.492, "step": 5486 }, { "epoch": 0.8810211946050096, "grad_norm": 0.2871028482913971, "learning_rate": 0.0001, "loss": 1.4772, "step": 5487 }, { "epoch": 0.8811817597944765, "grad_norm": 0.2595863342285156, "learning_rate": 0.0001, "loss": 1.4739, "step": 5488 }, { "epoch": 0.8813423249839435, "grad_norm": 0.28704044222831726, "learning_rate": 0.0001, "loss": 1.5201, "step": 5489 }, { "epoch": 0.8815028901734104, "grad_norm": 0.2778429090976715, "learning_rate": 0.0001, "loss": 1.4335, "step": 5490 }, { "epoch": 0.8816634553628774, "grad_norm": 0.270887553691864, "learning_rate": 0.0001, "loss": 1.4402, "step": 5491 }, { "epoch": 0.8818240205523442, "grad_norm": 0.26834598183631897, "learning_rate": 0.0001, "loss": 1.4492, "step": 5492 }, { "epoch": 0.8819845857418112, "grad_norm": 0.26983681321144104, "learning_rate": 0.0001, "loss": 1.5221, "step": 5493 }, { "epoch": 0.8821451509312781, "grad_norm": 0.26623982191085815, "learning_rate": 0.0001, "loss": 1.5069, "step": 5494 }, { "epoch": 0.882305716120745, "grad_norm": 0.2848629951477051, "learning_rate": 0.0001, "loss": 1.4249, "step": 5495 }, { "epoch": 0.882466281310212, "grad_norm": 0.26681122183799744, "learning_rate": 0.0001, "loss": 1.4899, "step": 5496 }, { "epoch": 0.8826268464996788, "grad_norm": 0.26040562987327576, "learning_rate": 0.0001, "loss": 1.5291, "step": 5497 }, { "epoch": 0.8827874116891458, "grad_norm": 1.1018840074539185, "learning_rate": 0.0001, "loss": 1.4965, "step": 5498 }, { "epoch": 0.8829479768786127, "grad_norm": 0.26510193943977356, "learning_rate": 0.0001, "loss": 1.5338, "step": 5499 }, { "epoch": 0.8831085420680796, "grad_norm": 0.2770121693611145, "learning_rate": 0.0001, "loss": 1.4952, "step": 5500 }, { "epoch": 0.8832691072575466, "grad_norm": 0.2851237952709198, "learning_rate": 0.0001, "loss": 1.397, "step": 5501 }, { "epoch": 0.8834296724470135, "grad_norm": 0.2876242399215698, "learning_rate": 0.0001, "loss": 1.5258, "step": 5502 }, { "epoch": 0.8835902376364804, "grad_norm": 0.28714463114738464, "learning_rate": 0.0001, "loss": 1.4906, "step": 5503 }, { "epoch": 0.8837508028259473, "grad_norm": 0.2737811207771301, "learning_rate": 0.0001, "loss": 1.4187, "step": 5504 }, { "epoch": 0.8839113680154143, "grad_norm": 0.2581880986690521, "learning_rate": 0.0001, "loss": 1.4158, "step": 5505 }, { "epoch": 0.8840719332048812, "grad_norm": 0.27452948689460754, "learning_rate": 0.0001, "loss": 1.5494, "step": 5506 }, { "epoch": 0.8842324983943481, "grad_norm": 0.25586310029029846, "learning_rate": 0.0001, "loss": 1.5023, "step": 5507 }, { "epoch": 0.884393063583815, "grad_norm": 0.2672421634197235, "learning_rate": 0.0001, "loss": 1.4421, "step": 5508 }, { "epoch": 0.8845536287732819, "grad_norm": 0.2715543508529663, "learning_rate": 0.0001, "loss": 1.5156, "step": 5509 }, { "epoch": 0.8847141939627489, "grad_norm": 0.27912983298301697, "learning_rate": 0.0001, "loss": 1.52, "step": 5510 }, { "epoch": 0.8848747591522158, "grad_norm": 0.27395251393318176, "learning_rate": 0.0001, "loss": 1.5105, "step": 5511 }, { "epoch": 0.8850353243416827, "grad_norm": 0.28986644744873047, "learning_rate": 0.0001, "loss": 1.4493, "step": 5512 }, { "epoch": 0.8851958895311497, "grad_norm": 0.277619332075119, "learning_rate": 0.0001, "loss": 1.4742, "step": 5513 }, { "epoch": 0.8853564547206165, "grad_norm": 0.2849483788013458, "learning_rate": 0.0001, "loss": 1.4366, "step": 5514 }, { "epoch": 0.8855170199100835, "grad_norm": 0.26593446731567383, "learning_rate": 0.0001, "loss": 1.4509, "step": 5515 }, { "epoch": 0.8856775850995504, "grad_norm": 0.2862519323825836, "learning_rate": 0.0001, "loss": 1.5016, "step": 5516 }, { "epoch": 0.8858381502890174, "grad_norm": 0.2797463834285736, "learning_rate": 0.0001, "loss": 1.5768, "step": 5517 }, { "epoch": 0.8859987154784843, "grad_norm": 0.2575477659702301, "learning_rate": 0.0001, "loss": 1.437, "step": 5518 }, { "epoch": 0.8861592806679512, "grad_norm": 0.3024967610836029, "learning_rate": 0.0001, "loss": 1.4959, "step": 5519 }, { "epoch": 0.8863198458574181, "grad_norm": 0.28635838627815247, "learning_rate": 0.0001, "loss": 1.5708, "step": 5520 }, { "epoch": 0.886480411046885, "grad_norm": 0.2505148947238922, "learning_rate": 0.0001, "loss": 1.4379, "step": 5521 }, { "epoch": 0.886640976236352, "grad_norm": 0.2753680348396301, "learning_rate": 0.0001, "loss": 1.5203, "step": 5522 }, { "epoch": 0.8868015414258189, "grad_norm": 0.29059112071990967, "learning_rate": 0.0001, "loss": 1.4797, "step": 5523 }, { "epoch": 0.8869621066152859, "grad_norm": 0.2863348722457886, "learning_rate": 0.0001, "loss": 1.5352, "step": 5524 }, { "epoch": 0.8871226718047527, "grad_norm": 0.2873809039592743, "learning_rate": 0.0001, "loss": 1.5321, "step": 5525 }, { "epoch": 0.8872832369942196, "grad_norm": 0.2848975658416748, "learning_rate": 0.0001, "loss": 1.4525, "step": 5526 }, { "epoch": 0.8874438021836866, "grad_norm": 0.2567618489265442, "learning_rate": 0.0001, "loss": 1.4443, "step": 5527 }, { "epoch": 0.8876043673731535, "grad_norm": 0.26232290267944336, "learning_rate": 0.0001, "loss": 1.4997, "step": 5528 }, { "epoch": 0.8877649325626205, "grad_norm": 0.5042279958724976, "learning_rate": 0.0001, "loss": 1.4704, "step": 5529 }, { "epoch": 0.8879254977520874, "grad_norm": 0.27147457003593445, "learning_rate": 0.0001, "loss": 1.4107, "step": 5530 }, { "epoch": 0.8880860629415542, "grad_norm": 0.25734785199165344, "learning_rate": 0.0001, "loss": 1.4891, "step": 5531 }, { "epoch": 0.8882466281310212, "grad_norm": 0.2572897672653198, "learning_rate": 0.0001, "loss": 1.3871, "step": 5532 }, { "epoch": 0.8884071933204881, "grad_norm": 0.27331671118736267, "learning_rate": 0.0001, "loss": 1.5044, "step": 5533 }, { "epoch": 0.8885677585099551, "grad_norm": 0.2718226909637451, "learning_rate": 0.0001, "loss": 1.4272, "step": 5534 }, { "epoch": 0.888728323699422, "grad_norm": 0.29373684525489807, "learning_rate": 0.0001, "loss": 1.4484, "step": 5535 }, { "epoch": 0.8888888888888888, "grad_norm": 0.29399439692497253, "learning_rate": 0.0001, "loss": 1.4547, "step": 5536 }, { "epoch": 0.8890494540783558, "grad_norm": 0.27119654417037964, "learning_rate": 0.0001, "loss": 1.4411, "step": 5537 }, { "epoch": 0.8892100192678227, "grad_norm": 0.2952917218208313, "learning_rate": 0.0001, "loss": 1.4182, "step": 5538 }, { "epoch": 0.8893705844572897, "grad_norm": 0.28254881501197815, "learning_rate": 0.0001, "loss": 1.4993, "step": 5539 }, { "epoch": 0.8895311496467566, "grad_norm": 0.27757877111434937, "learning_rate": 0.0001, "loss": 1.5142, "step": 5540 }, { "epoch": 0.8896917148362236, "grad_norm": 0.27630168199539185, "learning_rate": 0.0001, "loss": 1.4694, "step": 5541 }, { "epoch": 0.8898522800256904, "grad_norm": 0.2867511808872223, "learning_rate": 0.0001, "loss": 1.3884, "step": 5542 }, { "epoch": 0.8900128452151573, "grad_norm": 0.27782851457595825, "learning_rate": 0.0001, "loss": 1.4658, "step": 5543 }, { "epoch": 0.8901734104046243, "grad_norm": 0.3377024233341217, "learning_rate": 0.0001, "loss": 1.4425, "step": 5544 }, { "epoch": 0.8903339755940912, "grad_norm": 0.27609983086586, "learning_rate": 0.0001, "loss": 1.3812, "step": 5545 }, { "epoch": 0.8904945407835582, "grad_norm": 0.29063501954078674, "learning_rate": 0.0001, "loss": 1.5037, "step": 5546 }, { "epoch": 0.890655105973025, "grad_norm": 0.2954513132572174, "learning_rate": 0.0001, "loss": 1.5111, "step": 5547 }, { "epoch": 0.8908156711624919, "grad_norm": 0.2803086042404175, "learning_rate": 0.0001, "loss": 1.4915, "step": 5548 }, { "epoch": 0.8909762363519589, "grad_norm": 0.2909174859523773, "learning_rate": 0.0001, "loss": 1.4672, "step": 5549 }, { "epoch": 0.8911368015414258, "grad_norm": 0.28933873772621155, "learning_rate": 0.0001, "loss": 1.518, "step": 5550 }, { "epoch": 0.8912973667308928, "grad_norm": 0.27435481548309326, "learning_rate": 0.0001, "loss": 1.4816, "step": 5551 }, { "epoch": 0.8914579319203597, "grad_norm": 0.2842663824558258, "learning_rate": 0.0001, "loss": 1.4744, "step": 5552 }, { "epoch": 0.8916184971098265, "grad_norm": 0.27131035923957825, "learning_rate": 0.0001, "loss": 1.4683, "step": 5553 }, { "epoch": 0.8917790622992935, "grad_norm": 0.2606917917728424, "learning_rate": 0.0001, "loss": 1.3991, "step": 5554 }, { "epoch": 0.8919396274887604, "grad_norm": 0.2685220241546631, "learning_rate": 0.0001, "loss": 1.4473, "step": 5555 }, { "epoch": 0.8921001926782274, "grad_norm": 0.2765880227088928, "learning_rate": 0.0001, "loss": 1.4593, "step": 5556 }, { "epoch": 0.8922607578676943, "grad_norm": 0.26068785786628723, "learning_rate": 0.0001, "loss": 1.4555, "step": 5557 }, { "epoch": 0.8924213230571612, "grad_norm": 0.2859265208244324, "learning_rate": 0.0001, "loss": 1.5097, "step": 5558 }, { "epoch": 0.8925818882466281, "grad_norm": 0.2766442894935608, "learning_rate": 0.0001, "loss": 1.465, "step": 5559 }, { "epoch": 0.892742453436095, "grad_norm": 0.27757006883621216, "learning_rate": 0.0001, "loss": 1.502, "step": 5560 }, { "epoch": 0.892903018625562, "grad_norm": 0.2708435654640198, "learning_rate": 0.0001, "loss": 1.5407, "step": 5561 }, { "epoch": 0.8930635838150289, "grad_norm": 0.27640071511268616, "learning_rate": 0.0001, "loss": 1.4641, "step": 5562 }, { "epoch": 0.8932241490044959, "grad_norm": 0.26688772439956665, "learning_rate": 0.0001, "loss": 1.4338, "step": 5563 }, { "epoch": 0.8933847141939627, "grad_norm": 0.2749989628791809, "learning_rate": 0.0001, "loss": 1.4659, "step": 5564 }, { "epoch": 0.8935452793834296, "grad_norm": 0.29130905866622925, "learning_rate": 0.0001, "loss": 1.4989, "step": 5565 }, { "epoch": 0.8937058445728966, "grad_norm": 0.28483328223228455, "learning_rate": 0.0001, "loss": 1.4407, "step": 5566 }, { "epoch": 0.8938664097623635, "grad_norm": 0.42114609479904175, "learning_rate": 0.0001, "loss": 1.5139, "step": 5567 }, { "epoch": 0.8940269749518305, "grad_norm": 0.2821149230003357, "learning_rate": 0.0001, "loss": 1.4913, "step": 5568 }, { "epoch": 0.8941875401412974, "grad_norm": 0.2953449487686157, "learning_rate": 0.0001, "loss": 1.4786, "step": 5569 }, { "epoch": 0.8943481053307643, "grad_norm": 0.26782578229904175, "learning_rate": 0.0001, "loss": 1.4649, "step": 5570 }, { "epoch": 0.8945086705202312, "grad_norm": 0.27817678451538086, "learning_rate": 0.0001, "loss": 1.52, "step": 5571 }, { "epoch": 0.8946692357096981, "grad_norm": 0.27889594435691833, "learning_rate": 0.0001, "loss": 1.4755, "step": 5572 }, { "epoch": 0.8948298008991651, "grad_norm": 0.25449198484420776, "learning_rate": 0.0001, "loss": 1.3736, "step": 5573 }, { "epoch": 0.894990366088632, "grad_norm": 0.27022528648376465, "learning_rate": 0.0001, "loss": 1.4576, "step": 5574 }, { "epoch": 0.8951509312780989, "grad_norm": 0.2697735130786896, "learning_rate": 0.0001, "loss": 1.4779, "step": 5575 }, { "epoch": 0.8953114964675658, "grad_norm": 0.28412875533103943, "learning_rate": 0.0001, "loss": 1.5155, "step": 5576 }, { "epoch": 0.8954720616570327, "grad_norm": 0.26869145035743713, "learning_rate": 0.0001, "loss": 1.4827, "step": 5577 }, { "epoch": 0.8956326268464997, "grad_norm": 0.26857876777648926, "learning_rate": 0.0001, "loss": 1.4641, "step": 5578 }, { "epoch": 0.8957931920359666, "grad_norm": 0.2718310058116913, "learning_rate": 0.0001, "loss": 1.5202, "step": 5579 }, { "epoch": 0.8959537572254336, "grad_norm": 0.27814191579818726, "learning_rate": 0.0001, "loss": 1.5414, "step": 5580 }, { "epoch": 0.8961143224149004, "grad_norm": 0.2594946324825287, "learning_rate": 0.0001, "loss": 1.3946, "step": 5581 }, { "epoch": 0.8962748876043674, "grad_norm": 0.2645629942417145, "learning_rate": 0.0001, "loss": 1.4806, "step": 5582 }, { "epoch": 0.8964354527938343, "grad_norm": 0.2640096843242645, "learning_rate": 0.0001, "loss": 1.5146, "step": 5583 }, { "epoch": 0.8965960179833012, "grad_norm": 0.2750597894191742, "learning_rate": 0.0001, "loss": 1.523, "step": 5584 }, { "epoch": 0.8967565831727682, "grad_norm": 0.26160359382629395, "learning_rate": 0.0001, "loss": 1.3128, "step": 5585 }, { "epoch": 0.896917148362235, "grad_norm": 0.2674008905887604, "learning_rate": 0.0001, "loss": 1.4988, "step": 5586 }, { "epoch": 0.897077713551702, "grad_norm": 0.3044244945049286, "learning_rate": 0.0001, "loss": 1.4849, "step": 5587 }, { "epoch": 0.8972382787411689, "grad_norm": 0.27884620428085327, "learning_rate": 0.0001, "loss": 1.3733, "step": 5588 }, { "epoch": 0.8973988439306358, "grad_norm": 0.27493929862976074, "learning_rate": 0.0001, "loss": 1.5163, "step": 5589 }, { "epoch": 0.8975594091201028, "grad_norm": 0.2740201950073242, "learning_rate": 0.0001, "loss": 1.5709, "step": 5590 }, { "epoch": 0.8977199743095697, "grad_norm": 0.2665272355079651, "learning_rate": 0.0001, "loss": 1.4594, "step": 5591 }, { "epoch": 0.8978805394990366, "grad_norm": 0.28412479162216187, "learning_rate": 0.0001, "loss": 1.3765, "step": 5592 }, { "epoch": 0.8980411046885035, "grad_norm": 0.25726547837257385, "learning_rate": 0.0001, "loss": 1.4562, "step": 5593 }, { "epoch": 0.8982016698779705, "grad_norm": 0.26011887192726135, "learning_rate": 0.0001, "loss": 1.4944, "step": 5594 }, { "epoch": 0.8983622350674374, "grad_norm": 0.27630525827407837, "learning_rate": 0.0001, "loss": 1.546, "step": 5595 }, { "epoch": 0.8985228002569043, "grad_norm": 0.27536508440971375, "learning_rate": 0.0001, "loss": 1.4899, "step": 5596 }, { "epoch": 0.8986833654463712, "grad_norm": 0.2938755452632904, "learning_rate": 0.0001, "loss": 1.2796, "step": 5597 }, { "epoch": 0.8988439306358381, "grad_norm": 0.2800268232822418, "learning_rate": 0.0001, "loss": 1.421, "step": 5598 }, { "epoch": 0.8990044958253051, "grad_norm": 0.2824115753173828, "learning_rate": 0.0001, "loss": 1.4583, "step": 5599 }, { "epoch": 0.899165061014772, "grad_norm": 0.28322410583496094, "learning_rate": 0.0001, "loss": 1.4766, "step": 5600 }, { "epoch": 0.899325626204239, "grad_norm": 0.2641003727912903, "learning_rate": 0.0001, "loss": 1.4357, "step": 5601 }, { "epoch": 0.8994861913937059, "grad_norm": 0.2806249260902405, "learning_rate": 0.0001, "loss": 1.4854, "step": 5602 }, { "epoch": 0.8996467565831727, "grad_norm": 0.27235904335975647, "learning_rate": 0.0001, "loss": 1.5044, "step": 5603 }, { "epoch": 0.8998073217726397, "grad_norm": 0.2679192125797272, "learning_rate": 0.0001, "loss": 1.508, "step": 5604 }, { "epoch": 0.8999678869621066, "grad_norm": 0.2759861350059509, "learning_rate": 0.0001, "loss": 1.4847, "step": 5605 }, { "epoch": 0.9001284521515736, "grad_norm": 0.26617512106895447, "learning_rate": 0.0001, "loss": 1.4159, "step": 5606 }, { "epoch": 0.9002890173410405, "grad_norm": 0.27368101477622986, "learning_rate": 0.0001, "loss": 1.4694, "step": 5607 }, { "epoch": 0.9004495825305073, "grad_norm": 0.2679339349269867, "learning_rate": 0.0001, "loss": 1.4271, "step": 5608 }, { "epoch": 0.9006101477199743, "grad_norm": 0.2649863660335541, "learning_rate": 0.0001, "loss": 1.43, "step": 5609 }, { "epoch": 0.9007707129094412, "grad_norm": 0.26748520135879517, "learning_rate": 0.0001, "loss": 1.5332, "step": 5610 }, { "epoch": 0.9009312780989082, "grad_norm": 0.26997894048690796, "learning_rate": 0.0001, "loss": 1.3761, "step": 5611 }, { "epoch": 0.9010918432883751, "grad_norm": 0.2782628536224365, "learning_rate": 0.0001, "loss": 1.4201, "step": 5612 }, { "epoch": 0.901252408477842, "grad_norm": 0.2644610106945038, "learning_rate": 0.0001, "loss": 1.4314, "step": 5613 }, { "epoch": 0.9014129736673089, "grad_norm": 0.26767414808273315, "learning_rate": 0.0001, "loss": 1.4027, "step": 5614 }, { "epoch": 0.9015735388567758, "grad_norm": 0.2820192277431488, "learning_rate": 0.0001, "loss": 1.4734, "step": 5615 }, { "epoch": 0.9017341040462428, "grad_norm": 0.27115315198898315, "learning_rate": 0.0001, "loss": 1.4578, "step": 5616 }, { "epoch": 0.9018946692357097, "grad_norm": 0.26880043745040894, "learning_rate": 0.0001, "loss": 1.4978, "step": 5617 }, { "epoch": 0.9020552344251767, "grad_norm": 0.49644196033477783, "learning_rate": 0.0001, "loss": 1.5041, "step": 5618 }, { "epoch": 0.9022157996146436, "grad_norm": 0.2766619920730591, "learning_rate": 0.0001, "loss": 1.4895, "step": 5619 }, { "epoch": 0.9023763648041104, "grad_norm": 0.25244495272636414, "learning_rate": 0.0001, "loss": 1.4198, "step": 5620 }, { "epoch": 0.9025369299935774, "grad_norm": 0.2686665952205658, "learning_rate": 0.0001, "loss": 1.4735, "step": 5621 }, { "epoch": 0.9026974951830443, "grad_norm": 0.2957390546798706, "learning_rate": 0.0001, "loss": 1.5258, "step": 5622 }, { "epoch": 0.9028580603725113, "grad_norm": 0.30311015248298645, "learning_rate": 0.0001, "loss": 1.4177, "step": 5623 }, { "epoch": 0.9030186255619782, "grad_norm": 0.2766197919845581, "learning_rate": 0.0001, "loss": 1.4683, "step": 5624 }, { "epoch": 0.903179190751445, "grad_norm": 0.2730553150177002, "learning_rate": 0.0001, "loss": 1.4722, "step": 5625 }, { "epoch": 0.903339755940912, "grad_norm": 0.28147438168525696, "learning_rate": 0.0001, "loss": 1.5865, "step": 5626 }, { "epoch": 0.9035003211303789, "grad_norm": 0.2749589681625366, "learning_rate": 0.0001, "loss": 1.4322, "step": 5627 }, { "epoch": 0.9036608863198459, "grad_norm": 0.2956511080265045, "learning_rate": 0.0001, "loss": 1.5257, "step": 5628 }, { "epoch": 0.9038214515093128, "grad_norm": 0.2774571180343628, "learning_rate": 0.0001, "loss": 1.4553, "step": 5629 }, { "epoch": 0.9039820166987798, "grad_norm": 0.2679741084575653, "learning_rate": 0.0001, "loss": 1.4936, "step": 5630 }, { "epoch": 0.9041425818882466, "grad_norm": 0.3090623915195465, "learning_rate": 0.0001, "loss": 1.5493, "step": 5631 }, { "epoch": 0.9043031470777135, "grad_norm": 0.26591628789901733, "learning_rate": 0.0001, "loss": 1.5005, "step": 5632 }, { "epoch": 0.9044637122671805, "grad_norm": 0.2733018100261688, "learning_rate": 0.0001, "loss": 1.4519, "step": 5633 }, { "epoch": 0.9046242774566474, "grad_norm": 0.2661973834037781, "learning_rate": 0.0001, "loss": 1.5416, "step": 5634 }, { "epoch": 0.9047848426461144, "grad_norm": 0.27166926860809326, "learning_rate": 0.0001, "loss": 1.4797, "step": 5635 }, { "epoch": 0.9049454078355812, "grad_norm": 0.2655140459537506, "learning_rate": 0.0001, "loss": 1.4686, "step": 5636 }, { "epoch": 0.9051059730250481, "grad_norm": 0.28489869832992554, "learning_rate": 0.0001, "loss": 1.4468, "step": 5637 }, { "epoch": 0.9052665382145151, "grad_norm": 0.2994062304496765, "learning_rate": 0.0001, "loss": 1.4438, "step": 5638 }, { "epoch": 0.905427103403982, "grad_norm": 0.2887355089187622, "learning_rate": 0.0001, "loss": 1.4588, "step": 5639 }, { "epoch": 0.905587668593449, "grad_norm": 0.29034167528152466, "learning_rate": 0.0001, "loss": 1.5389, "step": 5640 }, { "epoch": 0.9057482337829159, "grad_norm": 0.28640443086624146, "learning_rate": 0.0001, "loss": 1.4877, "step": 5641 }, { "epoch": 0.9059087989723827, "grad_norm": 0.28215524554252625, "learning_rate": 0.0001, "loss": 1.4479, "step": 5642 }, { "epoch": 0.9060693641618497, "grad_norm": 0.32695475220680237, "learning_rate": 0.0001, "loss": 1.4924, "step": 5643 }, { "epoch": 0.9062299293513166, "grad_norm": 0.29609331488609314, "learning_rate": 0.0001, "loss": 1.5179, "step": 5644 }, { "epoch": 0.9063904945407836, "grad_norm": 0.2883678078651428, "learning_rate": 0.0001, "loss": 1.4947, "step": 5645 }, { "epoch": 0.9065510597302505, "grad_norm": 0.2817707657814026, "learning_rate": 0.0001, "loss": 1.4946, "step": 5646 }, { "epoch": 0.9067116249197174, "grad_norm": 0.26052823662757874, "learning_rate": 0.0001, "loss": 1.4611, "step": 5647 }, { "epoch": 0.9068721901091843, "grad_norm": 0.26856547594070435, "learning_rate": 0.0001, "loss": 1.4527, "step": 5648 }, { "epoch": 0.9070327552986512, "grad_norm": 0.2800810933113098, "learning_rate": 0.0001, "loss": 1.4586, "step": 5649 }, { "epoch": 0.9071933204881182, "grad_norm": 0.2645875811576843, "learning_rate": 0.0001, "loss": 1.4309, "step": 5650 }, { "epoch": 0.9073538856775851, "grad_norm": 0.28114935755729675, "learning_rate": 0.0001, "loss": 1.5123, "step": 5651 }, { "epoch": 0.9075144508670521, "grad_norm": 0.2695562243461609, "learning_rate": 0.0001, "loss": 1.4983, "step": 5652 }, { "epoch": 0.9076750160565189, "grad_norm": 0.27862122654914856, "learning_rate": 0.0001, "loss": 1.4686, "step": 5653 }, { "epoch": 0.9078355812459858, "grad_norm": 0.9690446257591248, "learning_rate": 0.0001, "loss": 1.5502, "step": 5654 }, { "epoch": 0.9079961464354528, "grad_norm": 0.27264535427093506, "learning_rate": 0.0001, "loss": 1.4717, "step": 5655 }, { "epoch": 0.9081567116249197, "grad_norm": 0.27225714921951294, "learning_rate": 0.0001, "loss": 1.4961, "step": 5656 }, { "epoch": 0.9083172768143867, "grad_norm": 0.28575438261032104, "learning_rate": 0.0001, "loss": 1.4547, "step": 5657 }, { "epoch": 0.9084778420038536, "grad_norm": 0.29622870683670044, "learning_rate": 0.0001, "loss": 1.3789, "step": 5658 }, { "epoch": 0.9086384071933205, "grad_norm": 0.28283485770225525, "learning_rate": 0.0001, "loss": 1.4775, "step": 5659 }, { "epoch": 0.9087989723827874, "grad_norm": 0.27316781878471375, "learning_rate": 0.0001, "loss": 1.4421, "step": 5660 }, { "epoch": 0.9089595375722543, "grad_norm": 0.2823334038257599, "learning_rate": 0.0001, "loss": 1.4682, "step": 5661 }, { "epoch": 0.9091201027617213, "grad_norm": 0.27554428577423096, "learning_rate": 0.0001, "loss": 1.4361, "step": 5662 }, { "epoch": 0.9092806679511882, "grad_norm": 0.27766990661621094, "learning_rate": 0.0001, "loss": 1.527, "step": 5663 }, { "epoch": 0.9094412331406551, "grad_norm": 0.29925546050071716, "learning_rate": 0.0001, "loss": 1.5193, "step": 5664 }, { "epoch": 0.909601798330122, "grad_norm": 0.27168726921081543, "learning_rate": 0.0001, "loss": 1.4366, "step": 5665 }, { "epoch": 0.909762363519589, "grad_norm": 0.2684997022151947, "learning_rate": 0.0001, "loss": 1.4119, "step": 5666 }, { "epoch": 0.9099229287090559, "grad_norm": 0.2845095694065094, "learning_rate": 0.0001, "loss": 1.5543, "step": 5667 }, { "epoch": 0.9100834938985228, "grad_norm": 0.2682120203971863, "learning_rate": 0.0001, "loss": 1.4631, "step": 5668 }, { "epoch": 0.9102440590879898, "grad_norm": 0.26111850142478943, "learning_rate": 0.0001, "loss": 1.4614, "step": 5669 }, { "epoch": 0.9104046242774566, "grad_norm": 0.26749932765960693, "learning_rate": 0.0001, "loss": 1.5122, "step": 5670 }, { "epoch": 0.9105651894669236, "grad_norm": 0.27375659346580505, "learning_rate": 0.0001, "loss": 1.4932, "step": 5671 }, { "epoch": 0.9107257546563905, "grad_norm": 0.2742297947406769, "learning_rate": 0.0001, "loss": 1.4931, "step": 5672 }, { "epoch": 0.9108863198458574, "grad_norm": 0.2770094573497772, "learning_rate": 0.0001, "loss": 1.3913, "step": 5673 }, { "epoch": 0.9110468850353244, "grad_norm": 0.284294068813324, "learning_rate": 0.0001, "loss": 1.5404, "step": 5674 }, { "epoch": 0.9112074502247912, "grad_norm": 0.27823662757873535, "learning_rate": 0.0001, "loss": 1.5444, "step": 5675 }, { "epoch": 0.9113680154142582, "grad_norm": 0.27007046341896057, "learning_rate": 0.0001, "loss": 1.4577, "step": 5676 }, { "epoch": 0.9115285806037251, "grad_norm": 0.27728602290153503, "learning_rate": 0.0001, "loss": 1.531, "step": 5677 }, { "epoch": 0.911689145793192, "grad_norm": 0.3271082043647766, "learning_rate": 0.0001, "loss": 1.4856, "step": 5678 }, { "epoch": 0.911849710982659, "grad_norm": 0.27263563871383667, "learning_rate": 0.0001, "loss": 1.4884, "step": 5679 }, { "epoch": 0.9120102761721259, "grad_norm": 0.2838371992111206, "learning_rate": 0.0001, "loss": 1.5213, "step": 5680 }, { "epoch": 0.9121708413615928, "grad_norm": 0.26938414573669434, "learning_rate": 0.0001, "loss": 1.4646, "step": 5681 }, { "epoch": 0.9123314065510597, "grad_norm": 0.283206045627594, "learning_rate": 0.0001, "loss": 1.482, "step": 5682 }, { "epoch": 0.9124919717405267, "grad_norm": 0.2698117196559906, "learning_rate": 0.0001, "loss": 1.4331, "step": 5683 }, { "epoch": 0.9126525369299936, "grad_norm": 0.2849421203136444, "learning_rate": 0.0001, "loss": 1.5028, "step": 5684 }, { "epoch": 0.9128131021194605, "grad_norm": 0.27039268612861633, "learning_rate": 0.0001, "loss": 1.4567, "step": 5685 }, { "epoch": 0.9129736673089274, "grad_norm": 0.26833176612854004, "learning_rate": 0.0001, "loss": 1.4978, "step": 5686 }, { "epoch": 0.9131342324983943, "grad_norm": 0.2842022776603699, "learning_rate": 0.0001, "loss": 1.4712, "step": 5687 }, { "epoch": 0.9132947976878613, "grad_norm": 0.28925463557243347, "learning_rate": 0.0001, "loss": 1.5191, "step": 5688 }, { "epoch": 0.9134553628773282, "grad_norm": 0.2731495201587677, "learning_rate": 0.0001, "loss": 1.456, "step": 5689 }, { "epoch": 0.9136159280667951, "grad_norm": 0.2853472828865051, "learning_rate": 0.0001, "loss": 1.5377, "step": 5690 }, { "epoch": 0.9137764932562621, "grad_norm": 0.2628810703754425, "learning_rate": 0.0001, "loss": 1.4697, "step": 5691 }, { "epoch": 0.9139370584457289, "grad_norm": 0.2630572021007538, "learning_rate": 0.0001, "loss": 1.4778, "step": 5692 }, { "epoch": 0.9140976236351959, "grad_norm": 0.27523350715637207, "learning_rate": 0.0001, "loss": 1.4572, "step": 5693 }, { "epoch": 0.9142581888246628, "grad_norm": 0.267199844121933, "learning_rate": 0.0001, "loss": 1.5051, "step": 5694 }, { "epoch": 0.9144187540141298, "grad_norm": 0.2666296362876892, "learning_rate": 0.0001, "loss": 1.4595, "step": 5695 }, { "epoch": 0.9145793192035967, "grad_norm": 0.2623789608478546, "learning_rate": 0.0001, "loss": 1.4275, "step": 5696 }, { "epoch": 0.9147398843930635, "grad_norm": 0.2783721387386322, "learning_rate": 0.0001, "loss": 1.4741, "step": 5697 }, { "epoch": 0.9149004495825305, "grad_norm": 0.2690523862838745, "learning_rate": 0.0001, "loss": 1.4386, "step": 5698 }, { "epoch": 0.9150610147719974, "grad_norm": 0.27985507249832153, "learning_rate": 0.0001, "loss": 1.4814, "step": 5699 }, { "epoch": 0.9152215799614644, "grad_norm": 0.2744964063167572, "learning_rate": 0.0001, "loss": 1.5168, "step": 5700 }, { "epoch": 0.9153821451509313, "grad_norm": 0.29585015773773193, "learning_rate": 0.0001, "loss": 1.4577, "step": 5701 }, { "epoch": 0.9155427103403982, "grad_norm": 0.6999393105506897, "learning_rate": 0.0001, "loss": 1.4938, "step": 5702 }, { "epoch": 0.9157032755298651, "grad_norm": 0.26724332571029663, "learning_rate": 0.0001, "loss": 1.4642, "step": 5703 }, { "epoch": 0.915863840719332, "grad_norm": 0.26325684785842896, "learning_rate": 0.0001, "loss": 1.5335, "step": 5704 }, { "epoch": 0.916024405908799, "grad_norm": 0.2777913808822632, "learning_rate": 0.0001, "loss": 1.4725, "step": 5705 }, { "epoch": 0.9161849710982659, "grad_norm": 0.277014821767807, "learning_rate": 0.0001, "loss": 1.4831, "step": 5706 }, { "epoch": 0.9163455362877329, "grad_norm": 0.2649841010570526, "learning_rate": 0.0001, "loss": 1.4683, "step": 5707 }, { "epoch": 0.9165061014771998, "grad_norm": 0.25996530055999756, "learning_rate": 0.0001, "loss": 1.4099, "step": 5708 }, { "epoch": 0.9166666666666666, "grad_norm": 0.27567023038864136, "learning_rate": 0.0001, "loss": 1.4417, "step": 5709 }, { "epoch": 0.9168272318561336, "grad_norm": 0.26955941319465637, "learning_rate": 0.0001, "loss": 1.4445, "step": 5710 }, { "epoch": 0.9169877970456005, "grad_norm": 0.27277833223342896, "learning_rate": 0.0001, "loss": 1.5454, "step": 5711 }, { "epoch": 0.9171483622350675, "grad_norm": 0.26426592469215393, "learning_rate": 0.0001, "loss": 1.4566, "step": 5712 }, { "epoch": 0.9173089274245344, "grad_norm": 0.26648545265197754, "learning_rate": 0.0001, "loss": 1.49, "step": 5713 }, { "epoch": 0.9174694926140012, "grad_norm": 0.266801118850708, "learning_rate": 0.0001, "loss": 1.3964, "step": 5714 }, { "epoch": 0.9176300578034682, "grad_norm": 0.2627018690109253, "learning_rate": 0.0001, "loss": 1.486, "step": 5715 }, { "epoch": 0.9177906229929351, "grad_norm": 0.2842526137828827, "learning_rate": 0.0001, "loss": 1.4861, "step": 5716 }, { "epoch": 0.9179511881824021, "grad_norm": 0.26749587059020996, "learning_rate": 0.0001, "loss": 1.424, "step": 5717 }, { "epoch": 0.918111753371869, "grad_norm": 0.28474441170692444, "learning_rate": 0.0001, "loss": 1.4852, "step": 5718 }, { "epoch": 0.918272318561336, "grad_norm": 0.26730814576148987, "learning_rate": 0.0001, "loss": 1.3754, "step": 5719 }, { "epoch": 0.9184328837508028, "grad_norm": 0.27947551012039185, "learning_rate": 0.0001, "loss": 1.4497, "step": 5720 }, { "epoch": 0.9185934489402697, "grad_norm": 0.28426793217658997, "learning_rate": 0.0001, "loss": 1.4121, "step": 5721 }, { "epoch": 0.9187540141297367, "grad_norm": 0.28511571884155273, "learning_rate": 0.0001, "loss": 1.4188, "step": 5722 }, { "epoch": 0.9189145793192036, "grad_norm": 0.29709392786026, "learning_rate": 0.0001, "loss": 1.5127, "step": 5723 }, { "epoch": 0.9190751445086706, "grad_norm": 0.2649746537208557, "learning_rate": 0.0001, "loss": 1.4985, "step": 5724 }, { "epoch": 0.9192357096981374, "grad_norm": 0.2661975026130676, "learning_rate": 0.0001, "loss": 1.5049, "step": 5725 }, { "epoch": 0.9193962748876043, "grad_norm": 0.29220449924468994, "learning_rate": 0.0001, "loss": 1.4678, "step": 5726 }, { "epoch": 0.9195568400770713, "grad_norm": 0.26839110255241394, "learning_rate": 0.0001, "loss": 1.3938, "step": 5727 }, { "epoch": 0.9197174052665382, "grad_norm": 0.2792374789714813, "learning_rate": 0.0001, "loss": 1.4432, "step": 5728 }, { "epoch": 0.9198779704560052, "grad_norm": 0.2627927362918854, "learning_rate": 0.0001, "loss": 1.4709, "step": 5729 }, { "epoch": 0.9200385356454721, "grad_norm": 0.2641960084438324, "learning_rate": 0.0001, "loss": 1.5006, "step": 5730 }, { "epoch": 0.9201991008349389, "grad_norm": 0.27540579438209534, "learning_rate": 0.0001, "loss": 1.4866, "step": 5731 }, { "epoch": 0.9203596660244059, "grad_norm": 0.2716390788555145, "learning_rate": 0.0001, "loss": 1.4407, "step": 5732 }, { "epoch": 0.9205202312138728, "grad_norm": 0.2709677219390869, "learning_rate": 0.0001, "loss": 1.4148, "step": 5733 }, { "epoch": 0.9206807964033398, "grad_norm": 0.284290075302124, "learning_rate": 0.0001, "loss": 1.5233, "step": 5734 }, { "epoch": 0.9208413615928067, "grad_norm": 0.2621372938156128, "learning_rate": 0.0001, "loss": 1.4556, "step": 5735 }, { "epoch": 0.9210019267822736, "grad_norm": 0.27332156896591187, "learning_rate": 0.0001, "loss": 1.376, "step": 5736 }, { "epoch": 0.9211624919717405, "grad_norm": 0.27216067910194397, "learning_rate": 0.0001, "loss": 1.5591, "step": 5737 }, { "epoch": 0.9213230571612074, "grad_norm": 0.27463793754577637, "learning_rate": 0.0001, "loss": 1.5493, "step": 5738 }, { "epoch": 0.9214836223506744, "grad_norm": 0.28009170293807983, "learning_rate": 0.0001, "loss": 1.5244, "step": 5739 }, { "epoch": 0.9216441875401413, "grad_norm": 0.2847888767719269, "learning_rate": 0.0001, "loss": 1.49, "step": 5740 }, { "epoch": 0.9218047527296083, "grad_norm": 0.26402273774147034, "learning_rate": 0.0001, "loss": 1.5125, "step": 5741 }, { "epoch": 0.9219653179190751, "grad_norm": 0.2641732096672058, "learning_rate": 0.0001, "loss": 1.5069, "step": 5742 }, { "epoch": 0.922125883108542, "grad_norm": 0.29763108491897583, "learning_rate": 0.0001, "loss": 1.4892, "step": 5743 }, { "epoch": 0.922286448298009, "grad_norm": 0.2714250385761261, "learning_rate": 0.0001, "loss": 1.516, "step": 5744 }, { "epoch": 0.9224470134874759, "grad_norm": 0.2819075584411621, "learning_rate": 0.0001, "loss": 1.5841, "step": 5745 }, { "epoch": 0.9226075786769429, "grad_norm": 0.2795034945011139, "learning_rate": 0.0001, "loss": 1.4864, "step": 5746 }, { "epoch": 0.9227681438664097, "grad_norm": 0.2678384482860565, "learning_rate": 0.0001, "loss": 1.4533, "step": 5747 }, { "epoch": 0.9229287090558767, "grad_norm": 0.2672341763973236, "learning_rate": 0.0001, "loss": 1.5334, "step": 5748 }, { "epoch": 0.9230892742453436, "grad_norm": 0.27180591225624084, "learning_rate": 0.0001, "loss": 1.4305, "step": 5749 }, { "epoch": 0.9232498394348105, "grad_norm": 0.2737089693546295, "learning_rate": 0.0001, "loss": 1.5157, "step": 5750 }, { "epoch": 0.9234104046242775, "grad_norm": 0.2823292911052704, "learning_rate": 0.0001, "loss": 1.4531, "step": 5751 }, { "epoch": 0.9235709698137444, "grad_norm": 0.2742321789264679, "learning_rate": 0.0001, "loss": 1.4078, "step": 5752 }, { "epoch": 0.9237315350032113, "grad_norm": 0.2717142105102539, "learning_rate": 0.0001, "loss": 1.4479, "step": 5753 }, { "epoch": 0.9238921001926782, "grad_norm": 0.2947547435760498, "learning_rate": 0.0001, "loss": 1.4127, "step": 5754 }, { "epoch": 0.9240526653821451, "grad_norm": 0.2838168144226074, "learning_rate": 0.0001, "loss": 1.476, "step": 5755 }, { "epoch": 0.9242132305716121, "grad_norm": 0.27356472611427307, "learning_rate": 0.0001, "loss": 1.4374, "step": 5756 }, { "epoch": 0.924373795761079, "grad_norm": 0.303329735994339, "learning_rate": 0.0001, "loss": 1.5093, "step": 5757 }, { "epoch": 0.924534360950546, "grad_norm": 0.32209450006484985, "learning_rate": 0.0001, "loss": 1.5002, "step": 5758 }, { "epoch": 0.9246949261400128, "grad_norm": 0.2569775879383087, "learning_rate": 0.0001, "loss": 1.5086, "step": 5759 }, { "epoch": 0.9248554913294798, "grad_norm": 0.27903053164482117, "learning_rate": 0.0001, "loss": 1.4443, "step": 5760 }, { "epoch": 0.9250160565189467, "grad_norm": 0.25670740008354187, "learning_rate": 0.0001, "loss": 1.5015, "step": 5761 }, { "epoch": 0.9251766217084136, "grad_norm": 0.28923937678337097, "learning_rate": 0.0001, "loss": 1.4439, "step": 5762 }, { "epoch": 0.9253371868978806, "grad_norm": 0.2757799029350281, "learning_rate": 0.0001, "loss": 1.4689, "step": 5763 }, { "epoch": 0.9254977520873474, "grad_norm": 0.2755475342273712, "learning_rate": 0.0001, "loss": 1.473, "step": 5764 }, { "epoch": 0.9256583172768144, "grad_norm": 0.2650447189807892, "learning_rate": 0.0001, "loss": 1.3937, "step": 5765 }, { "epoch": 0.9258188824662813, "grad_norm": 0.2696771025657654, "learning_rate": 0.0001, "loss": 1.4672, "step": 5766 }, { "epoch": 0.9259794476557482, "grad_norm": 0.29964062571525574, "learning_rate": 0.0001, "loss": 1.4711, "step": 5767 }, { "epoch": 0.9261400128452152, "grad_norm": 0.32170623540878296, "learning_rate": 0.0001, "loss": 1.4489, "step": 5768 }, { "epoch": 0.9263005780346821, "grad_norm": 0.2805902361869812, "learning_rate": 0.0001, "loss": 1.4482, "step": 5769 }, { "epoch": 0.926461143224149, "grad_norm": 0.2696913778781891, "learning_rate": 0.0001, "loss": 1.4797, "step": 5770 }, { "epoch": 0.9266217084136159, "grad_norm": 0.27366599440574646, "learning_rate": 0.0001, "loss": 1.4612, "step": 5771 }, { "epoch": 0.9267822736030829, "grad_norm": 0.30340951681137085, "learning_rate": 0.0001, "loss": 1.4558, "step": 5772 }, { "epoch": 0.9269428387925498, "grad_norm": 0.2692902386188507, "learning_rate": 0.0001, "loss": 1.445, "step": 5773 }, { "epoch": 0.9271034039820167, "grad_norm": 0.29561668634414673, "learning_rate": 0.0001, "loss": 1.3912, "step": 5774 }, { "epoch": 0.9272639691714836, "grad_norm": 0.2912541925907135, "learning_rate": 0.0001, "loss": 1.4678, "step": 5775 }, { "epoch": 0.9274245343609505, "grad_norm": 0.27322182059288025, "learning_rate": 0.0001, "loss": 1.4082, "step": 5776 }, { "epoch": 0.9275850995504175, "grad_norm": 0.29552537202835083, "learning_rate": 0.0001, "loss": 1.4656, "step": 5777 }, { "epoch": 0.9277456647398844, "grad_norm": 0.2890673577785492, "learning_rate": 0.0001, "loss": 1.4762, "step": 5778 }, { "epoch": 0.9279062299293513, "grad_norm": 0.27739542722702026, "learning_rate": 0.0001, "loss": 1.556, "step": 5779 }, { "epoch": 0.9280667951188183, "grad_norm": 0.2785024046897888, "learning_rate": 0.0001, "loss": 1.5496, "step": 5780 }, { "epoch": 0.9282273603082851, "grad_norm": 0.25842979550361633, "learning_rate": 0.0001, "loss": 1.4732, "step": 5781 }, { "epoch": 0.9283879254977521, "grad_norm": 0.29370245337486267, "learning_rate": 0.0001, "loss": 1.4841, "step": 5782 }, { "epoch": 0.928548490687219, "grad_norm": 0.27456536889076233, "learning_rate": 0.0001, "loss": 1.401, "step": 5783 }, { "epoch": 0.928709055876686, "grad_norm": 0.2704974412918091, "learning_rate": 0.0001, "loss": 1.4826, "step": 5784 }, { "epoch": 0.9288696210661529, "grad_norm": 0.30428287386894226, "learning_rate": 0.0001, "loss": 1.4592, "step": 5785 }, { "epoch": 0.9290301862556197, "grad_norm": 0.29401740431785583, "learning_rate": 0.0001, "loss": 1.4012, "step": 5786 }, { "epoch": 0.9291907514450867, "grad_norm": 0.2812541723251343, "learning_rate": 0.0001, "loss": 1.3853, "step": 5787 }, { "epoch": 0.9293513166345536, "grad_norm": 0.28248366713523865, "learning_rate": 0.0001, "loss": 1.4619, "step": 5788 }, { "epoch": 0.9295118818240206, "grad_norm": 0.2675034999847412, "learning_rate": 0.0001, "loss": 1.4516, "step": 5789 }, { "epoch": 0.9296724470134875, "grad_norm": 0.26978638768196106, "learning_rate": 0.0001, "loss": 1.4693, "step": 5790 }, { "epoch": 0.9298330122029544, "grad_norm": 0.2856466770172119, "learning_rate": 0.0001, "loss": 1.5821, "step": 5791 }, { "epoch": 0.9299935773924213, "grad_norm": 0.271677166223526, "learning_rate": 0.0001, "loss": 1.3973, "step": 5792 }, { "epoch": 0.9301541425818882, "grad_norm": 0.28180110454559326, "learning_rate": 0.0001, "loss": 1.4597, "step": 5793 }, { "epoch": 0.9303147077713552, "grad_norm": 0.27508625388145447, "learning_rate": 0.0001, "loss": 1.5676, "step": 5794 }, { "epoch": 0.9304752729608221, "grad_norm": 0.2649001479148865, "learning_rate": 0.0001, "loss": 1.4636, "step": 5795 }, { "epoch": 0.930635838150289, "grad_norm": 0.28027021884918213, "learning_rate": 0.0001, "loss": 1.4499, "step": 5796 }, { "epoch": 0.930796403339756, "grad_norm": 0.28741174936294556, "learning_rate": 0.0001, "loss": 1.4998, "step": 5797 }, { "epoch": 0.9309569685292228, "grad_norm": 0.2758386433124542, "learning_rate": 0.0001, "loss": 1.5158, "step": 5798 }, { "epoch": 0.9311175337186898, "grad_norm": 0.2677361071109772, "learning_rate": 0.0001, "loss": 1.4557, "step": 5799 }, { "epoch": 0.9312780989081567, "grad_norm": 0.2974143326282501, "learning_rate": 0.0001, "loss": 1.425, "step": 5800 }, { "epoch": 0.9314386640976237, "grad_norm": 0.3010811507701874, "learning_rate": 0.0001, "loss": 1.3815, "step": 5801 }, { "epoch": 0.9315992292870906, "grad_norm": 0.2760634124279022, "learning_rate": 0.0001, "loss": 1.48, "step": 5802 }, { "epoch": 0.9317597944765574, "grad_norm": 0.2719084322452545, "learning_rate": 0.0001, "loss": 1.4971, "step": 5803 }, { "epoch": 0.9319203596660244, "grad_norm": 0.2982003092765808, "learning_rate": 0.0001, "loss": 1.5477, "step": 5804 }, { "epoch": 0.9320809248554913, "grad_norm": 0.30464425683021545, "learning_rate": 0.0001, "loss": 1.498, "step": 5805 }, { "epoch": 0.9322414900449583, "grad_norm": 0.2761540114879608, "learning_rate": 0.0001, "loss": 1.4721, "step": 5806 }, { "epoch": 0.9324020552344252, "grad_norm": 0.2752193510532379, "learning_rate": 0.0001, "loss": 1.4132, "step": 5807 }, { "epoch": 0.9325626204238922, "grad_norm": 0.26726093888282776, "learning_rate": 0.0001, "loss": 1.4526, "step": 5808 }, { "epoch": 0.932723185613359, "grad_norm": 0.26164042949676514, "learning_rate": 0.0001, "loss": 1.4471, "step": 5809 }, { "epoch": 0.9328837508028259, "grad_norm": 0.268328994512558, "learning_rate": 0.0001, "loss": 1.4582, "step": 5810 }, { "epoch": 0.9330443159922929, "grad_norm": 0.2759345471858978, "learning_rate": 0.0001, "loss": 1.5367, "step": 5811 }, { "epoch": 0.9332048811817598, "grad_norm": 0.2661013901233673, "learning_rate": 0.0001, "loss": 1.558, "step": 5812 }, { "epoch": 0.9333654463712268, "grad_norm": 0.2785830795764923, "learning_rate": 0.0001, "loss": 1.456, "step": 5813 }, { "epoch": 0.9335260115606936, "grad_norm": 0.26263684034347534, "learning_rate": 0.0001, "loss": 1.4605, "step": 5814 }, { "epoch": 0.9336865767501605, "grad_norm": 0.27695077657699585, "learning_rate": 0.0001, "loss": 1.4791, "step": 5815 }, { "epoch": 0.9338471419396275, "grad_norm": 0.28673410415649414, "learning_rate": 0.0001, "loss": 1.4811, "step": 5816 }, { "epoch": 0.9340077071290944, "grad_norm": 0.28872042894363403, "learning_rate": 0.0001, "loss": 1.5044, "step": 5817 }, { "epoch": 0.9341682723185614, "grad_norm": 0.27367937564849854, "learning_rate": 0.0001, "loss": 1.5029, "step": 5818 }, { "epoch": 0.9343288375080283, "grad_norm": 0.2637515366077423, "learning_rate": 0.0001, "loss": 1.4633, "step": 5819 }, { "epoch": 0.9344894026974951, "grad_norm": 0.2860463559627533, "learning_rate": 0.0001, "loss": 1.4937, "step": 5820 }, { "epoch": 0.9346499678869621, "grad_norm": 0.28555914759635925, "learning_rate": 0.0001, "loss": 1.445, "step": 5821 }, { "epoch": 0.934810533076429, "grad_norm": 0.27750441431999207, "learning_rate": 0.0001, "loss": 1.4198, "step": 5822 }, { "epoch": 0.934971098265896, "grad_norm": 0.2655699551105499, "learning_rate": 0.0001, "loss": 1.4535, "step": 5823 }, { "epoch": 0.9351316634553629, "grad_norm": 0.27347785234451294, "learning_rate": 0.0001, "loss": 1.5319, "step": 5824 }, { "epoch": 0.9352922286448297, "grad_norm": 0.28993481397628784, "learning_rate": 0.0001, "loss": 1.4661, "step": 5825 }, { "epoch": 0.9354527938342967, "grad_norm": 0.273605614900589, "learning_rate": 0.0001, "loss": 1.52, "step": 5826 }, { "epoch": 0.9356133590237636, "grad_norm": 0.25676229596138, "learning_rate": 0.0001, "loss": 1.4204, "step": 5827 }, { "epoch": 0.9357739242132306, "grad_norm": 0.27479469776153564, "learning_rate": 0.0001, "loss": 1.441, "step": 5828 }, { "epoch": 0.9359344894026975, "grad_norm": 0.2789990305900574, "learning_rate": 0.0001, "loss": 1.472, "step": 5829 }, { "epoch": 0.9360950545921645, "grad_norm": 0.27907949686050415, "learning_rate": 0.0001, "loss": 1.4628, "step": 5830 }, { "epoch": 0.9362556197816313, "grad_norm": 0.27137044072151184, "learning_rate": 0.0001, "loss": 1.4569, "step": 5831 }, { "epoch": 0.9364161849710982, "grad_norm": 0.2736453115940094, "learning_rate": 0.0001, "loss": 1.4437, "step": 5832 }, { "epoch": 0.9365767501605652, "grad_norm": 0.28897377848625183, "learning_rate": 0.0001, "loss": 1.4673, "step": 5833 }, { "epoch": 0.9367373153500321, "grad_norm": 0.26451051235198975, "learning_rate": 0.0001, "loss": 1.4595, "step": 5834 }, { "epoch": 0.9368978805394991, "grad_norm": 0.27823954820632935, "learning_rate": 0.0001, "loss": 1.4499, "step": 5835 }, { "epoch": 0.9370584457289659, "grad_norm": 0.27579158544540405, "learning_rate": 0.0001, "loss": 1.4965, "step": 5836 }, { "epoch": 0.9372190109184328, "grad_norm": 0.2658326029777527, "learning_rate": 0.0001, "loss": 1.4783, "step": 5837 }, { "epoch": 0.9373795761078998, "grad_norm": 0.28364139795303345, "learning_rate": 0.0001, "loss": 1.4208, "step": 5838 }, { "epoch": 0.9375401412973667, "grad_norm": 0.26561906933784485, "learning_rate": 0.0001, "loss": 1.4145, "step": 5839 }, { "epoch": 0.9377007064868337, "grad_norm": 0.2735253870487213, "learning_rate": 0.0001, "loss": 1.4599, "step": 5840 }, { "epoch": 0.9378612716763006, "grad_norm": 0.27882084250450134, "learning_rate": 0.0001, "loss": 1.5012, "step": 5841 }, { "epoch": 0.9380218368657675, "grad_norm": 0.2897747755050659, "learning_rate": 0.0001, "loss": 1.5606, "step": 5842 }, { "epoch": 0.9381824020552344, "grad_norm": 0.28041335940361023, "learning_rate": 0.0001, "loss": 1.4903, "step": 5843 }, { "epoch": 0.9383429672447013, "grad_norm": 0.2811106741428375, "learning_rate": 0.0001, "loss": 1.4833, "step": 5844 }, { "epoch": 0.9385035324341683, "grad_norm": 0.41006171703338623, "learning_rate": 0.0001, "loss": 1.4621, "step": 5845 }, { "epoch": 0.9386640976236352, "grad_norm": 0.2849881052970886, "learning_rate": 0.0001, "loss": 1.4206, "step": 5846 }, { "epoch": 0.9388246628131022, "grad_norm": 0.27344533801078796, "learning_rate": 0.0001, "loss": 1.45, "step": 5847 }, { "epoch": 0.938985228002569, "grad_norm": 0.2861812710762024, "learning_rate": 0.0001, "loss": 1.396, "step": 5848 }, { "epoch": 0.939145793192036, "grad_norm": 0.2781996428966522, "learning_rate": 0.0001, "loss": 1.5337, "step": 5849 }, { "epoch": 0.9393063583815029, "grad_norm": 0.27811169624328613, "learning_rate": 0.0001, "loss": 1.4811, "step": 5850 }, { "epoch": 0.9394669235709698, "grad_norm": 0.26856598258018494, "learning_rate": 0.0001, "loss": 1.3898, "step": 5851 }, { "epoch": 0.9396274887604368, "grad_norm": 0.2990454137325287, "learning_rate": 0.0001, "loss": 1.4769, "step": 5852 }, { "epoch": 0.9397880539499036, "grad_norm": 0.2712068259716034, "learning_rate": 0.0001, "loss": 1.4573, "step": 5853 }, { "epoch": 0.9399486191393706, "grad_norm": 0.2844711244106293, "learning_rate": 0.0001, "loss": 1.5406, "step": 5854 }, { "epoch": 0.9401091843288375, "grad_norm": 0.2788091003894806, "learning_rate": 0.0001, "loss": 1.5116, "step": 5855 }, { "epoch": 0.9402697495183044, "grad_norm": 0.2758863866329193, "learning_rate": 0.0001, "loss": 1.4672, "step": 5856 }, { "epoch": 0.9404303147077714, "grad_norm": 0.28007230162620544, "learning_rate": 0.0001, "loss": 1.4855, "step": 5857 }, { "epoch": 0.9405908798972383, "grad_norm": 0.32679662108421326, "learning_rate": 0.0001, "loss": 1.4784, "step": 5858 }, { "epoch": 0.9407514450867052, "grad_norm": 0.2912706732749939, "learning_rate": 0.0001, "loss": 1.3704, "step": 5859 }, { "epoch": 0.9409120102761721, "grad_norm": 0.27617281675338745, "learning_rate": 0.0001, "loss": 1.489, "step": 5860 }, { "epoch": 0.941072575465639, "grad_norm": 0.3089311718940735, "learning_rate": 0.0001, "loss": 1.5022, "step": 5861 }, { "epoch": 0.941233140655106, "grad_norm": 0.3159063160419464, "learning_rate": 0.0001, "loss": 1.44, "step": 5862 }, { "epoch": 0.9413937058445729, "grad_norm": 0.2852429151535034, "learning_rate": 0.0001, "loss": 1.4906, "step": 5863 }, { "epoch": 0.9415542710340398, "grad_norm": 0.2734900712966919, "learning_rate": 0.0001, "loss": 1.4781, "step": 5864 }, { "epoch": 0.9417148362235067, "grad_norm": 0.28580528497695923, "learning_rate": 0.0001, "loss": 1.5271, "step": 5865 }, { "epoch": 0.9418754014129737, "grad_norm": 0.28720587491989136, "learning_rate": 0.0001, "loss": 1.4516, "step": 5866 }, { "epoch": 0.9420359666024406, "grad_norm": 0.2807033061981201, "learning_rate": 0.0001, "loss": 1.4719, "step": 5867 }, { "epoch": 0.9421965317919075, "grad_norm": 0.29748672246932983, "learning_rate": 0.0001, "loss": 1.4244, "step": 5868 }, { "epoch": 0.9423570969813745, "grad_norm": 0.27877122163772583, "learning_rate": 0.0001, "loss": 1.5303, "step": 5869 }, { "epoch": 0.9425176621708413, "grad_norm": 0.26318901777267456, "learning_rate": 0.0001, "loss": 1.4143, "step": 5870 }, { "epoch": 0.9426782273603083, "grad_norm": 0.2971193492412567, "learning_rate": 0.0001, "loss": 1.4202, "step": 5871 }, { "epoch": 0.9428387925497752, "grad_norm": 0.2822686433792114, "learning_rate": 0.0001, "loss": 1.4454, "step": 5872 }, { "epoch": 0.9429993577392421, "grad_norm": 0.27671289443969727, "learning_rate": 0.0001, "loss": 1.455, "step": 5873 }, { "epoch": 0.9431599229287091, "grad_norm": 0.2803163230419159, "learning_rate": 0.0001, "loss": 1.4861, "step": 5874 }, { "epoch": 0.9433204881181759, "grad_norm": 0.3060593008995056, "learning_rate": 0.0001, "loss": 1.4971, "step": 5875 }, { "epoch": 0.9434810533076429, "grad_norm": 0.27979764342308044, "learning_rate": 0.0001, "loss": 1.5331, "step": 5876 }, { "epoch": 0.9436416184971098, "grad_norm": 0.27894946932792664, "learning_rate": 0.0001, "loss": 1.522, "step": 5877 }, { "epoch": 0.9438021836865768, "grad_norm": 0.28618311882019043, "learning_rate": 0.0001, "loss": 1.4215, "step": 5878 }, { "epoch": 0.9439627488760437, "grad_norm": 0.2913786470890045, "learning_rate": 0.0001, "loss": 1.5194, "step": 5879 }, { "epoch": 0.9441233140655106, "grad_norm": 0.2834182679653168, "learning_rate": 0.0001, "loss": 1.4395, "step": 5880 }, { "epoch": 0.9442838792549775, "grad_norm": 0.29673516750335693, "learning_rate": 0.0001, "loss": 1.4773, "step": 5881 }, { "epoch": 0.9444444444444444, "grad_norm": 0.2880716919898987, "learning_rate": 0.0001, "loss": 1.4757, "step": 5882 }, { "epoch": 0.9446050096339114, "grad_norm": 0.27656540274620056, "learning_rate": 0.0001, "loss": 1.4813, "step": 5883 }, { "epoch": 0.9447655748233783, "grad_norm": 0.259142130613327, "learning_rate": 0.0001, "loss": 1.4003, "step": 5884 }, { "epoch": 0.9449261400128453, "grad_norm": 0.2958175539970398, "learning_rate": 0.0001, "loss": 1.5299, "step": 5885 }, { "epoch": 0.9450867052023122, "grad_norm": 0.2727348208427429, "learning_rate": 0.0001, "loss": 1.3805, "step": 5886 }, { "epoch": 0.945247270391779, "grad_norm": 0.28077074885368347, "learning_rate": 0.0001, "loss": 1.5182, "step": 5887 }, { "epoch": 0.945407835581246, "grad_norm": 0.28943517804145813, "learning_rate": 0.0001, "loss": 1.4916, "step": 5888 }, { "epoch": 0.9455684007707129, "grad_norm": 0.28285327553749084, "learning_rate": 0.0001, "loss": 1.5546, "step": 5889 }, { "epoch": 0.9457289659601799, "grad_norm": 0.2768253982067108, "learning_rate": 0.0001, "loss": 1.4987, "step": 5890 }, { "epoch": 0.9458895311496468, "grad_norm": 0.2773067057132721, "learning_rate": 0.0001, "loss": 1.4761, "step": 5891 }, { "epoch": 0.9460500963391136, "grad_norm": 0.27290916442871094, "learning_rate": 0.0001, "loss": 1.4429, "step": 5892 }, { "epoch": 0.9462106615285806, "grad_norm": 0.2910511791706085, "learning_rate": 0.0001, "loss": 1.494, "step": 5893 }, { "epoch": 0.9463712267180475, "grad_norm": 0.27037549018859863, "learning_rate": 0.0001, "loss": 1.49, "step": 5894 }, { "epoch": 0.9465317919075145, "grad_norm": 0.2779509425163269, "learning_rate": 0.0001, "loss": 1.4289, "step": 5895 }, { "epoch": 0.9466923570969814, "grad_norm": 0.27673134207725525, "learning_rate": 0.0001, "loss": 1.4547, "step": 5896 }, { "epoch": 0.9468529222864484, "grad_norm": 0.2648793160915375, "learning_rate": 0.0001, "loss": 1.4743, "step": 5897 }, { "epoch": 0.9470134874759152, "grad_norm": 0.27191540598869324, "learning_rate": 0.0001, "loss": 1.5512, "step": 5898 }, { "epoch": 0.9471740526653821, "grad_norm": 0.26977187395095825, "learning_rate": 0.0001, "loss": 1.4684, "step": 5899 }, { "epoch": 0.9473346178548491, "grad_norm": 0.273308664560318, "learning_rate": 0.0001, "loss": 1.4252, "step": 5900 }, { "epoch": 0.947495183044316, "grad_norm": 0.27446800470352173, "learning_rate": 0.0001, "loss": 1.4646, "step": 5901 }, { "epoch": 0.947655748233783, "grad_norm": 0.29054462909698486, "learning_rate": 0.0001, "loss": 1.5228, "step": 5902 }, { "epoch": 0.9478163134232498, "grad_norm": 0.27276232838630676, "learning_rate": 0.0001, "loss": 1.4498, "step": 5903 }, { "epoch": 0.9479768786127167, "grad_norm": 0.2760717272758484, "learning_rate": 0.0001, "loss": 1.4693, "step": 5904 }, { "epoch": 0.9481374438021837, "grad_norm": 0.2777491509914398, "learning_rate": 0.0001, "loss": 1.4673, "step": 5905 }, { "epoch": 0.9482980089916506, "grad_norm": 0.2675125300884247, "learning_rate": 0.0001, "loss": 1.5352, "step": 5906 }, { "epoch": 0.9484585741811176, "grad_norm": 0.2754668891429901, "learning_rate": 0.0001, "loss": 1.4192, "step": 5907 }, { "epoch": 0.9486191393705845, "grad_norm": 0.28691086173057556, "learning_rate": 0.0001, "loss": 1.4273, "step": 5908 }, { "epoch": 0.9487797045600513, "grad_norm": 0.2644820809364319, "learning_rate": 0.0001, "loss": 1.4089, "step": 5909 }, { "epoch": 0.9489402697495183, "grad_norm": 0.27131006121635437, "learning_rate": 0.0001, "loss": 1.4656, "step": 5910 }, { "epoch": 0.9491008349389852, "grad_norm": 0.2807711660861969, "learning_rate": 0.0001, "loss": 1.4128, "step": 5911 }, { "epoch": 0.9492614001284522, "grad_norm": 0.26891958713531494, "learning_rate": 0.0001, "loss": 1.4235, "step": 5912 }, { "epoch": 0.9494219653179191, "grad_norm": 0.6112186312675476, "learning_rate": 0.0001, "loss": 1.4226, "step": 5913 }, { "epoch": 0.949582530507386, "grad_norm": 0.2824949026107788, "learning_rate": 0.0001, "loss": 1.5121, "step": 5914 }, { "epoch": 0.9497430956968529, "grad_norm": 0.3157612979412079, "learning_rate": 0.0001, "loss": 1.4592, "step": 5915 }, { "epoch": 0.9499036608863198, "grad_norm": 0.3086928129196167, "learning_rate": 0.0001, "loss": 1.4821, "step": 5916 }, { "epoch": 0.9500642260757868, "grad_norm": 0.26061418652534485, "learning_rate": 0.0001, "loss": 1.5238, "step": 5917 }, { "epoch": 0.9502247912652537, "grad_norm": 0.27031055092811584, "learning_rate": 0.0001, "loss": 1.386, "step": 5918 }, { "epoch": 0.9503853564547207, "grad_norm": 0.28135839104652405, "learning_rate": 0.0001, "loss": 1.5086, "step": 5919 }, { "epoch": 0.9505459216441875, "grad_norm": 0.31137263774871826, "learning_rate": 0.0001, "loss": 1.4556, "step": 5920 }, { "epoch": 0.9507064868336544, "grad_norm": 0.2834000289440155, "learning_rate": 0.0001, "loss": 1.4445, "step": 5921 }, { "epoch": 0.9508670520231214, "grad_norm": 0.27586954832077026, "learning_rate": 0.0001, "loss": 1.4798, "step": 5922 }, { "epoch": 0.9510276172125883, "grad_norm": 0.25340452790260315, "learning_rate": 0.0001, "loss": 1.3766, "step": 5923 }, { "epoch": 0.9511881824020553, "grad_norm": 0.29076021909713745, "learning_rate": 0.0001, "loss": 1.4756, "step": 5924 }, { "epoch": 0.9513487475915221, "grad_norm": 0.2764754295349121, "learning_rate": 0.0001, "loss": 1.534, "step": 5925 }, { "epoch": 0.951509312780989, "grad_norm": 0.28199848532676697, "learning_rate": 0.0001, "loss": 1.4777, "step": 5926 }, { "epoch": 0.951669877970456, "grad_norm": 0.2712484896183014, "learning_rate": 0.0001, "loss": 1.5353, "step": 5927 }, { "epoch": 0.9518304431599229, "grad_norm": 0.29423338174819946, "learning_rate": 0.0001, "loss": 1.5036, "step": 5928 }, { "epoch": 0.9519910083493899, "grad_norm": 0.2705962061882019, "learning_rate": 0.0001, "loss": 1.3911, "step": 5929 }, { "epoch": 0.9521515735388568, "grad_norm": 0.26917755603790283, "learning_rate": 0.0001, "loss": 1.4662, "step": 5930 }, { "epoch": 0.9523121387283237, "grad_norm": 0.30825498700141907, "learning_rate": 0.0001, "loss": 1.4396, "step": 5931 }, { "epoch": 0.9524727039177906, "grad_norm": 0.2915974259376526, "learning_rate": 0.0001, "loss": 1.4902, "step": 5932 }, { "epoch": 0.9526332691072575, "grad_norm": 0.2787433862686157, "learning_rate": 0.0001, "loss": 1.4666, "step": 5933 }, { "epoch": 0.9527938342967245, "grad_norm": 0.2745096981525421, "learning_rate": 0.0001, "loss": 1.4713, "step": 5934 }, { "epoch": 0.9529543994861914, "grad_norm": 0.26666146516799927, "learning_rate": 0.0001, "loss": 1.4314, "step": 5935 }, { "epoch": 0.9531149646756584, "grad_norm": 0.2656130790710449, "learning_rate": 0.0001, "loss": 1.4568, "step": 5936 }, { "epoch": 0.9532755298651252, "grad_norm": 0.2698099613189697, "learning_rate": 0.0001, "loss": 1.4545, "step": 5937 }, { "epoch": 0.9534360950545921, "grad_norm": 0.26967161893844604, "learning_rate": 0.0001, "loss": 1.4674, "step": 5938 }, { "epoch": 0.9535966602440591, "grad_norm": 0.27115342020988464, "learning_rate": 0.0001, "loss": 1.4547, "step": 5939 }, { "epoch": 0.953757225433526, "grad_norm": 0.2653388977050781, "learning_rate": 0.0001, "loss": 1.4779, "step": 5940 }, { "epoch": 0.953917790622993, "grad_norm": 0.2982099950313568, "learning_rate": 0.0001, "loss": 1.5727, "step": 5941 }, { "epoch": 0.9540783558124598, "grad_norm": 0.28064000606536865, "learning_rate": 0.0001, "loss": 1.4975, "step": 5942 }, { "epoch": 0.9542389210019268, "grad_norm": 0.2620096504688263, "learning_rate": 0.0001, "loss": 1.3736, "step": 5943 }, { "epoch": 0.9543994861913937, "grad_norm": 0.3034951686859131, "learning_rate": 0.0001, "loss": 1.4932, "step": 5944 }, { "epoch": 0.9545600513808606, "grad_norm": 0.2841450572013855, "learning_rate": 0.0001, "loss": 1.42, "step": 5945 }, { "epoch": 0.9547206165703276, "grad_norm": 0.9656533598899841, "learning_rate": 0.0001, "loss": 1.4952, "step": 5946 }, { "epoch": 0.9548811817597945, "grad_norm": 0.2824438214302063, "learning_rate": 0.0001, "loss": 1.4824, "step": 5947 }, { "epoch": 0.9550417469492614, "grad_norm": 0.29613274335861206, "learning_rate": 0.0001, "loss": 1.4816, "step": 5948 }, { "epoch": 0.9552023121387283, "grad_norm": 0.32699763774871826, "learning_rate": 0.0001, "loss": 1.545, "step": 5949 }, { "epoch": 0.9553628773281952, "grad_norm": 0.34357208013534546, "learning_rate": 0.0001, "loss": 1.4593, "step": 5950 }, { "epoch": 0.9555234425176622, "grad_norm": 0.2892925441265106, "learning_rate": 0.0001, "loss": 1.4865, "step": 5951 }, { "epoch": 0.9556840077071291, "grad_norm": 0.2864363491535187, "learning_rate": 0.0001, "loss": 1.5113, "step": 5952 }, { "epoch": 0.955844572896596, "grad_norm": 0.2872791886329651, "learning_rate": 0.0001, "loss": 1.4583, "step": 5953 }, { "epoch": 0.9560051380860629, "grad_norm": 0.2911718189716339, "learning_rate": 0.0001, "loss": 1.517, "step": 5954 }, { "epoch": 0.9561657032755299, "grad_norm": 0.28609058260917664, "learning_rate": 0.0001, "loss": 1.4682, "step": 5955 }, { "epoch": 0.9563262684649968, "grad_norm": 0.2892938554286957, "learning_rate": 0.0001, "loss": 1.5025, "step": 5956 }, { "epoch": 0.9564868336544637, "grad_norm": 0.2699921429157257, "learning_rate": 0.0001, "loss": 1.4165, "step": 5957 }, { "epoch": 0.9566473988439307, "grad_norm": 0.27306798100471497, "learning_rate": 0.0001, "loss": 1.4736, "step": 5958 }, { "epoch": 0.9568079640333975, "grad_norm": 0.26198604702949524, "learning_rate": 0.0001, "loss": 1.348, "step": 5959 }, { "epoch": 0.9569685292228645, "grad_norm": 0.2712896466255188, "learning_rate": 0.0001, "loss": 1.5062, "step": 5960 }, { "epoch": 0.9571290944123314, "grad_norm": 0.2693850100040436, "learning_rate": 0.0001, "loss": 1.4626, "step": 5961 }, { "epoch": 0.9572896596017983, "grad_norm": 0.28435471653938293, "learning_rate": 0.0001, "loss": 1.4935, "step": 5962 }, { "epoch": 0.9574502247912653, "grad_norm": 0.280477374792099, "learning_rate": 0.0001, "loss": 1.4133, "step": 5963 }, { "epoch": 0.9576107899807321, "grad_norm": 0.28667399287223816, "learning_rate": 0.0001, "loss": 1.4673, "step": 5964 }, { "epoch": 0.9577713551701991, "grad_norm": 0.2773427367210388, "learning_rate": 0.0001, "loss": 1.4987, "step": 5965 }, { "epoch": 0.957931920359666, "grad_norm": 0.25841793417930603, "learning_rate": 0.0001, "loss": 1.4617, "step": 5966 }, { "epoch": 0.958092485549133, "grad_norm": 0.2560836970806122, "learning_rate": 0.0001, "loss": 1.4402, "step": 5967 }, { "epoch": 0.9582530507385999, "grad_norm": 0.2719283998012543, "learning_rate": 0.0001, "loss": 1.4475, "step": 5968 }, { "epoch": 0.9584136159280668, "grad_norm": 0.26865220069885254, "learning_rate": 0.0001, "loss": 1.4518, "step": 5969 }, { "epoch": 0.9585741811175337, "grad_norm": 0.2628419101238251, "learning_rate": 0.0001, "loss": 1.514, "step": 5970 }, { "epoch": 0.9587347463070006, "grad_norm": 0.2671055495738983, "learning_rate": 0.0001, "loss": 1.5483, "step": 5971 }, { "epoch": 0.9588953114964676, "grad_norm": 0.28136101365089417, "learning_rate": 0.0001, "loss": 1.5116, "step": 5972 }, { "epoch": 0.9590558766859345, "grad_norm": 0.27849438786506653, "learning_rate": 0.0001, "loss": 1.5061, "step": 5973 }, { "epoch": 0.9592164418754014, "grad_norm": 0.2710857689380646, "learning_rate": 0.0001, "loss": 1.3976, "step": 5974 }, { "epoch": 0.9593770070648683, "grad_norm": 0.269164115190506, "learning_rate": 0.0001, "loss": 1.4451, "step": 5975 }, { "epoch": 0.9595375722543352, "grad_norm": 0.2771841287612915, "learning_rate": 0.0001, "loss": 1.4537, "step": 5976 }, { "epoch": 0.9596981374438022, "grad_norm": 0.2635327875614166, "learning_rate": 0.0001, "loss": 1.4565, "step": 5977 }, { "epoch": 0.9598587026332691, "grad_norm": 0.2794688045978546, "learning_rate": 0.0001, "loss": 1.4913, "step": 5978 }, { "epoch": 0.960019267822736, "grad_norm": 0.286764919757843, "learning_rate": 0.0001, "loss": 1.4734, "step": 5979 }, { "epoch": 0.960179833012203, "grad_norm": 0.2917470335960388, "learning_rate": 0.0001, "loss": 1.4634, "step": 5980 }, { "epoch": 0.9603403982016698, "grad_norm": 0.27942582964897156, "learning_rate": 0.0001, "loss": 1.5373, "step": 5981 }, { "epoch": 0.9605009633911368, "grad_norm": 0.27506059408187866, "learning_rate": 0.0001, "loss": 1.4676, "step": 5982 }, { "epoch": 0.9606615285806037, "grad_norm": 0.2907000184059143, "learning_rate": 0.0001, "loss": 1.4463, "step": 5983 }, { "epoch": 0.9608220937700707, "grad_norm": 0.27734360098838806, "learning_rate": 0.0001, "loss": 1.4884, "step": 5984 }, { "epoch": 0.9609826589595376, "grad_norm": 0.29087257385253906, "learning_rate": 0.0001, "loss": 1.4645, "step": 5985 }, { "epoch": 0.9611432241490045, "grad_norm": 0.26189374923706055, "learning_rate": 0.0001, "loss": 1.3998, "step": 5986 }, { "epoch": 0.9613037893384714, "grad_norm": 0.2801060080528259, "learning_rate": 0.0001, "loss": 1.4756, "step": 5987 }, { "epoch": 0.9614643545279383, "grad_norm": 0.2744179964065552, "learning_rate": 0.0001, "loss": 1.465, "step": 5988 }, { "epoch": 0.9616249197174053, "grad_norm": 0.28119373321533203, "learning_rate": 0.0001, "loss": 1.472, "step": 5989 }, { "epoch": 0.9617854849068722, "grad_norm": 0.2790205776691437, "learning_rate": 0.0001, "loss": 1.4537, "step": 5990 }, { "epoch": 0.9619460500963392, "grad_norm": 0.27779579162597656, "learning_rate": 0.0001, "loss": 1.4187, "step": 5991 }, { "epoch": 0.962106615285806, "grad_norm": 0.29540571570396423, "learning_rate": 0.0001, "loss": 1.4923, "step": 5992 }, { "epoch": 0.9622671804752729, "grad_norm": 0.2819844186306, "learning_rate": 0.0001, "loss": 1.4482, "step": 5993 }, { "epoch": 0.9624277456647399, "grad_norm": 0.27781808376312256, "learning_rate": 0.0001, "loss": 1.3551, "step": 5994 }, { "epoch": 0.9625883108542068, "grad_norm": 0.26764678955078125, "learning_rate": 0.0001, "loss": 1.3977, "step": 5995 }, { "epoch": 0.9627488760436738, "grad_norm": 0.2768016755580902, "learning_rate": 0.0001, "loss": 1.5034, "step": 5996 }, { "epoch": 0.9629094412331407, "grad_norm": 0.28940582275390625, "learning_rate": 0.0001, "loss": 1.4729, "step": 5997 }, { "epoch": 0.9630700064226075, "grad_norm": 0.27764105796813965, "learning_rate": 0.0001, "loss": 1.484, "step": 5998 }, { "epoch": 0.9632305716120745, "grad_norm": 0.2838978171348572, "learning_rate": 0.0001, "loss": 1.3841, "step": 5999 }, { "epoch": 0.9633911368015414, "grad_norm": 0.29951396584510803, "learning_rate": 0.0001, "loss": 1.4491, "step": 6000 }, { "epoch": 0.9635517019910084, "grad_norm": 0.2669565677642822, "learning_rate": 0.0001, "loss": 1.4029, "step": 6001 }, { "epoch": 0.9637122671804753, "grad_norm": 0.3049006164073944, "learning_rate": 0.0001, "loss": 1.4756, "step": 6002 }, { "epoch": 0.9638728323699421, "grad_norm": 0.27438488602638245, "learning_rate": 0.0001, "loss": 1.5432, "step": 6003 }, { "epoch": 0.9640333975594091, "grad_norm": 0.26807013154029846, "learning_rate": 0.0001, "loss": 1.4198, "step": 6004 }, { "epoch": 0.964193962748876, "grad_norm": 0.27279675006866455, "learning_rate": 0.0001, "loss": 1.5018, "step": 6005 }, { "epoch": 0.964354527938343, "grad_norm": 0.2722545564174652, "learning_rate": 0.0001, "loss": 1.5129, "step": 6006 }, { "epoch": 0.9645150931278099, "grad_norm": 0.276894748210907, "learning_rate": 0.0001, "loss": 1.5087, "step": 6007 }, { "epoch": 0.9646756583172769, "grad_norm": 0.272074818611145, "learning_rate": 0.0001, "loss": 1.4721, "step": 6008 }, { "epoch": 0.9648362235067437, "grad_norm": 0.2679750621318817, "learning_rate": 0.0001, "loss": 1.4397, "step": 6009 }, { "epoch": 0.9649967886962106, "grad_norm": 0.26656609773635864, "learning_rate": 0.0001, "loss": 1.4337, "step": 6010 }, { "epoch": 0.9651573538856776, "grad_norm": 0.2664980888366699, "learning_rate": 0.0001, "loss": 1.4173, "step": 6011 }, { "epoch": 0.9653179190751445, "grad_norm": 0.2975117862224579, "learning_rate": 0.0001, "loss": 1.4243, "step": 6012 }, { "epoch": 0.9654784842646115, "grad_norm": 0.2981017529964447, "learning_rate": 0.0001, "loss": 1.463, "step": 6013 }, { "epoch": 0.9656390494540783, "grad_norm": 0.27719423174858093, "learning_rate": 0.0001, "loss": 1.4027, "step": 6014 }, { "epoch": 0.9657996146435452, "grad_norm": 0.2824347913265228, "learning_rate": 0.0001, "loss": 1.3975, "step": 6015 }, { "epoch": 0.9659601798330122, "grad_norm": 0.28527072072029114, "learning_rate": 0.0001, "loss": 1.44, "step": 6016 }, { "epoch": 0.9661207450224791, "grad_norm": 0.26541316509246826, "learning_rate": 0.0001, "loss": 1.4369, "step": 6017 }, { "epoch": 0.9662813102119461, "grad_norm": 0.3047367334365845, "learning_rate": 0.0001, "loss": 1.5166, "step": 6018 }, { "epoch": 0.966441875401413, "grad_norm": 0.2990919351577759, "learning_rate": 0.0001, "loss": 1.4694, "step": 6019 }, { "epoch": 0.9666024405908799, "grad_norm": 0.27594760060310364, "learning_rate": 0.0001, "loss": 1.4195, "step": 6020 }, { "epoch": 0.9667630057803468, "grad_norm": 0.28485724329948425, "learning_rate": 0.0001, "loss": 1.5256, "step": 6021 }, { "epoch": 0.9669235709698137, "grad_norm": 0.296840637922287, "learning_rate": 0.0001, "loss": 1.4589, "step": 6022 }, { "epoch": 0.9670841361592807, "grad_norm": 0.27788132429122925, "learning_rate": 0.0001, "loss": 1.4796, "step": 6023 }, { "epoch": 0.9672447013487476, "grad_norm": 0.27840691804885864, "learning_rate": 0.0001, "loss": 1.4823, "step": 6024 }, { "epoch": 0.9674052665382146, "grad_norm": 0.29735177755355835, "learning_rate": 0.0001, "loss": 1.3861, "step": 6025 }, { "epoch": 0.9675658317276814, "grad_norm": 0.27756813168525696, "learning_rate": 0.0001, "loss": 1.4538, "step": 6026 }, { "epoch": 0.9677263969171483, "grad_norm": 0.27351388335227966, "learning_rate": 0.0001, "loss": 1.4276, "step": 6027 }, { "epoch": 0.9678869621066153, "grad_norm": 0.27169686555862427, "learning_rate": 0.0001, "loss": 1.4239, "step": 6028 }, { "epoch": 0.9680475272960822, "grad_norm": 0.3168017268180847, "learning_rate": 0.0001, "loss": 1.4175, "step": 6029 }, { "epoch": 0.9682080924855492, "grad_norm": 0.264495313167572, "learning_rate": 0.0001, "loss": 1.4514, "step": 6030 }, { "epoch": 0.968368657675016, "grad_norm": 0.26328355073928833, "learning_rate": 0.0001, "loss": 1.3986, "step": 6031 }, { "epoch": 0.968529222864483, "grad_norm": 0.2844201326370239, "learning_rate": 0.0001, "loss": 1.4448, "step": 6032 }, { "epoch": 0.9686897880539499, "grad_norm": 0.27860555052757263, "learning_rate": 0.0001, "loss": 1.4985, "step": 6033 }, { "epoch": 0.9688503532434168, "grad_norm": 0.27706608176231384, "learning_rate": 0.0001, "loss": 1.4341, "step": 6034 }, { "epoch": 0.9690109184328838, "grad_norm": 0.3051508665084839, "learning_rate": 0.0001, "loss": 1.457, "step": 6035 }, { "epoch": 0.9691714836223507, "grad_norm": 0.28054526448249817, "learning_rate": 0.0001, "loss": 1.449, "step": 6036 }, { "epoch": 0.9693320488118176, "grad_norm": 0.26974231004714966, "learning_rate": 0.0001, "loss": 1.4576, "step": 6037 }, { "epoch": 0.9694926140012845, "grad_norm": 0.2820809781551361, "learning_rate": 0.0001, "loss": 1.4367, "step": 6038 }, { "epoch": 0.9696531791907514, "grad_norm": 0.2799365222454071, "learning_rate": 0.0001, "loss": 1.5023, "step": 6039 }, { "epoch": 0.9698137443802184, "grad_norm": 0.2809252142906189, "learning_rate": 0.0001, "loss": 1.4693, "step": 6040 }, { "epoch": 0.9699743095696853, "grad_norm": 0.27401256561279297, "learning_rate": 0.0001, "loss": 1.5606, "step": 6041 }, { "epoch": 0.9701348747591522, "grad_norm": 0.27618643641471863, "learning_rate": 0.0001, "loss": 1.4711, "step": 6042 }, { "epoch": 0.9702954399486191, "grad_norm": 0.2657073140144348, "learning_rate": 0.0001, "loss": 1.4804, "step": 6043 }, { "epoch": 0.970456005138086, "grad_norm": 0.26313135027885437, "learning_rate": 0.0001, "loss": 1.4401, "step": 6044 }, { "epoch": 0.970616570327553, "grad_norm": 0.790553629398346, "learning_rate": 0.0001, "loss": 1.465, "step": 6045 }, { "epoch": 0.9707771355170199, "grad_norm": 0.28462934494018555, "learning_rate": 0.0001, "loss": 1.4823, "step": 6046 }, { "epoch": 0.9709377007064869, "grad_norm": 0.29648351669311523, "learning_rate": 0.0001, "loss": 1.3966, "step": 6047 }, { "epoch": 0.9710982658959537, "grad_norm": 0.2969985604286194, "learning_rate": 0.0001, "loss": 1.5062, "step": 6048 }, { "epoch": 0.9712588310854207, "grad_norm": 0.2779256999492645, "learning_rate": 0.0001, "loss": 1.5596, "step": 6049 }, { "epoch": 0.9714193962748876, "grad_norm": 0.2802236080169678, "learning_rate": 0.0001, "loss": 1.5852, "step": 6050 }, { "epoch": 0.9715799614643545, "grad_norm": 0.27366623282432556, "learning_rate": 0.0001, "loss": 1.5129, "step": 6051 }, { "epoch": 0.9717405266538215, "grad_norm": 0.2757594585418701, "learning_rate": 0.0001, "loss": 1.5257, "step": 6052 }, { "epoch": 0.9719010918432883, "grad_norm": 0.28436192870140076, "learning_rate": 0.0001, "loss": 1.4486, "step": 6053 }, { "epoch": 0.9720616570327553, "grad_norm": 0.2683652341365814, "learning_rate": 0.0001, "loss": 1.4946, "step": 6054 }, { "epoch": 0.9722222222222222, "grad_norm": 0.2610686123371124, "learning_rate": 0.0001, "loss": 1.4334, "step": 6055 }, { "epoch": 0.9723827874116892, "grad_norm": 0.28330203890800476, "learning_rate": 0.0001, "loss": 1.4521, "step": 6056 }, { "epoch": 0.9725433526011561, "grad_norm": 0.3239646852016449, "learning_rate": 0.0001, "loss": 1.4915, "step": 6057 }, { "epoch": 0.972703917790623, "grad_norm": 0.27439191937446594, "learning_rate": 0.0001, "loss": 1.4892, "step": 6058 }, { "epoch": 0.9728644829800899, "grad_norm": 0.266892671585083, "learning_rate": 0.0001, "loss": 1.4453, "step": 6059 }, { "epoch": 0.9730250481695568, "grad_norm": 0.2938152849674225, "learning_rate": 0.0001, "loss": 1.4933, "step": 6060 }, { "epoch": 0.9731856133590238, "grad_norm": 0.2957319915294647, "learning_rate": 0.0001, "loss": 1.5158, "step": 6061 }, { "epoch": 0.9733461785484907, "grad_norm": 0.27349087595939636, "learning_rate": 0.0001, "loss": 1.5462, "step": 6062 }, { "epoch": 0.9735067437379576, "grad_norm": 0.28939199447631836, "learning_rate": 0.0001, "loss": 1.4771, "step": 6063 }, { "epoch": 0.9736673089274245, "grad_norm": 0.2705721855163574, "learning_rate": 0.0001, "loss": 1.416, "step": 6064 }, { "epoch": 0.9738278741168914, "grad_norm": 0.2997393012046814, "learning_rate": 0.0001, "loss": 1.5235, "step": 6065 }, { "epoch": 0.9739884393063584, "grad_norm": 0.2901417315006256, "learning_rate": 0.0001, "loss": 1.382, "step": 6066 }, { "epoch": 0.9741490044958253, "grad_norm": 0.284340500831604, "learning_rate": 0.0001, "loss": 1.4807, "step": 6067 }, { "epoch": 0.9743095696852923, "grad_norm": 0.2745599150657654, "learning_rate": 0.0001, "loss": 1.3844, "step": 6068 }, { "epoch": 0.9744701348747592, "grad_norm": 0.29601961374282837, "learning_rate": 0.0001, "loss": 1.5104, "step": 6069 }, { "epoch": 0.974630700064226, "grad_norm": 0.29848021268844604, "learning_rate": 0.0001, "loss": 1.5022, "step": 6070 }, { "epoch": 0.974791265253693, "grad_norm": 0.26561540365219116, "learning_rate": 0.0001, "loss": 1.4765, "step": 6071 }, { "epoch": 0.9749518304431599, "grad_norm": 0.2823335826396942, "learning_rate": 0.0001, "loss": 1.4665, "step": 6072 }, { "epoch": 0.9751123956326269, "grad_norm": 0.28383684158325195, "learning_rate": 0.0001, "loss": 1.5105, "step": 6073 }, { "epoch": 0.9752729608220938, "grad_norm": 0.2845028042793274, "learning_rate": 0.0001, "loss": 1.4474, "step": 6074 }, { "epoch": 0.9754335260115607, "grad_norm": 0.2673282027244568, "learning_rate": 0.0001, "loss": 1.4024, "step": 6075 }, { "epoch": 0.9755940912010276, "grad_norm": 0.2782292664051056, "learning_rate": 0.0001, "loss": 1.4336, "step": 6076 }, { "epoch": 0.9757546563904945, "grad_norm": 0.2785802185535431, "learning_rate": 0.0001, "loss": 1.4614, "step": 6077 }, { "epoch": 0.9759152215799615, "grad_norm": 0.26905113458633423, "learning_rate": 0.0001, "loss": 1.3642, "step": 6078 }, { "epoch": 0.9760757867694284, "grad_norm": 0.2865033447742462, "learning_rate": 0.0001, "loss": 1.4281, "step": 6079 }, { "epoch": 0.9762363519588954, "grad_norm": 0.2800520956516266, "learning_rate": 0.0001, "loss": 1.4609, "step": 6080 }, { "epoch": 0.9763969171483622, "grad_norm": 0.3049313426017761, "learning_rate": 0.0001, "loss": 1.5001, "step": 6081 }, { "epoch": 0.9765574823378291, "grad_norm": 0.28505152463912964, "learning_rate": 0.0001, "loss": 1.4622, "step": 6082 }, { "epoch": 0.9767180475272961, "grad_norm": 0.2728041410446167, "learning_rate": 0.0001, "loss": 1.4396, "step": 6083 }, { "epoch": 0.976878612716763, "grad_norm": 0.27396929264068604, "learning_rate": 0.0001, "loss": 1.4796, "step": 6084 }, { "epoch": 0.97703917790623, "grad_norm": 0.5953158140182495, "learning_rate": 0.0001, "loss": 1.3769, "step": 6085 }, { "epoch": 0.9771997430956969, "grad_norm": 0.2573602497577667, "learning_rate": 0.0001, "loss": 1.4832, "step": 6086 }, { "epoch": 0.9773603082851637, "grad_norm": 0.2817201614379883, "learning_rate": 0.0001, "loss": 1.5091, "step": 6087 }, { "epoch": 0.9775208734746307, "grad_norm": 0.2709033489227295, "learning_rate": 0.0001, "loss": 1.4692, "step": 6088 }, { "epoch": 0.9776814386640976, "grad_norm": 0.30877017974853516, "learning_rate": 0.0001, "loss": 1.4484, "step": 6089 }, { "epoch": 0.9778420038535646, "grad_norm": 0.26173847913742065, "learning_rate": 0.0001, "loss": 1.4751, "step": 6090 }, { "epoch": 0.9780025690430315, "grad_norm": 0.2807842791080475, "learning_rate": 0.0001, "loss": 1.4638, "step": 6091 }, { "epoch": 0.9781631342324983, "grad_norm": 0.2715712785720825, "learning_rate": 0.0001, "loss": 1.4712, "step": 6092 }, { "epoch": 0.9783236994219653, "grad_norm": 0.2671475410461426, "learning_rate": 0.0001, "loss": 1.4161, "step": 6093 }, { "epoch": 0.9784842646114322, "grad_norm": 0.2942464053630829, "learning_rate": 0.0001, "loss": 1.4988, "step": 6094 }, { "epoch": 0.9786448298008992, "grad_norm": 0.2681715190410614, "learning_rate": 0.0001, "loss": 1.4191, "step": 6095 }, { "epoch": 0.9788053949903661, "grad_norm": 0.2870206832885742, "learning_rate": 0.0001, "loss": 1.3867, "step": 6096 }, { "epoch": 0.9789659601798331, "grad_norm": 0.2776786983013153, "learning_rate": 0.0001, "loss": 1.4712, "step": 6097 }, { "epoch": 0.9791265253692999, "grad_norm": 0.27417871356010437, "learning_rate": 0.0001, "loss": 1.4053, "step": 6098 }, { "epoch": 0.9792870905587668, "grad_norm": 0.2741701006889343, "learning_rate": 0.0001, "loss": 1.5258, "step": 6099 }, { "epoch": 0.9794476557482338, "grad_norm": 0.2945915758609772, "learning_rate": 0.0001, "loss": 1.549, "step": 6100 }, { "epoch": 0.9796082209377007, "grad_norm": 0.2797362208366394, "learning_rate": 0.0001, "loss": 1.5345, "step": 6101 }, { "epoch": 0.9797687861271677, "grad_norm": 0.2899368107318878, "learning_rate": 0.0001, "loss": 1.4929, "step": 6102 }, { "epoch": 0.9799293513166345, "grad_norm": 0.28004252910614014, "learning_rate": 0.0001, "loss": 1.4797, "step": 6103 }, { "epoch": 0.9800899165061014, "grad_norm": 0.3024308681488037, "learning_rate": 0.0001, "loss": 1.4515, "step": 6104 }, { "epoch": 0.9802504816955684, "grad_norm": 0.30811381340026855, "learning_rate": 0.0001, "loss": 1.4288, "step": 6105 }, { "epoch": 0.9804110468850353, "grad_norm": 0.29313984513282776, "learning_rate": 0.0001, "loss": 1.5526, "step": 6106 }, { "epoch": 0.9805716120745023, "grad_norm": 0.25765109062194824, "learning_rate": 0.0001, "loss": 1.4112, "step": 6107 }, { "epoch": 0.9807321772639692, "grad_norm": 0.2743788957595825, "learning_rate": 0.0001, "loss": 1.4229, "step": 6108 }, { "epoch": 0.980892742453436, "grad_norm": 0.28418049216270447, "learning_rate": 0.0001, "loss": 1.5385, "step": 6109 }, { "epoch": 0.981053307642903, "grad_norm": 0.2828102707862854, "learning_rate": 0.0001, "loss": 1.5066, "step": 6110 }, { "epoch": 0.9812138728323699, "grad_norm": 0.27831506729125977, "learning_rate": 0.0001, "loss": 1.4488, "step": 6111 }, { "epoch": 0.9813744380218369, "grad_norm": 0.2679727375507355, "learning_rate": 0.0001, "loss": 1.5022, "step": 6112 }, { "epoch": 0.9815350032113038, "grad_norm": 0.2713260054588318, "learning_rate": 0.0001, "loss": 1.4424, "step": 6113 }, { "epoch": 0.9816955684007708, "grad_norm": 0.2776862382888794, "learning_rate": 0.0001, "loss": 1.5187, "step": 6114 }, { "epoch": 0.9818561335902376, "grad_norm": 0.2952801585197449, "learning_rate": 0.0001, "loss": 1.5083, "step": 6115 }, { "epoch": 0.9820166987797045, "grad_norm": 0.3064367175102234, "learning_rate": 0.0001, "loss": 1.4903, "step": 6116 }, { "epoch": 0.9821772639691715, "grad_norm": 0.2772613763809204, "learning_rate": 0.0001, "loss": 1.4656, "step": 6117 }, { "epoch": 0.9823378291586384, "grad_norm": 0.283323734998703, "learning_rate": 0.0001, "loss": 1.463, "step": 6118 }, { "epoch": 0.9824983943481054, "grad_norm": 0.3010428547859192, "learning_rate": 0.0001, "loss": 1.5127, "step": 6119 }, { "epoch": 0.9826589595375722, "grad_norm": 0.27567583322525024, "learning_rate": 0.0001, "loss": 1.4729, "step": 6120 }, { "epoch": 0.9828195247270392, "grad_norm": 0.26461541652679443, "learning_rate": 0.0001, "loss": 1.4573, "step": 6121 }, { "epoch": 0.9829800899165061, "grad_norm": 0.2829727232456207, "learning_rate": 0.0001, "loss": 1.484, "step": 6122 }, { "epoch": 0.983140655105973, "grad_norm": 0.2693321108818054, "learning_rate": 0.0001, "loss": 1.4139, "step": 6123 }, { "epoch": 0.98330122029544, "grad_norm": 0.29115068912506104, "learning_rate": 0.0001, "loss": 1.5082, "step": 6124 }, { "epoch": 0.9834617854849069, "grad_norm": 0.28654834628105164, "learning_rate": 0.0001, "loss": 1.4419, "step": 6125 }, { "epoch": 0.9836223506743738, "grad_norm": 0.28508302569389343, "learning_rate": 0.0001, "loss": 1.4822, "step": 6126 }, { "epoch": 0.9837829158638407, "grad_norm": 0.2751412093639374, "learning_rate": 0.0001, "loss": 1.4292, "step": 6127 }, { "epoch": 0.9839434810533076, "grad_norm": 0.2865232229232788, "learning_rate": 0.0001, "loss": 1.544, "step": 6128 }, { "epoch": 0.9841040462427746, "grad_norm": 0.2914300858974457, "learning_rate": 0.0001, "loss": 1.5164, "step": 6129 }, { "epoch": 0.9842646114322415, "grad_norm": 0.28615498542785645, "learning_rate": 0.0001, "loss": 1.4364, "step": 6130 }, { "epoch": 0.9844251766217084, "grad_norm": 0.28262922167778015, "learning_rate": 0.0001, "loss": 1.4928, "step": 6131 }, { "epoch": 0.9845857418111753, "grad_norm": 0.27573537826538086, "learning_rate": 0.0001, "loss": 1.4675, "step": 6132 }, { "epoch": 0.9847463070006423, "grad_norm": 0.2692889869213104, "learning_rate": 0.0001, "loss": 1.4212, "step": 6133 }, { "epoch": 0.9849068721901092, "grad_norm": 0.2852250039577484, "learning_rate": 0.0001, "loss": 1.5296, "step": 6134 }, { "epoch": 0.9850674373795761, "grad_norm": 0.2724355161190033, "learning_rate": 0.0001, "loss": 1.4882, "step": 6135 }, { "epoch": 0.9852280025690431, "grad_norm": 0.29830068349838257, "learning_rate": 0.0001, "loss": 1.4987, "step": 6136 }, { "epoch": 0.9853885677585099, "grad_norm": 0.29177263379096985, "learning_rate": 0.0001, "loss": 1.6092, "step": 6137 }, { "epoch": 0.9855491329479769, "grad_norm": 0.2806275486946106, "learning_rate": 0.0001, "loss": 1.4853, "step": 6138 }, { "epoch": 0.9857096981374438, "grad_norm": 0.29522383213043213, "learning_rate": 0.0001, "loss": 1.4888, "step": 6139 }, { "epoch": 0.9858702633269107, "grad_norm": 0.2776612639427185, "learning_rate": 0.0001, "loss": 1.412, "step": 6140 }, { "epoch": 0.9860308285163777, "grad_norm": 0.2890492081642151, "learning_rate": 0.0001, "loss": 1.4377, "step": 6141 }, { "epoch": 0.9861913937058445, "grad_norm": 0.2801278829574585, "learning_rate": 0.0001, "loss": 1.4832, "step": 6142 }, { "epoch": 0.9863519588953115, "grad_norm": 0.2889750599861145, "learning_rate": 0.0001, "loss": 1.501, "step": 6143 }, { "epoch": 0.9865125240847784, "grad_norm": 0.2753765285015106, "learning_rate": 0.0001, "loss": 1.3762, "step": 6144 }, { "epoch": 0.9866730892742454, "grad_norm": 0.2816472053527832, "learning_rate": 0.0001, "loss": 1.517, "step": 6145 }, { "epoch": 0.9868336544637123, "grad_norm": 0.2731228172779083, "learning_rate": 0.0001, "loss": 1.4484, "step": 6146 }, { "epoch": 0.9869942196531792, "grad_norm": 0.3136705160140991, "learning_rate": 0.0001, "loss": 1.458, "step": 6147 }, { "epoch": 0.9871547848426461, "grad_norm": 0.26373156905174255, "learning_rate": 0.0001, "loss": 1.4254, "step": 6148 }, { "epoch": 0.987315350032113, "grad_norm": 0.28131967782974243, "learning_rate": 0.0001, "loss": 1.4722, "step": 6149 }, { "epoch": 0.98747591522158, "grad_norm": 0.2643895447254181, "learning_rate": 0.0001, "loss": 1.2916, "step": 6150 }, { "epoch": 0.9876364804110469, "grad_norm": 0.26116758584976196, "learning_rate": 0.0001, "loss": 1.4171, "step": 6151 }, { "epoch": 0.9877970456005138, "grad_norm": 0.2842550277709961, "learning_rate": 0.0001, "loss": 1.5123, "step": 6152 }, { "epoch": 0.9879576107899807, "grad_norm": 0.28289249539375305, "learning_rate": 0.0001, "loss": 1.4759, "step": 6153 }, { "epoch": 0.9881181759794476, "grad_norm": 0.2961236536502838, "learning_rate": 0.0001, "loss": 1.5121, "step": 6154 }, { "epoch": 0.9882787411689146, "grad_norm": 0.27023664116859436, "learning_rate": 0.0001, "loss": 1.4453, "step": 6155 }, { "epoch": 0.9884393063583815, "grad_norm": 0.28188565373420715, "learning_rate": 0.0001, "loss": 1.5199, "step": 6156 }, { "epoch": 0.9885998715478485, "grad_norm": 0.2707480490207672, "learning_rate": 0.0001, "loss": 1.4374, "step": 6157 }, { "epoch": 0.9887604367373154, "grad_norm": 0.2875424921512604, "learning_rate": 0.0001, "loss": 1.4498, "step": 6158 }, { "epoch": 0.9889210019267822, "grad_norm": 0.26978111267089844, "learning_rate": 0.0001, "loss": 1.4972, "step": 6159 }, { "epoch": 0.9890815671162492, "grad_norm": 0.26786309480667114, "learning_rate": 0.0001, "loss": 1.4748, "step": 6160 }, { "epoch": 0.9892421323057161, "grad_norm": 0.2710617780685425, "learning_rate": 0.0001, "loss": 1.4253, "step": 6161 }, { "epoch": 0.9894026974951831, "grad_norm": 0.26757073402404785, "learning_rate": 0.0001, "loss": 1.5044, "step": 6162 }, { "epoch": 0.98956326268465, "grad_norm": 0.2969173491001129, "learning_rate": 0.0001, "loss": 1.521, "step": 6163 }, { "epoch": 0.989723827874117, "grad_norm": 0.263449490070343, "learning_rate": 0.0001, "loss": 1.4488, "step": 6164 }, { "epoch": 0.9898843930635838, "grad_norm": 0.26113778352737427, "learning_rate": 0.0001, "loss": 1.4237, "step": 6165 }, { "epoch": 0.9900449582530507, "grad_norm": 0.281708300113678, "learning_rate": 0.0001, "loss": 1.4929, "step": 6166 }, { "epoch": 0.9902055234425177, "grad_norm": 0.274546355009079, "learning_rate": 0.0001, "loss": 1.5144, "step": 6167 }, { "epoch": 0.9903660886319846, "grad_norm": 0.2744201421737671, "learning_rate": 0.0001, "loss": 1.4446, "step": 6168 }, { "epoch": 0.9905266538214516, "grad_norm": 0.26994210481643677, "learning_rate": 0.0001, "loss": 1.4654, "step": 6169 }, { "epoch": 0.9906872190109184, "grad_norm": 0.2769588232040405, "learning_rate": 0.0001, "loss": 1.4318, "step": 6170 }, { "epoch": 0.9908477842003853, "grad_norm": 0.29553619027137756, "learning_rate": 0.0001, "loss": 1.5025, "step": 6171 }, { "epoch": 0.9910083493898523, "grad_norm": 0.2770235538482666, "learning_rate": 0.0001, "loss": 1.5148, "step": 6172 }, { "epoch": 0.9911689145793192, "grad_norm": 0.2792353630065918, "learning_rate": 0.0001, "loss": 1.5359, "step": 6173 }, { "epoch": 0.9913294797687862, "grad_norm": 0.276390016078949, "learning_rate": 0.0001, "loss": 1.4846, "step": 6174 }, { "epoch": 0.9914900449582531, "grad_norm": 0.27889484167099, "learning_rate": 0.0001, "loss": 1.4428, "step": 6175 }, { "epoch": 0.9916506101477199, "grad_norm": 0.30174416303634644, "learning_rate": 0.0001, "loss": 1.4598, "step": 6176 }, { "epoch": 0.9918111753371869, "grad_norm": 0.27532634139060974, "learning_rate": 0.0001, "loss": 1.4345, "step": 6177 }, { "epoch": 0.9919717405266538, "grad_norm": 0.27762967348098755, "learning_rate": 0.0001, "loss": 1.4002, "step": 6178 }, { "epoch": 0.9921323057161208, "grad_norm": 0.29852789640426636, "learning_rate": 0.0001, "loss": 1.4166, "step": 6179 }, { "epoch": 0.9922928709055877, "grad_norm": 0.27629998326301575, "learning_rate": 0.0001, "loss": 1.4837, "step": 6180 }, { "epoch": 0.9924534360950545, "grad_norm": 0.3078816533088684, "learning_rate": 0.0001, "loss": 1.5066, "step": 6181 }, { "epoch": 0.9926140012845215, "grad_norm": 0.2954596281051636, "learning_rate": 0.0001, "loss": 1.4908, "step": 6182 }, { "epoch": 0.9927745664739884, "grad_norm": 0.2811075747013092, "learning_rate": 0.0001, "loss": 1.5397, "step": 6183 }, { "epoch": 0.9929351316634554, "grad_norm": 0.2870076894760132, "learning_rate": 0.0001, "loss": 1.544, "step": 6184 }, { "epoch": 0.9930956968529223, "grad_norm": 0.32010337710380554, "learning_rate": 0.0001, "loss": 1.5037, "step": 6185 }, { "epoch": 0.9932562620423893, "grad_norm": 0.29296836256980896, "learning_rate": 0.0001, "loss": 1.5605, "step": 6186 }, { "epoch": 0.9934168272318561, "grad_norm": 0.2698921263217926, "learning_rate": 0.0001, "loss": 1.5104, "step": 6187 }, { "epoch": 0.993577392421323, "grad_norm": 0.2633209228515625, "learning_rate": 0.0001, "loss": 1.4218, "step": 6188 }, { "epoch": 0.99373795761079, "grad_norm": 0.2665596306324005, "learning_rate": 0.0001, "loss": 1.5071, "step": 6189 }, { "epoch": 0.9938985228002569, "grad_norm": 0.2854216396808624, "learning_rate": 0.0001, "loss": 1.537, "step": 6190 }, { "epoch": 0.9940590879897239, "grad_norm": 0.28087618947029114, "learning_rate": 0.0001, "loss": 1.4403, "step": 6191 }, { "epoch": 0.9942196531791907, "grad_norm": 0.2625194489955902, "learning_rate": 0.0001, "loss": 1.4798, "step": 6192 }, { "epoch": 0.9943802183686576, "grad_norm": 0.269114226102829, "learning_rate": 0.0001, "loss": 1.3957, "step": 6193 }, { "epoch": 0.9945407835581246, "grad_norm": 0.26467272639274597, "learning_rate": 0.0001, "loss": 1.4213, "step": 6194 }, { "epoch": 0.9947013487475915, "grad_norm": 0.2702041566371918, "learning_rate": 0.0001, "loss": 1.4794, "step": 6195 }, { "epoch": 0.9948619139370585, "grad_norm": 0.30384930968284607, "learning_rate": 0.0001, "loss": 1.4239, "step": 6196 }, { "epoch": 0.9950224791265254, "grad_norm": 0.27441248297691345, "learning_rate": 0.0001, "loss": 1.4826, "step": 6197 }, { "epoch": 0.9951830443159922, "grad_norm": 0.30752405524253845, "learning_rate": 0.0001, "loss": 1.459, "step": 6198 }, { "epoch": 0.9953436095054592, "grad_norm": 0.3062322437763214, "learning_rate": 0.0001, "loss": 1.4969, "step": 6199 }, { "epoch": 0.9955041746949261, "grad_norm": 0.3011613190174103, "learning_rate": 0.0001, "loss": 1.4248, "step": 6200 }, { "epoch": 0.9956647398843931, "grad_norm": 0.2832570970058441, "learning_rate": 0.0001, "loss": 1.5697, "step": 6201 }, { "epoch": 0.99582530507386, "grad_norm": 0.262753963470459, "learning_rate": 0.0001, "loss": 1.4788, "step": 6202 }, { "epoch": 0.9959858702633269, "grad_norm": 0.2819031774997711, "learning_rate": 0.0001, "loss": 1.4638, "step": 6203 }, { "epoch": 0.9961464354527938, "grad_norm": 0.2841491401195526, "learning_rate": 0.0001, "loss": 1.4599, "step": 6204 }, { "epoch": 0.9963070006422607, "grad_norm": 0.2673529386520386, "learning_rate": 0.0001, "loss": 1.491, "step": 6205 }, { "epoch": 0.9964675658317277, "grad_norm": 0.2715817987918854, "learning_rate": 0.0001, "loss": 1.5009, "step": 6206 }, { "epoch": 0.9966281310211946, "grad_norm": 0.30681803822517395, "learning_rate": 0.0001, "loss": 1.4673, "step": 6207 }, { "epoch": 0.9967886962106616, "grad_norm": 0.27776339650154114, "learning_rate": 0.0001, "loss": 1.3702, "step": 6208 }, { "epoch": 0.9969492614001284, "grad_norm": 0.26564541459083557, "learning_rate": 0.0001, "loss": 1.404, "step": 6209 }, { "epoch": 0.9971098265895953, "grad_norm": 0.2827150225639343, "learning_rate": 0.0001, "loss": 1.4291, "step": 6210 }, { "epoch": 0.9972703917790623, "grad_norm": 0.28041255474090576, "learning_rate": 0.0001, "loss": 1.4986, "step": 6211 }, { "epoch": 0.9974309569685292, "grad_norm": 0.2790243923664093, "learning_rate": 0.0001, "loss": 1.4789, "step": 6212 }, { "epoch": 0.9975915221579962, "grad_norm": 0.2771053910255432, "learning_rate": 0.0001, "loss": 1.4705, "step": 6213 }, { "epoch": 0.9977520873474631, "grad_norm": 0.27453845739364624, "learning_rate": 0.0001, "loss": 1.5308, "step": 6214 }, { "epoch": 0.99791265253693, "grad_norm": 0.28533750772476196, "learning_rate": 0.0001, "loss": 1.3929, "step": 6215 }, { "epoch": 0.9980732177263969, "grad_norm": 0.28414100408554077, "learning_rate": 0.0001, "loss": 1.471, "step": 6216 }, { "epoch": 0.9982337829158638, "grad_norm": 0.2757773995399475, "learning_rate": 0.0001, "loss": 1.4661, "step": 6217 }, { "epoch": 0.9983943481053308, "grad_norm": 0.27444469928741455, "learning_rate": 0.0001, "loss": 1.4961, "step": 6218 }, { "epoch": 0.9985549132947977, "grad_norm": 0.27805936336517334, "learning_rate": 0.0001, "loss": 1.6059, "step": 6219 }, { "epoch": 0.9987154784842646, "grad_norm": 0.2738643288612366, "learning_rate": 0.0001, "loss": 1.4357, "step": 6220 }, { "epoch": 0.9988760436737315, "grad_norm": 0.2820572555065155, "learning_rate": 0.0001, "loss": 1.4146, "step": 6221 }, { "epoch": 0.9990366088631984, "grad_norm": 0.2885551154613495, "learning_rate": 0.0001, "loss": 1.4504, "step": 6222 }, { "epoch": 0.9991971740526654, "grad_norm": 0.2716134488582611, "learning_rate": 0.0001, "loss": 1.4, "step": 6223 }, { "epoch": 0.9993577392421323, "grad_norm": 0.270205020904541, "learning_rate": 0.0001, "loss": 1.4337, "step": 6224 }, { "epoch": 0.9995183044315993, "grad_norm": 0.2815423607826233, "learning_rate": 0.0001, "loss": 1.5145, "step": 6225 }, { "epoch": 0.9996788696210661, "grad_norm": 0.3001120686531067, "learning_rate": 0.0001, "loss": 1.5119, "step": 6226 }, { "epoch": 0.9998394348105331, "grad_norm": 0.27873408794403076, "learning_rate": 0.0001, "loss": 1.5616, "step": 6227 }, { "epoch": 1.0, "grad_norm": 0.3003462553024292, "learning_rate": 0.0001, "loss": 1.4898, "step": 6228 }, { "epoch": 1.0, "step": 6228, "total_flos": 3.5626252265708323e+19, "train_loss": 1.5359360199558023, "train_runtime": 115641.057, "train_samples_per_second": 0.431, "train_steps_per_second": 0.054 } ], "logging_steps": 1, "max_steps": 6228, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 750, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.5626252265708323e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }