{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999969552111561, "eval_steps": 500, "global_step": 16421, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.089577687787352e-05, "grad_norm": 1.2783087128363426, "learning_rate": 5.0000000000000004e-08, "loss": 1.2081, "step": 1 }, { "epoch": 0.00012179155375574704, "grad_norm": 1.3613236856754432, "learning_rate": 1.0000000000000001e-07, "loss": 1.2835, "step": 2 }, { "epoch": 0.00018268733063362056, "grad_norm": 1.30298213226854, "learning_rate": 1.5000000000000002e-07, "loss": 1.2077, "step": 3 }, { "epoch": 0.00024358310751149408, "grad_norm": 1.368037692261487, "learning_rate": 2.0000000000000002e-07, "loss": 1.2432, "step": 4 }, { "epoch": 0.0003044788843893676, "grad_norm": 1.4688587783314677, "learning_rate": 2.5000000000000004e-07, "loss": 1.2565, "step": 5 }, { "epoch": 0.0003653746612672411, "grad_norm": 1.3521010226655206, "learning_rate": 3.0000000000000004e-07, "loss": 1.2392, "step": 6 }, { "epoch": 0.0004262704381451146, "grad_norm": 1.29609633985871, "learning_rate": 3.5000000000000004e-07, "loss": 1.2221, "step": 7 }, { "epoch": 0.00048716621502298816, "grad_norm": 1.3769312847454855, "learning_rate": 4.0000000000000003e-07, "loss": 1.2871, "step": 8 }, { "epoch": 0.0005480619919008617, "grad_norm": 1.4064798068116708, "learning_rate": 4.5000000000000003e-07, "loss": 1.314, "step": 9 }, { "epoch": 0.0006089577687787351, "grad_norm": 1.364989462955391, "learning_rate": 5.000000000000001e-07, "loss": 1.2562, "step": 10 }, { "epoch": 0.0006698535456566087, "grad_norm": 1.2610318355920762, "learning_rate": 5.5e-07, "loss": 1.221, "step": 11 }, { "epoch": 0.0007307493225344822, "grad_norm": 1.3980912017066547, "learning_rate": 6.000000000000001e-07, "loss": 1.2528, "step": 12 }, { "epoch": 0.0007916450994123557, "grad_norm": 1.2606235299921411, "learning_rate": 6.5e-07, "loss": 1.1981, "step": 13 }, { "epoch": 0.0008525408762902292, "grad_norm": 1.2985310084373154, "learning_rate": 7.000000000000001e-07, "loss": 1.2424, "step": 14 }, { "epoch": 0.0009134366531681028, "grad_norm": 1.3163754114281772, "learning_rate": 7.5e-07, "loss": 1.2285, "step": 15 }, { "epoch": 0.0009743324300459763, "grad_norm": 1.2821693232198674, "learning_rate": 8.000000000000001e-07, "loss": 1.23, "step": 16 }, { "epoch": 0.0010352282069238498, "grad_norm": 1.3210071176961222, "learning_rate": 8.500000000000001e-07, "loss": 1.2896, "step": 17 }, { "epoch": 0.0010961239838017233, "grad_norm": 1.4278164117207706, "learning_rate": 9.000000000000001e-07, "loss": 1.2955, "step": 18 }, { "epoch": 0.0011570197606795968, "grad_norm": 1.449650040941209, "learning_rate": 9.500000000000001e-07, "loss": 1.304, "step": 19 }, { "epoch": 0.0012179155375574703, "grad_norm": 1.2024611163883157, "learning_rate": 1.0000000000000002e-06, "loss": 1.2692, "step": 20 }, { "epoch": 0.001278811314435344, "grad_norm": 1.190892382796453, "learning_rate": 1.0500000000000001e-06, "loss": 1.2073, "step": 21 }, { "epoch": 0.0013397070913132175, "grad_norm": 1.1450327134036158, "learning_rate": 1.1e-06, "loss": 1.1972, "step": 22 }, { "epoch": 0.001400602868191091, "grad_norm": 1.0909544550721726, "learning_rate": 1.1500000000000002e-06, "loss": 1.1695, "step": 23 }, { "epoch": 0.0014614986450689645, "grad_norm": 1.088507811747228, "learning_rate": 1.2000000000000002e-06, "loss": 1.2186, "step": 24 }, { "epoch": 0.001522394421946838, "grad_norm": 1.1452750682112331, "learning_rate": 1.25e-06, "loss": 1.2484, "step": 25 }, { "epoch": 0.0015832901988247115, "grad_norm": 1.085326027122175, "learning_rate": 1.3e-06, "loss": 1.228, "step": 26 }, { "epoch": 0.001644185975702585, "grad_norm": 0.9838162373848378, "learning_rate": 1.3500000000000002e-06, "loss": 1.1831, "step": 27 }, { "epoch": 0.0017050817525804585, "grad_norm": 0.9875806111015505, "learning_rate": 1.4000000000000001e-06, "loss": 1.1558, "step": 28 }, { "epoch": 0.0017659775294583322, "grad_norm": 0.9236168177688888, "learning_rate": 1.45e-06, "loss": 1.1292, "step": 29 }, { "epoch": 0.0018268733063362057, "grad_norm": 0.9042913599389717, "learning_rate": 1.5e-06, "loss": 1.1916, "step": 30 }, { "epoch": 0.0018877690832140792, "grad_norm": 0.9688611403020976, "learning_rate": 1.5500000000000002e-06, "loss": 1.1633, "step": 31 }, { "epoch": 0.0019486648600919526, "grad_norm": 0.9127073730197273, "learning_rate": 1.6000000000000001e-06, "loss": 1.2167, "step": 32 }, { "epoch": 0.0020095606369698264, "grad_norm": 0.8422078353450044, "learning_rate": 1.6500000000000003e-06, "loss": 1.1119, "step": 33 }, { "epoch": 0.0020704564138476996, "grad_norm": 0.8507073955625997, "learning_rate": 1.7000000000000002e-06, "loss": 1.1717, "step": 34 }, { "epoch": 0.0021313521907255733, "grad_norm": 0.7688402284704656, "learning_rate": 1.75e-06, "loss": 1.0982, "step": 35 }, { "epoch": 0.0021922479676034466, "grad_norm": 0.8209157124780203, "learning_rate": 1.8000000000000001e-06, "loss": 1.1588, "step": 36 }, { "epoch": 0.0022531437444813203, "grad_norm": 0.7732505451688806, "learning_rate": 1.85e-06, "loss": 1.1405, "step": 37 }, { "epoch": 0.0023140395213591936, "grad_norm": 0.7400784055294407, "learning_rate": 1.9000000000000002e-06, "loss": 1.1173, "step": 38 }, { "epoch": 0.0023749352982370673, "grad_norm": 0.74552272964745, "learning_rate": 1.9500000000000004e-06, "loss": 1.126, "step": 39 }, { "epoch": 0.0024358310751149406, "grad_norm": 0.7661829692157198, "learning_rate": 2.0000000000000003e-06, "loss": 1.1094, "step": 40 }, { "epoch": 0.0024967268519928143, "grad_norm": 0.6986791166318455, "learning_rate": 2.05e-06, "loss": 1.0649, "step": 41 }, { "epoch": 0.002557622628870688, "grad_norm": 0.7239054229001102, "learning_rate": 2.1000000000000002e-06, "loss": 1.0664, "step": 42 }, { "epoch": 0.0026185184057485613, "grad_norm": 0.6796740833720761, "learning_rate": 2.15e-06, "loss": 1.0484, "step": 43 }, { "epoch": 0.002679414182626435, "grad_norm": 0.7236233259643801, "learning_rate": 2.2e-06, "loss": 1.0719, "step": 44 }, { "epoch": 0.0027403099595043083, "grad_norm": 0.7131928979660329, "learning_rate": 2.25e-06, "loss": 1.0954, "step": 45 }, { "epoch": 0.002801205736382182, "grad_norm": 0.6916052186956844, "learning_rate": 2.3000000000000004e-06, "loss": 1.068, "step": 46 }, { "epoch": 0.0028621015132600553, "grad_norm": 0.7338346898557735, "learning_rate": 2.35e-06, "loss": 1.1337, "step": 47 }, { "epoch": 0.002922997290137929, "grad_norm": 0.7127052386654825, "learning_rate": 2.4000000000000003e-06, "loss": 1.0364, "step": 48 }, { "epoch": 0.0029838930670158022, "grad_norm": 0.7109363185975538, "learning_rate": 2.4500000000000003e-06, "loss": 1.0356, "step": 49 }, { "epoch": 0.003044788843893676, "grad_norm": 0.6404186765114072, "learning_rate": 2.5e-06, "loss": 1.0379, "step": 50 }, { "epoch": 0.0031056846207715497, "grad_norm": 0.6998851088838369, "learning_rate": 2.55e-06, "loss": 1.0652, "step": 51 }, { "epoch": 0.003166580397649423, "grad_norm": 0.6466384854106114, "learning_rate": 2.6e-06, "loss": 1.0206, "step": 52 }, { "epoch": 0.0032274761745272967, "grad_norm": 0.6652449670577572, "learning_rate": 2.6500000000000005e-06, "loss": 1.0451, "step": 53 }, { "epoch": 0.00328837195140517, "grad_norm": 0.6794460180057439, "learning_rate": 2.7000000000000004e-06, "loss": 1.0077, "step": 54 }, { "epoch": 0.0033492677282830436, "grad_norm": 0.6222955922127837, "learning_rate": 2.7500000000000004e-06, "loss": 1.0051, "step": 55 }, { "epoch": 0.003410163505160917, "grad_norm": 0.6895274401041883, "learning_rate": 2.8000000000000003e-06, "loss": 1.0272, "step": 56 }, { "epoch": 0.0034710592820387906, "grad_norm": 0.6847377406567425, "learning_rate": 2.85e-06, "loss": 0.9933, "step": 57 }, { "epoch": 0.0035319550589166643, "grad_norm": 0.6141086214919128, "learning_rate": 2.9e-06, "loss": 0.9998, "step": 58 }, { "epoch": 0.0035928508357945376, "grad_norm": 0.6767516370650634, "learning_rate": 2.95e-06, "loss": 0.9733, "step": 59 }, { "epoch": 0.0036537466126724113, "grad_norm": 0.654701145061518, "learning_rate": 3e-06, "loss": 1.0394, "step": 60 }, { "epoch": 0.0037146423895502846, "grad_norm": 0.6497783455180624, "learning_rate": 3.05e-06, "loss": 0.9817, "step": 61 }, { "epoch": 0.0037755381664281583, "grad_norm": 0.6272373703795496, "learning_rate": 3.1000000000000004e-06, "loss": 1.0388, "step": 62 }, { "epoch": 0.0038364339433060316, "grad_norm": 0.6666492103700693, "learning_rate": 3.1500000000000003e-06, "loss": 1.0004, "step": 63 }, { "epoch": 0.0038973297201839053, "grad_norm": 0.6210329756811196, "learning_rate": 3.2000000000000003e-06, "loss": 0.9683, "step": 64 }, { "epoch": 0.003958225497061779, "grad_norm": 0.6284933174969854, "learning_rate": 3.2500000000000002e-06, "loss": 0.9259, "step": 65 }, { "epoch": 0.004019121273939653, "grad_norm": 0.6181144182280022, "learning_rate": 3.3000000000000006e-06, "loss": 0.9478, "step": 66 }, { "epoch": 0.0040800170508175256, "grad_norm": 0.6638012384977436, "learning_rate": 3.3500000000000005e-06, "loss": 0.9454, "step": 67 }, { "epoch": 0.004140912827695399, "grad_norm": 0.6126923389742809, "learning_rate": 3.4000000000000005e-06, "loss": 0.8998, "step": 68 }, { "epoch": 0.004201808604573273, "grad_norm": 0.6498517762588266, "learning_rate": 3.45e-06, "loss": 0.9558, "step": 69 }, { "epoch": 0.004262704381451147, "grad_norm": 0.6299906610400277, "learning_rate": 3.5e-06, "loss": 0.9364, "step": 70 }, { "epoch": 0.0043236001583290195, "grad_norm": 0.6767449694631275, "learning_rate": 3.5500000000000003e-06, "loss": 0.9062, "step": 71 }, { "epoch": 0.004384495935206893, "grad_norm": 0.741078767464641, "learning_rate": 3.6000000000000003e-06, "loss": 0.9693, "step": 72 }, { "epoch": 0.004445391712084767, "grad_norm": 0.6679504048480375, "learning_rate": 3.65e-06, "loss": 0.9224, "step": 73 }, { "epoch": 0.004506287488962641, "grad_norm": 0.719246215898473, "learning_rate": 3.7e-06, "loss": 0.9685, "step": 74 }, { "epoch": 0.004567183265840514, "grad_norm": 0.637886583107297, "learning_rate": 3.7500000000000005e-06, "loss": 0.9074, "step": 75 }, { "epoch": 0.004628079042718387, "grad_norm": 0.6331989492396305, "learning_rate": 3.8000000000000005e-06, "loss": 0.9085, "step": 76 }, { "epoch": 0.004688974819596261, "grad_norm": 0.6494531462077495, "learning_rate": 3.85e-06, "loss": 0.8724, "step": 77 }, { "epoch": 0.004749870596474135, "grad_norm": 0.6768427312532551, "learning_rate": 3.900000000000001e-06, "loss": 0.9074, "step": 78 }, { "epoch": 0.004810766373352008, "grad_norm": 0.6838136341557196, "learning_rate": 3.95e-06, "loss": 0.9191, "step": 79 }, { "epoch": 0.004871662150229881, "grad_norm": 0.6802361612355575, "learning_rate": 4.000000000000001e-06, "loss": 0.8616, "step": 80 }, { "epoch": 0.004932557927107755, "grad_norm": 0.7121742369187243, "learning_rate": 4.05e-06, "loss": 0.8727, "step": 81 }, { "epoch": 0.004993453703985629, "grad_norm": 0.720021332039233, "learning_rate": 4.1e-06, "loss": 0.924, "step": 82 }, { "epoch": 0.005054349480863502, "grad_norm": 0.6173661542330908, "learning_rate": 4.15e-06, "loss": 0.8718, "step": 83 }, { "epoch": 0.005115245257741376, "grad_norm": 0.7136441168396123, "learning_rate": 4.2000000000000004e-06, "loss": 0.8726, "step": 84 }, { "epoch": 0.005176141034619249, "grad_norm": 0.6466816889421091, "learning_rate": 4.25e-06, "loss": 0.8748, "step": 85 }, { "epoch": 0.005237036811497123, "grad_norm": 0.6782624476040339, "learning_rate": 4.3e-06, "loss": 0.8709, "step": 86 }, { "epoch": 0.005297932588374996, "grad_norm": 0.604268387158685, "learning_rate": 4.350000000000001e-06, "loss": 0.8512, "step": 87 }, { "epoch": 0.00535882836525287, "grad_norm": 0.6591563240624356, "learning_rate": 4.4e-06, "loss": 0.9027, "step": 88 }, { "epoch": 0.005419724142130743, "grad_norm": 0.6367661068281147, "learning_rate": 4.450000000000001e-06, "loss": 0.862, "step": 89 }, { "epoch": 0.0054806199190086165, "grad_norm": 0.652591644341844, "learning_rate": 4.5e-06, "loss": 0.8405, "step": 90 }, { "epoch": 0.00554151569588649, "grad_norm": 0.6460001302844072, "learning_rate": 4.5500000000000005e-06, "loss": 0.8678, "step": 91 }, { "epoch": 0.005602411472764364, "grad_norm": 0.6528770416590572, "learning_rate": 4.600000000000001e-06, "loss": 0.9136, "step": 92 }, { "epoch": 0.005663307249642238, "grad_norm": 0.6154680768220303, "learning_rate": 4.65e-06, "loss": 0.811, "step": 93 }, { "epoch": 0.0057242030265201105, "grad_norm": 0.6414346841538292, "learning_rate": 4.7e-06, "loss": 0.8646, "step": 94 }, { "epoch": 0.005785098803397984, "grad_norm": 0.6476425301670001, "learning_rate": 4.75e-06, "loss": 0.8607, "step": 95 }, { "epoch": 0.005845994580275858, "grad_norm": 0.665120292815135, "learning_rate": 4.800000000000001e-06, "loss": 0.8155, "step": 96 }, { "epoch": 0.005906890357153732, "grad_norm": 0.6543842343163409, "learning_rate": 4.85e-06, "loss": 0.839, "step": 97 }, { "epoch": 0.0059677861340316045, "grad_norm": 0.6493833764230088, "learning_rate": 4.9000000000000005e-06, "loss": 0.7764, "step": 98 }, { "epoch": 0.006028681910909478, "grad_norm": 0.6910350684709983, "learning_rate": 4.95e-06, "loss": 0.835, "step": 99 }, { "epoch": 0.006089577687787352, "grad_norm": 0.6939532471647948, "learning_rate": 5e-06, "loss": 0.8889, "step": 100 }, { "epoch": 0.006150473464665226, "grad_norm": 0.6801399484740052, "learning_rate": 4.999999998726526e-06, "loss": 0.8525, "step": 101 }, { "epoch": 0.006211369241543099, "grad_norm": 0.6057438194423901, "learning_rate": 4.999999994906104e-06, "loss": 0.804, "step": 102 }, { "epoch": 0.006272265018420972, "grad_norm": 0.6381262259706383, "learning_rate": 4.999999988538734e-06, "loss": 0.8352, "step": 103 }, { "epoch": 0.006333160795298846, "grad_norm": 0.6503324179586444, "learning_rate": 4.999999979624416e-06, "loss": 0.8122, "step": 104 }, { "epoch": 0.00639405657217672, "grad_norm": 0.6508882951803143, "learning_rate": 4.999999968163151e-06, "loss": 0.8378, "step": 105 }, { "epoch": 0.006454952349054593, "grad_norm": 0.6761069223715721, "learning_rate": 4.999999954154936e-06, "loss": 0.8092, "step": 106 }, { "epoch": 0.006515848125932467, "grad_norm": 0.6242061897581022, "learning_rate": 4.999999937599774e-06, "loss": 0.7822, "step": 107 }, { "epoch": 0.00657674390281034, "grad_norm": 0.6737989504180351, "learning_rate": 4.999999918497664e-06, "loss": 0.7877, "step": 108 }, { "epoch": 0.0066376396796882136, "grad_norm": 0.6557666927699501, "learning_rate": 4.999999896848606e-06, "loss": 0.7953, "step": 109 }, { "epoch": 0.006698535456566087, "grad_norm": 0.6590897897144997, "learning_rate": 4.9999998726526e-06, "loss": 0.8083, "step": 110 }, { "epoch": 0.006759431233443961, "grad_norm": 0.6633869714276917, "learning_rate": 4.999999845909646e-06, "loss": 0.7788, "step": 111 }, { "epoch": 0.006820327010321834, "grad_norm": 0.6795631537974902, "learning_rate": 4.999999816619745e-06, "loss": 0.7729, "step": 112 }, { "epoch": 0.0068812227871997075, "grad_norm": 0.6601932877676258, "learning_rate": 4.999999784782895e-06, "loss": 0.7891, "step": 113 }, { "epoch": 0.006942118564077581, "grad_norm": 0.6622531999132142, "learning_rate": 4.999999750399098e-06, "loss": 0.7941, "step": 114 }, { "epoch": 0.007003014340955455, "grad_norm": 0.6686235251282103, "learning_rate": 4.999999713468353e-06, "loss": 0.8606, "step": 115 }, { "epoch": 0.007063910117833329, "grad_norm": 0.6479924171702877, "learning_rate": 4.9999996739906605e-06, "loss": 0.7609, "step": 116 }, { "epoch": 0.0071248058947112015, "grad_norm": 0.685268150071863, "learning_rate": 4.9999996319660195e-06, "loss": 0.759, "step": 117 }, { "epoch": 0.007185701671589075, "grad_norm": 0.6512632561280425, "learning_rate": 4.999999587394432e-06, "loss": 0.7775, "step": 118 }, { "epoch": 0.007246597448466949, "grad_norm": 0.6437624902005535, "learning_rate": 4.9999995402758964e-06, "loss": 0.8006, "step": 119 }, { "epoch": 0.007307493225344823, "grad_norm": 0.6737099934566816, "learning_rate": 4.9999994906104135e-06, "loss": 0.7856, "step": 120 }, { "epoch": 0.0073683890022226955, "grad_norm": 0.655236375407972, "learning_rate": 4.999999438397982e-06, "loss": 0.743, "step": 121 }, { "epoch": 0.007429284779100569, "grad_norm": 0.6829460867692894, "learning_rate": 4.999999383638604e-06, "loss": 0.7955, "step": 122 }, { "epoch": 0.007490180555978443, "grad_norm": 0.6872450008851713, "learning_rate": 4.999999326332278e-06, "loss": 0.8011, "step": 123 }, { "epoch": 0.007551076332856317, "grad_norm": 0.6323451453704254, "learning_rate": 4.999999266479006e-06, "loss": 0.7418, "step": 124 }, { "epoch": 0.00761197210973419, "grad_norm": 0.6290339271054501, "learning_rate": 4.999999204078785e-06, "loss": 0.8091, "step": 125 }, { "epoch": 0.007672867886612063, "grad_norm": 0.6495150715269111, "learning_rate": 4.999999139131618e-06, "loss": 0.8052, "step": 126 }, { "epoch": 0.007733763663489937, "grad_norm": 0.6542039835413054, "learning_rate": 4.999999071637503e-06, "loss": 0.7447, "step": 127 }, { "epoch": 0.007794659440367811, "grad_norm": 0.661968201697012, "learning_rate": 4.999999001596441e-06, "loss": 0.7957, "step": 128 }, { "epoch": 0.007855555217245683, "grad_norm": 0.7160738512666721, "learning_rate": 4.9999989290084324e-06, "loss": 0.7541, "step": 129 }, { "epoch": 0.007916450994123558, "grad_norm": 0.7122792990292478, "learning_rate": 4.999998853873478e-06, "loss": 0.7819, "step": 130 }, { "epoch": 0.00797734677100143, "grad_norm": 0.6281901645388035, "learning_rate": 4.9999987761915745e-06, "loss": 0.7744, "step": 131 }, { "epoch": 0.008038242547879305, "grad_norm": 0.6733062772158463, "learning_rate": 4.999998695962725e-06, "loss": 0.7735, "step": 132 }, { "epoch": 0.008099138324757178, "grad_norm": 0.6632923519770085, "learning_rate": 4.9999986131869295e-06, "loss": 0.7773, "step": 133 }, { "epoch": 0.008160034101635051, "grad_norm": 0.6814438859738214, "learning_rate": 4.999998527864187e-06, "loss": 0.7964, "step": 134 }, { "epoch": 0.008220929878512926, "grad_norm": 0.7397127769806833, "learning_rate": 4.9999984399944975e-06, "loss": 0.7545, "step": 135 }, { "epoch": 0.008281825655390799, "grad_norm": 0.6371514271193969, "learning_rate": 4.999998349577862e-06, "loss": 0.77, "step": 136 }, { "epoch": 0.008342721432268671, "grad_norm": 0.7438929630053858, "learning_rate": 4.999998256614281e-06, "loss": 0.7888, "step": 137 }, { "epoch": 0.008403617209146546, "grad_norm": 0.7719529834546661, "learning_rate": 4.999998161103753e-06, "loss": 0.7503, "step": 138 }, { "epoch": 0.008464512986024419, "grad_norm": 0.658634004291322, "learning_rate": 4.999998063046278e-06, "loss": 0.7677, "step": 139 }, { "epoch": 0.008525408762902293, "grad_norm": 0.6992507081175925, "learning_rate": 4.9999979624418586e-06, "loss": 0.72, "step": 140 }, { "epoch": 0.008586304539780166, "grad_norm": 0.6605488740604673, "learning_rate": 4.999997859290492e-06, "loss": 0.749, "step": 141 }, { "epoch": 0.008647200316658039, "grad_norm": 0.7171750160790671, "learning_rate": 4.9999977535921796e-06, "loss": 0.7752, "step": 142 }, { "epoch": 0.008708096093535914, "grad_norm": 0.6530496500614424, "learning_rate": 4.999997645346922e-06, "loss": 0.7705, "step": 143 }, { "epoch": 0.008768991870413786, "grad_norm": 0.7130315279628207, "learning_rate": 4.999997534554719e-06, "loss": 0.8622, "step": 144 }, { "epoch": 0.008829887647291661, "grad_norm": 0.7544125161508873, "learning_rate": 4.99999742121557e-06, "loss": 0.7688, "step": 145 }, { "epoch": 0.008890783424169534, "grad_norm": 0.6802518901542594, "learning_rate": 4.999997305329476e-06, "loss": 0.7766, "step": 146 }, { "epoch": 0.008951679201047407, "grad_norm": 0.6921700626668295, "learning_rate": 4.9999971868964355e-06, "loss": 0.7423, "step": 147 }, { "epoch": 0.009012574977925281, "grad_norm": 0.6901437006788196, "learning_rate": 4.999997065916451e-06, "loss": 0.8055, "step": 148 }, { "epoch": 0.009073470754803154, "grad_norm": 0.6933242049538111, "learning_rate": 4.999996942389521e-06, "loss": 0.8123, "step": 149 }, { "epoch": 0.009134366531681029, "grad_norm": 0.6841548888369027, "learning_rate": 4.999996816315647e-06, "loss": 0.765, "step": 150 }, { "epoch": 0.009195262308558902, "grad_norm": 0.738456866140826, "learning_rate": 4.999996687694827e-06, "loss": 0.7103, "step": 151 }, { "epoch": 0.009256158085436774, "grad_norm": 0.6700083821557311, "learning_rate": 4.999996556527062e-06, "loss": 0.7403, "step": 152 }, { "epoch": 0.009317053862314649, "grad_norm": 0.7300100644303995, "learning_rate": 4.999996422812354e-06, "loss": 0.7786, "step": 153 }, { "epoch": 0.009377949639192522, "grad_norm": 0.7012301576521878, "learning_rate": 4.999996286550701e-06, "loss": 0.7361, "step": 154 }, { "epoch": 0.009438845416070395, "grad_norm": 0.6559976995671444, "learning_rate": 4.9999961477421034e-06, "loss": 0.7412, "step": 155 }, { "epoch": 0.00949974119294827, "grad_norm": 0.6703323494691581, "learning_rate": 4.999996006386563e-06, "loss": 0.7617, "step": 156 }, { "epoch": 0.009560636969826142, "grad_norm": 0.7587015195055649, "learning_rate": 4.999995862484077e-06, "loss": 0.7329, "step": 157 }, { "epoch": 0.009621532746704017, "grad_norm": 0.7411009769887599, "learning_rate": 4.9999957160346476e-06, "loss": 0.7656, "step": 158 }, { "epoch": 0.00968242852358189, "grad_norm": 0.7452934246409302, "learning_rate": 4.999995567038275e-06, "loss": 0.756, "step": 159 }, { "epoch": 0.009743324300459762, "grad_norm": 0.71031818418992, "learning_rate": 4.999995415494959e-06, "loss": 0.7395, "step": 160 }, { "epoch": 0.009804220077337637, "grad_norm": 0.7235057867877875, "learning_rate": 4.999995261404699e-06, "loss": 0.7627, "step": 161 }, { "epoch": 0.00986511585421551, "grad_norm": 0.6731178777700476, "learning_rate": 4.999995104767497e-06, "loss": 0.7601, "step": 162 }, { "epoch": 0.009926011631093384, "grad_norm": 0.7028183845454333, "learning_rate": 4.99999494558335e-06, "loss": 0.7239, "step": 163 }, { "epoch": 0.009986907407971257, "grad_norm": 0.6656216184227579, "learning_rate": 4.9999947838522615e-06, "loss": 0.7534, "step": 164 }, { "epoch": 0.01004780318484913, "grad_norm": 0.7186416210405268, "learning_rate": 4.9999946195742295e-06, "loss": 0.7387, "step": 165 }, { "epoch": 0.010108698961727005, "grad_norm": 0.6638945421808334, "learning_rate": 4.999994452749256e-06, "loss": 0.7372, "step": 166 }, { "epoch": 0.010169594738604877, "grad_norm": 0.6927389901084501, "learning_rate": 4.99999428337734e-06, "loss": 0.7426, "step": 167 }, { "epoch": 0.010230490515482752, "grad_norm": 0.6661595960886136, "learning_rate": 4.999994111458481e-06, "loss": 0.6729, "step": 168 }, { "epoch": 0.010291386292360625, "grad_norm": 0.6830685353785078, "learning_rate": 4.999993936992681e-06, "loss": 0.746, "step": 169 }, { "epoch": 0.010352282069238498, "grad_norm": 0.7088937701487941, "learning_rate": 4.999993759979938e-06, "loss": 0.7377, "step": 170 }, { "epoch": 0.010413177846116372, "grad_norm": 0.7365563605281285, "learning_rate": 4.999993580420254e-06, "loss": 0.6641, "step": 171 }, { "epoch": 0.010474073622994245, "grad_norm": 0.7002862888633864, "learning_rate": 4.999993398313629e-06, "loss": 0.7457, "step": 172 }, { "epoch": 0.01053496939987212, "grad_norm": 0.7055463561808132, "learning_rate": 4.999993213660062e-06, "loss": 0.7409, "step": 173 }, { "epoch": 0.010595865176749993, "grad_norm": 0.7268351427171649, "learning_rate": 4.999993026459553e-06, "loss": 0.7227, "step": 174 }, { "epoch": 0.010656760953627865, "grad_norm": 0.6972055186960928, "learning_rate": 4.999992836712104e-06, "loss": 0.7249, "step": 175 }, { "epoch": 0.01071765673050574, "grad_norm": 0.7284296199886579, "learning_rate": 4.999992644417715e-06, "loss": 0.7425, "step": 176 }, { "epoch": 0.010778552507383613, "grad_norm": 0.7450000144631348, "learning_rate": 4.999992449576384e-06, "loss": 0.6667, "step": 177 }, { "epoch": 0.010839448284261486, "grad_norm": 0.6702586477860759, "learning_rate": 4.999992252188114e-06, "loss": 0.737, "step": 178 }, { "epoch": 0.01090034406113936, "grad_norm": 0.7276886097678802, "learning_rate": 4.999992052252904e-06, "loss": 0.6886, "step": 179 }, { "epoch": 0.010961239838017233, "grad_norm": 0.7225606522968886, "learning_rate": 4.9999918497707525e-06, "loss": 0.7279, "step": 180 }, { "epoch": 0.011022135614895108, "grad_norm": 0.6800258954571242, "learning_rate": 4.999991644741662e-06, "loss": 0.6573, "step": 181 }, { "epoch": 0.01108303139177298, "grad_norm": 0.7479304269240068, "learning_rate": 4.999991437165632e-06, "loss": 0.7354, "step": 182 }, { "epoch": 0.011143927168650853, "grad_norm": 0.7468248698179674, "learning_rate": 4.999991227042663e-06, "loss": 0.7741, "step": 183 }, { "epoch": 0.011204822945528728, "grad_norm": 0.7381839443912191, "learning_rate": 4.999991014372755e-06, "loss": 0.7812, "step": 184 }, { "epoch": 0.0112657187224066, "grad_norm": 0.7351804188410662, "learning_rate": 4.999990799155908e-06, "loss": 0.7668, "step": 185 }, { "epoch": 0.011326614499284475, "grad_norm": 0.7128303002347449, "learning_rate": 4.999990581392122e-06, "loss": 0.7439, "step": 186 }, { "epoch": 0.011387510276162348, "grad_norm": 0.7192879692104173, "learning_rate": 4.999990361081398e-06, "loss": 0.7388, "step": 187 }, { "epoch": 0.011448406053040221, "grad_norm": 0.7368820906153425, "learning_rate": 4.999990138223736e-06, "loss": 0.7213, "step": 188 }, { "epoch": 0.011509301829918096, "grad_norm": 0.735237081482363, "learning_rate": 4.999989912819135e-06, "loss": 0.7792, "step": 189 }, { "epoch": 0.011570197606795968, "grad_norm": 0.7626984494359809, "learning_rate": 4.999989684867598e-06, "loss": 0.7494, "step": 190 }, { "epoch": 0.011631093383673843, "grad_norm": 0.7559286289168233, "learning_rate": 4.999989454369122e-06, "loss": 0.7788, "step": 191 }, { "epoch": 0.011691989160551716, "grad_norm": 0.774095413744315, "learning_rate": 4.999989221323709e-06, "loss": 0.7185, "step": 192 }, { "epoch": 0.011752884937429589, "grad_norm": 0.6982704051138752, "learning_rate": 4.9999889857313585e-06, "loss": 0.7331, "step": 193 }, { "epoch": 0.011813780714307463, "grad_norm": 0.6979326595216816, "learning_rate": 4.999988747592073e-06, "loss": 0.7229, "step": 194 }, { "epoch": 0.011874676491185336, "grad_norm": 0.7532191780884344, "learning_rate": 4.999988506905849e-06, "loss": 0.7282, "step": 195 }, { "epoch": 0.011935572268063209, "grad_norm": 0.6907249300335847, "learning_rate": 4.99998826367269e-06, "loss": 0.7211, "step": 196 }, { "epoch": 0.011996468044941084, "grad_norm": 0.7517778917690918, "learning_rate": 4.9999880178925945e-06, "loss": 0.7515, "step": 197 }, { "epoch": 0.012057363821818956, "grad_norm": 0.7176994339981974, "learning_rate": 4.999987769565563e-06, "loss": 0.7661, "step": 198 }, { "epoch": 0.012118259598696831, "grad_norm": 0.7295939588365645, "learning_rate": 4.999987518691595e-06, "loss": 0.7175, "step": 199 }, { "epoch": 0.012179155375574704, "grad_norm": 0.7999127501197879, "learning_rate": 4.9999872652706925e-06, "loss": 0.7044, "step": 200 }, { "epoch": 0.012240051152452577, "grad_norm": 0.6922281983934921, "learning_rate": 4.999987009302856e-06, "loss": 0.7247, "step": 201 }, { "epoch": 0.012300946929330451, "grad_norm": 0.7079161306807291, "learning_rate": 4.999986750788084e-06, "loss": 0.6779, "step": 202 }, { "epoch": 0.012361842706208324, "grad_norm": 0.6801442855984485, "learning_rate": 4.9999864897263765e-06, "loss": 0.6863, "step": 203 }, { "epoch": 0.012422738483086199, "grad_norm": 0.7283869699903739, "learning_rate": 4.999986226117735e-06, "loss": 0.7491, "step": 204 }, { "epoch": 0.012483634259964072, "grad_norm": 0.7362100167076587, "learning_rate": 4.999985959962161e-06, "loss": 0.6756, "step": 205 }, { "epoch": 0.012544530036841944, "grad_norm": 0.7206450618922078, "learning_rate": 4.999985691259652e-06, "loss": 0.7533, "step": 206 }, { "epoch": 0.012605425813719819, "grad_norm": 0.7939605905912132, "learning_rate": 4.99998542001021e-06, "loss": 0.7536, "step": 207 }, { "epoch": 0.012666321590597692, "grad_norm": 0.7820908083479423, "learning_rate": 4.999985146213835e-06, "loss": 0.7388, "step": 208 }, { "epoch": 0.012727217367475566, "grad_norm": 0.7637260642072888, "learning_rate": 4.999984869870527e-06, "loss": 0.7277, "step": 209 }, { "epoch": 0.01278811314435344, "grad_norm": 0.6837686847407585, "learning_rate": 4.999984590980285e-06, "loss": 0.7043, "step": 210 }, { "epoch": 0.012849008921231312, "grad_norm": 0.7541234576238729, "learning_rate": 4.999984309543113e-06, "loss": 0.7302, "step": 211 }, { "epoch": 0.012909904698109187, "grad_norm": 0.7293000900276323, "learning_rate": 4.999984025559007e-06, "loss": 0.7178, "step": 212 }, { "epoch": 0.01297080047498706, "grad_norm": 0.7724389324980305, "learning_rate": 4.999983739027971e-06, "loss": 0.6974, "step": 213 }, { "epoch": 0.013031696251864934, "grad_norm": 0.7679095595828379, "learning_rate": 4.9999834499500025e-06, "loss": 0.7225, "step": 214 }, { "epoch": 0.013092592028742807, "grad_norm": 0.763849029802229, "learning_rate": 4.999983158325103e-06, "loss": 0.7417, "step": 215 }, { "epoch": 0.01315348780562068, "grad_norm": 0.7896744330689136, "learning_rate": 4.999982864153272e-06, "loss": 0.6555, "step": 216 }, { "epoch": 0.013214383582498554, "grad_norm": 0.7518091043051811, "learning_rate": 4.999982567434511e-06, "loss": 0.6608, "step": 217 }, { "epoch": 0.013275279359376427, "grad_norm": 0.7217134085677327, "learning_rate": 4.99998226816882e-06, "loss": 0.6772, "step": 218 }, { "epoch": 0.0133361751362543, "grad_norm": 0.7660022936244933, "learning_rate": 4.999981966356199e-06, "loss": 0.7223, "step": 219 }, { "epoch": 0.013397070913132175, "grad_norm": 0.8043673906066173, "learning_rate": 4.999981661996648e-06, "loss": 0.6696, "step": 220 }, { "epoch": 0.013457966690010047, "grad_norm": 0.7726735235289754, "learning_rate": 4.999981355090167e-06, "loss": 0.7308, "step": 221 }, { "epoch": 0.013518862466887922, "grad_norm": 0.751614647030011, "learning_rate": 4.999981045636758e-06, "loss": 0.6988, "step": 222 }, { "epoch": 0.013579758243765795, "grad_norm": 0.7813709504981456, "learning_rate": 4.999980733636421e-06, "loss": 0.7198, "step": 223 }, { "epoch": 0.013640654020643668, "grad_norm": 0.778072375539278, "learning_rate": 4.999980419089154e-06, "loss": 0.7183, "step": 224 }, { "epoch": 0.013701549797521542, "grad_norm": 0.707481014697059, "learning_rate": 4.99998010199496e-06, "loss": 0.7296, "step": 225 }, { "epoch": 0.013762445574399415, "grad_norm": 0.6722953148537729, "learning_rate": 4.999979782353838e-06, "loss": 0.7127, "step": 226 }, { "epoch": 0.01382334135127729, "grad_norm": 0.7562454337840977, "learning_rate": 4.999979460165788e-06, "loss": 0.7345, "step": 227 }, { "epoch": 0.013884237128155162, "grad_norm": 0.8015498144333107, "learning_rate": 4.999979135430812e-06, "loss": 0.6884, "step": 228 }, { "epoch": 0.013945132905033035, "grad_norm": 0.7689328295369451, "learning_rate": 4.999978808148908e-06, "loss": 0.7084, "step": 229 }, { "epoch": 0.01400602868191091, "grad_norm": 0.7330568283569818, "learning_rate": 4.999978478320078e-06, "loss": 0.6386, "step": 230 }, { "epoch": 0.014066924458788783, "grad_norm": 0.7853218550645573, "learning_rate": 4.999978145944322e-06, "loss": 0.711, "step": 231 }, { "epoch": 0.014127820235666657, "grad_norm": 0.7928018634524029, "learning_rate": 4.9999778110216405e-06, "loss": 0.6643, "step": 232 }, { "epoch": 0.01418871601254453, "grad_norm": 0.8175395654488663, "learning_rate": 4.999977473552034e-06, "loss": 0.6837, "step": 233 }, { "epoch": 0.014249611789422403, "grad_norm": 0.7349920139248245, "learning_rate": 4.9999771335355005e-06, "loss": 0.7012, "step": 234 }, { "epoch": 0.014310507566300278, "grad_norm": 0.7013419658758762, "learning_rate": 4.999976790972044e-06, "loss": 0.6687, "step": 235 }, { "epoch": 0.01437140334317815, "grad_norm": 0.7258687937921421, "learning_rate": 4.9999764458616625e-06, "loss": 0.6783, "step": 236 }, { "epoch": 0.014432299120056023, "grad_norm": 0.7941293289605419, "learning_rate": 4.999976098204358e-06, "loss": 0.7409, "step": 237 }, { "epoch": 0.014493194896933898, "grad_norm": 0.7322461764813147, "learning_rate": 4.999975748000129e-06, "loss": 0.6797, "step": 238 }, { "epoch": 0.01455409067381177, "grad_norm": 0.6994558154611148, "learning_rate": 4.999975395248976e-06, "loss": 0.7129, "step": 239 }, { "epoch": 0.014614986450689645, "grad_norm": 0.7468726168659512, "learning_rate": 4.9999750399509005e-06, "loss": 0.6933, "step": 240 }, { "epoch": 0.014675882227567518, "grad_norm": 0.7246430714836691, "learning_rate": 4.999974682105903e-06, "loss": 0.7282, "step": 241 }, { "epoch": 0.014736778004445391, "grad_norm": 0.778010169433594, "learning_rate": 4.999974321713983e-06, "loss": 0.6623, "step": 242 }, { "epoch": 0.014797673781323266, "grad_norm": 0.7264859630625451, "learning_rate": 4.99997395877514e-06, "loss": 0.6392, "step": 243 }, { "epoch": 0.014858569558201138, "grad_norm": 0.7953761232979301, "learning_rate": 4.999973593289378e-06, "loss": 0.6854, "step": 244 }, { "epoch": 0.014919465335079013, "grad_norm": 0.8169468837231547, "learning_rate": 4.999973225256693e-06, "loss": 0.7259, "step": 245 }, { "epoch": 0.014980361111956886, "grad_norm": 0.792524461048797, "learning_rate": 4.999972854677087e-06, "loss": 0.7046, "step": 246 }, { "epoch": 0.015041256888834759, "grad_norm": 0.7940635197160933, "learning_rate": 4.999972481550562e-06, "loss": 0.7874, "step": 247 }, { "epoch": 0.015102152665712633, "grad_norm": 0.7311284478399898, "learning_rate": 4.9999721058771165e-06, "loss": 0.6631, "step": 248 }, { "epoch": 0.015163048442590506, "grad_norm": 0.7400526588256793, "learning_rate": 4.99997172765675e-06, "loss": 0.6494, "step": 249 }, { "epoch": 0.01522394421946838, "grad_norm": 0.796559565322799, "learning_rate": 4.999971346889466e-06, "loss": 0.6853, "step": 250 }, { "epoch": 0.015284839996346253, "grad_norm": 0.7719225920603545, "learning_rate": 4.999970963575263e-06, "loss": 0.6911, "step": 251 }, { "epoch": 0.015345735773224126, "grad_norm": 0.8260605781988979, "learning_rate": 4.999970577714141e-06, "loss": 0.7203, "step": 252 }, { "epoch": 0.015406631550102001, "grad_norm": 0.7720637369342904, "learning_rate": 4.999970189306101e-06, "loss": 0.654, "step": 253 }, { "epoch": 0.015467527326979874, "grad_norm": 0.7839169050574772, "learning_rate": 4.999969798351144e-06, "loss": 0.6919, "step": 254 }, { "epoch": 0.015528423103857748, "grad_norm": 0.8148309106221141, "learning_rate": 4.999969404849268e-06, "loss": 0.6542, "step": 255 }, { "epoch": 0.015589318880735621, "grad_norm": 0.7853665700861517, "learning_rate": 4.999969008800477e-06, "loss": 0.7079, "step": 256 }, { "epoch": 0.015650214657613494, "grad_norm": 0.744231971596786, "learning_rate": 4.999968610204769e-06, "loss": 0.6509, "step": 257 }, { "epoch": 0.015711110434491367, "grad_norm": 0.855620516744312, "learning_rate": 4.999968209062145e-06, "loss": 0.6607, "step": 258 }, { "epoch": 0.015772006211369243, "grad_norm": 0.7339662937019528, "learning_rate": 4.999967805372605e-06, "loss": 0.6427, "step": 259 }, { "epoch": 0.015832901988247116, "grad_norm": 0.738609131906306, "learning_rate": 4.999967399136151e-06, "loss": 0.6819, "step": 260 }, { "epoch": 0.01589379776512499, "grad_norm": 0.7867287424343871, "learning_rate": 4.99996699035278e-06, "loss": 0.6494, "step": 261 }, { "epoch": 0.01595469354200286, "grad_norm": 0.7354194659393885, "learning_rate": 4.999966579022497e-06, "loss": 0.6913, "step": 262 }, { "epoch": 0.016015589318880735, "grad_norm": 0.7819926810876368, "learning_rate": 4.999966165145298e-06, "loss": 0.7125, "step": 263 }, { "epoch": 0.01607648509575861, "grad_norm": 0.7054957363647959, "learning_rate": 4.999965748721187e-06, "loss": 0.6564, "step": 264 }, { "epoch": 0.016137380872636484, "grad_norm": 0.7217517075926269, "learning_rate": 4.9999653297501615e-06, "loss": 0.66, "step": 265 }, { "epoch": 0.016198276649514357, "grad_norm": 0.7094708912526655, "learning_rate": 4.999964908232224e-06, "loss": 0.6605, "step": 266 }, { "epoch": 0.01625917242639223, "grad_norm": 0.7163070332356367, "learning_rate": 4.9999644841673745e-06, "loss": 0.6842, "step": 267 }, { "epoch": 0.016320068203270102, "grad_norm": 0.7790964287377811, "learning_rate": 4.999964057555613e-06, "loss": 0.7121, "step": 268 }, { "epoch": 0.016380963980147975, "grad_norm": 0.8366744934298782, "learning_rate": 4.99996362839694e-06, "loss": 0.749, "step": 269 }, { "epoch": 0.01644185975702585, "grad_norm": 0.7626826126815852, "learning_rate": 4.999963196691355e-06, "loss": 0.6751, "step": 270 }, { "epoch": 0.016502755533903724, "grad_norm": 0.7951658830117461, "learning_rate": 4.999962762438861e-06, "loss": 0.6711, "step": 271 }, { "epoch": 0.016563651310781597, "grad_norm": 0.7309595516242022, "learning_rate": 4.9999623256394565e-06, "loss": 0.7304, "step": 272 }, { "epoch": 0.01662454708765947, "grad_norm": 0.7608699663306677, "learning_rate": 4.9999618862931424e-06, "loss": 0.7223, "step": 273 }, { "epoch": 0.016685442864537343, "grad_norm": 0.804576193407754, "learning_rate": 4.999961444399919e-06, "loss": 0.6703, "step": 274 }, { "epoch": 0.01674633864141522, "grad_norm": 0.8268714208076846, "learning_rate": 4.999960999959787e-06, "loss": 0.6865, "step": 275 }, { "epoch": 0.016807234418293092, "grad_norm": 0.7632156726774376, "learning_rate": 4.999960552972746e-06, "loss": 0.6993, "step": 276 }, { "epoch": 0.016868130195170965, "grad_norm": 0.7542677251057993, "learning_rate": 4.999960103438798e-06, "loss": 0.6792, "step": 277 }, { "epoch": 0.016929025972048838, "grad_norm": 0.8387648177951366, "learning_rate": 4.9999596513579416e-06, "loss": 0.6851, "step": 278 }, { "epoch": 0.01698992174892671, "grad_norm": 0.694673133486192, "learning_rate": 4.99995919673018e-06, "loss": 0.6624, "step": 279 }, { "epoch": 0.017050817525804587, "grad_norm": 0.8133784896972817, "learning_rate": 4.99995873955551e-06, "loss": 0.6576, "step": 280 }, { "epoch": 0.01711171330268246, "grad_norm": 0.8273860908783668, "learning_rate": 4.999958279833936e-06, "loss": 0.6924, "step": 281 }, { "epoch": 0.017172609079560332, "grad_norm": 0.8277678773537923, "learning_rate": 4.999957817565455e-06, "loss": 0.7333, "step": 282 }, { "epoch": 0.017233504856438205, "grad_norm": 0.8328156500701911, "learning_rate": 4.999957352750069e-06, "loss": 0.6616, "step": 283 }, { "epoch": 0.017294400633316078, "grad_norm": 0.7669797247375859, "learning_rate": 4.999956885387779e-06, "loss": 0.691, "step": 284 }, { "epoch": 0.017355296410193954, "grad_norm": 0.7933377384491528, "learning_rate": 4.999956415478584e-06, "loss": 0.6987, "step": 285 }, { "epoch": 0.017416192187071827, "grad_norm": 0.8116884979172979, "learning_rate": 4.9999559430224865e-06, "loss": 0.6351, "step": 286 }, { "epoch": 0.0174770879639497, "grad_norm": 0.7843909400630843, "learning_rate": 4.999955468019485e-06, "loss": 0.7254, "step": 287 }, { "epoch": 0.017537983740827573, "grad_norm": 0.755313476991527, "learning_rate": 4.9999549904695815e-06, "loss": 0.7352, "step": 288 }, { "epoch": 0.017598879517705446, "grad_norm": 0.7083760414508964, "learning_rate": 4.999954510372776e-06, "loss": 0.702, "step": 289 }, { "epoch": 0.017659775294583322, "grad_norm": 0.7830708445309442, "learning_rate": 4.999954027729068e-06, "loss": 0.637, "step": 290 }, { "epoch": 0.017720671071461195, "grad_norm": 0.8026591907283346, "learning_rate": 4.9999535425384595e-06, "loss": 0.6624, "step": 291 }, { "epoch": 0.017781566848339068, "grad_norm": 0.7398162509596033, "learning_rate": 4.99995305480095e-06, "loss": 0.6971, "step": 292 }, { "epoch": 0.01784246262521694, "grad_norm": 0.849608682059187, "learning_rate": 4.99995256451654e-06, "loss": 0.7151, "step": 293 }, { "epoch": 0.017903358402094813, "grad_norm": 0.8078020917319928, "learning_rate": 4.999952071685231e-06, "loss": 0.7479, "step": 294 }, { "epoch": 0.01796425417897269, "grad_norm": 0.7747287091256054, "learning_rate": 4.9999515763070224e-06, "loss": 0.705, "step": 295 }, { "epoch": 0.018025149955850563, "grad_norm": 0.7507041980159189, "learning_rate": 4.999951078381915e-06, "loss": 0.6359, "step": 296 }, { "epoch": 0.018086045732728435, "grad_norm": 0.8154290929181294, "learning_rate": 4.99995057790991e-06, "loss": 0.7149, "step": 297 }, { "epoch": 0.01814694150960631, "grad_norm": 0.7616529413229899, "learning_rate": 4.999950074891007e-06, "loss": 0.6635, "step": 298 }, { "epoch": 0.01820783728648418, "grad_norm": 0.7770854590693149, "learning_rate": 4.999949569325206e-06, "loss": 0.6445, "step": 299 }, { "epoch": 0.018268733063362057, "grad_norm": 0.7794103786448381, "learning_rate": 4.999949061212509e-06, "loss": 0.7052, "step": 300 }, { "epoch": 0.01832962884023993, "grad_norm": 0.7067674189412413, "learning_rate": 4.999948550552916e-06, "loss": 0.6475, "step": 301 }, { "epoch": 0.018390524617117803, "grad_norm": 0.7642864830013847, "learning_rate": 4.999948037346428e-06, "loss": 0.6796, "step": 302 }, { "epoch": 0.018451420393995676, "grad_norm": 0.7693880119536499, "learning_rate": 4.9999475215930434e-06, "loss": 0.6664, "step": 303 }, { "epoch": 0.01851231617087355, "grad_norm": 0.7728419105665237, "learning_rate": 4.999947003292766e-06, "loss": 0.6919, "step": 304 }, { "epoch": 0.018573211947751425, "grad_norm": 0.7535372175710863, "learning_rate": 4.9999464824455936e-06, "loss": 0.6505, "step": 305 }, { "epoch": 0.018634107724629298, "grad_norm": 0.8302332353833313, "learning_rate": 4.999945959051527e-06, "loss": 0.6686, "step": 306 }, { "epoch": 0.01869500350150717, "grad_norm": 0.8065572425218912, "learning_rate": 4.999945433110569e-06, "loss": 0.6692, "step": 307 }, { "epoch": 0.018755899278385044, "grad_norm": 0.7452052857864022, "learning_rate": 4.999944904622718e-06, "loss": 0.6713, "step": 308 }, { "epoch": 0.018816795055262917, "grad_norm": 0.8100509913545736, "learning_rate": 4.999944373587974e-06, "loss": 0.6211, "step": 309 }, { "epoch": 0.01887769083214079, "grad_norm": 0.8381308714422074, "learning_rate": 4.99994384000634e-06, "loss": 0.6827, "step": 310 }, { "epoch": 0.018938586609018666, "grad_norm": 0.7948504658516677, "learning_rate": 4.999943303877814e-06, "loss": 0.7021, "step": 311 }, { "epoch": 0.01899948238589654, "grad_norm": 0.7992771295231328, "learning_rate": 4.999942765202399e-06, "loss": 0.693, "step": 312 }, { "epoch": 0.01906037816277441, "grad_norm": 0.7737377698601231, "learning_rate": 4.999942223980094e-06, "loss": 0.708, "step": 313 }, { "epoch": 0.019121273939652284, "grad_norm": 0.7643948245435148, "learning_rate": 4.999941680210899e-06, "loss": 0.701, "step": 314 }, { "epoch": 0.019182169716530157, "grad_norm": 0.8202366713481364, "learning_rate": 4.999941133894816e-06, "loss": 0.6673, "step": 315 }, { "epoch": 0.019243065493408033, "grad_norm": 0.7493324617677929, "learning_rate": 4.999940585031845e-06, "loss": 0.6509, "step": 316 }, { "epoch": 0.019303961270285906, "grad_norm": 0.7397035566441604, "learning_rate": 4.9999400336219865e-06, "loss": 0.6529, "step": 317 }, { "epoch": 0.01936485704716378, "grad_norm": 0.8067959666796467, "learning_rate": 4.999939479665241e-06, "loss": 0.7073, "step": 318 }, { "epoch": 0.019425752824041652, "grad_norm": 0.8446904156112273, "learning_rate": 4.999938923161609e-06, "loss": 0.6674, "step": 319 }, { "epoch": 0.019486648600919525, "grad_norm": 0.8248495149429563, "learning_rate": 4.999938364111092e-06, "loss": 0.6915, "step": 320 }, { "epoch": 0.0195475443777974, "grad_norm": 0.8457527035021967, "learning_rate": 4.999937802513689e-06, "loss": 0.6336, "step": 321 }, { "epoch": 0.019608440154675274, "grad_norm": 0.9102832931052375, "learning_rate": 4.999937238369401e-06, "loss": 0.6504, "step": 322 }, { "epoch": 0.019669335931553147, "grad_norm": 0.8723419850799531, "learning_rate": 4.999936671678229e-06, "loss": 0.6275, "step": 323 }, { "epoch": 0.01973023170843102, "grad_norm": 0.8620196142082291, "learning_rate": 4.999936102440174e-06, "loss": 0.6495, "step": 324 }, { "epoch": 0.019791127485308892, "grad_norm": 0.7989741024823176, "learning_rate": 4.999935530655237e-06, "loss": 0.6399, "step": 325 }, { "epoch": 0.01985202326218677, "grad_norm": 0.7950548359449368, "learning_rate": 4.999934956323415e-06, "loss": 0.6364, "step": 326 }, { "epoch": 0.01991291903906464, "grad_norm": 0.8332394553546462, "learning_rate": 4.999934379444714e-06, "loss": 0.6533, "step": 327 }, { "epoch": 0.019973814815942514, "grad_norm": 0.8578002368884328, "learning_rate": 4.99993380001913e-06, "loss": 0.7001, "step": 328 }, { "epoch": 0.020034710592820387, "grad_norm": 0.8110497984296026, "learning_rate": 4.999933218046667e-06, "loss": 0.6798, "step": 329 }, { "epoch": 0.02009560636969826, "grad_norm": 0.9201520261127892, "learning_rate": 4.999932633527322e-06, "loss": 0.6805, "step": 330 }, { "epoch": 0.020156502146576136, "grad_norm": 0.8504251932566226, "learning_rate": 4.999932046461099e-06, "loss": 0.6866, "step": 331 }, { "epoch": 0.02021739792345401, "grad_norm": 0.7991206341289229, "learning_rate": 4.999931456847998e-06, "loss": 0.6775, "step": 332 }, { "epoch": 0.020278293700331882, "grad_norm": 0.8247108538023933, "learning_rate": 4.999930864688016e-06, "loss": 0.7175, "step": 333 }, { "epoch": 0.020339189477209755, "grad_norm": 0.7772316142576166, "learning_rate": 4.9999302699811595e-06, "loss": 0.6883, "step": 334 }, { "epoch": 0.020400085254087628, "grad_norm": 0.8188218511497163, "learning_rate": 4.999929672727424e-06, "loss": 0.6635, "step": 335 }, { "epoch": 0.020460981030965504, "grad_norm": 0.8352994588630622, "learning_rate": 4.999929072926812e-06, "loss": 0.6332, "step": 336 }, { "epoch": 0.020521876807843377, "grad_norm": 0.7869442911377629, "learning_rate": 4.999928470579326e-06, "loss": 0.689, "step": 337 }, { "epoch": 0.02058277258472125, "grad_norm": 0.8153634551720391, "learning_rate": 4.999927865684963e-06, "loss": 0.6781, "step": 338 }, { "epoch": 0.020643668361599123, "grad_norm": 0.7593974657912167, "learning_rate": 4.999927258243727e-06, "loss": 0.6473, "step": 339 }, { "epoch": 0.020704564138476995, "grad_norm": 0.8222330936101717, "learning_rate": 4.999926648255616e-06, "loss": 0.6833, "step": 340 }, { "epoch": 0.020765459915354872, "grad_norm": 0.7880456730025663, "learning_rate": 4.999926035720633e-06, "loss": 0.6331, "step": 341 }, { "epoch": 0.020826355692232745, "grad_norm": 0.8423418522027794, "learning_rate": 4.999925420638776e-06, "loss": 0.6695, "step": 342 }, { "epoch": 0.020887251469110617, "grad_norm": 0.8273999818990211, "learning_rate": 4.999924803010047e-06, "loss": 0.6733, "step": 343 }, { "epoch": 0.02094814724598849, "grad_norm": 0.8068359260397066, "learning_rate": 4.999924182834447e-06, "loss": 0.6131, "step": 344 }, { "epoch": 0.021009043022866363, "grad_norm": 0.8444209022615488, "learning_rate": 4.999923560111977e-06, "loss": 0.6655, "step": 345 }, { "epoch": 0.02106993879974424, "grad_norm": 0.7678009798881188, "learning_rate": 4.999922934842636e-06, "loss": 0.6314, "step": 346 }, { "epoch": 0.021130834576622112, "grad_norm": 0.7838240410282088, "learning_rate": 4.999922307026425e-06, "loss": 0.6676, "step": 347 }, { "epoch": 0.021191730353499985, "grad_norm": 0.8078629063928293, "learning_rate": 4.999921676663345e-06, "loss": 0.7461, "step": 348 }, { "epoch": 0.021252626130377858, "grad_norm": 0.7668198901833109, "learning_rate": 4.999921043753398e-06, "loss": 0.6222, "step": 349 }, { "epoch": 0.02131352190725573, "grad_norm": 0.9078761397001247, "learning_rate": 4.999920408296582e-06, "loss": 0.6603, "step": 350 }, { "epoch": 0.021374417684133604, "grad_norm": 0.7818911046746919, "learning_rate": 4.9999197702929e-06, "loss": 0.668, "step": 351 }, { "epoch": 0.02143531346101148, "grad_norm": 0.8191784430566436, "learning_rate": 4.999919129742352e-06, "loss": 0.6599, "step": 352 }, { "epoch": 0.021496209237889353, "grad_norm": 0.783071107294683, "learning_rate": 4.999918486644938e-06, "loss": 0.6581, "step": 353 }, { "epoch": 0.021557105014767226, "grad_norm": 0.7517410234914055, "learning_rate": 4.999917841000659e-06, "loss": 0.6571, "step": 354 }, { "epoch": 0.0216180007916451, "grad_norm": 0.7625094896728111, "learning_rate": 4.999917192809516e-06, "loss": 0.6256, "step": 355 }, { "epoch": 0.02167889656852297, "grad_norm": 0.8096196240480884, "learning_rate": 4.999916542071509e-06, "loss": 0.64, "step": 356 }, { "epoch": 0.021739792345400848, "grad_norm": 0.8479246369556783, "learning_rate": 4.999915888786639e-06, "loss": 0.706, "step": 357 }, { "epoch": 0.02180068812227872, "grad_norm": 0.8228950423793091, "learning_rate": 4.999915232954906e-06, "loss": 0.6632, "step": 358 }, { "epoch": 0.021861583899156593, "grad_norm": 0.7975163297341146, "learning_rate": 4.999914574576313e-06, "loss": 0.5989, "step": 359 }, { "epoch": 0.021922479676034466, "grad_norm": 0.8884259076325945, "learning_rate": 4.999913913650858e-06, "loss": 0.674, "step": 360 }, { "epoch": 0.02198337545291234, "grad_norm": 0.8486122524768349, "learning_rate": 4.9999132501785424e-06, "loss": 0.7057, "step": 361 }, { "epoch": 0.022044271229790215, "grad_norm": 0.9128520789758711, "learning_rate": 4.999912584159368e-06, "loss": 0.6989, "step": 362 }, { "epoch": 0.022105167006668088, "grad_norm": 0.7805790238367319, "learning_rate": 4.999911915593334e-06, "loss": 0.5996, "step": 363 }, { "epoch": 0.02216606278354596, "grad_norm": 0.7661732478003527, "learning_rate": 4.999911244480441e-06, "loss": 0.6946, "step": 364 }, { "epoch": 0.022226958560423834, "grad_norm": 0.7783461548955787, "learning_rate": 4.999910570820692e-06, "loss": 0.6234, "step": 365 }, { "epoch": 0.022287854337301707, "grad_norm": 0.8039747208398055, "learning_rate": 4.999909894614085e-06, "loss": 0.6623, "step": 366 }, { "epoch": 0.022348750114179583, "grad_norm": 0.8185305763492312, "learning_rate": 4.999909215860622e-06, "loss": 0.6372, "step": 367 }, { "epoch": 0.022409645891057456, "grad_norm": 0.8271119318617564, "learning_rate": 4.999908534560304e-06, "loss": 0.6636, "step": 368 }, { "epoch": 0.02247054166793533, "grad_norm": 0.809494333885654, "learning_rate": 4.99990785071313e-06, "loss": 0.6382, "step": 369 }, { "epoch": 0.0225314374448132, "grad_norm": 0.7639997683021247, "learning_rate": 4.999907164319103e-06, "loss": 0.6393, "step": 370 }, { "epoch": 0.022592333221691074, "grad_norm": 0.965544178297169, "learning_rate": 4.999906475378222e-06, "loss": 0.6704, "step": 371 }, { "epoch": 0.02265322899856895, "grad_norm": 0.8368325157524191, "learning_rate": 4.999905783890489e-06, "loss": 0.6653, "step": 372 }, { "epoch": 0.022714124775446824, "grad_norm": 0.8580585134740449, "learning_rate": 4.9999050898559026e-06, "loss": 0.6906, "step": 373 }, { "epoch": 0.022775020552324696, "grad_norm": 0.8864382843941513, "learning_rate": 4.999904393274465e-06, "loss": 0.6924, "step": 374 }, { "epoch": 0.02283591632920257, "grad_norm": 0.8214585001478643, "learning_rate": 4.999903694146178e-06, "loss": 0.6324, "step": 375 }, { "epoch": 0.022896812106080442, "grad_norm": 0.8003680388857501, "learning_rate": 4.999902992471041e-06, "loss": 0.6034, "step": 376 }, { "epoch": 0.02295770788295832, "grad_norm": 0.8965912704506847, "learning_rate": 4.999902288249053e-06, "loss": 0.6284, "step": 377 }, { "epoch": 0.02301860365983619, "grad_norm": 0.8166609665623451, "learning_rate": 4.9999015814802184e-06, "loss": 0.6312, "step": 378 }, { "epoch": 0.023079499436714064, "grad_norm": 0.8542409224508395, "learning_rate": 4.999900872164536e-06, "loss": 0.6432, "step": 379 }, { "epoch": 0.023140395213591937, "grad_norm": 0.77172810509425, "learning_rate": 4.999900160302006e-06, "loss": 0.6586, "step": 380 }, { "epoch": 0.02320129099046981, "grad_norm": 0.8023244526896741, "learning_rate": 4.999899445892629e-06, "loss": 0.6139, "step": 381 }, { "epoch": 0.023262186767347686, "grad_norm": 0.8375708187024465, "learning_rate": 4.9998987289364074e-06, "loss": 0.674, "step": 382 }, { "epoch": 0.02332308254422556, "grad_norm": 0.9194454460619872, "learning_rate": 4.99989800943334e-06, "loss": 0.7069, "step": 383 }, { "epoch": 0.023383978321103432, "grad_norm": 0.8709395477168814, "learning_rate": 4.999897287383429e-06, "loss": 0.6863, "step": 384 }, { "epoch": 0.023444874097981305, "grad_norm": 0.8223666780585378, "learning_rate": 4.999896562786676e-06, "loss": 0.6239, "step": 385 }, { "epoch": 0.023505769874859177, "grad_norm": 0.8496100396312322, "learning_rate": 4.999895835643078e-06, "loss": 0.6622, "step": 386 }, { "epoch": 0.023566665651737054, "grad_norm": 0.8867287690792346, "learning_rate": 4.99989510595264e-06, "loss": 0.6172, "step": 387 }, { "epoch": 0.023627561428614927, "grad_norm": 0.8100565682035575, "learning_rate": 4.99989437371536e-06, "loss": 0.6875, "step": 388 }, { "epoch": 0.0236884572054928, "grad_norm": 0.8140553860900619, "learning_rate": 4.999893638931239e-06, "loss": 0.6329, "step": 389 }, { "epoch": 0.023749352982370672, "grad_norm": 0.7897498227490352, "learning_rate": 4.999892901600279e-06, "loss": 0.6315, "step": 390 }, { "epoch": 0.023810248759248545, "grad_norm": 0.8595494455330078, "learning_rate": 4.99989216172248e-06, "loss": 0.5807, "step": 391 }, { "epoch": 0.023871144536126418, "grad_norm": 0.7760899615846001, "learning_rate": 4.9998914192978436e-06, "loss": 0.5885, "step": 392 }, { "epoch": 0.023932040313004294, "grad_norm": 0.7979232600213241, "learning_rate": 4.999890674326369e-06, "loss": 0.6807, "step": 393 }, { "epoch": 0.023992936089882167, "grad_norm": 0.8135092723190958, "learning_rate": 4.999889926808058e-06, "loss": 0.6347, "step": 394 }, { "epoch": 0.02405383186676004, "grad_norm": 0.8040599904284218, "learning_rate": 4.9998891767429105e-06, "loss": 0.651, "step": 395 }, { "epoch": 0.024114727643637913, "grad_norm": 0.8146045467916457, "learning_rate": 4.999888424130929e-06, "loss": 0.6331, "step": 396 }, { "epoch": 0.024175623420515786, "grad_norm": 0.8138900703214225, "learning_rate": 4.999887668972112e-06, "loss": 0.6434, "step": 397 }, { "epoch": 0.024236519197393662, "grad_norm": 0.8443801016204281, "learning_rate": 4.999886911266462e-06, "loss": 0.6236, "step": 398 }, { "epoch": 0.024297414974271535, "grad_norm": 0.796103696298503, "learning_rate": 4.999886151013979e-06, "loss": 0.5971, "step": 399 }, { "epoch": 0.024358310751149408, "grad_norm": 0.8408801222708111, "learning_rate": 4.9998853882146645e-06, "loss": 0.6476, "step": 400 }, { "epoch": 0.02441920652802728, "grad_norm": 0.8185994200479856, "learning_rate": 4.9998846228685185e-06, "loss": 0.613, "step": 401 }, { "epoch": 0.024480102304905153, "grad_norm": 0.8533862506865407, "learning_rate": 4.999883854975542e-06, "loss": 0.654, "step": 402 }, { "epoch": 0.02454099808178303, "grad_norm": 0.8278738369638949, "learning_rate": 4.999883084535735e-06, "loss": 0.6384, "step": 403 }, { "epoch": 0.024601893858660902, "grad_norm": 0.8493634595896167, "learning_rate": 4.999882311549099e-06, "loss": 0.636, "step": 404 }, { "epoch": 0.024662789635538775, "grad_norm": 0.7979396448016045, "learning_rate": 4.999881536015636e-06, "loss": 0.6746, "step": 405 }, { "epoch": 0.024723685412416648, "grad_norm": 0.8768286232589058, "learning_rate": 4.999880757935345e-06, "loss": 0.7483, "step": 406 }, { "epoch": 0.02478458118929452, "grad_norm": 0.8304333564660699, "learning_rate": 4.999879977308228e-06, "loss": 0.6407, "step": 407 }, { "epoch": 0.024845476966172397, "grad_norm": 0.874930872113359, "learning_rate": 4.999879194134285e-06, "loss": 0.6642, "step": 408 }, { "epoch": 0.02490637274305027, "grad_norm": 0.8517903392881333, "learning_rate": 4.999878408413517e-06, "loss": 0.6373, "step": 409 }, { "epoch": 0.024967268519928143, "grad_norm": 0.8160019892686183, "learning_rate": 4.999877620145925e-06, "loss": 0.6872, "step": 410 }, { "epoch": 0.025028164296806016, "grad_norm": 0.8174442246705625, "learning_rate": 4.999876829331509e-06, "loss": 0.5921, "step": 411 }, { "epoch": 0.02508906007368389, "grad_norm": 0.8201156842848333, "learning_rate": 4.999876035970272e-06, "loss": 0.7016, "step": 412 }, { "epoch": 0.025149955850561765, "grad_norm": 0.8449310704720301, "learning_rate": 4.999875240062212e-06, "loss": 0.6772, "step": 413 }, { "epoch": 0.025210851627439638, "grad_norm": 0.8231243081860802, "learning_rate": 4.999874441607331e-06, "loss": 0.7075, "step": 414 }, { "epoch": 0.02527174740431751, "grad_norm": 0.8324086827787579, "learning_rate": 4.9998736406056305e-06, "loss": 0.6074, "step": 415 }, { "epoch": 0.025332643181195384, "grad_norm": 0.8483743731054529, "learning_rate": 4.99987283705711e-06, "loss": 0.6476, "step": 416 }, { "epoch": 0.025393538958073256, "grad_norm": 0.8748152498162979, "learning_rate": 4.999872030961772e-06, "loss": 0.6642, "step": 417 }, { "epoch": 0.025454434734951133, "grad_norm": 0.8378792265286606, "learning_rate": 4.999871222319615e-06, "loss": 0.6274, "step": 418 }, { "epoch": 0.025515330511829006, "grad_norm": 0.8804880604185217, "learning_rate": 4.999870411130643e-06, "loss": 0.6104, "step": 419 }, { "epoch": 0.02557622628870688, "grad_norm": 0.8179281965456936, "learning_rate": 4.999869597394854e-06, "loss": 0.6362, "step": 420 }, { "epoch": 0.02563712206558475, "grad_norm": 0.9790355006042828, "learning_rate": 4.999868781112249e-06, "loss": 0.6399, "step": 421 }, { "epoch": 0.025698017842462624, "grad_norm": 0.8138044658823335, "learning_rate": 4.999867962282831e-06, "loss": 0.6422, "step": 422 }, { "epoch": 0.0257589136193405, "grad_norm": 0.9501232443853702, "learning_rate": 4.999867140906599e-06, "loss": 0.6503, "step": 423 }, { "epoch": 0.025819809396218373, "grad_norm": 0.8567002829777202, "learning_rate": 4.999866316983554e-06, "loss": 0.589, "step": 424 }, { "epoch": 0.025880705173096246, "grad_norm": 0.8524126162250004, "learning_rate": 4.999865490513698e-06, "loss": 0.6508, "step": 425 }, { "epoch": 0.02594160094997412, "grad_norm": 0.8158516658240909, "learning_rate": 4.99986466149703e-06, "loss": 0.6074, "step": 426 }, { "epoch": 0.02600249672685199, "grad_norm": 0.8554304852404819, "learning_rate": 4.9998638299335524e-06, "loss": 0.588, "step": 427 }, { "epoch": 0.026063392503729868, "grad_norm": 0.8732335864923195, "learning_rate": 4.999862995823265e-06, "loss": 0.6121, "step": 428 }, { "epoch": 0.02612428828060774, "grad_norm": 0.8625349924583123, "learning_rate": 4.99986215916617e-06, "loss": 0.6891, "step": 429 }, { "epoch": 0.026185184057485614, "grad_norm": 0.9043225624217842, "learning_rate": 4.999861319962267e-06, "loss": 0.6728, "step": 430 }, { "epoch": 0.026246079834363487, "grad_norm": 0.9233222681170974, "learning_rate": 4.999860478211558e-06, "loss": 0.6098, "step": 431 }, { "epoch": 0.02630697561124136, "grad_norm": 0.8679685840916898, "learning_rate": 4.9998596339140415e-06, "loss": 0.6997, "step": 432 }, { "epoch": 0.026367871388119232, "grad_norm": 0.8797328107786474, "learning_rate": 4.999858787069722e-06, "loss": 0.6575, "step": 433 }, { "epoch": 0.02642876716499711, "grad_norm": 0.8773337916187751, "learning_rate": 4.999857937678596e-06, "loss": 0.6885, "step": 434 }, { "epoch": 0.02648966294187498, "grad_norm": 0.8463141106416795, "learning_rate": 4.999857085740668e-06, "loss": 0.6153, "step": 435 }, { "epoch": 0.026550558718752854, "grad_norm": 0.8695931277823966, "learning_rate": 4.999856231255937e-06, "loss": 0.6126, "step": 436 }, { "epoch": 0.026611454495630727, "grad_norm": 0.8130096227868048, "learning_rate": 4.999855374224406e-06, "loss": 0.6157, "step": 437 }, { "epoch": 0.0266723502725086, "grad_norm": 0.8637057669130215, "learning_rate": 4.999854514646073e-06, "loss": 0.6509, "step": 438 }, { "epoch": 0.026733246049386476, "grad_norm": 0.8635227944655051, "learning_rate": 4.99985365252094e-06, "loss": 0.6862, "step": 439 }, { "epoch": 0.02679414182626435, "grad_norm": 0.8098887086410573, "learning_rate": 4.999852787849009e-06, "loss": 0.6531, "step": 440 }, { "epoch": 0.026855037603142222, "grad_norm": 0.8896421369930613, "learning_rate": 4.999851920630278e-06, "loss": 0.6069, "step": 441 }, { "epoch": 0.026915933380020095, "grad_norm": 0.8885332035235441, "learning_rate": 4.999851050864752e-06, "loss": 0.6384, "step": 442 }, { "epoch": 0.026976829156897968, "grad_norm": 0.819210187872491, "learning_rate": 4.999850178552429e-06, "loss": 0.5742, "step": 443 }, { "epoch": 0.027037724933775844, "grad_norm": 0.8252928496315517, "learning_rate": 4.999849303693311e-06, "loss": 0.5884, "step": 444 }, { "epoch": 0.027098620710653717, "grad_norm": 0.9372406522190535, "learning_rate": 4.999848426287398e-06, "loss": 0.6173, "step": 445 }, { "epoch": 0.02715951648753159, "grad_norm": 0.8747337376163049, "learning_rate": 4.999847546334692e-06, "loss": 0.6444, "step": 446 }, { "epoch": 0.027220412264409462, "grad_norm": 0.8660984985124811, "learning_rate": 4.999846663835193e-06, "loss": 0.592, "step": 447 }, { "epoch": 0.027281308041287335, "grad_norm": 0.8130650490781918, "learning_rate": 4.999845778788902e-06, "loss": 0.6932, "step": 448 }, { "epoch": 0.02734220381816521, "grad_norm": 0.8876907139009043, "learning_rate": 4.99984489119582e-06, "loss": 0.703, "step": 449 }, { "epoch": 0.027403099595043084, "grad_norm": 0.8579019826340917, "learning_rate": 4.999844001055948e-06, "loss": 0.5935, "step": 450 }, { "epoch": 0.027463995371920957, "grad_norm": 0.8024649278022873, "learning_rate": 4.999843108369287e-06, "loss": 0.6546, "step": 451 }, { "epoch": 0.02752489114879883, "grad_norm": 0.9103199335431142, "learning_rate": 4.999842213135837e-06, "loss": 0.6371, "step": 452 }, { "epoch": 0.027585786925676703, "grad_norm": 0.8983894021899964, "learning_rate": 4.999841315355601e-06, "loss": 0.6252, "step": 453 }, { "epoch": 0.02764668270255458, "grad_norm": 0.8104551986647945, "learning_rate": 4.999840415028579e-06, "loss": 0.6141, "step": 454 }, { "epoch": 0.027707578479432452, "grad_norm": 0.8522042675253364, "learning_rate": 4.99983951215477e-06, "loss": 0.6532, "step": 455 }, { "epoch": 0.027768474256310325, "grad_norm": 0.8392137291140584, "learning_rate": 4.999838606734177e-06, "loss": 0.6075, "step": 456 }, { "epoch": 0.027829370033188198, "grad_norm": 0.9015535377891991, "learning_rate": 4.9998376987668004e-06, "loss": 0.6276, "step": 457 }, { "epoch": 0.02789026581006607, "grad_norm": 0.8574097920804514, "learning_rate": 4.999836788252642e-06, "loss": 0.5974, "step": 458 }, { "epoch": 0.027951161586943947, "grad_norm": 0.8825154827224055, "learning_rate": 4.999835875191701e-06, "loss": 0.6317, "step": 459 }, { "epoch": 0.02801205736382182, "grad_norm": 0.8421333580415384, "learning_rate": 4.999834959583979e-06, "loss": 0.5858, "step": 460 }, { "epoch": 0.028072953140699693, "grad_norm": 0.7911673451621303, "learning_rate": 4.999834041429478e-06, "loss": 0.6435, "step": 461 }, { "epoch": 0.028133848917577566, "grad_norm": 0.8803682659414177, "learning_rate": 4.9998331207281974e-06, "loss": 0.6328, "step": 462 }, { "epoch": 0.02819474469445544, "grad_norm": 0.7932756415951809, "learning_rate": 4.999832197480139e-06, "loss": 0.6487, "step": 463 }, { "epoch": 0.028255640471333315, "grad_norm": 0.8603290962794222, "learning_rate": 4.9998312716853035e-06, "loss": 0.6257, "step": 464 }, { "epoch": 0.028316536248211188, "grad_norm": 0.8804126469049063, "learning_rate": 4.999830343343692e-06, "loss": 0.632, "step": 465 }, { "epoch": 0.02837743202508906, "grad_norm": 0.8996107808081768, "learning_rate": 4.999829412455305e-06, "loss": 0.5827, "step": 466 }, { "epoch": 0.028438327801966933, "grad_norm": 0.8758628922903379, "learning_rate": 4.999828479020144e-06, "loss": 0.672, "step": 467 }, { "epoch": 0.028499223578844806, "grad_norm": 0.8572420595536744, "learning_rate": 4.999827543038209e-06, "loss": 0.5981, "step": 468 }, { "epoch": 0.028560119355722682, "grad_norm": 0.7776560270734729, "learning_rate": 4.9998266045095025e-06, "loss": 0.6186, "step": 469 }, { "epoch": 0.028621015132600555, "grad_norm": 0.9649287795955926, "learning_rate": 4.999825663434024e-06, "loss": 0.6683, "step": 470 }, { "epoch": 0.028681910909478428, "grad_norm": 0.8512084010554957, "learning_rate": 4.9998247198117764e-06, "loss": 0.6113, "step": 471 }, { "epoch": 0.0287428066863563, "grad_norm": 0.9290489888783665, "learning_rate": 4.999823773642759e-06, "loss": 0.7006, "step": 472 }, { "epoch": 0.028803702463234174, "grad_norm": 0.8798085039346486, "learning_rate": 4.999822824926972e-06, "loss": 0.6754, "step": 473 }, { "epoch": 0.028864598240112047, "grad_norm": 0.8976873255604702, "learning_rate": 4.999821873664418e-06, "loss": 0.6282, "step": 474 }, { "epoch": 0.028925494016989923, "grad_norm": 0.9058676247338799, "learning_rate": 4.999820919855098e-06, "loss": 0.6124, "step": 475 }, { "epoch": 0.028986389793867796, "grad_norm": 0.8586046721654451, "learning_rate": 4.999819963499012e-06, "loss": 0.6293, "step": 476 }, { "epoch": 0.02904728557074567, "grad_norm": 0.8128610641268553, "learning_rate": 4.999819004596161e-06, "loss": 0.5654, "step": 477 }, { "epoch": 0.02910818134762354, "grad_norm": 0.8368807810282304, "learning_rate": 4.999818043146546e-06, "loss": 0.6426, "step": 478 }, { "epoch": 0.029169077124501414, "grad_norm": 0.8839469693107543, "learning_rate": 4.99981707915017e-06, "loss": 0.6236, "step": 479 }, { "epoch": 0.02922997290137929, "grad_norm": 0.9439892964129745, "learning_rate": 4.999816112607031e-06, "loss": 0.6928, "step": 480 }, { "epoch": 0.029290868678257163, "grad_norm": 0.9791338644215659, "learning_rate": 4.999815143517132e-06, "loss": 0.6777, "step": 481 }, { "epoch": 0.029351764455135036, "grad_norm": 0.8978624281608163, "learning_rate": 4.999814171880473e-06, "loss": 0.6304, "step": 482 }, { "epoch": 0.02941266023201291, "grad_norm": 0.8811033571403838, "learning_rate": 4.999813197697055e-06, "loss": 0.645, "step": 483 }, { "epoch": 0.029473556008890782, "grad_norm": 0.8445482476809665, "learning_rate": 4.9998122209668795e-06, "loss": 0.6622, "step": 484 }, { "epoch": 0.029534451785768658, "grad_norm": 0.8514340570722598, "learning_rate": 4.999811241689948e-06, "loss": 0.6286, "step": 485 }, { "epoch": 0.02959534756264653, "grad_norm": 0.8256422870893059, "learning_rate": 4.99981025986626e-06, "loss": 0.661, "step": 486 }, { "epoch": 0.029656243339524404, "grad_norm": 0.842447794611616, "learning_rate": 4.999809275495817e-06, "loss": 0.5762, "step": 487 }, { "epoch": 0.029717139116402277, "grad_norm": 0.8902189507645389, "learning_rate": 4.999808288578621e-06, "loss": 0.6152, "step": 488 }, { "epoch": 0.02977803489328015, "grad_norm": 0.9032549698145211, "learning_rate": 4.999807299114672e-06, "loss": 0.5915, "step": 489 }, { "epoch": 0.029838930670158026, "grad_norm": 0.8628736875286452, "learning_rate": 4.999806307103972e-06, "loss": 0.5834, "step": 490 }, { "epoch": 0.0298998264470359, "grad_norm": 0.8574011370442478, "learning_rate": 4.99980531254652e-06, "loss": 0.656, "step": 491 }, { "epoch": 0.02996072222391377, "grad_norm": 0.8204905039182027, "learning_rate": 4.9998043154423185e-06, "loss": 0.6307, "step": 492 }, { "epoch": 0.030021618000791644, "grad_norm": 0.9003122139678936, "learning_rate": 4.999803315791369e-06, "loss": 0.645, "step": 493 }, { "epoch": 0.030082513777669517, "grad_norm": 0.8472178063785583, "learning_rate": 4.999802313593671e-06, "loss": 0.6423, "step": 494 }, { "epoch": 0.030143409554547394, "grad_norm": 0.947024560501822, "learning_rate": 4.999801308849228e-06, "loss": 0.6295, "step": 495 }, { "epoch": 0.030204305331425266, "grad_norm": 0.8582464915081885, "learning_rate": 4.999800301558037e-06, "loss": 0.6062, "step": 496 }, { "epoch": 0.03026520110830314, "grad_norm": 0.9226052488796491, "learning_rate": 4.999799291720103e-06, "loss": 0.5993, "step": 497 }, { "epoch": 0.030326096885181012, "grad_norm": 0.7994435196941898, "learning_rate": 4.999798279335426e-06, "loss": 0.6676, "step": 498 }, { "epoch": 0.030386992662058885, "grad_norm": 0.8641846892253697, "learning_rate": 4.999797264404006e-06, "loss": 0.6413, "step": 499 }, { "epoch": 0.03044788843893676, "grad_norm": 0.9212093973444776, "learning_rate": 4.999796246925844e-06, "loss": 0.6241, "step": 500 }, { "epoch": 0.030508784215814634, "grad_norm": 0.8953382523685107, "learning_rate": 4.999795226900942e-06, "loss": 0.6708, "step": 501 }, { "epoch": 0.030569679992692507, "grad_norm": 0.8633566355474558, "learning_rate": 4.9997942043293e-06, "loss": 0.6247, "step": 502 }, { "epoch": 0.03063057576957038, "grad_norm": 0.9213562353535294, "learning_rate": 4.99979317921092e-06, "loss": 0.6294, "step": 503 }, { "epoch": 0.030691471546448253, "grad_norm": 0.9319135391682981, "learning_rate": 4.9997921515458034e-06, "loss": 0.6796, "step": 504 }, { "epoch": 0.03075236732332613, "grad_norm": 0.8314602517634049, "learning_rate": 4.99979112133395e-06, "loss": 0.6616, "step": 505 }, { "epoch": 0.030813263100204002, "grad_norm": 0.8529202620576182, "learning_rate": 4.999790088575361e-06, "loss": 0.6847, "step": 506 }, { "epoch": 0.030874158877081875, "grad_norm": 0.9324200193624258, "learning_rate": 4.9997890532700375e-06, "loss": 0.6372, "step": 507 }, { "epoch": 0.030935054653959747, "grad_norm": 0.8947452907370006, "learning_rate": 4.999788015417982e-06, "loss": 0.6792, "step": 508 }, { "epoch": 0.03099595043083762, "grad_norm": 0.8675204188242819, "learning_rate": 4.9997869750191945e-06, "loss": 0.6739, "step": 509 }, { "epoch": 0.031056846207715497, "grad_norm": 0.916743570141975, "learning_rate": 4.999785932073674e-06, "loss": 0.6995, "step": 510 }, { "epoch": 0.03111774198459337, "grad_norm": 0.8589994995363067, "learning_rate": 4.999784886581426e-06, "loss": 0.665, "step": 511 }, { "epoch": 0.031178637761471242, "grad_norm": 0.892772193362744, "learning_rate": 4.999783838542448e-06, "loss": 0.5941, "step": 512 }, { "epoch": 0.031239533538349115, "grad_norm": 0.9116854607245185, "learning_rate": 4.999782787956742e-06, "loss": 0.6227, "step": 513 }, { "epoch": 0.03130042931522699, "grad_norm": 0.9205514070820439, "learning_rate": 4.99978173482431e-06, "loss": 0.6396, "step": 514 }, { "epoch": 0.03136132509210486, "grad_norm": 0.8921555871366313, "learning_rate": 4.999780679145152e-06, "loss": 0.6008, "step": 515 }, { "epoch": 0.031422220868982734, "grad_norm": 0.8640570532389226, "learning_rate": 4.999779620919269e-06, "loss": 0.5905, "step": 516 }, { "epoch": 0.03148311664586061, "grad_norm": 0.8659278493603734, "learning_rate": 4.999778560146663e-06, "loss": 0.6742, "step": 517 }, { "epoch": 0.031544012422738486, "grad_norm": 0.8797486433033918, "learning_rate": 4.999777496827334e-06, "loss": 0.684, "step": 518 }, { "epoch": 0.03160490819961636, "grad_norm": 0.9023872523215322, "learning_rate": 4.999776430961284e-06, "loss": 0.6229, "step": 519 }, { "epoch": 0.03166580397649423, "grad_norm": 0.9529985199337727, "learning_rate": 4.999775362548514e-06, "loss": 0.6656, "step": 520 }, { "epoch": 0.031726699753372105, "grad_norm": 0.8798293295109569, "learning_rate": 4.999774291589025e-06, "loss": 0.626, "step": 521 }, { "epoch": 0.03178759553024998, "grad_norm": 0.8742176709815166, "learning_rate": 4.999773218082817e-06, "loss": 0.584, "step": 522 }, { "epoch": 0.03184849130712785, "grad_norm": 0.8449710716525559, "learning_rate": 4.9997721420298926e-06, "loss": 0.6074, "step": 523 }, { "epoch": 0.03190938708400572, "grad_norm": 0.8766425922899205, "learning_rate": 4.999771063430252e-06, "loss": 0.6151, "step": 524 }, { "epoch": 0.031970282860883596, "grad_norm": 0.8673171725822507, "learning_rate": 4.999769982283897e-06, "loss": 0.6324, "step": 525 }, { "epoch": 0.03203117863776147, "grad_norm": 0.8753341372596111, "learning_rate": 4.999768898590829e-06, "loss": 0.7082, "step": 526 }, { "epoch": 0.03209207441463934, "grad_norm": 0.8658220014086978, "learning_rate": 4.999767812351047e-06, "loss": 0.5884, "step": 527 }, { "epoch": 0.03215297019151722, "grad_norm": 0.9201531370795614, "learning_rate": 4.999766723564554e-06, "loss": 0.6084, "step": 528 }, { "epoch": 0.032213865968395095, "grad_norm": 0.8889450976246752, "learning_rate": 4.999765632231352e-06, "loss": 0.6634, "step": 529 }, { "epoch": 0.03227476174527297, "grad_norm": 0.857675306959387, "learning_rate": 4.999764538351439e-06, "loss": 0.6411, "step": 530 }, { "epoch": 0.03233565752215084, "grad_norm": 0.9048851142462304, "learning_rate": 4.999763441924818e-06, "loss": 0.5977, "step": 531 }, { "epoch": 0.03239655329902871, "grad_norm": 0.8919630006035432, "learning_rate": 4.999762342951491e-06, "loss": 0.6414, "step": 532 }, { "epoch": 0.032457449075906586, "grad_norm": 0.901297362341284, "learning_rate": 4.999761241431457e-06, "loss": 0.6402, "step": 533 }, { "epoch": 0.03251834485278446, "grad_norm": 0.8681590587464822, "learning_rate": 4.9997601373647185e-06, "loss": 0.6583, "step": 534 }, { "epoch": 0.03257924062966233, "grad_norm": 0.8539040316466944, "learning_rate": 4.9997590307512765e-06, "loss": 0.5911, "step": 535 }, { "epoch": 0.032640136406540204, "grad_norm": 0.9441666570205184, "learning_rate": 4.999757921591132e-06, "loss": 0.6235, "step": 536 }, { "epoch": 0.03270103218341808, "grad_norm": 0.9414750838533829, "learning_rate": 4.999756809884287e-06, "loss": 0.619, "step": 537 }, { "epoch": 0.03276192796029595, "grad_norm": 0.9175708951908074, "learning_rate": 4.99975569563074e-06, "loss": 0.6257, "step": 538 }, { "epoch": 0.03282282373717383, "grad_norm": 0.9214948183903635, "learning_rate": 4.999754578830495e-06, "loss": 0.5955, "step": 539 }, { "epoch": 0.0328837195140517, "grad_norm": 0.9424129726282491, "learning_rate": 4.9997534594835514e-06, "loss": 0.6202, "step": 540 }, { "epoch": 0.032944615290929576, "grad_norm": 0.9389387099284442, "learning_rate": 4.999752337589911e-06, "loss": 0.5515, "step": 541 }, { "epoch": 0.03300551106780745, "grad_norm": 0.88234522356561, "learning_rate": 4.999751213149575e-06, "loss": 0.6645, "step": 542 }, { "epoch": 0.03306640684468532, "grad_norm": 0.9028932671934276, "learning_rate": 4.999750086162545e-06, "loss": 0.558, "step": 543 }, { "epoch": 0.033127302621563194, "grad_norm": 0.8040073518496615, "learning_rate": 4.999748956628822e-06, "loss": 0.6015, "step": 544 }, { "epoch": 0.03318819839844107, "grad_norm": 0.9442654574210785, "learning_rate": 4.999747824548405e-06, "loss": 0.6639, "step": 545 }, { "epoch": 0.03324909417531894, "grad_norm": 0.9015275678577019, "learning_rate": 4.9997466899212974e-06, "loss": 0.6872, "step": 546 }, { "epoch": 0.03330998995219681, "grad_norm": 0.8163607014405337, "learning_rate": 4.9997455527475005e-06, "loss": 0.5867, "step": 547 }, { "epoch": 0.033370885729074685, "grad_norm": 0.8935310340996083, "learning_rate": 4.999744413027014e-06, "loss": 0.6019, "step": 548 }, { "epoch": 0.033431781505952565, "grad_norm": 0.8901360528923237, "learning_rate": 4.999743270759841e-06, "loss": 0.622, "step": 549 }, { "epoch": 0.03349267728283044, "grad_norm": 0.8586180481140363, "learning_rate": 4.999742125945981e-06, "loss": 0.616, "step": 550 }, { "epoch": 0.03355357305970831, "grad_norm": 0.8230395599384366, "learning_rate": 4.999740978585435e-06, "loss": 0.5988, "step": 551 }, { "epoch": 0.033614468836586184, "grad_norm": 0.9145119778704469, "learning_rate": 4.999739828678206e-06, "loss": 0.5673, "step": 552 }, { "epoch": 0.03367536461346406, "grad_norm": 1.0541586394347071, "learning_rate": 4.999738676224294e-06, "loss": 0.6603, "step": 553 }, { "epoch": 0.03373626039034193, "grad_norm": 0.873099280451214, "learning_rate": 4.9997375212236995e-06, "loss": 0.708, "step": 554 }, { "epoch": 0.0337971561672198, "grad_norm": 0.9562625677917975, "learning_rate": 4.999736363676424e-06, "loss": 0.5916, "step": 555 }, { "epoch": 0.033858051944097675, "grad_norm": 0.8317525720319099, "learning_rate": 4.9997352035824695e-06, "loss": 0.6216, "step": 556 }, { "epoch": 0.03391894772097555, "grad_norm": 0.9171177659800822, "learning_rate": 4.9997340409418375e-06, "loss": 0.63, "step": 557 }, { "epoch": 0.03397984349785342, "grad_norm": 0.8989458141767893, "learning_rate": 4.999732875754527e-06, "loss": 0.5978, "step": 558 }, { "epoch": 0.0340407392747313, "grad_norm": 0.9019813252373554, "learning_rate": 4.999731708020542e-06, "loss": 0.5635, "step": 559 }, { "epoch": 0.03410163505160917, "grad_norm": 0.8727992627575653, "learning_rate": 4.999730537739882e-06, "loss": 0.6766, "step": 560 }, { "epoch": 0.034162530828487046, "grad_norm": 1.047977154290726, "learning_rate": 4.999729364912548e-06, "loss": 0.602, "step": 561 }, { "epoch": 0.03422342660536492, "grad_norm": 0.8666786451313352, "learning_rate": 4.999728189538542e-06, "loss": 0.5989, "step": 562 }, { "epoch": 0.03428432238224279, "grad_norm": 0.9645031060810192, "learning_rate": 4.999727011617865e-06, "loss": 0.658, "step": 563 }, { "epoch": 0.034345218159120665, "grad_norm": 0.8940944226840101, "learning_rate": 4.999725831150518e-06, "loss": 0.6444, "step": 564 }, { "epoch": 0.03440611393599854, "grad_norm": 0.9346654203953113, "learning_rate": 4.999724648136502e-06, "loss": 0.6207, "step": 565 }, { "epoch": 0.03446700971287641, "grad_norm": 0.9113792083007628, "learning_rate": 4.999723462575819e-06, "loss": 0.5924, "step": 566 }, { "epoch": 0.03452790548975428, "grad_norm": 0.8895091665305688, "learning_rate": 4.999722274468469e-06, "loss": 0.6159, "step": 567 }, { "epoch": 0.034588801266632156, "grad_norm": 0.8292053902675454, "learning_rate": 4.999721083814455e-06, "loss": 0.6013, "step": 568 }, { "epoch": 0.034649697043510036, "grad_norm": 0.8664356414310306, "learning_rate": 4.999719890613776e-06, "loss": 0.5749, "step": 569 }, { "epoch": 0.03471059282038791, "grad_norm": 0.8999877858699046, "learning_rate": 4.999718694866435e-06, "loss": 0.6479, "step": 570 }, { "epoch": 0.03477148859726578, "grad_norm": 0.8468004589859273, "learning_rate": 4.999717496572433e-06, "loss": 0.6039, "step": 571 }, { "epoch": 0.034832384374143655, "grad_norm": 0.872879419765749, "learning_rate": 4.99971629573177e-06, "loss": 0.6106, "step": 572 }, { "epoch": 0.03489328015102153, "grad_norm": 0.9334862102425937, "learning_rate": 4.999715092344448e-06, "loss": 0.6107, "step": 573 }, { "epoch": 0.0349541759278994, "grad_norm": 0.9291657398918164, "learning_rate": 4.999713886410468e-06, "loss": 0.592, "step": 574 }, { "epoch": 0.03501507170477727, "grad_norm": 0.9563999889459246, "learning_rate": 4.999712677929832e-06, "loss": 0.5983, "step": 575 }, { "epoch": 0.035075967481655146, "grad_norm": 0.9319219175370166, "learning_rate": 4.99971146690254e-06, "loss": 0.5864, "step": 576 }, { "epoch": 0.03513686325853302, "grad_norm": 0.899887376892984, "learning_rate": 4.999710253328595e-06, "loss": 0.6194, "step": 577 }, { "epoch": 0.03519775903541089, "grad_norm": 0.8715018890128784, "learning_rate": 4.999709037207997e-06, "loss": 0.6476, "step": 578 }, { "epoch": 0.035258654812288764, "grad_norm": 0.944585602653932, "learning_rate": 4.999707818540747e-06, "loss": 0.602, "step": 579 }, { "epoch": 0.035319550589166644, "grad_norm": 0.8561889637672415, "learning_rate": 4.999706597326847e-06, "loss": 0.6168, "step": 580 }, { "epoch": 0.03538044636604452, "grad_norm": 0.8056900907454947, "learning_rate": 4.999705373566297e-06, "loss": 0.6431, "step": 581 }, { "epoch": 0.03544134214292239, "grad_norm": 0.8910734324274614, "learning_rate": 4.9997041472591e-06, "loss": 0.6251, "step": 582 }, { "epoch": 0.03550223791980026, "grad_norm": 0.9032061941764189, "learning_rate": 4.999702918405256e-06, "loss": 0.5924, "step": 583 }, { "epoch": 0.035563133696678136, "grad_norm": 0.9841111970684098, "learning_rate": 4.999701687004767e-06, "loss": 0.549, "step": 584 }, { "epoch": 0.03562402947355601, "grad_norm": 0.9286790743514601, "learning_rate": 4.999700453057633e-06, "loss": 0.5954, "step": 585 }, { "epoch": 0.03568492525043388, "grad_norm": 0.8751425080153574, "learning_rate": 4.999699216563857e-06, "loss": 0.639, "step": 586 }, { "epoch": 0.035745821027311754, "grad_norm": 0.993809385074006, "learning_rate": 4.99969797752344e-06, "loss": 0.6132, "step": 587 }, { "epoch": 0.03580671680418963, "grad_norm": 0.912702769692447, "learning_rate": 4.999696735936382e-06, "loss": 0.583, "step": 588 }, { "epoch": 0.0358676125810675, "grad_norm": 1.0278836228844093, "learning_rate": 4.9996954918026844e-06, "loss": 0.6257, "step": 589 }, { "epoch": 0.03592850835794538, "grad_norm": 0.9310595252437611, "learning_rate": 4.99969424512235e-06, "loss": 0.6049, "step": 590 }, { "epoch": 0.03598940413482325, "grad_norm": 1.0095404541613693, "learning_rate": 4.999692995895378e-06, "loss": 0.6355, "step": 591 }, { "epoch": 0.036050299911701125, "grad_norm": 0.8645506411402605, "learning_rate": 4.999691744121771e-06, "loss": 0.6055, "step": 592 }, { "epoch": 0.036111195688579, "grad_norm": 0.8764079291608898, "learning_rate": 4.999690489801531e-06, "loss": 0.5862, "step": 593 }, { "epoch": 0.03617209146545687, "grad_norm": 0.9658373514336904, "learning_rate": 4.999689232934657e-06, "loss": 0.5565, "step": 594 }, { "epoch": 0.036232987242334744, "grad_norm": 0.9244628996065855, "learning_rate": 4.999687973521152e-06, "loss": 0.6371, "step": 595 }, { "epoch": 0.03629388301921262, "grad_norm": 0.8841726696823312, "learning_rate": 4.999686711561017e-06, "loss": 0.6546, "step": 596 }, { "epoch": 0.03635477879609049, "grad_norm": 0.9090601064042594, "learning_rate": 4.999685447054253e-06, "loss": 0.6076, "step": 597 }, { "epoch": 0.03641567457296836, "grad_norm": 0.9095515469713252, "learning_rate": 4.999684180000862e-06, "loss": 0.5946, "step": 598 }, { "epoch": 0.036476570349846235, "grad_norm": 0.9787665184280723, "learning_rate": 4.999682910400844e-06, "loss": 0.6599, "step": 599 }, { "epoch": 0.036537466126724115, "grad_norm": 0.9284584729010522, "learning_rate": 4.999681638254202e-06, "loss": 0.6131, "step": 600 }, { "epoch": 0.03659836190360199, "grad_norm": 0.9454009442104677, "learning_rate": 4.999680363560935e-06, "loss": 0.675, "step": 601 }, { "epoch": 0.03665925768047986, "grad_norm": 0.9077061559228002, "learning_rate": 4.999679086321046e-06, "loss": 0.6455, "step": 602 }, { "epoch": 0.03672015345735773, "grad_norm": 0.8582415633151084, "learning_rate": 4.999677806534536e-06, "loss": 0.664, "step": 603 }, { "epoch": 0.036781049234235606, "grad_norm": 0.8781249091544013, "learning_rate": 4.999676524201406e-06, "loss": 0.6031, "step": 604 }, { "epoch": 0.03684194501111348, "grad_norm": 0.9170951091774144, "learning_rate": 4.9996752393216585e-06, "loss": 0.6073, "step": 605 }, { "epoch": 0.03690284078799135, "grad_norm": 0.9041797233698343, "learning_rate": 4.999673951895293e-06, "loss": 0.6117, "step": 606 }, { "epoch": 0.036963736564869225, "grad_norm": 0.882594077824798, "learning_rate": 4.999672661922313e-06, "loss": 0.5807, "step": 607 }, { "epoch": 0.0370246323417471, "grad_norm": 0.9469328310521461, "learning_rate": 4.999671369402716e-06, "loss": 0.6157, "step": 608 }, { "epoch": 0.03708552811862497, "grad_norm": 0.9452051310432236, "learning_rate": 4.999670074336508e-06, "loss": 0.6018, "step": 609 }, { "epoch": 0.03714642389550285, "grad_norm": 0.9039953983026277, "learning_rate": 4.999668776723687e-06, "loss": 0.6013, "step": 610 }, { "epoch": 0.03720731967238072, "grad_norm": 0.9252127217156216, "learning_rate": 4.999667476564256e-06, "loss": 0.6308, "step": 611 }, { "epoch": 0.037268215449258596, "grad_norm": 0.864873936782976, "learning_rate": 4.999666173858215e-06, "loss": 0.6641, "step": 612 }, { "epoch": 0.03732911122613647, "grad_norm": 0.9580950388042523, "learning_rate": 4.999664868605567e-06, "loss": 0.5854, "step": 613 }, { "epoch": 0.03739000700301434, "grad_norm": 0.9256683906637035, "learning_rate": 4.9996635608063115e-06, "loss": 0.6399, "step": 614 }, { "epoch": 0.037450902779892215, "grad_norm": 0.9016899595850172, "learning_rate": 4.999662250460451e-06, "loss": 0.6404, "step": 615 }, { "epoch": 0.03751179855677009, "grad_norm": 0.9047804944091163, "learning_rate": 4.999660937567987e-06, "loss": 0.6561, "step": 616 }, { "epoch": 0.03757269433364796, "grad_norm": 0.9325088912193812, "learning_rate": 4.999659622128921e-06, "loss": 0.6247, "step": 617 }, { "epoch": 0.03763359011052583, "grad_norm": 0.9038121092027975, "learning_rate": 4.999658304143252e-06, "loss": 0.5715, "step": 618 }, { "epoch": 0.037694485887403706, "grad_norm": 0.921784187090497, "learning_rate": 4.9996569836109844e-06, "loss": 0.5563, "step": 619 }, { "epoch": 0.03775538166428158, "grad_norm": 0.8897618584779313, "learning_rate": 4.999655660532118e-06, "loss": 0.5698, "step": 620 }, { "epoch": 0.03781627744115946, "grad_norm": 0.9198661021216509, "learning_rate": 4.999654334906654e-06, "loss": 0.6161, "step": 621 }, { "epoch": 0.03787717321803733, "grad_norm": 0.9152990649347437, "learning_rate": 4.999653006734594e-06, "loss": 0.5984, "step": 622 }, { "epoch": 0.037938068994915204, "grad_norm": 0.953253278403368, "learning_rate": 4.99965167601594e-06, "loss": 0.5599, "step": 623 }, { "epoch": 0.03799896477179308, "grad_norm": 0.9791020021243315, "learning_rate": 4.9996503427506925e-06, "loss": 0.5426, "step": 624 }, { "epoch": 0.03805986054867095, "grad_norm": 0.9883139154101797, "learning_rate": 4.9996490069388535e-06, "loss": 0.5856, "step": 625 }, { "epoch": 0.03812075632554882, "grad_norm": 0.9910716964620262, "learning_rate": 4.9996476685804235e-06, "loss": 0.5973, "step": 626 }, { "epoch": 0.038181652102426696, "grad_norm": 0.8817347575755441, "learning_rate": 4.999646327675405e-06, "loss": 0.5756, "step": 627 }, { "epoch": 0.03824254787930457, "grad_norm": 0.9007983196151219, "learning_rate": 4.999644984223798e-06, "loss": 0.5948, "step": 628 }, { "epoch": 0.03830344365618244, "grad_norm": 0.9433073365518962, "learning_rate": 4.999643638225605e-06, "loss": 0.644, "step": 629 }, { "epoch": 0.038364339433060314, "grad_norm": 0.8448417977554687, "learning_rate": 4.999642289680828e-06, "loss": 0.5983, "step": 630 }, { "epoch": 0.038425235209938194, "grad_norm": 1.0049028553947297, "learning_rate": 4.999640938589466e-06, "loss": 0.6161, "step": 631 }, { "epoch": 0.03848613098681607, "grad_norm": 0.8934088358919284, "learning_rate": 4.999639584951524e-06, "loss": 0.6236, "step": 632 }, { "epoch": 0.03854702676369394, "grad_norm": 0.9209963584503512, "learning_rate": 4.999638228766999e-06, "loss": 0.6077, "step": 633 }, { "epoch": 0.03860792254057181, "grad_norm": 0.8744335984171467, "learning_rate": 4.999636870035894e-06, "loss": 0.5463, "step": 634 }, { "epoch": 0.038668818317449685, "grad_norm": 0.9006860787013771, "learning_rate": 4.999635508758213e-06, "loss": 0.6572, "step": 635 }, { "epoch": 0.03872971409432756, "grad_norm": 1.0048829027310382, "learning_rate": 4.999634144933954e-06, "loss": 0.6219, "step": 636 }, { "epoch": 0.03879060987120543, "grad_norm": 0.960703868038752, "learning_rate": 4.999632778563121e-06, "loss": 0.6192, "step": 637 }, { "epoch": 0.038851505648083304, "grad_norm": 0.8858878315830038, "learning_rate": 4.999631409645712e-06, "loss": 0.6386, "step": 638 }, { "epoch": 0.03891240142496118, "grad_norm": 0.9100843045170944, "learning_rate": 4.999630038181732e-06, "loss": 0.6065, "step": 639 }, { "epoch": 0.03897329720183905, "grad_norm": 0.9013908681215274, "learning_rate": 4.999628664171181e-06, "loss": 0.6028, "step": 640 }, { "epoch": 0.03903419297871693, "grad_norm": 0.9344584537333636, "learning_rate": 4.99962728761406e-06, "loss": 0.553, "step": 641 }, { "epoch": 0.0390950887555948, "grad_norm": 0.9112795402529298, "learning_rate": 4.99962590851037e-06, "loss": 0.5771, "step": 642 }, { "epoch": 0.039155984532472675, "grad_norm": 0.9213674452736826, "learning_rate": 4.999624526860114e-06, "loss": 0.6231, "step": 643 }, { "epoch": 0.03921688030935055, "grad_norm": 0.9540262167687221, "learning_rate": 4.999623142663293e-06, "loss": 0.5647, "step": 644 }, { "epoch": 0.03927777608622842, "grad_norm": 0.9364337374882771, "learning_rate": 4.999621755919906e-06, "loss": 0.6524, "step": 645 }, { "epoch": 0.03933867186310629, "grad_norm": 1.0274540312449252, "learning_rate": 4.999620366629958e-06, "loss": 0.6092, "step": 646 }, { "epoch": 0.039399567639984166, "grad_norm": 0.9142972916088651, "learning_rate": 4.999618974793448e-06, "loss": 0.6066, "step": 647 }, { "epoch": 0.03946046341686204, "grad_norm": 0.830492781702811, "learning_rate": 4.9996175804103775e-06, "loss": 0.6173, "step": 648 }, { "epoch": 0.03952135919373991, "grad_norm": 0.9094778739119247, "learning_rate": 4.99961618348075e-06, "loss": 0.6474, "step": 649 }, { "epoch": 0.039582254970617785, "grad_norm": 0.9674054701354592, "learning_rate": 4.9996147840045645e-06, "loss": 0.6655, "step": 650 }, { "epoch": 0.039643150747495665, "grad_norm": 0.8876152331177954, "learning_rate": 4.9996133819818235e-06, "loss": 0.6642, "step": 651 }, { "epoch": 0.03970404652437354, "grad_norm": 0.9971611987194013, "learning_rate": 4.999611977412529e-06, "loss": 0.5769, "step": 652 }, { "epoch": 0.03976494230125141, "grad_norm": 0.9597730520114094, "learning_rate": 4.999610570296681e-06, "loss": 0.5856, "step": 653 }, { "epoch": 0.03982583807812928, "grad_norm": 0.8636268067314244, "learning_rate": 4.9996091606342826e-06, "loss": 0.6241, "step": 654 }, { "epoch": 0.039886733855007156, "grad_norm": 0.9005671874770992, "learning_rate": 4.9996077484253335e-06, "loss": 0.6216, "step": 655 }, { "epoch": 0.03994762963188503, "grad_norm": 0.9077103925356858, "learning_rate": 4.9996063336698366e-06, "loss": 0.6289, "step": 656 }, { "epoch": 0.0400085254087629, "grad_norm": 0.9113273615026914, "learning_rate": 4.999604916367793e-06, "loss": 0.5919, "step": 657 }, { "epoch": 0.040069421185640774, "grad_norm": 0.8262017117520554, "learning_rate": 4.999603496519203e-06, "loss": 0.6064, "step": 658 }, { "epoch": 0.04013031696251865, "grad_norm": 0.9694948951627942, "learning_rate": 4.999602074124069e-06, "loss": 0.6463, "step": 659 }, { "epoch": 0.04019121273939652, "grad_norm": 0.8392799413068273, "learning_rate": 4.999600649182393e-06, "loss": 0.668, "step": 660 }, { "epoch": 0.04025210851627439, "grad_norm": 0.957416939579636, "learning_rate": 4.999599221694176e-06, "loss": 0.5946, "step": 661 }, { "epoch": 0.04031300429315227, "grad_norm": 1.0548134373064368, "learning_rate": 4.999597791659418e-06, "loss": 0.6535, "step": 662 }, { "epoch": 0.040373900070030146, "grad_norm": 0.9436762516635531, "learning_rate": 4.999596359078123e-06, "loss": 0.5918, "step": 663 }, { "epoch": 0.04043479584690802, "grad_norm": 0.9425440991316258, "learning_rate": 4.99959492395029e-06, "loss": 0.5987, "step": 664 }, { "epoch": 0.04049569162378589, "grad_norm": 0.9491197095871141, "learning_rate": 4.999593486275923e-06, "loss": 0.6337, "step": 665 }, { "epoch": 0.040556587400663764, "grad_norm": 0.9038244821459279, "learning_rate": 4.999592046055022e-06, "loss": 0.6228, "step": 666 }, { "epoch": 0.04061748317754164, "grad_norm": 0.8932053840474375, "learning_rate": 4.9995906032875874e-06, "loss": 0.6197, "step": 667 }, { "epoch": 0.04067837895441951, "grad_norm": 0.914825936881544, "learning_rate": 4.999589157973623e-06, "loss": 0.5954, "step": 668 }, { "epoch": 0.04073927473129738, "grad_norm": 0.9046013277943018, "learning_rate": 4.999587710113128e-06, "loss": 0.6258, "step": 669 }, { "epoch": 0.040800170508175256, "grad_norm": 0.8894805148565166, "learning_rate": 4.999586259706106e-06, "loss": 0.5538, "step": 670 }, { "epoch": 0.04086106628505313, "grad_norm": 0.986441106154532, "learning_rate": 4.999584806752558e-06, "loss": 0.5559, "step": 671 }, { "epoch": 0.04092196206193101, "grad_norm": 0.9229374952204834, "learning_rate": 4.999583351252484e-06, "loss": 0.6485, "step": 672 }, { "epoch": 0.04098285783880888, "grad_norm": 0.9453155717116566, "learning_rate": 4.9995818932058856e-06, "loss": 0.6161, "step": 673 }, { "epoch": 0.041043753615686754, "grad_norm": 0.9346829885044637, "learning_rate": 4.999580432612767e-06, "loss": 0.5639, "step": 674 }, { "epoch": 0.04110464939256463, "grad_norm": 0.9591041997176056, "learning_rate": 4.999578969473127e-06, "loss": 0.5394, "step": 675 }, { "epoch": 0.0411655451694425, "grad_norm": 1.0664029652476756, "learning_rate": 4.999577503786968e-06, "loss": 0.6045, "step": 676 }, { "epoch": 0.04122644094632037, "grad_norm": 0.9523609107075215, "learning_rate": 4.999576035554291e-06, "loss": 0.6153, "step": 677 }, { "epoch": 0.041287336723198245, "grad_norm": 0.9905026149765738, "learning_rate": 4.999574564775098e-06, "loss": 0.6167, "step": 678 }, { "epoch": 0.04134823250007612, "grad_norm": 0.9037248990876888, "learning_rate": 4.999573091449391e-06, "loss": 0.5963, "step": 679 }, { "epoch": 0.04140912827695399, "grad_norm": 1.07494335887253, "learning_rate": 4.99957161557717e-06, "loss": 0.5675, "step": 680 }, { "epoch": 0.041470024053831864, "grad_norm": 0.9441389547214672, "learning_rate": 4.999570137158438e-06, "loss": 0.6014, "step": 681 }, { "epoch": 0.041530919830709744, "grad_norm": 0.9694120046656267, "learning_rate": 4.999568656193196e-06, "loss": 0.647, "step": 682 }, { "epoch": 0.041591815607587616, "grad_norm": 0.9803819813472991, "learning_rate": 4.999567172681445e-06, "loss": 0.583, "step": 683 }, { "epoch": 0.04165271138446549, "grad_norm": 0.886694519787291, "learning_rate": 4.999565686623188e-06, "loss": 0.5928, "step": 684 }, { "epoch": 0.04171360716134336, "grad_norm": 0.9447995519440603, "learning_rate": 4.999564198018424e-06, "loss": 0.5411, "step": 685 }, { "epoch": 0.041774502938221235, "grad_norm": 0.930279251792346, "learning_rate": 4.999562706867157e-06, "loss": 0.5998, "step": 686 }, { "epoch": 0.04183539871509911, "grad_norm": 0.9479938777674947, "learning_rate": 4.999561213169387e-06, "loss": 0.5381, "step": 687 }, { "epoch": 0.04189629449197698, "grad_norm": 0.9561622308464508, "learning_rate": 4.999559716925116e-06, "loss": 0.6587, "step": 688 }, { "epoch": 0.04195719026885485, "grad_norm": 0.9669579508253766, "learning_rate": 4.999558218134346e-06, "loss": 0.6025, "step": 689 }, { "epoch": 0.042018086045732726, "grad_norm": 0.8637025172586335, "learning_rate": 4.999556716797077e-06, "loss": 0.633, "step": 690 }, { "epoch": 0.0420789818226106, "grad_norm": 0.9384106325659761, "learning_rate": 4.9995552129133125e-06, "loss": 0.6035, "step": 691 }, { "epoch": 0.04213987759948848, "grad_norm": 0.9128893538889681, "learning_rate": 4.999553706483053e-06, "loss": 0.5676, "step": 692 }, { "epoch": 0.04220077337636635, "grad_norm": 0.9462418677363831, "learning_rate": 4.9995521975063e-06, "loss": 0.6357, "step": 693 }, { "epoch": 0.042261669153244225, "grad_norm": 0.8920234034234613, "learning_rate": 4.999550685983055e-06, "loss": 0.6374, "step": 694 }, { "epoch": 0.0423225649301221, "grad_norm": 0.9978034806481134, "learning_rate": 4.99954917191332e-06, "loss": 0.555, "step": 695 }, { "epoch": 0.04238346070699997, "grad_norm": 0.8876790738434981, "learning_rate": 4.999547655297096e-06, "loss": 0.631, "step": 696 }, { "epoch": 0.04244435648387784, "grad_norm": 0.921682239980844, "learning_rate": 4.999546136134385e-06, "loss": 0.6452, "step": 697 }, { "epoch": 0.042505252260755716, "grad_norm": 0.9255251442065211, "learning_rate": 4.999544614425188e-06, "loss": 0.6183, "step": 698 }, { "epoch": 0.04256614803763359, "grad_norm": 0.9059807718047895, "learning_rate": 4.999543090169508e-06, "loss": 0.6693, "step": 699 }, { "epoch": 0.04262704381451146, "grad_norm": 0.8948268457624917, "learning_rate": 4.999541563367344e-06, "loss": 0.5963, "step": 700 }, { "epoch": 0.042687939591389334, "grad_norm": 0.8659892660880926, "learning_rate": 4.9995400340187e-06, "loss": 0.6506, "step": 701 }, { "epoch": 0.04274883536826721, "grad_norm": 1.0102530844779543, "learning_rate": 4.9995385021235765e-06, "loss": 0.6023, "step": 702 }, { "epoch": 0.04280973114514509, "grad_norm": 0.9196330495222329, "learning_rate": 4.999536967681975e-06, "loss": 0.6814, "step": 703 }, { "epoch": 0.04287062692202296, "grad_norm": 1.003902450643636, "learning_rate": 4.999535430693897e-06, "loss": 0.5123, "step": 704 }, { "epoch": 0.04293152269890083, "grad_norm": 0.9996990087861035, "learning_rate": 4.999533891159345e-06, "loss": 0.6605, "step": 705 }, { "epoch": 0.042992418475778706, "grad_norm": 0.9013058004912472, "learning_rate": 4.999532349078319e-06, "loss": 0.6083, "step": 706 }, { "epoch": 0.04305331425265658, "grad_norm": 0.9187164104203875, "learning_rate": 4.999530804450822e-06, "loss": 0.5792, "step": 707 }, { "epoch": 0.04311421002953445, "grad_norm": 0.869619877511593, "learning_rate": 4.999529257276854e-06, "loss": 0.5762, "step": 708 }, { "epoch": 0.043175105806412324, "grad_norm": 1.0165200779387324, "learning_rate": 4.9995277075564185e-06, "loss": 0.6096, "step": 709 }, { "epoch": 0.0432360015832902, "grad_norm": 0.9419292870964436, "learning_rate": 4.999526155289516e-06, "loss": 0.6503, "step": 710 }, { "epoch": 0.04329689736016807, "grad_norm": 0.8973690833937918, "learning_rate": 4.999524600476148e-06, "loss": 0.5583, "step": 711 }, { "epoch": 0.04335779313704594, "grad_norm": 0.9644238717075332, "learning_rate": 4.999523043116316e-06, "loss": 0.6225, "step": 712 }, { "epoch": 0.04341868891392382, "grad_norm": 1.0161142029877794, "learning_rate": 4.999521483210023e-06, "loss": 0.6323, "step": 713 }, { "epoch": 0.043479584690801695, "grad_norm": 0.923608328470222, "learning_rate": 4.999519920757269e-06, "loss": 0.6008, "step": 714 }, { "epoch": 0.04354048046767957, "grad_norm": 0.9724553920360316, "learning_rate": 4.9995183557580555e-06, "loss": 0.7392, "step": 715 }, { "epoch": 0.04360137624455744, "grad_norm": 0.9154211378171839, "learning_rate": 4.9995167882123855e-06, "loss": 0.5985, "step": 716 }, { "epoch": 0.043662272021435314, "grad_norm": 0.9324359341923898, "learning_rate": 4.9995152181202584e-06, "loss": 0.6001, "step": 717 }, { "epoch": 0.04372316779831319, "grad_norm": 1.0140034940689702, "learning_rate": 4.999513645481678e-06, "loss": 0.5939, "step": 718 }, { "epoch": 0.04378406357519106, "grad_norm": 0.9265048799590763, "learning_rate": 4.999512070296646e-06, "loss": 0.6576, "step": 719 }, { "epoch": 0.04384495935206893, "grad_norm": 0.9640522833241182, "learning_rate": 4.999510492565161e-06, "loss": 0.6182, "step": 720 }, { "epoch": 0.043905855128946805, "grad_norm": 0.9486803523455828, "learning_rate": 4.999508912287228e-06, "loss": 0.5919, "step": 721 }, { "epoch": 0.04396675090582468, "grad_norm": 0.938953694254195, "learning_rate": 4.999507329462848e-06, "loss": 0.6497, "step": 722 }, { "epoch": 0.04402764668270256, "grad_norm": 1.0242143796077097, "learning_rate": 4.999505744092021e-06, "loss": 0.6008, "step": 723 }, { "epoch": 0.04408854245958043, "grad_norm": 0.9256790563970995, "learning_rate": 4.999504156174749e-06, "loss": 0.6146, "step": 724 }, { "epoch": 0.044149438236458304, "grad_norm": 1.0143896284552028, "learning_rate": 4.999502565711035e-06, "loss": 0.6521, "step": 725 }, { "epoch": 0.044210334013336176, "grad_norm": 0.8929719792909309, "learning_rate": 4.999500972700879e-06, "loss": 0.617, "step": 726 }, { "epoch": 0.04427122979021405, "grad_norm": 1.0005012415238297, "learning_rate": 4.999499377144284e-06, "loss": 0.6577, "step": 727 }, { "epoch": 0.04433212556709192, "grad_norm": 1.0311959592900908, "learning_rate": 4.999497779041251e-06, "loss": 0.5765, "step": 728 }, { "epoch": 0.044393021343969795, "grad_norm": 0.8731169576004627, "learning_rate": 4.999496178391782e-06, "loss": 0.5969, "step": 729 }, { "epoch": 0.04445391712084767, "grad_norm": 0.8646869955184578, "learning_rate": 4.999494575195878e-06, "loss": 0.62, "step": 730 }, { "epoch": 0.04451481289772554, "grad_norm": 0.9426373451556163, "learning_rate": 4.99949296945354e-06, "loss": 0.6255, "step": 731 }, { "epoch": 0.04457570867460341, "grad_norm": 0.959868185241791, "learning_rate": 4.999491361164771e-06, "loss": 0.5724, "step": 732 }, { "epoch": 0.04463660445148129, "grad_norm": 0.8924901219220744, "learning_rate": 4.9994897503295725e-06, "loss": 0.6726, "step": 733 }, { "epoch": 0.044697500228359166, "grad_norm": 0.9677539445708461, "learning_rate": 4.999488136947945e-06, "loss": 0.5682, "step": 734 }, { "epoch": 0.04475839600523704, "grad_norm": 0.9940509207431923, "learning_rate": 4.999486521019892e-06, "loss": 0.5459, "step": 735 }, { "epoch": 0.04481929178211491, "grad_norm": 0.9577280424090192, "learning_rate": 4.999484902545414e-06, "loss": 0.6042, "step": 736 }, { "epoch": 0.044880187558992785, "grad_norm": 0.9384139213964082, "learning_rate": 4.999483281524512e-06, "loss": 0.548, "step": 737 }, { "epoch": 0.04494108333587066, "grad_norm": 0.9193230592459444, "learning_rate": 4.99948165795719e-06, "loss": 0.5731, "step": 738 }, { "epoch": 0.04500197911274853, "grad_norm": 0.9887821157606658, "learning_rate": 4.999480031843445e-06, "loss": 0.5933, "step": 739 }, { "epoch": 0.0450628748896264, "grad_norm": 0.9403209417411679, "learning_rate": 4.999478403183284e-06, "loss": 0.6465, "step": 740 }, { "epoch": 0.045123770666504276, "grad_norm": 0.9196633532170205, "learning_rate": 4.999476771976706e-06, "loss": 0.5771, "step": 741 }, { "epoch": 0.04518466644338215, "grad_norm": 0.8865552161989972, "learning_rate": 4.999475138223714e-06, "loss": 0.5807, "step": 742 }, { "epoch": 0.04524556222026002, "grad_norm": 0.9165595759481814, "learning_rate": 4.9994735019243066e-06, "loss": 0.6349, "step": 743 }, { "epoch": 0.0453064579971379, "grad_norm": 0.9631010238514078, "learning_rate": 4.999471863078489e-06, "loss": 0.5652, "step": 744 }, { "epoch": 0.045367353774015774, "grad_norm": 0.9887085061737799, "learning_rate": 4.9994702216862615e-06, "loss": 0.647, "step": 745 }, { "epoch": 0.04542824955089365, "grad_norm": 0.9173855291252766, "learning_rate": 4.999468577747625e-06, "loss": 0.634, "step": 746 }, { "epoch": 0.04548914532777152, "grad_norm": 0.9349271936047793, "learning_rate": 4.9994669312625825e-06, "loss": 0.6375, "step": 747 }, { "epoch": 0.04555004110464939, "grad_norm": 0.9976711644210832, "learning_rate": 4.999465282231134e-06, "loss": 0.5569, "step": 748 }, { "epoch": 0.045610936881527266, "grad_norm": 0.9516578277755166, "learning_rate": 4.999463630653283e-06, "loss": 0.5967, "step": 749 }, { "epoch": 0.04567183265840514, "grad_norm": 0.9562888046100467, "learning_rate": 4.99946197652903e-06, "loss": 0.6369, "step": 750 }, { "epoch": 0.04573272843528301, "grad_norm": 0.8918414433334476, "learning_rate": 4.999460319858378e-06, "loss": 0.5717, "step": 751 }, { "epoch": 0.045793624212160884, "grad_norm": 0.8705508655665568, "learning_rate": 4.999458660641327e-06, "loss": 0.6079, "step": 752 }, { "epoch": 0.04585451998903876, "grad_norm": 0.8929097034883898, "learning_rate": 4.99945699887788e-06, "loss": 0.5993, "step": 753 }, { "epoch": 0.04591541576591664, "grad_norm": 0.9270321038640618, "learning_rate": 4.999455334568037e-06, "loss": 0.554, "step": 754 }, { "epoch": 0.04597631154279451, "grad_norm": 0.9538659674683392, "learning_rate": 4.999453667711802e-06, "loss": 0.5975, "step": 755 }, { "epoch": 0.04603720731967238, "grad_norm": 1.0527984843916622, "learning_rate": 4.999451998309176e-06, "loss": 0.6631, "step": 756 }, { "epoch": 0.046098103096550255, "grad_norm": 0.8932276761523216, "learning_rate": 4.999450326360159e-06, "loss": 0.6405, "step": 757 }, { "epoch": 0.04615899887342813, "grad_norm": 0.929582613448443, "learning_rate": 4.999448651864754e-06, "loss": 0.6442, "step": 758 }, { "epoch": 0.046219894650306, "grad_norm": 0.98020531604507, "learning_rate": 4.999446974822964e-06, "loss": 0.582, "step": 759 }, { "epoch": 0.046280790427183874, "grad_norm": 0.9329649880851422, "learning_rate": 4.999445295234789e-06, "loss": 0.5977, "step": 760 }, { "epoch": 0.04634168620406175, "grad_norm": 0.9443838505098391, "learning_rate": 4.999443613100231e-06, "loss": 0.6293, "step": 761 }, { "epoch": 0.04640258198093962, "grad_norm": 1.0175858888368574, "learning_rate": 4.999441928419291e-06, "loss": 0.5832, "step": 762 }, { "epoch": 0.04646347775781749, "grad_norm": 1.0137322426102544, "learning_rate": 4.999440241191972e-06, "loss": 0.5846, "step": 763 }, { "epoch": 0.04652437353469537, "grad_norm": 0.86840977946459, "learning_rate": 4.999438551418276e-06, "loss": 0.6258, "step": 764 }, { "epoch": 0.046585269311573245, "grad_norm": 0.9206770049578523, "learning_rate": 4.999436859098202e-06, "loss": 0.5963, "step": 765 }, { "epoch": 0.04664616508845112, "grad_norm": 1.0060434258546966, "learning_rate": 4.999435164231756e-06, "loss": 0.5695, "step": 766 }, { "epoch": 0.04670706086532899, "grad_norm": 0.9719706880019677, "learning_rate": 4.999433466818936e-06, "loss": 0.5677, "step": 767 }, { "epoch": 0.046767956642206863, "grad_norm": 1.025244253285763, "learning_rate": 4.999431766859746e-06, "loss": 0.5873, "step": 768 }, { "epoch": 0.046828852419084736, "grad_norm": 0.917547173173034, "learning_rate": 4.999430064354186e-06, "loss": 0.5823, "step": 769 }, { "epoch": 0.04688974819596261, "grad_norm": 0.986122664562976, "learning_rate": 4.9994283593022595e-06, "loss": 0.5831, "step": 770 }, { "epoch": 0.04695064397284048, "grad_norm": 0.9848048477602993, "learning_rate": 4.999426651703967e-06, "loss": 0.5854, "step": 771 }, { "epoch": 0.047011539749718355, "grad_norm": 0.9432499839603535, "learning_rate": 4.99942494155931e-06, "loss": 0.6311, "step": 772 }, { "epoch": 0.04707243552659623, "grad_norm": 0.9778250298213461, "learning_rate": 4.999423228868292e-06, "loss": 0.5673, "step": 773 }, { "epoch": 0.04713333130347411, "grad_norm": 1.0060280814458449, "learning_rate": 4.999421513630912e-06, "loss": 0.6342, "step": 774 }, { "epoch": 0.04719422708035198, "grad_norm": 0.9785373952591474, "learning_rate": 4.999419795847174e-06, "loss": 0.5378, "step": 775 }, { "epoch": 0.04725512285722985, "grad_norm": 0.9845759993206283, "learning_rate": 4.999418075517079e-06, "loss": 0.5751, "step": 776 }, { "epoch": 0.047316018634107726, "grad_norm": 0.9959300781454401, "learning_rate": 4.9994163526406305e-06, "loss": 0.6859, "step": 777 }, { "epoch": 0.0473769144109856, "grad_norm": 0.9582242353296838, "learning_rate": 4.999414627217827e-06, "loss": 0.6106, "step": 778 }, { "epoch": 0.04743781018786347, "grad_norm": 0.9365561149969012, "learning_rate": 4.999412899248672e-06, "loss": 0.5972, "step": 779 }, { "epoch": 0.047498705964741345, "grad_norm": 1.0014248409600786, "learning_rate": 4.999411168733167e-06, "loss": 0.6276, "step": 780 }, { "epoch": 0.04755960174161922, "grad_norm": 1.0160758579230542, "learning_rate": 4.999409435671314e-06, "loss": 0.5586, "step": 781 }, { "epoch": 0.04762049751849709, "grad_norm": 0.9753210214773294, "learning_rate": 4.9994077000631145e-06, "loss": 0.6149, "step": 782 }, { "epoch": 0.04768139329537496, "grad_norm": 0.9070771520787325, "learning_rate": 4.999405961908571e-06, "loss": 0.5732, "step": 783 }, { "epoch": 0.047742289072252836, "grad_norm": 0.931001782111828, "learning_rate": 4.999404221207684e-06, "loss": 0.6448, "step": 784 }, { "epoch": 0.047803184849130716, "grad_norm": 0.9226829783856478, "learning_rate": 4.999402477960456e-06, "loss": 0.5966, "step": 785 }, { "epoch": 0.04786408062600859, "grad_norm": 0.9485489763058337, "learning_rate": 4.999400732166889e-06, "loss": 0.5325, "step": 786 }, { "epoch": 0.04792497640288646, "grad_norm": 0.910428303127011, "learning_rate": 4.999398983826984e-06, "loss": 0.5413, "step": 787 }, { "epoch": 0.047985872179764334, "grad_norm": 0.9378468717312077, "learning_rate": 4.999397232940744e-06, "loss": 0.5968, "step": 788 }, { "epoch": 0.04804676795664221, "grad_norm": 0.8886185196144795, "learning_rate": 4.99939547950817e-06, "loss": 0.549, "step": 789 }, { "epoch": 0.04810766373352008, "grad_norm": 1.024857859512105, "learning_rate": 4.999393723529264e-06, "loss": 0.6216, "step": 790 }, { "epoch": 0.04816855951039795, "grad_norm": 0.9320595287923821, "learning_rate": 4.999391965004027e-06, "loss": 0.5877, "step": 791 }, { "epoch": 0.048229455287275826, "grad_norm": 0.9985372307979925, "learning_rate": 4.999390203932461e-06, "loss": 0.6102, "step": 792 }, { "epoch": 0.0482903510641537, "grad_norm": 1.0372495403321893, "learning_rate": 4.999388440314569e-06, "loss": 0.6414, "step": 793 }, { "epoch": 0.04835124684103157, "grad_norm": 0.960207516496005, "learning_rate": 4.999386674150351e-06, "loss": 0.5857, "step": 794 }, { "epoch": 0.04841214261790945, "grad_norm": 0.9615645096753326, "learning_rate": 4.999384905439811e-06, "loss": 0.6236, "step": 795 }, { "epoch": 0.048473038394787324, "grad_norm": 0.9940042365119532, "learning_rate": 4.999383134182951e-06, "loss": 0.5982, "step": 796 }, { "epoch": 0.0485339341716652, "grad_norm": 1.0251465671341073, "learning_rate": 4.999381360379769e-06, "loss": 0.603, "step": 797 }, { "epoch": 0.04859482994854307, "grad_norm": 0.901480048262466, "learning_rate": 4.999379584030269e-06, "loss": 0.6122, "step": 798 }, { "epoch": 0.04865572572542094, "grad_norm": 0.9519083293258097, "learning_rate": 4.999377805134454e-06, "loss": 0.6595, "step": 799 }, { "epoch": 0.048716621502298815, "grad_norm": 0.9872030055075024, "learning_rate": 4.999376023692326e-06, "loss": 0.6186, "step": 800 }, { "epoch": 0.04877751727917669, "grad_norm": 0.964092841760155, "learning_rate": 4.999374239703884e-06, "loss": 0.555, "step": 801 }, { "epoch": 0.04883841305605456, "grad_norm": 0.9538961824793578, "learning_rate": 4.999372453169132e-06, "loss": 0.5997, "step": 802 }, { "epoch": 0.048899308832932434, "grad_norm": 0.8858989606100357, "learning_rate": 4.999370664088071e-06, "loss": 0.6076, "step": 803 }, { "epoch": 0.04896020460981031, "grad_norm": 0.9728672673987911, "learning_rate": 4.999368872460704e-06, "loss": 0.6043, "step": 804 }, { "epoch": 0.049021100386688186, "grad_norm": 0.9598993376637882, "learning_rate": 4.999367078287032e-06, "loss": 0.5297, "step": 805 }, { "epoch": 0.04908199616356606, "grad_norm": 0.9580739268832047, "learning_rate": 4.999365281567056e-06, "loss": 0.5061, "step": 806 }, { "epoch": 0.04914289194044393, "grad_norm": 1.010286124092416, "learning_rate": 4.999363482300778e-06, "loss": 0.5775, "step": 807 }, { "epoch": 0.049203787717321805, "grad_norm": 1.0066947030690239, "learning_rate": 4.999361680488202e-06, "loss": 0.5813, "step": 808 }, { "epoch": 0.04926468349419968, "grad_norm": 1.104535419206599, "learning_rate": 4.999359876129327e-06, "loss": 0.505, "step": 809 }, { "epoch": 0.04932557927107755, "grad_norm": 0.9292068801621749, "learning_rate": 4.999358069224157e-06, "loss": 0.5509, "step": 810 }, { "epoch": 0.049386475047955423, "grad_norm": 0.913664262036396, "learning_rate": 4.999356259772693e-06, "loss": 0.5474, "step": 811 }, { "epoch": 0.049447370824833296, "grad_norm": 0.9686742961110151, "learning_rate": 4.9993544477749364e-06, "loss": 0.5781, "step": 812 }, { "epoch": 0.04950826660171117, "grad_norm": 0.9852725392481547, "learning_rate": 4.99935263323089e-06, "loss": 0.6359, "step": 813 }, { "epoch": 0.04956916237858904, "grad_norm": 1.0066939369739636, "learning_rate": 4.999350816140554e-06, "loss": 0.53, "step": 814 }, { "epoch": 0.04963005815546692, "grad_norm": 1.0096493534567426, "learning_rate": 4.999348996503932e-06, "loss": 0.5883, "step": 815 }, { "epoch": 0.049690953932344795, "grad_norm": 0.8903071596326964, "learning_rate": 4.9993471743210254e-06, "loss": 0.6134, "step": 816 }, { "epoch": 0.04975184970922267, "grad_norm": 0.9692841576790168, "learning_rate": 4.999345349591835e-06, "loss": 0.5866, "step": 817 }, { "epoch": 0.04981274548610054, "grad_norm": 0.9785250226048439, "learning_rate": 4.999343522316364e-06, "loss": 0.5418, "step": 818 }, { "epoch": 0.04987364126297841, "grad_norm": 0.9462277946054182, "learning_rate": 4.9993416924946145e-06, "loss": 0.6104, "step": 819 }, { "epoch": 0.049934537039856286, "grad_norm": 0.9377600211916157, "learning_rate": 4.999339860126587e-06, "loss": 0.6118, "step": 820 }, { "epoch": 0.04999543281673416, "grad_norm": 0.9250218537733235, "learning_rate": 4.999338025212284e-06, "loss": 0.6629, "step": 821 }, { "epoch": 0.05005632859361203, "grad_norm": 0.9292312470670782, "learning_rate": 4.999336187751708e-06, "loss": 0.6509, "step": 822 }, { "epoch": 0.050117224370489905, "grad_norm": 0.9341040472351617, "learning_rate": 4.9993343477448595e-06, "loss": 0.5608, "step": 823 }, { "epoch": 0.05017812014736778, "grad_norm": 0.9735742904458778, "learning_rate": 4.999332505191741e-06, "loss": 0.5559, "step": 824 }, { "epoch": 0.05023901592424565, "grad_norm": 1.0373772947261266, "learning_rate": 4.999330660092355e-06, "loss": 0.6371, "step": 825 }, { "epoch": 0.05029991170112353, "grad_norm": 1.018542355597574, "learning_rate": 4.999328812446704e-06, "loss": 0.6169, "step": 826 }, { "epoch": 0.0503608074780014, "grad_norm": 1.014960941387071, "learning_rate": 4.9993269622547866e-06, "loss": 0.5525, "step": 827 }, { "epoch": 0.050421703254879276, "grad_norm": 1.04122706799669, "learning_rate": 4.999325109516607e-06, "loss": 0.5553, "step": 828 }, { "epoch": 0.05048259903175715, "grad_norm": 0.9918292544711612, "learning_rate": 4.999323254232169e-06, "loss": 0.5774, "step": 829 }, { "epoch": 0.05054349480863502, "grad_norm": 0.9107401710702069, "learning_rate": 4.999321396401471e-06, "loss": 0.5996, "step": 830 }, { "epoch": 0.050604390585512894, "grad_norm": 0.933737560960647, "learning_rate": 4.999319536024516e-06, "loss": 0.5894, "step": 831 }, { "epoch": 0.05066528636239077, "grad_norm": 0.9521402646326996, "learning_rate": 4.9993176731013075e-06, "loss": 0.54, "step": 832 }, { "epoch": 0.05072618213926864, "grad_norm": 0.9278641037840688, "learning_rate": 4.999315807631845e-06, "loss": 0.5682, "step": 833 }, { "epoch": 0.05078707791614651, "grad_norm": 0.8731156499825803, "learning_rate": 4.999313939616131e-06, "loss": 0.6241, "step": 834 }, { "epoch": 0.050847973693024386, "grad_norm": 0.8581551839165459, "learning_rate": 4.999312069054169e-06, "loss": 0.6343, "step": 835 }, { "epoch": 0.050908869469902265, "grad_norm": 0.9763777978879581, "learning_rate": 4.99931019594596e-06, "loss": 0.5899, "step": 836 }, { "epoch": 0.05096976524678014, "grad_norm": 1.0378729561443174, "learning_rate": 4.999308320291505e-06, "loss": 0.6062, "step": 837 }, { "epoch": 0.05103066102365801, "grad_norm": 0.9900485277881569, "learning_rate": 4.999306442090807e-06, "loss": 0.6356, "step": 838 }, { "epoch": 0.051091556800535884, "grad_norm": 0.9531887085745027, "learning_rate": 4.999304561343867e-06, "loss": 0.6107, "step": 839 }, { "epoch": 0.05115245257741376, "grad_norm": 0.9258363215493612, "learning_rate": 4.9993026780506885e-06, "loss": 0.5974, "step": 840 }, { "epoch": 0.05121334835429163, "grad_norm": 0.9540190883099346, "learning_rate": 4.999300792211272e-06, "loss": 0.6013, "step": 841 }, { "epoch": 0.0512742441311695, "grad_norm": 0.902786019858358, "learning_rate": 4.99929890382562e-06, "loss": 0.5832, "step": 842 }, { "epoch": 0.051335139908047375, "grad_norm": 0.9826552125824675, "learning_rate": 4.999297012893734e-06, "loss": 0.5643, "step": 843 }, { "epoch": 0.05139603568492525, "grad_norm": 0.9767142580348566, "learning_rate": 4.999295119415616e-06, "loss": 0.5667, "step": 844 }, { "epoch": 0.05145693146180312, "grad_norm": 0.914766017685997, "learning_rate": 4.999293223391268e-06, "loss": 0.5958, "step": 845 }, { "epoch": 0.051517827238681, "grad_norm": 1.0138141246731234, "learning_rate": 4.999291324820692e-06, "loss": 0.6552, "step": 846 }, { "epoch": 0.051578723015558874, "grad_norm": 0.9395028794347936, "learning_rate": 4.99928942370389e-06, "loss": 0.6078, "step": 847 }, { "epoch": 0.051639618792436746, "grad_norm": 0.9645746957456716, "learning_rate": 4.9992875200408644e-06, "loss": 0.5825, "step": 848 }, { "epoch": 0.05170051456931462, "grad_norm": 0.9459173703794295, "learning_rate": 4.999285613831616e-06, "loss": 0.5547, "step": 849 }, { "epoch": 0.05176141034619249, "grad_norm": 0.988055949633709, "learning_rate": 4.999283705076148e-06, "loss": 0.554, "step": 850 }, { "epoch": 0.051822306123070365, "grad_norm": 0.9166868143248652, "learning_rate": 4.999281793774461e-06, "loss": 0.5712, "step": 851 }, { "epoch": 0.05188320189994824, "grad_norm": 0.8833575473460488, "learning_rate": 4.9992798799265584e-06, "loss": 0.5833, "step": 852 }, { "epoch": 0.05194409767682611, "grad_norm": 0.995540197880221, "learning_rate": 4.999277963532441e-06, "loss": 0.5498, "step": 853 }, { "epoch": 0.05200499345370398, "grad_norm": 0.9189699146347367, "learning_rate": 4.999276044592111e-06, "loss": 0.613, "step": 854 }, { "epoch": 0.052065889230581856, "grad_norm": 0.944068537992771, "learning_rate": 4.999274123105571e-06, "loss": 0.5806, "step": 855 }, { "epoch": 0.052126785007459736, "grad_norm": 0.9907420617496323, "learning_rate": 4.999272199072822e-06, "loss": 0.6301, "step": 856 }, { "epoch": 0.05218768078433761, "grad_norm": 1.0487930927001048, "learning_rate": 4.999270272493867e-06, "loss": 0.6161, "step": 857 }, { "epoch": 0.05224857656121548, "grad_norm": 0.9307390265704412, "learning_rate": 4.999268343368707e-06, "loss": 0.6193, "step": 858 }, { "epoch": 0.052309472338093355, "grad_norm": 0.9736041482820871, "learning_rate": 4.999266411697344e-06, "loss": 0.5209, "step": 859 }, { "epoch": 0.05237036811497123, "grad_norm": 0.9726051870611648, "learning_rate": 4.999264477479782e-06, "loss": 0.574, "step": 860 }, { "epoch": 0.0524312638918491, "grad_norm": 0.9380955889526035, "learning_rate": 4.99926254071602e-06, "loss": 0.5906, "step": 861 }, { "epoch": 0.05249215966872697, "grad_norm": 1.0000164637462818, "learning_rate": 4.999260601406061e-06, "loss": 0.5512, "step": 862 }, { "epoch": 0.052553055445604846, "grad_norm": 1.0668235990302743, "learning_rate": 4.999258659549908e-06, "loss": 0.576, "step": 863 }, { "epoch": 0.05261395122248272, "grad_norm": 0.9206942741447739, "learning_rate": 4.999256715147562e-06, "loss": 0.5928, "step": 864 }, { "epoch": 0.05267484699936059, "grad_norm": 0.9059627335453616, "learning_rate": 4.999254768199025e-06, "loss": 0.6216, "step": 865 }, { "epoch": 0.052735742776238465, "grad_norm": 0.9888613568503292, "learning_rate": 4.9992528187043e-06, "loss": 0.5571, "step": 866 }, { "epoch": 0.052796638553116344, "grad_norm": 1.0487183069217008, "learning_rate": 4.999250866663387e-06, "loss": 0.5197, "step": 867 }, { "epoch": 0.05285753432999422, "grad_norm": 0.9861891119523672, "learning_rate": 4.99924891207629e-06, "loss": 0.5482, "step": 868 }, { "epoch": 0.05291843010687209, "grad_norm": 0.9265120773899509, "learning_rate": 4.9992469549430105e-06, "loss": 0.583, "step": 869 }, { "epoch": 0.05297932588374996, "grad_norm": 0.9620551590646347, "learning_rate": 4.999244995263549e-06, "loss": 0.5933, "step": 870 }, { "epoch": 0.053040221660627836, "grad_norm": 0.8991086303466952, "learning_rate": 4.999243033037909e-06, "loss": 0.5566, "step": 871 }, { "epoch": 0.05310111743750571, "grad_norm": 0.9439204094319452, "learning_rate": 4.999241068266093e-06, "loss": 0.6026, "step": 872 }, { "epoch": 0.05316201321438358, "grad_norm": 0.9693802821420067, "learning_rate": 4.999239100948101e-06, "loss": 0.6113, "step": 873 }, { "epoch": 0.053222908991261454, "grad_norm": 1.0157958705334358, "learning_rate": 4.999237131083936e-06, "loss": 0.5164, "step": 874 }, { "epoch": 0.05328380476813933, "grad_norm": 0.916966239117875, "learning_rate": 4.9992351586736015e-06, "loss": 0.6001, "step": 875 }, { "epoch": 0.0533447005450172, "grad_norm": 0.9062026804756468, "learning_rate": 4.999233183717097e-06, "loss": 0.564, "step": 876 }, { "epoch": 0.05340559632189508, "grad_norm": 0.9692945174482966, "learning_rate": 4.999231206214427e-06, "loss": 0.6266, "step": 877 }, { "epoch": 0.05346649209877295, "grad_norm": 0.9645985893640531, "learning_rate": 4.999229226165591e-06, "loss": 0.6047, "step": 878 }, { "epoch": 0.053527387875650825, "grad_norm": 0.9755561642202087, "learning_rate": 4.999227243570593e-06, "loss": 0.6407, "step": 879 }, { "epoch": 0.0535882836525287, "grad_norm": 0.9607968153497156, "learning_rate": 4.999225258429434e-06, "loss": 0.572, "step": 880 }, { "epoch": 0.05364917942940657, "grad_norm": 1.0366656920694501, "learning_rate": 4.999223270742116e-06, "loss": 0.5733, "step": 881 }, { "epoch": 0.053710075206284444, "grad_norm": 0.9977432683631673, "learning_rate": 4.999221280508641e-06, "loss": 0.601, "step": 882 }, { "epoch": 0.05377097098316232, "grad_norm": 0.9395376531736732, "learning_rate": 4.999219287729012e-06, "loss": 0.5564, "step": 883 }, { "epoch": 0.05383186676004019, "grad_norm": 0.9623150967604917, "learning_rate": 4.999217292403231e-06, "loss": 0.5794, "step": 884 }, { "epoch": 0.05389276253691806, "grad_norm": 0.919783859292383, "learning_rate": 4.999215294531297e-06, "loss": 0.6179, "step": 885 }, { "epoch": 0.053953658313795935, "grad_norm": 1.0199002750942012, "learning_rate": 4.9992132941132175e-06, "loss": 0.622, "step": 886 }, { "epoch": 0.054014554090673815, "grad_norm": 1.0158937419712673, "learning_rate": 4.99921129114899e-06, "loss": 0.6724, "step": 887 }, { "epoch": 0.05407544986755169, "grad_norm": 0.983942346074935, "learning_rate": 4.999209285638618e-06, "loss": 0.5676, "step": 888 }, { "epoch": 0.05413634564442956, "grad_norm": 1.02515205635902, "learning_rate": 4.9992072775821034e-06, "loss": 0.5917, "step": 889 }, { "epoch": 0.054197241421307434, "grad_norm": 0.9629787411552249, "learning_rate": 4.999205266979448e-06, "loss": 0.6072, "step": 890 }, { "epoch": 0.054258137198185306, "grad_norm": 0.9423121251248786, "learning_rate": 4.999203253830655e-06, "loss": 0.5951, "step": 891 }, { "epoch": 0.05431903297506318, "grad_norm": 0.9819222992913832, "learning_rate": 4.999201238135724e-06, "loss": 0.582, "step": 892 }, { "epoch": 0.05437992875194105, "grad_norm": 0.9120249201468291, "learning_rate": 4.999199219894661e-06, "loss": 0.5398, "step": 893 }, { "epoch": 0.054440824528818925, "grad_norm": 1.0033820288334894, "learning_rate": 4.999197199107465e-06, "loss": 0.5592, "step": 894 }, { "epoch": 0.0545017203056968, "grad_norm": 0.9648336948732708, "learning_rate": 4.9991951757741385e-06, "loss": 0.5572, "step": 895 }, { "epoch": 0.05456261608257467, "grad_norm": 0.9958211413284794, "learning_rate": 4.9991931498946844e-06, "loss": 0.5534, "step": 896 }, { "epoch": 0.05462351185945255, "grad_norm": 1.1091107416092967, "learning_rate": 4.9991911214691044e-06, "loss": 0.5244, "step": 897 }, { "epoch": 0.05468440763633042, "grad_norm": 1.052389843086847, "learning_rate": 4.999189090497399e-06, "loss": 0.5887, "step": 898 }, { "epoch": 0.054745303413208296, "grad_norm": 1.048474714029665, "learning_rate": 4.999187056979573e-06, "loss": 0.5791, "step": 899 }, { "epoch": 0.05480619919008617, "grad_norm": 0.9542346999397776, "learning_rate": 4.999185020915628e-06, "loss": 0.6018, "step": 900 }, { "epoch": 0.05486709496696404, "grad_norm": 0.9336475324504342, "learning_rate": 4.9991829823055636e-06, "loss": 0.6023, "step": 901 }, { "epoch": 0.054927990743841915, "grad_norm": 1.0040457828259335, "learning_rate": 4.999180941149384e-06, "loss": 0.6155, "step": 902 }, { "epoch": 0.05498888652071979, "grad_norm": 0.9858314904658784, "learning_rate": 4.9991788974470914e-06, "loss": 0.6287, "step": 903 }, { "epoch": 0.05504978229759766, "grad_norm": 0.9953340490288817, "learning_rate": 4.999176851198687e-06, "loss": 0.5054, "step": 904 }, { "epoch": 0.05511067807447553, "grad_norm": 0.9679008809017587, "learning_rate": 4.999174802404173e-06, "loss": 0.6151, "step": 905 }, { "epoch": 0.055171573851353406, "grad_norm": 0.9669218403930584, "learning_rate": 4.9991727510635515e-06, "loss": 0.5587, "step": 906 }, { "epoch": 0.05523246962823128, "grad_norm": 0.958223416813371, "learning_rate": 4.999170697176825e-06, "loss": 0.5733, "step": 907 }, { "epoch": 0.05529336540510916, "grad_norm": 0.9915322495021359, "learning_rate": 4.999168640743996e-06, "loss": 0.6855, "step": 908 }, { "epoch": 0.05535426118198703, "grad_norm": 0.9869147209236713, "learning_rate": 4.999166581765065e-06, "loss": 0.6708, "step": 909 }, { "epoch": 0.055415156958864904, "grad_norm": 1.001419197418264, "learning_rate": 4.999164520240035e-06, "loss": 0.5856, "step": 910 }, { "epoch": 0.05547605273574278, "grad_norm": 1.0238416578916918, "learning_rate": 4.999162456168909e-06, "loss": 0.5988, "step": 911 }, { "epoch": 0.05553694851262065, "grad_norm": 0.9880094228373512, "learning_rate": 4.9991603895516875e-06, "loss": 0.5768, "step": 912 }, { "epoch": 0.05559784428949852, "grad_norm": 1.0049574948458533, "learning_rate": 4.999158320388374e-06, "loss": 0.5654, "step": 913 }, { "epoch": 0.055658740066376396, "grad_norm": 0.9817767903549594, "learning_rate": 4.999156248678969e-06, "loss": 0.6162, "step": 914 }, { "epoch": 0.05571963584325427, "grad_norm": 1.0330008871990495, "learning_rate": 4.999154174423476e-06, "loss": 0.6172, "step": 915 }, { "epoch": 0.05578053162013214, "grad_norm": 0.9873130877929537, "learning_rate": 4.999152097621897e-06, "loss": 0.5678, "step": 916 }, { "epoch": 0.055841427397010014, "grad_norm": 1.0944351183975671, "learning_rate": 4.999150018274234e-06, "loss": 0.5687, "step": 917 }, { "epoch": 0.055902323173887894, "grad_norm": 0.9938616582301094, "learning_rate": 4.999147936380488e-06, "loss": 0.6131, "step": 918 }, { "epoch": 0.05596321895076577, "grad_norm": 0.9798743780188272, "learning_rate": 4.999145851940664e-06, "loss": 0.5556, "step": 919 }, { "epoch": 0.05602411472764364, "grad_norm": 0.9300771167017353, "learning_rate": 4.9991437649547595e-06, "loss": 0.5776, "step": 920 }, { "epoch": 0.05608501050452151, "grad_norm": 1.0847118771233735, "learning_rate": 4.99914167542278e-06, "loss": 0.548, "step": 921 }, { "epoch": 0.056145906281399385, "grad_norm": 0.9282673970660102, "learning_rate": 4.999139583344728e-06, "loss": 0.5492, "step": 922 }, { "epoch": 0.05620680205827726, "grad_norm": 0.9706297051493294, "learning_rate": 4.9991374887206046e-06, "loss": 0.5056, "step": 923 }, { "epoch": 0.05626769783515513, "grad_norm": 0.9066904517268061, "learning_rate": 4.999135391550411e-06, "loss": 0.5912, "step": 924 }, { "epoch": 0.056328593612033004, "grad_norm": 1.0242793581786516, "learning_rate": 4.99913329183415e-06, "loss": 0.5405, "step": 925 }, { "epoch": 0.05638948938891088, "grad_norm": 0.9012103028359335, "learning_rate": 4.999131189571825e-06, "loss": 0.571, "step": 926 }, { "epoch": 0.05645038516578875, "grad_norm": 0.9875609443521314, "learning_rate": 4.999129084763437e-06, "loss": 0.6046, "step": 927 }, { "epoch": 0.05651128094266663, "grad_norm": 1.1107652948365199, "learning_rate": 4.999126977408987e-06, "loss": 0.5697, "step": 928 }, { "epoch": 0.0565721767195445, "grad_norm": 0.9817597955960062, "learning_rate": 4.999124867508479e-06, "loss": 0.6138, "step": 929 }, { "epoch": 0.056633072496422375, "grad_norm": 0.958178297368871, "learning_rate": 4.999122755061915e-06, "loss": 0.5918, "step": 930 }, { "epoch": 0.05669396827330025, "grad_norm": 0.8915922827243015, "learning_rate": 4.999120640069297e-06, "loss": 0.5916, "step": 931 }, { "epoch": 0.05675486405017812, "grad_norm": 1.0819682663381285, "learning_rate": 4.999118522530626e-06, "loss": 0.6275, "step": 932 }, { "epoch": 0.056815759827055994, "grad_norm": 0.9612767679043457, "learning_rate": 4.999116402445905e-06, "loss": 0.6339, "step": 933 }, { "epoch": 0.056876655603933866, "grad_norm": 1.012949804153997, "learning_rate": 4.999114279815137e-06, "loss": 0.6007, "step": 934 }, { "epoch": 0.05693755138081174, "grad_norm": 0.9924375420962076, "learning_rate": 4.999112154638322e-06, "loss": 0.5566, "step": 935 }, { "epoch": 0.05699844715768961, "grad_norm": 0.9743777338841779, "learning_rate": 4.999110026915465e-06, "loss": 0.5552, "step": 936 }, { "epoch": 0.057059342934567485, "grad_norm": 1.04262078070277, "learning_rate": 4.9991078966465665e-06, "loss": 0.5471, "step": 937 }, { "epoch": 0.057120238711445365, "grad_norm": 1.1653106656095904, "learning_rate": 4.999105763831628e-06, "loss": 0.575, "step": 938 }, { "epoch": 0.05718113448832324, "grad_norm": 0.9610113750728128, "learning_rate": 4.999103628470653e-06, "loss": 0.5534, "step": 939 }, { "epoch": 0.05724203026520111, "grad_norm": 1.0845247767025128, "learning_rate": 4.999101490563642e-06, "loss": 0.6186, "step": 940 }, { "epoch": 0.05730292604207898, "grad_norm": 0.9450490153760482, "learning_rate": 4.9990993501106e-06, "loss": 0.5765, "step": 941 }, { "epoch": 0.057363821818956856, "grad_norm": 0.9389444131240168, "learning_rate": 4.999097207111527e-06, "loss": 0.5432, "step": 942 }, { "epoch": 0.05742471759583473, "grad_norm": 1.0247710972217723, "learning_rate": 4.999095061566426e-06, "loss": 0.5902, "step": 943 }, { "epoch": 0.0574856133727126, "grad_norm": 1.0023695737922138, "learning_rate": 4.999092913475298e-06, "loss": 0.5639, "step": 944 }, { "epoch": 0.057546509149590475, "grad_norm": 0.9459005809385052, "learning_rate": 4.999090762838147e-06, "loss": 0.6156, "step": 945 }, { "epoch": 0.05760740492646835, "grad_norm": 0.9201949013742049, "learning_rate": 4.999088609654973e-06, "loss": 0.5655, "step": 946 }, { "epoch": 0.05766830070334622, "grad_norm": 0.9783418905613359, "learning_rate": 4.999086453925781e-06, "loss": 0.5255, "step": 947 }, { "epoch": 0.05772919648022409, "grad_norm": 1.0413227822670452, "learning_rate": 4.999084295650572e-06, "loss": 0.6082, "step": 948 }, { "epoch": 0.05779009225710197, "grad_norm": 1.0100024236177638, "learning_rate": 4.999082134829346e-06, "loss": 0.5836, "step": 949 }, { "epoch": 0.057850988033979846, "grad_norm": 0.9883102240636047, "learning_rate": 4.999079971462108e-06, "loss": 0.5491, "step": 950 }, { "epoch": 0.05791188381085772, "grad_norm": 0.9952166904643946, "learning_rate": 4.9990778055488595e-06, "loss": 0.6341, "step": 951 }, { "epoch": 0.05797277958773559, "grad_norm": 0.9630369906433324, "learning_rate": 4.999075637089602e-06, "loss": 0.6206, "step": 952 }, { "epoch": 0.058033675364613464, "grad_norm": 0.9232431552310361, "learning_rate": 4.9990734660843385e-06, "loss": 0.5584, "step": 953 }, { "epoch": 0.05809457114149134, "grad_norm": 0.9656190323655853, "learning_rate": 4.999071292533071e-06, "loss": 0.5972, "step": 954 }, { "epoch": 0.05815546691836921, "grad_norm": 1.1141077254197984, "learning_rate": 4.999069116435802e-06, "loss": 0.5993, "step": 955 }, { "epoch": 0.05821636269524708, "grad_norm": 0.9578627542401189, "learning_rate": 4.999066937792533e-06, "loss": 0.597, "step": 956 }, { "epoch": 0.058277258472124956, "grad_norm": 1.0118483158015108, "learning_rate": 4.999064756603266e-06, "loss": 0.5723, "step": 957 }, { "epoch": 0.05833815424900283, "grad_norm": 0.9347829753802769, "learning_rate": 4.999062572868004e-06, "loss": 0.5311, "step": 958 }, { "epoch": 0.05839905002588071, "grad_norm": 0.9821849686024163, "learning_rate": 4.999060386586749e-06, "loss": 0.6159, "step": 959 }, { "epoch": 0.05845994580275858, "grad_norm": 0.9625982163690116, "learning_rate": 4.999058197759504e-06, "loss": 0.6315, "step": 960 }, { "epoch": 0.058520841579636454, "grad_norm": 0.985970204798722, "learning_rate": 4.99905600638627e-06, "loss": 0.5879, "step": 961 }, { "epoch": 0.05858173735651433, "grad_norm": 0.9040630502875485, "learning_rate": 4.99905381246705e-06, "loss": 0.5834, "step": 962 }, { "epoch": 0.0586426331333922, "grad_norm": 0.9529992540525056, "learning_rate": 4.999051616001845e-06, "loss": 0.5297, "step": 963 }, { "epoch": 0.05870352891027007, "grad_norm": 1.0177628538361756, "learning_rate": 4.999049416990659e-06, "loss": 0.5658, "step": 964 }, { "epoch": 0.058764424687147945, "grad_norm": 1.025677254273749, "learning_rate": 4.999047215433493e-06, "loss": 0.5683, "step": 965 }, { "epoch": 0.05882532046402582, "grad_norm": 0.9262938445100659, "learning_rate": 4.9990450113303494e-06, "loss": 0.6469, "step": 966 }, { "epoch": 0.05888621624090369, "grad_norm": 1.0101484030065253, "learning_rate": 4.999042804681231e-06, "loss": 0.5898, "step": 967 }, { "epoch": 0.058947112017781564, "grad_norm": 0.9656606109132535, "learning_rate": 4.99904059548614e-06, "loss": 0.6228, "step": 968 }, { "epoch": 0.059008007794659444, "grad_norm": 0.9758478490957241, "learning_rate": 4.999038383745078e-06, "loss": 0.6303, "step": 969 }, { "epoch": 0.059068903571537316, "grad_norm": 0.9809284115056828, "learning_rate": 4.9990361694580485e-06, "loss": 0.5532, "step": 970 }, { "epoch": 0.05912979934841519, "grad_norm": 0.9660620196980217, "learning_rate": 4.999033952625052e-06, "loss": 0.5676, "step": 971 }, { "epoch": 0.05919069512529306, "grad_norm": 0.982818115263906, "learning_rate": 4.999031733246092e-06, "loss": 0.5945, "step": 972 }, { "epoch": 0.059251590902170935, "grad_norm": 1.0238410769111408, "learning_rate": 4.99902951132117e-06, "loss": 0.6221, "step": 973 }, { "epoch": 0.05931248667904881, "grad_norm": 0.9452475731268106, "learning_rate": 4.99902728685029e-06, "loss": 0.5963, "step": 974 }, { "epoch": 0.05937338245592668, "grad_norm": 0.9353972099363475, "learning_rate": 4.999025059833451e-06, "loss": 0.5465, "step": 975 }, { "epoch": 0.059434278232804554, "grad_norm": 0.9613843024143783, "learning_rate": 4.999022830270659e-06, "loss": 0.6286, "step": 976 }, { "epoch": 0.059495174009682426, "grad_norm": 0.9850293263276708, "learning_rate": 4.999020598161913e-06, "loss": 0.6424, "step": 977 }, { "epoch": 0.0595560697865603, "grad_norm": 0.9243184469812022, "learning_rate": 4.9990183635072174e-06, "loss": 0.5962, "step": 978 }, { "epoch": 0.05961696556343818, "grad_norm": 1.016453642250704, "learning_rate": 4.9990161263065744e-06, "loss": 0.5646, "step": 979 }, { "epoch": 0.05967786134031605, "grad_norm": 1.035162138445804, "learning_rate": 4.999013886559986e-06, "loss": 0.6231, "step": 980 }, { "epoch": 0.059738757117193925, "grad_norm": 0.9630200059730052, "learning_rate": 4.999011644267453e-06, "loss": 0.6196, "step": 981 }, { "epoch": 0.0597996528940718, "grad_norm": 1.0448474251584412, "learning_rate": 4.999009399428979e-06, "loss": 0.5697, "step": 982 }, { "epoch": 0.05986054867094967, "grad_norm": 0.999981067244038, "learning_rate": 4.999007152044567e-06, "loss": 0.5868, "step": 983 }, { "epoch": 0.05992144444782754, "grad_norm": 0.9744796315018791, "learning_rate": 4.9990049021142174e-06, "loss": 0.5211, "step": 984 }, { "epoch": 0.059982340224705416, "grad_norm": 0.9276972091988175, "learning_rate": 4.999002649637935e-06, "loss": 0.6277, "step": 985 }, { "epoch": 0.06004323600158329, "grad_norm": 0.9113780601713034, "learning_rate": 4.9990003946157195e-06, "loss": 0.5607, "step": 986 }, { "epoch": 0.06010413177846116, "grad_norm": 0.9739931912062036, "learning_rate": 4.998998137047575e-06, "loss": 0.599, "step": 987 }, { "epoch": 0.060165027555339035, "grad_norm": 0.9668588210184933, "learning_rate": 4.998995876933503e-06, "loss": 0.6105, "step": 988 }, { "epoch": 0.06022592333221691, "grad_norm": 0.9332709377713672, "learning_rate": 4.998993614273505e-06, "loss": 0.4966, "step": 989 }, { "epoch": 0.06028681910909479, "grad_norm": 1.0368196759374475, "learning_rate": 4.998991349067585e-06, "loss": 0.5705, "step": 990 }, { "epoch": 0.06034771488597266, "grad_norm": 0.9974037908910551, "learning_rate": 4.998989081315745e-06, "loss": 0.5861, "step": 991 }, { "epoch": 0.06040861066285053, "grad_norm": 0.9573758346726627, "learning_rate": 4.998986811017986e-06, "loss": 0.5793, "step": 992 }, { "epoch": 0.060469506439728406, "grad_norm": 0.9953272158318682, "learning_rate": 4.998984538174313e-06, "loss": 0.6221, "step": 993 }, { "epoch": 0.06053040221660628, "grad_norm": 1.1149007331849212, "learning_rate": 4.998982262784725e-06, "loss": 0.6075, "step": 994 }, { "epoch": 0.06059129799348415, "grad_norm": 1.0312247660065077, "learning_rate": 4.998979984849226e-06, "loss": 0.58, "step": 995 }, { "epoch": 0.060652193770362024, "grad_norm": 1.0180439298478936, "learning_rate": 4.998977704367818e-06, "loss": 0.5572, "step": 996 }, { "epoch": 0.0607130895472399, "grad_norm": 1.1309483453352378, "learning_rate": 4.998975421340504e-06, "loss": 0.5389, "step": 997 }, { "epoch": 0.06077398532411777, "grad_norm": 0.9311858795363546, "learning_rate": 4.998973135767285e-06, "loss": 0.6189, "step": 998 }, { "epoch": 0.06083488110099564, "grad_norm": 1.0640609902010916, "learning_rate": 4.998970847648165e-06, "loss": 0.5642, "step": 999 }, { "epoch": 0.06089577687787352, "grad_norm": 0.988276046726626, "learning_rate": 4.998968556983145e-06, "loss": 0.5866, "step": 1000 }, { "epoch": 0.060956672654751395, "grad_norm": 0.9615023882318459, "learning_rate": 4.998966263772228e-06, "loss": 0.5252, "step": 1001 }, { "epoch": 0.06101756843162927, "grad_norm": 0.8900036389478199, "learning_rate": 4.998963968015416e-06, "loss": 0.5887, "step": 1002 }, { "epoch": 0.06107846420850714, "grad_norm": 0.8879068157206109, "learning_rate": 4.998961669712711e-06, "loss": 0.6085, "step": 1003 }, { "epoch": 0.061139359985385014, "grad_norm": 1.0825580494935745, "learning_rate": 4.998959368864117e-06, "loss": 0.5212, "step": 1004 }, { "epoch": 0.06120025576226289, "grad_norm": 1.0409385027245937, "learning_rate": 4.998957065469634e-06, "loss": 0.5686, "step": 1005 }, { "epoch": 0.06126115153914076, "grad_norm": 0.9492060764973137, "learning_rate": 4.998954759529265e-06, "loss": 0.5927, "step": 1006 }, { "epoch": 0.06132204731601863, "grad_norm": 1.048044810796853, "learning_rate": 4.998952451043014e-06, "loss": 0.5682, "step": 1007 }, { "epoch": 0.061382943092896505, "grad_norm": 0.995580997141499, "learning_rate": 4.998950140010882e-06, "loss": 0.5913, "step": 1008 }, { "epoch": 0.06144383886977438, "grad_norm": 0.9932319534639721, "learning_rate": 4.998947826432871e-06, "loss": 0.6104, "step": 1009 }, { "epoch": 0.06150473464665226, "grad_norm": 0.9556961596428207, "learning_rate": 4.998945510308985e-06, "loss": 0.6173, "step": 1010 }, { "epoch": 0.06156563042353013, "grad_norm": 1.0119047229177949, "learning_rate": 4.998943191639225e-06, "loss": 0.5565, "step": 1011 }, { "epoch": 0.061626526200408004, "grad_norm": 1.0398035401782373, "learning_rate": 4.9989408704235935e-06, "loss": 0.6354, "step": 1012 }, { "epoch": 0.061687421977285876, "grad_norm": 0.969674417666448, "learning_rate": 4.998938546662092e-06, "loss": 0.5306, "step": 1013 }, { "epoch": 0.06174831775416375, "grad_norm": 1.0063879523780435, "learning_rate": 4.998936220354726e-06, "loss": 0.5818, "step": 1014 }, { "epoch": 0.06180921353104162, "grad_norm": 0.8904749085346283, "learning_rate": 4.998933891501493e-06, "loss": 0.601, "step": 1015 }, { "epoch": 0.061870109307919495, "grad_norm": 1.013446326427147, "learning_rate": 4.9989315601024e-06, "loss": 0.5847, "step": 1016 }, { "epoch": 0.06193100508479737, "grad_norm": 0.9571432349215787, "learning_rate": 4.998929226157447e-06, "loss": 0.5635, "step": 1017 }, { "epoch": 0.06199190086167524, "grad_norm": 1.050420964096867, "learning_rate": 4.998926889666636e-06, "loss": 0.5378, "step": 1018 }, { "epoch": 0.062052796638553114, "grad_norm": 1.0986099977093682, "learning_rate": 4.998924550629972e-06, "loss": 0.5794, "step": 1019 }, { "epoch": 0.06211369241543099, "grad_norm": 0.9694436907463465, "learning_rate": 4.998922209047454e-06, "loss": 0.5515, "step": 1020 }, { "epoch": 0.062174588192308866, "grad_norm": 0.9709300574970176, "learning_rate": 4.998919864919087e-06, "loss": 0.594, "step": 1021 }, { "epoch": 0.06223548396918674, "grad_norm": 0.9760997300259705, "learning_rate": 4.998917518244872e-06, "loss": 0.5374, "step": 1022 }, { "epoch": 0.06229637974606461, "grad_norm": 1.0520038812715131, "learning_rate": 4.998915169024812e-06, "loss": 0.571, "step": 1023 }, { "epoch": 0.062357275522942485, "grad_norm": 0.9633809633416157, "learning_rate": 4.998912817258909e-06, "loss": 0.5713, "step": 1024 }, { "epoch": 0.06241817129982036, "grad_norm": 1.0732327876368675, "learning_rate": 4.9989104629471655e-06, "loss": 0.6511, "step": 1025 }, { "epoch": 0.06247906707669823, "grad_norm": 1.0379288026350502, "learning_rate": 4.998908106089585e-06, "loss": 0.5848, "step": 1026 }, { "epoch": 0.0625399628535761, "grad_norm": 0.9588500683068157, "learning_rate": 4.998905746686167e-06, "loss": 0.5315, "step": 1027 }, { "epoch": 0.06260085863045398, "grad_norm": 0.9222342926626905, "learning_rate": 4.998903384736917e-06, "loss": 0.6112, "step": 1028 }, { "epoch": 0.06266175440733185, "grad_norm": 1.0003480541846217, "learning_rate": 4.998901020241837e-06, "loss": 0.5928, "step": 1029 }, { "epoch": 0.06272265018420972, "grad_norm": 1.0040783299477993, "learning_rate": 4.998898653200926e-06, "loss": 0.6068, "step": 1030 }, { "epoch": 0.0627835459610876, "grad_norm": 0.9873909827023611, "learning_rate": 4.998896283614191e-06, "loss": 0.595, "step": 1031 }, { "epoch": 0.06284444173796547, "grad_norm": 0.9913778116649314, "learning_rate": 4.998893911481632e-06, "loss": 0.6035, "step": 1032 }, { "epoch": 0.06290533751484334, "grad_norm": 1.0191250464875845, "learning_rate": 4.998891536803252e-06, "loss": 0.5622, "step": 1033 }, { "epoch": 0.06296623329172121, "grad_norm": 1.0339865024020396, "learning_rate": 4.998889159579054e-06, "loss": 0.5802, "step": 1034 }, { "epoch": 0.06302712906859909, "grad_norm": 0.9624897366021908, "learning_rate": 4.998886779809038e-06, "loss": 0.5951, "step": 1035 }, { "epoch": 0.06308802484547697, "grad_norm": 1.0082980554949585, "learning_rate": 4.998884397493209e-06, "loss": 0.5695, "step": 1036 }, { "epoch": 0.06314892062235485, "grad_norm": 0.9835347177182159, "learning_rate": 4.998882012631568e-06, "loss": 0.5524, "step": 1037 }, { "epoch": 0.06320981639923272, "grad_norm": 0.9228106346969007, "learning_rate": 4.998879625224119e-06, "loss": 0.5672, "step": 1038 }, { "epoch": 0.06327071217611059, "grad_norm": 0.971412673785772, "learning_rate": 4.998877235270862e-06, "loss": 0.5783, "step": 1039 }, { "epoch": 0.06333160795298846, "grad_norm": 0.9759853950004008, "learning_rate": 4.998874842771802e-06, "loss": 0.5585, "step": 1040 }, { "epoch": 0.06339250372986634, "grad_norm": 0.9295364897139666, "learning_rate": 4.998872447726939e-06, "loss": 0.5504, "step": 1041 }, { "epoch": 0.06345339950674421, "grad_norm": 1.0651623088783224, "learning_rate": 4.9988700501362775e-06, "loss": 0.5283, "step": 1042 }, { "epoch": 0.06351429528362208, "grad_norm": 0.985476450472393, "learning_rate": 4.9988676499998194e-06, "loss": 0.5524, "step": 1043 }, { "epoch": 0.06357519106049996, "grad_norm": 1.026868734688713, "learning_rate": 4.9988652473175666e-06, "loss": 0.5381, "step": 1044 }, { "epoch": 0.06363608683737783, "grad_norm": 1.0022146676368695, "learning_rate": 4.998862842089522e-06, "loss": 0.5647, "step": 1045 }, { "epoch": 0.0636969826142557, "grad_norm": 1.0025426596597533, "learning_rate": 4.9988604343156874e-06, "loss": 0.5777, "step": 1046 }, { "epoch": 0.06375787839113357, "grad_norm": 0.9672946445464287, "learning_rate": 4.998858023996066e-06, "loss": 0.5916, "step": 1047 }, { "epoch": 0.06381877416801145, "grad_norm": 0.997312534248901, "learning_rate": 4.9988556111306605e-06, "loss": 0.5679, "step": 1048 }, { "epoch": 0.06387966994488932, "grad_norm": 1.0401262671804936, "learning_rate": 4.998853195719473e-06, "loss": 0.5844, "step": 1049 }, { "epoch": 0.06394056572176719, "grad_norm": 0.9723278895475567, "learning_rate": 4.998850777762505e-06, "loss": 0.6037, "step": 1050 }, { "epoch": 0.06400146149864507, "grad_norm": 0.9756063273709292, "learning_rate": 4.99884835725976e-06, "loss": 0.5945, "step": 1051 }, { "epoch": 0.06406235727552294, "grad_norm": 0.888221745643605, "learning_rate": 4.99884593421124e-06, "loss": 0.571, "step": 1052 }, { "epoch": 0.06412325305240081, "grad_norm": 1.0378789144161147, "learning_rate": 4.9988435086169485e-06, "loss": 0.642, "step": 1053 }, { "epoch": 0.06418414882927868, "grad_norm": 0.9559129206021948, "learning_rate": 4.998841080476886e-06, "loss": 0.6319, "step": 1054 }, { "epoch": 0.06424504460615656, "grad_norm": 0.993884959586545, "learning_rate": 4.998838649791057e-06, "loss": 0.5407, "step": 1055 }, { "epoch": 0.06430594038303444, "grad_norm": 0.9773463166639096, "learning_rate": 4.998836216559463e-06, "loss": 0.5693, "step": 1056 }, { "epoch": 0.06436683615991232, "grad_norm": 0.9700417314258541, "learning_rate": 4.998833780782107e-06, "loss": 0.5381, "step": 1057 }, { "epoch": 0.06442773193679019, "grad_norm": 0.9854544687656484, "learning_rate": 4.998831342458991e-06, "loss": 0.5076, "step": 1058 }, { "epoch": 0.06448862771366806, "grad_norm": 0.9601528857106746, "learning_rate": 4.998828901590117e-06, "loss": 0.489, "step": 1059 }, { "epoch": 0.06454952349054593, "grad_norm": 1.01240951155444, "learning_rate": 4.998826458175489e-06, "loss": 0.5273, "step": 1060 }, { "epoch": 0.06461041926742381, "grad_norm": 0.9719382259073027, "learning_rate": 4.998824012215108e-06, "loss": 0.6162, "step": 1061 }, { "epoch": 0.06467131504430168, "grad_norm": 0.9859552643660942, "learning_rate": 4.998821563708977e-06, "loss": 0.5111, "step": 1062 }, { "epoch": 0.06473221082117955, "grad_norm": 0.9889614016627707, "learning_rate": 4.998819112657098e-06, "loss": 0.5596, "step": 1063 }, { "epoch": 0.06479310659805743, "grad_norm": 0.9131786556684204, "learning_rate": 4.998816659059474e-06, "loss": 0.5755, "step": 1064 }, { "epoch": 0.0648540023749353, "grad_norm": 1.0471731208773147, "learning_rate": 4.9988142029161084e-06, "loss": 0.6015, "step": 1065 }, { "epoch": 0.06491489815181317, "grad_norm": 1.0916177345609486, "learning_rate": 4.998811744227002e-06, "loss": 0.5618, "step": 1066 }, { "epoch": 0.06497579392869104, "grad_norm": 1.030900803175794, "learning_rate": 4.998809282992159e-06, "loss": 0.5473, "step": 1067 }, { "epoch": 0.06503668970556892, "grad_norm": 1.012988665683088, "learning_rate": 4.998806819211581e-06, "loss": 0.5556, "step": 1068 }, { "epoch": 0.06509758548244679, "grad_norm": 1.0308606890686238, "learning_rate": 4.9988043528852706e-06, "loss": 0.6032, "step": 1069 }, { "epoch": 0.06515848125932466, "grad_norm": 1.0003979070817486, "learning_rate": 4.99880188401323e-06, "loss": 0.5508, "step": 1070 }, { "epoch": 0.06521937703620254, "grad_norm": 1.005335430255451, "learning_rate": 4.998799412595462e-06, "loss": 0.5265, "step": 1071 }, { "epoch": 0.06528027281308041, "grad_norm": 1.0130087494584408, "learning_rate": 4.998796938631969e-06, "loss": 0.5857, "step": 1072 }, { "epoch": 0.06534116858995828, "grad_norm": 1.0489067398765395, "learning_rate": 4.998794462122754e-06, "loss": 0.5952, "step": 1073 }, { "epoch": 0.06540206436683615, "grad_norm": 0.9366542245563985, "learning_rate": 4.998791983067818e-06, "loss": 0.6004, "step": 1074 }, { "epoch": 0.06546296014371403, "grad_norm": 0.9970823873553314, "learning_rate": 4.998789501467166e-06, "loss": 0.6117, "step": 1075 }, { "epoch": 0.0655238559205919, "grad_norm": 0.9471243141965684, "learning_rate": 4.998787017320799e-06, "loss": 0.5644, "step": 1076 }, { "epoch": 0.06558475169746979, "grad_norm": 1.0224714250757294, "learning_rate": 4.998784530628719e-06, "loss": 0.5302, "step": 1077 }, { "epoch": 0.06564564747434766, "grad_norm": 1.0089448369346707, "learning_rate": 4.99878204139093e-06, "loss": 0.595, "step": 1078 }, { "epoch": 0.06570654325122553, "grad_norm": 0.9755328186549233, "learning_rate": 4.998779549607433e-06, "loss": 0.5275, "step": 1079 }, { "epoch": 0.0657674390281034, "grad_norm": 1.0926095520808843, "learning_rate": 4.998777055278232e-06, "loss": 0.4772, "step": 1080 }, { "epoch": 0.06582833480498128, "grad_norm": 1.0285112897402637, "learning_rate": 4.998774558403329e-06, "loss": 0.5178, "step": 1081 }, { "epoch": 0.06588923058185915, "grad_norm": 1.0475680607819775, "learning_rate": 4.998772058982726e-06, "loss": 0.5743, "step": 1082 }, { "epoch": 0.06595012635873702, "grad_norm": 1.0076704965987107, "learning_rate": 4.998769557016426e-06, "loss": 0.5392, "step": 1083 }, { "epoch": 0.0660110221356149, "grad_norm": 0.9213828584482909, "learning_rate": 4.998767052504432e-06, "loss": 0.552, "step": 1084 }, { "epoch": 0.06607191791249277, "grad_norm": 0.9507968291714667, "learning_rate": 4.998764545446746e-06, "loss": 0.5786, "step": 1085 }, { "epoch": 0.06613281368937064, "grad_norm": 1.046112233826315, "learning_rate": 4.998762035843371e-06, "loss": 0.515, "step": 1086 }, { "epoch": 0.06619370946624852, "grad_norm": 0.9558113761476924, "learning_rate": 4.998759523694308e-06, "loss": 0.5409, "step": 1087 }, { "epoch": 0.06625460524312639, "grad_norm": 1.0625338775837616, "learning_rate": 4.998757008999562e-06, "loss": 0.6052, "step": 1088 }, { "epoch": 0.06631550102000426, "grad_norm": 1.0885209724630815, "learning_rate": 4.9987544917591335e-06, "loss": 0.5394, "step": 1089 }, { "epoch": 0.06637639679688213, "grad_norm": 1.1523947457763393, "learning_rate": 4.9987519719730256e-06, "loss": 0.5526, "step": 1090 }, { "epoch": 0.06643729257376, "grad_norm": 1.0455296794212945, "learning_rate": 4.9987494496412414e-06, "loss": 0.493, "step": 1091 }, { "epoch": 0.06649818835063788, "grad_norm": 0.9206695246454, "learning_rate": 4.998746924763784e-06, "loss": 0.5513, "step": 1092 }, { "epoch": 0.06655908412751575, "grad_norm": 1.0688938348338488, "learning_rate": 4.998744397340655e-06, "loss": 0.6174, "step": 1093 }, { "epoch": 0.06661997990439363, "grad_norm": 0.9455674405267943, "learning_rate": 4.9987418673718555e-06, "loss": 0.5863, "step": 1094 }, { "epoch": 0.0666808756812715, "grad_norm": 0.9582849901383308, "learning_rate": 4.998739334857391e-06, "loss": 0.5919, "step": 1095 }, { "epoch": 0.06674177145814937, "grad_norm": 0.9712541828912208, "learning_rate": 4.998736799797263e-06, "loss": 0.5764, "step": 1096 }, { "epoch": 0.06680266723502726, "grad_norm": 1.177735915768093, "learning_rate": 4.998734262191474e-06, "loss": 0.6355, "step": 1097 }, { "epoch": 0.06686356301190513, "grad_norm": 0.9639192610879905, "learning_rate": 4.998731722040026e-06, "loss": 0.5663, "step": 1098 }, { "epoch": 0.066924458788783, "grad_norm": 1.0868618625818645, "learning_rate": 4.998729179342922e-06, "loss": 0.563, "step": 1099 }, { "epoch": 0.06698535456566088, "grad_norm": 0.9668223644606615, "learning_rate": 4.998726634100166e-06, "loss": 0.5946, "step": 1100 }, { "epoch": 0.06704625034253875, "grad_norm": 1.0361331978325228, "learning_rate": 4.998724086311758e-06, "loss": 0.5771, "step": 1101 }, { "epoch": 0.06710714611941662, "grad_norm": 1.0136516205811252, "learning_rate": 4.998721535977702e-06, "loss": 0.5439, "step": 1102 }, { "epoch": 0.0671680418962945, "grad_norm": 0.9693332166873624, "learning_rate": 4.998718983098e-06, "loss": 0.5792, "step": 1103 }, { "epoch": 0.06722893767317237, "grad_norm": 0.966619422324193, "learning_rate": 4.998716427672656e-06, "loss": 0.5242, "step": 1104 }, { "epoch": 0.06728983345005024, "grad_norm": 0.981217783878956, "learning_rate": 4.998713869701671e-06, "loss": 0.5923, "step": 1105 }, { "epoch": 0.06735072922692811, "grad_norm": 1.0037972478721944, "learning_rate": 4.998711309185048e-06, "loss": 0.5851, "step": 1106 }, { "epoch": 0.06741162500380599, "grad_norm": 1.0188697580014867, "learning_rate": 4.99870874612279e-06, "loss": 0.6279, "step": 1107 }, { "epoch": 0.06747252078068386, "grad_norm": 1.006247925677007, "learning_rate": 4.9987061805149e-06, "loss": 0.6215, "step": 1108 }, { "epoch": 0.06753341655756173, "grad_norm": 1.100633696684614, "learning_rate": 4.99870361236138e-06, "loss": 0.5952, "step": 1109 }, { "epoch": 0.0675943123344396, "grad_norm": 0.914982456389676, "learning_rate": 4.998701041662233e-06, "loss": 0.5904, "step": 1110 }, { "epoch": 0.06765520811131748, "grad_norm": 0.9866660367589831, "learning_rate": 4.998698468417461e-06, "loss": 0.6209, "step": 1111 }, { "epoch": 0.06771610388819535, "grad_norm": 0.9287597465851741, "learning_rate": 4.998695892627067e-06, "loss": 0.5828, "step": 1112 }, { "epoch": 0.06777699966507322, "grad_norm": 1.1003454841382385, "learning_rate": 4.9986933142910534e-06, "loss": 0.6235, "step": 1113 }, { "epoch": 0.0678378954419511, "grad_norm": 0.9922321887365004, "learning_rate": 4.998690733409423e-06, "loss": 0.583, "step": 1114 }, { "epoch": 0.06789879121882897, "grad_norm": 0.9635948284733309, "learning_rate": 4.998688149982178e-06, "loss": 0.6688, "step": 1115 }, { "epoch": 0.06795968699570684, "grad_norm": 1.0795566658264677, "learning_rate": 4.998685564009322e-06, "loss": 0.5719, "step": 1116 }, { "epoch": 0.06802058277258471, "grad_norm": 1.0650639306623544, "learning_rate": 4.998682975490857e-06, "loss": 0.5265, "step": 1117 }, { "epoch": 0.0680814785494626, "grad_norm": 0.9957836849040824, "learning_rate": 4.998680384426786e-06, "loss": 0.5882, "step": 1118 }, { "epoch": 0.06814237432634047, "grad_norm": 1.0266044766736049, "learning_rate": 4.998677790817112e-06, "loss": 0.5541, "step": 1119 }, { "epoch": 0.06820327010321835, "grad_norm": 0.9802192274852616, "learning_rate": 4.998675194661835e-06, "loss": 0.6428, "step": 1120 }, { "epoch": 0.06826416588009622, "grad_norm": 1.005006294103443, "learning_rate": 4.998672595960961e-06, "loss": 0.5455, "step": 1121 }, { "epoch": 0.06832506165697409, "grad_norm": 1.0183542431614239, "learning_rate": 4.998669994714491e-06, "loss": 0.5353, "step": 1122 }, { "epoch": 0.06838595743385197, "grad_norm": 0.9770477351209569, "learning_rate": 4.998667390922428e-06, "loss": 0.5631, "step": 1123 }, { "epoch": 0.06844685321072984, "grad_norm": 1.112643019779469, "learning_rate": 4.998664784584775e-06, "loss": 0.5962, "step": 1124 }, { "epoch": 0.06850774898760771, "grad_norm": 0.9973371092242096, "learning_rate": 4.9986621757015345e-06, "loss": 0.5548, "step": 1125 }, { "epoch": 0.06856864476448558, "grad_norm": 0.9877069245089684, "learning_rate": 4.998659564272708e-06, "loss": 0.5718, "step": 1126 }, { "epoch": 0.06862954054136346, "grad_norm": 1.0047249413504298, "learning_rate": 4.9986569502983e-06, "loss": 0.5764, "step": 1127 }, { "epoch": 0.06869043631824133, "grad_norm": 0.9854046285369198, "learning_rate": 4.998654333778311e-06, "loss": 0.5669, "step": 1128 }, { "epoch": 0.0687513320951192, "grad_norm": 1.0365847617905175, "learning_rate": 4.998651714712745e-06, "loss": 0.6734, "step": 1129 }, { "epoch": 0.06881222787199708, "grad_norm": 0.9608209747177409, "learning_rate": 4.998649093101606e-06, "loss": 0.6245, "step": 1130 }, { "epoch": 0.06887312364887495, "grad_norm": 1.1286156592036898, "learning_rate": 4.998646468944894e-06, "loss": 0.4927, "step": 1131 }, { "epoch": 0.06893401942575282, "grad_norm": 0.9993360210773147, "learning_rate": 4.998643842242613e-06, "loss": 0.4652, "step": 1132 }, { "epoch": 0.0689949152026307, "grad_norm": 1.0340139170152896, "learning_rate": 4.998641212994767e-06, "loss": 0.5908, "step": 1133 }, { "epoch": 0.06905581097950857, "grad_norm": 1.027267392846702, "learning_rate": 4.998638581201356e-06, "loss": 0.531, "step": 1134 }, { "epoch": 0.06911670675638644, "grad_norm": 1.0371900217491317, "learning_rate": 4.998635946862384e-06, "loss": 0.4941, "step": 1135 }, { "epoch": 0.06917760253326431, "grad_norm": 1.100154467361626, "learning_rate": 4.998633309977854e-06, "loss": 0.616, "step": 1136 }, { "epoch": 0.06923849831014219, "grad_norm": 0.9698016221045916, "learning_rate": 4.998630670547768e-06, "loss": 0.5311, "step": 1137 }, { "epoch": 0.06929939408702007, "grad_norm": 1.0121844959032837, "learning_rate": 4.99862802857213e-06, "loss": 0.5307, "step": 1138 }, { "epoch": 0.06936028986389794, "grad_norm": 1.0171566608231233, "learning_rate": 4.99862538405094e-06, "loss": 0.5988, "step": 1139 }, { "epoch": 0.06942118564077582, "grad_norm": 1.0969127201630895, "learning_rate": 4.998622736984204e-06, "loss": 0.5519, "step": 1140 }, { "epoch": 0.06948208141765369, "grad_norm": 0.973346524939584, "learning_rate": 4.9986200873719224e-06, "loss": 0.5962, "step": 1141 }, { "epoch": 0.06954297719453156, "grad_norm": 0.9374202928308082, "learning_rate": 4.998617435214098e-06, "loss": 0.6045, "step": 1142 }, { "epoch": 0.06960387297140944, "grad_norm": 0.9877600934536377, "learning_rate": 4.998614780510735e-06, "loss": 0.5501, "step": 1143 }, { "epoch": 0.06966476874828731, "grad_norm": 0.9281263549960233, "learning_rate": 4.998612123261835e-06, "loss": 0.6014, "step": 1144 }, { "epoch": 0.06972566452516518, "grad_norm": 1.0244116103899692, "learning_rate": 4.998609463467401e-06, "loss": 0.5912, "step": 1145 }, { "epoch": 0.06978656030204305, "grad_norm": 0.9783845150248924, "learning_rate": 4.9986068011274356e-06, "loss": 0.6021, "step": 1146 }, { "epoch": 0.06984745607892093, "grad_norm": 1.0314553182330384, "learning_rate": 4.998604136241941e-06, "loss": 0.5593, "step": 1147 }, { "epoch": 0.0699083518557988, "grad_norm": 1.0617005258732215, "learning_rate": 4.998601468810921e-06, "loss": 0.6167, "step": 1148 }, { "epoch": 0.06996924763267667, "grad_norm": 0.9787644319417639, "learning_rate": 4.998598798834377e-06, "loss": 0.5635, "step": 1149 }, { "epoch": 0.07003014340955455, "grad_norm": 1.0315274561255217, "learning_rate": 4.998596126312314e-06, "loss": 0.576, "step": 1150 }, { "epoch": 0.07009103918643242, "grad_norm": 0.9973259441290757, "learning_rate": 4.998593451244732e-06, "loss": 0.4807, "step": 1151 }, { "epoch": 0.07015193496331029, "grad_norm": 0.9447770460077095, "learning_rate": 4.998590773631636e-06, "loss": 0.5238, "step": 1152 }, { "epoch": 0.07021283074018816, "grad_norm": 1.051885070012112, "learning_rate": 4.998588093473027e-06, "loss": 0.5496, "step": 1153 }, { "epoch": 0.07027372651706604, "grad_norm": 1.0492274723426336, "learning_rate": 4.998585410768908e-06, "loss": 0.5803, "step": 1154 }, { "epoch": 0.07033462229394391, "grad_norm": 1.022272407552834, "learning_rate": 4.998582725519283e-06, "loss": 0.6862, "step": 1155 }, { "epoch": 0.07039551807082178, "grad_norm": 1.0197645810543696, "learning_rate": 4.998580037724153e-06, "loss": 0.5733, "step": 1156 }, { "epoch": 0.07045641384769966, "grad_norm": 1.044850155500971, "learning_rate": 4.998577347383522e-06, "loss": 0.5856, "step": 1157 }, { "epoch": 0.07051730962457753, "grad_norm": 1.019892171857956, "learning_rate": 4.998574654497393e-06, "loss": 0.5448, "step": 1158 }, { "epoch": 0.07057820540145542, "grad_norm": 0.9660994332985852, "learning_rate": 4.9985719590657665e-06, "loss": 0.5697, "step": 1159 }, { "epoch": 0.07063910117833329, "grad_norm": 1.024350506751212, "learning_rate": 4.998569261088648e-06, "loss": 0.5383, "step": 1160 }, { "epoch": 0.07069999695521116, "grad_norm": 1.0020588073495769, "learning_rate": 4.998566560566039e-06, "loss": 0.5544, "step": 1161 }, { "epoch": 0.07076089273208903, "grad_norm": 1.0209602207347863, "learning_rate": 4.998563857497942e-06, "loss": 0.5386, "step": 1162 }, { "epoch": 0.0708217885089669, "grad_norm": 1.0574792912342732, "learning_rate": 4.99856115188436e-06, "loss": 0.5111, "step": 1163 }, { "epoch": 0.07088268428584478, "grad_norm": 1.0086509144332119, "learning_rate": 4.9985584437252965e-06, "loss": 0.5755, "step": 1164 }, { "epoch": 0.07094358006272265, "grad_norm": 0.9884474184798417, "learning_rate": 4.998555733020753e-06, "loss": 0.5677, "step": 1165 }, { "epoch": 0.07100447583960053, "grad_norm": 1.0071668429949763, "learning_rate": 4.998553019770733e-06, "loss": 0.5585, "step": 1166 }, { "epoch": 0.0710653716164784, "grad_norm": 0.9048342841336526, "learning_rate": 4.9985503039752396e-06, "loss": 0.6033, "step": 1167 }, { "epoch": 0.07112626739335627, "grad_norm": 1.060020860396882, "learning_rate": 4.998547585634275e-06, "loss": 0.5771, "step": 1168 }, { "epoch": 0.07118716317023414, "grad_norm": 0.9150263863933339, "learning_rate": 4.998544864747841e-06, "loss": 0.5861, "step": 1169 }, { "epoch": 0.07124805894711202, "grad_norm": 0.9986598364460237, "learning_rate": 4.998542141315942e-06, "loss": 0.5622, "step": 1170 }, { "epoch": 0.07130895472398989, "grad_norm": 1.0845580988350936, "learning_rate": 4.998539415338581e-06, "loss": 0.566, "step": 1171 }, { "epoch": 0.07136985050086776, "grad_norm": 1.0985118493485801, "learning_rate": 4.99853668681576e-06, "loss": 0.525, "step": 1172 }, { "epoch": 0.07143074627774564, "grad_norm": 1.0170281646705763, "learning_rate": 4.9985339557474804e-06, "loss": 0.5485, "step": 1173 }, { "epoch": 0.07149164205462351, "grad_norm": 0.9479186025390727, "learning_rate": 4.998531222133747e-06, "loss": 0.5503, "step": 1174 }, { "epoch": 0.07155253783150138, "grad_norm": 0.9477571767316635, "learning_rate": 4.998528485974562e-06, "loss": 0.5829, "step": 1175 }, { "epoch": 0.07161343360837925, "grad_norm": 1.0029526340235162, "learning_rate": 4.998525747269928e-06, "loss": 0.5529, "step": 1176 }, { "epoch": 0.07167432938525713, "grad_norm": 1.003240813093453, "learning_rate": 4.998523006019849e-06, "loss": 0.5326, "step": 1177 }, { "epoch": 0.071735225162135, "grad_norm": 1.0420448572655625, "learning_rate": 4.998520262224325e-06, "loss": 0.6129, "step": 1178 }, { "epoch": 0.07179612093901289, "grad_norm": 0.951074061795172, "learning_rate": 4.998517515883361e-06, "loss": 0.5948, "step": 1179 }, { "epoch": 0.07185701671589076, "grad_norm": 1.0552976507732788, "learning_rate": 4.99851476699696e-06, "loss": 0.549, "step": 1180 }, { "epoch": 0.07191791249276863, "grad_norm": 0.9669334924908903, "learning_rate": 4.998512015565124e-06, "loss": 0.6119, "step": 1181 }, { "epoch": 0.0719788082696465, "grad_norm": 1.0720507105725434, "learning_rate": 4.998509261587855e-06, "loss": 0.569, "step": 1182 }, { "epoch": 0.07203970404652438, "grad_norm": 1.0331390123016844, "learning_rate": 4.998506505065158e-06, "loss": 0.5765, "step": 1183 }, { "epoch": 0.07210059982340225, "grad_norm": 1.0390285014564644, "learning_rate": 4.998503745997034e-06, "loss": 0.576, "step": 1184 }, { "epoch": 0.07216149560028012, "grad_norm": 1.0016893206957649, "learning_rate": 4.9985009843834855e-06, "loss": 0.5732, "step": 1185 }, { "epoch": 0.072222391377158, "grad_norm": 1.0301703449090118, "learning_rate": 4.998498220224517e-06, "loss": 0.5929, "step": 1186 }, { "epoch": 0.07228328715403587, "grad_norm": 1.0428638959350198, "learning_rate": 4.99849545352013e-06, "loss": 0.5876, "step": 1187 }, { "epoch": 0.07234418293091374, "grad_norm": 0.9563407693078828, "learning_rate": 4.998492684270329e-06, "loss": 0.546, "step": 1188 }, { "epoch": 0.07240507870779161, "grad_norm": 0.9543710589373997, "learning_rate": 4.998489912475114e-06, "loss": 0.5097, "step": 1189 }, { "epoch": 0.07246597448466949, "grad_norm": 1.0140143672391557, "learning_rate": 4.9984871381344904e-06, "loss": 0.6019, "step": 1190 }, { "epoch": 0.07252687026154736, "grad_norm": 0.9893222583812554, "learning_rate": 4.998484361248459e-06, "loss": 0.5785, "step": 1191 }, { "epoch": 0.07258776603842523, "grad_norm": 0.9634497394159248, "learning_rate": 4.998481581817025e-06, "loss": 0.602, "step": 1192 }, { "epoch": 0.0726486618153031, "grad_norm": 1.0277842359613598, "learning_rate": 4.998478799840189e-06, "loss": 0.6183, "step": 1193 }, { "epoch": 0.07270955759218098, "grad_norm": 1.0561216853444582, "learning_rate": 4.998476015317955e-06, "loss": 0.5511, "step": 1194 }, { "epoch": 0.07277045336905885, "grad_norm": 1.0315341492319363, "learning_rate": 4.998473228250326e-06, "loss": 0.6078, "step": 1195 }, { "epoch": 0.07283134914593672, "grad_norm": 0.9859338612643538, "learning_rate": 4.9984704386373036e-06, "loss": 0.5651, "step": 1196 }, { "epoch": 0.0728922449228146, "grad_norm": 0.941229271235631, "learning_rate": 4.998467646478892e-06, "loss": 0.586, "step": 1197 }, { "epoch": 0.07295314069969247, "grad_norm": 1.0112568353857272, "learning_rate": 4.998464851775094e-06, "loss": 0.549, "step": 1198 }, { "epoch": 0.07301403647657034, "grad_norm": 0.9482523307783411, "learning_rate": 4.998462054525911e-06, "loss": 0.5913, "step": 1199 }, { "epoch": 0.07307493225344823, "grad_norm": 0.997554836572336, "learning_rate": 4.998459254731347e-06, "loss": 0.5619, "step": 1200 }, { "epoch": 0.0731358280303261, "grad_norm": 0.934055213659878, "learning_rate": 4.998456452391405e-06, "loss": 0.5249, "step": 1201 }, { "epoch": 0.07319672380720398, "grad_norm": 1.1067300588073856, "learning_rate": 4.9984536475060875e-06, "loss": 0.5643, "step": 1202 }, { "epoch": 0.07325761958408185, "grad_norm": 0.9987728682007665, "learning_rate": 4.998450840075397e-06, "loss": 0.5538, "step": 1203 }, { "epoch": 0.07331851536095972, "grad_norm": 1.0118252132019658, "learning_rate": 4.998448030099337e-06, "loss": 0.6228, "step": 1204 }, { "epoch": 0.0733794111378376, "grad_norm": 1.109523566495688, "learning_rate": 4.9984452175779106e-06, "loss": 0.5644, "step": 1205 }, { "epoch": 0.07344030691471547, "grad_norm": 1.0541191693553016, "learning_rate": 4.998442402511119e-06, "loss": 0.5732, "step": 1206 }, { "epoch": 0.07350120269159334, "grad_norm": 0.9607233597704039, "learning_rate": 4.998439584898967e-06, "loss": 0.5159, "step": 1207 }, { "epoch": 0.07356209846847121, "grad_norm": 1.0373898907294488, "learning_rate": 4.9984367647414565e-06, "loss": 0.5299, "step": 1208 }, { "epoch": 0.07362299424534909, "grad_norm": 1.0366548524284496, "learning_rate": 4.998433942038591e-06, "loss": 0.5203, "step": 1209 }, { "epoch": 0.07368389002222696, "grad_norm": 0.8999612007801143, "learning_rate": 4.9984311167903725e-06, "loss": 0.6097, "step": 1210 }, { "epoch": 0.07374478579910483, "grad_norm": 1.104227513084385, "learning_rate": 4.998428288996804e-06, "loss": 0.5787, "step": 1211 }, { "epoch": 0.0738056815759827, "grad_norm": 1.0730472465942713, "learning_rate": 4.998425458657889e-06, "loss": 0.5192, "step": 1212 }, { "epoch": 0.07386657735286058, "grad_norm": 0.9601544706070597, "learning_rate": 4.9984226257736304e-06, "loss": 0.5838, "step": 1213 }, { "epoch": 0.07392747312973845, "grad_norm": 1.0139278066905026, "learning_rate": 4.99841979034403e-06, "loss": 0.6546, "step": 1214 }, { "epoch": 0.07398836890661632, "grad_norm": 1.0700496891402338, "learning_rate": 4.998416952369092e-06, "loss": 0.5738, "step": 1215 }, { "epoch": 0.0740492646834942, "grad_norm": 0.9571049463574032, "learning_rate": 4.998414111848819e-06, "loss": 0.6831, "step": 1216 }, { "epoch": 0.07411016046037207, "grad_norm": 1.0267449074727277, "learning_rate": 4.998411268783213e-06, "loss": 0.5628, "step": 1217 }, { "epoch": 0.07417105623724994, "grad_norm": 1.0229663870317407, "learning_rate": 4.998408423172278e-06, "loss": 0.5498, "step": 1218 }, { "epoch": 0.07423195201412781, "grad_norm": 0.9819748199025332, "learning_rate": 4.998405575016016e-06, "loss": 0.6028, "step": 1219 }, { "epoch": 0.0742928477910057, "grad_norm": 1.0494742656540514, "learning_rate": 4.998402724314431e-06, "loss": 0.5299, "step": 1220 }, { "epoch": 0.07435374356788357, "grad_norm": 1.0391025597176269, "learning_rate": 4.998399871067525e-06, "loss": 0.558, "step": 1221 }, { "epoch": 0.07441463934476145, "grad_norm": 0.9484574719713161, "learning_rate": 4.998397015275301e-06, "loss": 0.553, "step": 1222 }, { "epoch": 0.07447553512163932, "grad_norm": 0.9515794370497722, "learning_rate": 4.998394156937763e-06, "loss": 0.5945, "step": 1223 }, { "epoch": 0.07453643089851719, "grad_norm": 0.9699349276432713, "learning_rate": 4.9983912960549115e-06, "loss": 0.5485, "step": 1224 }, { "epoch": 0.07459732667539506, "grad_norm": 0.992800701605478, "learning_rate": 4.998388432626752e-06, "loss": 0.5072, "step": 1225 }, { "epoch": 0.07465822245227294, "grad_norm": 1.005243258821148, "learning_rate": 4.998385566653286e-06, "loss": 0.5644, "step": 1226 }, { "epoch": 0.07471911822915081, "grad_norm": 0.9983940758986716, "learning_rate": 4.998382698134516e-06, "loss": 0.5637, "step": 1227 }, { "epoch": 0.07478001400602868, "grad_norm": 1.0394280317065345, "learning_rate": 4.998379827070446e-06, "loss": 0.6091, "step": 1228 }, { "epoch": 0.07484090978290656, "grad_norm": 1.0068486056574648, "learning_rate": 4.998376953461079e-06, "loss": 0.5785, "step": 1229 }, { "epoch": 0.07490180555978443, "grad_norm": 0.9913329400498256, "learning_rate": 4.998374077306417e-06, "loss": 0.5695, "step": 1230 }, { "epoch": 0.0749627013366623, "grad_norm": 0.9884312902890091, "learning_rate": 4.998371198606464e-06, "loss": 0.6269, "step": 1231 }, { "epoch": 0.07502359711354017, "grad_norm": 1.0408900888368038, "learning_rate": 4.998368317361222e-06, "loss": 0.5638, "step": 1232 }, { "epoch": 0.07508449289041805, "grad_norm": 0.9948596435918458, "learning_rate": 4.998365433570694e-06, "loss": 0.5534, "step": 1233 }, { "epoch": 0.07514538866729592, "grad_norm": 1.0851637036315944, "learning_rate": 4.998362547234884e-06, "loss": 0.5832, "step": 1234 }, { "epoch": 0.0752062844441738, "grad_norm": 1.0213182732479609, "learning_rate": 4.998359658353793e-06, "loss": 0.5554, "step": 1235 }, { "epoch": 0.07526718022105167, "grad_norm": 0.9779438420717514, "learning_rate": 4.998356766927427e-06, "loss": 0.6067, "step": 1236 }, { "epoch": 0.07532807599792954, "grad_norm": 1.0139869228221998, "learning_rate": 4.998353872955786e-06, "loss": 0.5363, "step": 1237 }, { "epoch": 0.07538897177480741, "grad_norm": 1.012986764406692, "learning_rate": 4.9983509764388735e-06, "loss": 0.582, "step": 1238 }, { "epoch": 0.07544986755168528, "grad_norm": 1.0373892902529627, "learning_rate": 4.998348077376693e-06, "loss": 0.5724, "step": 1239 }, { "epoch": 0.07551076332856316, "grad_norm": 1.0469319702890991, "learning_rate": 4.998345175769248e-06, "loss": 0.5311, "step": 1240 }, { "epoch": 0.07557165910544104, "grad_norm": 0.9574229427004137, "learning_rate": 4.998342271616541e-06, "loss": 0.5194, "step": 1241 }, { "epoch": 0.07563255488231892, "grad_norm": 1.0338218095865146, "learning_rate": 4.998339364918575e-06, "loss": 0.5524, "step": 1242 }, { "epoch": 0.07569345065919679, "grad_norm": 1.1461504542175425, "learning_rate": 4.998336455675352e-06, "loss": 0.6365, "step": 1243 }, { "epoch": 0.07575434643607466, "grad_norm": 1.0303062335632427, "learning_rate": 4.998333543886876e-06, "loss": 0.559, "step": 1244 }, { "epoch": 0.07581524221295254, "grad_norm": 0.975906325313318, "learning_rate": 4.99833062955315e-06, "loss": 0.5879, "step": 1245 }, { "epoch": 0.07587613798983041, "grad_norm": 1.0638795579929976, "learning_rate": 4.998327712674177e-06, "loss": 0.5103, "step": 1246 }, { "epoch": 0.07593703376670828, "grad_norm": 1.0809723193355294, "learning_rate": 4.998324793249959e-06, "loss": 0.5958, "step": 1247 }, { "epoch": 0.07599792954358615, "grad_norm": 0.9731515984509215, "learning_rate": 4.9983218712805e-06, "loss": 0.5258, "step": 1248 }, { "epoch": 0.07605882532046403, "grad_norm": 1.0842232663095668, "learning_rate": 4.9983189467658025e-06, "loss": 0.5613, "step": 1249 }, { "epoch": 0.0761197210973419, "grad_norm": 1.0170256031807774, "learning_rate": 4.99831601970587e-06, "loss": 0.5534, "step": 1250 }, { "epoch": 0.07618061687421977, "grad_norm": 1.022588582352, "learning_rate": 4.9983130901007045e-06, "loss": 0.5964, "step": 1251 }, { "epoch": 0.07624151265109765, "grad_norm": 1.0409056310986078, "learning_rate": 4.9983101579503095e-06, "loss": 0.5914, "step": 1252 }, { "epoch": 0.07630240842797552, "grad_norm": 1.0200216127776982, "learning_rate": 4.998307223254688e-06, "loss": 0.5931, "step": 1253 }, { "epoch": 0.07636330420485339, "grad_norm": 1.0251525579161205, "learning_rate": 4.998304286013844e-06, "loss": 0.5828, "step": 1254 }, { "epoch": 0.07642419998173126, "grad_norm": 0.9711082369298831, "learning_rate": 4.998301346227779e-06, "loss": 0.5903, "step": 1255 }, { "epoch": 0.07648509575860914, "grad_norm": 1.0636988404104293, "learning_rate": 4.998298403896496e-06, "loss": 0.5878, "step": 1256 }, { "epoch": 0.07654599153548701, "grad_norm": 0.9396555179806282, "learning_rate": 4.998295459019999e-06, "loss": 0.5582, "step": 1257 }, { "epoch": 0.07660688731236488, "grad_norm": 1.0647335456954574, "learning_rate": 4.99829251159829e-06, "loss": 0.5461, "step": 1258 }, { "epoch": 0.07666778308924276, "grad_norm": 1.0419858730922644, "learning_rate": 4.998289561631373e-06, "loss": 0.6439, "step": 1259 }, { "epoch": 0.07672867886612063, "grad_norm": 1.0121396819271393, "learning_rate": 4.998286609119251e-06, "loss": 0.5178, "step": 1260 }, { "epoch": 0.07678957464299851, "grad_norm": 1.0068704251218528, "learning_rate": 4.998283654061926e-06, "loss": 0.5969, "step": 1261 }, { "epoch": 0.07685047041987639, "grad_norm": 1.0209458330189722, "learning_rate": 4.998280696459402e-06, "loss": 0.5736, "step": 1262 }, { "epoch": 0.07691136619675426, "grad_norm": 1.046161346108272, "learning_rate": 4.998277736311681e-06, "loss": 0.5297, "step": 1263 }, { "epoch": 0.07697226197363213, "grad_norm": 0.93245485406517, "learning_rate": 4.998274773618767e-06, "loss": 0.5319, "step": 1264 }, { "epoch": 0.07703315775051, "grad_norm": 0.9561970159788988, "learning_rate": 4.998271808380663e-06, "loss": 0.5685, "step": 1265 }, { "epoch": 0.07709405352738788, "grad_norm": 0.9647048668169314, "learning_rate": 4.99826884059737e-06, "loss": 0.57, "step": 1266 }, { "epoch": 0.07715494930426575, "grad_norm": 1.0436805820011876, "learning_rate": 4.998265870268893e-06, "loss": 0.5971, "step": 1267 }, { "epoch": 0.07721584508114362, "grad_norm": 1.0826067033081626, "learning_rate": 4.998262897395236e-06, "loss": 0.5457, "step": 1268 }, { "epoch": 0.0772767408580215, "grad_norm": 1.017794935338194, "learning_rate": 4.9982599219764e-06, "loss": 0.5925, "step": 1269 }, { "epoch": 0.07733763663489937, "grad_norm": 1.0797630074663152, "learning_rate": 4.998256944012389e-06, "loss": 0.5389, "step": 1270 }, { "epoch": 0.07739853241177724, "grad_norm": 1.208518304985967, "learning_rate": 4.998253963503205e-06, "loss": 0.5111, "step": 1271 }, { "epoch": 0.07745942818865512, "grad_norm": 1.036703327789083, "learning_rate": 4.998250980448853e-06, "loss": 0.6252, "step": 1272 }, { "epoch": 0.07752032396553299, "grad_norm": 1.0030469628563228, "learning_rate": 4.998247994849334e-06, "loss": 0.5954, "step": 1273 }, { "epoch": 0.07758121974241086, "grad_norm": 1.052497297799712, "learning_rate": 4.998245006704652e-06, "loss": 0.5878, "step": 1274 }, { "epoch": 0.07764211551928873, "grad_norm": 1.0070183953992327, "learning_rate": 4.99824201601481e-06, "loss": 0.5375, "step": 1275 }, { "epoch": 0.07770301129616661, "grad_norm": 1.0852154755564956, "learning_rate": 4.998239022779811e-06, "loss": 0.5581, "step": 1276 }, { "epoch": 0.07776390707304448, "grad_norm": 1.0097498992603215, "learning_rate": 4.998236026999658e-06, "loss": 0.5597, "step": 1277 }, { "epoch": 0.07782480284992235, "grad_norm": 1.0203817264335495, "learning_rate": 4.998233028674354e-06, "loss": 0.5418, "step": 1278 }, { "epoch": 0.07788569862680023, "grad_norm": 1.056525625803713, "learning_rate": 4.998230027803902e-06, "loss": 0.6001, "step": 1279 }, { "epoch": 0.0779465944036781, "grad_norm": 0.9607154832911161, "learning_rate": 4.998227024388306e-06, "loss": 0.5643, "step": 1280 }, { "epoch": 0.07800749018055597, "grad_norm": 0.9243228350506005, "learning_rate": 4.998224018427567e-06, "loss": 0.5477, "step": 1281 }, { "epoch": 0.07806838595743386, "grad_norm": 1.0595571770603964, "learning_rate": 4.99822100992169e-06, "loss": 0.6179, "step": 1282 }, { "epoch": 0.07812928173431173, "grad_norm": 0.9780378960537958, "learning_rate": 4.998217998870677e-06, "loss": 0.5524, "step": 1283 }, { "epoch": 0.0781901775111896, "grad_norm": 1.082051429111794, "learning_rate": 4.998214985274532e-06, "loss": 0.5297, "step": 1284 }, { "epoch": 0.07825107328806748, "grad_norm": 1.1384153381768176, "learning_rate": 4.998211969133257e-06, "loss": 0.5432, "step": 1285 }, { "epoch": 0.07831196906494535, "grad_norm": 0.9382629256334661, "learning_rate": 4.998208950446856e-06, "loss": 0.5469, "step": 1286 }, { "epoch": 0.07837286484182322, "grad_norm": 1.1136320403592888, "learning_rate": 4.9982059292153315e-06, "loss": 0.5571, "step": 1287 }, { "epoch": 0.0784337606187011, "grad_norm": 0.9823247776870848, "learning_rate": 4.998202905438687e-06, "loss": 0.5283, "step": 1288 }, { "epoch": 0.07849465639557897, "grad_norm": 1.0259633760239792, "learning_rate": 4.998199879116925e-06, "loss": 0.5144, "step": 1289 }, { "epoch": 0.07855555217245684, "grad_norm": 1.0250984650854649, "learning_rate": 4.998196850250049e-06, "loss": 0.5043, "step": 1290 }, { "epoch": 0.07861644794933471, "grad_norm": 1.0423025765621967, "learning_rate": 4.998193818838062e-06, "loss": 0.5746, "step": 1291 }, { "epoch": 0.07867734372621259, "grad_norm": 0.9863438356379844, "learning_rate": 4.998190784880967e-06, "loss": 0.5665, "step": 1292 }, { "epoch": 0.07873823950309046, "grad_norm": 0.9627636842680778, "learning_rate": 4.998187748378768e-06, "loss": 0.5572, "step": 1293 }, { "epoch": 0.07879913527996833, "grad_norm": 1.0093451282027273, "learning_rate": 4.998184709331465e-06, "loss": 0.577, "step": 1294 }, { "epoch": 0.0788600310568462, "grad_norm": 0.9906904264511571, "learning_rate": 4.998181667739065e-06, "loss": 0.5109, "step": 1295 }, { "epoch": 0.07892092683372408, "grad_norm": 0.9817016183400088, "learning_rate": 4.9981786236015695e-06, "loss": 0.4849, "step": 1296 }, { "epoch": 0.07898182261060195, "grad_norm": 1.168490182660131, "learning_rate": 4.998175576918982e-06, "loss": 0.5378, "step": 1297 }, { "epoch": 0.07904271838747982, "grad_norm": 0.9429280818596947, "learning_rate": 4.998172527691304e-06, "loss": 0.5899, "step": 1298 }, { "epoch": 0.0791036141643577, "grad_norm": 0.9652422171579774, "learning_rate": 4.9981694759185405e-06, "loss": 0.478, "step": 1299 }, { "epoch": 0.07916450994123557, "grad_norm": 1.029828377241511, "learning_rate": 4.998166421600693e-06, "loss": 0.5388, "step": 1300 }, { "epoch": 0.07922540571811344, "grad_norm": 1.0623845315391092, "learning_rate": 4.998163364737766e-06, "loss": 0.5625, "step": 1301 }, { "epoch": 0.07928630149499133, "grad_norm": 1.0577995817051065, "learning_rate": 4.998160305329762e-06, "loss": 0.5428, "step": 1302 }, { "epoch": 0.0793471972718692, "grad_norm": 1.031556345840726, "learning_rate": 4.998157243376685e-06, "loss": 0.5685, "step": 1303 }, { "epoch": 0.07940809304874707, "grad_norm": 1.045842882193498, "learning_rate": 4.998154178878537e-06, "loss": 0.5797, "step": 1304 }, { "epoch": 0.07946898882562495, "grad_norm": 0.9845149024298658, "learning_rate": 4.998151111835321e-06, "loss": 0.5118, "step": 1305 }, { "epoch": 0.07952988460250282, "grad_norm": 1.0266381375383078, "learning_rate": 4.998148042247041e-06, "loss": 0.5516, "step": 1306 }, { "epoch": 0.0795907803793807, "grad_norm": 1.0263059760076163, "learning_rate": 4.9981449701137e-06, "loss": 0.6006, "step": 1307 }, { "epoch": 0.07965167615625857, "grad_norm": 1.069353258442347, "learning_rate": 4.9981418954353e-06, "loss": 0.5769, "step": 1308 }, { "epoch": 0.07971257193313644, "grad_norm": 1.120264238897858, "learning_rate": 4.998138818211845e-06, "loss": 0.5735, "step": 1309 }, { "epoch": 0.07977346771001431, "grad_norm": 1.140150989569195, "learning_rate": 4.998135738443339e-06, "loss": 0.5236, "step": 1310 }, { "epoch": 0.07983436348689218, "grad_norm": 1.0292591864220622, "learning_rate": 4.998132656129784e-06, "loss": 0.5665, "step": 1311 }, { "epoch": 0.07989525926377006, "grad_norm": 0.99614503474071, "learning_rate": 4.998129571271182e-06, "loss": 0.5492, "step": 1312 }, { "epoch": 0.07995615504064793, "grad_norm": 1.0074257139534901, "learning_rate": 4.998126483867539e-06, "loss": 0.6106, "step": 1313 }, { "epoch": 0.0800170508175258, "grad_norm": 1.053817586112949, "learning_rate": 4.998123393918856e-06, "loss": 0.5735, "step": 1314 }, { "epoch": 0.08007794659440368, "grad_norm": 1.0828431556273712, "learning_rate": 4.998120301425138e-06, "loss": 0.5898, "step": 1315 }, { "epoch": 0.08013884237128155, "grad_norm": 1.0014159759171686, "learning_rate": 4.9981172063863855e-06, "loss": 0.6491, "step": 1316 }, { "epoch": 0.08019973814815942, "grad_norm": 1.0631516295467878, "learning_rate": 4.998114108802604e-06, "loss": 0.578, "step": 1317 }, { "epoch": 0.0802606339250373, "grad_norm": 1.1469917517408457, "learning_rate": 4.998111008673795e-06, "loss": 0.635, "step": 1318 }, { "epoch": 0.08032152970191517, "grad_norm": 1.0369777268127425, "learning_rate": 4.998107905999963e-06, "loss": 0.6274, "step": 1319 }, { "epoch": 0.08038242547879304, "grad_norm": 1.0933505840864048, "learning_rate": 4.998104800781111e-06, "loss": 0.5302, "step": 1320 }, { "epoch": 0.08044332125567091, "grad_norm": 1.0723938297123092, "learning_rate": 4.998101693017241e-06, "loss": 0.505, "step": 1321 }, { "epoch": 0.08050421703254879, "grad_norm": 0.9861843715344029, "learning_rate": 4.998098582708357e-06, "loss": 0.6196, "step": 1322 }, { "epoch": 0.08056511280942667, "grad_norm": 0.8845598074602188, "learning_rate": 4.998095469854462e-06, "loss": 0.6342, "step": 1323 }, { "epoch": 0.08062600858630455, "grad_norm": 0.9661747277608366, "learning_rate": 4.99809235445556e-06, "loss": 0.6338, "step": 1324 }, { "epoch": 0.08068690436318242, "grad_norm": 1.0668242612977026, "learning_rate": 4.998089236511654e-06, "loss": 0.5329, "step": 1325 }, { "epoch": 0.08074780014006029, "grad_norm": 1.030231501243152, "learning_rate": 4.998086116022745e-06, "loss": 0.552, "step": 1326 }, { "epoch": 0.08080869591693816, "grad_norm": 1.0077843827560926, "learning_rate": 4.998082992988839e-06, "loss": 0.5089, "step": 1327 }, { "epoch": 0.08086959169381604, "grad_norm": 1.0118280558806028, "learning_rate": 4.998079867409937e-06, "loss": 0.5629, "step": 1328 }, { "epoch": 0.08093048747069391, "grad_norm": 0.9771515407307051, "learning_rate": 4.998076739286044e-06, "loss": 0.5347, "step": 1329 }, { "epoch": 0.08099138324757178, "grad_norm": 0.966935908836208, "learning_rate": 4.998073608617161e-06, "loss": 0.5847, "step": 1330 }, { "epoch": 0.08105227902444966, "grad_norm": 1.137715972427165, "learning_rate": 4.998070475403295e-06, "loss": 0.5107, "step": 1331 }, { "epoch": 0.08111317480132753, "grad_norm": 0.98716720835164, "learning_rate": 4.9980673396444445e-06, "loss": 0.563, "step": 1332 }, { "epoch": 0.0811740705782054, "grad_norm": 1.0726813888408366, "learning_rate": 4.998064201340615e-06, "loss": 0.5942, "step": 1333 }, { "epoch": 0.08123496635508327, "grad_norm": 0.9898635080362551, "learning_rate": 4.998061060491811e-06, "loss": 0.5995, "step": 1334 }, { "epoch": 0.08129586213196115, "grad_norm": 0.9652649294810579, "learning_rate": 4.998057917098034e-06, "loss": 0.5686, "step": 1335 }, { "epoch": 0.08135675790883902, "grad_norm": 1.046962334502556, "learning_rate": 4.998054771159287e-06, "loss": 0.5107, "step": 1336 }, { "epoch": 0.08141765368571689, "grad_norm": 0.9832744212465208, "learning_rate": 4.998051622675574e-06, "loss": 0.5733, "step": 1337 }, { "epoch": 0.08147854946259477, "grad_norm": 0.9368689927558395, "learning_rate": 4.998048471646898e-06, "loss": 0.5599, "step": 1338 }, { "epoch": 0.08153944523947264, "grad_norm": 1.055196289303668, "learning_rate": 4.998045318073262e-06, "loss": 0.5511, "step": 1339 }, { "epoch": 0.08160034101635051, "grad_norm": 1.0749040684455782, "learning_rate": 4.998042161954669e-06, "loss": 0.5701, "step": 1340 }, { "epoch": 0.08166123679322838, "grad_norm": 1.0145056995553623, "learning_rate": 4.998039003291123e-06, "loss": 0.6229, "step": 1341 }, { "epoch": 0.08172213257010626, "grad_norm": 0.9796650532587855, "learning_rate": 4.998035842082627e-06, "loss": 0.6331, "step": 1342 }, { "epoch": 0.08178302834698414, "grad_norm": 1.0435448447172657, "learning_rate": 4.998032678329184e-06, "loss": 0.6048, "step": 1343 }, { "epoch": 0.08184392412386202, "grad_norm": 1.0301046873282795, "learning_rate": 4.998029512030796e-06, "loss": 0.5525, "step": 1344 }, { "epoch": 0.08190481990073989, "grad_norm": 1.0471543501445146, "learning_rate": 4.998026343187469e-06, "loss": 0.6515, "step": 1345 }, { "epoch": 0.08196571567761776, "grad_norm": 1.0492853745177222, "learning_rate": 4.998023171799204e-06, "loss": 0.5018, "step": 1346 }, { "epoch": 0.08202661145449563, "grad_norm": 1.0570140887830555, "learning_rate": 4.9980199978660055e-06, "loss": 0.5638, "step": 1347 }, { "epoch": 0.08208750723137351, "grad_norm": 1.0254615641151106, "learning_rate": 4.998016821387875e-06, "loss": 0.6149, "step": 1348 }, { "epoch": 0.08214840300825138, "grad_norm": 1.1051196221212887, "learning_rate": 4.998013642364818e-06, "loss": 0.538, "step": 1349 }, { "epoch": 0.08220929878512925, "grad_norm": 1.0573606207749888, "learning_rate": 4.9980104607968374e-06, "loss": 0.5252, "step": 1350 }, { "epoch": 0.08227019456200713, "grad_norm": 0.9446013413856239, "learning_rate": 4.998007276683934e-06, "loss": 0.6062, "step": 1351 }, { "epoch": 0.082331090338885, "grad_norm": 0.995532122699198, "learning_rate": 4.998004090026114e-06, "loss": 0.5375, "step": 1352 }, { "epoch": 0.08239198611576287, "grad_norm": 1.0804735908514178, "learning_rate": 4.998000900823378e-06, "loss": 0.5289, "step": 1353 }, { "epoch": 0.08245288189264074, "grad_norm": 1.037330228042543, "learning_rate": 4.997997709075732e-06, "loss": 0.592, "step": 1354 }, { "epoch": 0.08251377766951862, "grad_norm": 0.9974115773532269, "learning_rate": 4.9979945147831765e-06, "loss": 0.6366, "step": 1355 }, { "epoch": 0.08257467344639649, "grad_norm": 1.0222800624282542, "learning_rate": 4.9979913179457165e-06, "loss": 0.5905, "step": 1356 }, { "epoch": 0.08263556922327436, "grad_norm": 0.9780487459100352, "learning_rate": 4.997988118563355e-06, "loss": 0.5722, "step": 1357 }, { "epoch": 0.08269646500015224, "grad_norm": 1.0219318885594033, "learning_rate": 4.9979849166360965e-06, "loss": 0.6011, "step": 1358 }, { "epoch": 0.08275736077703011, "grad_norm": 1.0758291638979902, "learning_rate": 4.997981712163941e-06, "loss": 0.5162, "step": 1359 }, { "epoch": 0.08281825655390798, "grad_norm": 0.999832132315375, "learning_rate": 4.997978505146895e-06, "loss": 0.5622, "step": 1360 }, { "epoch": 0.08287915233078585, "grad_norm": 0.9808118654023391, "learning_rate": 4.997975295584959e-06, "loss": 0.6046, "step": 1361 }, { "epoch": 0.08294004810766373, "grad_norm": 1.1092381489336622, "learning_rate": 4.997972083478139e-06, "loss": 0.5619, "step": 1362 }, { "epoch": 0.0830009438845416, "grad_norm": 1.0424230626462863, "learning_rate": 4.997968868826436e-06, "loss": 0.544, "step": 1363 }, { "epoch": 0.08306183966141949, "grad_norm": 1.0018157474870995, "learning_rate": 4.997965651629855e-06, "loss": 0.5661, "step": 1364 }, { "epoch": 0.08312273543829736, "grad_norm": 1.0616809141868, "learning_rate": 4.997962431888398e-06, "loss": 0.5381, "step": 1365 }, { "epoch": 0.08318363121517523, "grad_norm": 1.1332564891024013, "learning_rate": 4.997959209602069e-06, "loss": 0.6207, "step": 1366 }, { "epoch": 0.0832445269920531, "grad_norm": 0.9827494918664205, "learning_rate": 4.997955984770872e-06, "loss": 0.5591, "step": 1367 }, { "epoch": 0.08330542276893098, "grad_norm": 1.0168679967341927, "learning_rate": 4.997952757394809e-06, "loss": 0.5713, "step": 1368 }, { "epoch": 0.08336631854580885, "grad_norm": 1.0451460056429356, "learning_rate": 4.997949527473882e-06, "loss": 0.5258, "step": 1369 }, { "epoch": 0.08342721432268672, "grad_norm": 0.9608058777648145, "learning_rate": 4.997946295008098e-06, "loss": 0.5385, "step": 1370 }, { "epoch": 0.0834881100995646, "grad_norm": 1.0052888464370753, "learning_rate": 4.997943059997456e-06, "loss": 0.5611, "step": 1371 }, { "epoch": 0.08354900587644247, "grad_norm": 0.9253910354367356, "learning_rate": 4.997939822441964e-06, "loss": 0.6202, "step": 1372 }, { "epoch": 0.08360990165332034, "grad_norm": 1.12127032484878, "learning_rate": 4.997936582341622e-06, "loss": 0.4604, "step": 1373 }, { "epoch": 0.08367079743019822, "grad_norm": 1.1295133506301014, "learning_rate": 4.997933339696434e-06, "loss": 0.5926, "step": 1374 }, { "epoch": 0.08373169320707609, "grad_norm": 1.0038694219269453, "learning_rate": 4.997930094506403e-06, "loss": 0.5032, "step": 1375 }, { "epoch": 0.08379258898395396, "grad_norm": 1.0525057907776012, "learning_rate": 4.997926846771534e-06, "loss": 0.5264, "step": 1376 }, { "epoch": 0.08385348476083183, "grad_norm": 0.9752283523748062, "learning_rate": 4.997923596491827e-06, "loss": 0.5388, "step": 1377 }, { "epoch": 0.0839143805377097, "grad_norm": 1.0056225533012288, "learning_rate": 4.997920343667289e-06, "loss": 0.6161, "step": 1378 }, { "epoch": 0.08397527631458758, "grad_norm": 0.9517339861709416, "learning_rate": 4.997917088297921e-06, "loss": 0.5825, "step": 1379 }, { "epoch": 0.08403617209146545, "grad_norm": 1.0483468745579916, "learning_rate": 4.9979138303837275e-06, "loss": 0.6243, "step": 1380 }, { "epoch": 0.08409706786834333, "grad_norm": 1.0526937855025145, "learning_rate": 4.997910569924711e-06, "loss": 0.5865, "step": 1381 }, { "epoch": 0.0841579636452212, "grad_norm": 1.0519133941561867, "learning_rate": 4.997907306920875e-06, "loss": 0.5529, "step": 1382 }, { "epoch": 0.08421885942209907, "grad_norm": 0.9936847049611551, "learning_rate": 4.997904041372223e-06, "loss": 0.5124, "step": 1383 }, { "epoch": 0.08427975519897696, "grad_norm": 1.0426497269219892, "learning_rate": 4.997900773278759e-06, "loss": 0.528, "step": 1384 }, { "epoch": 0.08434065097585483, "grad_norm": 1.0588432654556976, "learning_rate": 4.997897502640485e-06, "loss": 0.5863, "step": 1385 }, { "epoch": 0.0844015467527327, "grad_norm": 0.9125416209454333, "learning_rate": 4.997894229457405e-06, "loss": 0.5919, "step": 1386 }, { "epoch": 0.08446244252961058, "grad_norm": 1.1610164023350729, "learning_rate": 4.997890953729523e-06, "loss": 0.5343, "step": 1387 }, { "epoch": 0.08452333830648845, "grad_norm": 1.046387991786299, "learning_rate": 4.997887675456841e-06, "loss": 0.552, "step": 1388 }, { "epoch": 0.08458423408336632, "grad_norm": 0.9806410909948885, "learning_rate": 4.997884394639363e-06, "loss": 0.6115, "step": 1389 }, { "epoch": 0.0846451298602442, "grad_norm": 1.0685078013425313, "learning_rate": 4.997881111277092e-06, "loss": 0.5033, "step": 1390 }, { "epoch": 0.08470602563712207, "grad_norm": 1.0285100216045793, "learning_rate": 4.997877825370032e-06, "loss": 0.5427, "step": 1391 }, { "epoch": 0.08476692141399994, "grad_norm": 0.9890166207437792, "learning_rate": 4.997874536918185e-06, "loss": 0.534, "step": 1392 }, { "epoch": 0.08482781719087781, "grad_norm": 0.978209647363005, "learning_rate": 4.997871245921557e-06, "loss": 0.6265, "step": 1393 }, { "epoch": 0.08488871296775569, "grad_norm": 1.1507850374043587, "learning_rate": 4.997867952380149e-06, "loss": 0.5361, "step": 1394 }, { "epoch": 0.08494960874463356, "grad_norm": 0.9458799981211218, "learning_rate": 4.997864656293965e-06, "loss": 0.5654, "step": 1395 }, { "epoch": 0.08501050452151143, "grad_norm": 1.0653908195035158, "learning_rate": 4.997861357663009e-06, "loss": 0.5708, "step": 1396 }, { "epoch": 0.0850714002983893, "grad_norm": 0.9832083459749902, "learning_rate": 4.997858056487283e-06, "loss": 0.6144, "step": 1397 }, { "epoch": 0.08513229607526718, "grad_norm": 1.0249520820930933, "learning_rate": 4.997854752766791e-06, "loss": 0.5323, "step": 1398 }, { "epoch": 0.08519319185214505, "grad_norm": 1.0110827225296726, "learning_rate": 4.997851446501537e-06, "loss": 0.5512, "step": 1399 }, { "epoch": 0.08525408762902292, "grad_norm": 0.9883251659717444, "learning_rate": 4.997848137691525e-06, "loss": 0.5644, "step": 1400 }, { "epoch": 0.0853149834059008, "grad_norm": 0.9623680422273015, "learning_rate": 4.997844826336755e-06, "loss": 0.5772, "step": 1401 }, { "epoch": 0.08537587918277867, "grad_norm": 1.1064886346974587, "learning_rate": 4.997841512437234e-06, "loss": 0.5224, "step": 1402 }, { "epoch": 0.08543677495965654, "grad_norm": 1.0253791751720778, "learning_rate": 4.997838195992964e-06, "loss": 0.5314, "step": 1403 }, { "epoch": 0.08549767073653441, "grad_norm": 1.0898475718300318, "learning_rate": 4.997834877003947e-06, "loss": 0.5406, "step": 1404 }, { "epoch": 0.0855585665134123, "grad_norm": 1.0135577231400803, "learning_rate": 4.99783155547019e-06, "loss": 0.5673, "step": 1405 }, { "epoch": 0.08561946229029017, "grad_norm": 1.0572636733680414, "learning_rate": 4.997828231391693e-06, "loss": 0.5269, "step": 1406 }, { "epoch": 0.08568035806716805, "grad_norm": 1.044291244956292, "learning_rate": 4.99782490476846e-06, "loss": 0.5125, "step": 1407 }, { "epoch": 0.08574125384404592, "grad_norm": 1.1385597566459287, "learning_rate": 4.997821575600495e-06, "loss": 0.6108, "step": 1408 }, { "epoch": 0.08580214962092379, "grad_norm": 1.0149380631100289, "learning_rate": 4.997818243887802e-06, "loss": 0.536, "step": 1409 }, { "epoch": 0.08586304539780167, "grad_norm": 0.9591231348521068, "learning_rate": 4.9978149096303835e-06, "loss": 0.6501, "step": 1410 }, { "epoch": 0.08592394117467954, "grad_norm": 1.07694937637205, "learning_rate": 4.997811572828243e-06, "loss": 0.5801, "step": 1411 }, { "epoch": 0.08598483695155741, "grad_norm": 0.9814813676378317, "learning_rate": 4.997808233481384e-06, "loss": 0.5415, "step": 1412 }, { "epoch": 0.08604573272843528, "grad_norm": 1.1194211270791776, "learning_rate": 4.99780489158981e-06, "loss": 0.5346, "step": 1413 }, { "epoch": 0.08610662850531316, "grad_norm": 1.0877240173262892, "learning_rate": 4.997801547153524e-06, "loss": 0.5917, "step": 1414 }, { "epoch": 0.08616752428219103, "grad_norm": 1.0508841654139938, "learning_rate": 4.99779820017253e-06, "loss": 0.5515, "step": 1415 }, { "epoch": 0.0862284200590689, "grad_norm": 1.0368533898527248, "learning_rate": 4.997794850646831e-06, "loss": 0.5449, "step": 1416 }, { "epoch": 0.08628931583594678, "grad_norm": 0.9749447423036011, "learning_rate": 4.997791498576431e-06, "loss": 0.5707, "step": 1417 }, { "epoch": 0.08635021161282465, "grad_norm": 1.0896998362410615, "learning_rate": 4.997788143961332e-06, "loss": 0.5697, "step": 1418 }, { "epoch": 0.08641110738970252, "grad_norm": 1.0298344892350537, "learning_rate": 4.997784786801539e-06, "loss": 0.4894, "step": 1419 }, { "epoch": 0.0864720031665804, "grad_norm": 1.0513141846293907, "learning_rate": 4.997781427097055e-06, "loss": 0.557, "step": 1420 }, { "epoch": 0.08653289894345827, "grad_norm": 1.0595630971554015, "learning_rate": 4.997778064847882e-06, "loss": 0.5555, "step": 1421 }, { "epoch": 0.08659379472033614, "grad_norm": 0.9386100046397133, "learning_rate": 4.997774700054026e-06, "loss": 0.5809, "step": 1422 }, { "epoch": 0.08665469049721401, "grad_norm": 1.0831023387306655, "learning_rate": 4.997771332715488e-06, "loss": 0.5047, "step": 1423 }, { "epoch": 0.08671558627409189, "grad_norm": 1.0386275500494708, "learning_rate": 4.997767962832274e-06, "loss": 0.5113, "step": 1424 }, { "epoch": 0.08677648205096977, "grad_norm": 1.0196578641150749, "learning_rate": 4.997764590404386e-06, "loss": 0.5531, "step": 1425 }, { "epoch": 0.08683737782784764, "grad_norm": 1.0054996361558854, "learning_rate": 4.997761215431825e-06, "loss": 0.541, "step": 1426 }, { "epoch": 0.08689827360472552, "grad_norm": 1.0262025888770043, "learning_rate": 4.997757837914598e-06, "loss": 0.5513, "step": 1427 }, { "epoch": 0.08695916938160339, "grad_norm": 1.1113985121907357, "learning_rate": 4.997754457852708e-06, "loss": 0.5794, "step": 1428 }, { "epoch": 0.08702006515848126, "grad_norm": 0.9938661966260649, "learning_rate": 4.997751075246157e-06, "loss": 0.5774, "step": 1429 }, { "epoch": 0.08708096093535914, "grad_norm": 1.0329260867319197, "learning_rate": 4.9977476900949494e-06, "loss": 0.5607, "step": 1430 }, { "epoch": 0.08714185671223701, "grad_norm": 1.0338019149930968, "learning_rate": 4.997744302399089e-06, "loss": 0.5459, "step": 1431 }, { "epoch": 0.08720275248911488, "grad_norm": 1.0345679020411003, "learning_rate": 4.997740912158577e-06, "loss": 0.5639, "step": 1432 }, { "epoch": 0.08726364826599275, "grad_norm": 1.0284295666658978, "learning_rate": 4.99773751937342e-06, "loss": 0.5406, "step": 1433 }, { "epoch": 0.08732454404287063, "grad_norm": 1.0410918485948286, "learning_rate": 4.99773412404362e-06, "loss": 0.5621, "step": 1434 }, { "epoch": 0.0873854398197485, "grad_norm": 1.0691144751914856, "learning_rate": 4.997730726169179e-06, "loss": 0.4641, "step": 1435 }, { "epoch": 0.08744633559662637, "grad_norm": 1.0350929910829778, "learning_rate": 4.997727325750102e-06, "loss": 0.508, "step": 1436 }, { "epoch": 0.08750723137350425, "grad_norm": 1.0736502719106746, "learning_rate": 4.997723922786394e-06, "loss": 0.5569, "step": 1437 }, { "epoch": 0.08756812715038212, "grad_norm": 1.0439770582496712, "learning_rate": 4.997720517278055e-06, "loss": 0.6457, "step": 1438 }, { "epoch": 0.08762902292725999, "grad_norm": 1.0746961933877615, "learning_rate": 4.997717109225091e-06, "loss": 0.52, "step": 1439 }, { "epoch": 0.08768991870413786, "grad_norm": 1.1218209856827606, "learning_rate": 4.997713698627506e-06, "loss": 0.5238, "step": 1440 }, { "epoch": 0.08775081448101574, "grad_norm": 1.083361370382905, "learning_rate": 4.9977102854853e-06, "loss": 0.5434, "step": 1441 }, { "epoch": 0.08781171025789361, "grad_norm": 1.0201669579796162, "learning_rate": 4.99770686979848e-06, "loss": 0.547, "step": 1442 }, { "epoch": 0.08787260603477148, "grad_norm": 1.1388569690790045, "learning_rate": 4.997703451567047e-06, "loss": 0.5379, "step": 1443 }, { "epoch": 0.08793350181164936, "grad_norm": 1.023607137618843, "learning_rate": 4.9977000307910076e-06, "loss": 0.6016, "step": 1444 }, { "epoch": 0.08799439758852723, "grad_norm": 1.0288567234464243, "learning_rate": 4.997696607470361e-06, "loss": 0.5656, "step": 1445 }, { "epoch": 0.08805529336540512, "grad_norm": 1.053174616110197, "learning_rate": 4.997693181605115e-06, "loss": 0.5053, "step": 1446 }, { "epoch": 0.08811618914228299, "grad_norm": 1.147217374379264, "learning_rate": 4.99768975319527e-06, "loss": 0.5617, "step": 1447 }, { "epoch": 0.08817708491916086, "grad_norm": 0.9808081873354311, "learning_rate": 4.9976863222408315e-06, "loss": 0.5677, "step": 1448 }, { "epoch": 0.08823798069603873, "grad_norm": 0.9976017514534825, "learning_rate": 4.997682888741801e-06, "loss": 0.5208, "step": 1449 }, { "epoch": 0.08829887647291661, "grad_norm": 1.0358935190333154, "learning_rate": 4.997679452698184e-06, "loss": 0.6268, "step": 1450 }, { "epoch": 0.08835977224979448, "grad_norm": 1.0230771127122553, "learning_rate": 4.997676014109982e-06, "loss": 0.5686, "step": 1451 }, { "epoch": 0.08842066802667235, "grad_norm": 0.9942033346516335, "learning_rate": 4.9976725729772e-06, "loss": 0.5691, "step": 1452 }, { "epoch": 0.08848156380355023, "grad_norm": 1.0592168587071582, "learning_rate": 4.997669129299843e-06, "loss": 0.5414, "step": 1453 }, { "epoch": 0.0885424595804281, "grad_norm": 1.0225186110225175, "learning_rate": 4.99766568307791e-06, "loss": 0.5433, "step": 1454 }, { "epoch": 0.08860335535730597, "grad_norm": 1.110918785586578, "learning_rate": 4.997662234311409e-06, "loss": 0.4987, "step": 1455 }, { "epoch": 0.08866425113418384, "grad_norm": 1.0408031009115777, "learning_rate": 4.997658783000341e-06, "loss": 0.5705, "step": 1456 }, { "epoch": 0.08872514691106172, "grad_norm": 1.0688611402532135, "learning_rate": 4.997655329144709e-06, "loss": 0.4916, "step": 1457 }, { "epoch": 0.08878604268793959, "grad_norm": 1.095416605117354, "learning_rate": 4.997651872744519e-06, "loss": 0.522, "step": 1458 }, { "epoch": 0.08884693846481746, "grad_norm": 1.1839311023953907, "learning_rate": 4.997648413799772e-06, "loss": 0.5558, "step": 1459 }, { "epoch": 0.08890783424169534, "grad_norm": 1.0396988902746995, "learning_rate": 4.997644952310475e-06, "loss": 0.5842, "step": 1460 }, { "epoch": 0.08896873001857321, "grad_norm": 0.9545960636033236, "learning_rate": 4.997641488276627e-06, "loss": 0.564, "step": 1461 }, { "epoch": 0.08902962579545108, "grad_norm": 1.039427804600207, "learning_rate": 4.9976380216982355e-06, "loss": 0.547, "step": 1462 }, { "epoch": 0.08909052157232895, "grad_norm": 1.0183226728438297, "learning_rate": 4.997634552575301e-06, "loss": 0.5686, "step": 1463 }, { "epoch": 0.08915141734920683, "grad_norm": 1.1406363836260522, "learning_rate": 4.997631080907829e-06, "loss": 0.5689, "step": 1464 }, { "epoch": 0.0892123131260847, "grad_norm": 0.954764264489982, "learning_rate": 4.997627606695822e-06, "loss": 0.5657, "step": 1465 }, { "epoch": 0.08927320890296259, "grad_norm": 1.0514434387313412, "learning_rate": 4.997624129939285e-06, "loss": 0.5594, "step": 1466 }, { "epoch": 0.08933410467984046, "grad_norm": 1.1355427286401618, "learning_rate": 4.9976206506382185e-06, "loss": 0.5396, "step": 1467 }, { "epoch": 0.08939500045671833, "grad_norm": 1.025275028018011, "learning_rate": 4.9976171687926295e-06, "loss": 0.5283, "step": 1468 }, { "epoch": 0.0894558962335962, "grad_norm": 1.052539185221459, "learning_rate": 4.99761368440252e-06, "loss": 0.5892, "step": 1469 }, { "epoch": 0.08951679201047408, "grad_norm": 1.0441638021676458, "learning_rate": 4.997610197467892e-06, "loss": 0.5084, "step": 1470 }, { "epoch": 0.08957768778735195, "grad_norm": 0.9828948772711906, "learning_rate": 4.997606707988753e-06, "loss": 0.5196, "step": 1471 }, { "epoch": 0.08963858356422982, "grad_norm": 1.044570197782546, "learning_rate": 4.997603215965103e-06, "loss": 0.6172, "step": 1472 }, { "epoch": 0.0896994793411077, "grad_norm": 1.0019109570095557, "learning_rate": 4.997599721396947e-06, "loss": 0.563, "step": 1473 }, { "epoch": 0.08976037511798557, "grad_norm": 1.0104474715745193, "learning_rate": 4.997596224284288e-06, "loss": 0.5874, "step": 1474 }, { "epoch": 0.08982127089486344, "grad_norm": 1.0229569792691542, "learning_rate": 4.99759272462713e-06, "loss": 0.5832, "step": 1475 }, { "epoch": 0.08988216667174131, "grad_norm": 1.0113815704523144, "learning_rate": 4.997589222425477e-06, "loss": 0.5361, "step": 1476 }, { "epoch": 0.08994306244861919, "grad_norm": 1.069070565971854, "learning_rate": 4.997585717679331e-06, "loss": 0.5972, "step": 1477 }, { "epoch": 0.09000395822549706, "grad_norm": 1.031671044534895, "learning_rate": 4.997582210388697e-06, "loss": 0.5571, "step": 1478 }, { "epoch": 0.09006485400237493, "grad_norm": 0.9302830592559393, "learning_rate": 4.997578700553579e-06, "loss": 0.6045, "step": 1479 }, { "epoch": 0.0901257497792528, "grad_norm": 0.9583526507188402, "learning_rate": 4.9975751881739785e-06, "loss": 0.5069, "step": 1480 }, { "epoch": 0.09018664555613068, "grad_norm": 1.0717051620533726, "learning_rate": 4.997571673249901e-06, "loss": 0.5348, "step": 1481 }, { "epoch": 0.09024754133300855, "grad_norm": 1.056127623223244, "learning_rate": 4.997568155781349e-06, "loss": 0.6048, "step": 1482 }, { "epoch": 0.09030843710988642, "grad_norm": 1.0307393941051817, "learning_rate": 4.9975646357683274e-06, "loss": 0.5791, "step": 1483 }, { "epoch": 0.0903693328867643, "grad_norm": 1.0530716484852736, "learning_rate": 4.9975611132108385e-06, "loss": 0.5443, "step": 1484 }, { "epoch": 0.09043022866364217, "grad_norm": 0.9259547168392037, "learning_rate": 4.997557588108886e-06, "loss": 0.5326, "step": 1485 }, { "epoch": 0.09049112444052004, "grad_norm": 1.0423985984834203, "learning_rate": 4.997554060462474e-06, "loss": 0.5341, "step": 1486 }, { "epoch": 0.09055202021739793, "grad_norm": 1.0435297512326267, "learning_rate": 4.9975505302716055e-06, "loss": 0.5399, "step": 1487 }, { "epoch": 0.0906129159942758, "grad_norm": 1.028291802933058, "learning_rate": 4.997546997536285e-06, "loss": 0.477, "step": 1488 }, { "epoch": 0.09067381177115368, "grad_norm": 1.0268939094049014, "learning_rate": 4.997543462256514e-06, "loss": 0.5423, "step": 1489 }, { "epoch": 0.09073470754803155, "grad_norm": 0.9831200943773456, "learning_rate": 4.9975399244323e-06, "loss": 0.5742, "step": 1490 }, { "epoch": 0.09079560332490942, "grad_norm": 1.104751051325835, "learning_rate": 4.9975363840636425e-06, "loss": 0.5929, "step": 1491 }, { "epoch": 0.0908564991017873, "grad_norm": 1.0753710109943513, "learning_rate": 4.9975328411505474e-06, "loss": 0.5444, "step": 1492 }, { "epoch": 0.09091739487866517, "grad_norm": 1.0327806918957327, "learning_rate": 4.997529295693018e-06, "loss": 0.5441, "step": 1493 }, { "epoch": 0.09097829065554304, "grad_norm": 1.0529295416883744, "learning_rate": 4.997525747691058e-06, "loss": 0.5696, "step": 1494 }, { "epoch": 0.09103918643242091, "grad_norm": 0.9992504249387475, "learning_rate": 4.99752219714467e-06, "loss": 0.591, "step": 1495 }, { "epoch": 0.09110008220929879, "grad_norm": 1.037861396286008, "learning_rate": 4.997518644053858e-06, "loss": 0.5544, "step": 1496 }, { "epoch": 0.09116097798617666, "grad_norm": 1.0394573515446932, "learning_rate": 4.997515088418626e-06, "loss": 0.5394, "step": 1497 }, { "epoch": 0.09122187376305453, "grad_norm": 0.9996856964863439, "learning_rate": 4.997511530238979e-06, "loss": 0.5899, "step": 1498 }, { "epoch": 0.0912827695399324, "grad_norm": 1.0668797451346084, "learning_rate": 4.997507969514918e-06, "loss": 0.5156, "step": 1499 }, { "epoch": 0.09134366531681028, "grad_norm": 1.0769169270260723, "learning_rate": 4.997504406246447e-06, "loss": 0.4925, "step": 1500 }, { "epoch": 0.09140456109368815, "grad_norm": 1.0939306350403828, "learning_rate": 4.997500840433572e-06, "loss": 0.5604, "step": 1501 }, { "epoch": 0.09146545687056602, "grad_norm": 1.0312137302268414, "learning_rate": 4.997497272076293e-06, "loss": 0.535, "step": 1502 }, { "epoch": 0.0915263526474439, "grad_norm": 1.0185687828685626, "learning_rate": 4.997493701174618e-06, "loss": 0.6177, "step": 1503 }, { "epoch": 0.09158724842432177, "grad_norm": 0.9652339289400337, "learning_rate": 4.997490127728548e-06, "loss": 0.5492, "step": 1504 }, { "epoch": 0.09164814420119964, "grad_norm": 0.9924036299537408, "learning_rate": 4.997486551738087e-06, "loss": 0.5799, "step": 1505 }, { "epoch": 0.09170903997807751, "grad_norm": 1.0402205987782847, "learning_rate": 4.997482973203237e-06, "loss": 0.5606, "step": 1506 }, { "epoch": 0.0917699357549554, "grad_norm": 0.9354647843503042, "learning_rate": 4.997479392124005e-06, "loss": 0.5865, "step": 1507 }, { "epoch": 0.09183083153183327, "grad_norm": 1.1290563967528362, "learning_rate": 4.997475808500392e-06, "loss": 0.5671, "step": 1508 }, { "epoch": 0.09189172730871115, "grad_norm": 1.0125627414094271, "learning_rate": 4.997472222332402e-06, "loss": 0.5597, "step": 1509 }, { "epoch": 0.09195262308558902, "grad_norm": 1.0433103850491439, "learning_rate": 4.997468633620041e-06, "loss": 0.5367, "step": 1510 }, { "epoch": 0.09201351886246689, "grad_norm": 1.0859122148466283, "learning_rate": 4.997465042363309e-06, "loss": 0.5285, "step": 1511 }, { "epoch": 0.09207441463934476, "grad_norm": 1.059179631550993, "learning_rate": 4.997461448562213e-06, "loss": 0.4644, "step": 1512 }, { "epoch": 0.09213531041622264, "grad_norm": 1.0762717486743663, "learning_rate": 4.997457852216755e-06, "loss": 0.5254, "step": 1513 }, { "epoch": 0.09219620619310051, "grad_norm": 1.07458906415593, "learning_rate": 4.997454253326939e-06, "loss": 0.502, "step": 1514 }, { "epoch": 0.09225710196997838, "grad_norm": 1.0173029709389938, "learning_rate": 4.997450651892768e-06, "loss": 0.6298, "step": 1515 }, { "epoch": 0.09231799774685626, "grad_norm": 0.9976254784758077, "learning_rate": 4.997447047914246e-06, "loss": 0.6038, "step": 1516 }, { "epoch": 0.09237889352373413, "grad_norm": 1.018991013837208, "learning_rate": 4.997443441391377e-06, "loss": 0.5671, "step": 1517 }, { "epoch": 0.092439789300612, "grad_norm": 1.0607170570831757, "learning_rate": 4.997439832324165e-06, "loss": 0.5327, "step": 1518 }, { "epoch": 0.09250068507748987, "grad_norm": 1.0298225846643443, "learning_rate": 4.9974362207126125e-06, "loss": 0.6091, "step": 1519 }, { "epoch": 0.09256158085436775, "grad_norm": 1.1216921910167503, "learning_rate": 4.997432606556725e-06, "loss": 0.4841, "step": 1520 }, { "epoch": 0.09262247663124562, "grad_norm": 1.0143590249594023, "learning_rate": 4.9974289898565034e-06, "loss": 0.5583, "step": 1521 }, { "epoch": 0.0926833724081235, "grad_norm": 0.9823227316362182, "learning_rate": 4.997425370611954e-06, "loss": 0.5136, "step": 1522 }, { "epoch": 0.09274426818500137, "grad_norm": 1.0322133440895667, "learning_rate": 4.99742174882308e-06, "loss": 0.5393, "step": 1523 }, { "epoch": 0.09280516396187924, "grad_norm": 1.0091754046593295, "learning_rate": 4.997418124489885e-06, "loss": 0.5102, "step": 1524 }, { "epoch": 0.09286605973875711, "grad_norm": 1.0173147824999038, "learning_rate": 4.99741449761237e-06, "loss": 0.5238, "step": 1525 }, { "epoch": 0.09292695551563498, "grad_norm": 1.0286818400162883, "learning_rate": 4.997410868190543e-06, "loss": 0.5426, "step": 1526 }, { "epoch": 0.09298785129251286, "grad_norm": 1.0552073370210773, "learning_rate": 4.997407236224406e-06, "loss": 0.5348, "step": 1527 }, { "epoch": 0.09304874706939074, "grad_norm": 0.9775566873325453, "learning_rate": 4.997403601713961e-06, "loss": 0.5283, "step": 1528 }, { "epoch": 0.09310964284626862, "grad_norm": 1.0947866092436982, "learning_rate": 4.9973999646592145e-06, "loss": 0.519, "step": 1529 }, { "epoch": 0.09317053862314649, "grad_norm": 1.1222237975267093, "learning_rate": 4.997396325060169e-06, "loss": 0.4601, "step": 1530 }, { "epoch": 0.09323143440002436, "grad_norm": 1.1075191908964093, "learning_rate": 4.997392682916827e-06, "loss": 0.5338, "step": 1531 }, { "epoch": 0.09329233017690224, "grad_norm": 1.0374044469803558, "learning_rate": 4.997389038229194e-06, "loss": 0.5435, "step": 1532 }, { "epoch": 0.09335322595378011, "grad_norm": 1.0238648306797096, "learning_rate": 4.9973853909972715e-06, "loss": 0.617, "step": 1533 }, { "epoch": 0.09341412173065798, "grad_norm": 1.0110120262003894, "learning_rate": 4.997381741221067e-06, "loss": 0.537, "step": 1534 }, { "epoch": 0.09347501750753585, "grad_norm": 1.1116045466283238, "learning_rate": 4.99737808890058e-06, "loss": 0.5812, "step": 1535 }, { "epoch": 0.09353591328441373, "grad_norm": 1.138186247347138, "learning_rate": 4.997374434035817e-06, "loss": 0.5693, "step": 1536 }, { "epoch": 0.0935968090612916, "grad_norm": 1.1078288071429587, "learning_rate": 4.9973707766267795e-06, "loss": 0.5356, "step": 1537 }, { "epoch": 0.09365770483816947, "grad_norm": 1.004650256773113, "learning_rate": 4.9973671166734746e-06, "loss": 0.5227, "step": 1538 }, { "epoch": 0.09371860061504735, "grad_norm": 1.0458720868304787, "learning_rate": 4.997363454175903e-06, "loss": 0.5935, "step": 1539 }, { "epoch": 0.09377949639192522, "grad_norm": 0.9747879925101511, "learning_rate": 4.99735978913407e-06, "loss": 0.5739, "step": 1540 }, { "epoch": 0.09384039216880309, "grad_norm": 1.0478052868457708, "learning_rate": 4.997356121547978e-06, "loss": 0.5427, "step": 1541 }, { "epoch": 0.09390128794568096, "grad_norm": 0.9402820622815071, "learning_rate": 4.9973524514176315e-06, "loss": 0.5858, "step": 1542 }, { "epoch": 0.09396218372255884, "grad_norm": 1.0038702873038106, "learning_rate": 4.997348778743034e-06, "loss": 0.5239, "step": 1543 }, { "epoch": 0.09402307949943671, "grad_norm": 1.0151349313094575, "learning_rate": 4.997345103524191e-06, "loss": 0.5163, "step": 1544 }, { "epoch": 0.09408397527631458, "grad_norm": 1.1156061530707904, "learning_rate": 4.997341425761103e-06, "loss": 0.5184, "step": 1545 }, { "epoch": 0.09414487105319246, "grad_norm": 1.0582680102508362, "learning_rate": 4.997337745453776e-06, "loss": 0.5521, "step": 1546 }, { "epoch": 0.09420576683007033, "grad_norm": 1.0490234027638012, "learning_rate": 4.997334062602214e-06, "loss": 0.4862, "step": 1547 }, { "epoch": 0.09426666260694821, "grad_norm": 0.9842453263800433, "learning_rate": 4.997330377206419e-06, "loss": 0.6266, "step": 1548 }, { "epoch": 0.09432755838382609, "grad_norm": 1.0241601609899917, "learning_rate": 4.997326689266396e-06, "loss": 0.48, "step": 1549 }, { "epoch": 0.09438845416070396, "grad_norm": 1.041415023022409, "learning_rate": 4.9973229987821496e-06, "loss": 0.5573, "step": 1550 }, { "epoch": 0.09444934993758183, "grad_norm": 0.9987046454912827, "learning_rate": 4.997319305753681e-06, "loss": 0.5782, "step": 1551 }, { "epoch": 0.0945102457144597, "grad_norm": 0.9831255558441936, "learning_rate": 4.997315610180996e-06, "loss": 0.5812, "step": 1552 }, { "epoch": 0.09457114149133758, "grad_norm": 1.0150558065891764, "learning_rate": 4.997311912064098e-06, "loss": 0.5358, "step": 1553 }, { "epoch": 0.09463203726821545, "grad_norm": 1.0066199112388885, "learning_rate": 4.99730821140299e-06, "loss": 0.5794, "step": 1554 }, { "epoch": 0.09469293304509332, "grad_norm": 1.0793831341996227, "learning_rate": 4.9973045081976766e-06, "loss": 0.5476, "step": 1555 }, { "epoch": 0.0947538288219712, "grad_norm": 1.0426746813722718, "learning_rate": 4.9973008024481615e-06, "loss": 0.5689, "step": 1556 }, { "epoch": 0.09481472459884907, "grad_norm": 1.0580146053699575, "learning_rate": 4.997297094154447e-06, "loss": 0.5272, "step": 1557 }, { "epoch": 0.09487562037572694, "grad_norm": 0.9281574672614727, "learning_rate": 4.997293383316539e-06, "loss": 0.5342, "step": 1558 }, { "epoch": 0.09493651615260482, "grad_norm": 1.0260567417074147, "learning_rate": 4.997289669934442e-06, "loss": 0.5665, "step": 1559 }, { "epoch": 0.09499741192948269, "grad_norm": 1.080653521711041, "learning_rate": 4.997285954008156e-06, "loss": 0.5541, "step": 1560 }, { "epoch": 0.09505830770636056, "grad_norm": 1.0542801858590571, "learning_rate": 4.9972822355376885e-06, "loss": 0.4749, "step": 1561 }, { "epoch": 0.09511920348323843, "grad_norm": 1.0169727377991524, "learning_rate": 4.9972785145230405e-06, "loss": 0.6022, "step": 1562 }, { "epoch": 0.09518009926011631, "grad_norm": 1.146382297884247, "learning_rate": 4.997274790964217e-06, "loss": 0.584, "step": 1563 }, { "epoch": 0.09524099503699418, "grad_norm": 1.0925623592528895, "learning_rate": 4.997271064861223e-06, "loss": 0.5592, "step": 1564 }, { "epoch": 0.09530189081387205, "grad_norm": 1.050967815688295, "learning_rate": 4.997267336214061e-06, "loss": 0.5131, "step": 1565 }, { "epoch": 0.09536278659074993, "grad_norm": 0.95096346804481, "learning_rate": 4.997263605022734e-06, "loss": 0.5599, "step": 1566 }, { "epoch": 0.0954236823676278, "grad_norm": 0.9592202214737985, "learning_rate": 4.997259871287248e-06, "loss": 0.5557, "step": 1567 }, { "epoch": 0.09548457814450567, "grad_norm": 0.9891832824946665, "learning_rate": 4.997256135007604e-06, "loss": 0.5809, "step": 1568 }, { "epoch": 0.09554547392138356, "grad_norm": 0.988082912695688, "learning_rate": 4.997252396183809e-06, "loss": 0.548, "step": 1569 }, { "epoch": 0.09560636969826143, "grad_norm": 1.0495837025772932, "learning_rate": 4.997248654815864e-06, "loss": 0.5424, "step": 1570 }, { "epoch": 0.0956672654751393, "grad_norm": 1.0506824849627017, "learning_rate": 4.997244910903775e-06, "loss": 0.5351, "step": 1571 }, { "epoch": 0.09572816125201718, "grad_norm": 1.0076493019882604, "learning_rate": 4.9972411644475434e-06, "loss": 0.5409, "step": 1572 }, { "epoch": 0.09578905702889505, "grad_norm": 0.9574091855158229, "learning_rate": 4.997237415447176e-06, "loss": 0.5306, "step": 1573 }, { "epoch": 0.09584995280577292, "grad_norm": 1.0688514449495776, "learning_rate": 4.9972336639026746e-06, "loss": 0.6222, "step": 1574 }, { "epoch": 0.0959108485826508, "grad_norm": 1.014330762404642, "learning_rate": 4.997229909814043e-06, "loss": 0.5506, "step": 1575 }, { "epoch": 0.09597174435952867, "grad_norm": 1.0548737965095851, "learning_rate": 4.997226153181285e-06, "loss": 0.4952, "step": 1576 }, { "epoch": 0.09603264013640654, "grad_norm": 1.1088414210380102, "learning_rate": 4.997222394004405e-06, "loss": 0.5097, "step": 1577 }, { "epoch": 0.09609353591328441, "grad_norm": 1.0274473028913385, "learning_rate": 4.997218632283408e-06, "loss": 0.4958, "step": 1578 }, { "epoch": 0.09615443169016229, "grad_norm": 1.044897866548084, "learning_rate": 4.997214868018296e-06, "loss": 0.5665, "step": 1579 }, { "epoch": 0.09621532746704016, "grad_norm": 1.0122183704238317, "learning_rate": 4.997211101209073e-06, "loss": 0.5603, "step": 1580 }, { "epoch": 0.09627622324391803, "grad_norm": 0.8771526626743141, "learning_rate": 4.997207331855744e-06, "loss": 0.5417, "step": 1581 }, { "epoch": 0.0963371190207959, "grad_norm": 1.0327186318233272, "learning_rate": 4.997203559958311e-06, "loss": 0.4961, "step": 1582 }, { "epoch": 0.09639801479767378, "grad_norm": 1.0653502157672567, "learning_rate": 4.99719978551678e-06, "loss": 0.5163, "step": 1583 }, { "epoch": 0.09645891057455165, "grad_norm": 1.094303821977199, "learning_rate": 4.997196008531153e-06, "loss": 0.5635, "step": 1584 }, { "epoch": 0.09651980635142952, "grad_norm": 1.0898070521865237, "learning_rate": 4.9971922290014356e-06, "loss": 0.5281, "step": 1585 }, { "epoch": 0.0965807021283074, "grad_norm": 1.0508055164904817, "learning_rate": 4.99718844692763e-06, "loss": 0.5349, "step": 1586 }, { "epoch": 0.09664159790518527, "grad_norm": 1.0449785320384775, "learning_rate": 4.997184662309741e-06, "loss": 0.4678, "step": 1587 }, { "epoch": 0.09670249368206314, "grad_norm": 0.9608799629650544, "learning_rate": 4.997180875147771e-06, "loss": 0.5726, "step": 1588 }, { "epoch": 0.09676338945894103, "grad_norm": 1.0589726621498352, "learning_rate": 4.997177085441727e-06, "loss": 0.5764, "step": 1589 }, { "epoch": 0.0968242852358189, "grad_norm": 1.0402023153469924, "learning_rate": 4.99717329319161e-06, "loss": 0.5333, "step": 1590 }, { "epoch": 0.09688518101269677, "grad_norm": 1.1480961437663826, "learning_rate": 4.997169498397424e-06, "loss": 0.5458, "step": 1591 }, { "epoch": 0.09694607678957465, "grad_norm": 1.0492152162174317, "learning_rate": 4.997165701059175e-06, "loss": 0.4963, "step": 1592 }, { "epoch": 0.09700697256645252, "grad_norm": 1.0494049982685263, "learning_rate": 4.997161901176865e-06, "loss": 0.6331, "step": 1593 }, { "epoch": 0.0970678683433304, "grad_norm": 1.0530816811503116, "learning_rate": 4.997158098750498e-06, "loss": 0.5482, "step": 1594 }, { "epoch": 0.09712876412020827, "grad_norm": 1.056144132256503, "learning_rate": 4.997154293780078e-06, "loss": 0.518, "step": 1595 }, { "epoch": 0.09718965989708614, "grad_norm": 1.083980197418238, "learning_rate": 4.9971504862656096e-06, "loss": 0.5467, "step": 1596 }, { "epoch": 0.09725055567396401, "grad_norm": 0.9895856942452385, "learning_rate": 4.997146676207096e-06, "loss": 0.5809, "step": 1597 }, { "epoch": 0.09731145145084188, "grad_norm": 1.006164071378832, "learning_rate": 4.997142863604542e-06, "loss": 0.5575, "step": 1598 }, { "epoch": 0.09737234722771976, "grad_norm": 1.0060217843085906, "learning_rate": 4.997139048457949e-06, "loss": 0.5524, "step": 1599 }, { "epoch": 0.09743324300459763, "grad_norm": 1.0499065109490533, "learning_rate": 4.997135230767325e-06, "loss": 0.6315, "step": 1600 }, { "epoch": 0.0974941387814755, "grad_norm": 0.9459363408265279, "learning_rate": 4.99713141053267e-06, "loss": 0.6146, "step": 1601 }, { "epoch": 0.09755503455835338, "grad_norm": 0.9677273042392831, "learning_rate": 4.997127587753989e-06, "loss": 0.6101, "step": 1602 }, { "epoch": 0.09761593033523125, "grad_norm": 1.1040265602940726, "learning_rate": 4.9971237624312876e-06, "loss": 0.5918, "step": 1603 }, { "epoch": 0.09767682611210912, "grad_norm": 0.9673238105062145, "learning_rate": 4.997119934564568e-06, "loss": 0.5945, "step": 1604 }, { "epoch": 0.097737721888987, "grad_norm": 1.0385811926560016, "learning_rate": 4.997116104153835e-06, "loss": 0.5017, "step": 1605 }, { "epoch": 0.09779861766586487, "grad_norm": 1.0924994877208196, "learning_rate": 4.997112271199092e-06, "loss": 0.54, "step": 1606 }, { "epoch": 0.09785951344274274, "grad_norm": 1.0984714330536742, "learning_rate": 4.997108435700342e-06, "loss": 0.5217, "step": 1607 }, { "epoch": 0.09792040921962061, "grad_norm": 1.1076211461452965, "learning_rate": 4.99710459765759e-06, "loss": 0.5101, "step": 1608 }, { "epoch": 0.09798130499649849, "grad_norm": 1.0309572964691338, "learning_rate": 4.99710075707084e-06, "loss": 0.6072, "step": 1609 }, { "epoch": 0.09804220077337637, "grad_norm": 1.1598314623467008, "learning_rate": 4.997096913940096e-06, "loss": 0.4946, "step": 1610 }, { "epoch": 0.09810309655025425, "grad_norm": 0.9649607105716999, "learning_rate": 4.997093068265361e-06, "loss": 0.5898, "step": 1611 }, { "epoch": 0.09816399232713212, "grad_norm": 1.0740658280050155, "learning_rate": 4.9970892200466404e-06, "loss": 0.4929, "step": 1612 }, { "epoch": 0.09822488810400999, "grad_norm": 1.0548457210877893, "learning_rate": 4.997085369283936e-06, "loss": 0.599, "step": 1613 }, { "epoch": 0.09828578388088786, "grad_norm": 1.0463515671982837, "learning_rate": 4.997081515977253e-06, "loss": 0.5766, "step": 1614 }, { "epoch": 0.09834667965776574, "grad_norm": 1.184934601623124, "learning_rate": 4.9970776601265965e-06, "loss": 0.5412, "step": 1615 }, { "epoch": 0.09840757543464361, "grad_norm": 0.9569547232273173, "learning_rate": 4.997073801731969e-06, "loss": 0.5652, "step": 1616 }, { "epoch": 0.09846847121152148, "grad_norm": 1.0677910138729887, "learning_rate": 4.997069940793374e-06, "loss": 0.5457, "step": 1617 }, { "epoch": 0.09852936698839936, "grad_norm": 1.045797203809808, "learning_rate": 4.997066077310816e-06, "loss": 0.4915, "step": 1618 }, { "epoch": 0.09859026276527723, "grad_norm": 1.009444067826701, "learning_rate": 4.997062211284299e-06, "loss": 0.5466, "step": 1619 }, { "epoch": 0.0986511585421551, "grad_norm": 1.0360269955297459, "learning_rate": 4.9970583427138275e-06, "loss": 0.533, "step": 1620 }, { "epoch": 0.09871205431903297, "grad_norm": 1.0005987213485463, "learning_rate": 4.997054471599404e-06, "loss": 0.5893, "step": 1621 }, { "epoch": 0.09877295009591085, "grad_norm": 1.19080785133743, "learning_rate": 4.997050597941034e-06, "loss": 0.526, "step": 1622 }, { "epoch": 0.09883384587278872, "grad_norm": 1.0701734217052064, "learning_rate": 4.9970467217387205e-06, "loss": 0.5537, "step": 1623 }, { "epoch": 0.09889474164966659, "grad_norm": 1.0262236021560172, "learning_rate": 4.9970428429924685e-06, "loss": 0.5831, "step": 1624 }, { "epoch": 0.09895563742654447, "grad_norm": 1.0592401269019265, "learning_rate": 4.99703896170228e-06, "loss": 0.5082, "step": 1625 }, { "epoch": 0.09901653320342234, "grad_norm": 1.0294396481232975, "learning_rate": 4.997035077868161e-06, "loss": 0.6048, "step": 1626 }, { "epoch": 0.09907742898030021, "grad_norm": 1.0712444035132218, "learning_rate": 4.9970311914901135e-06, "loss": 0.5282, "step": 1627 }, { "epoch": 0.09913832475717808, "grad_norm": 1.0201893384144007, "learning_rate": 4.997027302568144e-06, "loss": 0.4903, "step": 1628 }, { "epoch": 0.09919922053405596, "grad_norm": 1.0500106808726113, "learning_rate": 4.997023411102254e-06, "loss": 0.5567, "step": 1629 }, { "epoch": 0.09926011631093384, "grad_norm": 1.0324008578928712, "learning_rate": 4.997019517092449e-06, "loss": 0.5689, "step": 1630 }, { "epoch": 0.09932101208781172, "grad_norm": 0.9984614804117524, "learning_rate": 4.9970156205387325e-06, "loss": 0.5665, "step": 1631 }, { "epoch": 0.09938190786468959, "grad_norm": 1.0412374187952131, "learning_rate": 4.997011721441107e-06, "loss": 0.6086, "step": 1632 }, { "epoch": 0.09944280364156746, "grad_norm": 1.0253725531842854, "learning_rate": 4.99700781979958e-06, "loss": 0.5605, "step": 1633 }, { "epoch": 0.09950369941844533, "grad_norm": 1.1065156383615307, "learning_rate": 4.997003915614151e-06, "loss": 0.5372, "step": 1634 }, { "epoch": 0.09956459519532321, "grad_norm": 1.123751778752941, "learning_rate": 4.997000008884828e-06, "loss": 0.526, "step": 1635 }, { "epoch": 0.09962549097220108, "grad_norm": 0.9448964530511076, "learning_rate": 4.996996099611613e-06, "loss": 0.6052, "step": 1636 }, { "epoch": 0.09968638674907895, "grad_norm": 1.0279268124592427, "learning_rate": 4.9969921877945105e-06, "loss": 0.5397, "step": 1637 }, { "epoch": 0.09974728252595683, "grad_norm": 1.135041739719436, "learning_rate": 4.996988273433524e-06, "loss": 0.5171, "step": 1638 }, { "epoch": 0.0998081783028347, "grad_norm": 1.1853031709820716, "learning_rate": 4.996984356528657e-06, "loss": 0.5193, "step": 1639 }, { "epoch": 0.09986907407971257, "grad_norm": 1.0360247508771494, "learning_rate": 4.996980437079915e-06, "loss": 0.5163, "step": 1640 }, { "epoch": 0.09992996985659044, "grad_norm": 1.1127928193727894, "learning_rate": 4.996976515087301e-06, "loss": 0.579, "step": 1641 }, { "epoch": 0.09999086563346832, "grad_norm": 1.215921847440474, "learning_rate": 4.99697259055082e-06, "loss": 0.4907, "step": 1642 }, { "epoch": 0.10005176141034619, "grad_norm": 0.9960483170141813, "learning_rate": 4.996968663470474e-06, "loss": 0.5443, "step": 1643 }, { "epoch": 0.10011265718722406, "grad_norm": 1.0199276845261946, "learning_rate": 4.996964733846269e-06, "loss": 0.5514, "step": 1644 }, { "epoch": 0.10017355296410194, "grad_norm": 1.0181854755768216, "learning_rate": 4.996960801678209e-06, "loss": 0.6025, "step": 1645 }, { "epoch": 0.10023444874097981, "grad_norm": 0.9397853048015088, "learning_rate": 4.996956866966296e-06, "loss": 0.5562, "step": 1646 }, { "epoch": 0.10029534451785768, "grad_norm": 1.1453137165321257, "learning_rate": 4.996952929710536e-06, "loss": 0.5501, "step": 1647 }, { "epoch": 0.10035624029473555, "grad_norm": 1.0745003297576192, "learning_rate": 4.9969489899109315e-06, "loss": 0.6093, "step": 1648 }, { "epoch": 0.10041713607161343, "grad_norm": 1.007211203289983, "learning_rate": 4.9969450475674875e-06, "loss": 0.5005, "step": 1649 }, { "epoch": 0.1004780318484913, "grad_norm": 1.0774116484464575, "learning_rate": 4.996941102680209e-06, "loss": 0.5598, "step": 1650 }, { "epoch": 0.10053892762536919, "grad_norm": 0.9956222125322651, "learning_rate": 4.996937155249098e-06, "loss": 0.6007, "step": 1651 }, { "epoch": 0.10059982340224706, "grad_norm": 1.042800612644546, "learning_rate": 4.996933205274158e-06, "loss": 0.5185, "step": 1652 }, { "epoch": 0.10066071917912493, "grad_norm": 1.0427137072019084, "learning_rate": 4.996929252755396e-06, "loss": 0.5505, "step": 1653 }, { "epoch": 0.1007216149560028, "grad_norm": 1.0032361463838362, "learning_rate": 4.996925297692814e-06, "loss": 0.5824, "step": 1654 }, { "epoch": 0.10078251073288068, "grad_norm": 1.0218722381505958, "learning_rate": 4.9969213400864154e-06, "loss": 0.6844, "step": 1655 }, { "epoch": 0.10084340650975855, "grad_norm": 1.0658528430895233, "learning_rate": 4.996917379936207e-06, "loss": 0.5447, "step": 1656 }, { "epoch": 0.10090430228663642, "grad_norm": 1.0674475411601154, "learning_rate": 4.99691341724219e-06, "loss": 0.5317, "step": 1657 }, { "epoch": 0.1009651980635143, "grad_norm": 1.0466589516755371, "learning_rate": 4.99690945200437e-06, "loss": 0.4784, "step": 1658 }, { "epoch": 0.10102609384039217, "grad_norm": 1.1203625785754157, "learning_rate": 4.9969054842227506e-06, "loss": 0.5169, "step": 1659 }, { "epoch": 0.10108698961727004, "grad_norm": 1.0292750644858562, "learning_rate": 4.996901513897335e-06, "loss": 0.5441, "step": 1660 }, { "epoch": 0.10114788539414792, "grad_norm": 0.9810794552396621, "learning_rate": 4.996897541028129e-06, "loss": 0.5937, "step": 1661 }, { "epoch": 0.10120878117102579, "grad_norm": 1.032579125241424, "learning_rate": 4.9968935656151355e-06, "loss": 0.535, "step": 1662 }, { "epoch": 0.10126967694790366, "grad_norm": 1.1395235261557664, "learning_rate": 4.996889587658358e-06, "loss": 0.491, "step": 1663 }, { "epoch": 0.10133057272478153, "grad_norm": 1.0848052334082923, "learning_rate": 4.996885607157802e-06, "loss": 0.5802, "step": 1664 }, { "epoch": 0.10139146850165941, "grad_norm": 1.0299315273618883, "learning_rate": 4.996881624113471e-06, "loss": 0.5417, "step": 1665 }, { "epoch": 0.10145236427853728, "grad_norm": 1.0361999772949415, "learning_rate": 4.996877638525368e-06, "loss": 0.5156, "step": 1666 }, { "epoch": 0.10151326005541515, "grad_norm": 0.9814212612719733, "learning_rate": 4.996873650393499e-06, "loss": 0.5272, "step": 1667 }, { "epoch": 0.10157415583229303, "grad_norm": 1.0893565505108886, "learning_rate": 4.996869659717867e-06, "loss": 0.5897, "step": 1668 }, { "epoch": 0.1016350516091709, "grad_norm": 0.9740236209885118, "learning_rate": 4.996865666498476e-06, "loss": 0.5473, "step": 1669 }, { "epoch": 0.10169594738604877, "grad_norm": 1.0796919151194282, "learning_rate": 4.996861670735329e-06, "loss": 0.5862, "step": 1670 }, { "epoch": 0.10175684316292666, "grad_norm": 1.0677942335693211, "learning_rate": 4.996857672428432e-06, "loss": 0.5099, "step": 1671 }, { "epoch": 0.10181773893980453, "grad_norm": 1.1036227162720051, "learning_rate": 4.99685367157779e-06, "loss": 0.5413, "step": 1672 }, { "epoch": 0.1018786347166824, "grad_norm": 0.9745475816729223, "learning_rate": 4.9968496681834025e-06, "loss": 0.5668, "step": 1673 }, { "epoch": 0.10193953049356028, "grad_norm": 1.0851722582213068, "learning_rate": 4.996845662245278e-06, "loss": 0.5008, "step": 1674 }, { "epoch": 0.10200042627043815, "grad_norm": 1.1454709699567331, "learning_rate": 4.996841653763419e-06, "loss": 0.5232, "step": 1675 }, { "epoch": 0.10206132204731602, "grad_norm": 1.1398200122731326, "learning_rate": 4.996837642737829e-06, "loss": 0.5151, "step": 1676 }, { "epoch": 0.1021222178241939, "grad_norm": 1.0798872597905045, "learning_rate": 4.996833629168514e-06, "loss": 0.4811, "step": 1677 }, { "epoch": 0.10218311360107177, "grad_norm": 1.051458899656529, "learning_rate": 4.996829613055476e-06, "loss": 0.5604, "step": 1678 }, { "epoch": 0.10224400937794964, "grad_norm": 1.0954820393907971, "learning_rate": 4.99682559439872e-06, "loss": 0.5545, "step": 1679 }, { "epoch": 0.10230490515482751, "grad_norm": 1.15296699434543, "learning_rate": 4.996821573198249e-06, "loss": 0.5303, "step": 1680 }, { "epoch": 0.10236580093170539, "grad_norm": 1.087817299548718, "learning_rate": 4.996817549454069e-06, "loss": 0.4881, "step": 1681 }, { "epoch": 0.10242669670858326, "grad_norm": 1.0398611135368738, "learning_rate": 4.996813523166184e-06, "loss": 0.5493, "step": 1682 }, { "epoch": 0.10248759248546113, "grad_norm": 1.0424751651573563, "learning_rate": 4.996809494334596e-06, "loss": 0.561, "step": 1683 }, { "epoch": 0.102548488262339, "grad_norm": 1.0893207748337808, "learning_rate": 4.996805462959311e-06, "loss": 0.4902, "step": 1684 }, { "epoch": 0.10260938403921688, "grad_norm": 1.0761885931252027, "learning_rate": 4.9968014290403325e-06, "loss": 0.4669, "step": 1685 }, { "epoch": 0.10267027981609475, "grad_norm": 1.0114898442194173, "learning_rate": 4.996797392577665e-06, "loss": 0.4942, "step": 1686 }, { "epoch": 0.10273117559297262, "grad_norm": 0.9418573139046353, "learning_rate": 4.996793353571311e-06, "loss": 0.5991, "step": 1687 }, { "epoch": 0.1027920713698505, "grad_norm": 1.0481297164331296, "learning_rate": 4.996789312021277e-06, "loss": 0.4797, "step": 1688 }, { "epoch": 0.10285296714672837, "grad_norm": 1.0718218539241076, "learning_rate": 4.996785267927566e-06, "loss": 0.5619, "step": 1689 }, { "epoch": 0.10291386292360624, "grad_norm": 0.9478142839990505, "learning_rate": 4.996781221290181e-06, "loss": 0.6306, "step": 1690 }, { "epoch": 0.10297475870048411, "grad_norm": 0.9925755233042549, "learning_rate": 4.996777172109128e-06, "loss": 0.5716, "step": 1691 }, { "epoch": 0.103035654477362, "grad_norm": 1.0251064865818886, "learning_rate": 4.996773120384411e-06, "loss": 0.4714, "step": 1692 }, { "epoch": 0.10309655025423987, "grad_norm": 0.9927887457818231, "learning_rate": 4.996769066116032e-06, "loss": 0.5567, "step": 1693 }, { "epoch": 0.10315744603111775, "grad_norm": 1.0647482212678125, "learning_rate": 4.996765009303997e-06, "loss": 0.5784, "step": 1694 }, { "epoch": 0.10321834180799562, "grad_norm": 1.018995301849709, "learning_rate": 4.99676094994831e-06, "loss": 0.5321, "step": 1695 }, { "epoch": 0.10327923758487349, "grad_norm": 1.0766568590577419, "learning_rate": 4.996756888048975e-06, "loss": 0.487, "step": 1696 }, { "epoch": 0.10334013336175137, "grad_norm": 1.0562313610797487, "learning_rate": 4.996752823605995e-06, "loss": 0.5259, "step": 1697 }, { "epoch": 0.10340102913862924, "grad_norm": 0.9457696049069677, "learning_rate": 4.996748756619376e-06, "loss": 0.6163, "step": 1698 }, { "epoch": 0.10346192491550711, "grad_norm": 1.2239394878611567, "learning_rate": 4.996744687089121e-06, "loss": 0.5078, "step": 1699 }, { "epoch": 0.10352282069238498, "grad_norm": 0.9926485554793067, "learning_rate": 4.996740615015235e-06, "loss": 0.5697, "step": 1700 }, { "epoch": 0.10358371646926286, "grad_norm": 1.0372014052457623, "learning_rate": 4.996736540397722e-06, "loss": 0.5488, "step": 1701 }, { "epoch": 0.10364461224614073, "grad_norm": 0.996839520953574, "learning_rate": 4.9967324632365844e-06, "loss": 0.5605, "step": 1702 }, { "epoch": 0.1037055080230186, "grad_norm": 1.0608019965169122, "learning_rate": 4.996728383531828e-06, "loss": 0.5099, "step": 1703 }, { "epoch": 0.10376640379989648, "grad_norm": 1.082333285503111, "learning_rate": 4.996724301283457e-06, "loss": 0.5503, "step": 1704 }, { "epoch": 0.10382729957677435, "grad_norm": 1.0232805506503986, "learning_rate": 4.996720216491474e-06, "loss": 0.567, "step": 1705 }, { "epoch": 0.10388819535365222, "grad_norm": 1.0630315860084953, "learning_rate": 4.996716129155887e-06, "loss": 0.5892, "step": 1706 }, { "epoch": 0.1039490911305301, "grad_norm": 1.0741858171281857, "learning_rate": 4.996712039276695e-06, "loss": 0.545, "step": 1707 }, { "epoch": 0.10400998690740797, "grad_norm": 1.0374000397979912, "learning_rate": 4.9967079468539055e-06, "loss": 0.5274, "step": 1708 }, { "epoch": 0.10407088268428584, "grad_norm": 1.024356088006433, "learning_rate": 4.996703851887522e-06, "loss": 0.5972, "step": 1709 }, { "epoch": 0.10413177846116371, "grad_norm": 1.0530121044577043, "learning_rate": 4.996699754377548e-06, "loss": 0.5307, "step": 1710 }, { "epoch": 0.10419267423804159, "grad_norm": 0.9535226943596117, "learning_rate": 4.996695654323989e-06, "loss": 0.602, "step": 1711 }, { "epoch": 0.10425357001491947, "grad_norm": 1.030329399496865, "learning_rate": 4.996691551726848e-06, "loss": 0.5097, "step": 1712 }, { "epoch": 0.10431446579179735, "grad_norm": 1.0973671657825153, "learning_rate": 4.996687446586129e-06, "loss": 0.6642, "step": 1713 }, { "epoch": 0.10437536156867522, "grad_norm": 1.010332372289805, "learning_rate": 4.996683338901838e-06, "loss": 0.562, "step": 1714 }, { "epoch": 0.10443625734555309, "grad_norm": 1.0603870335199188, "learning_rate": 4.996679228673976e-06, "loss": 0.5444, "step": 1715 }, { "epoch": 0.10449715312243096, "grad_norm": 1.1844143511667597, "learning_rate": 4.9966751159025504e-06, "loss": 0.5026, "step": 1716 }, { "epoch": 0.10455804889930884, "grad_norm": 1.1476081513254086, "learning_rate": 4.9966710005875645e-06, "loss": 0.5364, "step": 1717 }, { "epoch": 0.10461894467618671, "grad_norm": 1.068864930697842, "learning_rate": 4.996666882729022e-06, "loss": 0.5592, "step": 1718 }, { "epoch": 0.10467984045306458, "grad_norm": 0.9731913457900554, "learning_rate": 4.996662762326926e-06, "loss": 0.5483, "step": 1719 }, { "epoch": 0.10474073622994245, "grad_norm": 1.0450935734097802, "learning_rate": 4.996658639381283e-06, "loss": 0.5464, "step": 1720 }, { "epoch": 0.10480163200682033, "grad_norm": 1.021978111408529, "learning_rate": 4.9966545138920955e-06, "loss": 0.5479, "step": 1721 }, { "epoch": 0.1048625277836982, "grad_norm": 1.1360691099884468, "learning_rate": 4.996650385859368e-06, "loss": 0.5313, "step": 1722 }, { "epoch": 0.10492342356057607, "grad_norm": 1.0310857846721262, "learning_rate": 4.996646255283107e-06, "loss": 0.5559, "step": 1723 }, { "epoch": 0.10498431933745395, "grad_norm": 1.1331813300466749, "learning_rate": 4.996642122163313e-06, "loss": 0.4649, "step": 1724 }, { "epoch": 0.10504521511433182, "grad_norm": 1.0472808933750806, "learning_rate": 4.996637986499992e-06, "loss": 0.529, "step": 1725 }, { "epoch": 0.10510611089120969, "grad_norm": 1.0633602374194229, "learning_rate": 4.996633848293148e-06, "loss": 0.521, "step": 1726 }, { "epoch": 0.10516700666808756, "grad_norm": 1.0900157285448655, "learning_rate": 4.9966297075427855e-06, "loss": 0.5269, "step": 1727 }, { "epoch": 0.10522790244496544, "grad_norm": 1.0955320259768848, "learning_rate": 4.996625564248909e-06, "loss": 0.5518, "step": 1728 }, { "epoch": 0.10528879822184331, "grad_norm": 1.126634945603069, "learning_rate": 4.996621418411522e-06, "loss": 0.4902, "step": 1729 }, { "epoch": 0.10534969399872118, "grad_norm": 0.9812783723164764, "learning_rate": 4.996617270030629e-06, "loss": 0.5676, "step": 1730 }, { "epoch": 0.10541058977559906, "grad_norm": 1.0891004917879188, "learning_rate": 4.996613119106234e-06, "loss": 0.5152, "step": 1731 }, { "epoch": 0.10547148555247693, "grad_norm": 1.0279339391387845, "learning_rate": 4.996608965638342e-06, "loss": 0.5819, "step": 1732 }, { "epoch": 0.10553238132935482, "grad_norm": 1.142646834850904, "learning_rate": 4.996604809626956e-06, "loss": 0.482, "step": 1733 }, { "epoch": 0.10559327710623269, "grad_norm": 1.0770997602969976, "learning_rate": 4.996600651072081e-06, "loss": 0.5012, "step": 1734 }, { "epoch": 0.10565417288311056, "grad_norm": 1.035994752358036, "learning_rate": 4.996596489973722e-06, "loss": 0.5154, "step": 1735 }, { "epoch": 0.10571506865998843, "grad_norm": 1.1180477400450488, "learning_rate": 4.996592326331882e-06, "loss": 0.5234, "step": 1736 }, { "epoch": 0.10577596443686631, "grad_norm": 1.0920299013743027, "learning_rate": 4.996588160146565e-06, "loss": 0.5837, "step": 1737 }, { "epoch": 0.10583686021374418, "grad_norm": 1.1154964583492333, "learning_rate": 4.996583991417776e-06, "loss": 0.5622, "step": 1738 }, { "epoch": 0.10589775599062205, "grad_norm": 1.0353724029183167, "learning_rate": 4.996579820145519e-06, "loss": 0.51, "step": 1739 }, { "epoch": 0.10595865176749993, "grad_norm": 1.0390768721639931, "learning_rate": 4.9965756463298e-06, "loss": 0.4857, "step": 1740 }, { "epoch": 0.1060195475443778, "grad_norm": 1.081458570543921, "learning_rate": 4.9965714699706204e-06, "loss": 0.467, "step": 1741 }, { "epoch": 0.10608044332125567, "grad_norm": 0.9686078039442046, "learning_rate": 4.996567291067985e-06, "loss": 0.5696, "step": 1742 }, { "epoch": 0.10614133909813354, "grad_norm": 0.9684680843497067, "learning_rate": 4.9965631096219005e-06, "loss": 0.5434, "step": 1743 }, { "epoch": 0.10620223487501142, "grad_norm": 1.0325585691872274, "learning_rate": 4.996558925632367e-06, "loss": 0.5392, "step": 1744 }, { "epoch": 0.10626313065188929, "grad_norm": 0.9852175390252056, "learning_rate": 4.996554739099393e-06, "loss": 0.5011, "step": 1745 }, { "epoch": 0.10632402642876716, "grad_norm": 1.0503192432698503, "learning_rate": 4.996550550022981e-06, "loss": 0.491, "step": 1746 }, { "epoch": 0.10638492220564504, "grad_norm": 1.0087160194758449, "learning_rate": 4.9965463584031345e-06, "loss": 0.5199, "step": 1747 }, { "epoch": 0.10644581798252291, "grad_norm": 0.9726056096404159, "learning_rate": 4.996542164239859e-06, "loss": 0.6042, "step": 1748 }, { "epoch": 0.10650671375940078, "grad_norm": 1.019430748383045, "learning_rate": 4.996537967533158e-06, "loss": 0.542, "step": 1749 }, { "epoch": 0.10656760953627865, "grad_norm": 1.1224842739559349, "learning_rate": 4.996533768283036e-06, "loss": 0.5826, "step": 1750 }, { "epoch": 0.10662850531315653, "grad_norm": 1.0313450963727968, "learning_rate": 4.996529566489497e-06, "loss": 0.5363, "step": 1751 }, { "epoch": 0.1066894010900344, "grad_norm": 0.9755428810872762, "learning_rate": 4.996525362152547e-06, "loss": 0.5551, "step": 1752 }, { "epoch": 0.10675029686691229, "grad_norm": 0.9708752144626873, "learning_rate": 4.996521155272187e-06, "loss": 0.5395, "step": 1753 }, { "epoch": 0.10681119264379016, "grad_norm": 1.0140790796114103, "learning_rate": 4.996516945848424e-06, "loss": 0.5406, "step": 1754 }, { "epoch": 0.10687208842066803, "grad_norm": 1.0003815597020813, "learning_rate": 4.996512733881261e-06, "loss": 0.5301, "step": 1755 }, { "epoch": 0.1069329841975459, "grad_norm": 1.205361460837066, "learning_rate": 4.9965085193707036e-06, "loss": 0.5282, "step": 1756 }, { "epoch": 0.10699387997442378, "grad_norm": 1.059891491963777, "learning_rate": 4.996504302316755e-06, "loss": 0.5134, "step": 1757 }, { "epoch": 0.10705477575130165, "grad_norm": 0.990021941027551, "learning_rate": 4.9965000827194186e-06, "loss": 0.5933, "step": 1758 }, { "epoch": 0.10711567152817952, "grad_norm": 1.1365303407705232, "learning_rate": 4.9964958605787015e-06, "loss": 0.5628, "step": 1759 }, { "epoch": 0.1071765673050574, "grad_norm": 1.0066990029815868, "learning_rate": 4.996491635894605e-06, "loss": 0.4683, "step": 1760 }, { "epoch": 0.10723746308193527, "grad_norm": 0.9572334659261204, "learning_rate": 4.9964874086671354e-06, "loss": 0.5595, "step": 1761 }, { "epoch": 0.10729835885881314, "grad_norm": 0.9605404713782302, "learning_rate": 4.9964831788962965e-06, "loss": 0.5255, "step": 1762 }, { "epoch": 0.10735925463569101, "grad_norm": 1.0605351394237161, "learning_rate": 4.996478946582092e-06, "loss": 0.5515, "step": 1763 }, { "epoch": 0.10742015041256889, "grad_norm": 1.03645794770236, "learning_rate": 4.996474711724526e-06, "loss": 0.5517, "step": 1764 }, { "epoch": 0.10748104618944676, "grad_norm": 1.0309384894148184, "learning_rate": 4.9964704743236045e-06, "loss": 0.5609, "step": 1765 }, { "epoch": 0.10754194196632463, "grad_norm": 1.0659224777021032, "learning_rate": 4.99646623437933e-06, "loss": 0.5387, "step": 1766 }, { "epoch": 0.1076028377432025, "grad_norm": 1.0171544526092064, "learning_rate": 4.996461991891709e-06, "loss": 0.5889, "step": 1767 }, { "epoch": 0.10766373352008038, "grad_norm": 1.1463871584070118, "learning_rate": 4.996457746860743e-06, "loss": 0.5937, "step": 1768 }, { "epoch": 0.10772462929695825, "grad_norm": 1.0423133686720698, "learning_rate": 4.996453499286438e-06, "loss": 0.5429, "step": 1769 }, { "epoch": 0.10778552507383612, "grad_norm": 0.9955857308836678, "learning_rate": 4.996449249168799e-06, "loss": 0.4901, "step": 1770 }, { "epoch": 0.107846420850714, "grad_norm": 1.111897604361036, "learning_rate": 4.996444996507829e-06, "loss": 0.5178, "step": 1771 }, { "epoch": 0.10790731662759187, "grad_norm": 1.0114652270911213, "learning_rate": 4.996440741303532e-06, "loss": 0.5439, "step": 1772 }, { "epoch": 0.10796821240446974, "grad_norm": 0.9525725976344975, "learning_rate": 4.996436483555913e-06, "loss": 0.6092, "step": 1773 }, { "epoch": 0.10802910818134763, "grad_norm": 1.0487106849901062, "learning_rate": 4.996432223264978e-06, "loss": 0.5274, "step": 1774 }, { "epoch": 0.1080900039582255, "grad_norm": 1.0536202206714569, "learning_rate": 4.996427960430728e-06, "loss": 0.5844, "step": 1775 }, { "epoch": 0.10815089973510338, "grad_norm": 1.0488106397686263, "learning_rate": 4.996423695053169e-06, "loss": 0.5633, "step": 1776 }, { "epoch": 0.10821179551198125, "grad_norm": 1.0667570628009386, "learning_rate": 4.996419427132308e-06, "loss": 0.4549, "step": 1777 }, { "epoch": 0.10827269128885912, "grad_norm": 1.0720685308403228, "learning_rate": 4.996415156668144e-06, "loss": 0.4962, "step": 1778 }, { "epoch": 0.108333587065737, "grad_norm": 0.9842558923310549, "learning_rate": 4.996410883660685e-06, "loss": 0.5814, "step": 1779 }, { "epoch": 0.10839448284261487, "grad_norm": 0.9803446947903159, "learning_rate": 4.996406608109935e-06, "loss": 0.5792, "step": 1780 }, { "epoch": 0.10845537861949274, "grad_norm": 1.071972898134292, "learning_rate": 4.996402330015898e-06, "loss": 0.5174, "step": 1781 }, { "epoch": 0.10851627439637061, "grad_norm": 1.0923192948715454, "learning_rate": 4.996398049378577e-06, "loss": 0.5402, "step": 1782 }, { "epoch": 0.10857717017324849, "grad_norm": 1.0258265982932828, "learning_rate": 4.996393766197979e-06, "loss": 0.5095, "step": 1783 }, { "epoch": 0.10863806595012636, "grad_norm": 0.9931125541610801, "learning_rate": 4.996389480474106e-06, "loss": 0.5744, "step": 1784 }, { "epoch": 0.10869896172700423, "grad_norm": 1.0761452523274695, "learning_rate": 4.996385192206963e-06, "loss": 0.5003, "step": 1785 }, { "epoch": 0.1087598575038821, "grad_norm": 1.0635300434815718, "learning_rate": 4.996380901396556e-06, "loss": 0.576, "step": 1786 }, { "epoch": 0.10882075328075998, "grad_norm": 1.0612246719912464, "learning_rate": 4.996376608042887e-06, "loss": 0.5592, "step": 1787 }, { "epoch": 0.10888164905763785, "grad_norm": 1.0539681806164969, "learning_rate": 4.996372312145962e-06, "loss": 0.5877, "step": 1788 }, { "epoch": 0.10894254483451572, "grad_norm": 0.9858774002220871, "learning_rate": 4.996368013705784e-06, "loss": 0.5698, "step": 1789 }, { "epoch": 0.1090034406113936, "grad_norm": 1.1010012972689507, "learning_rate": 4.996363712722359e-06, "loss": 0.5111, "step": 1790 }, { "epoch": 0.10906433638827147, "grad_norm": 1.0427382834655152, "learning_rate": 4.99635940919569e-06, "loss": 0.5418, "step": 1791 }, { "epoch": 0.10912523216514934, "grad_norm": 0.9433634053320837, "learning_rate": 4.9963551031257814e-06, "loss": 0.5504, "step": 1792 }, { "epoch": 0.10918612794202721, "grad_norm": 1.0744198283887572, "learning_rate": 4.99635079451264e-06, "loss": 0.5369, "step": 1793 }, { "epoch": 0.1092470237189051, "grad_norm": 1.0367218823229851, "learning_rate": 4.996346483356266e-06, "loss": 0.571, "step": 1794 }, { "epoch": 0.10930791949578297, "grad_norm": 0.9518974197887267, "learning_rate": 4.996342169656668e-06, "loss": 0.5414, "step": 1795 }, { "epoch": 0.10936881527266085, "grad_norm": 1.1593089956257472, "learning_rate": 4.9963378534138475e-06, "loss": 0.5705, "step": 1796 }, { "epoch": 0.10942971104953872, "grad_norm": 1.14986509881599, "learning_rate": 4.99633353462781e-06, "loss": 0.5518, "step": 1797 }, { "epoch": 0.10949060682641659, "grad_norm": 1.1851779836877863, "learning_rate": 4.9963292132985595e-06, "loss": 0.4474, "step": 1798 }, { "epoch": 0.10955150260329446, "grad_norm": 1.032241175731747, "learning_rate": 4.9963248894261015e-06, "loss": 0.5036, "step": 1799 }, { "epoch": 0.10961239838017234, "grad_norm": 1.0811070096601771, "learning_rate": 4.99632056301044e-06, "loss": 0.5717, "step": 1800 }, { "epoch": 0.10967329415705021, "grad_norm": 1.0750661674677393, "learning_rate": 4.996316234051578e-06, "loss": 0.4967, "step": 1801 }, { "epoch": 0.10973418993392808, "grad_norm": 1.0279018068484733, "learning_rate": 4.996311902549521e-06, "loss": 0.5043, "step": 1802 }, { "epoch": 0.10979508571080596, "grad_norm": 1.0707950972251334, "learning_rate": 4.996307568504274e-06, "loss": 0.5954, "step": 1803 }, { "epoch": 0.10985598148768383, "grad_norm": 1.0562882317328932, "learning_rate": 4.9963032319158394e-06, "loss": 0.5287, "step": 1804 }, { "epoch": 0.1099168772645617, "grad_norm": 0.9704391410957339, "learning_rate": 4.9962988927842235e-06, "loss": 0.6099, "step": 1805 }, { "epoch": 0.10997777304143957, "grad_norm": 0.9995488648943995, "learning_rate": 4.996294551109431e-06, "loss": 0.6417, "step": 1806 }, { "epoch": 0.11003866881831745, "grad_norm": 1.1008214043950628, "learning_rate": 4.996290206891465e-06, "loss": 0.5941, "step": 1807 }, { "epoch": 0.11009956459519532, "grad_norm": 1.0950009130043912, "learning_rate": 4.99628586013033e-06, "loss": 0.5616, "step": 1808 }, { "epoch": 0.1101604603720732, "grad_norm": 1.0422525528596744, "learning_rate": 4.996281510826032e-06, "loss": 0.602, "step": 1809 }, { "epoch": 0.11022135614895107, "grad_norm": 1.1165355335711402, "learning_rate": 4.996277158978573e-06, "loss": 0.5495, "step": 1810 }, { "epoch": 0.11028225192582894, "grad_norm": 1.0030540239203927, "learning_rate": 4.996272804587959e-06, "loss": 0.5263, "step": 1811 }, { "epoch": 0.11034314770270681, "grad_norm": 0.9603609617960293, "learning_rate": 4.996268447654195e-06, "loss": 0.5094, "step": 1812 }, { "epoch": 0.11040404347958468, "grad_norm": 1.0493635627560793, "learning_rate": 4.996264088177284e-06, "loss": 0.5648, "step": 1813 }, { "epoch": 0.11046493925646256, "grad_norm": 1.1127218676257313, "learning_rate": 4.996259726157231e-06, "loss": 0.5317, "step": 1814 }, { "epoch": 0.11052583503334044, "grad_norm": 1.0741650242211582, "learning_rate": 4.996255361594041e-06, "loss": 0.5625, "step": 1815 }, { "epoch": 0.11058673081021832, "grad_norm": 0.9884485673073425, "learning_rate": 4.996250994487717e-06, "loss": 0.609, "step": 1816 }, { "epoch": 0.11064762658709619, "grad_norm": 1.0100426627064114, "learning_rate": 4.996246624838266e-06, "loss": 0.5142, "step": 1817 }, { "epoch": 0.11070852236397406, "grad_norm": 1.103025018886602, "learning_rate": 4.996242252645689e-06, "loss": 0.5105, "step": 1818 }, { "epoch": 0.11076941814085194, "grad_norm": 1.0430189776507874, "learning_rate": 4.996237877909993e-06, "loss": 0.5996, "step": 1819 }, { "epoch": 0.11083031391772981, "grad_norm": 0.9794500593454651, "learning_rate": 4.996233500631182e-06, "loss": 0.5231, "step": 1820 }, { "epoch": 0.11089120969460768, "grad_norm": 1.0436955841094127, "learning_rate": 4.996229120809261e-06, "loss": 0.5272, "step": 1821 }, { "epoch": 0.11095210547148555, "grad_norm": 1.069598222713175, "learning_rate": 4.996224738444232e-06, "loss": 0.4819, "step": 1822 }, { "epoch": 0.11101300124836343, "grad_norm": 1.0282867641200237, "learning_rate": 4.996220353536102e-06, "loss": 0.5095, "step": 1823 }, { "epoch": 0.1110738970252413, "grad_norm": 1.0659701580403138, "learning_rate": 4.996215966084874e-06, "loss": 0.6526, "step": 1824 }, { "epoch": 0.11113479280211917, "grad_norm": 1.0711508540695838, "learning_rate": 4.996211576090554e-06, "loss": 0.4904, "step": 1825 }, { "epoch": 0.11119568857899705, "grad_norm": 1.0445105218286674, "learning_rate": 4.996207183553145e-06, "loss": 0.5517, "step": 1826 }, { "epoch": 0.11125658435587492, "grad_norm": 1.0237588705014908, "learning_rate": 4.996202788472651e-06, "loss": 0.5055, "step": 1827 }, { "epoch": 0.11131748013275279, "grad_norm": 1.033499118291495, "learning_rate": 4.996198390849079e-06, "loss": 0.5629, "step": 1828 }, { "epoch": 0.11137837590963066, "grad_norm": 1.0438215686742498, "learning_rate": 4.996193990682432e-06, "loss": 0.5365, "step": 1829 }, { "epoch": 0.11143927168650854, "grad_norm": 1.0567640192296262, "learning_rate": 4.996189587972714e-06, "loss": 0.5199, "step": 1830 }, { "epoch": 0.11150016746338641, "grad_norm": 1.0533377049824402, "learning_rate": 4.99618518271993e-06, "loss": 0.5326, "step": 1831 }, { "epoch": 0.11156106324026428, "grad_norm": 1.0460182872536743, "learning_rate": 4.996180774924085e-06, "loss": 0.5202, "step": 1832 }, { "epoch": 0.11162195901714216, "grad_norm": 1.1382306928826302, "learning_rate": 4.996176364585181e-06, "loss": 0.528, "step": 1833 }, { "epoch": 0.11168285479402003, "grad_norm": 1.0937324913961817, "learning_rate": 4.996171951703226e-06, "loss": 0.5189, "step": 1834 }, { "epoch": 0.11174375057089792, "grad_norm": 1.013475787092547, "learning_rate": 4.996167536278223e-06, "loss": 0.5258, "step": 1835 }, { "epoch": 0.11180464634777579, "grad_norm": 1.1436433899686214, "learning_rate": 4.996163118310176e-06, "loss": 0.5454, "step": 1836 }, { "epoch": 0.11186554212465366, "grad_norm": 1.0423266086850071, "learning_rate": 4.99615869779909e-06, "loss": 0.5335, "step": 1837 }, { "epoch": 0.11192643790153153, "grad_norm": 1.0983775645375236, "learning_rate": 4.99615427474497e-06, "loss": 0.5567, "step": 1838 }, { "epoch": 0.1119873336784094, "grad_norm": 1.0458947064609418, "learning_rate": 4.9961498491478185e-06, "loss": 0.5438, "step": 1839 }, { "epoch": 0.11204822945528728, "grad_norm": 1.011425451928268, "learning_rate": 4.996145421007642e-06, "loss": 0.615, "step": 1840 }, { "epoch": 0.11210912523216515, "grad_norm": 1.0851727398397542, "learning_rate": 4.996140990324445e-06, "loss": 0.5486, "step": 1841 }, { "epoch": 0.11217002100904302, "grad_norm": 0.9829779534254999, "learning_rate": 4.996136557098231e-06, "loss": 0.5711, "step": 1842 }, { "epoch": 0.1122309167859209, "grad_norm": 0.979284765441551, "learning_rate": 4.996132121329006e-06, "loss": 0.5594, "step": 1843 }, { "epoch": 0.11229181256279877, "grad_norm": 1.048430715587884, "learning_rate": 4.996127683016772e-06, "loss": 0.5691, "step": 1844 }, { "epoch": 0.11235270833967664, "grad_norm": 0.9800123444796744, "learning_rate": 4.996123242161536e-06, "loss": 0.5603, "step": 1845 }, { "epoch": 0.11241360411655452, "grad_norm": 0.995489108983238, "learning_rate": 4.9961187987633005e-06, "loss": 0.529, "step": 1846 }, { "epoch": 0.11247449989343239, "grad_norm": 1.034250329582233, "learning_rate": 4.996114352822072e-06, "loss": 0.5156, "step": 1847 }, { "epoch": 0.11253539567031026, "grad_norm": 0.9820420548326423, "learning_rate": 4.996109904337853e-06, "loss": 0.5219, "step": 1848 }, { "epoch": 0.11259629144718813, "grad_norm": 1.0446390408759836, "learning_rate": 4.996105453310651e-06, "loss": 0.5717, "step": 1849 }, { "epoch": 0.11265718722406601, "grad_norm": 1.151153721548018, "learning_rate": 4.996100999740467e-06, "loss": 0.5118, "step": 1850 }, { "epoch": 0.11271808300094388, "grad_norm": 1.1244929215161281, "learning_rate": 4.996096543627308e-06, "loss": 0.5324, "step": 1851 }, { "epoch": 0.11277897877782175, "grad_norm": 1.1255276444253044, "learning_rate": 4.9960920849711775e-06, "loss": 0.5055, "step": 1852 }, { "epoch": 0.11283987455469963, "grad_norm": 1.1014014166193398, "learning_rate": 4.99608762377208e-06, "loss": 0.5571, "step": 1853 }, { "epoch": 0.1129007703315775, "grad_norm": 1.105023153058197, "learning_rate": 4.996083160030021e-06, "loss": 0.5439, "step": 1854 }, { "epoch": 0.11296166610845537, "grad_norm": 1.100500484249664, "learning_rate": 4.996078693745004e-06, "loss": 0.4756, "step": 1855 }, { "epoch": 0.11302256188533326, "grad_norm": 0.9825598577157797, "learning_rate": 4.9960742249170334e-06, "loss": 0.5446, "step": 1856 }, { "epoch": 0.11308345766221113, "grad_norm": 1.0565833211356086, "learning_rate": 4.996069753546115e-06, "loss": 0.4918, "step": 1857 }, { "epoch": 0.113144353439089, "grad_norm": 1.1175763639064378, "learning_rate": 4.996065279632253e-06, "loss": 0.5124, "step": 1858 }, { "epoch": 0.11320524921596688, "grad_norm": 0.92513368537073, "learning_rate": 4.996060803175451e-06, "loss": 0.5812, "step": 1859 }, { "epoch": 0.11326614499284475, "grad_norm": 1.0814544031358755, "learning_rate": 4.9960563241757135e-06, "loss": 0.5251, "step": 1860 }, { "epoch": 0.11332704076972262, "grad_norm": 1.108513606541972, "learning_rate": 4.996051842633047e-06, "loss": 0.5947, "step": 1861 }, { "epoch": 0.1133879365466005, "grad_norm": 0.9906223842611532, "learning_rate": 4.996047358547454e-06, "loss": 0.5216, "step": 1862 }, { "epoch": 0.11344883232347837, "grad_norm": 1.077180566489411, "learning_rate": 4.9960428719189396e-06, "loss": 0.5289, "step": 1863 }, { "epoch": 0.11350972810035624, "grad_norm": 1.078030958106031, "learning_rate": 4.996038382747509e-06, "loss": 0.5529, "step": 1864 }, { "epoch": 0.11357062387723411, "grad_norm": 1.0809782141970734, "learning_rate": 4.996033891033166e-06, "loss": 0.6242, "step": 1865 }, { "epoch": 0.11363151965411199, "grad_norm": 1.0113270598272546, "learning_rate": 4.9960293967759165e-06, "loss": 0.5558, "step": 1866 }, { "epoch": 0.11369241543098986, "grad_norm": 0.9704247532473209, "learning_rate": 4.996024899975763e-06, "loss": 0.5823, "step": 1867 }, { "epoch": 0.11375331120786773, "grad_norm": 1.0229987878276663, "learning_rate": 4.996020400632713e-06, "loss": 0.5697, "step": 1868 }, { "epoch": 0.1138142069847456, "grad_norm": 1.0395421578917021, "learning_rate": 4.996015898746768e-06, "loss": 0.5498, "step": 1869 }, { "epoch": 0.11387510276162348, "grad_norm": 0.91815413224145, "learning_rate": 4.9960113943179335e-06, "loss": 0.5702, "step": 1870 }, { "epoch": 0.11393599853850135, "grad_norm": 0.9685249543190139, "learning_rate": 4.996006887346216e-06, "loss": 0.605, "step": 1871 }, { "epoch": 0.11399689431537922, "grad_norm": 1.022024593145439, "learning_rate": 4.996002377831617e-06, "loss": 0.5474, "step": 1872 }, { "epoch": 0.1140577900922571, "grad_norm": 1.071138782779551, "learning_rate": 4.9959978657741435e-06, "loss": 0.4922, "step": 1873 }, { "epoch": 0.11411868586913497, "grad_norm": 1.0892621396608342, "learning_rate": 4.995993351173799e-06, "loss": 0.4889, "step": 1874 }, { "epoch": 0.11417958164601284, "grad_norm": 1.0253571013033382, "learning_rate": 4.995988834030588e-06, "loss": 0.5353, "step": 1875 }, { "epoch": 0.11424047742289073, "grad_norm": 1.0912510631032613, "learning_rate": 4.995984314344516e-06, "loss": 0.5361, "step": 1876 }, { "epoch": 0.1143013731997686, "grad_norm": 1.034904429577967, "learning_rate": 4.995979792115587e-06, "loss": 0.5383, "step": 1877 }, { "epoch": 0.11436226897664648, "grad_norm": 1.0060274869531647, "learning_rate": 4.995975267343806e-06, "loss": 0.5967, "step": 1878 }, { "epoch": 0.11442316475352435, "grad_norm": 1.0678052362251962, "learning_rate": 4.995970740029176e-06, "loss": 0.5029, "step": 1879 }, { "epoch": 0.11448406053040222, "grad_norm": 1.0635933164730074, "learning_rate": 4.995966210171705e-06, "loss": 0.555, "step": 1880 }, { "epoch": 0.1145449563072801, "grad_norm": 1.0025655478932722, "learning_rate": 4.995961677771394e-06, "loss": 0.5722, "step": 1881 }, { "epoch": 0.11460585208415797, "grad_norm": 1.0883171488602406, "learning_rate": 4.995957142828249e-06, "loss": 0.5467, "step": 1882 }, { "epoch": 0.11466674786103584, "grad_norm": 1.0109722201650468, "learning_rate": 4.995952605342275e-06, "loss": 0.5934, "step": 1883 }, { "epoch": 0.11472764363791371, "grad_norm": 1.0586923980343774, "learning_rate": 4.995948065313477e-06, "loss": 0.4828, "step": 1884 }, { "epoch": 0.11478853941479158, "grad_norm": 1.0351467829340377, "learning_rate": 4.9959435227418586e-06, "loss": 0.5107, "step": 1885 }, { "epoch": 0.11484943519166946, "grad_norm": 1.103032871190561, "learning_rate": 4.9959389776274246e-06, "loss": 0.4934, "step": 1886 }, { "epoch": 0.11491033096854733, "grad_norm": 0.9758242431943243, "learning_rate": 4.99593442997018e-06, "loss": 0.6228, "step": 1887 }, { "epoch": 0.1149712267454252, "grad_norm": 0.9715081763813744, "learning_rate": 4.9959298797701295e-06, "loss": 0.5243, "step": 1888 }, { "epoch": 0.11503212252230308, "grad_norm": 1.0343850983481966, "learning_rate": 4.995925327027277e-06, "loss": 0.4794, "step": 1889 }, { "epoch": 0.11509301829918095, "grad_norm": 1.0388166155437695, "learning_rate": 4.995920771741629e-06, "loss": 0.5454, "step": 1890 }, { "epoch": 0.11515391407605882, "grad_norm": 1.1601879343365291, "learning_rate": 4.995916213913188e-06, "loss": 0.4877, "step": 1891 }, { "epoch": 0.1152148098529367, "grad_norm": 1.0272331799408367, "learning_rate": 4.9959116535419585e-06, "loss": 0.5661, "step": 1892 }, { "epoch": 0.11527570562981457, "grad_norm": 1.0046979581090119, "learning_rate": 4.995907090627947e-06, "loss": 0.6506, "step": 1893 }, { "epoch": 0.11533660140669244, "grad_norm": 1.0102467417297205, "learning_rate": 4.995902525171157e-06, "loss": 0.6099, "step": 1894 }, { "epoch": 0.11539749718357031, "grad_norm": 1.0185014432408512, "learning_rate": 4.995897957171594e-06, "loss": 0.5397, "step": 1895 }, { "epoch": 0.11545839296044819, "grad_norm": 0.9993698043807705, "learning_rate": 4.995893386629261e-06, "loss": 0.5308, "step": 1896 }, { "epoch": 0.11551928873732607, "grad_norm": 0.9834660635561201, "learning_rate": 4.995888813544165e-06, "loss": 0.5618, "step": 1897 }, { "epoch": 0.11558018451420395, "grad_norm": 1.066182513830402, "learning_rate": 4.995884237916309e-06, "loss": 0.5085, "step": 1898 }, { "epoch": 0.11564108029108182, "grad_norm": 1.1244524020940883, "learning_rate": 4.995879659745697e-06, "loss": 0.5277, "step": 1899 }, { "epoch": 0.11570197606795969, "grad_norm": 0.9513665529333153, "learning_rate": 4.995875079032336e-06, "loss": 0.5413, "step": 1900 }, { "epoch": 0.11576287184483756, "grad_norm": 1.0293090458423284, "learning_rate": 4.995870495776229e-06, "loss": 0.5249, "step": 1901 }, { "epoch": 0.11582376762171544, "grad_norm": 1.015510388696106, "learning_rate": 4.995865909977381e-06, "loss": 0.4946, "step": 1902 }, { "epoch": 0.11588466339859331, "grad_norm": 1.0541311679620646, "learning_rate": 4.995861321635796e-06, "loss": 0.4987, "step": 1903 }, { "epoch": 0.11594555917547118, "grad_norm": 1.001902511335164, "learning_rate": 4.99585673075148e-06, "loss": 0.5651, "step": 1904 }, { "epoch": 0.11600645495234906, "grad_norm": 1.056619314679146, "learning_rate": 4.9958521373244376e-06, "loss": 0.5521, "step": 1905 }, { "epoch": 0.11606735072922693, "grad_norm": 0.9447301031127794, "learning_rate": 4.995847541354671e-06, "loss": 0.6111, "step": 1906 }, { "epoch": 0.1161282465061048, "grad_norm": 1.0792769654808843, "learning_rate": 4.9958429428421886e-06, "loss": 0.5, "step": 1907 }, { "epoch": 0.11618914228298267, "grad_norm": 1.0460459144981529, "learning_rate": 4.9958383417869924e-06, "loss": 0.4717, "step": 1908 }, { "epoch": 0.11625003805986055, "grad_norm": 1.116409272232721, "learning_rate": 4.995833738189089e-06, "loss": 0.5722, "step": 1909 }, { "epoch": 0.11631093383673842, "grad_norm": 1.0345837615648423, "learning_rate": 4.995829132048482e-06, "loss": 0.517, "step": 1910 }, { "epoch": 0.11637182961361629, "grad_norm": 1.077401512563418, "learning_rate": 4.995824523365175e-06, "loss": 0.5173, "step": 1911 }, { "epoch": 0.11643272539049417, "grad_norm": 1.0174699407961476, "learning_rate": 4.995819912139175e-06, "loss": 0.5642, "step": 1912 }, { "epoch": 0.11649362116737204, "grad_norm": 0.9714143613388142, "learning_rate": 4.9958152983704845e-06, "loss": 0.5782, "step": 1913 }, { "epoch": 0.11655451694424991, "grad_norm": 1.0368678453733646, "learning_rate": 4.99581068205911e-06, "loss": 0.5151, "step": 1914 }, { "epoch": 0.11661541272112778, "grad_norm": 1.0908516973280826, "learning_rate": 4.995806063205055e-06, "loss": 0.5354, "step": 1915 }, { "epoch": 0.11667630849800566, "grad_norm": 1.080809830578159, "learning_rate": 4.995801441808325e-06, "loss": 0.5232, "step": 1916 }, { "epoch": 0.11673720427488354, "grad_norm": 0.9954027396850231, "learning_rate": 4.995796817868925e-06, "loss": 0.5168, "step": 1917 }, { "epoch": 0.11679810005176142, "grad_norm": 0.9708959304960411, "learning_rate": 4.9957921913868576e-06, "loss": 0.5705, "step": 1918 }, { "epoch": 0.11685899582863929, "grad_norm": 0.9810341328532971, "learning_rate": 4.99578756236213e-06, "loss": 0.5523, "step": 1919 }, { "epoch": 0.11691989160551716, "grad_norm": 1.0423294511381207, "learning_rate": 4.995782930794747e-06, "loss": 0.5794, "step": 1920 }, { "epoch": 0.11698078738239504, "grad_norm": 1.0205850558962655, "learning_rate": 4.99577829668471e-06, "loss": 0.5975, "step": 1921 }, { "epoch": 0.11704168315927291, "grad_norm": 1.0738975431452507, "learning_rate": 4.995773660032027e-06, "loss": 0.4934, "step": 1922 }, { "epoch": 0.11710257893615078, "grad_norm": 1.073441190077022, "learning_rate": 4.995769020836701e-06, "loss": 0.5226, "step": 1923 }, { "epoch": 0.11716347471302865, "grad_norm": 1.0236512645922649, "learning_rate": 4.995764379098739e-06, "loss": 0.5541, "step": 1924 }, { "epoch": 0.11722437048990653, "grad_norm": 1.1024769095441236, "learning_rate": 4.995759734818143e-06, "loss": 0.5549, "step": 1925 }, { "epoch": 0.1172852662667844, "grad_norm": 1.1924501433461847, "learning_rate": 4.995755087994919e-06, "loss": 0.5411, "step": 1926 }, { "epoch": 0.11734616204366227, "grad_norm": 1.0830983759609487, "learning_rate": 4.995750438629072e-06, "loss": 0.4725, "step": 1927 }, { "epoch": 0.11740705782054014, "grad_norm": 1.092741023637041, "learning_rate": 4.995745786720606e-06, "loss": 0.5369, "step": 1928 }, { "epoch": 0.11746795359741802, "grad_norm": 0.9996182543090801, "learning_rate": 4.995741132269526e-06, "loss": 0.5163, "step": 1929 }, { "epoch": 0.11752884937429589, "grad_norm": 1.0647870013387795, "learning_rate": 4.995736475275837e-06, "loss": 0.5246, "step": 1930 }, { "epoch": 0.11758974515117376, "grad_norm": 1.0507602597221612, "learning_rate": 4.995731815739544e-06, "loss": 0.4832, "step": 1931 }, { "epoch": 0.11765064092805164, "grad_norm": 1.0596958094601, "learning_rate": 4.99572715366065e-06, "loss": 0.5724, "step": 1932 }, { "epoch": 0.11771153670492951, "grad_norm": 0.9916734059221358, "learning_rate": 4.995722489039162e-06, "loss": 0.5158, "step": 1933 }, { "epoch": 0.11777243248180738, "grad_norm": 0.9949742962370867, "learning_rate": 4.995717821875084e-06, "loss": 0.54, "step": 1934 }, { "epoch": 0.11783332825868525, "grad_norm": 1.0276207250623461, "learning_rate": 4.9957131521684195e-06, "loss": 0.5583, "step": 1935 }, { "epoch": 0.11789422403556313, "grad_norm": 0.9871967412511898, "learning_rate": 4.995708479919176e-06, "loss": 0.5437, "step": 1936 }, { "epoch": 0.117955119812441, "grad_norm": 1.1097554249276247, "learning_rate": 4.995703805127355e-06, "loss": 0.516, "step": 1937 }, { "epoch": 0.11801601558931889, "grad_norm": 1.092615866679437, "learning_rate": 4.995699127792964e-06, "loss": 0.5667, "step": 1938 }, { "epoch": 0.11807691136619676, "grad_norm": 1.0977543003260117, "learning_rate": 4.995694447916006e-06, "loss": 0.4924, "step": 1939 }, { "epoch": 0.11813780714307463, "grad_norm": 1.1599234394138889, "learning_rate": 4.995689765496486e-06, "loss": 0.5618, "step": 1940 }, { "epoch": 0.1181987029199525, "grad_norm": 0.9838490056422822, "learning_rate": 4.99568508053441e-06, "loss": 0.5617, "step": 1941 }, { "epoch": 0.11825959869683038, "grad_norm": 1.004090286180282, "learning_rate": 4.995680393029782e-06, "loss": 0.5245, "step": 1942 }, { "epoch": 0.11832049447370825, "grad_norm": 1.0649536154756185, "learning_rate": 4.995675702982606e-06, "loss": 0.5138, "step": 1943 }, { "epoch": 0.11838139025058612, "grad_norm": 1.0421768004454548, "learning_rate": 4.995671010392888e-06, "loss": 0.4622, "step": 1944 }, { "epoch": 0.118442286027464, "grad_norm": 1.0455099486163477, "learning_rate": 4.995666315260632e-06, "loss": 0.598, "step": 1945 }, { "epoch": 0.11850318180434187, "grad_norm": 1.03868894314013, "learning_rate": 4.995661617585843e-06, "loss": 0.5415, "step": 1946 }, { "epoch": 0.11856407758121974, "grad_norm": 1.0491117944715598, "learning_rate": 4.995656917368526e-06, "loss": 0.5208, "step": 1947 }, { "epoch": 0.11862497335809762, "grad_norm": 1.0975261775674285, "learning_rate": 4.9956522146086855e-06, "loss": 0.5576, "step": 1948 }, { "epoch": 0.11868586913497549, "grad_norm": 0.9419234576248314, "learning_rate": 4.9956475093063264e-06, "loss": 0.5589, "step": 1949 }, { "epoch": 0.11874676491185336, "grad_norm": 1.0515278361033769, "learning_rate": 4.995642801461453e-06, "loss": 0.5029, "step": 1950 }, { "epoch": 0.11880766068873123, "grad_norm": 0.9997058079507244, "learning_rate": 4.995638091074072e-06, "loss": 0.5771, "step": 1951 }, { "epoch": 0.11886855646560911, "grad_norm": 1.0824517137021405, "learning_rate": 4.995633378144186e-06, "loss": 0.5646, "step": 1952 }, { "epoch": 0.11892945224248698, "grad_norm": 1.1240290021136325, "learning_rate": 4.9956286626718005e-06, "loss": 0.5159, "step": 1953 }, { "epoch": 0.11899034801936485, "grad_norm": 1.0889001027432559, "learning_rate": 4.99562394465692e-06, "loss": 0.5338, "step": 1954 }, { "epoch": 0.11905124379624273, "grad_norm": 1.2129590556672774, "learning_rate": 4.9956192240995504e-06, "loss": 0.5016, "step": 1955 }, { "epoch": 0.1191121395731206, "grad_norm": 1.1123059630631424, "learning_rate": 4.995614500999696e-06, "loss": 0.5328, "step": 1956 }, { "epoch": 0.11917303534999847, "grad_norm": 1.0842902680449693, "learning_rate": 4.99560977535736e-06, "loss": 0.4666, "step": 1957 }, { "epoch": 0.11923393112687636, "grad_norm": 1.0799714491028776, "learning_rate": 4.99560504717255e-06, "loss": 0.5462, "step": 1958 }, { "epoch": 0.11929482690375423, "grad_norm": 1.0165541062846075, "learning_rate": 4.995600316445269e-06, "loss": 0.5864, "step": 1959 }, { "epoch": 0.1193557226806321, "grad_norm": 1.1461409380169354, "learning_rate": 4.995595583175523e-06, "loss": 0.5027, "step": 1960 }, { "epoch": 0.11941661845750998, "grad_norm": 0.9702731968171178, "learning_rate": 4.995590847363315e-06, "loss": 0.4762, "step": 1961 }, { "epoch": 0.11947751423438785, "grad_norm": 1.1226021949970193, "learning_rate": 4.995586109008652e-06, "loss": 0.5375, "step": 1962 }, { "epoch": 0.11953841001126572, "grad_norm": 1.048545789552324, "learning_rate": 4.9955813681115376e-06, "loss": 0.5348, "step": 1963 }, { "epoch": 0.1195993057881436, "grad_norm": 1.066349990443488, "learning_rate": 4.995576624671976e-06, "loss": 0.5688, "step": 1964 }, { "epoch": 0.11966020156502147, "grad_norm": 0.9992058205842604, "learning_rate": 4.9955718786899735e-06, "loss": 0.5601, "step": 1965 }, { "epoch": 0.11972109734189934, "grad_norm": 1.1031046961780469, "learning_rate": 4.995567130165533e-06, "loss": 0.5229, "step": 1966 }, { "epoch": 0.11978199311877721, "grad_norm": 1.0129517188527721, "learning_rate": 4.995562379098662e-06, "loss": 0.4997, "step": 1967 }, { "epoch": 0.11984288889565509, "grad_norm": 1.0076094945328282, "learning_rate": 4.995557625489363e-06, "loss": 0.6612, "step": 1968 }, { "epoch": 0.11990378467253296, "grad_norm": 0.993390962457168, "learning_rate": 4.9955528693376435e-06, "loss": 0.5025, "step": 1969 }, { "epoch": 0.11996468044941083, "grad_norm": 1.1864369524320824, "learning_rate": 4.995548110643505e-06, "loss": 0.6458, "step": 1970 }, { "epoch": 0.1200255762262887, "grad_norm": 1.0301843235088823, "learning_rate": 4.995543349406954e-06, "loss": 0.531, "step": 1971 }, { "epoch": 0.12008647200316658, "grad_norm": 0.9978725433516997, "learning_rate": 4.995538585627996e-06, "loss": 0.5254, "step": 1972 }, { "epoch": 0.12014736778004445, "grad_norm": 1.037433180301015, "learning_rate": 4.995533819306635e-06, "loss": 0.5074, "step": 1973 }, { "epoch": 0.12020826355692232, "grad_norm": 0.9921867128082734, "learning_rate": 4.995529050442875e-06, "loss": 0.5017, "step": 1974 }, { "epoch": 0.1202691593338002, "grad_norm": 1.0647106841543776, "learning_rate": 4.9955242790367235e-06, "loss": 0.5394, "step": 1975 }, { "epoch": 0.12033005511067807, "grad_norm": 0.9734910881219094, "learning_rate": 4.995519505088183e-06, "loss": 0.488, "step": 1976 }, { "epoch": 0.12039095088755594, "grad_norm": 0.9791276769941627, "learning_rate": 4.995514728597259e-06, "loss": 0.4785, "step": 1977 }, { "epoch": 0.12045184666443381, "grad_norm": 1.08161725969866, "learning_rate": 4.9955099495639565e-06, "loss": 0.5376, "step": 1978 }, { "epoch": 0.1205127424413117, "grad_norm": 0.9630073454478473, "learning_rate": 4.99550516798828e-06, "loss": 0.5811, "step": 1979 }, { "epoch": 0.12057363821818957, "grad_norm": 1.1103048750103652, "learning_rate": 4.995500383870236e-06, "loss": 0.567, "step": 1980 }, { "epoch": 0.12063453399506745, "grad_norm": 0.9614542296199277, "learning_rate": 4.995495597209827e-06, "loss": 0.6339, "step": 1981 }, { "epoch": 0.12069542977194532, "grad_norm": 1.0223082964390624, "learning_rate": 4.995490808007059e-06, "loss": 0.5642, "step": 1982 }, { "epoch": 0.12075632554882319, "grad_norm": 1.0404579542913066, "learning_rate": 4.995486016261936e-06, "loss": 0.5046, "step": 1983 }, { "epoch": 0.12081722132570107, "grad_norm": 1.1055947967916173, "learning_rate": 4.995481221974466e-06, "loss": 0.524, "step": 1984 }, { "epoch": 0.12087811710257894, "grad_norm": 0.9847340718257267, "learning_rate": 4.9954764251446506e-06, "loss": 0.5371, "step": 1985 }, { "epoch": 0.12093901287945681, "grad_norm": 0.9674077698086528, "learning_rate": 4.995471625772495e-06, "loss": 0.5545, "step": 1986 }, { "epoch": 0.12099990865633468, "grad_norm": 1.0692407805781163, "learning_rate": 4.995466823858005e-06, "loss": 0.5243, "step": 1987 }, { "epoch": 0.12106080443321256, "grad_norm": 1.0443238178141467, "learning_rate": 4.995462019401186e-06, "loss": 0.5571, "step": 1988 }, { "epoch": 0.12112170021009043, "grad_norm": 1.0764019740226995, "learning_rate": 4.995457212402042e-06, "loss": 0.5323, "step": 1989 }, { "epoch": 0.1211825959869683, "grad_norm": 1.0848190581571255, "learning_rate": 4.995452402860578e-06, "loss": 0.5627, "step": 1990 }, { "epoch": 0.12124349176384618, "grad_norm": 1.1568917675070665, "learning_rate": 4.995447590776798e-06, "loss": 0.5318, "step": 1991 }, { "epoch": 0.12130438754072405, "grad_norm": 1.0971523176549374, "learning_rate": 4.995442776150709e-06, "loss": 0.4935, "step": 1992 }, { "epoch": 0.12136528331760192, "grad_norm": 1.028894348738708, "learning_rate": 4.995437958982315e-06, "loss": 0.5171, "step": 1993 }, { "epoch": 0.1214261790944798, "grad_norm": 1.047026429290539, "learning_rate": 4.9954331392716194e-06, "loss": 0.5456, "step": 1994 }, { "epoch": 0.12148707487135767, "grad_norm": 1.0075725293607591, "learning_rate": 4.995428317018629e-06, "loss": 0.5547, "step": 1995 }, { "epoch": 0.12154797064823554, "grad_norm": 1.1067168970140713, "learning_rate": 4.995423492223349e-06, "loss": 0.4832, "step": 1996 }, { "epoch": 0.12160886642511341, "grad_norm": 1.1307697847268017, "learning_rate": 4.995418664885783e-06, "loss": 0.5011, "step": 1997 }, { "epoch": 0.12166976220199129, "grad_norm": 1.0929435385037687, "learning_rate": 4.995413835005936e-06, "loss": 0.5218, "step": 1998 }, { "epoch": 0.12173065797886917, "grad_norm": 1.0210987285380264, "learning_rate": 4.995409002583813e-06, "loss": 0.5147, "step": 1999 }, { "epoch": 0.12179155375574705, "grad_norm": 1.200457822240292, "learning_rate": 4.995404167619419e-06, "loss": 0.5473, "step": 2000 }, { "epoch": 0.12185244953262492, "grad_norm": 1.0493162020909226, "learning_rate": 4.99539933011276e-06, "loss": 0.5021, "step": 2001 }, { "epoch": 0.12191334530950279, "grad_norm": 1.1335389505359488, "learning_rate": 4.99539449006384e-06, "loss": 0.5054, "step": 2002 }, { "epoch": 0.12197424108638066, "grad_norm": 1.0727906131273073, "learning_rate": 4.995389647472663e-06, "loss": 0.5651, "step": 2003 }, { "epoch": 0.12203513686325854, "grad_norm": 1.0071366632759449, "learning_rate": 4.995384802339236e-06, "loss": 0.6416, "step": 2004 }, { "epoch": 0.12209603264013641, "grad_norm": 1.0026881466148814, "learning_rate": 4.995379954663562e-06, "loss": 0.5293, "step": 2005 }, { "epoch": 0.12215692841701428, "grad_norm": 1.0197090897537968, "learning_rate": 4.995375104445647e-06, "loss": 0.5113, "step": 2006 }, { "epoch": 0.12221782419389216, "grad_norm": 1.0508338533222017, "learning_rate": 4.995370251685496e-06, "loss": 0.5208, "step": 2007 }, { "epoch": 0.12227871997077003, "grad_norm": 0.9966202208481657, "learning_rate": 4.995365396383114e-06, "loss": 0.5885, "step": 2008 }, { "epoch": 0.1223396157476479, "grad_norm": 1.0842119043001366, "learning_rate": 4.995360538538505e-06, "loss": 0.6105, "step": 2009 }, { "epoch": 0.12240051152452577, "grad_norm": 1.0456835070364647, "learning_rate": 4.995355678151674e-06, "loss": 0.5413, "step": 2010 }, { "epoch": 0.12246140730140365, "grad_norm": 1.16369991618452, "learning_rate": 4.995350815222628e-06, "loss": 0.5474, "step": 2011 }, { "epoch": 0.12252230307828152, "grad_norm": 1.0814797963073004, "learning_rate": 4.99534594975137e-06, "loss": 0.4955, "step": 2012 }, { "epoch": 0.12258319885515939, "grad_norm": 1.0566379637742223, "learning_rate": 4.995341081737904e-06, "loss": 0.526, "step": 2013 }, { "epoch": 0.12264409463203726, "grad_norm": 1.015155218742299, "learning_rate": 4.995336211182238e-06, "loss": 0.4967, "step": 2014 }, { "epoch": 0.12270499040891514, "grad_norm": 0.9286729817910063, "learning_rate": 4.995331338084375e-06, "loss": 0.5176, "step": 2015 }, { "epoch": 0.12276588618579301, "grad_norm": 1.0252174156648046, "learning_rate": 4.9953264624443195e-06, "loss": 0.5064, "step": 2016 }, { "epoch": 0.12282678196267088, "grad_norm": 1.0004179337787553, "learning_rate": 4.9953215842620786e-06, "loss": 0.5332, "step": 2017 }, { "epoch": 0.12288767773954876, "grad_norm": 0.9695097454635692, "learning_rate": 4.995316703537655e-06, "loss": 0.5381, "step": 2018 }, { "epoch": 0.12294857351642663, "grad_norm": 1.1178332761877396, "learning_rate": 4.995311820271055e-06, "loss": 0.4583, "step": 2019 }, { "epoch": 0.12300946929330452, "grad_norm": 1.0861533791548732, "learning_rate": 4.995306934462284e-06, "loss": 0.4979, "step": 2020 }, { "epoch": 0.12307036507018239, "grad_norm": 1.04720205733096, "learning_rate": 4.9953020461113445e-06, "loss": 0.5479, "step": 2021 }, { "epoch": 0.12313126084706026, "grad_norm": 1.016000854997744, "learning_rate": 4.995297155218244e-06, "loss": 0.4887, "step": 2022 }, { "epoch": 0.12319215662393813, "grad_norm": 1.0806149115702994, "learning_rate": 4.995292261782987e-06, "loss": 0.6052, "step": 2023 }, { "epoch": 0.12325305240081601, "grad_norm": 1.161845413034692, "learning_rate": 4.9952873658055775e-06, "loss": 0.5357, "step": 2024 }, { "epoch": 0.12331394817769388, "grad_norm": 1.0317790208659148, "learning_rate": 4.995282467286021e-06, "loss": 0.4855, "step": 2025 }, { "epoch": 0.12337484395457175, "grad_norm": 1.0281896919336817, "learning_rate": 4.9952775662243235e-06, "loss": 0.5204, "step": 2026 }, { "epoch": 0.12343573973144963, "grad_norm": 1.0278154970388031, "learning_rate": 4.9952726626204885e-06, "loss": 0.5278, "step": 2027 }, { "epoch": 0.1234966355083275, "grad_norm": 1.0577458452374164, "learning_rate": 4.995267756474521e-06, "loss": 0.4863, "step": 2028 }, { "epoch": 0.12355753128520537, "grad_norm": 1.0329017244707641, "learning_rate": 4.995262847786428e-06, "loss": 0.5365, "step": 2029 }, { "epoch": 0.12361842706208324, "grad_norm": 1.0167571601531422, "learning_rate": 4.995257936556211e-06, "loss": 0.5321, "step": 2030 }, { "epoch": 0.12367932283896112, "grad_norm": 1.0767443487332633, "learning_rate": 4.995253022783879e-06, "loss": 0.5453, "step": 2031 }, { "epoch": 0.12374021861583899, "grad_norm": 1.0678598592771935, "learning_rate": 4.995248106469435e-06, "loss": 0.5261, "step": 2032 }, { "epoch": 0.12380111439271686, "grad_norm": 1.0046908399416754, "learning_rate": 4.9952431876128835e-06, "loss": 0.5387, "step": 2033 }, { "epoch": 0.12386201016959474, "grad_norm": 1.1590744909764925, "learning_rate": 4.99523826621423e-06, "loss": 0.5438, "step": 2034 }, { "epoch": 0.12392290594647261, "grad_norm": 1.0521677200528226, "learning_rate": 4.99523334227348e-06, "loss": 0.466, "step": 2035 }, { "epoch": 0.12398380172335048, "grad_norm": 1.1234063056236858, "learning_rate": 4.995228415790638e-06, "loss": 0.5352, "step": 2036 }, { "epoch": 0.12404469750022835, "grad_norm": 1.0964363474028576, "learning_rate": 4.995223486765709e-06, "loss": 0.4385, "step": 2037 }, { "epoch": 0.12410559327710623, "grad_norm": 1.0432815136368376, "learning_rate": 4.995218555198698e-06, "loss": 0.5394, "step": 2038 }, { "epoch": 0.1241664890539841, "grad_norm": 1.014364634750578, "learning_rate": 4.99521362108961e-06, "loss": 0.5158, "step": 2039 }, { "epoch": 0.12422738483086199, "grad_norm": 1.0702322605879642, "learning_rate": 4.995208684438452e-06, "loss": 0.5146, "step": 2040 }, { "epoch": 0.12428828060773986, "grad_norm": 1.0479965595022942, "learning_rate": 4.995203745245226e-06, "loss": 0.5098, "step": 2041 }, { "epoch": 0.12434917638461773, "grad_norm": 1.0994282458669773, "learning_rate": 4.995198803509938e-06, "loss": 0.506, "step": 2042 }, { "epoch": 0.1244100721614956, "grad_norm": 0.9762517586678886, "learning_rate": 4.9951938592325935e-06, "loss": 0.5481, "step": 2043 }, { "epoch": 0.12447096793837348, "grad_norm": 1.2031219469306018, "learning_rate": 4.995188912413198e-06, "loss": 0.5363, "step": 2044 }, { "epoch": 0.12453186371525135, "grad_norm": 0.9541048445506638, "learning_rate": 4.995183963051755e-06, "loss": 0.5223, "step": 2045 }, { "epoch": 0.12459275949212922, "grad_norm": 1.0364684024427435, "learning_rate": 4.995179011148271e-06, "loss": 0.528, "step": 2046 }, { "epoch": 0.1246536552690071, "grad_norm": 1.0709185374449652, "learning_rate": 4.995174056702751e-06, "loss": 0.513, "step": 2047 }, { "epoch": 0.12471455104588497, "grad_norm": 1.1859789962294502, "learning_rate": 4.995169099715199e-06, "loss": 0.5126, "step": 2048 }, { "epoch": 0.12477544682276284, "grad_norm": 1.036628774132535, "learning_rate": 4.99516414018562e-06, "loss": 0.5434, "step": 2049 }, { "epoch": 0.12483634259964072, "grad_norm": 1.0320386428430355, "learning_rate": 4.995159178114021e-06, "loss": 0.505, "step": 2050 }, { "epoch": 0.12489723837651859, "grad_norm": 1.0492256234286317, "learning_rate": 4.995154213500404e-06, "loss": 0.5473, "step": 2051 }, { "epoch": 0.12495813415339646, "grad_norm": 1.035454847549353, "learning_rate": 4.995149246344777e-06, "loss": 0.533, "step": 2052 }, { "epoch": 0.12501902993027433, "grad_norm": 1.0951992785861488, "learning_rate": 4.995144276647143e-06, "loss": 0.5632, "step": 2053 }, { "epoch": 0.1250799257071522, "grad_norm": 1.19594511257362, "learning_rate": 4.995139304407509e-06, "loss": 0.4943, "step": 2054 }, { "epoch": 0.12514082148403008, "grad_norm": 1.03895620776062, "learning_rate": 4.995134329625878e-06, "loss": 0.4706, "step": 2055 }, { "epoch": 0.12520171726090795, "grad_norm": 0.9892966182628714, "learning_rate": 4.995129352302257e-06, "loss": 0.5679, "step": 2056 }, { "epoch": 0.12526261303778582, "grad_norm": 1.017093146714465, "learning_rate": 4.9951243724366494e-06, "loss": 0.5806, "step": 2057 }, { "epoch": 0.1253235088146637, "grad_norm": 1.0992272552794902, "learning_rate": 4.995119390029061e-06, "loss": 0.5191, "step": 2058 }, { "epoch": 0.12538440459154157, "grad_norm": 1.0942748213407978, "learning_rate": 4.995114405079496e-06, "loss": 0.5381, "step": 2059 }, { "epoch": 0.12544530036841944, "grad_norm": 0.9804334998534947, "learning_rate": 4.995109417587962e-06, "loss": 0.5687, "step": 2060 }, { "epoch": 0.12550619614529732, "grad_norm": 1.0332930828523208, "learning_rate": 4.995104427554462e-06, "loss": 0.5534, "step": 2061 }, { "epoch": 0.1255670919221752, "grad_norm": 1.1682760417462412, "learning_rate": 4.995099434979001e-06, "loss": 0.5806, "step": 2062 }, { "epoch": 0.12562798769905306, "grad_norm": 1.0774259737281284, "learning_rate": 4.995094439861584e-06, "loss": 0.5636, "step": 2063 }, { "epoch": 0.12568888347593093, "grad_norm": 0.9743188819981542, "learning_rate": 4.9950894422022175e-06, "loss": 0.6071, "step": 2064 }, { "epoch": 0.1257497792528088, "grad_norm": 1.1205253637066266, "learning_rate": 4.995084442000906e-06, "loss": 0.5567, "step": 2065 }, { "epoch": 0.12581067502968668, "grad_norm": 1.0257101252099232, "learning_rate": 4.995079439257653e-06, "loss": 0.5224, "step": 2066 }, { "epoch": 0.12587157080656455, "grad_norm": 1.0586937377677772, "learning_rate": 4.995074433972466e-06, "loss": 0.605, "step": 2067 }, { "epoch": 0.12593246658344243, "grad_norm": 0.9980367111876604, "learning_rate": 4.995069426145349e-06, "loss": 0.5279, "step": 2068 }, { "epoch": 0.1259933623603203, "grad_norm": 1.0343094344958008, "learning_rate": 4.995064415776307e-06, "loss": 0.5215, "step": 2069 }, { "epoch": 0.12605425813719817, "grad_norm": 1.098162270648388, "learning_rate": 4.9950594028653455e-06, "loss": 0.5, "step": 2070 }, { "epoch": 0.12611515391407607, "grad_norm": 1.1469663463198698, "learning_rate": 4.995054387412469e-06, "loss": 0.488, "step": 2071 }, { "epoch": 0.12617604969095395, "grad_norm": 0.9096808615797569, "learning_rate": 4.995049369417683e-06, "loss": 0.5487, "step": 2072 }, { "epoch": 0.12623694546783182, "grad_norm": 1.00912613238366, "learning_rate": 4.995044348880993e-06, "loss": 0.5285, "step": 2073 }, { "epoch": 0.1262978412447097, "grad_norm": 1.0752077545935592, "learning_rate": 4.9950393258024035e-06, "loss": 0.5431, "step": 2074 }, { "epoch": 0.12635873702158756, "grad_norm": 1.0330405801717757, "learning_rate": 4.995034300181919e-06, "loss": 0.5229, "step": 2075 }, { "epoch": 0.12641963279846544, "grad_norm": 1.0462082450009091, "learning_rate": 4.995029272019546e-06, "loss": 0.5516, "step": 2076 }, { "epoch": 0.1264805285753433, "grad_norm": 1.0981599844047356, "learning_rate": 4.995024241315289e-06, "loss": 0.526, "step": 2077 }, { "epoch": 0.12654142435222118, "grad_norm": 1.125989214571041, "learning_rate": 4.995019208069154e-06, "loss": 0.4331, "step": 2078 }, { "epoch": 0.12660232012909906, "grad_norm": 1.0783489957379422, "learning_rate": 4.995014172281144e-06, "loss": 0.5316, "step": 2079 }, { "epoch": 0.12666321590597693, "grad_norm": 0.9665770873623739, "learning_rate": 4.995009133951266e-06, "loss": 0.5527, "step": 2080 }, { "epoch": 0.1267241116828548, "grad_norm": 1.1331057719306377, "learning_rate": 4.995004093079525e-06, "loss": 0.4549, "step": 2081 }, { "epoch": 0.12678500745973267, "grad_norm": 1.0826393923607567, "learning_rate": 4.994999049665925e-06, "loss": 0.5304, "step": 2082 }, { "epoch": 0.12684590323661055, "grad_norm": 1.0097103898553572, "learning_rate": 4.994994003710473e-06, "loss": 0.5126, "step": 2083 }, { "epoch": 0.12690679901348842, "grad_norm": 1.0786440626233307, "learning_rate": 4.994988955213172e-06, "loss": 0.566, "step": 2084 }, { "epoch": 0.1269676947903663, "grad_norm": 1.1098472544527516, "learning_rate": 4.9949839041740285e-06, "loss": 0.5109, "step": 2085 }, { "epoch": 0.12702859056724417, "grad_norm": 1.010333019079247, "learning_rate": 4.9949788505930465e-06, "loss": 0.6141, "step": 2086 }, { "epoch": 0.12708948634412204, "grad_norm": 1.026432127752044, "learning_rate": 4.994973794470233e-06, "loss": 0.504, "step": 2087 }, { "epoch": 0.1271503821209999, "grad_norm": 1.1022799959878615, "learning_rate": 4.994968735805591e-06, "loss": 0.5136, "step": 2088 }, { "epoch": 0.12721127789787778, "grad_norm": 0.9681360928105647, "learning_rate": 4.994963674599127e-06, "loss": 0.5619, "step": 2089 }, { "epoch": 0.12727217367475566, "grad_norm": 1.0677144621860617, "learning_rate": 4.994958610850846e-06, "loss": 0.553, "step": 2090 }, { "epoch": 0.12733306945163353, "grad_norm": 1.1452069076498284, "learning_rate": 4.994953544560754e-06, "loss": 0.5142, "step": 2091 }, { "epoch": 0.1273939652285114, "grad_norm": 1.057778358490308, "learning_rate": 4.994948475728855e-06, "loss": 0.6105, "step": 2092 }, { "epoch": 0.12745486100538927, "grad_norm": 1.0770532823467702, "learning_rate": 4.994943404355153e-06, "loss": 0.5613, "step": 2093 }, { "epoch": 0.12751575678226715, "grad_norm": 1.131152615293246, "learning_rate": 4.994938330439655e-06, "loss": 0.5003, "step": 2094 }, { "epoch": 0.12757665255914502, "grad_norm": 1.0054247602421167, "learning_rate": 4.9949332539823656e-06, "loss": 0.5576, "step": 2095 }, { "epoch": 0.1276375483360229, "grad_norm": 1.0523817227288437, "learning_rate": 4.994928174983291e-06, "loss": 0.5402, "step": 2096 }, { "epoch": 0.12769844411290077, "grad_norm": 1.1049477804612677, "learning_rate": 4.994923093442435e-06, "loss": 0.5619, "step": 2097 }, { "epoch": 0.12775933988977864, "grad_norm": 1.0786580393901084, "learning_rate": 4.994918009359803e-06, "loss": 0.5235, "step": 2098 }, { "epoch": 0.1278202356666565, "grad_norm": 1.2079630272731356, "learning_rate": 4.994912922735401e-06, "loss": 0.4876, "step": 2099 }, { "epoch": 0.12788113144353438, "grad_norm": 1.0696693991384705, "learning_rate": 4.9949078335692326e-06, "loss": 0.5457, "step": 2100 }, { "epoch": 0.12794202722041226, "grad_norm": 0.994699098236643, "learning_rate": 4.994902741861305e-06, "loss": 0.5279, "step": 2101 }, { "epoch": 0.12800292299729013, "grad_norm": 1.0534738255269789, "learning_rate": 4.994897647611622e-06, "loss": 0.5139, "step": 2102 }, { "epoch": 0.128063818774168, "grad_norm": 1.0233691840369288, "learning_rate": 4.9948925508201885e-06, "loss": 0.4625, "step": 2103 }, { "epoch": 0.12812471455104588, "grad_norm": 1.1252656777822323, "learning_rate": 4.994887451487011e-06, "loss": 0.5465, "step": 2104 }, { "epoch": 0.12818561032792375, "grad_norm": 1.0149817264655387, "learning_rate": 4.994882349612094e-06, "loss": 0.5508, "step": 2105 }, { "epoch": 0.12824650610480162, "grad_norm": 0.9908440772761741, "learning_rate": 4.994877245195442e-06, "loss": 0.5419, "step": 2106 }, { "epoch": 0.1283074018816795, "grad_norm": 1.0675992902152254, "learning_rate": 4.994872138237061e-06, "loss": 0.5186, "step": 2107 }, { "epoch": 0.12836829765855737, "grad_norm": 0.9711684918027259, "learning_rate": 4.994867028736957e-06, "loss": 0.5446, "step": 2108 }, { "epoch": 0.12842919343543524, "grad_norm": 1.0379386486539437, "learning_rate": 4.994861916695134e-06, "loss": 0.5406, "step": 2109 }, { "epoch": 0.1284900892123131, "grad_norm": 1.0516907139683418, "learning_rate": 4.9948568021115975e-06, "loss": 0.5812, "step": 2110 }, { "epoch": 0.128550984989191, "grad_norm": 1.062219311446186, "learning_rate": 4.994851684986353e-06, "loss": 0.4605, "step": 2111 }, { "epoch": 0.1286118807660689, "grad_norm": 1.026211047966583, "learning_rate": 4.994846565319405e-06, "loss": 0.5207, "step": 2112 }, { "epoch": 0.12867277654294676, "grad_norm": 1.1047697645625891, "learning_rate": 4.994841443110759e-06, "loss": 0.4659, "step": 2113 }, { "epoch": 0.12873367231982463, "grad_norm": 1.077676164298384, "learning_rate": 4.994836318360422e-06, "loss": 0.5225, "step": 2114 }, { "epoch": 0.1287945680967025, "grad_norm": 1.2039275943872958, "learning_rate": 4.994831191068396e-06, "loss": 0.512, "step": 2115 }, { "epoch": 0.12885546387358038, "grad_norm": 1.214471813143012, "learning_rate": 4.994826061234689e-06, "loss": 0.5164, "step": 2116 }, { "epoch": 0.12891635965045825, "grad_norm": 1.030998327351342, "learning_rate": 4.9948209288593045e-06, "loss": 0.4792, "step": 2117 }, { "epoch": 0.12897725542733612, "grad_norm": 0.9974627742156775, "learning_rate": 4.994815793942248e-06, "loss": 0.5518, "step": 2118 }, { "epoch": 0.129038151204214, "grad_norm": 1.0362358446879465, "learning_rate": 4.994810656483525e-06, "loss": 0.5516, "step": 2119 }, { "epoch": 0.12909904698109187, "grad_norm": 1.1031983105068985, "learning_rate": 4.994805516483141e-06, "loss": 0.4654, "step": 2120 }, { "epoch": 0.12915994275796974, "grad_norm": 1.1421437391900036, "learning_rate": 4.9948003739411015e-06, "loss": 0.5281, "step": 2121 }, { "epoch": 0.12922083853484762, "grad_norm": 0.9988019336393804, "learning_rate": 4.99479522885741e-06, "loss": 0.5398, "step": 2122 }, { "epoch": 0.1292817343117255, "grad_norm": 1.0994612055967892, "learning_rate": 4.994790081232074e-06, "loss": 0.4297, "step": 2123 }, { "epoch": 0.12934263008860336, "grad_norm": 1.0066235043906644, "learning_rate": 4.9947849310650975e-06, "loss": 0.5193, "step": 2124 }, { "epoch": 0.12940352586548123, "grad_norm": 1.0953870013259372, "learning_rate": 4.994779778356486e-06, "loss": 0.4866, "step": 2125 }, { "epoch": 0.1294644216423591, "grad_norm": 1.1040560567074897, "learning_rate": 4.994774623106245e-06, "loss": 0.516, "step": 2126 }, { "epoch": 0.12952531741923698, "grad_norm": 1.162782414033461, "learning_rate": 4.994769465314379e-06, "loss": 0.5015, "step": 2127 }, { "epoch": 0.12958621319611485, "grad_norm": 1.037247236436997, "learning_rate": 4.994764304980894e-06, "loss": 0.4828, "step": 2128 }, { "epoch": 0.12964710897299273, "grad_norm": 1.1293708470296309, "learning_rate": 4.994759142105795e-06, "loss": 0.5328, "step": 2129 }, { "epoch": 0.1297080047498706, "grad_norm": 0.9210270259901354, "learning_rate": 4.9947539766890875e-06, "loss": 0.5783, "step": 2130 }, { "epoch": 0.12976890052674847, "grad_norm": 1.1298668318657212, "learning_rate": 4.994748808730776e-06, "loss": 0.5647, "step": 2131 }, { "epoch": 0.12982979630362634, "grad_norm": 1.0403239939985685, "learning_rate": 4.994743638230866e-06, "loss": 0.4939, "step": 2132 }, { "epoch": 0.12989069208050422, "grad_norm": 1.086727077614537, "learning_rate": 4.994738465189363e-06, "loss": 0.5634, "step": 2133 }, { "epoch": 0.1299515878573821, "grad_norm": 1.0645860694094058, "learning_rate": 4.994733289606274e-06, "loss": 0.585, "step": 2134 }, { "epoch": 0.13001248363425996, "grad_norm": 1.132019405806143, "learning_rate": 4.9947281114816016e-06, "loss": 0.4922, "step": 2135 }, { "epoch": 0.13007337941113783, "grad_norm": 1.1236123361525, "learning_rate": 4.994722930815352e-06, "loss": 0.5891, "step": 2136 }, { "epoch": 0.1301342751880157, "grad_norm": 1.0576044736064194, "learning_rate": 4.99471774760753e-06, "loss": 0.5302, "step": 2137 }, { "epoch": 0.13019517096489358, "grad_norm": 1.1154170188663723, "learning_rate": 4.994712561858142e-06, "loss": 0.5544, "step": 2138 }, { "epoch": 0.13025606674177145, "grad_norm": 1.0489756702751556, "learning_rate": 4.994707373567192e-06, "loss": 0.5672, "step": 2139 }, { "epoch": 0.13031696251864933, "grad_norm": 0.9726022207000563, "learning_rate": 4.994702182734687e-06, "loss": 0.4961, "step": 2140 }, { "epoch": 0.1303778582955272, "grad_norm": 1.061771650105231, "learning_rate": 4.994696989360631e-06, "loss": 0.5095, "step": 2141 }, { "epoch": 0.13043875407240507, "grad_norm": 1.1506935157059341, "learning_rate": 4.994691793445029e-06, "loss": 0.4551, "step": 2142 }, { "epoch": 0.13049964984928294, "grad_norm": 0.9873620093010786, "learning_rate": 4.994686594987888e-06, "loss": 0.5802, "step": 2143 }, { "epoch": 0.13056054562616082, "grad_norm": 0.9958164418136389, "learning_rate": 4.994681393989211e-06, "loss": 0.5251, "step": 2144 }, { "epoch": 0.1306214414030387, "grad_norm": 1.039863281188883, "learning_rate": 4.994676190449005e-06, "loss": 0.5075, "step": 2145 }, { "epoch": 0.13068233717991656, "grad_norm": 1.1228889197255056, "learning_rate": 4.994670984367275e-06, "loss": 0.6113, "step": 2146 }, { "epoch": 0.13074323295679444, "grad_norm": 1.000367234543684, "learning_rate": 4.994665775744025e-06, "loss": 0.6545, "step": 2147 }, { "epoch": 0.1308041287336723, "grad_norm": 1.1025086016935004, "learning_rate": 4.9946605645792624e-06, "loss": 0.5125, "step": 2148 }, { "epoch": 0.13086502451055018, "grad_norm": 1.1425130568105528, "learning_rate": 4.994655350872992e-06, "loss": 0.5169, "step": 2149 }, { "epoch": 0.13092592028742805, "grad_norm": 0.9631525610986171, "learning_rate": 4.9946501346252174e-06, "loss": 0.4881, "step": 2150 }, { "epoch": 0.13098681606430593, "grad_norm": 0.9823090406905182, "learning_rate": 4.994644915835945e-06, "loss": 0.6038, "step": 2151 }, { "epoch": 0.1310477118411838, "grad_norm": 0.9623894870994768, "learning_rate": 4.994639694505181e-06, "loss": 0.5219, "step": 2152 }, { "epoch": 0.1311086076180617, "grad_norm": 1.047988690183531, "learning_rate": 4.99463447063293e-06, "loss": 0.4745, "step": 2153 }, { "epoch": 0.13116950339493957, "grad_norm": 1.0234904122678912, "learning_rate": 4.994629244219197e-06, "loss": 0.5851, "step": 2154 }, { "epoch": 0.13123039917181745, "grad_norm": 1.1465394412638228, "learning_rate": 4.994624015263987e-06, "loss": 0.5006, "step": 2155 }, { "epoch": 0.13129129494869532, "grad_norm": 1.1968249576626895, "learning_rate": 4.9946187837673065e-06, "loss": 0.4969, "step": 2156 }, { "epoch": 0.1313521907255732, "grad_norm": 1.1410437536876108, "learning_rate": 4.99461354972916e-06, "loss": 0.6011, "step": 2157 }, { "epoch": 0.13141308650245107, "grad_norm": 1.0026190218724238, "learning_rate": 4.994608313149553e-06, "loss": 0.5372, "step": 2158 }, { "epoch": 0.13147398227932894, "grad_norm": 1.0472673900799059, "learning_rate": 4.994603074028492e-06, "loss": 0.513, "step": 2159 }, { "epoch": 0.1315348780562068, "grad_norm": 1.0764145650470298, "learning_rate": 4.99459783236598e-06, "loss": 0.5387, "step": 2160 }, { "epoch": 0.13159577383308468, "grad_norm": 1.0416459505878484, "learning_rate": 4.994592588162025e-06, "loss": 0.5506, "step": 2161 }, { "epoch": 0.13165666960996256, "grad_norm": 1.0907591765178848, "learning_rate": 4.994587341416629e-06, "loss": 0.5391, "step": 2162 }, { "epoch": 0.13171756538684043, "grad_norm": 1.0257348284420074, "learning_rate": 4.994582092129801e-06, "loss": 0.5359, "step": 2163 }, { "epoch": 0.1317784611637183, "grad_norm": 1.0555250843035398, "learning_rate": 4.994576840301544e-06, "loss": 0.5179, "step": 2164 }, { "epoch": 0.13183935694059618, "grad_norm": 1.0257961315151316, "learning_rate": 4.9945715859318635e-06, "loss": 0.4884, "step": 2165 }, { "epoch": 0.13190025271747405, "grad_norm": 1.1016593364886011, "learning_rate": 4.994566329020766e-06, "loss": 0.5297, "step": 2166 }, { "epoch": 0.13196114849435192, "grad_norm": 1.098545748594072, "learning_rate": 4.9945610695682555e-06, "loss": 0.5428, "step": 2167 }, { "epoch": 0.1320220442712298, "grad_norm": 1.035022694361597, "learning_rate": 4.994555807574339e-06, "loss": 0.5304, "step": 2168 }, { "epoch": 0.13208294004810767, "grad_norm": 1.0210639405064736, "learning_rate": 4.99455054303902e-06, "loss": 0.5221, "step": 2169 }, { "epoch": 0.13214383582498554, "grad_norm": 1.0441234118084222, "learning_rate": 4.994545275962305e-06, "loss": 0.5008, "step": 2170 }, { "epoch": 0.1322047316018634, "grad_norm": 1.0693563921774638, "learning_rate": 4.994540006344199e-06, "loss": 0.5272, "step": 2171 }, { "epoch": 0.13226562737874129, "grad_norm": 1.0480817020296183, "learning_rate": 4.9945347341847084e-06, "loss": 0.5497, "step": 2172 }, { "epoch": 0.13232652315561916, "grad_norm": 1.0489502253926293, "learning_rate": 4.994529459483837e-06, "loss": 0.5425, "step": 2173 }, { "epoch": 0.13238741893249703, "grad_norm": 1.0842041870565118, "learning_rate": 4.994524182241591e-06, "loss": 0.4875, "step": 2174 }, { "epoch": 0.1324483147093749, "grad_norm": 1.0102807829314358, "learning_rate": 4.994518902457976e-06, "loss": 0.4877, "step": 2175 }, { "epoch": 0.13250921048625278, "grad_norm": 1.0463568167427029, "learning_rate": 4.994513620132996e-06, "loss": 0.5279, "step": 2176 }, { "epoch": 0.13257010626313065, "grad_norm": 0.9964972929946765, "learning_rate": 4.994508335266658e-06, "loss": 0.6494, "step": 2177 }, { "epoch": 0.13263100204000852, "grad_norm": 1.045981580605222, "learning_rate": 4.994503047858966e-06, "loss": 0.5673, "step": 2178 }, { "epoch": 0.1326918978168864, "grad_norm": 0.9765117311210217, "learning_rate": 4.994497757909927e-06, "loss": 0.6022, "step": 2179 }, { "epoch": 0.13275279359376427, "grad_norm": 1.1232474344820293, "learning_rate": 4.994492465419546e-06, "loss": 0.4825, "step": 2180 }, { "epoch": 0.13281368937064214, "grad_norm": 1.0203588160744739, "learning_rate": 4.994487170387828e-06, "loss": 0.5344, "step": 2181 }, { "epoch": 0.13287458514752, "grad_norm": 1.0328680071452823, "learning_rate": 4.994481872814777e-06, "loss": 0.4819, "step": 2182 }, { "epoch": 0.1329354809243979, "grad_norm": 1.0732170504812888, "learning_rate": 4.9944765727004e-06, "loss": 0.5204, "step": 2183 }, { "epoch": 0.13299637670127576, "grad_norm": 0.9914696904093478, "learning_rate": 4.9944712700447026e-06, "loss": 0.5175, "step": 2184 }, { "epoch": 0.13305727247815363, "grad_norm": 1.0297885079729163, "learning_rate": 4.994465964847689e-06, "loss": 0.5261, "step": 2185 }, { "epoch": 0.1331181682550315, "grad_norm": 1.0944115804937151, "learning_rate": 4.9944606571093665e-06, "loss": 0.5338, "step": 2186 }, { "epoch": 0.13317906403190938, "grad_norm": 1.034858824556912, "learning_rate": 4.994455346829739e-06, "loss": 0.6, "step": 2187 }, { "epoch": 0.13323995980878725, "grad_norm": 0.9990610787813946, "learning_rate": 4.994450034008812e-06, "loss": 0.6149, "step": 2188 }, { "epoch": 0.13330085558566512, "grad_norm": 1.0409606054051632, "learning_rate": 4.994444718646591e-06, "loss": 0.5611, "step": 2189 }, { "epoch": 0.133361751362543, "grad_norm": 1.0549846052216914, "learning_rate": 4.994439400743082e-06, "loss": 0.5117, "step": 2190 }, { "epoch": 0.13342264713942087, "grad_norm": 1.0803786111263587, "learning_rate": 4.994434080298289e-06, "loss": 0.5943, "step": 2191 }, { "epoch": 0.13348354291629874, "grad_norm": 1.0146070680096433, "learning_rate": 4.99442875731222e-06, "loss": 0.5265, "step": 2192 }, { "epoch": 0.13354443869317661, "grad_norm": 1.2012269906830635, "learning_rate": 4.9944234317848785e-06, "loss": 0.4969, "step": 2193 }, { "epoch": 0.13360533447005452, "grad_norm": 1.173809557778946, "learning_rate": 4.994418103716269e-06, "loss": 0.486, "step": 2194 }, { "epoch": 0.1336662302469324, "grad_norm": 1.0340839143368372, "learning_rate": 4.9944127731064e-06, "loss": 0.5049, "step": 2195 }, { "epoch": 0.13372712602381026, "grad_norm": 1.0447329265192569, "learning_rate": 4.994407439955273e-06, "loss": 0.6131, "step": 2196 }, { "epoch": 0.13378802180068813, "grad_norm": 1.0075495146298106, "learning_rate": 4.994402104262898e-06, "loss": 0.5111, "step": 2197 }, { "epoch": 0.133848917577566, "grad_norm": 1.0026502844043368, "learning_rate": 4.9943967660292755e-06, "loss": 0.5147, "step": 2198 }, { "epoch": 0.13390981335444388, "grad_norm": 1.089590093573451, "learning_rate": 4.994391425254415e-06, "loss": 0.5019, "step": 2199 }, { "epoch": 0.13397070913132175, "grad_norm": 1.1218243172733202, "learning_rate": 4.994386081938321e-06, "loss": 0.5228, "step": 2200 }, { "epoch": 0.13403160490819963, "grad_norm": 0.9788847366831586, "learning_rate": 4.994380736080996e-06, "loss": 0.5339, "step": 2201 }, { "epoch": 0.1340925006850775, "grad_norm": 1.0142537623532422, "learning_rate": 4.99437538768245e-06, "loss": 0.5326, "step": 2202 }, { "epoch": 0.13415339646195537, "grad_norm": 1.189462193924427, "learning_rate": 4.994370036742685e-06, "loss": 0.4832, "step": 2203 }, { "epoch": 0.13421429223883324, "grad_norm": 1.0315017567041538, "learning_rate": 4.994364683261708e-06, "loss": 0.5711, "step": 2204 }, { "epoch": 0.13427518801571112, "grad_norm": 0.9666682960441391, "learning_rate": 4.994359327239524e-06, "loss": 0.4943, "step": 2205 }, { "epoch": 0.134336083792589, "grad_norm": 1.0657991914411427, "learning_rate": 4.994353968676139e-06, "loss": 0.5806, "step": 2206 }, { "epoch": 0.13439697956946686, "grad_norm": 1.155628133539745, "learning_rate": 4.994348607571557e-06, "loss": 0.497, "step": 2207 }, { "epoch": 0.13445787534634474, "grad_norm": 1.0066441738398848, "learning_rate": 4.994343243925786e-06, "loss": 0.5863, "step": 2208 }, { "epoch": 0.1345187711232226, "grad_norm": 1.0761850073292927, "learning_rate": 4.994337877738829e-06, "loss": 0.5462, "step": 2209 }, { "epoch": 0.13457966690010048, "grad_norm": 1.0310406257761606, "learning_rate": 4.994332509010693e-06, "loss": 0.5629, "step": 2210 }, { "epoch": 0.13464056267697835, "grad_norm": 0.9476337212130281, "learning_rate": 4.994327137741382e-06, "loss": 0.577, "step": 2211 }, { "epoch": 0.13470145845385623, "grad_norm": 1.0525975506786425, "learning_rate": 4.994321763930903e-06, "loss": 0.5717, "step": 2212 }, { "epoch": 0.1347623542307341, "grad_norm": 0.9932526497820812, "learning_rate": 4.994316387579261e-06, "loss": 0.4707, "step": 2213 }, { "epoch": 0.13482325000761197, "grad_norm": 1.0625134150698459, "learning_rate": 4.994311008686461e-06, "loss": 0.5082, "step": 2214 }, { "epoch": 0.13488414578448985, "grad_norm": 1.0248144822083403, "learning_rate": 4.994305627252509e-06, "loss": 0.5229, "step": 2215 }, { "epoch": 0.13494504156136772, "grad_norm": 0.9909994948248603, "learning_rate": 4.99430024327741e-06, "loss": 0.5743, "step": 2216 }, { "epoch": 0.1350059373382456, "grad_norm": 1.1017403649611663, "learning_rate": 4.99429485676117e-06, "loss": 0.4702, "step": 2217 }, { "epoch": 0.13506683311512346, "grad_norm": 1.0280491495268493, "learning_rate": 4.994289467703794e-06, "loss": 0.5778, "step": 2218 }, { "epoch": 0.13512772889200134, "grad_norm": 0.9937801838217093, "learning_rate": 4.994284076105288e-06, "loss": 0.5541, "step": 2219 }, { "epoch": 0.1351886246688792, "grad_norm": 1.064929708487776, "learning_rate": 4.994278681965657e-06, "loss": 0.5247, "step": 2220 }, { "epoch": 0.13524952044575708, "grad_norm": 1.0655088432643074, "learning_rate": 4.9942732852849065e-06, "loss": 0.4978, "step": 2221 }, { "epoch": 0.13531041622263495, "grad_norm": 1.0328536837134805, "learning_rate": 4.994267886063043e-06, "loss": 0.5992, "step": 2222 }, { "epoch": 0.13537131199951283, "grad_norm": 1.0309121767219573, "learning_rate": 4.994262484300071e-06, "loss": 0.6166, "step": 2223 }, { "epoch": 0.1354322077763907, "grad_norm": 1.0726863967977567, "learning_rate": 4.994257079995996e-06, "loss": 0.5062, "step": 2224 }, { "epoch": 0.13549310355326857, "grad_norm": 0.9755300289531466, "learning_rate": 4.994251673150824e-06, "loss": 0.5357, "step": 2225 }, { "epoch": 0.13555399933014645, "grad_norm": 0.9993599839044814, "learning_rate": 4.99424626376456e-06, "loss": 0.5573, "step": 2226 }, { "epoch": 0.13561489510702432, "grad_norm": 1.0079842750987456, "learning_rate": 4.9942408518372095e-06, "loss": 0.5198, "step": 2227 }, { "epoch": 0.1356757908839022, "grad_norm": 1.0290311451434777, "learning_rate": 4.994235437368779e-06, "loss": 0.5301, "step": 2228 }, { "epoch": 0.13573668666078006, "grad_norm": 1.088767031340088, "learning_rate": 4.994230020359273e-06, "loss": 0.4911, "step": 2229 }, { "epoch": 0.13579758243765794, "grad_norm": 1.011088184320105, "learning_rate": 4.994224600808697e-06, "loss": 0.5694, "step": 2230 }, { "epoch": 0.1358584782145358, "grad_norm": 1.0147733010191529, "learning_rate": 4.994219178717057e-06, "loss": 0.4979, "step": 2231 }, { "epoch": 0.13591937399141368, "grad_norm": 1.0474129952005546, "learning_rate": 4.994213754084359e-06, "loss": 0.5315, "step": 2232 }, { "epoch": 0.13598026976829156, "grad_norm": 1.0131990601789405, "learning_rate": 4.9942083269106065e-06, "loss": 0.5623, "step": 2233 }, { "epoch": 0.13604116554516943, "grad_norm": 1.0564291898868943, "learning_rate": 4.994202897195808e-06, "loss": 0.5141, "step": 2234 }, { "epoch": 0.13610206132204733, "grad_norm": 1.086843985444809, "learning_rate": 4.994197464939966e-06, "loss": 0.5197, "step": 2235 }, { "epoch": 0.1361629570989252, "grad_norm": 1.031619063290875, "learning_rate": 4.994192030143089e-06, "loss": 0.5513, "step": 2236 }, { "epoch": 0.13622385287580308, "grad_norm": 1.0256935041200723, "learning_rate": 4.994186592805179e-06, "loss": 0.5098, "step": 2237 }, { "epoch": 0.13628474865268095, "grad_norm": 1.119891615479487, "learning_rate": 4.9941811529262456e-06, "loss": 0.5637, "step": 2238 }, { "epoch": 0.13634564442955882, "grad_norm": 1.0080131983185718, "learning_rate": 4.994175710506292e-06, "loss": 0.5709, "step": 2239 }, { "epoch": 0.1364065402064367, "grad_norm": 1.0823582426163862, "learning_rate": 4.994170265545323e-06, "loss": 0.6132, "step": 2240 }, { "epoch": 0.13646743598331457, "grad_norm": 0.9542920769802781, "learning_rate": 4.994164818043346e-06, "loss": 0.5415, "step": 2241 }, { "epoch": 0.13652833176019244, "grad_norm": 1.075457386522021, "learning_rate": 4.994159368000365e-06, "loss": 0.606, "step": 2242 }, { "epoch": 0.1365892275370703, "grad_norm": 1.051610874185148, "learning_rate": 4.994153915416386e-06, "loss": 0.4609, "step": 2243 }, { "epoch": 0.13665012331394819, "grad_norm": 1.0617266716084033, "learning_rate": 4.994148460291416e-06, "loss": 0.547, "step": 2244 }, { "epoch": 0.13671101909082606, "grad_norm": 1.0146586996273375, "learning_rate": 4.9941430026254585e-06, "loss": 0.5041, "step": 2245 }, { "epoch": 0.13677191486770393, "grad_norm": 1.0100663112746917, "learning_rate": 4.994137542418521e-06, "loss": 0.5225, "step": 2246 }, { "epoch": 0.1368328106445818, "grad_norm": 1.1876149188299077, "learning_rate": 4.994132079670607e-06, "loss": 0.5021, "step": 2247 }, { "epoch": 0.13689370642145968, "grad_norm": 1.0562565403724427, "learning_rate": 4.994126614381723e-06, "loss": 0.545, "step": 2248 }, { "epoch": 0.13695460219833755, "grad_norm": 1.0623890465027757, "learning_rate": 4.9941211465518754e-06, "loss": 0.5253, "step": 2249 }, { "epoch": 0.13701549797521542, "grad_norm": 1.017492586115053, "learning_rate": 4.994115676181069e-06, "loss": 0.5305, "step": 2250 }, { "epoch": 0.1370763937520933, "grad_norm": 0.9472259278980238, "learning_rate": 4.99411020326931e-06, "loss": 0.6202, "step": 2251 }, { "epoch": 0.13713728952897117, "grad_norm": 0.9999080099141797, "learning_rate": 4.9941047278166025e-06, "loss": 0.5326, "step": 2252 }, { "epoch": 0.13719818530584904, "grad_norm": 0.9847565323111603, "learning_rate": 4.994099249822952e-06, "loss": 0.5731, "step": 2253 }, { "epoch": 0.1372590810827269, "grad_norm": 1.0610354897514576, "learning_rate": 4.994093769288366e-06, "loss": 0.5276, "step": 2254 }, { "epoch": 0.1373199768596048, "grad_norm": 1.1161575627276425, "learning_rate": 4.99408828621285e-06, "loss": 0.5224, "step": 2255 }, { "epoch": 0.13738087263648266, "grad_norm": 1.0918422770751668, "learning_rate": 4.994082800596408e-06, "loss": 0.4956, "step": 2256 }, { "epoch": 0.13744176841336053, "grad_norm": 1.0113706036552086, "learning_rate": 4.994077312439046e-06, "loss": 0.5795, "step": 2257 }, { "epoch": 0.1375026641902384, "grad_norm": 1.01772815883812, "learning_rate": 4.9940718217407705e-06, "loss": 0.5176, "step": 2258 }, { "epoch": 0.13756355996711628, "grad_norm": 1.0599902602452183, "learning_rate": 4.994066328501586e-06, "loss": 0.5169, "step": 2259 }, { "epoch": 0.13762445574399415, "grad_norm": 1.09173259436316, "learning_rate": 4.994060832721499e-06, "loss": 0.6029, "step": 2260 }, { "epoch": 0.13768535152087202, "grad_norm": 1.0403032629146516, "learning_rate": 4.994055334400514e-06, "loss": 0.6055, "step": 2261 }, { "epoch": 0.1377462472977499, "grad_norm": 1.1292667968449759, "learning_rate": 4.994049833538637e-06, "loss": 0.5154, "step": 2262 }, { "epoch": 0.13780714307462777, "grad_norm": 1.0493504355886094, "learning_rate": 4.994044330135875e-06, "loss": 0.4635, "step": 2263 }, { "epoch": 0.13786803885150564, "grad_norm": 1.0312565051564129, "learning_rate": 4.994038824192232e-06, "loss": 0.517, "step": 2264 }, { "epoch": 0.13792893462838351, "grad_norm": 0.9874243959705971, "learning_rate": 4.994033315707714e-06, "loss": 0.5361, "step": 2265 }, { "epoch": 0.1379898304052614, "grad_norm": 1.0413851343114897, "learning_rate": 4.994027804682326e-06, "loss": 0.5625, "step": 2266 }, { "epoch": 0.13805072618213926, "grad_norm": 1.064039704992051, "learning_rate": 4.994022291116075e-06, "loss": 0.5452, "step": 2267 }, { "epoch": 0.13811162195901713, "grad_norm": 1.154923625444499, "learning_rate": 4.994016775008967e-06, "loss": 0.5174, "step": 2268 }, { "epoch": 0.138172517735895, "grad_norm": 1.0839570012816246, "learning_rate": 4.9940112563610056e-06, "loss": 0.5085, "step": 2269 }, { "epoch": 0.13823341351277288, "grad_norm": 1.0303587274338182, "learning_rate": 4.994005735172197e-06, "loss": 0.5295, "step": 2270 }, { "epoch": 0.13829430928965075, "grad_norm": 0.9583953097832033, "learning_rate": 4.994000211442548e-06, "loss": 0.5409, "step": 2271 }, { "epoch": 0.13835520506652862, "grad_norm": 1.0382044350792317, "learning_rate": 4.993994685172063e-06, "loss": 0.4986, "step": 2272 }, { "epoch": 0.1384161008434065, "grad_norm": 1.033836777099319, "learning_rate": 4.993989156360748e-06, "loss": 0.5166, "step": 2273 }, { "epoch": 0.13847699662028437, "grad_norm": 1.0651384479861548, "learning_rate": 4.9939836250086086e-06, "loss": 0.5607, "step": 2274 }, { "epoch": 0.13853789239716224, "grad_norm": 1.0844507247402462, "learning_rate": 4.993978091115651e-06, "loss": 0.521, "step": 2275 }, { "epoch": 0.13859878817404014, "grad_norm": 1.0656933670439712, "learning_rate": 4.99397255468188e-06, "loss": 0.5025, "step": 2276 }, { "epoch": 0.13865968395091802, "grad_norm": 1.1083592768825914, "learning_rate": 4.9939670157073016e-06, "loss": 0.4841, "step": 2277 }, { "epoch": 0.1387205797277959, "grad_norm": 1.0239992443636334, "learning_rate": 4.993961474191922e-06, "loss": 0.5748, "step": 2278 }, { "epoch": 0.13878147550467376, "grad_norm": 1.1030559628367933, "learning_rate": 4.9939559301357455e-06, "loss": 0.5055, "step": 2279 }, { "epoch": 0.13884237128155164, "grad_norm": 1.0229539786661996, "learning_rate": 4.993950383538779e-06, "loss": 0.5648, "step": 2280 }, { "epoch": 0.1389032670584295, "grad_norm": 1.0855798152452205, "learning_rate": 4.993944834401028e-06, "loss": 0.4864, "step": 2281 }, { "epoch": 0.13896416283530738, "grad_norm": 1.0025732805404302, "learning_rate": 4.993939282722498e-06, "loss": 0.506, "step": 2282 }, { "epoch": 0.13902505861218525, "grad_norm": 1.0949423169903296, "learning_rate": 4.993933728503193e-06, "loss": 0.5114, "step": 2283 }, { "epoch": 0.13908595438906313, "grad_norm": 0.9693286900914392, "learning_rate": 4.993928171743122e-06, "loss": 0.5309, "step": 2284 }, { "epoch": 0.139146850165941, "grad_norm": 1.0318645373833886, "learning_rate": 4.993922612442288e-06, "loss": 0.5094, "step": 2285 }, { "epoch": 0.13920774594281887, "grad_norm": 1.0278990814826585, "learning_rate": 4.9939170506006975e-06, "loss": 0.5616, "step": 2286 }, { "epoch": 0.13926864171969675, "grad_norm": 1.0552312577729879, "learning_rate": 4.993911486218356e-06, "loss": 0.5433, "step": 2287 }, { "epoch": 0.13932953749657462, "grad_norm": 1.0542364191026994, "learning_rate": 4.993905919295269e-06, "loss": 0.5075, "step": 2288 }, { "epoch": 0.1393904332734525, "grad_norm": 1.0545686250425834, "learning_rate": 4.993900349831443e-06, "loss": 0.4844, "step": 2289 }, { "epoch": 0.13945132905033036, "grad_norm": 1.1903849028458484, "learning_rate": 4.9938947778268835e-06, "loss": 0.5259, "step": 2290 }, { "epoch": 0.13951222482720824, "grad_norm": 1.105615336419567, "learning_rate": 4.993889203281596e-06, "loss": 0.53, "step": 2291 }, { "epoch": 0.1395731206040861, "grad_norm": 1.122581152173588, "learning_rate": 4.993883626195584e-06, "loss": 0.5029, "step": 2292 }, { "epoch": 0.13963401638096398, "grad_norm": 1.0498841141519926, "learning_rate": 4.993878046568857e-06, "loss": 0.5014, "step": 2293 }, { "epoch": 0.13969491215784186, "grad_norm": 1.0196095015560538, "learning_rate": 4.993872464401419e-06, "loss": 0.5213, "step": 2294 }, { "epoch": 0.13975580793471973, "grad_norm": 1.0384291670557024, "learning_rate": 4.993866879693276e-06, "loss": 0.5128, "step": 2295 }, { "epoch": 0.1398167037115976, "grad_norm": 0.9772498219036625, "learning_rate": 4.993861292444432e-06, "loss": 0.6011, "step": 2296 }, { "epoch": 0.13987759948847547, "grad_norm": 1.0734107355103577, "learning_rate": 4.993855702654894e-06, "loss": 0.4743, "step": 2297 }, { "epoch": 0.13993849526535335, "grad_norm": 1.0945613267787857, "learning_rate": 4.993850110324668e-06, "loss": 0.4655, "step": 2298 }, { "epoch": 0.13999939104223122, "grad_norm": 1.0786566356936487, "learning_rate": 4.99384451545376e-06, "loss": 0.4857, "step": 2299 }, { "epoch": 0.1400602868191091, "grad_norm": 1.0298422744648748, "learning_rate": 4.993838918042174e-06, "loss": 0.5021, "step": 2300 }, { "epoch": 0.14012118259598697, "grad_norm": 1.0041628473768203, "learning_rate": 4.993833318089918e-06, "loss": 0.5139, "step": 2301 }, { "epoch": 0.14018207837286484, "grad_norm": 1.050042532786145, "learning_rate": 4.993827715596995e-06, "loss": 0.581, "step": 2302 }, { "epoch": 0.1402429741497427, "grad_norm": 1.0306857848394897, "learning_rate": 4.993822110563414e-06, "loss": 0.5675, "step": 2303 }, { "epoch": 0.14030386992662058, "grad_norm": 1.1397063772350262, "learning_rate": 4.993816502989177e-06, "loss": 0.5553, "step": 2304 }, { "epoch": 0.14036476570349846, "grad_norm": 1.1072694264279346, "learning_rate": 4.993810892874293e-06, "loss": 0.5024, "step": 2305 }, { "epoch": 0.14042566148037633, "grad_norm": 1.088525503600628, "learning_rate": 4.993805280218765e-06, "loss": 0.4899, "step": 2306 }, { "epoch": 0.1404865572572542, "grad_norm": 1.021165274800886, "learning_rate": 4.993799665022601e-06, "loss": 0.4895, "step": 2307 }, { "epoch": 0.14054745303413207, "grad_norm": 1.024001492262837, "learning_rate": 4.993794047285805e-06, "loss": 0.5253, "step": 2308 }, { "epoch": 0.14060834881100995, "grad_norm": 0.9239827072772366, "learning_rate": 4.993788427008384e-06, "loss": 0.5519, "step": 2309 }, { "epoch": 0.14066924458788782, "grad_norm": 1.0085361471209127, "learning_rate": 4.993782804190344e-06, "loss": 0.5398, "step": 2310 }, { "epoch": 0.1407301403647657, "grad_norm": 1.097176390019581, "learning_rate": 4.9937771788316885e-06, "loss": 0.4632, "step": 2311 }, { "epoch": 0.14079103614164357, "grad_norm": 1.063124552236338, "learning_rate": 4.993771550932426e-06, "loss": 0.5004, "step": 2312 }, { "epoch": 0.14085193191852144, "grad_norm": 0.9947474198384793, "learning_rate": 4.993765920492559e-06, "loss": 0.5602, "step": 2313 }, { "epoch": 0.1409128276953993, "grad_norm": 1.1274516826916041, "learning_rate": 4.993760287512097e-06, "loss": 0.4232, "step": 2314 }, { "epoch": 0.14097372347227718, "grad_norm": 1.031686007794025, "learning_rate": 4.993754651991043e-06, "loss": 0.4493, "step": 2315 }, { "epoch": 0.14103461924915506, "grad_norm": 1.110107065483981, "learning_rate": 4.9937490139294035e-06, "loss": 0.5211, "step": 2316 }, { "epoch": 0.14109551502603296, "grad_norm": 1.0697359658626968, "learning_rate": 4.993743373327184e-06, "loss": 0.581, "step": 2317 }, { "epoch": 0.14115641080291083, "grad_norm": 1.0067639528028638, "learning_rate": 4.993737730184391e-06, "loss": 0.5573, "step": 2318 }, { "epoch": 0.1412173065797887, "grad_norm": 1.008988180514407, "learning_rate": 4.993732084501031e-06, "loss": 0.5061, "step": 2319 }, { "epoch": 0.14127820235666658, "grad_norm": 0.9950785556539449, "learning_rate": 4.993726436277108e-06, "loss": 0.5282, "step": 2320 }, { "epoch": 0.14133909813354445, "grad_norm": 1.0552020401442412, "learning_rate": 4.9937207855126265e-06, "loss": 0.5209, "step": 2321 }, { "epoch": 0.14139999391042232, "grad_norm": 1.022062730453616, "learning_rate": 4.993715132207596e-06, "loss": 0.5403, "step": 2322 }, { "epoch": 0.1414608896873002, "grad_norm": 1.066826470246887, "learning_rate": 4.99370947636202e-06, "loss": 0.5252, "step": 2323 }, { "epoch": 0.14152178546417807, "grad_norm": 1.0146206722887003, "learning_rate": 4.993703817975905e-06, "loss": 0.5434, "step": 2324 }, { "epoch": 0.14158268124105594, "grad_norm": 1.0450933820373014, "learning_rate": 4.993698157049256e-06, "loss": 0.4834, "step": 2325 }, { "epoch": 0.1416435770179338, "grad_norm": 1.2217347181648328, "learning_rate": 4.993692493582078e-06, "loss": 0.5068, "step": 2326 }, { "epoch": 0.1417044727948117, "grad_norm": 1.0059795015103712, "learning_rate": 4.99368682757438e-06, "loss": 0.5491, "step": 2327 }, { "epoch": 0.14176536857168956, "grad_norm": 1.1191015875721244, "learning_rate": 4.993681159026165e-06, "loss": 0.4863, "step": 2328 }, { "epoch": 0.14182626434856743, "grad_norm": 1.0751260977376962, "learning_rate": 4.993675487937439e-06, "loss": 0.4555, "step": 2329 }, { "epoch": 0.1418871601254453, "grad_norm": 1.0857842916268585, "learning_rate": 4.993669814308208e-06, "loss": 0.4655, "step": 2330 }, { "epoch": 0.14194805590232318, "grad_norm": 1.0070496815028818, "learning_rate": 4.993664138138478e-06, "loss": 0.5478, "step": 2331 }, { "epoch": 0.14200895167920105, "grad_norm": 1.0592320662670147, "learning_rate": 4.993658459428257e-06, "loss": 0.4991, "step": 2332 }, { "epoch": 0.14206984745607892, "grad_norm": 1.028067058643168, "learning_rate": 4.993652778177547e-06, "loss": 0.5585, "step": 2333 }, { "epoch": 0.1421307432329568, "grad_norm": 1.1208924447465654, "learning_rate": 4.9936470943863545e-06, "loss": 0.4553, "step": 2334 }, { "epoch": 0.14219163900983467, "grad_norm": 1.007358838095519, "learning_rate": 4.993641408054687e-06, "loss": 0.5389, "step": 2335 }, { "epoch": 0.14225253478671254, "grad_norm": 1.0863951501580171, "learning_rate": 4.9936357191825504e-06, "loss": 0.5372, "step": 2336 }, { "epoch": 0.14231343056359042, "grad_norm": 1.1295852001184516, "learning_rate": 4.9936300277699476e-06, "loss": 0.4325, "step": 2337 }, { "epoch": 0.1423743263404683, "grad_norm": 1.0675879583475667, "learning_rate": 4.993624333816888e-06, "loss": 0.5481, "step": 2338 }, { "epoch": 0.14243522211734616, "grad_norm": 1.1247182271520402, "learning_rate": 4.993618637323376e-06, "loss": 0.5002, "step": 2339 }, { "epoch": 0.14249611789422403, "grad_norm": 1.0348663390057122, "learning_rate": 4.993612938289416e-06, "loss": 0.5201, "step": 2340 }, { "epoch": 0.1425570136711019, "grad_norm": 1.0914146322235103, "learning_rate": 4.993607236715015e-06, "loss": 0.4786, "step": 2341 }, { "epoch": 0.14261790944797978, "grad_norm": 1.1365407053438576, "learning_rate": 4.9936015326001796e-06, "loss": 0.4972, "step": 2342 }, { "epoch": 0.14267880522485765, "grad_norm": 0.9692945510241882, "learning_rate": 4.993595825944914e-06, "loss": 0.5385, "step": 2343 }, { "epoch": 0.14273970100173552, "grad_norm": 0.9882400463515096, "learning_rate": 4.993590116749225e-06, "loss": 0.5356, "step": 2344 }, { "epoch": 0.1428005967786134, "grad_norm": 0.9745822120780115, "learning_rate": 4.993584405013119e-06, "loss": 0.5518, "step": 2345 }, { "epoch": 0.14286149255549127, "grad_norm": 1.1553747147371367, "learning_rate": 4.9935786907366005e-06, "loss": 0.5026, "step": 2346 }, { "epoch": 0.14292238833236914, "grad_norm": 1.0940020912188806, "learning_rate": 4.993572973919676e-06, "loss": 0.5248, "step": 2347 }, { "epoch": 0.14298328410924702, "grad_norm": 1.1382709505041846, "learning_rate": 4.99356725456235e-06, "loss": 0.4523, "step": 2348 }, { "epoch": 0.1430441798861249, "grad_norm": 1.0614213452364047, "learning_rate": 4.993561532664632e-06, "loss": 0.5593, "step": 2349 }, { "epoch": 0.14310507566300276, "grad_norm": 1.0630503477177273, "learning_rate": 4.9935558082265234e-06, "loss": 0.4686, "step": 2350 }, { "epoch": 0.14316597143988063, "grad_norm": 1.076930553106851, "learning_rate": 4.993550081248033e-06, "loss": 0.5467, "step": 2351 }, { "epoch": 0.1432268672167585, "grad_norm": 1.0300668587645017, "learning_rate": 4.993544351729165e-06, "loss": 0.5751, "step": 2352 }, { "epoch": 0.14328776299363638, "grad_norm": 1.1521838063839844, "learning_rate": 4.993538619669925e-06, "loss": 0.5059, "step": 2353 }, { "epoch": 0.14334865877051425, "grad_norm": 1.1104289974611699, "learning_rate": 4.993532885070321e-06, "loss": 0.5628, "step": 2354 }, { "epoch": 0.14340955454739213, "grad_norm": 1.095199626544282, "learning_rate": 4.993527147930357e-06, "loss": 0.478, "step": 2355 }, { "epoch": 0.14347045032427, "grad_norm": 1.0364574574690615, "learning_rate": 4.99352140825004e-06, "loss": 0.5247, "step": 2356 }, { "epoch": 0.14353134610114787, "grad_norm": 1.0625363201711866, "learning_rate": 4.993515666029375e-06, "loss": 0.5736, "step": 2357 }, { "epoch": 0.14359224187802577, "grad_norm": 0.974620708351207, "learning_rate": 4.993509921268368e-06, "loss": 0.5377, "step": 2358 }, { "epoch": 0.14365313765490365, "grad_norm": 1.0872242034588018, "learning_rate": 4.993504173967025e-06, "loss": 0.4344, "step": 2359 }, { "epoch": 0.14371403343178152, "grad_norm": 1.0752491242165478, "learning_rate": 4.9934984241253515e-06, "loss": 0.5186, "step": 2360 }, { "epoch": 0.1437749292086594, "grad_norm": 1.1481579442154832, "learning_rate": 4.993492671743354e-06, "loss": 0.499, "step": 2361 }, { "epoch": 0.14383582498553726, "grad_norm": 0.9972270246080913, "learning_rate": 4.993486916821038e-06, "loss": 0.5172, "step": 2362 }, { "epoch": 0.14389672076241514, "grad_norm": 1.1188785802242305, "learning_rate": 4.993481159358409e-06, "loss": 0.4912, "step": 2363 }, { "epoch": 0.143957616539293, "grad_norm": 1.0512862103897036, "learning_rate": 4.993475399355473e-06, "loss": 0.5306, "step": 2364 }, { "epoch": 0.14401851231617088, "grad_norm": 0.9972637184289976, "learning_rate": 4.993469636812237e-06, "loss": 0.5084, "step": 2365 }, { "epoch": 0.14407940809304876, "grad_norm": 1.0853543178101301, "learning_rate": 4.9934638717287055e-06, "loss": 0.5451, "step": 2366 }, { "epoch": 0.14414030386992663, "grad_norm": 1.0948603859011456, "learning_rate": 4.993458104104885e-06, "loss": 0.5418, "step": 2367 }, { "epoch": 0.1442011996468045, "grad_norm": 1.0929121270048583, "learning_rate": 4.99345233394078e-06, "loss": 0.5004, "step": 2368 }, { "epoch": 0.14426209542368237, "grad_norm": 1.0930967605587678, "learning_rate": 4.993446561236399e-06, "loss": 0.6018, "step": 2369 }, { "epoch": 0.14432299120056025, "grad_norm": 1.0922590804208276, "learning_rate": 4.993440785991746e-06, "loss": 0.4809, "step": 2370 }, { "epoch": 0.14438388697743812, "grad_norm": 1.1252625457498189, "learning_rate": 4.993435008206829e-06, "loss": 0.5289, "step": 2371 }, { "epoch": 0.144444782754316, "grad_norm": 1.12674063917501, "learning_rate": 4.99342922788165e-06, "loss": 0.4796, "step": 2372 }, { "epoch": 0.14450567853119387, "grad_norm": 1.1555489980330933, "learning_rate": 4.993423445016218e-06, "loss": 0.5251, "step": 2373 }, { "epoch": 0.14456657430807174, "grad_norm": 1.0220256174341484, "learning_rate": 4.993417659610538e-06, "loss": 0.4845, "step": 2374 }, { "epoch": 0.1446274700849496, "grad_norm": 1.1466744679088448, "learning_rate": 4.993411871664615e-06, "loss": 0.5126, "step": 2375 }, { "epoch": 0.14468836586182748, "grad_norm": 1.065848561812752, "learning_rate": 4.993406081178457e-06, "loss": 0.4939, "step": 2376 }, { "epoch": 0.14474926163870536, "grad_norm": 1.0788440453774666, "learning_rate": 4.993400288152069e-06, "loss": 0.5488, "step": 2377 }, { "epoch": 0.14481015741558323, "grad_norm": 1.076695481119285, "learning_rate": 4.993394492585456e-06, "loss": 0.4914, "step": 2378 }, { "epoch": 0.1448710531924611, "grad_norm": 1.060220931966688, "learning_rate": 4.993388694478624e-06, "loss": 0.5748, "step": 2379 }, { "epoch": 0.14493194896933898, "grad_norm": 0.9816014871226806, "learning_rate": 4.9933828938315805e-06, "loss": 0.5379, "step": 2380 }, { "epoch": 0.14499284474621685, "grad_norm": 0.965153066733068, "learning_rate": 4.9933770906443295e-06, "loss": 0.5601, "step": 2381 }, { "epoch": 0.14505374052309472, "grad_norm": 1.0029131845115766, "learning_rate": 4.993371284916878e-06, "loss": 0.551, "step": 2382 }, { "epoch": 0.1451146362999726, "grad_norm": 1.1688783911050467, "learning_rate": 4.993365476649232e-06, "loss": 0.4796, "step": 2383 }, { "epoch": 0.14517553207685047, "grad_norm": 1.0807386249572402, "learning_rate": 4.993359665841398e-06, "loss": 0.4784, "step": 2384 }, { "epoch": 0.14523642785372834, "grad_norm": 0.9498667898345313, "learning_rate": 4.993353852493379e-06, "loss": 0.5675, "step": 2385 }, { "epoch": 0.1452973236306062, "grad_norm": 1.0938592044627706, "learning_rate": 4.993348036605183e-06, "loss": 0.4748, "step": 2386 }, { "epoch": 0.14535821940748408, "grad_norm": 0.9500347280712079, "learning_rate": 4.993342218176818e-06, "loss": 0.5606, "step": 2387 }, { "epoch": 0.14541911518436196, "grad_norm": 1.0485830315338138, "learning_rate": 4.993336397208286e-06, "loss": 0.556, "step": 2388 }, { "epoch": 0.14548001096123983, "grad_norm": 1.2524629065982562, "learning_rate": 4.993330573699595e-06, "loss": 0.4422, "step": 2389 }, { "epoch": 0.1455409067381177, "grad_norm": 0.946924728933562, "learning_rate": 4.993324747650751e-06, "loss": 0.5646, "step": 2390 }, { "epoch": 0.14560180251499558, "grad_norm": 1.0359521592992935, "learning_rate": 4.9933189190617595e-06, "loss": 0.5045, "step": 2391 }, { "epoch": 0.14566269829187345, "grad_norm": 1.0053404541657602, "learning_rate": 4.993313087932626e-06, "loss": 0.5589, "step": 2392 }, { "epoch": 0.14572359406875132, "grad_norm": 1.0856856816627114, "learning_rate": 4.993307254263357e-06, "loss": 0.4947, "step": 2393 }, { "epoch": 0.1457844898456292, "grad_norm": 1.0863461269274963, "learning_rate": 4.993301418053959e-06, "loss": 0.5482, "step": 2394 }, { "epoch": 0.14584538562250707, "grad_norm": 1.0476786463422072, "learning_rate": 4.993295579304438e-06, "loss": 0.56, "step": 2395 }, { "epoch": 0.14590628139938494, "grad_norm": 1.0880962393390015, "learning_rate": 4.9932897380147985e-06, "loss": 0.4675, "step": 2396 }, { "epoch": 0.1459671771762628, "grad_norm": 1.0584923937312651, "learning_rate": 4.993283894185047e-06, "loss": 0.5128, "step": 2397 }, { "epoch": 0.1460280729531407, "grad_norm": 1.120969329324478, "learning_rate": 4.99327804781519e-06, "loss": 0.5451, "step": 2398 }, { "epoch": 0.1460889687300186, "grad_norm": 1.093557523170927, "learning_rate": 4.993272198905233e-06, "loss": 0.5303, "step": 2399 }, { "epoch": 0.14614986450689646, "grad_norm": 1.0502425948044853, "learning_rate": 4.993266347455182e-06, "loss": 0.4937, "step": 2400 }, { "epoch": 0.14621076028377433, "grad_norm": 0.9702904871939518, "learning_rate": 4.993260493465044e-06, "loss": 0.5907, "step": 2401 }, { "epoch": 0.1462716560606522, "grad_norm": 1.0686083918491232, "learning_rate": 4.993254636934823e-06, "loss": 0.5165, "step": 2402 }, { "epoch": 0.14633255183753008, "grad_norm": 0.9680413528702653, "learning_rate": 4.993248777864526e-06, "loss": 0.5622, "step": 2403 }, { "epoch": 0.14639344761440795, "grad_norm": 1.0304545227048931, "learning_rate": 4.993242916254159e-06, "loss": 0.5713, "step": 2404 }, { "epoch": 0.14645434339128582, "grad_norm": 1.043803153308817, "learning_rate": 4.9932370521037286e-06, "loss": 0.4982, "step": 2405 }, { "epoch": 0.1465152391681637, "grad_norm": 1.069432707114138, "learning_rate": 4.99323118541324e-06, "loss": 0.5528, "step": 2406 }, { "epoch": 0.14657613494504157, "grad_norm": 0.982113011366172, "learning_rate": 4.9932253161827e-06, "loss": 0.5744, "step": 2407 }, { "epoch": 0.14663703072191944, "grad_norm": 0.948015472300026, "learning_rate": 4.993219444412112e-06, "loss": 0.5459, "step": 2408 }, { "epoch": 0.14669792649879732, "grad_norm": 0.9959103962280902, "learning_rate": 4.993213570101485e-06, "loss": 0.5335, "step": 2409 }, { "epoch": 0.1467588222756752, "grad_norm": 1.0231955862413369, "learning_rate": 4.993207693250824e-06, "loss": 0.5685, "step": 2410 }, { "epoch": 0.14681971805255306, "grad_norm": 1.066536718206446, "learning_rate": 4.9932018138601344e-06, "loss": 0.4669, "step": 2411 }, { "epoch": 0.14688061382943093, "grad_norm": 1.053354254607229, "learning_rate": 4.993195931929423e-06, "loss": 0.5587, "step": 2412 }, { "epoch": 0.1469415096063088, "grad_norm": 1.0390917809993145, "learning_rate": 4.9931900474586945e-06, "loss": 0.4731, "step": 2413 }, { "epoch": 0.14700240538318668, "grad_norm": 1.1223803730347472, "learning_rate": 4.993184160447957e-06, "loss": 0.5536, "step": 2414 }, { "epoch": 0.14706330116006455, "grad_norm": 1.0879923005157721, "learning_rate": 4.993178270897215e-06, "loss": 0.5348, "step": 2415 }, { "epoch": 0.14712419693694243, "grad_norm": 1.0793932599669671, "learning_rate": 4.993172378806475e-06, "loss": 0.5612, "step": 2416 }, { "epoch": 0.1471850927138203, "grad_norm": 1.233875916016436, "learning_rate": 4.993166484175742e-06, "loss": 0.4978, "step": 2417 }, { "epoch": 0.14724598849069817, "grad_norm": 1.0625996387109087, "learning_rate": 4.993160587005023e-06, "loss": 0.5088, "step": 2418 }, { "epoch": 0.14730688426757604, "grad_norm": 1.0559561954545595, "learning_rate": 4.993154687294324e-06, "loss": 0.5775, "step": 2419 }, { "epoch": 0.14736778004445392, "grad_norm": 1.037332131624886, "learning_rate": 4.993148785043651e-06, "loss": 0.4916, "step": 2420 }, { "epoch": 0.1474286758213318, "grad_norm": 1.0388196415843405, "learning_rate": 4.99314288025301e-06, "loss": 0.5671, "step": 2421 }, { "epoch": 0.14748957159820966, "grad_norm": 1.0463771197268767, "learning_rate": 4.9931369729224066e-06, "loss": 0.518, "step": 2422 }, { "epoch": 0.14755046737508754, "grad_norm": 1.1414548943852438, "learning_rate": 4.993131063051847e-06, "loss": 0.5487, "step": 2423 }, { "epoch": 0.1476113631519654, "grad_norm": 1.0671214966360865, "learning_rate": 4.993125150641338e-06, "loss": 0.5166, "step": 2424 }, { "epoch": 0.14767225892884328, "grad_norm": 1.0897371810895793, "learning_rate": 4.993119235690884e-06, "loss": 0.4933, "step": 2425 }, { "epoch": 0.14773315470572115, "grad_norm": 0.9775606709526539, "learning_rate": 4.993113318200492e-06, "loss": 0.5936, "step": 2426 }, { "epoch": 0.14779405048259903, "grad_norm": 1.069349619975326, "learning_rate": 4.9931073981701685e-06, "loss": 0.5661, "step": 2427 }, { "epoch": 0.1478549462594769, "grad_norm": 0.976201266616463, "learning_rate": 4.993101475599919e-06, "loss": 0.5139, "step": 2428 }, { "epoch": 0.14791584203635477, "grad_norm": 0.9527347441128732, "learning_rate": 4.993095550489749e-06, "loss": 0.5806, "step": 2429 }, { "epoch": 0.14797673781323264, "grad_norm": 1.0900130031159043, "learning_rate": 4.993089622839667e-06, "loss": 0.5095, "step": 2430 }, { "epoch": 0.14803763359011052, "grad_norm": 1.134999491442151, "learning_rate": 4.9930836926496745e-06, "loss": 0.5313, "step": 2431 }, { "epoch": 0.1480985293669884, "grad_norm": 1.1213965300225313, "learning_rate": 4.993077759919781e-06, "loss": 0.5014, "step": 2432 }, { "epoch": 0.14815942514386626, "grad_norm": 1.0267060867109226, "learning_rate": 4.993071824649992e-06, "loss": 0.4983, "step": 2433 }, { "epoch": 0.14822032092074414, "grad_norm": 1.035208642743681, "learning_rate": 4.993065886840314e-06, "loss": 0.5816, "step": 2434 }, { "epoch": 0.148281216697622, "grad_norm": 1.1352572574375, "learning_rate": 4.993059946490751e-06, "loss": 0.5051, "step": 2435 }, { "epoch": 0.14834211247449988, "grad_norm": 1.0674972479211104, "learning_rate": 4.993054003601311e-06, "loss": 0.4849, "step": 2436 }, { "epoch": 0.14840300825137775, "grad_norm": 1.01487254661745, "learning_rate": 4.993048058171999e-06, "loss": 0.5528, "step": 2437 }, { "epoch": 0.14846390402825563, "grad_norm": 1.0361541003061985, "learning_rate": 4.993042110202822e-06, "loss": 0.5568, "step": 2438 }, { "epoch": 0.1485247998051335, "grad_norm": 1.0328325003157501, "learning_rate": 4.993036159693785e-06, "loss": 0.6495, "step": 2439 }, { "epoch": 0.1485856955820114, "grad_norm": 1.0453640868543714, "learning_rate": 4.993030206644895e-06, "loss": 0.5215, "step": 2440 }, { "epoch": 0.14864659135888927, "grad_norm": 1.0563451216546569, "learning_rate": 4.993024251056158e-06, "loss": 0.5155, "step": 2441 }, { "epoch": 0.14870748713576715, "grad_norm": 1.0237300662612119, "learning_rate": 4.993018292927579e-06, "loss": 0.5449, "step": 2442 }, { "epoch": 0.14876838291264502, "grad_norm": 1.0714902216455926, "learning_rate": 4.993012332259165e-06, "loss": 0.5511, "step": 2443 }, { "epoch": 0.1488292786895229, "grad_norm": 1.025165719635034, "learning_rate": 4.993006369050922e-06, "loss": 0.497, "step": 2444 }, { "epoch": 0.14889017446640077, "grad_norm": 1.032877692600213, "learning_rate": 4.993000403302856e-06, "loss": 0.544, "step": 2445 }, { "epoch": 0.14895107024327864, "grad_norm": 0.9990060427900036, "learning_rate": 4.992994435014973e-06, "loss": 0.488, "step": 2446 }, { "epoch": 0.1490119660201565, "grad_norm": 1.1166430786955657, "learning_rate": 4.99298846418728e-06, "loss": 0.5158, "step": 2447 }, { "epoch": 0.14907286179703438, "grad_norm": 1.048825958699336, "learning_rate": 4.99298249081978e-06, "loss": 0.4955, "step": 2448 }, { "epoch": 0.14913375757391226, "grad_norm": 0.9883260834324052, "learning_rate": 4.992976514912483e-06, "loss": 0.578, "step": 2449 }, { "epoch": 0.14919465335079013, "grad_norm": 1.1434749054841808, "learning_rate": 4.992970536465392e-06, "loss": 0.4986, "step": 2450 }, { "epoch": 0.149255549127668, "grad_norm": 1.1316201072352132, "learning_rate": 4.992964555478516e-06, "loss": 0.6303, "step": 2451 }, { "epoch": 0.14931644490454588, "grad_norm": 1.0920252308786944, "learning_rate": 4.992958571951858e-06, "loss": 0.5403, "step": 2452 }, { "epoch": 0.14937734068142375, "grad_norm": 1.0756526535815185, "learning_rate": 4.992952585885427e-06, "loss": 0.5435, "step": 2453 }, { "epoch": 0.14943823645830162, "grad_norm": 1.0269014010008646, "learning_rate": 4.992946597279227e-06, "loss": 0.5172, "step": 2454 }, { "epoch": 0.1494991322351795, "grad_norm": 1.0660844157313074, "learning_rate": 4.992940606133266e-06, "loss": 0.5746, "step": 2455 }, { "epoch": 0.14956002801205737, "grad_norm": 0.9649131449951128, "learning_rate": 4.9929346124475475e-06, "loss": 0.5877, "step": 2456 }, { "epoch": 0.14962092378893524, "grad_norm": 0.9901239902888693, "learning_rate": 4.992928616222079e-06, "loss": 0.4975, "step": 2457 }, { "epoch": 0.1496818195658131, "grad_norm": 1.1352618744043448, "learning_rate": 4.992922617456867e-06, "loss": 0.528, "step": 2458 }, { "epoch": 0.14974271534269099, "grad_norm": 1.1262653441503034, "learning_rate": 4.992916616151918e-06, "loss": 0.5321, "step": 2459 }, { "epoch": 0.14980361111956886, "grad_norm": 0.9878255365605337, "learning_rate": 4.992910612307237e-06, "loss": 0.5467, "step": 2460 }, { "epoch": 0.14986450689644673, "grad_norm": 1.00583727113917, "learning_rate": 4.99290460592283e-06, "loss": 0.5541, "step": 2461 }, { "epoch": 0.1499254026733246, "grad_norm": 1.0672102719106524, "learning_rate": 4.992898596998704e-06, "loss": 0.5191, "step": 2462 }, { "epoch": 0.14998629845020248, "grad_norm": 1.1333136712313876, "learning_rate": 4.992892585534864e-06, "loss": 0.4772, "step": 2463 }, { "epoch": 0.15004719422708035, "grad_norm": 1.0320423999642852, "learning_rate": 4.992886571531318e-06, "loss": 0.5101, "step": 2464 }, { "epoch": 0.15010809000395822, "grad_norm": 1.032037320200048, "learning_rate": 4.992880554988071e-06, "loss": 0.4269, "step": 2465 }, { "epoch": 0.1501689857808361, "grad_norm": 1.0667553558262541, "learning_rate": 4.992874535905128e-06, "loss": 0.577, "step": 2466 }, { "epoch": 0.15022988155771397, "grad_norm": 1.092012637997363, "learning_rate": 4.992868514282497e-06, "loss": 0.5242, "step": 2467 }, { "epoch": 0.15029077733459184, "grad_norm": 1.0172208349242822, "learning_rate": 4.992862490120183e-06, "loss": 0.5025, "step": 2468 }, { "epoch": 0.1503516731114697, "grad_norm": 1.093021729441943, "learning_rate": 4.992856463418193e-06, "loss": 0.502, "step": 2469 }, { "epoch": 0.1504125688883476, "grad_norm": 0.9618819281751945, "learning_rate": 4.992850434176532e-06, "loss": 0.5306, "step": 2470 }, { "epoch": 0.15047346466522546, "grad_norm": 1.106887959426957, "learning_rate": 4.992844402395208e-06, "loss": 0.5163, "step": 2471 }, { "epoch": 0.15053436044210333, "grad_norm": 0.982383622057061, "learning_rate": 4.992838368074226e-06, "loss": 0.5624, "step": 2472 }, { "epoch": 0.1505952562189812, "grad_norm": 1.0403102931261816, "learning_rate": 4.992832331213591e-06, "loss": 0.5651, "step": 2473 }, { "epoch": 0.15065615199585908, "grad_norm": 1.0402060574357985, "learning_rate": 4.99282629181331e-06, "loss": 0.5435, "step": 2474 }, { "epoch": 0.15071704777273695, "grad_norm": 1.110388612043396, "learning_rate": 4.99282024987339e-06, "loss": 0.5307, "step": 2475 }, { "epoch": 0.15077794354961482, "grad_norm": 1.121151907849297, "learning_rate": 4.992814205393837e-06, "loss": 0.508, "step": 2476 }, { "epoch": 0.1508388393264927, "grad_norm": 1.0728818738651096, "learning_rate": 4.992808158374655e-06, "loss": 0.4763, "step": 2477 }, { "epoch": 0.15089973510337057, "grad_norm": 1.0938761867664655, "learning_rate": 4.992802108815854e-06, "loss": 0.6062, "step": 2478 }, { "epoch": 0.15096063088024844, "grad_norm": 1.0590041496222227, "learning_rate": 4.992796056717437e-06, "loss": 0.5764, "step": 2479 }, { "epoch": 0.15102152665712631, "grad_norm": 1.044602030475801, "learning_rate": 4.992790002079411e-06, "loss": 0.5007, "step": 2480 }, { "epoch": 0.15108242243400422, "grad_norm": 1.0178898155374587, "learning_rate": 4.9927839449017835e-06, "loss": 0.5993, "step": 2481 }, { "epoch": 0.1511433182108821, "grad_norm": 0.9751973489666135, "learning_rate": 4.992777885184558e-06, "loss": 0.4827, "step": 2482 }, { "epoch": 0.15120421398775996, "grad_norm": 1.1577146032361698, "learning_rate": 4.992771822927744e-06, "loss": 0.4997, "step": 2483 }, { "epoch": 0.15126510976463783, "grad_norm": 1.0432023249853397, "learning_rate": 4.992765758131344e-06, "loss": 0.5197, "step": 2484 }, { "epoch": 0.1513260055415157, "grad_norm": 0.9735048674997951, "learning_rate": 4.9927596907953664e-06, "loss": 0.5356, "step": 2485 }, { "epoch": 0.15138690131839358, "grad_norm": 1.0681464365765767, "learning_rate": 4.992753620919818e-06, "loss": 0.546, "step": 2486 }, { "epoch": 0.15144779709527145, "grad_norm": 1.098385213390006, "learning_rate": 4.9927475485047035e-06, "loss": 0.526, "step": 2487 }, { "epoch": 0.15150869287214933, "grad_norm": 1.1280583360659087, "learning_rate": 4.99274147355003e-06, "loss": 0.4457, "step": 2488 }, { "epoch": 0.1515695886490272, "grad_norm": 1.1694876693934355, "learning_rate": 4.992735396055803e-06, "loss": 0.492, "step": 2489 }, { "epoch": 0.15163048442590507, "grad_norm": 1.1004920767149702, "learning_rate": 4.992729316022029e-06, "loss": 0.4974, "step": 2490 }, { "epoch": 0.15169138020278294, "grad_norm": 1.064046406627593, "learning_rate": 4.992723233448714e-06, "loss": 0.5268, "step": 2491 }, { "epoch": 0.15175227597966082, "grad_norm": 1.112763666876491, "learning_rate": 4.992717148335865e-06, "loss": 0.5091, "step": 2492 }, { "epoch": 0.1518131717565387, "grad_norm": 0.9864092858245589, "learning_rate": 4.992711060683487e-06, "loss": 0.5555, "step": 2493 }, { "epoch": 0.15187406753341656, "grad_norm": 1.0287161857962703, "learning_rate": 4.992704970491588e-06, "loss": 0.4775, "step": 2494 }, { "epoch": 0.15193496331029444, "grad_norm": 1.0480102106223532, "learning_rate": 4.992698877760171e-06, "loss": 0.5175, "step": 2495 }, { "epoch": 0.1519958590871723, "grad_norm": 1.1190219083113706, "learning_rate": 4.992692782489245e-06, "loss": 0.5222, "step": 2496 }, { "epoch": 0.15205675486405018, "grad_norm": 1.2428700734218403, "learning_rate": 4.9926866846788165e-06, "loss": 0.5114, "step": 2497 }, { "epoch": 0.15211765064092805, "grad_norm": 1.0423202615012537, "learning_rate": 4.99268058432889e-06, "loss": 0.5581, "step": 2498 }, { "epoch": 0.15217854641780593, "grad_norm": 1.0721449482053398, "learning_rate": 4.992674481439473e-06, "loss": 0.5532, "step": 2499 }, { "epoch": 0.1522394421946838, "grad_norm": 1.1262636290107442, "learning_rate": 4.992668376010569e-06, "loss": 0.5315, "step": 2500 }, { "epoch": 0.15230033797156167, "grad_norm": 1.0187856319459214, "learning_rate": 4.992662268042188e-06, "loss": 0.5223, "step": 2501 }, { "epoch": 0.15236123374843955, "grad_norm": 1.0719507550614693, "learning_rate": 4.9926561575343335e-06, "loss": 0.5792, "step": 2502 }, { "epoch": 0.15242212952531742, "grad_norm": 0.9881962079177027, "learning_rate": 4.9926500444870136e-06, "loss": 0.5378, "step": 2503 }, { "epoch": 0.1524830253021953, "grad_norm": 1.0625258216472449, "learning_rate": 4.992643928900233e-06, "loss": 0.544, "step": 2504 }, { "epoch": 0.15254392107907316, "grad_norm": 1.0548949572569595, "learning_rate": 4.992637810773999e-06, "loss": 0.5318, "step": 2505 }, { "epoch": 0.15260481685595104, "grad_norm": 1.0811896301842863, "learning_rate": 4.9926316901083175e-06, "loss": 0.5338, "step": 2506 }, { "epoch": 0.1526657126328289, "grad_norm": 0.9561110191823566, "learning_rate": 4.992625566903194e-06, "loss": 0.5471, "step": 2507 }, { "epoch": 0.15272660840970678, "grad_norm": 1.0155954245438423, "learning_rate": 4.992619441158636e-06, "loss": 0.4894, "step": 2508 }, { "epoch": 0.15278750418658466, "grad_norm": 1.0659160276622512, "learning_rate": 4.992613312874649e-06, "loss": 0.4838, "step": 2509 }, { "epoch": 0.15284839996346253, "grad_norm": 1.0779526819167673, "learning_rate": 4.99260718205124e-06, "loss": 0.5091, "step": 2510 }, { "epoch": 0.1529092957403404, "grad_norm": 1.082354512442352, "learning_rate": 4.992601048688413e-06, "loss": 0.4948, "step": 2511 }, { "epoch": 0.15297019151721827, "grad_norm": 0.9896906575972525, "learning_rate": 4.9925949127861775e-06, "loss": 0.4989, "step": 2512 }, { "epoch": 0.15303108729409615, "grad_norm": 1.1777369848685637, "learning_rate": 4.992588774344538e-06, "loss": 0.518, "step": 2513 }, { "epoch": 0.15309198307097402, "grad_norm": 1.0860356325719565, "learning_rate": 4.992582633363499e-06, "loss": 0.549, "step": 2514 }, { "epoch": 0.1531528788478519, "grad_norm": 0.9436880425522627, "learning_rate": 4.9925764898430705e-06, "loss": 0.523, "step": 2515 }, { "epoch": 0.15321377462472976, "grad_norm": 1.0766070975103197, "learning_rate": 4.992570343783256e-06, "loss": 0.5447, "step": 2516 }, { "epoch": 0.15327467040160764, "grad_norm": 1.1094380687022705, "learning_rate": 4.992564195184065e-06, "loss": 0.5535, "step": 2517 }, { "epoch": 0.1533355661784855, "grad_norm": 1.0195847645821856, "learning_rate": 4.9925580440454985e-06, "loss": 0.6323, "step": 2518 }, { "epoch": 0.15339646195536338, "grad_norm": 1.0541380315256943, "learning_rate": 4.992551890367567e-06, "loss": 0.49, "step": 2519 }, { "epoch": 0.15345735773224126, "grad_norm": 1.0225681251169028, "learning_rate": 4.992545734150275e-06, "loss": 0.4918, "step": 2520 }, { "epoch": 0.15351825350911913, "grad_norm": 1.0734081397454152, "learning_rate": 4.992539575393629e-06, "loss": 0.5726, "step": 2521 }, { "epoch": 0.15357914928599703, "grad_norm": 1.0044330366259513, "learning_rate": 4.992533414097635e-06, "loss": 0.4881, "step": 2522 }, { "epoch": 0.1536400450628749, "grad_norm": 1.1630243081646932, "learning_rate": 4.992527250262301e-06, "loss": 0.4608, "step": 2523 }, { "epoch": 0.15370094083975278, "grad_norm": 0.9261010707108193, "learning_rate": 4.9925210838876325e-06, "loss": 0.5648, "step": 2524 }, { "epoch": 0.15376183661663065, "grad_norm": 0.9627797408671431, "learning_rate": 4.9925149149736345e-06, "loss": 0.5485, "step": 2525 }, { "epoch": 0.15382273239350852, "grad_norm": 0.9793334308891601, "learning_rate": 4.992508743520314e-06, "loss": 0.5171, "step": 2526 }, { "epoch": 0.1538836281703864, "grad_norm": 1.0852135160491876, "learning_rate": 4.992502569527677e-06, "loss": 0.4797, "step": 2527 }, { "epoch": 0.15394452394726427, "grad_norm": 1.019968434033996, "learning_rate": 4.992496392995732e-06, "loss": 0.5475, "step": 2528 }, { "epoch": 0.15400541972414214, "grad_norm": 1.0800289435556996, "learning_rate": 4.9924902139244814e-06, "loss": 0.5104, "step": 2529 }, { "epoch": 0.15406631550102, "grad_norm": 1.0117880243081294, "learning_rate": 4.992484032313935e-06, "loss": 0.5786, "step": 2530 }, { "epoch": 0.15412721127789789, "grad_norm": 1.035741081241186, "learning_rate": 4.992477848164097e-06, "loss": 0.4681, "step": 2531 }, { "epoch": 0.15418810705477576, "grad_norm": 1.0088924610778338, "learning_rate": 4.992471661474975e-06, "loss": 0.5138, "step": 2532 }, { "epoch": 0.15424900283165363, "grad_norm": 1.075970013128953, "learning_rate": 4.992465472246574e-06, "loss": 0.5028, "step": 2533 }, { "epoch": 0.1543098986085315, "grad_norm": 1.003491021549956, "learning_rate": 4.9924592804789e-06, "loss": 0.529, "step": 2534 }, { "epoch": 0.15437079438540938, "grad_norm": 1.0370871050083033, "learning_rate": 4.9924530861719624e-06, "loss": 0.5118, "step": 2535 }, { "epoch": 0.15443169016228725, "grad_norm": 1.07777999667459, "learning_rate": 4.992446889325765e-06, "loss": 0.5183, "step": 2536 }, { "epoch": 0.15449258593916512, "grad_norm": 1.1193204467280498, "learning_rate": 4.992440689940314e-06, "loss": 0.5878, "step": 2537 }, { "epoch": 0.154553481716043, "grad_norm": 1.1060728951722227, "learning_rate": 4.992434488015616e-06, "loss": 0.4957, "step": 2538 }, { "epoch": 0.15461437749292087, "grad_norm": 1.0758463460792207, "learning_rate": 4.992428283551678e-06, "loss": 0.5201, "step": 2539 }, { "epoch": 0.15467527326979874, "grad_norm": 1.1081847140291947, "learning_rate": 4.992422076548505e-06, "loss": 0.5101, "step": 2540 }, { "epoch": 0.1547361690466766, "grad_norm": 1.1760266407722622, "learning_rate": 4.992415867006105e-06, "loss": 0.4355, "step": 2541 }, { "epoch": 0.1547970648235545, "grad_norm": 1.0871465585571198, "learning_rate": 4.9924096549244834e-06, "loss": 0.472, "step": 2542 }, { "epoch": 0.15485796060043236, "grad_norm": 1.0648975519323127, "learning_rate": 4.992403440303647e-06, "loss": 0.5403, "step": 2543 }, { "epoch": 0.15491885637731023, "grad_norm": 1.1122335985660812, "learning_rate": 4.992397223143602e-06, "loss": 0.592, "step": 2544 }, { "epoch": 0.1549797521541881, "grad_norm": 1.0237533690228025, "learning_rate": 4.992391003444354e-06, "loss": 0.5106, "step": 2545 }, { "epoch": 0.15504064793106598, "grad_norm": 0.9556372737748862, "learning_rate": 4.99238478120591e-06, "loss": 0.5168, "step": 2546 }, { "epoch": 0.15510154370794385, "grad_norm": 1.0952897003466284, "learning_rate": 4.992378556428276e-06, "loss": 0.4525, "step": 2547 }, { "epoch": 0.15516243948482172, "grad_norm": 0.9950723457331299, "learning_rate": 4.992372329111459e-06, "loss": 0.5269, "step": 2548 }, { "epoch": 0.1552233352616996, "grad_norm": 0.9811003921136067, "learning_rate": 4.992366099255465e-06, "loss": 0.5419, "step": 2549 }, { "epoch": 0.15528423103857747, "grad_norm": 1.0121596152535999, "learning_rate": 4.992359866860299e-06, "loss": 0.5193, "step": 2550 }, { "epoch": 0.15534512681545534, "grad_norm": 1.0577777293560058, "learning_rate": 4.9923536319259695e-06, "loss": 0.5061, "step": 2551 }, { "epoch": 0.15540602259233322, "grad_norm": 1.0455974920039641, "learning_rate": 4.992347394452482e-06, "loss": 0.5552, "step": 2552 }, { "epoch": 0.1554669183692111, "grad_norm": 1.0854764014922278, "learning_rate": 4.992341154439843e-06, "loss": 0.5231, "step": 2553 }, { "epoch": 0.15552781414608896, "grad_norm": 1.0276154251905032, "learning_rate": 4.992334911888058e-06, "loss": 0.571, "step": 2554 }, { "epoch": 0.15558870992296683, "grad_norm": 0.9769381279151127, "learning_rate": 4.9923286667971334e-06, "loss": 0.5758, "step": 2555 }, { "epoch": 0.1556496056998447, "grad_norm": 1.156751611992498, "learning_rate": 4.992322419167079e-06, "loss": 0.53, "step": 2556 }, { "epoch": 0.15571050147672258, "grad_norm": 1.092555771697481, "learning_rate": 4.992316168997895e-06, "loss": 0.5034, "step": 2557 }, { "epoch": 0.15577139725360045, "grad_norm": 1.143547368836882, "learning_rate": 4.9923099162895925e-06, "loss": 0.4808, "step": 2558 }, { "epoch": 0.15583229303047832, "grad_norm": 1.0597575892014566, "learning_rate": 4.992303661042177e-06, "loss": 0.5999, "step": 2559 }, { "epoch": 0.1558931888073562, "grad_norm": 1.0378157083822288, "learning_rate": 4.992297403255654e-06, "loss": 0.5859, "step": 2560 }, { "epoch": 0.15595408458423407, "grad_norm": 1.0175998278745078, "learning_rate": 4.992291142930029e-06, "loss": 0.4497, "step": 2561 }, { "epoch": 0.15601498036111194, "grad_norm": 1.0020800974279973, "learning_rate": 4.992284880065311e-06, "loss": 0.5417, "step": 2562 }, { "epoch": 0.15607587613798984, "grad_norm": 1.194784201679778, "learning_rate": 4.992278614661505e-06, "loss": 0.4644, "step": 2563 }, { "epoch": 0.15613677191486772, "grad_norm": 1.044937883008089, "learning_rate": 4.992272346718617e-06, "loss": 0.4942, "step": 2564 }, { "epoch": 0.1561976676917456, "grad_norm": 0.9794253186809694, "learning_rate": 4.992266076236654e-06, "loss": 0.512, "step": 2565 }, { "epoch": 0.15625856346862346, "grad_norm": 1.1301048881048834, "learning_rate": 4.992259803215622e-06, "loss": 0.446, "step": 2566 }, { "epoch": 0.15631945924550134, "grad_norm": 1.0321581279944596, "learning_rate": 4.992253527655527e-06, "loss": 0.5349, "step": 2567 }, { "epoch": 0.1563803550223792, "grad_norm": 1.0179270310552901, "learning_rate": 4.992247249556376e-06, "loss": 0.5195, "step": 2568 }, { "epoch": 0.15644125079925708, "grad_norm": 1.037448782230392, "learning_rate": 4.992240968918176e-06, "loss": 0.5319, "step": 2569 }, { "epoch": 0.15650214657613495, "grad_norm": 1.0175463316019242, "learning_rate": 4.992234685740932e-06, "loss": 0.4827, "step": 2570 }, { "epoch": 0.15656304235301283, "grad_norm": 1.0843764158575289, "learning_rate": 4.992228400024652e-06, "loss": 0.5462, "step": 2571 }, { "epoch": 0.1566239381298907, "grad_norm": 1.086288297873897, "learning_rate": 4.992222111769341e-06, "loss": 0.5191, "step": 2572 }, { "epoch": 0.15668483390676857, "grad_norm": 1.0608852966894236, "learning_rate": 4.992215820975006e-06, "loss": 0.5303, "step": 2573 }, { "epoch": 0.15674572968364645, "grad_norm": 1.0698965139131487, "learning_rate": 4.992209527641653e-06, "loss": 0.5155, "step": 2574 }, { "epoch": 0.15680662546052432, "grad_norm": 1.1183478110279494, "learning_rate": 4.9922032317692895e-06, "loss": 0.4934, "step": 2575 }, { "epoch": 0.1568675212374022, "grad_norm": 1.2092922347684538, "learning_rate": 4.99219693335792e-06, "loss": 0.509, "step": 2576 }, { "epoch": 0.15692841701428006, "grad_norm": 0.9800039830089251, "learning_rate": 4.9921906324075534e-06, "loss": 0.5545, "step": 2577 }, { "epoch": 0.15698931279115794, "grad_norm": 1.0242641952557892, "learning_rate": 4.992184328918194e-06, "loss": 0.5466, "step": 2578 }, { "epoch": 0.1570502085680358, "grad_norm": 1.055300739074551, "learning_rate": 4.992178022889848e-06, "loss": 0.5713, "step": 2579 }, { "epoch": 0.15711110434491368, "grad_norm": 1.0613056623557402, "learning_rate": 4.992171714322525e-06, "loss": 0.5003, "step": 2580 }, { "epoch": 0.15717200012179156, "grad_norm": 1.0257897312510744, "learning_rate": 4.992165403216228e-06, "loss": 0.5228, "step": 2581 }, { "epoch": 0.15723289589866943, "grad_norm": 1.0001915131256194, "learning_rate": 4.992159089570965e-06, "loss": 0.577, "step": 2582 }, { "epoch": 0.1572937916755473, "grad_norm": 1.096205794355187, "learning_rate": 4.9921527733867425e-06, "loss": 0.4924, "step": 2583 }, { "epoch": 0.15735468745242517, "grad_norm": 1.080066664054663, "learning_rate": 4.992146454663566e-06, "loss": 0.4794, "step": 2584 }, { "epoch": 0.15741558322930305, "grad_norm": 1.1292952672317547, "learning_rate": 4.992140133401443e-06, "loss": 0.4863, "step": 2585 }, { "epoch": 0.15747647900618092, "grad_norm": 1.0986695029289961, "learning_rate": 4.992133809600379e-06, "loss": 0.5738, "step": 2586 }, { "epoch": 0.1575373747830588, "grad_norm": 1.0944942941022282, "learning_rate": 4.99212748326038e-06, "loss": 0.5787, "step": 2587 }, { "epoch": 0.15759827055993667, "grad_norm": 1.0641372579759265, "learning_rate": 4.992121154381455e-06, "loss": 0.5263, "step": 2588 }, { "epoch": 0.15765916633681454, "grad_norm": 0.9550840406261758, "learning_rate": 4.992114822963608e-06, "loss": 0.516, "step": 2589 }, { "epoch": 0.1577200621136924, "grad_norm": 1.0659288923517538, "learning_rate": 4.992108489006846e-06, "loss": 0.5546, "step": 2590 }, { "epoch": 0.15778095789057028, "grad_norm": 1.1501160140122448, "learning_rate": 4.992102152511177e-06, "loss": 0.489, "step": 2591 }, { "epoch": 0.15784185366744816, "grad_norm": 1.0688635884390105, "learning_rate": 4.992095813476605e-06, "loss": 0.4921, "step": 2592 }, { "epoch": 0.15790274944432603, "grad_norm": 1.0406540397436232, "learning_rate": 4.9920894719031375e-06, "loss": 0.5124, "step": 2593 }, { "epoch": 0.1579636452212039, "grad_norm": 1.0486013945885582, "learning_rate": 4.992083127790782e-06, "loss": 0.4943, "step": 2594 }, { "epoch": 0.15802454099808178, "grad_norm": 1.0642460409282146, "learning_rate": 4.992076781139543e-06, "loss": 0.478, "step": 2595 }, { "epoch": 0.15808543677495965, "grad_norm": 1.1038833547985802, "learning_rate": 4.992070431949428e-06, "loss": 0.5992, "step": 2596 }, { "epoch": 0.15814633255183752, "grad_norm": 1.115393599211892, "learning_rate": 4.992064080220444e-06, "loss": 0.5647, "step": 2597 }, { "epoch": 0.1582072283287154, "grad_norm": 1.0729702609368583, "learning_rate": 4.992057725952597e-06, "loss": 0.5108, "step": 2598 }, { "epoch": 0.15826812410559327, "grad_norm": 1.1564801116076202, "learning_rate": 4.992051369145893e-06, "loss": 0.4607, "step": 2599 }, { "epoch": 0.15832901988247114, "grad_norm": 0.9797085498389075, "learning_rate": 4.992045009800339e-06, "loss": 0.4746, "step": 2600 }, { "epoch": 0.158389915659349, "grad_norm": 1.0142529565680332, "learning_rate": 4.992038647915941e-06, "loss": 0.5507, "step": 2601 }, { "epoch": 0.15845081143622688, "grad_norm": 1.0435908544157484, "learning_rate": 4.9920322834927065e-06, "loss": 0.626, "step": 2602 }, { "epoch": 0.15851170721310476, "grad_norm": 1.0609425144520421, "learning_rate": 4.992025916530641e-06, "loss": 0.5403, "step": 2603 }, { "epoch": 0.15857260298998266, "grad_norm": 1.0179584115772757, "learning_rate": 4.992019547029752e-06, "loss": 0.4869, "step": 2604 }, { "epoch": 0.15863349876686053, "grad_norm": 1.07376537272316, "learning_rate": 4.992013174990044e-06, "loss": 0.4329, "step": 2605 }, { "epoch": 0.1586943945437384, "grad_norm": 1.1580166731546921, "learning_rate": 4.992006800411525e-06, "loss": 0.5151, "step": 2606 }, { "epoch": 0.15875529032061628, "grad_norm": 0.9580182490961687, "learning_rate": 4.992000423294202e-06, "loss": 0.488, "step": 2607 }, { "epoch": 0.15881618609749415, "grad_norm": 1.1388389370108145, "learning_rate": 4.991994043638081e-06, "loss": 0.5532, "step": 2608 }, { "epoch": 0.15887708187437202, "grad_norm": 1.0832401021536153, "learning_rate": 4.9919876614431675e-06, "loss": 0.5237, "step": 2609 }, { "epoch": 0.1589379776512499, "grad_norm": 1.1417662669536117, "learning_rate": 4.991981276709469e-06, "loss": 0.5537, "step": 2610 }, { "epoch": 0.15899887342812777, "grad_norm": 1.0143735305832462, "learning_rate": 4.991974889436992e-06, "loss": 0.5781, "step": 2611 }, { "epoch": 0.15905976920500564, "grad_norm": 0.9806430812630272, "learning_rate": 4.991968499625742e-06, "loss": 0.559, "step": 2612 }, { "epoch": 0.15912066498188351, "grad_norm": 1.0129678341655717, "learning_rate": 4.9919621072757275e-06, "loss": 0.5771, "step": 2613 }, { "epoch": 0.1591815607587614, "grad_norm": 1.0989977976917944, "learning_rate": 4.9919557123869535e-06, "loss": 0.4921, "step": 2614 }, { "epoch": 0.15924245653563926, "grad_norm": 1.0561139392386654, "learning_rate": 4.991949314959426e-06, "loss": 0.5279, "step": 2615 }, { "epoch": 0.15930335231251713, "grad_norm": 0.9937027481659902, "learning_rate": 4.991942914993153e-06, "loss": 0.5357, "step": 2616 }, { "epoch": 0.159364248089395, "grad_norm": 0.9175838540979713, "learning_rate": 4.99193651248814e-06, "loss": 0.5543, "step": 2617 }, { "epoch": 0.15942514386627288, "grad_norm": 0.9786015960772871, "learning_rate": 4.991930107444395e-06, "loss": 0.5977, "step": 2618 }, { "epoch": 0.15948603964315075, "grad_norm": 1.0409053725080188, "learning_rate": 4.991923699861922e-06, "loss": 0.4867, "step": 2619 }, { "epoch": 0.15954693542002862, "grad_norm": 1.0583552055811372, "learning_rate": 4.991917289740729e-06, "loss": 0.4602, "step": 2620 }, { "epoch": 0.1596078311969065, "grad_norm": 1.0059113840265925, "learning_rate": 4.991910877080823e-06, "loss": 0.5243, "step": 2621 }, { "epoch": 0.15966872697378437, "grad_norm": 1.104247303161446, "learning_rate": 4.991904461882211e-06, "loss": 0.4192, "step": 2622 }, { "epoch": 0.15972962275066224, "grad_norm": 0.9776447002319045, "learning_rate": 4.991898044144897e-06, "loss": 0.5986, "step": 2623 }, { "epoch": 0.15979051852754012, "grad_norm": 1.071039096499261, "learning_rate": 4.99189162386889e-06, "loss": 0.5095, "step": 2624 }, { "epoch": 0.159851414304418, "grad_norm": 1.0002016199462411, "learning_rate": 4.991885201054195e-06, "loss": 0.5201, "step": 2625 }, { "epoch": 0.15991231008129586, "grad_norm": 1.0598238200555685, "learning_rate": 4.991878775700819e-06, "loss": 0.4723, "step": 2626 }, { "epoch": 0.15997320585817373, "grad_norm": 1.158156052090699, "learning_rate": 4.991872347808769e-06, "loss": 0.4764, "step": 2627 }, { "epoch": 0.1600341016350516, "grad_norm": 1.0413874131349758, "learning_rate": 4.9918659173780515e-06, "loss": 0.5347, "step": 2628 }, { "epoch": 0.16009499741192948, "grad_norm": 1.1049384113744014, "learning_rate": 4.9918594844086735e-06, "loss": 0.5097, "step": 2629 }, { "epoch": 0.16015589318880735, "grad_norm": 1.09706410063075, "learning_rate": 4.9918530489006395e-06, "loss": 0.5121, "step": 2630 }, { "epoch": 0.16021678896568523, "grad_norm": 1.1291586566274292, "learning_rate": 4.991846610853958e-06, "loss": 0.5784, "step": 2631 }, { "epoch": 0.1602776847425631, "grad_norm": 1.16586466684923, "learning_rate": 4.991840170268635e-06, "loss": 0.4893, "step": 2632 }, { "epoch": 0.16033858051944097, "grad_norm": 1.0973811366370816, "learning_rate": 4.991833727144677e-06, "loss": 0.5394, "step": 2633 }, { "epoch": 0.16039947629631884, "grad_norm": 1.032383312029091, "learning_rate": 4.991827281482091e-06, "loss": 0.5878, "step": 2634 }, { "epoch": 0.16046037207319672, "grad_norm": 1.080675597695051, "learning_rate": 4.991820833280883e-06, "loss": 0.5113, "step": 2635 }, { "epoch": 0.1605212678500746, "grad_norm": 1.1392356782452133, "learning_rate": 4.991814382541059e-06, "loss": 0.4703, "step": 2636 }, { "epoch": 0.16058216362695246, "grad_norm": 0.9784544580120039, "learning_rate": 4.9918079292626265e-06, "loss": 0.5398, "step": 2637 }, { "epoch": 0.16064305940383033, "grad_norm": 0.9664645254747626, "learning_rate": 4.991801473445592e-06, "loss": 0.5316, "step": 2638 }, { "epoch": 0.1607039551807082, "grad_norm": 0.9553990089051348, "learning_rate": 4.991795015089963e-06, "loss": 0.5297, "step": 2639 }, { "epoch": 0.16076485095758608, "grad_norm": 0.9903211943796809, "learning_rate": 4.991788554195744e-06, "loss": 0.5369, "step": 2640 }, { "epoch": 0.16082574673446395, "grad_norm": 1.0177072794890971, "learning_rate": 4.991782090762942e-06, "loss": 0.5374, "step": 2641 }, { "epoch": 0.16088664251134183, "grad_norm": 1.070948674259093, "learning_rate": 4.991775624791566e-06, "loss": 0.4591, "step": 2642 }, { "epoch": 0.1609475382882197, "grad_norm": 1.0678968203983668, "learning_rate": 4.9917691562816195e-06, "loss": 0.5086, "step": 2643 }, { "epoch": 0.16100843406509757, "grad_norm": 1.0902573939981008, "learning_rate": 4.9917626852331105e-06, "loss": 0.5306, "step": 2644 }, { "epoch": 0.16106932984197547, "grad_norm": 1.0822644828031385, "learning_rate": 4.991756211646046e-06, "loss": 0.4734, "step": 2645 }, { "epoch": 0.16113022561885335, "grad_norm": 1.1442272447597848, "learning_rate": 4.991749735520432e-06, "loss": 0.5995, "step": 2646 }, { "epoch": 0.16119112139573122, "grad_norm": 1.095111873169538, "learning_rate": 4.991743256856275e-06, "loss": 0.4561, "step": 2647 }, { "epoch": 0.1612520171726091, "grad_norm": 1.0113587430921378, "learning_rate": 4.991736775653582e-06, "loss": 0.4836, "step": 2648 }, { "epoch": 0.16131291294948696, "grad_norm": 1.1362371809826601, "learning_rate": 4.9917302919123585e-06, "loss": 0.5068, "step": 2649 }, { "epoch": 0.16137380872636484, "grad_norm": 1.001333493239545, "learning_rate": 4.991723805632613e-06, "loss": 0.4921, "step": 2650 }, { "epoch": 0.1614347045032427, "grad_norm": 1.0075067423450494, "learning_rate": 4.991717316814351e-06, "loss": 0.6351, "step": 2651 }, { "epoch": 0.16149560028012058, "grad_norm": 0.9949803845467409, "learning_rate": 4.991710825457579e-06, "loss": 0.5615, "step": 2652 }, { "epoch": 0.16155649605699846, "grad_norm": 1.016143190886092, "learning_rate": 4.991704331562303e-06, "loss": 0.486, "step": 2653 }, { "epoch": 0.16161739183387633, "grad_norm": 1.0507781957598343, "learning_rate": 4.991697835128532e-06, "loss": 0.5596, "step": 2654 }, { "epoch": 0.1616782876107542, "grad_norm": 1.1246051315132928, "learning_rate": 4.99169133615627e-06, "loss": 0.5417, "step": 2655 }, { "epoch": 0.16173918338763207, "grad_norm": 0.975220389213433, "learning_rate": 4.9916848346455245e-06, "loss": 0.5278, "step": 2656 }, { "epoch": 0.16180007916450995, "grad_norm": 1.020738946283857, "learning_rate": 4.991678330596303e-06, "loss": 0.4641, "step": 2657 }, { "epoch": 0.16186097494138782, "grad_norm": 1.026848838857214, "learning_rate": 4.99167182400861e-06, "loss": 0.4897, "step": 2658 }, { "epoch": 0.1619218707182657, "grad_norm": 1.069819956889003, "learning_rate": 4.991665314882456e-06, "loss": 0.5108, "step": 2659 }, { "epoch": 0.16198276649514357, "grad_norm": 1.10203102893673, "learning_rate": 4.991658803217843e-06, "loss": 0.5457, "step": 2660 }, { "epoch": 0.16204366227202144, "grad_norm": 1.053136217907735, "learning_rate": 4.991652289014781e-06, "loss": 0.523, "step": 2661 }, { "epoch": 0.1621045580488993, "grad_norm": 1.1005359142487499, "learning_rate": 4.991645772273275e-06, "loss": 0.4982, "step": 2662 }, { "epoch": 0.16216545382577718, "grad_norm": 1.001219351232135, "learning_rate": 4.991639252993332e-06, "loss": 0.5354, "step": 2663 }, { "epoch": 0.16222634960265506, "grad_norm": 1.0469747778872975, "learning_rate": 4.991632731174959e-06, "loss": 0.4782, "step": 2664 }, { "epoch": 0.16228724537953293, "grad_norm": 1.0626903956918048, "learning_rate": 4.991626206818162e-06, "loss": 0.4994, "step": 2665 }, { "epoch": 0.1623481411564108, "grad_norm": 1.1378202368668038, "learning_rate": 4.991619679922949e-06, "loss": 0.4585, "step": 2666 }, { "epoch": 0.16240903693328868, "grad_norm": 1.0117829201861008, "learning_rate": 4.991613150489325e-06, "loss": 0.526, "step": 2667 }, { "epoch": 0.16246993271016655, "grad_norm": 0.9437106936049251, "learning_rate": 4.991606618517297e-06, "loss": 0.5238, "step": 2668 }, { "epoch": 0.16253082848704442, "grad_norm": 1.1378861394523276, "learning_rate": 4.9916000840068725e-06, "loss": 0.4531, "step": 2669 }, { "epoch": 0.1625917242639223, "grad_norm": 1.1063159083846632, "learning_rate": 4.991593546958057e-06, "loss": 0.5156, "step": 2670 }, { "epoch": 0.16265262004080017, "grad_norm": 1.070412221579265, "learning_rate": 4.991587007370859e-06, "loss": 0.5373, "step": 2671 }, { "epoch": 0.16271351581767804, "grad_norm": 1.1074392917141156, "learning_rate": 4.9915804652452825e-06, "loss": 0.4537, "step": 2672 }, { "epoch": 0.1627744115945559, "grad_norm": 1.0937286329765379, "learning_rate": 4.9915739205813365e-06, "loss": 0.5238, "step": 2673 }, { "epoch": 0.16283530737143379, "grad_norm": 1.0925508228445737, "learning_rate": 4.991567373379027e-06, "loss": 0.496, "step": 2674 }, { "epoch": 0.16289620314831166, "grad_norm": 1.171195203918714, "learning_rate": 4.991560823638359e-06, "loss": 0.4736, "step": 2675 }, { "epoch": 0.16295709892518953, "grad_norm": 0.9944713458758395, "learning_rate": 4.991554271359342e-06, "loss": 0.5093, "step": 2676 }, { "epoch": 0.1630179947020674, "grad_norm": 1.1216067779031424, "learning_rate": 4.991547716541981e-06, "loss": 0.5234, "step": 2677 }, { "epoch": 0.16307889047894528, "grad_norm": 1.0166782213015428, "learning_rate": 4.991541159186283e-06, "loss": 0.5297, "step": 2678 }, { "epoch": 0.16313978625582315, "grad_norm": 1.137016440528037, "learning_rate": 4.9915345992922546e-06, "loss": 0.5802, "step": 2679 }, { "epoch": 0.16320068203270102, "grad_norm": 1.0675341447706577, "learning_rate": 4.991528036859903e-06, "loss": 0.476, "step": 2680 }, { "epoch": 0.1632615778095789, "grad_norm": 1.0536629623157305, "learning_rate": 4.991521471889234e-06, "loss": 0.503, "step": 2681 }, { "epoch": 0.16332247358645677, "grad_norm": 1.0216418782973864, "learning_rate": 4.9915149043802545e-06, "loss": 0.5891, "step": 2682 }, { "epoch": 0.16338336936333464, "grad_norm": 1.1158581972319055, "learning_rate": 4.9915083343329715e-06, "loss": 0.4607, "step": 2683 }, { "epoch": 0.1634442651402125, "grad_norm": 1.0628550643495978, "learning_rate": 4.991501761747392e-06, "loss": 0.5983, "step": 2684 }, { "epoch": 0.1635051609170904, "grad_norm": 1.0823624322103942, "learning_rate": 4.991495186623522e-06, "loss": 0.5478, "step": 2685 }, { "epoch": 0.1635660566939683, "grad_norm": 1.1230224112520084, "learning_rate": 4.991488608961369e-06, "loss": 0.4847, "step": 2686 }, { "epoch": 0.16362695247084616, "grad_norm": 0.9725190169064424, "learning_rate": 4.991482028760939e-06, "loss": 0.524, "step": 2687 }, { "epoch": 0.16368784824772403, "grad_norm": 0.9794849590678537, "learning_rate": 4.9914754460222385e-06, "loss": 0.4904, "step": 2688 }, { "epoch": 0.1637487440246019, "grad_norm": 1.085887177295332, "learning_rate": 4.991468860745274e-06, "loss": 0.451, "step": 2689 }, { "epoch": 0.16380963980147978, "grad_norm": 1.035687472384461, "learning_rate": 4.991462272930054e-06, "loss": 0.4592, "step": 2690 }, { "epoch": 0.16387053557835765, "grad_norm": 1.0558468152685654, "learning_rate": 4.991455682576583e-06, "loss": 0.5, "step": 2691 }, { "epoch": 0.16393143135523552, "grad_norm": 1.2231556071201213, "learning_rate": 4.99144908968487e-06, "loss": 0.5429, "step": 2692 }, { "epoch": 0.1639923271321134, "grad_norm": 1.0538400161559247, "learning_rate": 4.99144249425492e-06, "loss": 0.4832, "step": 2693 }, { "epoch": 0.16405322290899127, "grad_norm": 1.037708148188799, "learning_rate": 4.99143589628674e-06, "loss": 0.4916, "step": 2694 }, { "epoch": 0.16411411868586914, "grad_norm": 1.0401599803622088, "learning_rate": 4.991429295780337e-06, "loss": 0.5156, "step": 2695 }, { "epoch": 0.16417501446274702, "grad_norm": 1.0572531972949, "learning_rate": 4.9914226927357175e-06, "loss": 0.5678, "step": 2696 }, { "epoch": 0.1642359102396249, "grad_norm": 1.129830016718747, "learning_rate": 4.991416087152889e-06, "loss": 0.427, "step": 2697 }, { "epoch": 0.16429680601650276, "grad_norm": 1.0854384814298255, "learning_rate": 4.9914094790318564e-06, "loss": 0.5101, "step": 2698 }, { "epoch": 0.16435770179338063, "grad_norm": 0.9655376317487521, "learning_rate": 4.991402868372629e-06, "loss": 0.4866, "step": 2699 }, { "epoch": 0.1644185975702585, "grad_norm": 1.0398463715881647, "learning_rate": 4.991396255175211e-06, "loss": 0.5389, "step": 2700 }, { "epoch": 0.16447949334713638, "grad_norm": 1.0748113071792644, "learning_rate": 4.991389639439611e-06, "loss": 0.5138, "step": 2701 }, { "epoch": 0.16454038912401425, "grad_norm": 0.9602775871160099, "learning_rate": 4.991383021165835e-06, "loss": 0.5027, "step": 2702 }, { "epoch": 0.16460128490089213, "grad_norm": 0.9496350765772753, "learning_rate": 4.991376400353889e-06, "loss": 0.5388, "step": 2703 }, { "epoch": 0.16466218067777, "grad_norm": 1.082797445263557, "learning_rate": 4.991369777003781e-06, "loss": 0.5091, "step": 2704 }, { "epoch": 0.16472307645464787, "grad_norm": 0.9903065436834231, "learning_rate": 4.991363151115518e-06, "loss": 0.4448, "step": 2705 }, { "epoch": 0.16478397223152574, "grad_norm": 1.0346835943427912, "learning_rate": 4.991356522689105e-06, "loss": 0.5775, "step": 2706 }, { "epoch": 0.16484486800840362, "grad_norm": 1.0429906920833412, "learning_rate": 4.99134989172455e-06, "loss": 0.5217, "step": 2707 }, { "epoch": 0.1649057637852815, "grad_norm": 1.0681541589658363, "learning_rate": 4.99134325822186e-06, "loss": 0.4954, "step": 2708 }, { "epoch": 0.16496665956215936, "grad_norm": 1.1460915950011168, "learning_rate": 4.9913366221810415e-06, "loss": 0.5051, "step": 2709 }, { "epoch": 0.16502755533903724, "grad_norm": 1.0878830203041965, "learning_rate": 4.9913299836021e-06, "loss": 0.4911, "step": 2710 }, { "epoch": 0.1650884511159151, "grad_norm": 0.9892932953432554, "learning_rate": 4.991323342485043e-06, "loss": 0.524, "step": 2711 }, { "epoch": 0.16514934689279298, "grad_norm": 1.1218118242576196, "learning_rate": 4.991316698829879e-06, "loss": 0.5092, "step": 2712 }, { "epoch": 0.16521024266967085, "grad_norm": 0.995811649942255, "learning_rate": 4.991310052636613e-06, "loss": 0.5378, "step": 2713 }, { "epoch": 0.16527113844654873, "grad_norm": 0.9354965329250947, "learning_rate": 4.991303403905251e-06, "loss": 0.5493, "step": 2714 }, { "epoch": 0.1653320342234266, "grad_norm": 1.0767891989239866, "learning_rate": 4.9912967526358025e-06, "loss": 0.5158, "step": 2715 }, { "epoch": 0.16539293000030447, "grad_norm": 1.004905818612976, "learning_rate": 4.9912900988282714e-06, "loss": 0.5375, "step": 2716 }, { "epoch": 0.16545382577718235, "grad_norm": 1.075190676992048, "learning_rate": 4.991283442482666e-06, "loss": 0.5163, "step": 2717 }, { "epoch": 0.16551472155406022, "grad_norm": 1.1092269200412495, "learning_rate": 4.991276783598993e-06, "loss": 0.5405, "step": 2718 }, { "epoch": 0.1655756173309381, "grad_norm": 1.0461324491518276, "learning_rate": 4.99127012217726e-06, "loss": 0.5223, "step": 2719 }, { "epoch": 0.16563651310781596, "grad_norm": 0.9754072980919567, "learning_rate": 4.991263458217472e-06, "loss": 0.5466, "step": 2720 }, { "epoch": 0.16569740888469384, "grad_norm": 0.9544618450239257, "learning_rate": 4.991256791719635e-06, "loss": 0.5535, "step": 2721 }, { "epoch": 0.1657583046615717, "grad_norm": 1.0648888747064493, "learning_rate": 4.99125012268376e-06, "loss": 0.5513, "step": 2722 }, { "epoch": 0.16581920043844958, "grad_norm": 1.075123557521074, "learning_rate": 4.99124345110985e-06, "loss": 0.5188, "step": 2723 }, { "epoch": 0.16588009621532745, "grad_norm": 1.0578750659040064, "learning_rate": 4.991236776997912e-06, "loss": 0.5394, "step": 2724 }, { "epoch": 0.16594099199220533, "grad_norm": 1.0020392173369737, "learning_rate": 4.9912301003479545e-06, "loss": 0.5457, "step": 2725 }, { "epoch": 0.1660018877690832, "grad_norm": 0.997170119745342, "learning_rate": 4.991223421159984e-06, "loss": 0.5661, "step": 2726 }, { "epoch": 0.1660627835459611, "grad_norm": 1.106496304819477, "learning_rate": 4.991216739434007e-06, "loss": 0.5342, "step": 2727 }, { "epoch": 0.16612367932283897, "grad_norm": 1.142998241349699, "learning_rate": 4.9912100551700295e-06, "loss": 0.5132, "step": 2728 }, { "epoch": 0.16618457509971685, "grad_norm": 1.1178747171145162, "learning_rate": 4.99120336836806e-06, "loss": 0.5753, "step": 2729 }, { "epoch": 0.16624547087659472, "grad_norm": 1.1228982600527717, "learning_rate": 4.991196679028103e-06, "loss": 0.4881, "step": 2730 }, { "epoch": 0.1663063666534726, "grad_norm": 0.9700762822768757, "learning_rate": 4.991189987150167e-06, "loss": 0.5215, "step": 2731 }, { "epoch": 0.16636726243035047, "grad_norm": 1.0755146675952505, "learning_rate": 4.991183292734259e-06, "loss": 0.4988, "step": 2732 }, { "epoch": 0.16642815820722834, "grad_norm": 0.9925287723464853, "learning_rate": 4.991176595780385e-06, "loss": 0.485, "step": 2733 }, { "epoch": 0.1664890539841062, "grad_norm": 1.1252876360535016, "learning_rate": 4.991169896288552e-06, "loss": 0.5638, "step": 2734 }, { "epoch": 0.16654994976098408, "grad_norm": 0.996610968806556, "learning_rate": 4.991163194258767e-06, "loss": 0.5748, "step": 2735 }, { "epoch": 0.16661084553786196, "grad_norm": 1.136752660548816, "learning_rate": 4.991156489691036e-06, "loss": 0.4721, "step": 2736 }, { "epoch": 0.16667174131473983, "grad_norm": 1.1236606030952112, "learning_rate": 4.9911497825853674e-06, "loss": 0.5512, "step": 2737 }, { "epoch": 0.1667326370916177, "grad_norm": 1.026740306299525, "learning_rate": 4.9911430729417676e-06, "loss": 0.5276, "step": 2738 }, { "epoch": 0.16679353286849558, "grad_norm": 0.9619989203757799, "learning_rate": 4.991136360760242e-06, "loss": 0.5001, "step": 2739 }, { "epoch": 0.16685442864537345, "grad_norm": 1.0011236017950504, "learning_rate": 4.991129646040799e-06, "loss": 0.5586, "step": 2740 }, { "epoch": 0.16691532442225132, "grad_norm": 1.0269830796653654, "learning_rate": 4.991122928783445e-06, "loss": 0.5799, "step": 2741 }, { "epoch": 0.1669762201991292, "grad_norm": 1.0461879478380072, "learning_rate": 4.991116208988186e-06, "loss": 0.5528, "step": 2742 }, { "epoch": 0.16703711597600707, "grad_norm": 1.0596382475931543, "learning_rate": 4.99110948665503e-06, "loss": 0.4737, "step": 2743 }, { "epoch": 0.16709801175288494, "grad_norm": 1.0484023966823774, "learning_rate": 4.991102761783984e-06, "loss": 0.4636, "step": 2744 }, { "epoch": 0.1671589075297628, "grad_norm": 1.119498466771803, "learning_rate": 4.991096034375054e-06, "loss": 0.4512, "step": 2745 }, { "epoch": 0.16721980330664069, "grad_norm": 1.0197743286425938, "learning_rate": 4.991089304428247e-06, "loss": 0.557, "step": 2746 }, { "epoch": 0.16728069908351856, "grad_norm": 1.0335363016475971, "learning_rate": 4.991082571943569e-06, "loss": 0.5463, "step": 2747 }, { "epoch": 0.16734159486039643, "grad_norm": 1.156182203977758, "learning_rate": 4.99107583692103e-06, "loss": 0.5235, "step": 2748 }, { "epoch": 0.1674024906372743, "grad_norm": 1.0843273074191415, "learning_rate": 4.991069099360634e-06, "loss": 0.4815, "step": 2749 }, { "epoch": 0.16746338641415218, "grad_norm": 1.084988384962405, "learning_rate": 4.991062359262388e-06, "loss": 0.5567, "step": 2750 }, { "epoch": 0.16752428219103005, "grad_norm": 1.1160813036822828, "learning_rate": 4.991055616626299e-06, "loss": 0.5228, "step": 2751 }, { "epoch": 0.16758517796790792, "grad_norm": 1.031441877456662, "learning_rate": 4.991048871452375e-06, "loss": 0.5063, "step": 2752 }, { "epoch": 0.1676460737447858, "grad_norm": 1.1115112221565828, "learning_rate": 4.9910421237406225e-06, "loss": 0.5146, "step": 2753 }, { "epoch": 0.16770696952166367, "grad_norm": 1.097758939826504, "learning_rate": 4.9910353734910475e-06, "loss": 0.4574, "step": 2754 }, { "epoch": 0.16776786529854154, "grad_norm": 1.1223038811829529, "learning_rate": 4.9910286207036584e-06, "loss": 0.5263, "step": 2755 }, { "epoch": 0.1678287610754194, "grad_norm": 1.0378952480567196, "learning_rate": 4.99102186537846e-06, "loss": 0.4863, "step": 2756 }, { "epoch": 0.1678896568522973, "grad_norm": 1.0749022680184375, "learning_rate": 4.991015107515461e-06, "loss": 0.5113, "step": 2757 }, { "epoch": 0.16795055262917516, "grad_norm": 1.2376035496725921, "learning_rate": 4.991008347114667e-06, "loss": 0.5063, "step": 2758 }, { "epoch": 0.16801144840605303, "grad_norm": 1.1022349300761587, "learning_rate": 4.991001584176086e-06, "loss": 0.4981, "step": 2759 }, { "epoch": 0.1680723441829309, "grad_norm": 1.1819187884299625, "learning_rate": 4.990994818699724e-06, "loss": 0.4752, "step": 2760 }, { "epoch": 0.16813323995980878, "grad_norm": 1.0322094413585923, "learning_rate": 4.990988050685589e-06, "loss": 0.5262, "step": 2761 }, { "epoch": 0.16819413573668665, "grad_norm": 0.9951172531134892, "learning_rate": 4.990981280133686e-06, "loss": 0.5602, "step": 2762 }, { "epoch": 0.16825503151356452, "grad_norm": 1.0300673738294612, "learning_rate": 4.990974507044024e-06, "loss": 0.5796, "step": 2763 }, { "epoch": 0.1683159272904424, "grad_norm": 1.096680884085464, "learning_rate": 4.990967731416609e-06, "loss": 0.4866, "step": 2764 }, { "epoch": 0.16837682306732027, "grad_norm": 1.059951179625986, "learning_rate": 4.990960953251448e-06, "loss": 0.4906, "step": 2765 }, { "epoch": 0.16843771884419814, "grad_norm": 1.056360149534859, "learning_rate": 4.9909541725485465e-06, "loss": 0.6054, "step": 2766 }, { "epoch": 0.16849861462107601, "grad_norm": 1.0417690156938268, "learning_rate": 4.990947389307914e-06, "loss": 0.502, "step": 2767 }, { "epoch": 0.16855951039795392, "grad_norm": 1.019374865704244, "learning_rate": 4.990940603529556e-06, "loss": 0.5692, "step": 2768 }, { "epoch": 0.1686204061748318, "grad_norm": 1.0213749437074848, "learning_rate": 4.990933815213479e-06, "loss": 0.5488, "step": 2769 }, { "epoch": 0.16868130195170966, "grad_norm": 1.0066734540347066, "learning_rate": 4.9909270243596906e-06, "loss": 0.5526, "step": 2770 }, { "epoch": 0.16874219772858753, "grad_norm": 1.0854242630530795, "learning_rate": 4.990920230968197e-06, "loss": 0.5135, "step": 2771 }, { "epoch": 0.1688030935054654, "grad_norm": 1.2436637258378764, "learning_rate": 4.990913435039006e-06, "loss": 0.5292, "step": 2772 }, { "epoch": 0.16886398928234328, "grad_norm": 1.075319006375499, "learning_rate": 4.990906636572125e-06, "loss": 0.5153, "step": 2773 }, { "epoch": 0.16892488505922115, "grad_norm": 1.2281640793786042, "learning_rate": 4.990899835567559e-06, "loss": 0.516, "step": 2774 }, { "epoch": 0.16898578083609903, "grad_norm": 1.1010900386896316, "learning_rate": 4.990893032025316e-06, "loss": 0.4862, "step": 2775 }, { "epoch": 0.1690466766129769, "grad_norm": 1.1140044879524127, "learning_rate": 4.990886225945404e-06, "loss": 0.4372, "step": 2776 }, { "epoch": 0.16910757238985477, "grad_norm": 0.9749092480357102, "learning_rate": 4.990879417327828e-06, "loss": 0.5706, "step": 2777 }, { "epoch": 0.16916846816673264, "grad_norm": 1.1768800803987973, "learning_rate": 4.990872606172596e-06, "loss": 0.5204, "step": 2778 }, { "epoch": 0.16922936394361052, "grad_norm": 1.0946869792305314, "learning_rate": 4.990865792479715e-06, "loss": 0.477, "step": 2779 }, { "epoch": 0.1692902597204884, "grad_norm": 1.013822031546885, "learning_rate": 4.990858976249191e-06, "loss": 0.5078, "step": 2780 }, { "epoch": 0.16935115549736626, "grad_norm": 1.056464762125387, "learning_rate": 4.990852157481033e-06, "loss": 0.4766, "step": 2781 }, { "epoch": 0.16941205127424414, "grad_norm": 1.1669467997493665, "learning_rate": 4.990845336175246e-06, "loss": 0.5695, "step": 2782 }, { "epoch": 0.169472947051122, "grad_norm": 0.9394691882805167, "learning_rate": 4.990838512331837e-06, "loss": 0.5205, "step": 2783 }, { "epoch": 0.16953384282799988, "grad_norm": 1.0510634351027965, "learning_rate": 4.990831685950814e-06, "loss": 0.5795, "step": 2784 }, { "epoch": 0.16959473860487775, "grad_norm": 0.9779053168676226, "learning_rate": 4.9908248570321834e-06, "loss": 0.543, "step": 2785 }, { "epoch": 0.16965563438175563, "grad_norm": 0.9792427470815624, "learning_rate": 4.990818025575952e-06, "loss": 0.5387, "step": 2786 }, { "epoch": 0.1697165301586335, "grad_norm": 0.9840424620288444, "learning_rate": 4.990811191582128e-06, "loss": 0.5628, "step": 2787 }, { "epoch": 0.16977742593551137, "grad_norm": 1.044829634402247, "learning_rate": 4.9908043550507165e-06, "loss": 0.4934, "step": 2788 }, { "epoch": 0.16983832171238925, "grad_norm": 1.0410293881762738, "learning_rate": 4.990797515981725e-06, "loss": 0.5007, "step": 2789 }, { "epoch": 0.16989921748926712, "grad_norm": 1.0651744334311575, "learning_rate": 4.9907906743751615e-06, "loss": 0.4733, "step": 2790 }, { "epoch": 0.169960113266145, "grad_norm": 1.1772188180412784, "learning_rate": 4.990783830231032e-06, "loss": 0.4919, "step": 2791 }, { "epoch": 0.17002100904302286, "grad_norm": 1.1022574594589656, "learning_rate": 4.990776983549344e-06, "loss": 0.5496, "step": 2792 }, { "epoch": 0.17008190481990074, "grad_norm": 0.9854699265880038, "learning_rate": 4.990770134330103e-06, "loss": 0.5089, "step": 2793 }, { "epoch": 0.1701428005967786, "grad_norm": 1.053729941064043, "learning_rate": 4.990763282573319e-06, "loss": 0.4921, "step": 2794 }, { "epoch": 0.17020369637365648, "grad_norm": 1.1161092466908684, "learning_rate": 4.990756428278995e-06, "loss": 0.5218, "step": 2795 }, { "epoch": 0.17026459215053436, "grad_norm": 1.0633172502072248, "learning_rate": 4.990749571447142e-06, "loss": 0.5472, "step": 2796 }, { "epoch": 0.17032548792741223, "grad_norm": 0.9990622164562154, "learning_rate": 4.990742712077764e-06, "loss": 0.5446, "step": 2797 }, { "epoch": 0.1703863837042901, "grad_norm": 1.0495662879411818, "learning_rate": 4.990735850170869e-06, "loss": 0.5172, "step": 2798 }, { "epoch": 0.17044727948116797, "grad_norm": 1.0461651593868748, "learning_rate": 4.990728985726465e-06, "loss": 0.5686, "step": 2799 }, { "epoch": 0.17050817525804585, "grad_norm": 1.1034464138009952, "learning_rate": 4.9907221187445575e-06, "loss": 0.5367, "step": 2800 }, { "epoch": 0.17056907103492372, "grad_norm": 1.0066783541200572, "learning_rate": 4.990715249225154e-06, "loss": 0.5279, "step": 2801 }, { "epoch": 0.1706299668118016, "grad_norm": 0.9626907028298224, "learning_rate": 4.990708377168263e-06, "loss": 0.5605, "step": 2802 }, { "epoch": 0.17069086258867947, "grad_norm": 1.127185883263584, "learning_rate": 4.990701502573888e-06, "loss": 0.4903, "step": 2803 }, { "epoch": 0.17075175836555734, "grad_norm": 1.070424965863723, "learning_rate": 4.99069462544204e-06, "loss": 0.5376, "step": 2804 }, { "epoch": 0.1708126541424352, "grad_norm": 1.0352624909703068, "learning_rate": 4.990687745772723e-06, "loss": 0.5153, "step": 2805 }, { "epoch": 0.17087354991931308, "grad_norm": 1.0868882433639415, "learning_rate": 4.990680863565945e-06, "loss": 0.5665, "step": 2806 }, { "epoch": 0.17093444569619096, "grad_norm": 1.1498751333425743, "learning_rate": 4.990673978821713e-06, "loss": 0.5723, "step": 2807 }, { "epoch": 0.17099534147306883, "grad_norm": 1.0503326773767916, "learning_rate": 4.990667091540035e-06, "loss": 0.5183, "step": 2808 }, { "epoch": 0.17105623724994673, "grad_norm": 1.0997222482926126, "learning_rate": 4.990660201720916e-06, "loss": 0.5683, "step": 2809 }, { "epoch": 0.1711171330268246, "grad_norm": 0.940266127173205, "learning_rate": 4.990653309364365e-06, "loss": 0.5695, "step": 2810 }, { "epoch": 0.17117802880370248, "grad_norm": 1.070796892161803, "learning_rate": 4.990646414470388e-06, "loss": 0.5181, "step": 2811 }, { "epoch": 0.17123892458058035, "grad_norm": 1.065163813431522, "learning_rate": 4.990639517038992e-06, "loss": 0.5453, "step": 2812 }, { "epoch": 0.17129982035745822, "grad_norm": 0.9892453150466485, "learning_rate": 4.9906326170701845e-06, "loss": 0.5695, "step": 2813 }, { "epoch": 0.1713607161343361, "grad_norm": 0.9576827853589414, "learning_rate": 4.990625714563972e-06, "loss": 0.5191, "step": 2814 }, { "epoch": 0.17142161191121397, "grad_norm": 1.0382044643083195, "learning_rate": 4.990618809520362e-06, "loss": 0.4889, "step": 2815 }, { "epoch": 0.17148250768809184, "grad_norm": 1.047116926990181, "learning_rate": 4.990611901939362e-06, "loss": 0.5271, "step": 2816 }, { "epoch": 0.1715434034649697, "grad_norm": 1.00820248306595, "learning_rate": 4.9906049918209775e-06, "loss": 0.5121, "step": 2817 }, { "epoch": 0.17160429924184759, "grad_norm": 1.0667811966576801, "learning_rate": 4.990598079165217e-06, "loss": 0.5315, "step": 2818 }, { "epoch": 0.17166519501872546, "grad_norm": 1.0356175856648462, "learning_rate": 4.990591163972087e-06, "loss": 0.4462, "step": 2819 }, { "epoch": 0.17172609079560333, "grad_norm": 1.1834793622345228, "learning_rate": 4.990584246241594e-06, "loss": 0.5473, "step": 2820 }, { "epoch": 0.1717869865724812, "grad_norm": 1.136712619758158, "learning_rate": 4.990577325973746e-06, "loss": 0.5489, "step": 2821 }, { "epoch": 0.17184788234935908, "grad_norm": 1.0748973504429145, "learning_rate": 4.990570403168549e-06, "loss": 0.4992, "step": 2822 }, { "epoch": 0.17190877812623695, "grad_norm": 1.0179640603182982, "learning_rate": 4.99056347782601e-06, "loss": 0.5399, "step": 2823 }, { "epoch": 0.17196967390311482, "grad_norm": 1.0229042944474314, "learning_rate": 4.990556549946138e-06, "loss": 0.5198, "step": 2824 }, { "epoch": 0.1720305696799927, "grad_norm": 1.0639781771423258, "learning_rate": 4.990549619528939e-06, "loss": 0.5373, "step": 2825 }, { "epoch": 0.17209146545687057, "grad_norm": 1.0344370620783385, "learning_rate": 4.990542686574419e-06, "loss": 0.5415, "step": 2826 }, { "epoch": 0.17215236123374844, "grad_norm": 1.031824150863195, "learning_rate": 4.990535751082585e-06, "loss": 0.4939, "step": 2827 }, { "epoch": 0.1722132570106263, "grad_norm": 1.0818433035472284, "learning_rate": 4.990528813053447e-06, "loss": 0.5193, "step": 2828 }, { "epoch": 0.1722741527875042, "grad_norm": 1.0580872460135264, "learning_rate": 4.99052187248701e-06, "loss": 0.5769, "step": 2829 }, { "epoch": 0.17233504856438206, "grad_norm": 1.1421209826242573, "learning_rate": 4.990514929383279e-06, "loss": 0.5182, "step": 2830 }, { "epoch": 0.17239594434125993, "grad_norm": 0.994124819717035, "learning_rate": 4.990507983742265e-06, "loss": 0.4835, "step": 2831 }, { "epoch": 0.1724568401181378, "grad_norm": 1.090468653868473, "learning_rate": 4.990501035563973e-06, "loss": 0.4005, "step": 2832 }, { "epoch": 0.17251773589501568, "grad_norm": 0.9645813192506792, "learning_rate": 4.990494084848409e-06, "loss": 0.5512, "step": 2833 }, { "epoch": 0.17257863167189355, "grad_norm": 1.0274702115141894, "learning_rate": 4.990487131595583e-06, "loss": 0.5021, "step": 2834 }, { "epoch": 0.17263952744877142, "grad_norm": 1.0137468539455536, "learning_rate": 4.9904801758055e-06, "loss": 0.5426, "step": 2835 }, { "epoch": 0.1727004232256493, "grad_norm": 0.9336835761677942, "learning_rate": 4.9904732174781675e-06, "loss": 0.5153, "step": 2836 }, { "epoch": 0.17276131900252717, "grad_norm": 1.181349674349817, "learning_rate": 4.990466256613592e-06, "loss": 0.4497, "step": 2837 }, { "epoch": 0.17282221477940504, "grad_norm": 1.1016020651813316, "learning_rate": 4.990459293211782e-06, "loss": 0.4953, "step": 2838 }, { "epoch": 0.17288311055628292, "grad_norm": 0.932403680713954, "learning_rate": 4.990452327272743e-06, "loss": 0.5929, "step": 2839 }, { "epoch": 0.1729440063331608, "grad_norm": 1.1033196257091769, "learning_rate": 4.990445358796484e-06, "loss": 0.4815, "step": 2840 }, { "epoch": 0.17300490211003866, "grad_norm": 0.9791316593566508, "learning_rate": 4.990438387783011e-06, "loss": 0.5793, "step": 2841 }, { "epoch": 0.17306579788691653, "grad_norm": 1.0241999766501135, "learning_rate": 4.9904314142323305e-06, "loss": 0.4922, "step": 2842 }, { "epoch": 0.1731266936637944, "grad_norm": 1.0813930554385807, "learning_rate": 4.990424438144451e-06, "loss": 0.448, "step": 2843 }, { "epoch": 0.17318758944067228, "grad_norm": 1.0049213914613377, "learning_rate": 4.990417459519379e-06, "loss": 0.5693, "step": 2844 }, { "epoch": 0.17324848521755015, "grad_norm": 0.9986698332217383, "learning_rate": 4.990410478357121e-06, "loss": 0.5375, "step": 2845 }, { "epoch": 0.17330938099442803, "grad_norm": 1.0732560725406215, "learning_rate": 4.990403494657684e-06, "loss": 0.487, "step": 2846 }, { "epoch": 0.1733702767713059, "grad_norm": 0.948030582488427, "learning_rate": 4.990396508421076e-06, "loss": 0.5586, "step": 2847 }, { "epoch": 0.17343117254818377, "grad_norm": 1.0769328202764965, "learning_rate": 4.990389519647304e-06, "loss": 0.5225, "step": 2848 }, { "epoch": 0.17349206832506164, "grad_norm": 1.110471250602923, "learning_rate": 4.990382528336375e-06, "loss": 0.528, "step": 2849 }, { "epoch": 0.17355296410193954, "grad_norm": 1.0325151393441803, "learning_rate": 4.990375534488296e-06, "loss": 0.5727, "step": 2850 }, { "epoch": 0.17361385987881742, "grad_norm": 1.0935110339361174, "learning_rate": 4.990368538103074e-06, "loss": 0.5128, "step": 2851 }, { "epoch": 0.1736747556556953, "grad_norm": 1.0763073025687575, "learning_rate": 4.990361539180717e-06, "loss": 0.4738, "step": 2852 }, { "epoch": 0.17373565143257316, "grad_norm": 1.015158663200676, "learning_rate": 4.99035453772123e-06, "loss": 0.4827, "step": 2853 }, { "epoch": 0.17379654720945104, "grad_norm": 1.0940206618729, "learning_rate": 4.990347533724623e-06, "loss": 0.5059, "step": 2854 }, { "epoch": 0.1738574429863289, "grad_norm": 0.944963081508007, "learning_rate": 4.990340527190901e-06, "loss": 0.5308, "step": 2855 }, { "epoch": 0.17391833876320678, "grad_norm": 1.067287525286117, "learning_rate": 4.990333518120072e-06, "loss": 0.501, "step": 2856 }, { "epoch": 0.17397923454008465, "grad_norm": 1.0940080176931142, "learning_rate": 4.990326506512143e-06, "loss": 0.5097, "step": 2857 }, { "epoch": 0.17404013031696253, "grad_norm": 0.9601523206785813, "learning_rate": 4.990319492367121e-06, "loss": 0.5122, "step": 2858 }, { "epoch": 0.1741010260938404, "grad_norm": 1.1138189136042353, "learning_rate": 4.990312475685014e-06, "loss": 0.5153, "step": 2859 }, { "epoch": 0.17416192187071827, "grad_norm": 0.9627410679911211, "learning_rate": 4.990305456465828e-06, "loss": 0.5306, "step": 2860 }, { "epoch": 0.17422281764759615, "grad_norm": 1.1485663342906365, "learning_rate": 4.990298434709571e-06, "loss": 0.4799, "step": 2861 }, { "epoch": 0.17428371342447402, "grad_norm": 1.0538980002239324, "learning_rate": 4.990291410416248e-06, "loss": 0.5694, "step": 2862 }, { "epoch": 0.1743446092013519, "grad_norm": 1.0851165137272176, "learning_rate": 4.990284383585869e-06, "loss": 0.5245, "step": 2863 }, { "epoch": 0.17440550497822976, "grad_norm": 0.9958313898450191, "learning_rate": 4.990277354218441e-06, "loss": 0.5846, "step": 2864 }, { "epoch": 0.17446640075510764, "grad_norm": 1.0908389434248047, "learning_rate": 4.990270322313969e-06, "loss": 0.4758, "step": 2865 }, { "epoch": 0.1745272965319855, "grad_norm": 1.0365647506971172, "learning_rate": 4.9902632878724615e-06, "loss": 0.5732, "step": 2866 }, { "epoch": 0.17458819230886338, "grad_norm": 1.0239564858370875, "learning_rate": 4.990256250893925e-06, "loss": 0.5107, "step": 2867 }, { "epoch": 0.17464908808574126, "grad_norm": 0.9857937349122177, "learning_rate": 4.990249211378369e-06, "loss": 0.5153, "step": 2868 }, { "epoch": 0.17470998386261913, "grad_norm": 1.0608235484946178, "learning_rate": 4.990242169325798e-06, "loss": 0.5055, "step": 2869 }, { "epoch": 0.174770879639497, "grad_norm": 1.047745994394843, "learning_rate": 4.99023512473622e-06, "loss": 0.4922, "step": 2870 }, { "epoch": 0.17483177541637487, "grad_norm": 0.9842879889870572, "learning_rate": 4.990228077609641e-06, "loss": 0.5133, "step": 2871 }, { "epoch": 0.17489267119325275, "grad_norm": 1.0962082387953496, "learning_rate": 4.990221027946071e-06, "loss": 0.4837, "step": 2872 }, { "epoch": 0.17495356697013062, "grad_norm": 1.1078890066700453, "learning_rate": 4.990213975745515e-06, "loss": 0.5475, "step": 2873 }, { "epoch": 0.1750144627470085, "grad_norm": 1.0886014680181912, "learning_rate": 4.990206921007981e-06, "loss": 0.4676, "step": 2874 }, { "epoch": 0.17507535852388637, "grad_norm": 1.0657385089530438, "learning_rate": 4.990199863733476e-06, "loss": 0.5387, "step": 2875 }, { "epoch": 0.17513625430076424, "grad_norm": 1.080845299211313, "learning_rate": 4.990192803922007e-06, "loss": 0.5299, "step": 2876 }, { "epoch": 0.1751971500776421, "grad_norm": 1.101243950131989, "learning_rate": 4.990185741573581e-06, "loss": 0.5542, "step": 2877 }, { "epoch": 0.17525804585451998, "grad_norm": 1.0195902392730605, "learning_rate": 4.990178676688206e-06, "loss": 0.4704, "step": 2878 }, { "epoch": 0.17531894163139786, "grad_norm": 1.0370627034937698, "learning_rate": 4.990171609265889e-06, "loss": 0.4706, "step": 2879 }, { "epoch": 0.17537983740827573, "grad_norm": 1.0268323816938052, "learning_rate": 4.990164539306636e-06, "loss": 0.4361, "step": 2880 }, { "epoch": 0.1754407331851536, "grad_norm": 0.9948700954720737, "learning_rate": 4.990157466810456e-06, "loss": 0.5515, "step": 2881 }, { "epoch": 0.17550162896203148, "grad_norm": 1.0762551068184951, "learning_rate": 4.9901503917773546e-06, "loss": 0.4877, "step": 2882 }, { "epoch": 0.17556252473890935, "grad_norm": 1.0387403663712056, "learning_rate": 4.990143314207341e-06, "loss": 0.4933, "step": 2883 }, { "epoch": 0.17562342051578722, "grad_norm": 1.070106313196307, "learning_rate": 4.99013623410042e-06, "loss": 0.5191, "step": 2884 }, { "epoch": 0.1756843162926651, "grad_norm": 1.1217640132014104, "learning_rate": 4.990129151456599e-06, "loss": 0.5125, "step": 2885 }, { "epoch": 0.17574521206954297, "grad_norm": 1.0468195489205663, "learning_rate": 4.990122066275889e-06, "loss": 0.4342, "step": 2886 }, { "epoch": 0.17580610784642084, "grad_norm": 1.0003391476231984, "learning_rate": 4.990114978558293e-06, "loss": 0.5701, "step": 2887 }, { "epoch": 0.1758670036232987, "grad_norm": 1.0444770143360116, "learning_rate": 4.990107888303819e-06, "loss": 0.474, "step": 2888 }, { "epoch": 0.17592789940017659, "grad_norm": 1.056410805849003, "learning_rate": 4.990100795512476e-06, "loss": 0.4796, "step": 2889 }, { "epoch": 0.17598879517705446, "grad_norm": 1.025992427674052, "learning_rate": 4.990093700184269e-06, "loss": 0.4695, "step": 2890 }, { "epoch": 0.17604969095393236, "grad_norm": 1.046911684341831, "learning_rate": 4.990086602319206e-06, "loss": 0.5768, "step": 2891 }, { "epoch": 0.17611058673081023, "grad_norm": 1.0832372925122673, "learning_rate": 4.990079501917296e-06, "loss": 0.4588, "step": 2892 }, { "epoch": 0.1761714825076881, "grad_norm": 0.9744888419801195, "learning_rate": 4.990072398978544e-06, "loss": 0.4744, "step": 2893 }, { "epoch": 0.17623237828456598, "grad_norm": 1.1463478746474702, "learning_rate": 4.990065293502958e-06, "loss": 0.4597, "step": 2894 }, { "epoch": 0.17629327406144385, "grad_norm": 0.9861464190763995, "learning_rate": 4.990058185490545e-06, "loss": 0.5483, "step": 2895 }, { "epoch": 0.17635416983832172, "grad_norm": 1.0233087424189025, "learning_rate": 4.990051074941314e-06, "loss": 0.5325, "step": 2896 }, { "epoch": 0.1764150656151996, "grad_norm": 1.066183218541031, "learning_rate": 4.990043961855269e-06, "loss": 0.514, "step": 2897 }, { "epoch": 0.17647596139207747, "grad_norm": 0.967799192669706, "learning_rate": 4.99003684623242e-06, "loss": 0.5456, "step": 2898 }, { "epoch": 0.17653685716895534, "grad_norm": 1.0598647521457913, "learning_rate": 4.990029728072772e-06, "loss": 0.4709, "step": 2899 }, { "epoch": 0.17659775294583321, "grad_norm": 1.0555974807079176, "learning_rate": 4.990022607376335e-06, "loss": 0.5369, "step": 2900 }, { "epoch": 0.1766586487227111, "grad_norm": 1.0183882569502805, "learning_rate": 4.990015484143113e-06, "loss": 0.5011, "step": 2901 }, { "epoch": 0.17671954449958896, "grad_norm": 0.9646501189490966, "learning_rate": 4.990008358373116e-06, "loss": 0.5395, "step": 2902 }, { "epoch": 0.17678044027646683, "grad_norm": 1.0311877039417705, "learning_rate": 4.99000123006635e-06, "loss": 0.546, "step": 2903 }, { "epoch": 0.1768413360533447, "grad_norm": 1.1124749443110156, "learning_rate": 4.989994099222823e-06, "loss": 0.4831, "step": 2904 }, { "epoch": 0.17690223183022258, "grad_norm": 1.0753524632190885, "learning_rate": 4.989986965842542e-06, "loss": 0.4896, "step": 2905 }, { "epoch": 0.17696312760710045, "grad_norm": 1.0816577128250198, "learning_rate": 4.989979829925514e-06, "loss": 0.4381, "step": 2906 }, { "epoch": 0.17702402338397832, "grad_norm": 1.096709533487499, "learning_rate": 4.9899726914717455e-06, "loss": 0.4659, "step": 2907 }, { "epoch": 0.1770849191608562, "grad_norm": 1.1531183672064396, "learning_rate": 4.9899655504812446e-06, "loss": 0.5203, "step": 2908 }, { "epoch": 0.17714581493773407, "grad_norm": 1.023014523800475, "learning_rate": 4.98995840695402e-06, "loss": 0.4928, "step": 2909 }, { "epoch": 0.17720671071461194, "grad_norm": 0.9958430993449372, "learning_rate": 4.989951260890076e-06, "loss": 0.5416, "step": 2910 }, { "epoch": 0.17726760649148982, "grad_norm": 1.112405234525034, "learning_rate": 4.989944112289422e-06, "loss": 0.4172, "step": 2911 }, { "epoch": 0.1773285022683677, "grad_norm": 0.9882807506997218, "learning_rate": 4.9899369611520644e-06, "loss": 0.5149, "step": 2912 }, { "epoch": 0.17738939804524556, "grad_norm": 1.1040986656232328, "learning_rate": 4.989929807478011e-06, "loss": 0.4887, "step": 2913 }, { "epoch": 0.17745029382212343, "grad_norm": 0.99720261196506, "learning_rate": 4.98992265126727e-06, "loss": 0.5077, "step": 2914 }, { "epoch": 0.1775111895990013, "grad_norm": 0.9573113483054733, "learning_rate": 4.9899154925198455e-06, "loss": 0.5171, "step": 2915 }, { "epoch": 0.17757208537587918, "grad_norm": 1.0538335030681827, "learning_rate": 4.989908331235748e-06, "loss": 0.5169, "step": 2916 }, { "epoch": 0.17763298115275705, "grad_norm": 1.0023611959813634, "learning_rate": 4.9899011674149834e-06, "loss": 0.4851, "step": 2917 }, { "epoch": 0.17769387692963493, "grad_norm": 1.002942266648752, "learning_rate": 4.989894001057559e-06, "loss": 0.5429, "step": 2918 }, { "epoch": 0.1777547727065128, "grad_norm": 1.1461247440508093, "learning_rate": 4.989886832163483e-06, "loss": 0.4774, "step": 2919 }, { "epoch": 0.17781566848339067, "grad_norm": 1.0760741897668422, "learning_rate": 4.989879660732761e-06, "loss": 0.5329, "step": 2920 }, { "epoch": 0.17787656426026854, "grad_norm": 1.0733303125532931, "learning_rate": 4.9898724867654025e-06, "loss": 0.4224, "step": 2921 }, { "epoch": 0.17793746003714642, "grad_norm": 1.0414654801019132, "learning_rate": 4.989865310261414e-06, "loss": 0.4906, "step": 2922 }, { "epoch": 0.1779983558140243, "grad_norm": 1.0825052364630556, "learning_rate": 4.989858131220801e-06, "loss": 0.5117, "step": 2923 }, { "epoch": 0.17805925159090216, "grad_norm": 1.0035847653927539, "learning_rate": 4.989850949643573e-06, "loss": 0.5151, "step": 2924 }, { "epoch": 0.17812014736778004, "grad_norm": 0.9852571853059593, "learning_rate": 4.989843765529737e-06, "loss": 0.511, "step": 2925 }, { "epoch": 0.1781810431446579, "grad_norm": 1.0495445239278571, "learning_rate": 4.9898365788793e-06, "loss": 0.572, "step": 2926 }, { "epoch": 0.17824193892153578, "grad_norm": 0.9624871331460763, "learning_rate": 4.989829389692269e-06, "loss": 0.5805, "step": 2927 }, { "epoch": 0.17830283469841365, "grad_norm": 1.0667337430113062, "learning_rate": 4.989822197968651e-06, "loss": 0.4918, "step": 2928 }, { "epoch": 0.17836373047529153, "grad_norm": 1.1840816938378262, "learning_rate": 4.9898150037084545e-06, "loss": 0.5009, "step": 2929 }, { "epoch": 0.1784246262521694, "grad_norm": 0.9764981874415312, "learning_rate": 4.989807806911686e-06, "loss": 0.541, "step": 2930 }, { "epoch": 0.17848552202904727, "grad_norm": 1.0145003442027638, "learning_rate": 4.9898006075783535e-06, "loss": 0.4666, "step": 2931 }, { "epoch": 0.17854641780592517, "grad_norm": 0.9712197228916821, "learning_rate": 4.989793405708464e-06, "loss": 0.5274, "step": 2932 }, { "epoch": 0.17860731358280305, "grad_norm": 1.0002849295095022, "learning_rate": 4.989786201302024e-06, "loss": 0.5784, "step": 2933 }, { "epoch": 0.17866820935968092, "grad_norm": 1.1331426017906456, "learning_rate": 4.989778994359043e-06, "loss": 0.5003, "step": 2934 }, { "epoch": 0.1787291051365588, "grad_norm": 1.068311680199407, "learning_rate": 4.989771784879525e-06, "loss": 0.5417, "step": 2935 }, { "epoch": 0.17879000091343666, "grad_norm": 1.0492294993702347, "learning_rate": 4.9897645728634805e-06, "loss": 0.5935, "step": 2936 }, { "epoch": 0.17885089669031454, "grad_norm": 1.0975065331998533, "learning_rate": 4.989757358310915e-06, "loss": 0.4994, "step": 2937 }, { "epoch": 0.1789117924671924, "grad_norm": 1.0189743994425222, "learning_rate": 4.989750141221837e-06, "loss": 0.5126, "step": 2938 }, { "epoch": 0.17897268824407028, "grad_norm": 0.9726091581253123, "learning_rate": 4.989742921596253e-06, "loss": 0.547, "step": 2939 }, { "epoch": 0.17903358402094816, "grad_norm": 1.0799937061815332, "learning_rate": 4.98973569943417e-06, "loss": 0.5029, "step": 2940 }, { "epoch": 0.17909447979782603, "grad_norm": 1.0945217259187257, "learning_rate": 4.989728474735597e-06, "loss": 0.4988, "step": 2941 }, { "epoch": 0.1791553755747039, "grad_norm": 1.0637505051670029, "learning_rate": 4.989721247500541e-06, "loss": 0.5343, "step": 2942 }, { "epoch": 0.17921627135158177, "grad_norm": 1.049199996667739, "learning_rate": 4.989714017729007e-06, "loss": 0.5203, "step": 2943 }, { "epoch": 0.17927716712845965, "grad_norm": 1.0530029253241426, "learning_rate": 4.989706785421006e-06, "loss": 0.5309, "step": 2944 }, { "epoch": 0.17933806290533752, "grad_norm": 1.069780805328936, "learning_rate": 4.989699550576542e-06, "loss": 0.4911, "step": 2945 }, { "epoch": 0.1793989586822154, "grad_norm": 1.0423387549674876, "learning_rate": 4.989692313195624e-06, "loss": 0.4742, "step": 2946 }, { "epoch": 0.17945985445909327, "grad_norm": 1.1089285318115027, "learning_rate": 4.98968507327826e-06, "loss": 0.4564, "step": 2947 }, { "epoch": 0.17952075023597114, "grad_norm": 1.1439799000242916, "learning_rate": 4.989677830824456e-06, "loss": 0.463, "step": 2948 }, { "epoch": 0.179581646012849, "grad_norm": 0.9169150028090098, "learning_rate": 4.98967058583422e-06, "loss": 0.526, "step": 2949 }, { "epoch": 0.17964254178972688, "grad_norm": 1.1500012470349439, "learning_rate": 4.98966333830756e-06, "loss": 0.5372, "step": 2950 }, { "epoch": 0.17970343756660476, "grad_norm": 1.0836098616666747, "learning_rate": 4.989656088244482e-06, "loss": 0.5163, "step": 2951 }, { "epoch": 0.17976433334348263, "grad_norm": 0.9875413087533366, "learning_rate": 4.989648835644994e-06, "loss": 0.4857, "step": 2952 }, { "epoch": 0.1798252291203605, "grad_norm": 0.9797632086807937, "learning_rate": 4.989641580509105e-06, "loss": 0.4964, "step": 2953 }, { "epoch": 0.17988612489723838, "grad_norm": 0.9709888884279402, "learning_rate": 4.98963432283682e-06, "loss": 0.5455, "step": 2954 }, { "epoch": 0.17994702067411625, "grad_norm": 1.0224105311829343, "learning_rate": 4.989627062628147e-06, "loss": 0.4892, "step": 2955 }, { "epoch": 0.18000791645099412, "grad_norm": 1.169484171214119, "learning_rate": 4.989619799883094e-06, "loss": 0.4715, "step": 2956 }, { "epoch": 0.180068812227872, "grad_norm": 1.0334368144036203, "learning_rate": 4.989612534601667e-06, "loss": 0.5519, "step": 2957 }, { "epoch": 0.18012970800474987, "grad_norm": 1.0432350371129349, "learning_rate": 4.989605266783876e-06, "loss": 0.4858, "step": 2958 }, { "epoch": 0.18019060378162774, "grad_norm": 0.9883117441431462, "learning_rate": 4.989597996429727e-06, "loss": 0.5313, "step": 2959 }, { "epoch": 0.1802514995585056, "grad_norm": 1.2112218433893853, "learning_rate": 4.989590723539226e-06, "loss": 0.4404, "step": 2960 }, { "epoch": 0.18031239533538349, "grad_norm": 1.0024216538099917, "learning_rate": 4.989583448112383e-06, "loss": 0.5369, "step": 2961 }, { "epoch": 0.18037329111226136, "grad_norm": 0.9441034066706006, "learning_rate": 4.989576170149204e-06, "loss": 0.5286, "step": 2962 }, { "epoch": 0.18043418688913923, "grad_norm": 1.0002070414035555, "learning_rate": 4.989568889649696e-06, "loss": 0.5431, "step": 2963 }, { "epoch": 0.1804950826660171, "grad_norm": 1.1270788808570624, "learning_rate": 4.989561606613867e-06, "loss": 0.4601, "step": 2964 }, { "epoch": 0.18055597844289498, "grad_norm": 1.1537515150038782, "learning_rate": 4.989554321041724e-06, "loss": 0.4994, "step": 2965 }, { "epoch": 0.18061687421977285, "grad_norm": 1.1153800459869663, "learning_rate": 4.989547032933276e-06, "loss": 0.5008, "step": 2966 }, { "epoch": 0.18067776999665072, "grad_norm": 1.1208892539850333, "learning_rate": 4.989539742288529e-06, "loss": 0.5394, "step": 2967 }, { "epoch": 0.1807386657735286, "grad_norm": 1.0490909892304123, "learning_rate": 4.989532449107491e-06, "loss": 0.5188, "step": 2968 }, { "epoch": 0.18079956155040647, "grad_norm": 0.9796315661679105, "learning_rate": 4.989525153390168e-06, "loss": 0.5087, "step": 2969 }, { "epoch": 0.18086045732728434, "grad_norm": 0.9727815677274755, "learning_rate": 4.989517855136568e-06, "loss": 0.5334, "step": 2970 }, { "epoch": 0.1809213531041622, "grad_norm": 1.0617890946594448, "learning_rate": 4.9895105543467e-06, "loss": 0.4983, "step": 2971 }, { "epoch": 0.1809822488810401, "grad_norm": 1.002861051863644, "learning_rate": 4.989503251020571e-06, "loss": 0.4955, "step": 2972 }, { "epoch": 0.181043144657918, "grad_norm": 1.0733719489621698, "learning_rate": 4.989495945158187e-06, "loss": 0.5463, "step": 2973 }, { "epoch": 0.18110404043479586, "grad_norm": 1.0125665028932713, "learning_rate": 4.989488636759557e-06, "loss": 0.6167, "step": 2974 }, { "epoch": 0.18116493621167373, "grad_norm": 0.9579094349427676, "learning_rate": 4.989481325824687e-06, "loss": 0.5418, "step": 2975 }, { "epoch": 0.1812258319885516, "grad_norm": 1.0106035987253672, "learning_rate": 4.989474012353585e-06, "loss": 0.4979, "step": 2976 }, { "epoch": 0.18128672776542948, "grad_norm": 1.154605339076068, "learning_rate": 4.989466696346259e-06, "loss": 0.4749, "step": 2977 }, { "epoch": 0.18134762354230735, "grad_norm": 1.090846269537333, "learning_rate": 4.989459377802716e-06, "loss": 0.5102, "step": 2978 }, { "epoch": 0.18140851931918522, "grad_norm": 1.0058843530329094, "learning_rate": 4.989452056722964e-06, "loss": 0.5108, "step": 2979 }, { "epoch": 0.1814694150960631, "grad_norm": 0.9949185343541124, "learning_rate": 4.9894447331070095e-06, "loss": 0.5488, "step": 2980 }, { "epoch": 0.18153031087294097, "grad_norm": 0.9954191785928252, "learning_rate": 4.98943740695486e-06, "loss": 0.5436, "step": 2981 }, { "epoch": 0.18159120664981884, "grad_norm": 1.011321402897738, "learning_rate": 4.989430078266524e-06, "loss": 0.4717, "step": 2982 }, { "epoch": 0.18165210242669672, "grad_norm": 1.1229632897865807, "learning_rate": 4.989422747042009e-06, "loss": 0.5433, "step": 2983 }, { "epoch": 0.1817129982035746, "grad_norm": 1.0824794785539829, "learning_rate": 4.989415413281321e-06, "loss": 0.5461, "step": 2984 }, { "epoch": 0.18177389398045246, "grad_norm": 1.0405269607482674, "learning_rate": 4.989408076984469e-06, "loss": 0.4891, "step": 2985 }, { "epoch": 0.18183478975733033, "grad_norm": 1.0469529302489466, "learning_rate": 4.98940073815146e-06, "loss": 0.5714, "step": 2986 }, { "epoch": 0.1818956855342082, "grad_norm": 0.9033409570781581, "learning_rate": 4.9893933967823e-06, "loss": 0.4826, "step": 2987 }, { "epoch": 0.18195658131108608, "grad_norm": 1.1072790733934696, "learning_rate": 4.989386052876999e-06, "loss": 0.5228, "step": 2988 }, { "epoch": 0.18201747708796395, "grad_norm": 1.2311264900164969, "learning_rate": 4.989378706435563e-06, "loss": 0.4691, "step": 2989 }, { "epoch": 0.18207837286484183, "grad_norm": 1.115906797296451, "learning_rate": 4.9893713574579986e-06, "loss": 0.4839, "step": 2990 }, { "epoch": 0.1821392686417197, "grad_norm": 1.0902770730741886, "learning_rate": 4.989364005944315e-06, "loss": 0.5115, "step": 2991 }, { "epoch": 0.18220016441859757, "grad_norm": 1.0072942008216492, "learning_rate": 4.98935665189452e-06, "loss": 0.5429, "step": 2992 }, { "epoch": 0.18226106019547544, "grad_norm": 1.1077611774558933, "learning_rate": 4.9893492953086194e-06, "loss": 0.5188, "step": 2993 }, { "epoch": 0.18232195597235332, "grad_norm": 1.2007658249977458, "learning_rate": 4.989341936186621e-06, "loss": 0.4494, "step": 2994 }, { "epoch": 0.1823828517492312, "grad_norm": 1.1277211635797146, "learning_rate": 4.989334574528533e-06, "loss": 0.453, "step": 2995 }, { "epoch": 0.18244374752610906, "grad_norm": 1.0474446009502603, "learning_rate": 4.989327210334364e-06, "loss": 0.4918, "step": 2996 }, { "epoch": 0.18250464330298694, "grad_norm": 1.073641760455255, "learning_rate": 4.989319843604119e-06, "loss": 0.4617, "step": 2997 }, { "epoch": 0.1825655390798648, "grad_norm": 0.9924653507476717, "learning_rate": 4.989312474337808e-06, "loss": 0.5645, "step": 2998 }, { "epoch": 0.18262643485674268, "grad_norm": 1.11497310186271, "learning_rate": 4.989305102535436e-06, "loss": 0.5119, "step": 2999 }, { "epoch": 0.18268733063362055, "grad_norm": 1.0789060357063553, "learning_rate": 4.9892977281970114e-06, "loss": 0.5191, "step": 3000 }, { "epoch": 0.18274822641049843, "grad_norm": 1.0863450875460754, "learning_rate": 4.9892903513225434e-06, "loss": 0.4847, "step": 3001 }, { "epoch": 0.1828091221873763, "grad_norm": 1.1876218428985053, "learning_rate": 4.989282971912037e-06, "loss": 0.4852, "step": 3002 }, { "epoch": 0.18287001796425417, "grad_norm": 1.0650240049353972, "learning_rate": 4.989275589965501e-06, "loss": 0.5408, "step": 3003 }, { "epoch": 0.18293091374113205, "grad_norm": 0.9431314809732075, "learning_rate": 4.989268205482943e-06, "loss": 0.557, "step": 3004 }, { "epoch": 0.18299180951800992, "grad_norm": 1.0533349605804216, "learning_rate": 4.9892608184643695e-06, "loss": 0.5107, "step": 3005 }, { "epoch": 0.1830527052948878, "grad_norm": 1.0708328504203484, "learning_rate": 4.98925342890979e-06, "loss": 0.5094, "step": 3006 }, { "epoch": 0.18311360107176566, "grad_norm": 1.1018991035143113, "learning_rate": 4.989246036819211e-06, "loss": 0.512, "step": 3007 }, { "epoch": 0.18317449684864354, "grad_norm": 1.1015862906961902, "learning_rate": 4.989238642192639e-06, "loss": 0.477, "step": 3008 }, { "epoch": 0.1832353926255214, "grad_norm": 1.0224091797985666, "learning_rate": 4.9892312450300826e-06, "loss": 0.4809, "step": 3009 }, { "epoch": 0.18329628840239928, "grad_norm": 1.0298460027193963, "learning_rate": 4.98922384533155e-06, "loss": 0.5135, "step": 3010 }, { "epoch": 0.18335718417927716, "grad_norm": 1.0109119530362292, "learning_rate": 4.989216443097047e-06, "loss": 0.4788, "step": 3011 }, { "epoch": 0.18341807995615503, "grad_norm": 0.9957525961387114, "learning_rate": 4.9892090383265824e-06, "loss": 0.5582, "step": 3012 }, { "epoch": 0.1834789757330329, "grad_norm": 1.0487742648547973, "learning_rate": 4.989201631020163e-06, "loss": 0.5073, "step": 3013 }, { "epoch": 0.1835398715099108, "grad_norm": 1.0094848994284027, "learning_rate": 4.989194221177797e-06, "loss": 0.4649, "step": 3014 }, { "epoch": 0.18360076728678867, "grad_norm": 1.0831544403658697, "learning_rate": 4.989186808799492e-06, "loss": 0.5346, "step": 3015 }, { "epoch": 0.18366166306366655, "grad_norm": 1.083137002844984, "learning_rate": 4.989179393885254e-06, "loss": 0.5279, "step": 3016 }, { "epoch": 0.18372255884054442, "grad_norm": 0.9893901741364975, "learning_rate": 4.989171976435093e-06, "loss": 0.5521, "step": 3017 }, { "epoch": 0.1837834546174223, "grad_norm": 1.0750113109511321, "learning_rate": 4.989164556449016e-06, "loss": 0.4733, "step": 3018 }, { "epoch": 0.18384435039430017, "grad_norm": 1.132894492116815, "learning_rate": 4.989157133927028e-06, "loss": 0.5353, "step": 3019 }, { "epoch": 0.18390524617117804, "grad_norm": 1.03379568012454, "learning_rate": 4.98914970886914e-06, "loss": 0.4748, "step": 3020 }, { "epoch": 0.1839661419480559, "grad_norm": 1.0168347488630651, "learning_rate": 4.9891422812753575e-06, "loss": 0.4965, "step": 3021 }, { "epoch": 0.18402703772493378, "grad_norm": 1.0081124354040771, "learning_rate": 4.9891348511456885e-06, "loss": 0.474, "step": 3022 }, { "epoch": 0.18408793350181166, "grad_norm": 1.038522484221617, "learning_rate": 4.98912741848014e-06, "loss": 0.4785, "step": 3023 }, { "epoch": 0.18414882927868953, "grad_norm": 1.089000941554744, "learning_rate": 4.989119983278722e-06, "loss": 0.5265, "step": 3024 }, { "epoch": 0.1842097250555674, "grad_norm": 0.9390049464364594, "learning_rate": 4.989112545541438e-06, "loss": 0.5093, "step": 3025 }, { "epoch": 0.18427062083244528, "grad_norm": 1.0033229653127544, "learning_rate": 4.989105105268299e-06, "loss": 0.5488, "step": 3026 }, { "epoch": 0.18433151660932315, "grad_norm": 0.9758999528479504, "learning_rate": 4.9890976624593125e-06, "loss": 0.5276, "step": 3027 }, { "epoch": 0.18439241238620102, "grad_norm": 1.129049411522796, "learning_rate": 4.989090217114484e-06, "loss": 0.4616, "step": 3028 }, { "epoch": 0.1844533081630789, "grad_norm": 1.080919836895642, "learning_rate": 4.989082769233822e-06, "loss": 0.4492, "step": 3029 }, { "epoch": 0.18451420393995677, "grad_norm": 1.1170419624393422, "learning_rate": 4.989075318817335e-06, "loss": 0.484, "step": 3030 }, { "epoch": 0.18457509971683464, "grad_norm": 1.0493133470708642, "learning_rate": 4.989067865865029e-06, "loss": 0.5259, "step": 3031 }, { "epoch": 0.1846359954937125, "grad_norm": 1.033773250873303, "learning_rate": 4.989060410376912e-06, "loss": 0.4768, "step": 3032 }, { "epoch": 0.18469689127059039, "grad_norm": 1.0469351240877973, "learning_rate": 4.989052952352993e-06, "loss": 0.4933, "step": 3033 }, { "epoch": 0.18475778704746826, "grad_norm": 1.022132531180393, "learning_rate": 4.9890454917932785e-06, "loss": 0.4437, "step": 3034 }, { "epoch": 0.18481868282434613, "grad_norm": 1.0024032958595672, "learning_rate": 4.989038028697777e-06, "loss": 0.56, "step": 3035 }, { "epoch": 0.184879578601224, "grad_norm": 0.9571343636151619, "learning_rate": 4.989030563066494e-06, "loss": 0.5624, "step": 3036 }, { "epoch": 0.18494047437810188, "grad_norm": 1.1287206432652925, "learning_rate": 4.989023094899438e-06, "loss": 0.4436, "step": 3037 }, { "epoch": 0.18500137015497975, "grad_norm": 1.1416085150434292, "learning_rate": 4.989015624196618e-06, "loss": 0.5071, "step": 3038 }, { "epoch": 0.18506226593185762, "grad_norm": 1.0910592973325148, "learning_rate": 4.98900815095804e-06, "loss": 0.46, "step": 3039 }, { "epoch": 0.1851231617087355, "grad_norm": 1.0214799300327766, "learning_rate": 4.989000675183713e-06, "loss": 0.4776, "step": 3040 }, { "epoch": 0.18518405748561337, "grad_norm": 1.0598411235948164, "learning_rate": 4.988993196873644e-06, "loss": 0.4375, "step": 3041 }, { "epoch": 0.18524495326249124, "grad_norm": 1.171630937016776, "learning_rate": 4.98898571602784e-06, "loss": 0.4628, "step": 3042 }, { "epoch": 0.1853058490393691, "grad_norm": 1.0330863070229936, "learning_rate": 4.988978232646309e-06, "loss": 0.4895, "step": 3043 }, { "epoch": 0.185366744816247, "grad_norm": 1.1238594874082646, "learning_rate": 4.988970746729059e-06, "loss": 0.5591, "step": 3044 }, { "epoch": 0.18542764059312486, "grad_norm": 1.0619893801895828, "learning_rate": 4.988963258276097e-06, "loss": 0.548, "step": 3045 }, { "epoch": 0.18548853637000273, "grad_norm": 1.0467144166356988, "learning_rate": 4.988955767287431e-06, "loss": 0.4769, "step": 3046 }, { "epoch": 0.1855494321468806, "grad_norm": 1.106558556639032, "learning_rate": 4.988948273763068e-06, "loss": 0.4904, "step": 3047 }, { "epoch": 0.18561032792375848, "grad_norm": 1.0999431464760059, "learning_rate": 4.988940777703018e-06, "loss": 0.491, "step": 3048 }, { "epoch": 0.18567122370063635, "grad_norm": 1.1492163867976506, "learning_rate": 4.988933279107285e-06, "loss": 0.479, "step": 3049 }, { "epoch": 0.18573211947751422, "grad_norm": 1.1193422700592082, "learning_rate": 4.988925777975879e-06, "loss": 0.5256, "step": 3050 }, { "epoch": 0.1857930152543921, "grad_norm": 1.1203731149713823, "learning_rate": 4.9889182743088085e-06, "loss": 0.4957, "step": 3051 }, { "epoch": 0.18585391103126997, "grad_norm": 0.9775794168476334, "learning_rate": 4.988910768106079e-06, "loss": 0.5308, "step": 3052 }, { "epoch": 0.18591480680814784, "grad_norm": 1.0460863436336703, "learning_rate": 4.9889032593676986e-06, "loss": 0.4671, "step": 3053 }, { "epoch": 0.18597570258502572, "grad_norm": 1.1170985164460292, "learning_rate": 4.988895748093675e-06, "loss": 0.495, "step": 3054 }, { "epoch": 0.18603659836190362, "grad_norm": 1.0083468979837484, "learning_rate": 4.988888234284017e-06, "loss": 0.486, "step": 3055 }, { "epoch": 0.1860974941387815, "grad_norm": 1.045733163478923, "learning_rate": 4.988880717938731e-06, "loss": 0.4743, "step": 3056 }, { "epoch": 0.18615838991565936, "grad_norm": 1.0457132333939045, "learning_rate": 4.988873199057826e-06, "loss": 0.4576, "step": 3057 }, { "epoch": 0.18621928569253723, "grad_norm": 0.9407185743200098, "learning_rate": 4.988865677641307e-06, "loss": 0.5319, "step": 3058 }, { "epoch": 0.1862801814694151, "grad_norm": 1.0415034843698698, "learning_rate": 4.988858153689184e-06, "loss": 0.5215, "step": 3059 }, { "epoch": 0.18634107724629298, "grad_norm": 1.012897371019878, "learning_rate": 4.988850627201465e-06, "loss": 0.5539, "step": 3060 }, { "epoch": 0.18640197302317085, "grad_norm": 1.0442162769630015, "learning_rate": 4.9888430981781554e-06, "loss": 0.4976, "step": 3061 }, { "epoch": 0.18646286880004873, "grad_norm": 0.9793823382720429, "learning_rate": 4.988835566619266e-06, "loss": 0.5533, "step": 3062 }, { "epoch": 0.1865237645769266, "grad_norm": 1.1192364806004569, "learning_rate": 4.988828032524801e-06, "loss": 0.4884, "step": 3063 }, { "epoch": 0.18658466035380447, "grad_norm": 1.0169373219089992, "learning_rate": 4.98882049589477e-06, "loss": 0.5279, "step": 3064 }, { "epoch": 0.18664555613068234, "grad_norm": 1.0506242110997424, "learning_rate": 4.9888129567291806e-06, "loss": 0.4899, "step": 3065 }, { "epoch": 0.18670645190756022, "grad_norm": 1.0578632474043266, "learning_rate": 4.9888054150280404e-06, "loss": 0.5536, "step": 3066 }, { "epoch": 0.1867673476844381, "grad_norm": 1.0370030937209052, "learning_rate": 4.988797870791357e-06, "loss": 0.4716, "step": 3067 }, { "epoch": 0.18682824346131596, "grad_norm": 1.1605503819769973, "learning_rate": 4.988790324019138e-06, "loss": 0.5399, "step": 3068 }, { "epoch": 0.18688913923819384, "grad_norm": 1.0896355402098847, "learning_rate": 4.988782774711392e-06, "loss": 0.5358, "step": 3069 }, { "epoch": 0.1869500350150717, "grad_norm": 1.0306848895112284, "learning_rate": 4.988775222868124e-06, "loss": 0.5177, "step": 3070 }, { "epoch": 0.18701093079194958, "grad_norm": 1.0379050361482678, "learning_rate": 4.988767668489345e-06, "loss": 0.5255, "step": 3071 }, { "epoch": 0.18707182656882745, "grad_norm": 1.1252450947885337, "learning_rate": 4.98876011157506e-06, "loss": 0.4696, "step": 3072 }, { "epoch": 0.18713272234570533, "grad_norm": 1.0313780538029533, "learning_rate": 4.98875255212528e-06, "loss": 0.4762, "step": 3073 }, { "epoch": 0.1871936181225832, "grad_norm": 0.9040937955286812, "learning_rate": 4.98874499014001e-06, "loss": 0.5599, "step": 3074 }, { "epoch": 0.18725451389946107, "grad_norm": 1.0773671201889772, "learning_rate": 4.988737425619256e-06, "loss": 0.5287, "step": 3075 }, { "epoch": 0.18731540967633895, "grad_norm": 1.1293866134666655, "learning_rate": 4.98872985856303e-06, "loss": 0.5434, "step": 3076 }, { "epoch": 0.18737630545321682, "grad_norm": 1.0291282573916793, "learning_rate": 4.988722288971338e-06, "loss": 0.5629, "step": 3077 }, { "epoch": 0.1874372012300947, "grad_norm": 1.110238853882906, "learning_rate": 4.9887147168441876e-06, "loss": 0.4752, "step": 3078 }, { "epoch": 0.18749809700697256, "grad_norm": 1.0981776575351196, "learning_rate": 4.988707142181586e-06, "loss": 0.4702, "step": 3079 }, { "epoch": 0.18755899278385044, "grad_norm": 0.9240959712827479, "learning_rate": 4.9886995649835415e-06, "loss": 0.4986, "step": 3080 }, { "epoch": 0.1876198885607283, "grad_norm": 1.1673560251932178, "learning_rate": 4.988691985250061e-06, "loss": 0.534, "step": 3081 }, { "epoch": 0.18768078433760618, "grad_norm": 1.0855676509897174, "learning_rate": 4.988684402981153e-06, "loss": 0.4757, "step": 3082 }, { "epoch": 0.18774168011448406, "grad_norm": 1.0299972609954193, "learning_rate": 4.9886768181768255e-06, "loss": 0.5139, "step": 3083 }, { "epoch": 0.18780257589136193, "grad_norm": 1.0445777009179282, "learning_rate": 4.988669230837085e-06, "loss": 0.4757, "step": 3084 }, { "epoch": 0.1878634716682398, "grad_norm": 1.0583623251895278, "learning_rate": 4.9886616409619405e-06, "loss": 0.5244, "step": 3085 }, { "epoch": 0.18792436744511767, "grad_norm": 1.0838038439345008, "learning_rate": 4.988654048551399e-06, "loss": 0.5063, "step": 3086 }, { "epoch": 0.18798526322199555, "grad_norm": 1.0905194937578422, "learning_rate": 4.988646453605469e-06, "loss": 0.5512, "step": 3087 }, { "epoch": 0.18804615899887342, "grad_norm": 1.0450965462896307, "learning_rate": 4.9886388561241576e-06, "loss": 0.5263, "step": 3088 }, { "epoch": 0.1881070547757513, "grad_norm": 1.0723681318009226, "learning_rate": 4.988631256107473e-06, "loss": 0.5751, "step": 3089 }, { "epoch": 0.18816795055262917, "grad_norm": 1.018079991461013, "learning_rate": 4.9886236535554225e-06, "loss": 0.4786, "step": 3090 }, { "epoch": 0.18822884632950704, "grad_norm": 1.163687511763957, "learning_rate": 4.988616048468013e-06, "loss": 0.4306, "step": 3091 }, { "epoch": 0.1882897421063849, "grad_norm": 0.9781867703195624, "learning_rate": 4.988608440845254e-06, "loss": 0.5342, "step": 3092 }, { "epoch": 0.18835063788326278, "grad_norm": 1.062754565652498, "learning_rate": 4.988600830687153e-06, "loss": 0.5541, "step": 3093 }, { "epoch": 0.18841153366014066, "grad_norm": 1.0349173184405176, "learning_rate": 4.988593217993716e-06, "loss": 0.5653, "step": 3094 }, { "epoch": 0.18847242943701853, "grad_norm": 1.0226786913123496, "learning_rate": 4.988585602764953e-06, "loss": 0.5388, "step": 3095 }, { "epoch": 0.18853332521389643, "grad_norm": 0.9925608451651292, "learning_rate": 4.98857798500087e-06, "loss": 0.5465, "step": 3096 }, { "epoch": 0.1885942209907743, "grad_norm": 1.0194618590132898, "learning_rate": 4.988570364701476e-06, "loss": 0.5374, "step": 3097 }, { "epoch": 0.18865511676765218, "grad_norm": 1.0097926064579354, "learning_rate": 4.9885627418667775e-06, "loss": 0.513, "step": 3098 }, { "epoch": 0.18871601254453005, "grad_norm": 1.0437458765460037, "learning_rate": 4.988555116496784e-06, "loss": 0.5161, "step": 3099 }, { "epoch": 0.18877690832140792, "grad_norm": 1.0781017165729385, "learning_rate": 4.988547488591502e-06, "loss": 0.4861, "step": 3100 }, { "epoch": 0.1888378040982858, "grad_norm": 1.0800027826757872, "learning_rate": 4.988539858150939e-06, "loss": 0.4468, "step": 3101 }, { "epoch": 0.18889869987516367, "grad_norm": 0.9314685023814835, "learning_rate": 4.988532225175103e-06, "loss": 0.566, "step": 3102 }, { "epoch": 0.18895959565204154, "grad_norm": 1.062574987024794, "learning_rate": 4.9885245896640034e-06, "loss": 0.4794, "step": 3103 }, { "epoch": 0.1890204914289194, "grad_norm": 0.9310620848859642, "learning_rate": 4.988516951617645e-06, "loss": 0.6073, "step": 3104 }, { "epoch": 0.18908138720579729, "grad_norm": 1.0303269523485012, "learning_rate": 4.98850931103604e-06, "loss": 0.4856, "step": 3105 }, { "epoch": 0.18914228298267516, "grad_norm": 1.1065970629230184, "learning_rate": 4.988501667919191e-06, "loss": 0.4813, "step": 3106 }, { "epoch": 0.18920317875955303, "grad_norm": 1.047502282902639, "learning_rate": 4.98849402226711e-06, "loss": 0.5464, "step": 3107 }, { "epoch": 0.1892640745364309, "grad_norm": 1.157059487833186, "learning_rate": 4.988486374079802e-06, "loss": 0.4938, "step": 3108 }, { "epoch": 0.18932497031330878, "grad_norm": 1.0013730434097707, "learning_rate": 4.988478723357275e-06, "loss": 0.4469, "step": 3109 }, { "epoch": 0.18938586609018665, "grad_norm": 1.1150099675759806, "learning_rate": 4.988471070099538e-06, "loss": 0.4761, "step": 3110 }, { "epoch": 0.18944676186706452, "grad_norm": 1.1314015736547756, "learning_rate": 4.9884634143066e-06, "loss": 0.4904, "step": 3111 }, { "epoch": 0.1895076576439424, "grad_norm": 1.0373527964458378, "learning_rate": 4.988455755978466e-06, "loss": 0.5333, "step": 3112 }, { "epoch": 0.18956855342082027, "grad_norm": 1.1539633642853337, "learning_rate": 4.988448095115145e-06, "loss": 0.4647, "step": 3113 }, { "epoch": 0.18962944919769814, "grad_norm": 0.9830996599738232, "learning_rate": 4.988440431716644e-06, "loss": 0.5837, "step": 3114 }, { "epoch": 0.18969034497457601, "grad_norm": 0.9510612537312464, "learning_rate": 4.988432765782973e-06, "loss": 0.5458, "step": 3115 }, { "epoch": 0.1897512407514539, "grad_norm": 1.1034487721754829, "learning_rate": 4.9884250973141385e-06, "loss": 0.5219, "step": 3116 }, { "epoch": 0.18981213652833176, "grad_norm": 1.0320066124665832, "learning_rate": 4.988417426310148e-06, "loss": 0.559, "step": 3117 }, { "epoch": 0.18987303230520963, "grad_norm": 1.0497695053984155, "learning_rate": 4.988409752771009e-06, "loss": 0.5278, "step": 3118 }, { "epoch": 0.1899339280820875, "grad_norm": 1.083058047420587, "learning_rate": 4.98840207669673e-06, "loss": 0.4975, "step": 3119 }, { "epoch": 0.18999482385896538, "grad_norm": 1.0587877532811245, "learning_rate": 4.9883943980873195e-06, "loss": 0.5286, "step": 3120 }, { "epoch": 0.19005571963584325, "grad_norm": 1.1183060151104027, "learning_rate": 4.988386716942783e-06, "loss": 0.4597, "step": 3121 }, { "epoch": 0.19011661541272112, "grad_norm": 1.0190735284735564, "learning_rate": 4.988379033263131e-06, "loss": 0.4644, "step": 3122 }, { "epoch": 0.190177511189599, "grad_norm": 1.0653990685650443, "learning_rate": 4.9883713470483706e-06, "loss": 0.4993, "step": 3123 }, { "epoch": 0.19023840696647687, "grad_norm": 1.079521542958363, "learning_rate": 4.9883636582985084e-06, "loss": 0.4744, "step": 3124 }, { "epoch": 0.19029930274335474, "grad_norm": 1.0747730145974308, "learning_rate": 4.988355967013553e-06, "loss": 0.5, "step": 3125 }, { "epoch": 0.19036019852023262, "grad_norm": 1.1653140083068911, "learning_rate": 4.9883482731935126e-06, "loss": 0.4934, "step": 3126 }, { "epoch": 0.1904210942971105, "grad_norm": 1.2360558050270674, "learning_rate": 4.988340576838394e-06, "loss": 0.4598, "step": 3127 }, { "epoch": 0.19048199007398836, "grad_norm": 1.0610528544100235, "learning_rate": 4.988332877948207e-06, "loss": 0.4588, "step": 3128 }, { "epoch": 0.19054288585086623, "grad_norm": 0.9975550317777437, "learning_rate": 4.988325176522957e-06, "loss": 0.5461, "step": 3129 }, { "epoch": 0.1906037816277441, "grad_norm": 1.0262150676831667, "learning_rate": 4.9883174725626535e-06, "loss": 0.5511, "step": 3130 }, { "epoch": 0.19066467740462198, "grad_norm": 1.1730452700525547, "learning_rate": 4.988309766067304e-06, "loss": 0.4806, "step": 3131 }, { "epoch": 0.19072557318149985, "grad_norm": 0.9802355486311569, "learning_rate": 4.9883020570369166e-06, "loss": 0.543, "step": 3132 }, { "epoch": 0.19078646895837773, "grad_norm": 1.1761761779364406, "learning_rate": 4.988294345471499e-06, "loss": 0.5024, "step": 3133 }, { "epoch": 0.1908473647352556, "grad_norm": 1.029415293917612, "learning_rate": 4.988286631371058e-06, "loss": 0.5242, "step": 3134 }, { "epoch": 0.19090826051213347, "grad_norm": 1.036146986752756, "learning_rate": 4.988278914735603e-06, "loss": 0.4872, "step": 3135 }, { "epoch": 0.19096915628901134, "grad_norm": 1.078030684815678, "learning_rate": 4.9882711955651395e-06, "loss": 0.4844, "step": 3136 }, { "epoch": 0.19103005206588924, "grad_norm": 1.1585890849552485, "learning_rate": 4.988263473859679e-06, "loss": 0.4719, "step": 3137 }, { "epoch": 0.19109094784276712, "grad_norm": 1.053584530256567, "learning_rate": 4.9882557496192265e-06, "loss": 0.5573, "step": 3138 }, { "epoch": 0.191151843619645, "grad_norm": 1.0411826106098117, "learning_rate": 4.988248022843791e-06, "loss": 0.5103, "step": 3139 }, { "epoch": 0.19121273939652286, "grad_norm": 1.0147613710147703, "learning_rate": 4.98824029353338e-06, "loss": 0.5632, "step": 3140 }, { "epoch": 0.19127363517340074, "grad_norm": 1.0863178672672789, "learning_rate": 4.9882325616880015e-06, "loss": 0.559, "step": 3141 }, { "epoch": 0.1913345309502786, "grad_norm": 1.0987538450172691, "learning_rate": 4.988224827307664e-06, "loss": 0.5052, "step": 3142 }, { "epoch": 0.19139542672715648, "grad_norm": 1.0325870974989728, "learning_rate": 4.988217090392374e-06, "loss": 0.5316, "step": 3143 }, { "epoch": 0.19145632250403435, "grad_norm": 1.0436837763778004, "learning_rate": 4.9882093509421405e-06, "loss": 0.5006, "step": 3144 }, { "epoch": 0.19151721828091223, "grad_norm": 1.1022191327661157, "learning_rate": 4.988201608956971e-06, "loss": 0.4897, "step": 3145 }, { "epoch": 0.1915781140577901, "grad_norm": 1.0303785080004753, "learning_rate": 4.988193864436873e-06, "loss": 0.505, "step": 3146 }, { "epoch": 0.19163900983466797, "grad_norm": 1.1085962048382996, "learning_rate": 4.9881861173818555e-06, "loss": 0.5175, "step": 3147 }, { "epoch": 0.19169990561154585, "grad_norm": 1.0359501748092508, "learning_rate": 4.988178367791926e-06, "loss": 0.4706, "step": 3148 }, { "epoch": 0.19176080138842372, "grad_norm": 0.9227329846658368, "learning_rate": 4.9881706156670916e-06, "loss": 0.513, "step": 3149 }, { "epoch": 0.1918216971653016, "grad_norm": 0.9825607415512168, "learning_rate": 4.9881628610073605e-06, "loss": 0.5088, "step": 3150 }, { "epoch": 0.19188259294217946, "grad_norm": 1.0927358120671489, "learning_rate": 4.988155103812741e-06, "loss": 0.5016, "step": 3151 }, { "epoch": 0.19194348871905734, "grad_norm": 1.0993913066165892, "learning_rate": 4.988147344083241e-06, "loss": 0.4893, "step": 3152 }, { "epoch": 0.1920043844959352, "grad_norm": 1.0646958458723192, "learning_rate": 4.9881395818188675e-06, "loss": 0.4796, "step": 3153 }, { "epoch": 0.19206528027281308, "grad_norm": 1.038909682278899, "learning_rate": 4.98813181701963e-06, "loss": 0.4707, "step": 3154 }, { "epoch": 0.19212617604969096, "grad_norm": 1.105549053378795, "learning_rate": 4.988124049685535e-06, "loss": 0.4841, "step": 3155 }, { "epoch": 0.19218707182656883, "grad_norm": 1.025482810721373, "learning_rate": 4.9881162798165914e-06, "loss": 0.4628, "step": 3156 }, { "epoch": 0.1922479676034467, "grad_norm": 1.0514034982120968, "learning_rate": 4.988108507412807e-06, "loss": 0.5354, "step": 3157 }, { "epoch": 0.19230886338032457, "grad_norm": 1.0588941804950456, "learning_rate": 4.988100732474188e-06, "loss": 0.5514, "step": 3158 }, { "epoch": 0.19236975915720245, "grad_norm": 1.0023212688980707, "learning_rate": 4.988092955000744e-06, "loss": 0.5722, "step": 3159 }, { "epoch": 0.19243065493408032, "grad_norm": 1.0677189036951786, "learning_rate": 4.988085174992483e-06, "loss": 0.5503, "step": 3160 }, { "epoch": 0.1924915507109582, "grad_norm": 1.1897781092466693, "learning_rate": 4.988077392449413e-06, "loss": 0.5152, "step": 3161 }, { "epoch": 0.19255244648783607, "grad_norm": 1.0736021221191296, "learning_rate": 4.988069607371542e-06, "loss": 0.5034, "step": 3162 }, { "epoch": 0.19261334226471394, "grad_norm": 1.023592846427027, "learning_rate": 4.988061819758876e-06, "loss": 0.5173, "step": 3163 }, { "epoch": 0.1926742380415918, "grad_norm": 1.1918268470612021, "learning_rate": 4.9880540296114245e-06, "loss": 0.491, "step": 3164 }, { "epoch": 0.19273513381846968, "grad_norm": 1.0811542857785914, "learning_rate": 4.988046236929196e-06, "loss": 0.4888, "step": 3165 }, { "epoch": 0.19279602959534756, "grad_norm": 1.0371334081809283, "learning_rate": 4.9880384417121975e-06, "loss": 0.4849, "step": 3166 }, { "epoch": 0.19285692537222543, "grad_norm": 1.0620798484467922, "learning_rate": 4.988030643960437e-06, "loss": 0.484, "step": 3167 }, { "epoch": 0.1929178211491033, "grad_norm": 1.020650753693526, "learning_rate": 4.9880228436739234e-06, "loss": 0.4841, "step": 3168 }, { "epoch": 0.19297871692598118, "grad_norm": 0.9957039716568098, "learning_rate": 4.988015040852663e-06, "loss": 0.5475, "step": 3169 }, { "epoch": 0.19303961270285905, "grad_norm": 1.1303408708347111, "learning_rate": 4.988007235496665e-06, "loss": 0.5083, "step": 3170 }, { "epoch": 0.19310050847973692, "grad_norm": 1.0073877406230685, "learning_rate": 4.987999427605936e-06, "loss": 0.4947, "step": 3171 }, { "epoch": 0.1931614042566148, "grad_norm": 1.1028715398145161, "learning_rate": 4.987991617180486e-06, "loss": 0.5387, "step": 3172 }, { "epoch": 0.19322230003349267, "grad_norm": 1.1315143459191934, "learning_rate": 4.987983804220321e-06, "loss": 0.5107, "step": 3173 }, { "epoch": 0.19328319581037054, "grad_norm": 1.0332452839090394, "learning_rate": 4.98797598872545e-06, "loss": 0.5467, "step": 3174 }, { "epoch": 0.1933440915872484, "grad_norm": 1.054755810374611, "learning_rate": 4.9879681706958815e-06, "loss": 0.4783, "step": 3175 }, { "epoch": 0.19340498736412629, "grad_norm": 1.1216639429077573, "learning_rate": 4.987960350131622e-06, "loss": 0.4728, "step": 3176 }, { "epoch": 0.19346588314100416, "grad_norm": 1.0781769963575571, "learning_rate": 4.987952527032681e-06, "loss": 0.5095, "step": 3177 }, { "epoch": 0.19352677891788206, "grad_norm": 1.011018969240438, "learning_rate": 4.987944701399065e-06, "loss": 0.4925, "step": 3178 }, { "epoch": 0.19358767469475993, "grad_norm": 1.0065626894473172, "learning_rate": 4.987936873230783e-06, "loss": 0.507, "step": 3179 }, { "epoch": 0.1936485704716378, "grad_norm": 1.1183723990474843, "learning_rate": 4.987929042527842e-06, "loss": 0.4946, "step": 3180 }, { "epoch": 0.19370946624851568, "grad_norm": 1.0779075195242143, "learning_rate": 4.987921209290251e-06, "loss": 0.4723, "step": 3181 }, { "epoch": 0.19377036202539355, "grad_norm": 1.0714898262510395, "learning_rate": 4.9879133735180185e-06, "loss": 0.4979, "step": 3182 }, { "epoch": 0.19383125780227142, "grad_norm": 1.019113870456135, "learning_rate": 4.9879055352111505e-06, "loss": 0.4565, "step": 3183 }, { "epoch": 0.1938921535791493, "grad_norm": 1.0106802990403352, "learning_rate": 4.987897694369657e-06, "loss": 0.4722, "step": 3184 }, { "epoch": 0.19395304935602717, "grad_norm": 1.1867391149736086, "learning_rate": 4.987889850993544e-06, "loss": 0.53, "step": 3185 }, { "epoch": 0.19401394513290504, "grad_norm": 1.179822480692507, "learning_rate": 4.987882005082821e-06, "loss": 0.5782, "step": 3186 }, { "epoch": 0.19407484090978291, "grad_norm": 1.113594489933749, "learning_rate": 4.987874156637496e-06, "loss": 0.5135, "step": 3187 }, { "epoch": 0.1941357366866608, "grad_norm": 1.0123597016044323, "learning_rate": 4.987866305657576e-06, "loss": 0.5036, "step": 3188 }, { "epoch": 0.19419663246353866, "grad_norm": 0.9990215332766489, "learning_rate": 4.9878584521430696e-06, "loss": 0.5208, "step": 3189 }, { "epoch": 0.19425752824041653, "grad_norm": 1.0188402684799958, "learning_rate": 4.987850596093985e-06, "loss": 0.4787, "step": 3190 }, { "epoch": 0.1943184240172944, "grad_norm": 1.0556735327733713, "learning_rate": 4.987842737510329e-06, "loss": 0.5044, "step": 3191 }, { "epoch": 0.19437931979417228, "grad_norm": 0.9917501303232851, "learning_rate": 4.987834876392112e-06, "loss": 0.5365, "step": 3192 }, { "epoch": 0.19444021557105015, "grad_norm": 1.1099584227472574, "learning_rate": 4.987827012739339e-06, "loss": 0.5065, "step": 3193 }, { "epoch": 0.19450111134792802, "grad_norm": 1.1198590335615046, "learning_rate": 4.987819146552021e-06, "loss": 0.5225, "step": 3194 }, { "epoch": 0.1945620071248059, "grad_norm": 1.0709168771645194, "learning_rate": 4.9878112778301635e-06, "loss": 0.5158, "step": 3195 }, { "epoch": 0.19462290290168377, "grad_norm": 1.1093254576762202, "learning_rate": 4.9878034065737765e-06, "loss": 0.5036, "step": 3196 }, { "epoch": 0.19468379867856164, "grad_norm": 1.0809266317558084, "learning_rate": 4.987795532782866e-06, "loss": 0.449, "step": 3197 }, { "epoch": 0.19474469445543952, "grad_norm": 1.127964626464581, "learning_rate": 4.987787656457443e-06, "loss": 0.5623, "step": 3198 }, { "epoch": 0.1948055902323174, "grad_norm": 1.0776356894319874, "learning_rate": 4.987779777597511e-06, "loss": 0.53, "step": 3199 }, { "epoch": 0.19486648600919526, "grad_norm": 1.0444538601826825, "learning_rate": 4.987771896203083e-06, "loss": 0.5808, "step": 3200 }, { "epoch": 0.19492738178607313, "grad_norm": 1.0282497304504314, "learning_rate": 4.987764012274164e-06, "loss": 0.5859, "step": 3201 }, { "epoch": 0.194988277562951, "grad_norm": 0.9318011969766758, "learning_rate": 4.987756125810762e-06, "loss": 0.6012, "step": 3202 }, { "epoch": 0.19504917333982888, "grad_norm": 0.9649537481630093, "learning_rate": 4.987748236812886e-06, "loss": 0.5001, "step": 3203 }, { "epoch": 0.19511006911670675, "grad_norm": 1.0468080889407116, "learning_rate": 4.987740345280545e-06, "loss": 0.4992, "step": 3204 }, { "epoch": 0.19517096489358463, "grad_norm": 0.9910361032193095, "learning_rate": 4.987732451213745e-06, "loss": 0.5306, "step": 3205 }, { "epoch": 0.1952318606704625, "grad_norm": 1.0470719515921814, "learning_rate": 4.987724554612494e-06, "loss": 0.5152, "step": 3206 }, { "epoch": 0.19529275644734037, "grad_norm": 1.024158393063088, "learning_rate": 4.987716655476802e-06, "loss": 0.5597, "step": 3207 }, { "epoch": 0.19535365222421824, "grad_norm": 1.0758850720680124, "learning_rate": 4.987708753806676e-06, "loss": 0.5469, "step": 3208 }, { "epoch": 0.19541454800109612, "grad_norm": 1.0604326777606379, "learning_rate": 4.987700849602124e-06, "loss": 0.4863, "step": 3209 }, { "epoch": 0.195475443777974, "grad_norm": 1.189213039268939, "learning_rate": 4.987692942863153e-06, "loss": 0.5077, "step": 3210 }, { "epoch": 0.19553633955485186, "grad_norm": 1.1369208137103608, "learning_rate": 4.9876850335897746e-06, "loss": 0.5088, "step": 3211 }, { "epoch": 0.19559723533172974, "grad_norm": 0.9957412994886607, "learning_rate": 4.987677121781993e-06, "loss": 0.5545, "step": 3212 }, { "epoch": 0.1956581311086076, "grad_norm": 1.0648796716483842, "learning_rate": 4.987669207439817e-06, "loss": 0.5798, "step": 3213 }, { "epoch": 0.19571902688548548, "grad_norm": 1.0579624207967906, "learning_rate": 4.987661290563256e-06, "loss": 0.5222, "step": 3214 }, { "epoch": 0.19577992266236335, "grad_norm": 1.098008610553635, "learning_rate": 4.987653371152318e-06, "loss": 0.4694, "step": 3215 }, { "epoch": 0.19584081843924123, "grad_norm": 0.9915189296561396, "learning_rate": 4.987645449207009e-06, "loss": 0.5393, "step": 3216 }, { "epoch": 0.1959017142161191, "grad_norm": 0.9755372838196928, "learning_rate": 4.98763752472734e-06, "loss": 0.5299, "step": 3217 }, { "epoch": 0.19596260999299697, "grad_norm": 0.9608275593306992, "learning_rate": 4.987629597713317e-06, "loss": 0.4954, "step": 3218 }, { "epoch": 0.19602350576987487, "grad_norm": 1.0009387703745733, "learning_rate": 4.987621668164948e-06, "loss": 0.551, "step": 3219 }, { "epoch": 0.19608440154675275, "grad_norm": 1.004700234895283, "learning_rate": 4.987613736082243e-06, "loss": 0.515, "step": 3220 }, { "epoch": 0.19614529732363062, "grad_norm": 1.0240680083799376, "learning_rate": 4.987605801465208e-06, "loss": 0.5023, "step": 3221 }, { "epoch": 0.1962061931005085, "grad_norm": 1.1353277963427204, "learning_rate": 4.987597864313852e-06, "loss": 0.4733, "step": 3222 }, { "epoch": 0.19626708887738636, "grad_norm": 1.1323368548481039, "learning_rate": 4.987589924628183e-06, "loss": 0.4665, "step": 3223 }, { "epoch": 0.19632798465426424, "grad_norm": 1.0083857958056308, "learning_rate": 4.987581982408209e-06, "loss": 0.4698, "step": 3224 }, { "epoch": 0.1963888804311421, "grad_norm": 1.101876561519886, "learning_rate": 4.987574037653939e-06, "loss": 0.5201, "step": 3225 }, { "epoch": 0.19644977620801998, "grad_norm": 1.0385357724438522, "learning_rate": 4.98756609036538e-06, "loss": 0.4949, "step": 3226 }, { "epoch": 0.19651067198489786, "grad_norm": 1.0989170185838546, "learning_rate": 4.98755814054254e-06, "loss": 0.4988, "step": 3227 }, { "epoch": 0.19657156776177573, "grad_norm": 1.0997438874409085, "learning_rate": 4.987550188185428e-06, "loss": 0.4942, "step": 3228 }, { "epoch": 0.1966324635386536, "grad_norm": 1.0221703705411995, "learning_rate": 4.987542233294051e-06, "loss": 0.4727, "step": 3229 }, { "epoch": 0.19669335931553147, "grad_norm": 1.1484923993384024, "learning_rate": 4.987534275868418e-06, "loss": 0.5414, "step": 3230 }, { "epoch": 0.19675425509240935, "grad_norm": 1.1288849628210769, "learning_rate": 4.987526315908536e-06, "loss": 0.4611, "step": 3231 }, { "epoch": 0.19681515086928722, "grad_norm": 1.049018543826783, "learning_rate": 4.987518353414415e-06, "loss": 0.5222, "step": 3232 }, { "epoch": 0.1968760466461651, "grad_norm": 1.0882157825362022, "learning_rate": 4.9875103883860625e-06, "loss": 0.3788, "step": 3233 }, { "epoch": 0.19693694242304297, "grad_norm": 1.03730123452156, "learning_rate": 4.9875024208234846e-06, "loss": 0.4871, "step": 3234 }, { "epoch": 0.19699783819992084, "grad_norm": 1.1340140823314577, "learning_rate": 4.987494450726692e-06, "loss": 0.4752, "step": 3235 }, { "epoch": 0.1970587339767987, "grad_norm": 1.0889020381029029, "learning_rate": 4.987486478095691e-06, "loss": 0.4133, "step": 3236 }, { "epoch": 0.19711962975367658, "grad_norm": 1.0835730306400124, "learning_rate": 4.987478502930491e-06, "loss": 0.5124, "step": 3237 }, { "epoch": 0.19718052553055446, "grad_norm": 1.011117593475404, "learning_rate": 4.987470525231099e-06, "loss": 0.5564, "step": 3238 }, { "epoch": 0.19724142130743233, "grad_norm": 1.051143300157674, "learning_rate": 4.987462544997525e-06, "loss": 0.5737, "step": 3239 }, { "epoch": 0.1973023170843102, "grad_norm": 1.1615105684506082, "learning_rate": 4.987454562229776e-06, "loss": 0.4485, "step": 3240 }, { "epoch": 0.19736321286118808, "grad_norm": 0.9894589031397166, "learning_rate": 4.987446576927858e-06, "loss": 0.51, "step": 3241 }, { "epoch": 0.19742410863806595, "grad_norm": 1.0264900609834389, "learning_rate": 4.987438589091783e-06, "loss": 0.4957, "step": 3242 }, { "epoch": 0.19748500441494382, "grad_norm": 1.08499568341251, "learning_rate": 4.987430598721556e-06, "loss": 0.5249, "step": 3243 }, { "epoch": 0.1975459001918217, "grad_norm": 1.0583340324508366, "learning_rate": 4.987422605817187e-06, "loss": 0.6071, "step": 3244 }, { "epoch": 0.19760679596869957, "grad_norm": 0.9490820205758849, "learning_rate": 4.987414610378683e-06, "loss": 0.5929, "step": 3245 }, { "epoch": 0.19766769174557744, "grad_norm": 0.9525404416991204, "learning_rate": 4.987406612406054e-06, "loss": 0.5539, "step": 3246 }, { "epoch": 0.1977285875224553, "grad_norm": 1.12214901266567, "learning_rate": 4.987398611899306e-06, "loss": 0.5092, "step": 3247 }, { "epoch": 0.19778948329933319, "grad_norm": 1.1169872755403059, "learning_rate": 4.987390608858448e-06, "loss": 0.4791, "step": 3248 }, { "epoch": 0.19785037907621106, "grad_norm": 1.0713775062892696, "learning_rate": 4.987382603283488e-06, "loss": 0.4942, "step": 3249 }, { "epoch": 0.19791127485308893, "grad_norm": 0.9445533732674111, "learning_rate": 4.987374595174435e-06, "loss": 0.5835, "step": 3250 }, { "epoch": 0.1979721706299668, "grad_norm": 1.0836758146828003, "learning_rate": 4.987366584531296e-06, "loss": 0.5126, "step": 3251 }, { "epoch": 0.19803306640684468, "grad_norm": 1.0338484031463975, "learning_rate": 4.98735857135408e-06, "loss": 0.5069, "step": 3252 }, { "epoch": 0.19809396218372255, "grad_norm": 1.0338727038890463, "learning_rate": 4.987350555642794e-06, "loss": 0.5635, "step": 3253 }, { "epoch": 0.19815485796060042, "grad_norm": 1.0000815441163642, "learning_rate": 4.9873425373974475e-06, "loss": 0.6111, "step": 3254 }, { "epoch": 0.1982157537374783, "grad_norm": 1.0655956089027716, "learning_rate": 4.9873345166180485e-06, "loss": 0.4633, "step": 3255 }, { "epoch": 0.19827664951435617, "grad_norm": 0.9905773164134161, "learning_rate": 4.987326493304605e-06, "loss": 0.53, "step": 3256 }, { "epoch": 0.19833754529123404, "grad_norm": 1.0420324221297466, "learning_rate": 4.987318467457124e-06, "loss": 0.4682, "step": 3257 }, { "epoch": 0.1983984410681119, "grad_norm": 1.0889203880835376, "learning_rate": 4.987310439075615e-06, "loss": 0.4741, "step": 3258 }, { "epoch": 0.1984593368449898, "grad_norm": 1.1308007608181079, "learning_rate": 4.987302408160086e-06, "loss": 0.5388, "step": 3259 }, { "epoch": 0.1985202326218677, "grad_norm": 1.039987577738257, "learning_rate": 4.987294374710544e-06, "loss": 0.4757, "step": 3260 }, { "epoch": 0.19858112839874556, "grad_norm": 1.040228995440859, "learning_rate": 4.987286338727e-06, "loss": 0.505, "step": 3261 }, { "epoch": 0.19864202417562343, "grad_norm": 0.9541377439078932, "learning_rate": 4.9872783002094595e-06, "loss": 0.5473, "step": 3262 }, { "epoch": 0.1987029199525013, "grad_norm": 1.1159772968677604, "learning_rate": 4.987270259157931e-06, "loss": 0.4967, "step": 3263 }, { "epoch": 0.19876381572937918, "grad_norm": 1.177849498917227, "learning_rate": 4.987262215572424e-06, "loss": 0.4678, "step": 3264 }, { "epoch": 0.19882471150625705, "grad_norm": 1.0662771493794183, "learning_rate": 4.987254169452946e-06, "loss": 0.5119, "step": 3265 }, { "epoch": 0.19888560728313492, "grad_norm": 1.1110075222709621, "learning_rate": 4.987246120799505e-06, "loss": 0.5244, "step": 3266 }, { "epoch": 0.1989465030600128, "grad_norm": 0.98697652632359, "learning_rate": 4.9872380696121094e-06, "loss": 0.4993, "step": 3267 }, { "epoch": 0.19900739883689067, "grad_norm": 0.9643858950732165, "learning_rate": 4.987230015890767e-06, "loss": 0.5422, "step": 3268 }, { "epoch": 0.19906829461376854, "grad_norm": 0.9848910186917911, "learning_rate": 4.987221959635487e-06, "loss": 0.5099, "step": 3269 }, { "epoch": 0.19912919039064642, "grad_norm": 1.097377092250161, "learning_rate": 4.987213900846277e-06, "loss": 0.4259, "step": 3270 }, { "epoch": 0.1991900861675243, "grad_norm": 1.0694059273531464, "learning_rate": 4.9872058395231445e-06, "loss": 0.5585, "step": 3271 }, { "epoch": 0.19925098194440216, "grad_norm": 1.0010181390120696, "learning_rate": 4.9871977756660995e-06, "loss": 0.49, "step": 3272 }, { "epoch": 0.19931187772128003, "grad_norm": 1.0747089242863546, "learning_rate": 4.987189709275148e-06, "loss": 0.4767, "step": 3273 }, { "epoch": 0.1993727734981579, "grad_norm": 1.0788187211315003, "learning_rate": 4.9871816403502996e-06, "loss": 0.5468, "step": 3274 }, { "epoch": 0.19943366927503578, "grad_norm": 0.9606248181650854, "learning_rate": 4.9871735688915624e-06, "loss": 0.561, "step": 3275 }, { "epoch": 0.19949456505191365, "grad_norm": 1.0398606353948758, "learning_rate": 4.987165494898945e-06, "loss": 0.5608, "step": 3276 }, { "epoch": 0.19955546082879153, "grad_norm": 1.1536631261615098, "learning_rate": 4.9871574183724546e-06, "loss": 0.5284, "step": 3277 }, { "epoch": 0.1996163566056694, "grad_norm": 1.0089164246926836, "learning_rate": 4.9871493393121e-06, "loss": 0.4509, "step": 3278 }, { "epoch": 0.19967725238254727, "grad_norm": 1.0255828391987065, "learning_rate": 4.9871412577178885e-06, "loss": 0.5086, "step": 3279 }, { "epoch": 0.19973814815942514, "grad_norm": 1.1069261317185266, "learning_rate": 4.98713317358983e-06, "loss": 0.4645, "step": 3280 }, { "epoch": 0.19979904393630302, "grad_norm": 1.0567233258915523, "learning_rate": 4.987125086927932e-06, "loss": 0.4467, "step": 3281 }, { "epoch": 0.1998599397131809, "grad_norm": 1.108093924729973, "learning_rate": 4.987116997732202e-06, "loss": 0.465, "step": 3282 }, { "epoch": 0.19992083549005876, "grad_norm": 1.0149309046494535, "learning_rate": 4.98710890600265e-06, "loss": 0.576, "step": 3283 }, { "epoch": 0.19998173126693664, "grad_norm": 1.0608697256492043, "learning_rate": 4.9871008117392825e-06, "loss": 0.626, "step": 3284 }, { "epoch": 0.2000426270438145, "grad_norm": 1.095432273508629, "learning_rate": 4.987092714942109e-06, "loss": 0.4668, "step": 3285 }, { "epoch": 0.20010352282069238, "grad_norm": 1.094811650694829, "learning_rate": 4.987084615611137e-06, "loss": 0.4503, "step": 3286 }, { "epoch": 0.20016441859757025, "grad_norm": 1.0091459501254498, "learning_rate": 4.987076513746374e-06, "loss": 0.5257, "step": 3287 }, { "epoch": 0.20022531437444813, "grad_norm": 1.0122473937109446, "learning_rate": 4.987068409347829e-06, "loss": 0.5459, "step": 3288 }, { "epoch": 0.200286210151326, "grad_norm": 0.9767133508998462, "learning_rate": 4.9870603024155114e-06, "loss": 0.5262, "step": 3289 }, { "epoch": 0.20034710592820387, "grad_norm": 0.9933332212638218, "learning_rate": 4.9870521929494285e-06, "loss": 0.5575, "step": 3290 }, { "epoch": 0.20040800170508175, "grad_norm": 1.0583728963142625, "learning_rate": 4.987044080949588e-06, "loss": 0.4821, "step": 3291 }, { "epoch": 0.20046889748195962, "grad_norm": 1.0747770401529795, "learning_rate": 4.987035966415999e-06, "loss": 0.4895, "step": 3292 }, { "epoch": 0.2005297932588375, "grad_norm": 0.9942917386885082, "learning_rate": 4.98702784934867e-06, "loss": 0.531, "step": 3293 }, { "epoch": 0.20059068903571536, "grad_norm": 1.110187542883993, "learning_rate": 4.987019729747607e-06, "loss": 0.5135, "step": 3294 }, { "epoch": 0.20065158481259324, "grad_norm": 1.0042705987009908, "learning_rate": 4.987011607612822e-06, "loss": 0.5461, "step": 3295 }, { "epoch": 0.2007124805894711, "grad_norm": 1.0017859098734687, "learning_rate": 4.98700348294432e-06, "loss": 0.532, "step": 3296 }, { "epoch": 0.20077337636634898, "grad_norm": 1.1463614753245046, "learning_rate": 4.986995355742111e-06, "loss": 0.4894, "step": 3297 }, { "epoch": 0.20083427214322686, "grad_norm": 1.0617719027465773, "learning_rate": 4.986987226006203e-06, "loss": 0.5287, "step": 3298 }, { "epoch": 0.20089516792010473, "grad_norm": 0.993686291201932, "learning_rate": 4.986979093736604e-06, "loss": 0.5054, "step": 3299 }, { "epoch": 0.2009560636969826, "grad_norm": 1.102933250528604, "learning_rate": 4.9869709589333225e-06, "loss": 0.5319, "step": 3300 }, { "epoch": 0.2010169594738605, "grad_norm": 1.1126797466076614, "learning_rate": 4.986962821596366e-06, "loss": 0.4823, "step": 3301 }, { "epoch": 0.20107785525073837, "grad_norm": 1.0737820214461593, "learning_rate": 4.986954681725744e-06, "loss": 0.5171, "step": 3302 }, { "epoch": 0.20113875102761625, "grad_norm": 1.152140377499693, "learning_rate": 4.986946539321464e-06, "loss": 0.4746, "step": 3303 }, { "epoch": 0.20119964680449412, "grad_norm": 1.0693642283184854, "learning_rate": 4.986938394383536e-06, "loss": 0.5235, "step": 3304 }, { "epoch": 0.201260542581372, "grad_norm": 1.0599626883407467, "learning_rate": 4.986930246911965e-06, "loss": 0.5502, "step": 3305 }, { "epoch": 0.20132143835824987, "grad_norm": 1.0067186482890351, "learning_rate": 4.9869220969067625e-06, "loss": 0.5103, "step": 3306 }, { "epoch": 0.20138233413512774, "grad_norm": 1.0175526948454368, "learning_rate": 4.986913944367935e-06, "loss": 0.5041, "step": 3307 }, { "epoch": 0.2014432299120056, "grad_norm": 1.1387175062507005, "learning_rate": 4.986905789295491e-06, "loss": 0.5012, "step": 3308 }, { "epoch": 0.20150412568888348, "grad_norm": 0.9673195860958823, "learning_rate": 4.986897631689439e-06, "loss": 0.5562, "step": 3309 }, { "epoch": 0.20156502146576136, "grad_norm": 1.1145058741417575, "learning_rate": 4.986889471549787e-06, "loss": 0.4741, "step": 3310 }, { "epoch": 0.20162591724263923, "grad_norm": 1.0166466270766141, "learning_rate": 4.986881308876545e-06, "loss": 0.5706, "step": 3311 }, { "epoch": 0.2016868130195171, "grad_norm": 1.047014327400255, "learning_rate": 4.9868731436697195e-06, "loss": 0.5123, "step": 3312 }, { "epoch": 0.20174770879639498, "grad_norm": 1.050735268460259, "learning_rate": 4.986864975929319e-06, "loss": 0.492, "step": 3313 }, { "epoch": 0.20180860457327285, "grad_norm": 1.0072982925679759, "learning_rate": 4.986856805655352e-06, "loss": 0.5905, "step": 3314 }, { "epoch": 0.20186950035015072, "grad_norm": 1.1102615015847528, "learning_rate": 4.986848632847828e-06, "loss": 0.4757, "step": 3315 }, { "epoch": 0.2019303961270286, "grad_norm": 1.048428158651563, "learning_rate": 4.986840457506753e-06, "loss": 0.5044, "step": 3316 }, { "epoch": 0.20199129190390647, "grad_norm": 1.0132849452519372, "learning_rate": 4.9868322796321374e-06, "loss": 0.4869, "step": 3317 }, { "epoch": 0.20205218768078434, "grad_norm": 1.1138280123136912, "learning_rate": 4.986824099223989e-06, "loss": 0.4942, "step": 3318 }, { "epoch": 0.2021130834576622, "grad_norm": 1.0710063841488668, "learning_rate": 4.986815916282316e-06, "loss": 0.5541, "step": 3319 }, { "epoch": 0.20217397923454009, "grad_norm": 1.1063171518502126, "learning_rate": 4.986807730807126e-06, "loss": 0.4835, "step": 3320 }, { "epoch": 0.20223487501141796, "grad_norm": 1.0375946362557098, "learning_rate": 4.986799542798428e-06, "loss": 0.4848, "step": 3321 }, { "epoch": 0.20229577078829583, "grad_norm": 1.0358265988606228, "learning_rate": 4.98679135225623e-06, "loss": 0.527, "step": 3322 }, { "epoch": 0.2023566665651737, "grad_norm": 1.052288514084644, "learning_rate": 4.986783159180542e-06, "loss": 0.5433, "step": 3323 }, { "epoch": 0.20241756234205158, "grad_norm": 0.989652400649439, "learning_rate": 4.9867749635713705e-06, "loss": 0.4691, "step": 3324 }, { "epoch": 0.20247845811892945, "grad_norm": 1.0954906183485644, "learning_rate": 4.986766765428725e-06, "loss": 0.5477, "step": 3325 }, { "epoch": 0.20253935389580732, "grad_norm": 1.0210922192930263, "learning_rate": 4.986758564752611e-06, "loss": 0.4924, "step": 3326 }, { "epoch": 0.2026002496726852, "grad_norm": 1.0151261482240455, "learning_rate": 4.9867503615430405e-06, "loss": 0.5209, "step": 3327 }, { "epoch": 0.20266114544956307, "grad_norm": 0.9408445596080487, "learning_rate": 4.986742155800021e-06, "loss": 0.5175, "step": 3328 }, { "epoch": 0.20272204122644094, "grad_norm": 0.9780996428076116, "learning_rate": 4.986733947523559e-06, "loss": 0.5233, "step": 3329 }, { "epoch": 0.20278293700331881, "grad_norm": 1.044894465973774, "learning_rate": 4.986725736713665e-06, "loss": 0.5182, "step": 3330 }, { "epoch": 0.2028438327801967, "grad_norm": 1.1171331250363923, "learning_rate": 4.986717523370347e-06, "loss": 0.4896, "step": 3331 }, { "epoch": 0.20290472855707456, "grad_norm": 1.087967683151893, "learning_rate": 4.986709307493612e-06, "loss": 0.4683, "step": 3332 }, { "epoch": 0.20296562433395243, "grad_norm": 1.0503185235958243, "learning_rate": 4.986701089083469e-06, "loss": 0.4693, "step": 3333 }, { "epoch": 0.2030265201108303, "grad_norm": 1.0402067480999657, "learning_rate": 4.986692868139927e-06, "loss": 0.4788, "step": 3334 }, { "epoch": 0.20308741588770818, "grad_norm": 1.147482342337665, "learning_rate": 4.9866846446629945e-06, "loss": 0.4196, "step": 3335 }, { "epoch": 0.20314831166458605, "grad_norm": 1.0753876506153601, "learning_rate": 4.9866764186526785e-06, "loss": 0.5033, "step": 3336 }, { "epoch": 0.20320920744146392, "grad_norm": 1.0123111662564122, "learning_rate": 4.9866681901089895e-06, "loss": 0.5663, "step": 3337 }, { "epoch": 0.2032701032183418, "grad_norm": 1.1262164570299773, "learning_rate": 4.986659959031933e-06, "loss": 0.4584, "step": 3338 }, { "epoch": 0.20333099899521967, "grad_norm": 1.1155186822766288, "learning_rate": 4.986651725421521e-06, "loss": 0.5432, "step": 3339 }, { "epoch": 0.20339189477209754, "grad_norm": 1.0894071884784136, "learning_rate": 4.9866434892777585e-06, "loss": 0.4938, "step": 3340 }, { "epoch": 0.20345279054897542, "grad_norm": 1.0363771424641588, "learning_rate": 4.986635250600655e-06, "loss": 0.5224, "step": 3341 }, { "epoch": 0.20351368632585332, "grad_norm": 1.133566764808604, "learning_rate": 4.98662700939022e-06, "loss": 0.3984, "step": 3342 }, { "epoch": 0.2035745821027312, "grad_norm": 1.0685882293543991, "learning_rate": 4.9866187656464615e-06, "loss": 0.4643, "step": 3343 }, { "epoch": 0.20363547787960906, "grad_norm": 0.987209535978606, "learning_rate": 4.986610519369387e-06, "loss": 0.5005, "step": 3344 }, { "epoch": 0.20369637365648693, "grad_norm": 1.1178819745644832, "learning_rate": 4.986602270559006e-06, "loss": 0.5444, "step": 3345 }, { "epoch": 0.2037572694333648, "grad_norm": 1.0915211907272573, "learning_rate": 4.986594019215325e-06, "loss": 0.5156, "step": 3346 }, { "epoch": 0.20381816521024268, "grad_norm": 1.152092863493988, "learning_rate": 4.986585765338354e-06, "loss": 0.4125, "step": 3347 }, { "epoch": 0.20387906098712055, "grad_norm": 1.025759109200939, "learning_rate": 4.9865775089281024e-06, "loss": 0.5248, "step": 3348 }, { "epoch": 0.20393995676399843, "grad_norm": 0.9932647874078001, "learning_rate": 4.986569249984576e-06, "loss": 0.4896, "step": 3349 }, { "epoch": 0.2040008525408763, "grad_norm": 1.1229488453282515, "learning_rate": 4.986560988507785e-06, "loss": 0.5239, "step": 3350 }, { "epoch": 0.20406174831775417, "grad_norm": 1.0177920936344513, "learning_rate": 4.986552724497738e-06, "loss": 0.4959, "step": 3351 }, { "epoch": 0.20412264409463204, "grad_norm": 1.003248207223814, "learning_rate": 4.986544457954441e-06, "loss": 0.4983, "step": 3352 }, { "epoch": 0.20418353987150992, "grad_norm": 1.0861140906980653, "learning_rate": 4.986536188877906e-06, "loss": 0.5177, "step": 3353 }, { "epoch": 0.2042444356483878, "grad_norm": 1.1093893669079051, "learning_rate": 4.986527917268139e-06, "loss": 0.4544, "step": 3354 }, { "epoch": 0.20430533142526566, "grad_norm": 1.0645426714778683, "learning_rate": 4.986519643125149e-06, "loss": 0.4315, "step": 3355 }, { "epoch": 0.20436622720214354, "grad_norm": 1.0326903738144806, "learning_rate": 4.986511366448944e-06, "loss": 0.5273, "step": 3356 }, { "epoch": 0.2044271229790214, "grad_norm": 1.0340632032752586, "learning_rate": 4.986503087239534e-06, "loss": 0.5672, "step": 3357 }, { "epoch": 0.20448801875589928, "grad_norm": 1.0691658746712573, "learning_rate": 4.9864948054969256e-06, "loss": 0.524, "step": 3358 }, { "epoch": 0.20454891453277715, "grad_norm": 1.0388806798261574, "learning_rate": 4.9864865212211274e-06, "loss": 0.4799, "step": 3359 }, { "epoch": 0.20460981030965503, "grad_norm": 0.966858752434892, "learning_rate": 4.98647823441215e-06, "loss": 0.4699, "step": 3360 }, { "epoch": 0.2046707060865329, "grad_norm": 1.0690088160802844, "learning_rate": 4.986469945069999e-06, "loss": 0.4837, "step": 3361 }, { "epoch": 0.20473160186341077, "grad_norm": 1.003578311311874, "learning_rate": 4.986461653194686e-06, "loss": 0.5539, "step": 3362 }, { "epoch": 0.20479249764028865, "grad_norm": 1.0170848959881245, "learning_rate": 4.986453358786215e-06, "loss": 0.5086, "step": 3363 }, { "epoch": 0.20485339341716652, "grad_norm": 1.0827242354903512, "learning_rate": 4.986445061844598e-06, "loss": 0.4656, "step": 3364 }, { "epoch": 0.2049142891940444, "grad_norm": 1.1184554917327494, "learning_rate": 4.986436762369843e-06, "loss": 0.4553, "step": 3365 }, { "epoch": 0.20497518497092226, "grad_norm": 1.021868820623027, "learning_rate": 4.986428460361957e-06, "loss": 0.4461, "step": 3366 }, { "epoch": 0.20503608074780014, "grad_norm": 1.0868505385617973, "learning_rate": 4.98642015582095e-06, "loss": 0.5267, "step": 3367 }, { "epoch": 0.205096976524678, "grad_norm": 1.1205419876513374, "learning_rate": 4.98641184874683e-06, "loss": 0.4917, "step": 3368 }, { "epoch": 0.20515787230155588, "grad_norm": 1.0996248736569136, "learning_rate": 4.986403539139605e-06, "loss": 0.4591, "step": 3369 }, { "epoch": 0.20521876807843376, "grad_norm": 1.0516618436055971, "learning_rate": 4.986395226999283e-06, "loss": 0.4667, "step": 3370 }, { "epoch": 0.20527966385531163, "grad_norm": 1.0007442944346292, "learning_rate": 4.986386912325875e-06, "loss": 0.5217, "step": 3371 }, { "epoch": 0.2053405596321895, "grad_norm": 1.040729873926254, "learning_rate": 4.9863785951193865e-06, "loss": 0.4846, "step": 3372 }, { "epoch": 0.20540145540906737, "grad_norm": 1.0142181337458642, "learning_rate": 4.9863702753798274e-06, "loss": 0.4692, "step": 3373 }, { "epoch": 0.20546235118594525, "grad_norm": 1.0364876348632364, "learning_rate": 4.986361953107206e-06, "loss": 0.4692, "step": 3374 }, { "epoch": 0.20552324696282312, "grad_norm": 1.0922650813828012, "learning_rate": 4.986353628301531e-06, "loss": 0.5334, "step": 3375 }, { "epoch": 0.205584142739701, "grad_norm": 1.0268471838514983, "learning_rate": 4.98634530096281e-06, "loss": 0.5126, "step": 3376 }, { "epoch": 0.20564503851657887, "grad_norm": 1.1582607107527993, "learning_rate": 4.986336971091052e-06, "loss": 0.5305, "step": 3377 }, { "epoch": 0.20570593429345674, "grad_norm": 0.9863219337817485, "learning_rate": 4.986328638686267e-06, "loss": 0.5187, "step": 3378 }, { "epoch": 0.2057668300703346, "grad_norm": 1.0953596970757749, "learning_rate": 4.986320303748461e-06, "loss": 0.4974, "step": 3379 }, { "epoch": 0.20582772584721248, "grad_norm": 1.0637393551941463, "learning_rate": 4.9863119662776434e-06, "loss": 0.4702, "step": 3380 }, { "epoch": 0.20588862162409036, "grad_norm": 1.0478465505533385, "learning_rate": 4.986303626273823e-06, "loss": 0.4939, "step": 3381 }, { "epoch": 0.20594951740096823, "grad_norm": 1.0960624627059992, "learning_rate": 4.986295283737008e-06, "loss": 0.4522, "step": 3382 }, { "epoch": 0.20601041317784613, "grad_norm": 1.0709429836965356, "learning_rate": 4.986286938667208e-06, "loss": 0.5081, "step": 3383 }, { "epoch": 0.206071308954724, "grad_norm": 1.0986072260745445, "learning_rate": 4.9862785910644295e-06, "loss": 0.544, "step": 3384 }, { "epoch": 0.20613220473160188, "grad_norm": 1.1546722413965487, "learning_rate": 4.986270240928683e-06, "loss": 0.4541, "step": 3385 }, { "epoch": 0.20619310050847975, "grad_norm": 0.9614205633665729, "learning_rate": 4.986261888259975e-06, "loss": 0.5079, "step": 3386 }, { "epoch": 0.20625399628535762, "grad_norm": 1.0883739180843135, "learning_rate": 4.9862535330583164e-06, "loss": 0.5129, "step": 3387 }, { "epoch": 0.2063148920622355, "grad_norm": 1.06568311901635, "learning_rate": 4.986245175323714e-06, "loss": 0.5093, "step": 3388 }, { "epoch": 0.20637578783911337, "grad_norm": 1.0569617073446542, "learning_rate": 4.986236815056176e-06, "loss": 0.5201, "step": 3389 }, { "epoch": 0.20643668361599124, "grad_norm": 1.1540327681327762, "learning_rate": 4.986228452255712e-06, "loss": 0.4767, "step": 3390 }, { "epoch": 0.2064975793928691, "grad_norm": 0.9760444907196393, "learning_rate": 4.98622008692233e-06, "loss": 0.5079, "step": 3391 }, { "epoch": 0.20655847516974699, "grad_norm": 1.1036537418487622, "learning_rate": 4.9862117190560385e-06, "loss": 0.4738, "step": 3392 }, { "epoch": 0.20661937094662486, "grad_norm": 0.9919256674953753, "learning_rate": 4.986203348656847e-06, "loss": 0.5166, "step": 3393 }, { "epoch": 0.20668026672350273, "grad_norm": 0.969749415413153, "learning_rate": 4.986194975724762e-06, "loss": 0.5141, "step": 3394 }, { "epoch": 0.2067411625003806, "grad_norm": 1.0135668935345605, "learning_rate": 4.986186600259794e-06, "loss": 0.5408, "step": 3395 }, { "epoch": 0.20680205827725848, "grad_norm": 1.055962805258759, "learning_rate": 4.986178222261951e-06, "loss": 0.4965, "step": 3396 }, { "epoch": 0.20686295405413635, "grad_norm": 1.0316484236326458, "learning_rate": 4.986169841731241e-06, "loss": 0.545, "step": 3397 }, { "epoch": 0.20692384983101422, "grad_norm": 1.1087929403470216, "learning_rate": 4.986161458667672e-06, "loss": 0.5027, "step": 3398 }, { "epoch": 0.2069847456078921, "grad_norm": 1.1548011490893866, "learning_rate": 4.986153073071255e-06, "loss": 0.4853, "step": 3399 }, { "epoch": 0.20704564138476997, "grad_norm": 0.9934473689603875, "learning_rate": 4.986144684941996e-06, "loss": 0.5286, "step": 3400 }, { "epoch": 0.20710653716164784, "grad_norm": 1.1314787191840912, "learning_rate": 4.986136294279905e-06, "loss": 0.5435, "step": 3401 }, { "epoch": 0.20716743293852571, "grad_norm": 1.0004601347553874, "learning_rate": 4.9861279010849895e-06, "loss": 0.5378, "step": 3402 }, { "epoch": 0.2072283287154036, "grad_norm": 0.9826265238611142, "learning_rate": 4.9861195053572585e-06, "loss": 0.5161, "step": 3403 }, { "epoch": 0.20728922449228146, "grad_norm": 1.084329443166551, "learning_rate": 4.986111107096721e-06, "loss": 0.418, "step": 3404 }, { "epoch": 0.20735012026915933, "grad_norm": 1.0061218247419947, "learning_rate": 4.986102706303385e-06, "loss": 0.5051, "step": 3405 }, { "epoch": 0.2074110160460372, "grad_norm": 0.942620301166675, "learning_rate": 4.9860943029772595e-06, "loss": 0.5051, "step": 3406 }, { "epoch": 0.20747191182291508, "grad_norm": 1.0624389223994877, "learning_rate": 4.986085897118353e-06, "loss": 0.5228, "step": 3407 }, { "epoch": 0.20753280759979295, "grad_norm": 1.09212321133681, "learning_rate": 4.986077488726673e-06, "loss": 0.4826, "step": 3408 }, { "epoch": 0.20759370337667082, "grad_norm": 1.085195564819186, "learning_rate": 4.98606907780223e-06, "loss": 0.4622, "step": 3409 }, { "epoch": 0.2076545991535487, "grad_norm": 1.0023270186637236, "learning_rate": 4.986060664345031e-06, "loss": 0.4676, "step": 3410 }, { "epoch": 0.20771549493042657, "grad_norm": 1.0620773476378462, "learning_rate": 4.986052248355085e-06, "loss": 0.6146, "step": 3411 }, { "epoch": 0.20777639070730444, "grad_norm": 1.0422806168664056, "learning_rate": 4.986043829832401e-06, "loss": 0.5124, "step": 3412 }, { "epoch": 0.20783728648418232, "grad_norm": 1.0607825592847067, "learning_rate": 4.986035408776987e-06, "loss": 0.4782, "step": 3413 }, { "epoch": 0.2078981822610602, "grad_norm": 1.192102561220855, "learning_rate": 4.986026985188851e-06, "loss": 0.5143, "step": 3414 }, { "epoch": 0.20795907803793806, "grad_norm": 1.0214296132110936, "learning_rate": 4.9860185590680035e-06, "loss": 0.52, "step": 3415 }, { "epoch": 0.20801997381481593, "grad_norm": 1.0539077983103642, "learning_rate": 4.986010130414452e-06, "loss": 0.5097, "step": 3416 }, { "epoch": 0.2080808695916938, "grad_norm": 1.0200432864729752, "learning_rate": 4.986001699228205e-06, "loss": 0.496, "step": 3417 }, { "epoch": 0.20814176536857168, "grad_norm": 1.1307539374485989, "learning_rate": 4.98599326550927e-06, "loss": 0.4416, "step": 3418 }, { "epoch": 0.20820266114544955, "grad_norm": 1.0322278011578812, "learning_rate": 4.985984829257658e-06, "loss": 0.5084, "step": 3419 }, { "epoch": 0.20826355692232743, "grad_norm": 1.0533178084295167, "learning_rate": 4.985976390473376e-06, "loss": 0.4713, "step": 3420 }, { "epoch": 0.2083244526992053, "grad_norm": 0.9310979720058788, "learning_rate": 4.9859679491564325e-06, "loss": 0.5298, "step": 3421 }, { "epoch": 0.20838534847608317, "grad_norm": 1.0137734345663125, "learning_rate": 4.985959505306838e-06, "loss": 0.5057, "step": 3422 }, { "epoch": 0.20844624425296104, "grad_norm": 0.9421587125344855, "learning_rate": 4.985951058924598e-06, "loss": 0.5501, "step": 3423 }, { "epoch": 0.20850714002983894, "grad_norm": 1.1892674392937794, "learning_rate": 4.985942610009723e-06, "loss": 0.4857, "step": 3424 }, { "epoch": 0.20856803580671682, "grad_norm": 1.1396838405584981, "learning_rate": 4.985934158562222e-06, "loss": 0.4369, "step": 3425 }, { "epoch": 0.2086289315835947, "grad_norm": 0.9814121642898451, "learning_rate": 4.985925704582103e-06, "loss": 0.5455, "step": 3426 }, { "epoch": 0.20868982736047256, "grad_norm": 0.9518147987516924, "learning_rate": 4.985917248069374e-06, "loss": 0.4622, "step": 3427 }, { "epoch": 0.20875072313735044, "grad_norm": 0.9634471757280669, "learning_rate": 4.985908789024044e-06, "loss": 0.542, "step": 3428 }, { "epoch": 0.2088116189142283, "grad_norm": 1.0291249574276764, "learning_rate": 4.985900327446123e-06, "loss": 0.5182, "step": 3429 }, { "epoch": 0.20887251469110618, "grad_norm": 0.9774471438705676, "learning_rate": 4.985891863335617e-06, "loss": 0.5788, "step": 3430 }, { "epoch": 0.20893341046798405, "grad_norm": 1.0625802842276713, "learning_rate": 4.985883396692537e-06, "loss": 0.5229, "step": 3431 }, { "epoch": 0.20899430624486193, "grad_norm": 1.1743163755025108, "learning_rate": 4.98587492751689e-06, "loss": 0.5092, "step": 3432 }, { "epoch": 0.2090552020217398, "grad_norm": 0.9938690456672714, "learning_rate": 4.985866455808685e-06, "loss": 0.5098, "step": 3433 }, { "epoch": 0.20911609779861767, "grad_norm": 0.9803903472796017, "learning_rate": 4.985857981567933e-06, "loss": 0.4903, "step": 3434 }, { "epoch": 0.20917699357549555, "grad_norm": 1.0041164587889277, "learning_rate": 4.985849504794639e-06, "loss": 0.6336, "step": 3435 }, { "epoch": 0.20923788935237342, "grad_norm": 0.9864747889895915, "learning_rate": 4.985841025488813e-06, "loss": 0.4796, "step": 3436 }, { "epoch": 0.2092987851292513, "grad_norm": 1.0378498194891863, "learning_rate": 4.985832543650464e-06, "loss": 0.5979, "step": 3437 }, { "epoch": 0.20935968090612916, "grad_norm": 1.0623352026236608, "learning_rate": 4.985824059279601e-06, "loss": 0.5292, "step": 3438 }, { "epoch": 0.20942057668300704, "grad_norm": 1.041597733259241, "learning_rate": 4.985815572376232e-06, "loss": 0.5221, "step": 3439 }, { "epoch": 0.2094814724598849, "grad_norm": 0.9862923145944356, "learning_rate": 4.985807082940366e-06, "loss": 0.5218, "step": 3440 }, { "epoch": 0.20954236823676278, "grad_norm": 1.0306717182057008, "learning_rate": 4.985798590972011e-06, "loss": 0.5578, "step": 3441 }, { "epoch": 0.20960326401364066, "grad_norm": 1.0981398010236594, "learning_rate": 4.985790096471176e-06, "loss": 0.4816, "step": 3442 }, { "epoch": 0.20966415979051853, "grad_norm": 1.0633498537857196, "learning_rate": 4.98578159943787e-06, "loss": 0.5428, "step": 3443 }, { "epoch": 0.2097250555673964, "grad_norm": 0.9917142399514144, "learning_rate": 4.985773099872101e-06, "loss": 0.5201, "step": 3444 }, { "epoch": 0.20978595134427427, "grad_norm": 1.021307536019222, "learning_rate": 4.985764597773878e-06, "loss": 0.5775, "step": 3445 }, { "epoch": 0.20984684712115215, "grad_norm": 0.9920800169832799, "learning_rate": 4.98575609314321e-06, "loss": 0.5391, "step": 3446 }, { "epoch": 0.20990774289803002, "grad_norm": 1.0151634658486852, "learning_rate": 4.985747585980106e-06, "loss": 0.5074, "step": 3447 }, { "epoch": 0.2099686386749079, "grad_norm": 1.1519059625027241, "learning_rate": 4.985739076284573e-06, "loss": 0.5014, "step": 3448 }, { "epoch": 0.21002953445178577, "grad_norm": 1.0127358693173807, "learning_rate": 4.9857305640566215e-06, "loss": 0.549, "step": 3449 }, { "epoch": 0.21009043022866364, "grad_norm": 1.073346305411959, "learning_rate": 4.9857220492962585e-06, "loss": 0.5092, "step": 3450 }, { "epoch": 0.2101513260055415, "grad_norm": 1.1016426400727715, "learning_rate": 4.985713532003495e-06, "loss": 0.521, "step": 3451 }, { "epoch": 0.21021222178241938, "grad_norm": 1.1399352134621787, "learning_rate": 4.985705012178337e-06, "loss": 0.4829, "step": 3452 }, { "epoch": 0.21027311755929726, "grad_norm": 1.0138555361817831, "learning_rate": 4.985696489820794e-06, "loss": 0.5339, "step": 3453 }, { "epoch": 0.21033401333617513, "grad_norm": 1.0594137999991409, "learning_rate": 4.985687964930877e-06, "loss": 0.4893, "step": 3454 }, { "epoch": 0.210394909113053, "grad_norm": 1.1043313401364887, "learning_rate": 4.985679437508591e-06, "loss": 0.5144, "step": 3455 }, { "epoch": 0.21045580488993088, "grad_norm": 0.9295620543287733, "learning_rate": 4.985670907553947e-06, "loss": 0.5588, "step": 3456 }, { "epoch": 0.21051670066680875, "grad_norm": 1.1273868066892614, "learning_rate": 4.985662375066953e-06, "loss": 0.4481, "step": 3457 }, { "epoch": 0.21057759644368662, "grad_norm": 1.130661185162559, "learning_rate": 4.985653840047618e-06, "loss": 0.4747, "step": 3458 }, { "epoch": 0.2106384922205645, "grad_norm": 1.0631560429826916, "learning_rate": 4.985645302495951e-06, "loss": 0.4963, "step": 3459 }, { "epoch": 0.21069938799744237, "grad_norm": 1.037820240945483, "learning_rate": 4.98563676241196e-06, "loss": 0.613, "step": 3460 }, { "epoch": 0.21076028377432024, "grad_norm": 1.0913315676202893, "learning_rate": 4.985628219795654e-06, "loss": 0.4636, "step": 3461 }, { "epoch": 0.2108211795511981, "grad_norm": 1.1417202151016919, "learning_rate": 4.985619674647041e-06, "loss": 0.5134, "step": 3462 }, { "epoch": 0.21088207532807599, "grad_norm": 1.0392605503902796, "learning_rate": 4.985611126966131e-06, "loss": 0.5117, "step": 3463 }, { "epoch": 0.21094297110495386, "grad_norm": 1.1501416206797939, "learning_rate": 4.985602576752931e-06, "loss": 0.4167, "step": 3464 }, { "epoch": 0.21100386688183176, "grad_norm": 1.0376798210620344, "learning_rate": 4.985594024007453e-06, "loss": 0.5404, "step": 3465 }, { "epoch": 0.21106476265870963, "grad_norm": 0.9724724850460346, "learning_rate": 4.9855854687297015e-06, "loss": 0.5567, "step": 3466 }, { "epoch": 0.2111256584355875, "grad_norm": 1.0250783705235627, "learning_rate": 4.985576910919688e-06, "loss": 0.5367, "step": 3467 }, { "epoch": 0.21118655421246538, "grad_norm": 1.1000705961293658, "learning_rate": 4.98556835057742e-06, "loss": 0.5245, "step": 3468 }, { "epoch": 0.21124744998934325, "grad_norm": 1.0686925838737458, "learning_rate": 4.985559787702907e-06, "loss": 0.4618, "step": 3469 }, { "epoch": 0.21130834576622112, "grad_norm": 1.027310685836467, "learning_rate": 4.985551222296157e-06, "loss": 0.4456, "step": 3470 }, { "epoch": 0.211369241543099, "grad_norm": 1.1275903114729413, "learning_rate": 4.985542654357179e-06, "loss": 0.4351, "step": 3471 }, { "epoch": 0.21143013731997687, "grad_norm": 1.0629429516532836, "learning_rate": 4.985534083885983e-06, "loss": 0.456, "step": 3472 }, { "epoch": 0.21149103309685474, "grad_norm": 1.011511535881752, "learning_rate": 4.985525510882575e-06, "loss": 0.5043, "step": 3473 }, { "epoch": 0.21155192887373261, "grad_norm": 1.0362982612806666, "learning_rate": 4.985516935346967e-06, "loss": 0.4918, "step": 3474 }, { "epoch": 0.2116128246506105, "grad_norm": 1.006366279800218, "learning_rate": 4.985508357279164e-06, "loss": 0.4581, "step": 3475 }, { "epoch": 0.21167372042748836, "grad_norm": 1.0577757424818255, "learning_rate": 4.985499776679178e-06, "loss": 0.5605, "step": 3476 }, { "epoch": 0.21173461620436623, "grad_norm": 1.171379052725659, "learning_rate": 4.985491193547016e-06, "loss": 0.4476, "step": 3477 }, { "epoch": 0.2117955119812441, "grad_norm": 0.9251357731399327, "learning_rate": 4.985482607882688e-06, "loss": 0.4878, "step": 3478 }, { "epoch": 0.21185640775812198, "grad_norm": 1.048574997512551, "learning_rate": 4.9854740196862016e-06, "loss": 0.4294, "step": 3479 }, { "epoch": 0.21191730353499985, "grad_norm": 1.1004148255011466, "learning_rate": 4.985465428957565e-06, "loss": 0.4457, "step": 3480 }, { "epoch": 0.21197819931187772, "grad_norm": 1.0204893888293909, "learning_rate": 4.985456835696789e-06, "loss": 0.5148, "step": 3481 }, { "epoch": 0.2120390950887556, "grad_norm": 1.067891817232085, "learning_rate": 4.9854482399038815e-06, "loss": 0.475, "step": 3482 }, { "epoch": 0.21209999086563347, "grad_norm": 1.039557570794661, "learning_rate": 4.985439641578851e-06, "loss": 0.5514, "step": 3483 }, { "epoch": 0.21216088664251134, "grad_norm": 1.0628007419834808, "learning_rate": 4.985431040721706e-06, "loss": 0.4686, "step": 3484 }, { "epoch": 0.21222178241938922, "grad_norm": 1.0182519191535138, "learning_rate": 4.985422437332454e-06, "loss": 0.5289, "step": 3485 }, { "epoch": 0.2122826781962671, "grad_norm": 1.1351406744537365, "learning_rate": 4.985413831411107e-06, "loss": 0.5549, "step": 3486 }, { "epoch": 0.21234357397314496, "grad_norm": 1.0013771511123946, "learning_rate": 4.985405222957672e-06, "loss": 0.4907, "step": 3487 }, { "epoch": 0.21240446975002283, "grad_norm": 1.0512866713654438, "learning_rate": 4.985396611972158e-06, "loss": 0.437, "step": 3488 }, { "epoch": 0.2124653655269007, "grad_norm": 1.015935754070679, "learning_rate": 4.9853879984545725e-06, "loss": 0.5138, "step": 3489 }, { "epoch": 0.21252626130377858, "grad_norm": 0.9229779184584319, "learning_rate": 4.9853793824049255e-06, "loss": 0.5676, "step": 3490 }, { "epoch": 0.21258715708065645, "grad_norm": 1.0616339917435567, "learning_rate": 4.985370763823227e-06, "loss": 0.5366, "step": 3491 }, { "epoch": 0.21264805285753433, "grad_norm": 1.153546432900535, "learning_rate": 4.985362142709483e-06, "loss": 0.47, "step": 3492 }, { "epoch": 0.2127089486344122, "grad_norm": 1.2488482074242204, "learning_rate": 4.985353519063705e-06, "loss": 0.4284, "step": 3493 }, { "epoch": 0.21276984441129007, "grad_norm": 1.03068727457133, "learning_rate": 4.985344892885899e-06, "loss": 0.4632, "step": 3494 }, { "epoch": 0.21283074018816794, "grad_norm": 1.0548567874148556, "learning_rate": 4.985336264176077e-06, "loss": 0.48, "step": 3495 }, { "epoch": 0.21289163596504582, "grad_norm": 1.0465640754640029, "learning_rate": 4.985327632934245e-06, "loss": 0.5266, "step": 3496 }, { "epoch": 0.2129525317419237, "grad_norm": 0.9875018738575668, "learning_rate": 4.985318999160413e-06, "loss": 0.5273, "step": 3497 }, { "epoch": 0.21301342751880156, "grad_norm": 0.9732033773095238, "learning_rate": 4.98531036285459e-06, "loss": 0.5747, "step": 3498 }, { "epoch": 0.21307432329567944, "grad_norm": 1.0169449552303647, "learning_rate": 4.985301724016783e-06, "loss": 0.5301, "step": 3499 }, { "epoch": 0.2131352190725573, "grad_norm": 0.9906611004134511, "learning_rate": 4.985293082647004e-06, "loss": 0.468, "step": 3500 }, { "epoch": 0.21319611484943518, "grad_norm": 1.063566724987577, "learning_rate": 4.9852844387452594e-06, "loss": 0.5189, "step": 3501 }, { "epoch": 0.21325701062631305, "grad_norm": 1.021459835910639, "learning_rate": 4.985275792311559e-06, "loss": 0.4884, "step": 3502 }, { "epoch": 0.21331790640319093, "grad_norm": 1.0510510382871183, "learning_rate": 4.985267143345912e-06, "loss": 0.5279, "step": 3503 }, { "epoch": 0.2133788021800688, "grad_norm": 1.0764704057660706, "learning_rate": 4.9852584918483245e-06, "loss": 0.5573, "step": 3504 }, { "epoch": 0.21343969795694667, "grad_norm": 1.0891016060688774, "learning_rate": 4.985249837818809e-06, "loss": 0.5386, "step": 3505 }, { "epoch": 0.21350059373382457, "grad_norm": 1.0083768185289161, "learning_rate": 4.985241181257372e-06, "loss": 0.5355, "step": 3506 }, { "epoch": 0.21356148951070245, "grad_norm": 1.0923640763432976, "learning_rate": 4.985232522164023e-06, "loss": 0.467, "step": 3507 }, { "epoch": 0.21362238528758032, "grad_norm": 0.996959754355739, "learning_rate": 4.985223860538771e-06, "loss": 0.5295, "step": 3508 }, { "epoch": 0.2136832810644582, "grad_norm": 1.0108604733090936, "learning_rate": 4.9852151963816246e-06, "loss": 0.4995, "step": 3509 }, { "epoch": 0.21374417684133606, "grad_norm": 1.000422315483787, "learning_rate": 4.985206529692592e-06, "loss": 0.463, "step": 3510 }, { "epoch": 0.21380507261821394, "grad_norm": 0.9539663020941015, "learning_rate": 4.9851978604716834e-06, "loss": 0.5093, "step": 3511 }, { "epoch": 0.2138659683950918, "grad_norm": 1.0558769958787477, "learning_rate": 4.985189188718906e-06, "loss": 0.5108, "step": 3512 }, { "epoch": 0.21392686417196968, "grad_norm": 1.0303940612723397, "learning_rate": 4.985180514434271e-06, "loss": 0.4882, "step": 3513 }, { "epoch": 0.21398775994884756, "grad_norm": 1.0933666137818052, "learning_rate": 4.985171837617785e-06, "loss": 0.5113, "step": 3514 }, { "epoch": 0.21404865572572543, "grad_norm": 1.051103193529704, "learning_rate": 4.985163158269457e-06, "loss": 0.5468, "step": 3515 }, { "epoch": 0.2141095515026033, "grad_norm": 1.0154876720947599, "learning_rate": 4.985154476389297e-06, "loss": 0.5057, "step": 3516 }, { "epoch": 0.21417044727948117, "grad_norm": 1.1057036632058403, "learning_rate": 4.985145791977313e-06, "loss": 0.4818, "step": 3517 }, { "epoch": 0.21423134305635905, "grad_norm": 1.0751174025486592, "learning_rate": 4.985137105033515e-06, "loss": 0.5502, "step": 3518 }, { "epoch": 0.21429223883323692, "grad_norm": 1.0738324363276404, "learning_rate": 4.98512841555791e-06, "loss": 0.4905, "step": 3519 }, { "epoch": 0.2143531346101148, "grad_norm": 1.0730286578825206, "learning_rate": 4.985119723550508e-06, "loss": 0.4685, "step": 3520 }, { "epoch": 0.21441403038699267, "grad_norm": 1.0965605335098958, "learning_rate": 4.985111029011318e-06, "loss": 0.5192, "step": 3521 }, { "epoch": 0.21447492616387054, "grad_norm": 1.1208847682691627, "learning_rate": 4.985102331940349e-06, "loss": 0.5268, "step": 3522 }, { "epoch": 0.2145358219407484, "grad_norm": 1.0785554716342298, "learning_rate": 4.985093632337608e-06, "loss": 0.4822, "step": 3523 }, { "epoch": 0.21459671771762628, "grad_norm": 1.095387011068836, "learning_rate": 4.985084930203107e-06, "loss": 0.5038, "step": 3524 }, { "epoch": 0.21465761349450416, "grad_norm": 1.0114081045631733, "learning_rate": 4.9850762255368516e-06, "loss": 0.5226, "step": 3525 }, { "epoch": 0.21471850927138203, "grad_norm": 1.0473894231891725, "learning_rate": 4.985067518338853e-06, "loss": 0.5279, "step": 3526 }, { "epoch": 0.2147794050482599, "grad_norm": 0.9522292131861048, "learning_rate": 4.985058808609119e-06, "loss": 0.5498, "step": 3527 }, { "epoch": 0.21484030082513778, "grad_norm": 0.9775037670415306, "learning_rate": 4.985050096347659e-06, "loss": 0.5122, "step": 3528 }, { "epoch": 0.21490119660201565, "grad_norm": 0.9761014522037812, "learning_rate": 4.985041381554482e-06, "loss": 0.5251, "step": 3529 }, { "epoch": 0.21496209237889352, "grad_norm": 0.9548130416559433, "learning_rate": 4.9850326642295955e-06, "loss": 0.5642, "step": 3530 }, { "epoch": 0.2150229881557714, "grad_norm": 1.098791477192116, "learning_rate": 4.98502394437301e-06, "loss": 0.5843, "step": 3531 }, { "epoch": 0.21508388393264927, "grad_norm": 1.1280278014737517, "learning_rate": 4.985015221984734e-06, "loss": 0.5021, "step": 3532 }, { "epoch": 0.21514477970952714, "grad_norm": 1.0683169191176245, "learning_rate": 4.985006497064776e-06, "loss": 0.4651, "step": 3533 }, { "epoch": 0.215205675486405, "grad_norm": 1.1095765924125272, "learning_rate": 4.984997769613145e-06, "loss": 0.5001, "step": 3534 }, { "epoch": 0.21526657126328289, "grad_norm": 0.958966818476649, "learning_rate": 4.98498903962985e-06, "loss": 0.4863, "step": 3535 }, { "epoch": 0.21532746704016076, "grad_norm": 1.053699370757304, "learning_rate": 4.984980307114899e-06, "loss": 0.4917, "step": 3536 }, { "epoch": 0.21538836281703863, "grad_norm": 1.084109637735801, "learning_rate": 4.9849715720683025e-06, "loss": 0.5081, "step": 3537 }, { "epoch": 0.2154492585939165, "grad_norm": 1.0280232838626695, "learning_rate": 4.984962834490068e-06, "loss": 0.499, "step": 3538 }, { "epoch": 0.21551015437079438, "grad_norm": 1.0303213174295196, "learning_rate": 4.984954094380205e-06, "loss": 0.4589, "step": 3539 }, { "epoch": 0.21557105014767225, "grad_norm": 1.099978769921647, "learning_rate": 4.984945351738724e-06, "loss": 0.5282, "step": 3540 }, { "epoch": 0.21563194592455012, "grad_norm": 0.9809994178449754, "learning_rate": 4.984936606565631e-06, "loss": 0.5171, "step": 3541 }, { "epoch": 0.215692841701428, "grad_norm": 0.953575100214995, "learning_rate": 4.9849278588609365e-06, "loss": 0.5332, "step": 3542 }, { "epoch": 0.21575373747830587, "grad_norm": 1.0336117583560018, "learning_rate": 4.984919108624649e-06, "loss": 0.5347, "step": 3543 }, { "epoch": 0.21581463325518374, "grad_norm": 1.0571292826915153, "learning_rate": 4.984910355856778e-06, "loss": 0.5419, "step": 3544 }, { "epoch": 0.2158755290320616, "grad_norm": 1.028994703868717, "learning_rate": 4.984901600557332e-06, "loss": 0.5084, "step": 3545 }, { "epoch": 0.2159364248089395, "grad_norm": 1.1484746818838043, "learning_rate": 4.984892842726319e-06, "loss": 0.5062, "step": 3546 }, { "epoch": 0.2159973205858174, "grad_norm": 1.0988222949539712, "learning_rate": 4.98488408236375e-06, "loss": 0.5173, "step": 3547 }, { "epoch": 0.21605821636269526, "grad_norm": 1.0036478010112153, "learning_rate": 4.984875319469632e-06, "loss": 0.5002, "step": 3548 }, { "epoch": 0.21611911213957313, "grad_norm": 1.1435748735014262, "learning_rate": 4.984866554043975e-06, "loss": 0.4802, "step": 3549 }, { "epoch": 0.216180007916451, "grad_norm": 1.0682719494387507, "learning_rate": 4.984857786086787e-06, "loss": 0.4908, "step": 3550 }, { "epoch": 0.21624090369332888, "grad_norm": 1.0320908165818885, "learning_rate": 4.984849015598079e-06, "loss": 0.5319, "step": 3551 }, { "epoch": 0.21630179947020675, "grad_norm": 1.1103984367486937, "learning_rate": 4.984840242577857e-06, "loss": 0.5164, "step": 3552 }, { "epoch": 0.21636269524708462, "grad_norm": 1.1232057330201966, "learning_rate": 4.984831467026132e-06, "loss": 0.488, "step": 3553 }, { "epoch": 0.2164235910239625, "grad_norm": 1.1552259871373736, "learning_rate": 4.984822688942913e-06, "loss": 0.4937, "step": 3554 }, { "epoch": 0.21648448680084037, "grad_norm": 1.1197892754975105, "learning_rate": 4.984813908328208e-06, "loss": 0.443, "step": 3555 }, { "epoch": 0.21654538257771824, "grad_norm": 1.0812298419807627, "learning_rate": 4.984805125182026e-06, "loss": 0.5099, "step": 3556 }, { "epoch": 0.21660627835459612, "grad_norm": 1.0961155046352729, "learning_rate": 4.984796339504376e-06, "loss": 0.4763, "step": 3557 }, { "epoch": 0.216667174131474, "grad_norm": 1.0045700051174846, "learning_rate": 4.984787551295267e-06, "loss": 0.563, "step": 3558 }, { "epoch": 0.21672806990835186, "grad_norm": 1.0595934139111627, "learning_rate": 4.9847787605547085e-06, "loss": 0.474, "step": 3559 }, { "epoch": 0.21678896568522973, "grad_norm": 1.0220847896769076, "learning_rate": 4.984769967282708e-06, "loss": 0.5005, "step": 3560 }, { "epoch": 0.2168498614621076, "grad_norm": 1.0653039885978497, "learning_rate": 4.9847611714792775e-06, "loss": 0.4699, "step": 3561 }, { "epoch": 0.21691075723898548, "grad_norm": 0.9920550940915338, "learning_rate": 4.9847523731444225e-06, "loss": 0.5728, "step": 3562 }, { "epoch": 0.21697165301586335, "grad_norm": 1.0517730083971346, "learning_rate": 4.984743572278155e-06, "loss": 0.4926, "step": 3563 }, { "epoch": 0.21703254879274123, "grad_norm": 1.0137027294087486, "learning_rate": 4.98473476888048e-06, "loss": 0.5179, "step": 3564 }, { "epoch": 0.2170934445696191, "grad_norm": 1.0186563884219648, "learning_rate": 4.984725962951411e-06, "loss": 0.5447, "step": 3565 }, { "epoch": 0.21715434034649697, "grad_norm": 1.0512192738034583, "learning_rate": 4.984717154490953e-06, "loss": 0.5287, "step": 3566 }, { "epoch": 0.21721523612337484, "grad_norm": 1.0744107659301123, "learning_rate": 4.984708343499119e-06, "loss": 0.4599, "step": 3567 }, { "epoch": 0.21727613190025272, "grad_norm": 1.167163280112619, "learning_rate": 4.984699529975914e-06, "loss": 0.5466, "step": 3568 }, { "epoch": 0.2173370276771306, "grad_norm": 1.0101606815034203, "learning_rate": 4.9846907139213495e-06, "loss": 0.54, "step": 3569 }, { "epoch": 0.21739792345400846, "grad_norm": 1.0709126962876254, "learning_rate": 4.9846818953354335e-06, "loss": 0.4721, "step": 3570 }, { "epoch": 0.21745881923088634, "grad_norm": 1.0657408028159654, "learning_rate": 4.984673074218176e-06, "loss": 0.4949, "step": 3571 }, { "epoch": 0.2175197150077642, "grad_norm": 1.1163570870729875, "learning_rate": 4.984664250569584e-06, "loss": 0.5076, "step": 3572 }, { "epoch": 0.21758061078464208, "grad_norm": 0.9456135808870202, "learning_rate": 4.984655424389669e-06, "loss": 0.558, "step": 3573 }, { "epoch": 0.21764150656151995, "grad_norm": 0.9889784372565126, "learning_rate": 4.984646595678438e-06, "loss": 0.4962, "step": 3574 }, { "epoch": 0.21770240233839783, "grad_norm": 1.1411530581985125, "learning_rate": 4.984637764435901e-06, "loss": 0.4664, "step": 3575 }, { "epoch": 0.2177632981152757, "grad_norm": 1.096702531386223, "learning_rate": 4.984628930662067e-06, "loss": 0.4679, "step": 3576 }, { "epoch": 0.21782419389215357, "grad_norm": 1.1625168691277445, "learning_rate": 4.984620094356943e-06, "loss": 0.5637, "step": 3577 }, { "epoch": 0.21788508966903145, "grad_norm": 0.9719153646059796, "learning_rate": 4.984611255520542e-06, "loss": 0.4535, "step": 3578 }, { "epoch": 0.21794598544590932, "grad_norm": 1.060071160533184, "learning_rate": 4.984602414152869e-06, "loss": 0.5626, "step": 3579 }, { "epoch": 0.2180068812227872, "grad_norm": 1.0608807475998072, "learning_rate": 4.984593570253935e-06, "loss": 0.5655, "step": 3580 }, { "epoch": 0.21806777699966506, "grad_norm": 1.0273762418682866, "learning_rate": 4.984584723823749e-06, "loss": 0.4848, "step": 3581 }, { "epoch": 0.21812867277654294, "grad_norm": 1.016711172642323, "learning_rate": 4.98457587486232e-06, "loss": 0.5558, "step": 3582 }, { "epoch": 0.2181895685534208, "grad_norm": 1.009328895916967, "learning_rate": 4.984567023369656e-06, "loss": 0.5056, "step": 3583 }, { "epoch": 0.21825046433029868, "grad_norm": 1.0150473649731746, "learning_rate": 4.984558169345768e-06, "loss": 0.4747, "step": 3584 }, { "epoch": 0.21831136010717656, "grad_norm": 0.9866278639253534, "learning_rate": 4.984549312790663e-06, "loss": 0.5068, "step": 3585 }, { "epoch": 0.21837225588405443, "grad_norm": 1.0177449673468346, "learning_rate": 4.9845404537043515e-06, "loss": 0.5561, "step": 3586 }, { "epoch": 0.2184331516609323, "grad_norm": 0.969978004435348, "learning_rate": 4.984531592086841e-06, "loss": 0.5391, "step": 3587 }, { "epoch": 0.2184940474378102, "grad_norm": 1.0729564760040857, "learning_rate": 4.984522727938142e-06, "loss": 0.4734, "step": 3588 }, { "epoch": 0.21855494321468807, "grad_norm": 1.0534362309954948, "learning_rate": 4.9845138612582625e-06, "loss": 0.5239, "step": 3589 }, { "epoch": 0.21861583899156595, "grad_norm": 1.0845357001814593, "learning_rate": 4.984504992047212e-06, "loss": 0.4826, "step": 3590 }, { "epoch": 0.21867673476844382, "grad_norm": 1.0322439791410114, "learning_rate": 4.984496120304999e-06, "loss": 0.462, "step": 3591 }, { "epoch": 0.2187376305453217, "grad_norm": 1.0466083461751259, "learning_rate": 4.984487246031633e-06, "loss": 0.5313, "step": 3592 }, { "epoch": 0.21879852632219957, "grad_norm": 1.0533933393503696, "learning_rate": 4.9844783692271235e-06, "loss": 0.4398, "step": 3593 }, { "epoch": 0.21885942209907744, "grad_norm": 0.9835983853941652, "learning_rate": 4.984469489891479e-06, "loss": 0.5397, "step": 3594 }, { "epoch": 0.2189203178759553, "grad_norm": 1.0399830933307308, "learning_rate": 4.984460608024709e-06, "loss": 0.4741, "step": 3595 }, { "epoch": 0.21898121365283318, "grad_norm": 1.0853151252509252, "learning_rate": 4.984451723626822e-06, "loss": 0.4736, "step": 3596 }, { "epoch": 0.21904210942971106, "grad_norm": 1.084687281491857, "learning_rate": 4.984442836697827e-06, "loss": 0.4979, "step": 3597 }, { "epoch": 0.21910300520658893, "grad_norm": 1.0932520844224118, "learning_rate": 4.984433947237734e-06, "loss": 0.4506, "step": 3598 }, { "epoch": 0.2191639009834668, "grad_norm": 0.9686437538333063, "learning_rate": 4.984425055246551e-06, "loss": 0.5443, "step": 3599 }, { "epoch": 0.21922479676034468, "grad_norm": 1.0894462606556667, "learning_rate": 4.984416160724287e-06, "loss": 0.5715, "step": 3600 }, { "epoch": 0.21928569253722255, "grad_norm": 0.9718910607669609, "learning_rate": 4.984407263670952e-06, "loss": 0.5711, "step": 3601 }, { "epoch": 0.21934658831410042, "grad_norm": 1.0251470225750992, "learning_rate": 4.984398364086554e-06, "loss": 0.5538, "step": 3602 }, { "epoch": 0.2194074840909783, "grad_norm": 1.0792308118946277, "learning_rate": 4.984389461971103e-06, "loss": 0.507, "step": 3603 }, { "epoch": 0.21946837986785617, "grad_norm": 0.9264043216483323, "learning_rate": 4.9843805573246065e-06, "loss": 0.4842, "step": 3604 }, { "epoch": 0.21952927564473404, "grad_norm": 1.00976698604612, "learning_rate": 4.984371650147077e-06, "loss": 0.509, "step": 3605 }, { "epoch": 0.2195901714216119, "grad_norm": 1.105847716734821, "learning_rate": 4.984362740438519e-06, "loss": 0.4759, "step": 3606 }, { "epoch": 0.21965106719848979, "grad_norm": 1.0957670612753891, "learning_rate": 4.984353828198945e-06, "loss": 0.5235, "step": 3607 }, { "epoch": 0.21971196297536766, "grad_norm": 0.9517713398996043, "learning_rate": 4.984344913428363e-06, "loss": 0.5589, "step": 3608 }, { "epoch": 0.21977285875224553, "grad_norm": 0.9814473910069393, "learning_rate": 4.984335996126782e-06, "loss": 0.4486, "step": 3609 }, { "epoch": 0.2198337545291234, "grad_norm": 1.0469018074397163, "learning_rate": 4.984327076294211e-06, "loss": 0.4684, "step": 3610 }, { "epoch": 0.21989465030600128, "grad_norm": 1.030444647433261, "learning_rate": 4.98431815393066e-06, "loss": 0.4877, "step": 3611 }, { "epoch": 0.21995554608287915, "grad_norm": 1.0580250183044841, "learning_rate": 4.984309229036136e-06, "loss": 0.55, "step": 3612 }, { "epoch": 0.22001644185975702, "grad_norm": 0.9753491655572889, "learning_rate": 4.98430030161065e-06, "loss": 0.529, "step": 3613 }, { "epoch": 0.2200773376366349, "grad_norm": 1.0277327911265826, "learning_rate": 4.98429137165421e-06, "loss": 0.4669, "step": 3614 }, { "epoch": 0.22013823341351277, "grad_norm": 0.9892479774981794, "learning_rate": 4.984282439166827e-06, "loss": 0.4905, "step": 3615 }, { "epoch": 0.22019912919039064, "grad_norm": 0.9821434408365656, "learning_rate": 4.984273504148507e-06, "loss": 0.4962, "step": 3616 }, { "epoch": 0.22026002496726851, "grad_norm": 1.034569566341808, "learning_rate": 4.984264566599262e-06, "loss": 0.4871, "step": 3617 }, { "epoch": 0.2203209207441464, "grad_norm": 1.0528372794915533, "learning_rate": 4.9842556265190995e-06, "loss": 0.5077, "step": 3618 }, { "epoch": 0.22038181652102426, "grad_norm": 1.1016868895006773, "learning_rate": 4.984246683908029e-06, "loss": 0.4887, "step": 3619 }, { "epoch": 0.22044271229790213, "grad_norm": 1.060326373708191, "learning_rate": 4.98423773876606e-06, "loss": 0.4705, "step": 3620 }, { "epoch": 0.22050360807478, "grad_norm": 1.2313197749777403, "learning_rate": 4.984228791093201e-06, "loss": 0.5148, "step": 3621 }, { "epoch": 0.22056450385165788, "grad_norm": 1.0093074464343585, "learning_rate": 4.9842198408894604e-06, "loss": 0.4955, "step": 3622 }, { "epoch": 0.22062539962853575, "grad_norm": 0.9538930438794321, "learning_rate": 4.98421088815485e-06, "loss": 0.5591, "step": 3623 }, { "epoch": 0.22068629540541362, "grad_norm": 1.0680132811883842, "learning_rate": 4.984201932889376e-06, "loss": 0.4778, "step": 3624 }, { "epoch": 0.2207471911822915, "grad_norm": 1.0676372254971267, "learning_rate": 4.984192975093049e-06, "loss": 0.5423, "step": 3625 }, { "epoch": 0.22080808695916937, "grad_norm": 1.0092946899408097, "learning_rate": 4.984184014765878e-06, "loss": 0.5252, "step": 3626 }, { "epoch": 0.22086898273604724, "grad_norm": 0.9630429618084504, "learning_rate": 4.984175051907872e-06, "loss": 0.5383, "step": 3627 }, { "epoch": 0.22092987851292512, "grad_norm": 1.095491873356134, "learning_rate": 4.98416608651904e-06, "loss": 0.4845, "step": 3628 }, { "epoch": 0.22099077428980302, "grad_norm": 1.0198616066440156, "learning_rate": 4.984157118599391e-06, "loss": 0.5227, "step": 3629 }, { "epoch": 0.2210516700666809, "grad_norm": 0.9264900035243322, "learning_rate": 4.984148148148935e-06, "loss": 0.6027, "step": 3630 }, { "epoch": 0.22111256584355876, "grad_norm": 0.9920303895009099, "learning_rate": 4.984139175167679e-06, "loss": 0.4753, "step": 3631 }, { "epoch": 0.22117346162043663, "grad_norm": 1.0689095519860532, "learning_rate": 4.984130199655636e-06, "loss": 0.464, "step": 3632 }, { "epoch": 0.2212343573973145, "grad_norm": 1.0693238030282803, "learning_rate": 4.984121221612811e-06, "loss": 0.4765, "step": 3633 }, { "epoch": 0.22129525317419238, "grad_norm": 0.9561980051294979, "learning_rate": 4.984112241039216e-06, "loss": 0.5137, "step": 3634 }, { "epoch": 0.22135614895107025, "grad_norm": 1.1110219735739506, "learning_rate": 4.984103257934858e-06, "loss": 0.4915, "step": 3635 }, { "epoch": 0.22141704472794813, "grad_norm": 1.0085938542613546, "learning_rate": 4.984094272299748e-06, "loss": 0.5537, "step": 3636 }, { "epoch": 0.221477940504826, "grad_norm": 1.0206446369423188, "learning_rate": 4.9840852841338946e-06, "loss": 0.4968, "step": 3637 }, { "epoch": 0.22153883628170387, "grad_norm": 1.008104305805527, "learning_rate": 4.984076293437306e-06, "loss": 0.4517, "step": 3638 }, { "epoch": 0.22159973205858174, "grad_norm": 1.0090671849446387, "learning_rate": 4.984067300209993e-06, "loss": 0.5659, "step": 3639 }, { "epoch": 0.22166062783545962, "grad_norm": 1.0786615203573802, "learning_rate": 4.984058304451963e-06, "loss": 0.4951, "step": 3640 }, { "epoch": 0.2217215236123375, "grad_norm": 0.9701775348709315, "learning_rate": 4.984049306163227e-06, "loss": 0.5241, "step": 3641 }, { "epoch": 0.22178241938921536, "grad_norm": 1.0102500306416036, "learning_rate": 4.984040305343792e-06, "loss": 0.4857, "step": 3642 }, { "epoch": 0.22184331516609324, "grad_norm": 0.9569484349056119, "learning_rate": 4.98403130199367e-06, "loss": 0.5094, "step": 3643 }, { "epoch": 0.2219042109429711, "grad_norm": 1.0412011668717063, "learning_rate": 4.984022296112867e-06, "loss": 0.5427, "step": 3644 }, { "epoch": 0.22196510671984898, "grad_norm": 1.0459647000536478, "learning_rate": 4.984013287701394e-06, "loss": 0.5008, "step": 3645 }, { "epoch": 0.22202600249672685, "grad_norm": 1.1306911614441362, "learning_rate": 4.98400427675926e-06, "loss": 0.4445, "step": 3646 }, { "epoch": 0.22208689827360473, "grad_norm": 1.1329550795422219, "learning_rate": 4.9839952632864745e-06, "loss": 0.4966, "step": 3647 }, { "epoch": 0.2221477940504826, "grad_norm": 0.9554822254406653, "learning_rate": 4.983986247283046e-06, "loss": 0.4955, "step": 3648 }, { "epoch": 0.22220868982736047, "grad_norm": 1.1513353066225238, "learning_rate": 4.983977228748984e-06, "loss": 0.4747, "step": 3649 }, { "epoch": 0.22226958560423835, "grad_norm": 1.090172423498002, "learning_rate": 4.983968207684298e-06, "loss": 0.5289, "step": 3650 }, { "epoch": 0.22233048138111622, "grad_norm": 1.1265133214903589, "learning_rate": 4.983959184088996e-06, "loss": 0.4129, "step": 3651 }, { "epoch": 0.2223913771579941, "grad_norm": 1.196251804387111, "learning_rate": 4.983950157963089e-06, "loss": 0.5338, "step": 3652 }, { "epoch": 0.22245227293487196, "grad_norm": 1.0334343219976312, "learning_rate": 4.983941129306585e-06, "loss": 0.4532, "step": 3653 }, { "epoch": 0.22251316871174984, "grad_norm": 1.014178843726231, "learning_rate": 4.983932098119493e-06, "loss": 0.5225, "step": 3654 }, { "epoch": 0.2225740644886277, "grad_norm": 1.060130626201651, "learning_rate": 4.983923064401823e-06, "loss": 0.4889, "step": 3655 }, { "epoch": 0.22263496026550558, "grad_norm": 1.0125945711546855, "learning_rate": 4.9839140281535835e-06, "loss": 0.5325, "step": 3656 }, { "epoch": 0.22269585604238346, "grad_norm": 0.9769895410889771, "learning_rate": 4.983904989374783e-06, "loss": 0.5207, "step": 3657 }, { "epoch": 0.22275675181926133, "grad_norm": 1.0570311616504617, "learning_rate": 4.983895948065433e-06, "loss": 0.5231, "step": 3658 }, { "epoch": 0.2228176475961392, "grad_norm": 0.9925997999763783, "learning_rate": 4.983886904225543e-06, "loss": 0.4429, "step": 3659 }, { "epoch": 0.22287854337301707, "grad_norm": 0.958117719090135, "learning_rate": 4.983877857855118e-06, "loss": 0.5546, "step": 3660 }, { "epoch": 0.22293943914989495, "grad_norm": 1.081575798347445, "learning_rate": 4.983868808954171e-06, "loss": 0.5281, "step": 3661 }, { "epoch": 0.22300033492677282, "grad_norm": 1.0057490391327615, "learning_rate": 4.98385975752271e-06, "loss": 0.5107, "step": 3662 }, { "epoch": 0.2230612307036507, "grad_norm": 1.002430332098149, "learning_rate": 4.9838507035607445e-06, "loss": 0.4415, "step": 3663 }, { "epoch": 0.22312212648052857, "grad_norm": 1.1517345079342831, "learning_rate": 4.983841647068284e-06, "loss": 0.4909, "step": 3664 }, { "epoch": 0.22318302225740644, "grad_norm": 1.2061091479996402, "learning_rate": 4.983832588045336e-06, "loss": 0.5002, "step": 3665 }, { "epoch": 0.2232439180342843, "grad_norm": 1.0618157442830711, "learning_rate": 4.9838235264919115e-06, "loss": 0.4973, "step": 3666 }, { "epoch": 0.22330481381116218, "grad_norm": 1.0569504257258773, "learning_rate": 4.98381446240802e-06, "loss": 0.4688, "step": 3667 }, { "epoch": 0.22336570958804006, "grad_norm": 1.0840193511230167, "learning_rate": 4.983805395793669e-06, "loss": 0.5157, "step": 3668 }, { "epoch": 0.22342660536491793, "grad_norm": 0.9771170397587104, "learning_rate": 4.983796326648869e-06, "loss": 0.5174, "step": 3669 }, { "epoch": 0.22348750114179583, "grad_norm": 1.0689973424421182, "learning_rate": 4.983787254973629e-06, "loss": 0.4876, "step": 3670 }, { "epoch": 0.2235483969186737, "grad_norm": 1.022888645024949, "learning_rate": 4.983778180767958e-06, "loss": 0.5207, "step": 3671 }, { "epoch": 0.22360929269555158, "grad_norm": 1.074167007857084, "learning_rate": 4.9837691040318656e-06, "loss": 0.5736, "step": 3672 }, { "epoch": 0.22367018847242945, "grad_norm": 1.0146785158032263, "learning_rate": 4.983760024765361e-06, "loss": 0.4905, "step": 3673 }, { "epoch": 0.22373108424930732, "grad_norm": 0.9852477276164616, "learning_rate": 4.983750942968452e-06, "loss": 0.5199, "step": 3674 }, { "epoch": 0.2237919800261852, "grad_norm": 0.9520716641224147, "learning_rate": 4.983741858641151e-06, "loss": 0.5013, "step": 3675 }, { "epoch": 0.22385287580306307, "grad_norm": 0.9625468738064992, "learning_rate": 4.983732771783465e-06, "loss": 0.5036, "step": 3676 }, { "epoch": 0.22391377157994094, "grad_norm": 0.9921370188712887, "learning_rate": 4.983723682395404e-06, "loss": 0.5196, "step": 3677 }, { "epoch": 0.2239746673568188, "grad_norm": 1.1441418990127838, "learning_rate": 4.983714590476976e-06, "loss": 0.4116, "step": 3678 }, { "epoch": 0.22403556313369669, "grad_norm": 1.082491842997823, "learning_rate": 4.983705496028192e-06, "loss": 0.4806, "step": 3679 }, { "epoch": 0.22409645891057456, "grad_norm": 1.0370195137056684, "learning_rate": 4.98369639904906e-06, "loss": 0.5195, "step": 3680 }, { "epoch": 0.22415735468745243, "grad_norm": 1.0333843525215605, "learning_rate": 4.9836872995395905e-06, "loss": 0.4907, "step": 3681 }, { "epoch": 0.2242182504643303, "grad_norm": 1.114320515939684, "learning_rate": 4.983678197499791e-06, "loss": 0.4709, "step": 3682 }, { "epoch": 0.22427914624120818, "grad_norm": 1.1162487546310778, "learning_rate": 4.9836690929296715e-06, "loss": 0.4929, "step": 3683 }, { "epoch": 0.22434004201808605, "grad_norm": 1.0495223185092992, "learning_rate": 4.983659985829242e-06, "loss": 0.5095, "step": 3684 }, { "epoch": 0.22440093779496392, "grad_norm": 1.0558277149045903, "learning_rate": 4.983650876198513e-06, "loss": 0.484, "step": 3685 }, { "epoch": 0.2244618335718418, "grad_norm": 1.0446666886000473, "learning_rate": 4.98364176403749e-06, "loss": 0.5479, "step": 3686 }, { "epoch": 0.22452272934871967, "grad_norm": 1.0617704341336716, "learning_rate": 4.983632649346185e-06, "loss": 0.5014, "step": 3687 }, { "epoch": 0.22458362512559754, "grad_norm": 0.9390263602169476, "learning_rate": 4.9836235321246064e-06, "loss": 0.4904, "step": 3688 }, { "epoch": 0.22464452090247541, "grad_norm": 1.0827146730456525, "learning_rate": 4.983614412372765e-06, "loss": 0.4799, "step": 3689 }, { "epoch": 0.2247054166793533, "grad_norm": 0.9868236134692797, "learning_rate": 4.983605290090668e-06, "loss": 0.4918, "step": 3690 }, { "epoch": 0.22476631245623116, "grad_norm": 1.0775289876324547, "learning_rate": 4.983596165278325e-06, "loss": 0.4907, "step": 3691 }, { "epoch": 0.22482720823310903, "grad_norm": 1.057323032753069, "learning_rate": 4.9835870379357464e-06, "loss": 0.5981, "step": 3692 }, { "epoch": 0.2248881040099869, "grad_norm": 1.1134981490237437, "learning_rate": 4.983577908062941e-06, "loss": 0.4775, "step": 3693 }, { "epoch": 0.22494899978686478, "grad_norm": 1.0518086110848555, "learning_rate": 4.983568775659918e-06, "loss": 0.4894, "step": 3694 }, { "epoch": 0.22500989556374265, "grad_norm": 1.1918477590989611, "learning_rate": 4.9835596407266875e-06, "loss": 0.4432, "step": 3695 }, { "epoch": 0.22507079134062052, "grad_norm": 1.1633733543908427, "learning_rate": 4.9835505032632576e-06, "loss": 0.538, "step": 3696 }, { "epoch": 0.2251316871174984, "grad_norm": 1.0535741243759975, "learning_rate": 4.983541363269638e-06, "loss": 0.4881, "step": 3697 }, { "epoch": 0.22519258289437627, "grad_norm": 0.9987444995260891, "learning_rate": 4.983532220745838e-06, "loss": 0.5471, "step": 3698 }, { "epoch": 0.22525347867125414, "grad_norm": 1.043180854127297, "learning_rate": 4.9835230756918664e-06, "loss": 0.4816, "step": 3699 }, { "epoch": 0.22531437444813202, "grad_norm": 1.0712837106295863, "learning_rate": 4.983513928107734e-06, "loss": 0.4569, "step": 3700 }, { "epoch": 0.2253752702250099, "grad_norm": 1.0014453767979237, "learning_rate": 4.983504777993449e-06, "loss": 0.4622, "step": 3701 }, { "epoch": 0.22543616600188776, "grad_norm": 1.012923974362725, "learning_rate": 4.983495625349021e-06, "loss": 0.5966, "step": 3702 }, { "epoch": 0.22549706177876563, "grad_norm": 1.059595012427275, "learning_rate": 4.983486470174459e-06, "loss": 0.5185, "step": 3703 }, { "epoch": 0.2255579575556435, "grad_norm": 1.0344802691444002, "learning_rate": 4.9834773124697735e-06, "loss": 0.5062, "step": 3704 }, { "epoch": 0.22561885333252138, "grad_norm": 1.0743234176377614, "learning_rate": 4.983468152234972e-06, "loss": 0.4726, "step": 3705 }, { "epoch": 0.22567974910939925, "grad_norm": 1.076741005843409, "learning_rate": 4.983458989470065e-06, "loss": 0.4622, "step": 3706 }, { "epoch": 0.22574064488627713, "grad_norm": 1.0315842567351479, "learning_rate": 4.983449824175062e-06, "loss": 0.4549, "step": 3707 }, { "epoch": 0.225801540663155, "grad_norm": 1.1133313367540218, "learning_rate": 4.983440656349972e-06, "loss": 0.5903, "step": 3708 }, { "epoch": 0.22586243644003287, "grad_norm": 0.9322357440896087, "learning_rate": 4.983431485994803e-06, "loss": 0.515, "step": 3709 }, { "epoch": 0.22592333221691074, "grad_norm": 1.0658420975407386, "learning_rate": 4.9834223131095675e-06, "loss": 0.4275, "step": 3710 }, { "epoch": 0.22598422799378864, "grad_norm": 0.9495233903246977, "learning_rate": 4.9834131376942715e-06, "loss": 0.4973, "step": 3711 }, { "epoch": 0.22604512377066652, "grad_norm": 1.1228274437326409, "learning_rate": 4.983403959748927e-06, "loss": 0.5674, "step": 3712 }, { "epoch": 0.2261060195475444, "grad_norm": 1.101785497436263, "learning_rate": 4.983394779273542e-06, "loss": 0.4185, "step": 3713 }, { "epoch": 0.22616691532442226, "grad_norm": 1.1604830390741283, "learning_rate": 4.983385596268125e-06, "loss": 0.435, "step": 3714 }, { "epoch": 0.22622781110130014, "grad_norm": 1.0703028572822528, "learning_rate": 4.983376410732686e-06, "loss": 0.474, "step": 3715 }, { "epoch": 0.226288706878178, "grad_norm": 1.0011493893344998, "learning_rate": 4.983367222667237e-06, "loss": 0.5219, "step": 3716 }, { "epoch": 0.22634960265505588, "grad_norm": 1.11984783985255, "learning_rate": 4.983358032071783e-06, "loss": 0.4307, "step": 3717 }, { "epoch": 0.22641049843193375, "grad_norm": 1.1210717957555651, "learning_rate": 4.983348838946337e-06, "loss": 0.5006, "step": 3718 }, { "epoch": 0.22647139420881163, "grad_norm": 0.9514429573211931, "learning_rate": 4.983339643290906e-06, "loss": 0.6271, "step": 3719 }, { "epoch": 0.2265322899856895, "grad_norm": 0.986467215810796, "learning_rate": 4.9833304451055e-06, "loss": 0.5453, "step": 3720 }, { "epoch": 0.22659318576256737, "grad_norm": 1.0471072271111141, "learning_rate": 4.983321244390129e-06, "loss": 0.4969, "step": 3721 }, { "epoch": 0.22665408153944525, "grad_norm": 1.041484121284085, "learning_rate": 4.983312041144802e-06, "loss": 0.5207, "step": 3722 }, { "epoch": 0.22671497731632312, "grad_norm": 1.0381568525385074, "learning_rate": 4.983302835369528e-06, "loss": 0.4827, "step": 3723 }, { "epoch": 0.226775873093201, "grad_norm": 1.0971295190937118, "learning_rate": 4.983293627064317e-06, "loss": 0.503, "step": 3724 }, { "epoch": 0.22683676887007886, "grad_norm": 1.0120260811476922, "learning_rate": 4.983284416229178e-06, "loss": 0.5802, "step": 3725 }, { "epoch": 0.22689766464695674, "grad_norm": 1.051108740907145, "learning_rate": 4.983275202864121e-06, "loss": 0.5611, "step": 3726 }, { "epoch": 0.2269585604238346, "grad_norm": 1.0139916412471541, "learning_rate": 4.983265986969153e-06, "loss": 0.5115, "step": 3727 }, { "epoch": 0.22701945620071248, "grad_norm": 1.03856992740413, "learning_rate": 4.983256768544287e-06, "loss": 0.5295, "step": 3728 }, { "epoch": 0.22708035197759036, "grad_norm": 1.0703636822112685, "learning_rate": 4.9832475475895305e-06, "loss": 0.5519, "step": 3729 }, { "epoch": 0.22714124775446823, "grad_norm": 1.0218200703347118, "learning_rate": 4.983238324104893e-06, "loss": 0.5112, "step": 3730 }, { "epoch": 0.2272021435313461, "grad_norm": 0.9787784585950156, "learning_rate": 4.983229098090383e-06, "loss": 0.5202, "step": 3731 }, { "epoch": 0.22726303930822397, "grad_norm": 1.039645387408998, "learning_rate": 4.983219869546012e-06, "loss": 0.4975, "step": 3732 }, { "epoch": 0.22732393508510185, "grad_norm": 1.0861992566900163, "learning_rate": 4.983210638471787e-06, "loss": 0.4357, "step": 3733 }, { "epoch": 0.22738483086197972, "grad_norm": 1.0138069141267996, "learning_rate": 4.98320140486772e-06, "loss": 0.4893, "step": 3734 }, { "epoch": 0.2274457266388576, "grad_norm": 0.9739057975920656, "learning_rate": 4.983192168733818e-06, "loss": 0.5305, "step": 3735 }, { "epoch": 0.22750662241573547, "grad_norm": 1.008614745399796, "learning_rate": 4.983182930070092e-06, "loss": 0.561, "step": 3736 }, { "epoch": 0.22756751819261334, "grad_norm": 1.0260249812941757, "learning_rate": 4.983173688876551e-06, "loss": 0.4524, "step": 3737 }, { "epoch": 0.2276284139694912, "grad_norm": 1.063459758955186, "learning_rate": 4.983164445153203e-06, "loss": 0.4781, "step": 3738 }, { "epoch": 0.22768930974636908, "grad_norm": 0.9990103337566727, "learning_rate": 4.98315519890006e-06, "loss": 0.4808, "step": 3739 }, { "epoch": 0.22775020552324696, "grad_norm": 0.9878451659167987, "learning_rate": 4.98314595011713e-06, "loss": 0.4986, "step": 3740 }, { "epoch": 0.22781110130012483, "grad_norm": 1.0431726882212, "learning_rate": 4.983136698804422e-06, "loss": 0.4869, "step": 3741 }, { "epoch": 0.2278719970770027, "grad_norm": 0.9770139117393136, "learning_rate": 4.983127444961946e-06, "loss": 0.4889, "step": 3742 }, { "epoch": 0.22793289285388058, "grad_norm": 0.9823464157712205, "learning_rate": 4.983118188589712e-06, "loss": 0.5104, "step": 3743 }, { "epoch": 0.22799378863075845, "grad_norm": 0.982543162772777, "learning_rate": 4.9831089296877275e-06, "loss": 0.5377, "step": 3744 }, { "epoch": 0.22805468440763632, "grad_norm": 1.103635489431834, "learning_rate": 4.9830996682560045e-06, "loss": 0.4814, "step": 3745 }, { "epoch": 0.2281155801845142, "grad_norm": 0.9842092093558494, "learning_rate": 4.983090404294551e-06, "loss": 0.5401, "step": 3746 }, { "epoch": 0.22817647596139207, "grad_norm": 0.9758647394495704, "learning_rate": 4.983081137803376e-06, "loss": 0.4883, "step": 3747 }, { "epoch": 0.22823737173826994, "grad_norm": 1.0546473892803705, "learning_rate": 4.9830718687824905e-06, "loss": 0.5106, "step": 3748 }, { "epoch": 0.2282982675151478, "grad_norm": 1.0949829093951071, "learning_rate": 4.983062597231903e-06, "loss": 0.5018, "step": 3749 }, { "epoch": 0.22835916329202569, "grad_norm": 1.0863763305553835, "learning_rate": 4.983053323151622e-06, "loss": 0.4438, "step": 3750 }, { "epoch": 0.22842005906890356, "grad_norm": 1.063410478764649, "learning_rate": 4.9830440465416575e-06, "loss": 0.5212, "step": 3751 }, { "epoch": 0.22848095484578146, "grad_norm": 1.063333577899775, "learning_rate": 4.9830347674020205e-06, "loss": 0.5321, "step": 3752 }, { "epoch": 0.22854185062265933, "grad_norm": 1.0512383326853638, "learning_rate": 4.9830254857327195e-06, "loss": 0.4444, "step": 3753 }, { "epoch": 0.2286027463995372, "grad_norm": 0.99803840389353, "learning_rate": 4.983016201533763e-06, "loss": 0.4991, "step": 3754 }, { "epoch": 0.22866364217641508, "grad_norm": 1.1491105743240242, "learning_rate": 4.983006914805162e-06, "loss": 0.4441, "step": 3755 }, { "epoch": 0.22872453795329295, "grad_norm": 1.085619264445055, "learning_rate": 4.982997625546924e-06, "loss": 0.5339, "step": 3756 }, { "epoch": 0.22878543373017082, "grad_norm": 1.091534765480261, "learning_rate": 4.9829883337590604e-06, "loss": 0.5051, "step": 3757 }, { "epoch": 0.2288463295070487, "grad_norm": 1.0520591915932902, "learning_rate": 4.98297903944158e-06, "loss": 0.5479, "step": 3758 }, { "epoch": 0.22890722528392657, "grad_norm": 1.0829531219335513, "learning_rate": 4.982969742594492e-06, "loss": 0.5295, "step": 3759 }, { "epoch": 0.22896812106080444, "grad_norm": 1.033727312221422, "learning_rate": 4.982960443217806e-06, "loss": 0.4715, "step": 3760 }, { "epoch": 0.22902901683768231, "grad_norm": 1.0535824532244455, "learning_rate": 4.982951141311532e-06, "loss": 0.5663, "step": 3761 }, { "epoch": 0.2290899126145602, "grad_norm": 1.0740155996153367, "learning_rate": 4.982941836875678e-06, "loss": 0.4532, "step": 3762 }, { "epoch": 0.22915080839143806, "grad_norm": 1.1986277408454418, "learning_rate": 4.9829325299102546e-06, "loss": 0.4685, "step": 3763 }, { "epoch": 0.22921170416831593, "grad_norm": 1.0989822249752235, "learning_rate": 4.9829232204152724e-06, "loss": 0.5125, "step": 3764 }, { "epoch": 0.2292725999451938, "grad_norm": 1.0621132622167233, "learning_rate": 4.982913908390738e-06, "loss": 0.4913, "step": 3765 }, { "epoch": 0.22933349572207168, "grad_norm": 0.9579994766495308, "learning_rate": 4.982904593836664e-06, "loss": 0.5299, "step": 3766 }, { "epoch": 0.22939439149894955, "grad_norm": 0.9674949931008232, "learning_rate": 4.982895276753058e-06, "loss": 0.5313, "step": 3767 }, { "epoch": 0.22945528727582742, "grad_norm": 0.9651965632897885, "learning_rate": 4.982885957139929e-06, "loss": 0.5284, "step": 3768 }, { "epoch": 0.2295161830527053, "grad_norm": 0.9448646809301025, "learning_rate": 4.982876634997289e-06, "loss": 0.525, "step": 3769 }, { "epoch": 0.22957707882958317, "grad_norm": 0.9747603509445841, "learning_rate": 4.982867310325145e-06, "loss": 0.4866, "step": 3770 }, { "epoch": 0.22963797460646104, "grad_norm": 1.0547304817317602, "learning_rate": 4.982857983123507e-06, "loss": 0.4712, "step": 3771 }, { "epoch": 0.22969887038333892, "grad_norm": 1.153717996715709, "learning_rate": 4.982848653392385e-06, "loss": 0.5307, "step": 3772 }, { "epoch": 0.2297597661602168, "grad_norm": 1.0154104695975763, "learning_rate": 4.982839321131789e-06, "loss": 0.4935, "step": 3773 }, { "epoch": 0.22982066193709466, "grad_norm": 1.0637298873311283, "learning_rate": 4.982829986341727e-06, "loss": 0.4535, "step": 3774 }, { "epoch": 0.22988155771397253, "grad_norm": 1.1134841933219644, "learning_rate": 4.982820649022211e-06, "loss": 0.4788, "step": 3775 }, { "epoch": 0.2299424534908504, "grad_norm": 1.0386476660849633, "learning_rate": 4.982811309173248e-06, "loss": 0.473, "step": 3776 }, { "epoch": 0.23000334926772828, "grad_norm": 1.1598616542466147, "learning_rate": 4.982801966794848e-06, "loss": 0.5752, "step": 3777 }, { "epoch": 0.23006424504460615, "grad_norm": 0.9742574963680173, "learning_rate": 4.9827926218870216e-06, "loss": 0.5078, "step": 3778 }, { "epoch": 0.23012514082148403, "grad_norm": 1.2297493764213325, "learning_rate": 4.982783274449777e-06, "loss": 0.5363, "step": 3779 }, { "epoch": 0.2301860365983619, "grad_norm": 1.1011349807886763, "learning_rate": 4.982773924483125e-06, "loss": 0.54, "step": 3780 }, { "epoch": 0.23024693237523977, "grad_norm": 0.9641627572847817, "learning_rate": 4.982764571987076e-06, "loss": 0.4881, "step": 3781 }, { "epoch": 0.23030782815211764, "grad_norm": 1.0468765928448454, "learning_rate": 4.982755216961636e-06, "loss": 0.5417, "step": 3782 }, { "epoch": 0.23036872392899552, "grad_norm": 1.075605059446278, "learning_rate": 4.9827458594068166e-06, "loss": 0.4417, "step": 3783 }, { "epoch": 0.2304296197058734, "grad_norm": 1.0417261647629539, "learning_rate": 4.982736499322628e-06, "loss": 0.5416, "step": 3784 }, { "epoch": 0.23049051548275126, "grad_norm": 0.9902458633143093, "learning_rate": 4.982727136709079e-06, "loss": 0.5156, "step": 3785 }, { "epoch": 0.23055141125962914, "grad_norm": 1.1281322812824437, "learning_rate": 4.982717771566179e-06, "loss": 0.4668, "step": 3786 }, { "epoch": 0.230612307036507, "grad_norm": 1.142327503666585, "learning_rate": 4.982708403893938e-06, "loss": 0.4397, "step": 3787 }, { "epoch": 0.23067320281338488, "grad_norm": 1.062585017330708, "learning_rate": 4.9826990336923655e-06, "loss": 0.4774, "step": 3788 }, { "epoch": 0.23073409859026275, "grad_norm": 1.023809843879714, "learning_rate": 4.98268966096147e-06, "loss": 0.5477, "step": 3789 }, { "epoch": 0.23079499436714063, "grad_norm": 1.0612345190241597, "learning_rate": 4.982680285701264e-06, "loss": 0.4798, "step": 3790 }, { "epoch": 0.2308558901440185, "grad_norm": 1.1553056950701304, "learning_rate": 4.982670907911752e-06, "loss": 0.513, "step": 3791 }, { "epoch": 0.23091678592089637, "grad_norm": 0.9796706360210359, "learning_rate": 4.982661527592948e-06, "loss": 0.4996, "step": 3792 }, { "epoch": 0.23097768169777427, "grad_norm": 1.0466354903084, "learning_rate": 4.98265214474486e-06, "loss": 0.4377, "step": 3793 }, { "epoch": 0.23103857747465215, "grad_norm": 0.9942405146274848, "learning_rate": 4.982642759367498e-06, "loss": 0.571, "step": 3794 }, { "epoch": 0.23109947325153002, "grad_norm": 1.0229228387141278, "learning_rate": 4.982633371460871e-06, "loss": 0.5136, "step": 3795 }, { "epoch": 0.2311603690284079, "grad_norm": 1.1800761973790914, "learning_rate": 4.982623981024988e-06, "loss": 0.4595, "step": 3796 }, { "epoch": 0.23122126480528576, "grad_norm": 1.0204336624096733, "learning_rate": 4.98261458805986e-06, "loss": 0.5252, "step": 3797 }, { "epoch": 0.23128216058216364, "grad_norm": 1.0547336894868837, "learning_rate": 4.982605192565496e-06, "loss": 0.4981, "step": 3798 }, { "epoch": 0.2313430563590415, "grad_norm": 1.1199248328784053, "learning_rate": 4.982595794541905e-06, "loss": 0.4495, "step": 3799 }, { "epoch": 0.23140395213591938, "grad_norm": 1.0505605392382793, "learning_rate": 4.982586393989097e-06, "loss": 0.5581, "step": 3800 }, { "epoch": 0.23146484791279726, "grad_norm": 1.0284623778455193, "learning_rate": 4.982576990907082e-06, "loss": 0.4494, "step": 3801 }, { "epoch": 0.23152574368967513, "grad_norm": 1.0621531115318543, "learning_rate": 4.982567585295869e-06, "loss": 0.4533, "step": 3802 }, { "epoch": 0.231586639466553, "grad_norm": 0.9640816358508384, "learning_rate": 4.9825581771554676e-06, "loss": 0.5023, "step": 3803 }, { "epoch": 0.23164753524343087, "grad_norm": 1.0460499917416157, "learning_rate": 4.982548766485887e-06, "loss": 0.4662, "step": 3804 }, { "epoch": 0.23170843102030875, "grad_norm": 1.0955118280072782, "learning_rate": 4.982539353287138e-06, "loss": 0.5127, "step": 3805 }, { "epoch": 0.23176932679718662, "grad_norm": 1.009534681983333, "learning_rate": 4.98252993755923e-06, "loss": 0.4661, "step": 3806 }, { "epoch": 0.2318302225740645, "grad_norm": 1.0375906465286762, "learning_rate": 4.982520519302171e-06, "loss": 0.5758, "step": 3807 }, { "epoch": 0.23189111835094237, "grad_norm": 1.0648615877641143, "learning_rate": 4.982511098515972e-06, "loss": 0.4788, "step": 3808 }, { "epoch": 0.23195201412782024, "grad_norm": 1.3337664058384422, "learning_rate": 4.982501675200643e-06, "loss": 0.4849, "step": 3809 }, { "epoch": 0.2320129099046981, "grad_norm": 1.0584047017085294, "learning_rate": 4.982492249356193e-06, "loss": 0.4879, "step": 3810 }, { "epoch": 0.23207380568157598, "grad_norm": 0.98869554572663, "learning_rate": 4.98248282098263e-06, "loss": 0.5296, "step": 3811 }, { "epoch": 0.23213470145845386, "grad_norm": 0.9499119626236463, "learning_rate": 4.982473390079967e-06, "loss": 0.4766, "step": 3812 }, { "epoch": 0.23219559723533173, "grad_norm": 1.0425475486754014, "learning_rate": 4.98246395664821e-06, "loss": 0.5391, "step": 3813 }, { "epoch": 0.2322564930122096, "grad_norm": 0.944786267455095, "learning_rate": 4.982454520687372e-06, "loss": 0.4945, "step": 3814 }, { "epoch": 0.23231738878908748, "grad_norm": 1.0700137252726838, "learning_rate": 4.98244508219746e-06, "loss": 0.438, "step": 3815 }, { "epoch": 0.23237828456596535, "grad_norm": 1.2079890915181453, "learning_rate": 4.982435641178485e-06, "loss": 0.5075, "step": 3816 }, { "epoch": 0.23243918034284322, "grad_norm": 1.117078882967973, "learning_rate": 4.982426197630456e-06, "loss": 0.4564, "step": 3817 }, { "epoch": 0.2325000761197211, "grad_norm": 0.9990147064543815, "learning_rate": 4.982416751553382e-06, "loss": 0.4502, "step": 3818 }, { "epoch": 0.23256097189659897, "grad_norm": 0.9820821333565848, "learning_rate": 4.982407302947274e-06, "loss": 0.5457, "step": 3819 }, { "epoch": 0.23262186767347684, "grad_norm": 1.0459675197701963, "learning_rate": 4.982397851812141e-06, "loss": 0.4591, "step": 3820 }, { "epoch": 0.2326827634503547, "grad_norm": 1.0171638870211217, "learning_rate": 4.982388398147993e-06, "loss": 0.4959, "step": 3821 }, { "epoch": 0.23274365922723259, "grad_norm": 1.0164604820815775, "learning_rate": 4.98237894195484e-06, "loss": 0.5417, "step": 3822 }, { "epoch": 0.23280455500411046, "grad_norm": 1.0523767418549692, "learning_rate": 4.9823694832326896e-06, "loss": 0.5251, "step": 3823 }, { "epoch": 0.23286545078098833, "grad_norm": 1.0092754418317391, "learning_rate": 4.9823600219815524e-06, "loss": 0.5107, "step": 3824 }, { "epoch": 0.2329263465578662, "grad_norm": 1.0846120039292853, "learning_rate": 4.98235055820144e-06, "loss": 0.4939, "step": 3825 }, { "epoch": 0.23298724233474408, "grad_norm": 0.9365409400473113, "learning_rate": 4.98234109189236e-06, "loss": 0.5241, "step": 3826 }, { "epoch": 0.23304813811162195, "grad_norm": 1.2008215572195, "learning_rate": 4.982331623054322e-06, "loss": 0.4544, "step": 3827 }, { "epoch": 0.23310903388849982, "grad_norm": 1.0836030611654375, "learning_rate": 4.982322151687337e-06, "loss": 0.4512, "step": 3828 }, { "epoch": 0.2331699296653777, "grad_norm": 0.996632494570029, "learning_rate": 4.982312677791412e-06, "loss": 0.5216, "step": 3829 }, { "epoch": 0.23323082544225557, "grad_norm": 1.0870396209756983, "learning_rate": 4.98230320136656e-06, "loss": 0.5191, "step": 3830 }, { "epoch": 0.23329172121913344, "grad_norm": 1.0885430782899022, "learning_rate": 4.982293722412788e-06, "loss": 0.4931, "step": 3831 }, { "epoch": 0.23335261699601131, "grad_norm": 1.0411532500417353, "learning_rate": 4.982284240930108e-06, "loss": 0.4847, "step": 3832 }, { "epoch": 0.2334135127728892, "grad_norm": 0.9495249639793224, "learning_rate": 4.9822747569185285e-06, "loss": 0.5101, "step": 3833 }, { "epoch": 0.2334744085497671, "grad_norm": 0.9733246788767889, "learning_rate": 4.982265270378058e-06, "loss": 0.5195, "step": 3834 }, { "epoch": 0.23353530432664496, "grad_norm": 1.0229040522921637, "learning_rate": 4.982255781308708e-06, "loss": 0.5119, "step": 3835 }, { "epoch": 0.23359620010352283, "grad_norm": 0.9958938347043992, "learning_rate": 4.982246289710487e-06, "loss": 0.4776, "step": 3836 }, { "epoch": 0.2336570958804007, "grad_norm": 1.0403162323912059, "learning_rate": 4.982236795583406e-06, "loss": 0.472, "step": 3837 }, { "epoch": 0.23371799165727858, "grad_norm": 1.0519931448520232, "learning_rate": 4.982227298927472e-06, "loss": 0.4447, "step": 3838 }, { "epoch": 0.23377888743415645, "grad_norm": 1.0378064639186813, "learning_rate": 4.982217799742698e-06, "loss": 0.5366, "step": 3839 }, { "epoch": 0.23383978321103432, "grad_norm": 1.0818034000699541, "learning_rate": 4.982208298029091e-06, "loss": 0.4888, "step": 3840 }, { "epoch": 0.2339006789879122, "grad_norm": 1.175437327163238, "learning_rate": 4.982198793786663e-06, "loss": 0.5101, "step": 3841 }, { "epoch": 0.23396157476479007, "grad_norm": 1.0566467311075343, "learning_rate": 4.982189287015422e-06, "loss": 0.4733, "step": 3842 }, { "epoch": 0.23402247054166794, "grad_norm": 1.0707565480995254, "learning_rate": 4.982179777715378e-06, "loss": 0.5785, "step": 3843 }, { "epoch": 0.23408336631854582, "grad_norm": 1.0700436711890222, "learning_rate": 4.982170265886541e-06, "loss": 0.4977, "step": 3844 }, { "epoch": 0.2341442620954237, "grad_norm": 1.098822585849228, "learning_rate": 4.98216075152892e-06, "loss": 0.4551, "step": 3845 }, { "epoch": 0.23420515787230156, "grad_norm": 1.0519589768889273, "learning_rate": 4.9821512346425256e-06, "loss": 0.4543, "step": 3846 }, { "epoch": 0.23426605364917943, "grad_norm": 1.0738597681100417, "learning_rate": 4.982141715227367e-06, "loss": 0.5853, "step": 3847 }, { "epoch": 0.2343269494260573, "grad_norm": 1.0392373729285156, "learning_rate": 4.982132193283454e-06, "loss": 0.4779, "step": 3848 }, { "epoch": 0.23438784520293518, "grad_norm": 1.0108312273955453, "learning_rate": 4.9821226688107975e-06, "loss": 0.5576, "step": 3849 }, { "epoch": 0.23444874097981305, "grad_norm": 1.0639551485639465, "learning_rate": 4.982113141809405e-06, "loss": 0.5381, "step": 3850 }, { "epoch": 0.23450963675669093, "grad_norm": 1.0655376642922674, "learning_rate": 4.982103612279286e-06, "loss": 0.4868, "step": 3851 }, { "epoch": 0.2345705325335688, "grad_norm": 1.0307836145689413, "learning_rate": 4.982094080220453e-06, "loss": 0.5237, "step": 3852 }, { "epoch": 0.23463142831044667, "grad_norm": 1.0679601921184858, "learning_rate": 4.982084545632914e-06, "loss": 0.5092, "step": 3853 }, { "epoch": 0.23469232408732454, "grad_norm": 1.0788151164893014, "learning_rate": 4.982075008516679e-06, "loss": 0.4694, "step": 3854 }, { "epoch": 0.23475321986420242, "grad_norm": 1.0281501245210427, "learning_rate": 4.982065468871758e-06, "loss": 0.5036, "step": 3855 }, { "epoch": 0.2348141156410803, "grad_norm": 0.9885288356406656, "learning_rate": 4.982055926698159e-06, "loss": 0.466, "step": 3856 }, { "epoch": 0.23487501141795816, "grad_norm": 1.2061176699736167, "learning_rate": 4.982046381995893e-06, "loss": 0.4748, "step": 3857 }, { "epoch": 0.23493590719483604, "grad_norm": 1.0190371253426487, "learning_rate": 4.98203683476497e-06, "loss": 0.5097, "step": 3858 }, { "epoch": 0.2349968029717139, "grad_norm": 1.0830686555594666, "learning_rate": 4.982027285005401e-06, "loss": 0.48, "step": 3859 }, { "epoch": 0.23505769874859178, "grad_norm": 1.0502416164318502, "learning_rate": 4.982017732717192e-06, "loss": 0.4821, "step": 3860 }, { "epoch": 0.23511859452546965, "grad_norm": 1.076564570294196, "learning_rate": 4.982008177900357e-06, "loss": 0.4881, "step": 3861 }, { "epoch": 0.23517949030234753, "grad_norm": 0.9689128247214893, "learning_rate": 4.981998620554903e-06, "loss": 0.5256, "step": 3862 }, { "epoch": 0.2352403860792254, "grad_norm": 1.065895180227061, "learning_rate": 4.98198906068084e-06, "loss": 0.4895, "step": 3863 }, { "epoch": 0.23530128185610327, "grad_norm": 1.1180781524926755, "learning_rate": 4.981979498278177e-06, "loss": 0.5389, "step": 3864 }, { "epoch": 0.23536217763298115, "grad_norm": 1.0021346328938914, "learning_rate": 4.981969933346928e-06, "loss": 0.511, "step": 3865 }, { "epoch": 0.23542307340985902, "grad_norm": 1.0032140385091155, "learning_rate": 4.981960365887097e-06, "loss": 0.52, "step": 3866 }, { "epoch": 0.2354839691867369, "grad_norm": 1.0011375464230556, "learning_rate": 4.981950795898697e-06, "loss": 0.4715, "step": 3867 }, { "epoch": 0.23554486496361476, "grad_norm": 1.0455351226805487, "learning_rate": 4.981941223381738e-06, "loss": 0.5135, "step": 3868 }, { "epoch": 0.23560576074049264, "grad_norm": 1.062242012562604, "learning_rate": 4.981931648336227e-06, "loss": 0.5245, "step": 3869 }, { "epoch": 0.2356666565173705, "grad_norm": 1.0330487181979797, "learning_rate": 4.9819220707621775e-06, "loss": 0.4784, "step": 3870 }, { "epoch": 0.23572755229424838, "grad_norm": 1.126825096415133, "learning_rate": 4.981912490659596e-06, "loss": 0.4936, "step": 3871 }, { "epoch": 0.23578844807112626, "grad_norm": 1.0668034062262801, "learning_rate": 4.981902908028495e-06, "loss": 0.5194, "step": 3872 }, { "epoch": 0.23584934384800413, "grad_norm": 1.0262789877590757, "learning_rate": 4.981893322868882e-06, "loss": 0.4753, "step": 3873 }, { "epoch": 0.235910239624882, "grad_norm": 1.0451493364225681, "learning_rate": 4.981883735180768e-06, "loss": 0.4709, "step": 3874 }, { "epoch": 0.2359711354017599, "grad_norm": 1.0146841095027543, "learning_rate": 4.981874144964163e-06, "loss": 0.4888, "step": 3875 }, { "epoch": 0.23603203117863777, "grad_norm": 0.9819293684451167, "learning_rate": 4.981864552219075e-06, "loss": 0.4835, "step": 3876 }, { "epoch": 0.23609292695551565, "grad_norm": 1.0807813321779165, "learning_rate": 4.981854956945515e-06, "loss": 0.5086, "step": 3877 }, { "epoch": 0.23615382273239352, "grad_norm": 0.9715465191850327, "learning_rate": 4.981845359143494e-06, "loss": 0.4951, "step": 3878 }, { "epoch": 0.2362147185092714, "grad_norm": 1.0350111665870085, "learning_rate": 4.98183575881302e-06, "loss": 0.4245, "step": 3879 }, { "epoch": 0.23627561428614927, "grad_norm": 1.01600806522499, "learning_rate": 4.981826155954103e-06, "loss": 0.519, "step": 3880 }, { "epoch": 0.23633651006302714, "grad_norm": 0.9737789153392198, "learning_rate": 4.9818165505667536e-06, "loss": 0.5098, "step": 3881 }, { "epoch": 0.236397405839905, "grad_norm": 1.0651804131560318, "learning_rate": 4.981806942650981e-06, "loss": 0.5035, "step": 3882 }, { "epoch": 0.23645830161678288, "grad_norm": 1.1498474751041823, "learning_rate": 4.981797332206795e-06, "loss": 0.458, "step": 3883 }, { "epoch": 0.23651919739366076, "grad_norm": 1.0544522407420063, "learning_rate": 4.981787719234205e-06, "loss": 0.6176, "step": 3884 }, { "epoch": 0.23658009317053863, "grad_norm": 1.1425704680457498, "learning_rate": 4.9817781037332215e-06, "loss": 0.5464, "step": 3885 }, { "epoch": 0.2366409889474165, "grad_norm": 1.0807275330861723, "learning_rate": 4.981768485703855e-06, "loss": 0.5054, "step": 3886 }, { "epoch": 0.23670188472429438, "grad_norm": 0.9432275179281085, "learning_rate": 4.981758865146114e-06, "loss": 0.5435, "step": 3887 }, { "epoch": 0.23676278050117225, "grad_norm": 1.0248797506331115, "learning_rate": 4.981749242060008e-06, "loss": 0.5162, "step": 3888 }, { "epoch": 0.23682367627805012, "grad_norm": 1.1713447427139956, "learning_rate": 4.981739616445548e-06, "loss": 0.4598, "step": 3889 }, { "epoch": 0.236884572054928, "grad_norm": 0.9602508483493223, "learning_rate": 4.981729988302742e-06, "loss": 0.5085, "step": 3890 }, { "epoch": 0.23694546783180587, "grad_norm": 1.0081868297096883, "learning_rate": 4.981720357631603e-06, "loss": 0.5222, "step": 3891 }, { "epoch": 0.23700636360868374, "grad_norm": 1.1334398717836685, "learning_rate": 4.981710724432137e-06, "loss": 0.4358, "step": 3892 }, { "epoch": 0.2370672593855616, "grad_norm": 1.0789928405409182, "learning_rate": 4.981701088704357e-06, "loss": 0.4915, "step": 3893 }, { "epoch": 0.23712815516243949, "grad_norm": 0.9333092956936851, "learning_rate": 4.981691450448271e-06, "loss": 0.5311, "step": 3894 }, { "epoch": 0.23718905093931736, "grad_norm": 0.970634485315829, "learning_rate": 4.98168180966389e-06, "loss": 0.5341, "step": 3895 }, { "epoch": 0.23724994671619523, "grad_norm": 1.098918880833628, "learning_rate": 4.981672166351222e-06, "loss": 0.479, "step": 3896 }, { "epoch": 0.2373108424930731, "grad_norm": 1.1228804885932946, "learning_rate": 4.981662520510279e-06, "loss": 0.4406, "step": 3897 }, { "epoch": 0.23737173826995098, "grad_norm": 1.0694775369208267, "learning_rate": 4.98165287214107e-06, "loss": 0.4419, "step": 3898 }, { "epoch": 0.23743263404682885, "grad_norm": 1.1900453664695665, "learning_rate": 4.981643221243605e-06, "loss": 0.5115, "step": 3899 }, { "epoch": 0.23749352982370672, "grad_norm": 1.0362608355146108, "learning_rate": 4.981633567817892e-06, "loss": 0.4489, "step": 3900 }, { "epoch": 0.2375544256005846, "grad_norm": 0.9472354606155396, "learning_rate": 4.981623911863943e-06, "loss": 0.5418, "step": 3901 }, { "epoch": 0.23761532137746247, "grad_norm": 0.9938938717684741, "learning_rate": 4.981614253381768e-06, "loss": 0.4852, "step": 3902 }, { "epoch": 0.23767621715434034, "grad_norm": 0.9912178938837476, "learning_rate": 4.981604592371374e-06, "loss": 0.5059, "step": 3903 }, { "epoch": 0.23773711293121821, "grad_norm": 1.044531536435821, "learning_rate": 4.981594928832775e-06, "loss": 0.4876, "step": 3904 }, { "epoch": 0.2377980087080961, "grad_norm": 1.1475467112652176, "learning_rate": 4.981585262765977e-06, "loss": 0.4651, "step": 3905 }, { "epoch": 0.23785890448497396, "grad_norm": 0.9858567571544226, "learning_rate": 4.981575594170992e-06, "loss": 0.5315, "step": 3906 }, { "epoch": 0.23791980026185183, "grad_norm": 1.0374816118155141, "learning_rate": 4.9815659230478294e-06, "loss": 0.5672, "step": 3907 }, { "epoch": 0.2379806960387297, "grad_norm": 1.1392921341157074, "learning_rate": 4.981556249396499e-06, "loss": 0.4911, "step": 3908 }, { "epoch": 0.23804159181560758, "grad_norm": 1.0694331414665301, "learning_rate": 4.981546573217012e-06, "loss": 0.4916, "step": 3909 }, { "epoch": 0.23810248759248545, "grad_norm": 1.0792893709593037, "learning_rate": 4.981536894509376e-06, "loss": 0.4003, "step": 3910 }, { "epoch": 0.23816338336936332, "grad_norm": 1.092934003316643, "learning_rate": 4.981527213273602e-06, "loss": 0.4643, "step": 3911 }, { "epoch": 0.2382242791462412, "grad_norm": 0.9979972158199573, "learning_rate": 4.981517529509699e-06, "loss": 0.4867, "step": 3912 }, { "epoch": 0.23828517492311907, "grad_norm": 0.9966536004811701, "learning_rate": 4.9815078432176776e-06, "loss": 0.4804, "step": 3913 }, { "epoch": 0.23834607069999694, "grad_norm": 0.9825489700215086, "learning_rate": 4.981498154397548e-06, "loss": 0.4778, "step": 3914 }, { "epoch": 0.23840696647687482, "grad_norm": 0.9642892241895192, "learning_rate": 4.981488463049319e-06, "loss": 0.4672, "step": 3915 }, { "epoch": 0.23846786225375272, "grad_norm": 1.0831058952908323, "learning_rate": 4.9814787691730014e-06, "loss": 0.4712, "step": 3916 }, { "epoch": 0.2385287580306306, "grad_norm": 1.0608022055758801, "learning_rate": 4.9814690727686054e-06, "loss": 0.4738, "step": 3917 }, { "epoch": 0.23858965380750846, "grad_norm": 1.1138543891616683, "learning_rate": 4.98145937383614e-06, "loss": 0.4711, "step": 3918 }, { "epoch": 0.23865054958438633, "grad_norm": 1.1228455447025976, "learning_rate": 4.981449672375616e-06, "loss": 0.4437, "step": 3919 }, { "epoch": 0.2387114453612642, "grad_norm": 1.1634016713532676, "learning_rate": 4.981439968387042e-06, "loss": 0.4423, "step": 3920 }, { "epoch": 0.23877234113814208, "grad_norm": 0.9618365475583119, "learning_rate": 4.981430261870428e-06, "loss": 0.5306, "step": 3921 }, { "epoch": 0.23883323691501995, "grad_norm": 1.0888769927045006, "learning_rate": 4.981420552825785e-06, "loss": 0.3921, "step": 3922 }, { "epoch": 0.23889413269189783, "grad_norm": 1.0191494698485075, "learning_rate": 4.981410841253122e-06, "loss": 0.4678, "step": 3923 }, { "epoch": 0.2389550284687757, "grad_norm": 1.0576193964025224, "learning_rate": 4.98140112715245e-06, "loss": 0.5155, "step": 3924 }, { "epoch": 0.23901592424565357, "grad_norm": 0.9975335704920769, "learning_rate": 4.981391410523778e-06, "loss": 0.5018, "step": 3925 }, { "epoch": 0.23907682002253144, "grad_norm": 0.9666107341337267, "learning_rate": 4.981381691367115e-06, "loss": 0.5332, "step": 3926 }, { "epoch": 0.23913771579940932, "grad_norm": 1.075008870286327, "learning_rate": 4.981371969682473e-06, "loss": 0.5204, "step": 3927 }, { "epoch": 0.2391986115762872, "grad_norm": 1.0604127460205013, "learning_rate": 4.981362245469861e-06, "loss": 0.4466, "step": 3928 }, { "epoch": 0.23925950735316506, "grad_norm": 1.1110483906807513, "learning_rate": 4.981352518729288e-06, "loss": 0.4301, "step": 3929 }, { "epoch": 0.23932040313004294, "grad_norm": 1.0405925594177867, "learning_rate": 4.981342789460765e-06, "loss": 0.4538, "step": 3930 }, { "epoch": 0.2393812989069208, "grad_norm": 1.095902736973017, "learning_rate": 4.981333057664301e-06, "loss": 0.4771, "step": 3931 }, { "epoch": 0.23944219468379868, "grad_norm": 1.0871895585949507, "learning_rate": 4.981323323339908e-06, "loss": 0.4666, "step": 3932 }, { "epoch": 0.23950309046067655, "grad_norm": 1.004904803544844, "learning_rate": 4.981313586487592e-06, "loss": 0.555, "step": 3933 }, { "epoch": 0.23956398623755443, "grad_norm": 0.9785748165773344, "learning_rate": 4.9813038471073675e-06, "loss": 0.4934, "step": 3934 }, { "epoch": 0.2396248820144323, "grad_norm": 1.1514220508932829, "learning_rate": 4.98129410519924e-06, "loss": 0.508, "step": 3935 }, { "epoch": 0.23968577779131017, "grad_norm": 1.126632232726212, "learning_rate": 4.981284360763223e-06, "loss": 0.4725, "step": 3936 }, { "epoch": 0.23974667356818805, "grad_norm": 1.1104459412336685, "learning_rate": 4.981274613799326e-06, "loss": 0.4908, "step": 3937 }, { "epoch": 0.23980756934506592, "grad_norm": 1.0015585494423838, "learning_rate": 4.9812648643075565e-06, "loss": 0.4781, "step": 3938 }, { "epoch": 0.2398684651219438, "grad_norm": 1.0613048664861662, "learning_rate": 4.981255112287927e-06, "loss": 0.4719, "step": 3939 }, { "epoch": 0.23992936089882166, "grad_norm": 1.0569753426344481, "learning_rate": 4.981245357740445e-06, "loss": 0.4735, "step": 3940 }, { "epoch": 0.23999025667569954, "grad_norm": 1.02952409737359, "learning_rate": 4.981235600665123e-06, "loss": 0.4783, "step": 3941 }, { "epoch": 0.2400511524525774, "grad_norm": 1.1377462622443633, "learning_rate": 4.98122584106197e-06, "loss": 0.4262, "step": 3942 }, { "epoch": 0.24011204822945528, "grad_norm": 1.044449172766818, "learning_rate": 4.981216078930995e-06, "loss": 0.4426, "step": 3943 }, { "epoch": 0.24017294400633316, "grad_norm": 1.0730142311762407, "learning_rate": 4.981206314272209e-06, "loss": 0.4636, "step": 3944 }, { "epoch": 0.24023383978321103, "grad_norm": 0.9998714418062362, "learning_rate": 4.981196547085621e-06, "loss": 0.4564, "step": 3945 }, { "epoch": 0.2402947355600889, "grad_norm": 1.0598691526020294, "learning_rate": 4.981186777371242e-06, "loss": 0.5353, "step": 3946 }, { "epoch": 0.24035563133696677, "grad_norm": 0.9769154563733607, "learning_rate": 4.981177005129081e-06, "loss": 0.4823, "step": 3947 }, { "epoch": 0.24041652711384465, "grad_norm": 0.961327574289567, "learning_rate": 4.981167230359149e-06, "loss": 0.4714, "step": 3948 }, { "epoch": 0.24047742289072252, "grad_norm": 0.9978192388210497, "learning_rate": 4.981157453061455e-06, "loss": 0.4732, "step": 3949 }, { "epoch": 0.2405383186676004, "grad_norm": 1.0713354454535151, "learning_rate": 4.98114767323601e-06, "loss": 0.4579, "step": 3950 }, { "epoch": 0.24059921444447827, "grad_norm": 1.0095220045052913, "learning_rate": 4.981137890882823e-06, "loss": 0.5067, "step": 3951 }, { "epoch": 0.24066011022135614, "grad_norm": 1.075339491107288, "learning_rate": 4.981128106001905e-06, "loss": 0.4282, "step": 3952 }, { "epoch": 0.240721005998234, "grad_norm": 1.095219922186481, "learning_rate": 4.981118318593264e-06, "loss": 0.4924, "step": 3953 }, { "epoch": 0.24078190177511188, "grad_norm": 1.1045119001333732, "learning_rate": 4.981108528656911e-06, "loss": 0.4679, "step": 3954 }, { "epoch": 0.24084279755198976, "grad_norm": 0.9635518077210926, "learning_rate": 4.981098736192858e-06, "loss": 0.4797, "step": 3955 }, { "epoch": 0.24090369332886763, "grad_norm": 1.0210566894029653, "learning_rate": 4.981088941201112e-06, "loss": 0.5049, "step": 3956 }, { "epoch": 0.24096458910574553, "grad_norm": 0.9559871961420698, "learning_rate": 4.981079143681684e-06, "loss": 0.4951, "step": 3957 }, { "epoch": 0.2410254848826234, "grad_norm": 1.0795045209190195, "learning_rate": 4.981069343634585e-06, "loss": 0.5025, "step": 3958 }, { "epoch": 0.24108638065950128, "grad_norm": 1.1300204395004465, "learning_rate": 4.9810595410598235e-06, "loss": 0.4895, "step": 3959 }, { "epoch": 0.24114727643637915, "grad_norm": 0.9838211994650045, "learning_rate": 4.98104973595741e-06, "loss": 0.5363, "step": 3960 }, { "epoch": 0.24120817221325702, "grad_norm": 1.0785077332375315, "learning_rate": 4.981039928327355e-06, "loss": 0.5479, "step": 3961 }, { "epoch": 0.2412690679901349, "grad_norm": 1.063926859725005, "learning_rate": 4.981030118169668e-06, "loss": 0.4728, "step": 3962 }, { "epoch": 0.24132996376701277, "grad_norm": 0.9862348353858558, "learning_rate": 4.9810203054843585e-06, "loss": 0.4924, "step": 3963 }, { "epoch": 0.24139085954389064, "grad_norm": 1.0163397634386007, "learning_rate": 4.9810104902714385e-06, "loss": 0.5491, "step": 3964 }, { "epoch": 0.2414517553207685, "grad_norm": 1.0165579977874617, "learning_rate": 4.981000672530915e-06, "loss": 0.4803, "step": 3965 }, { "epoch": 0.24151265109764639, "grad_norm": 0.9895921826492333, "learning_rate": 4.980990852262801e-06, "loss": 0.475, "step": 3966 }, { "epoch": 0.24157354687452426, "grad_norm": 1.035936162379611, "learning_rate": 4.980981029467105e-06, "loss": 0.4773, "step": 3967 }, { "epoch": 0.24163444265140213, "grad_norm": 1.013006033202925, "learning_rate": 4.980971204143836e-06, "loss": 0.4569, "step": 3968 }, { "epoch": 0.24169533842828, "grad_norm": 1.0551194986178474, "learning_rate": 4.9809613762930055e-06, "loss": 0.485, "step": 3969 }, { "epoch": 0.24175623420515788, "grad_norm": 1.1507083223337693, "learning_rate": 4.980951545914624e-06, "loss": 0.5063, "step": 3970 }, { "epoch": 0.24181712998203575, "grad_norm": 1.0461358547580937, "learning_rate": 4.9809417130087e-06, "loss": 0.4579, "step": 3971 }, { "epoch": 0.24187802575891362, "grad_norm": 0.998368584289763, "learning_rate": 4.980931877575243e-06, "loss": 0.5198, "step": 3972 }, { "epoch": 0.2419389215357915, "grad_norm": 1.0838067302088368, "learning_rate": 4.980922039614266e-06, "loss": 0.4573, "step": 3973 }, { "epoch": 0.24199981731266937, "grad_norm": 1.0069672412338886, "learning_rate": 4.980912199125777e-06, "loss": 0.4866, "step": 3974 }, { "epoch": 0.24206071308954724, "grad_norm": 1.0148604498702731, "learning_rate": 4.980902356109785e-06, "loss": 0.4708, "step": 3975 }, { "epoch": 0.24212160886642511, "grad_norm": 1.0763108040390619, "learning_rate": 4.980892510566302e-06, "loss": 0.4883, "step": 3976 }, { "epoch": 0.242182504643303, "grad_norm": 1.0913310873392597, "learning_rate": 4.980882662495337e-06, "loss": 0.476, "step": 3977 }, { "epoch": 0.24224340042018086, "grad_norm": 0.996802464865154, "learning_rate": 4.980872811896901e-06, "loss": 0.4669, "step": 3978 }, { "epoch": 0.24230429619705873, "grad_norm": 1.034666349440827, "learning_rate": 4.980862958771003e-06, "loss": 0.5209, "step": 3979 }, { "epoch": 0.2423651919739366, "grad_norm": 0.9861692302363315, "learning_rate": 4.9808531031176536e-06, "loss": 0.4981, "step": 3980 }, { "epoch": 0.24242608775081448, "grad_norm": 1.087336605196495, "learning_rate": 4.980843244936863e-06, "loss": 0.5262, "step": 3981 }, { "epoch": 0.24248698352769235, "grad_norm": 1.0232919813720867, "learning_rate": 4.980833384228639e-06, "loss": 0.5339, "step": 3982 }, { "epoch": 0.24254787930457022, "grad_norm": 1.07595523289145, "learning_rate": 4.980823520992996e-06, "loss": 0.5525, "step": 3983 }, { "epoch": 0.2426087750814481, "grad_norm": 0.9941656645381992, "learning_rate": 4.9808136552299405e-06, "loss": 0.4539, "step": 3984 }, { "epoch": 0.24266967085832597, "grad_norm": 1.1062746740187028, "learning_rate": 4.980803786939483e-06, "loss": 0.5902, "step": 3985 }, { "epoch": 0.24273056663520384, "grad_norm": 0.9898559340482247, "learning_rate": 4.980793916121634e-06, "loss": 0.4973, "step": 3986 }, { "epoch": 0.24279146241208172, "grad_norm": 0.9724419177871614, "learning_rate": 4.980784042776405e-06, "loss": 0.5342, "step": 3987 }, { "epoch": 0.2428523581889596, "grad_norm": 1.0092637836903222, "learning_rate": 4.980774166903805e-06, "loss": 0.5411, "step": 3988 }, { "epoch": 0.24291325396583746, "grad_norm": 0.9810377705616926, "learning_rate": 4.980764288503842e-06, "loss": 0.5145, "step": 3989 }, { "epoch": 0.24297414974271533, "grad_norm": 1.173416463618114, "learning_rate": 4.980754407576529e-06, "loss": 0.4499, "step": 3990 }, { "epoch": 0.2430350455195932, "grad_norm": 1.057009586378205, "learning_rate": 4.980744524121875e-06, "loss": 0.4887, "step": 3991 }, { "epoch": 0.24309594129647108, "grad_norm": 1.0483724546123938, "learning_rate": 4.98073463813989e-06, "loss": 0.4671, "step": 3992 }, { "epoch": 0.24315683707334895, "grad_norm": 1.046457993681976, "learning_rate": 4.980724749630584e-06, "loss": 0.523, "step": 3993 }, { "epoch": 0.24321773285022683, "grad_norm": 1.0630418179830168, "learning_rate": 4.980714858593968e-06, "loss": 0.4919, "step": 3994 }, { "epoch": 0.2432786286271047, "grad_norm": 1.0033300830710334, "learning_rate": 4.980704965030051e-06, "loss": 0.4965, "step": 3995 }, { "epoch": 0.24333952440398257, "grad_norm": 1.0115447315897508, "learning_rate": 4.980695068938843e-06, "loss": 0.5206, "step": 3996 }, { "epoch": 0.24340042018086044, "grad_norm": 1.055933035873064, "learning_rate": 4.9806851703203544e-06, "loss": 0.4656, "step": 3997 }, { "epoch": 0.24346131595773834, "grad_norm": 1.0218407646082692, "learning_rate": 4.980675269174595e-06, "loss": 0.5238, "step": 3998 }, { "epoch": 0.24352221173461622, "grad_norm": 0.9938841169388715, "learning_rate": 4.980665365501576e-06, "loss": 0.4672, "step": 3999 }, { "epoch": 0.2435831075114941, "grad_norm": 1.1641254323710215, "learning_rate": 4.980655459301306e-06, "loss": 0.4988, "step": 4000 }, { "epoch": 0.24364400328837196, "grad_norm": 1.0238304967561251, "learning_rate": 4.980645550573796e-06, "loss": 0.5456, "step": 4001 }, { "epoch": 0.24370489906524984, "grad_norm": 1.0823559074900375, "learning_rate": 4.980635639319056e-06, "loss": 0.5108, "step": 4002 }, { "epoch": 0.2437657948421277, "grad_norm": 1.0243597246959946, "learning_rate": 4.980625725537096e-06, "loss": 0.4389, "step": 4003 }, { "epoch": 0.24382669061900558, "grad_norm": 1.0644638006831746, "learning_rate": 4.980615809227926e-06, "loss": 0.5159, "step": 4004 }, { "epoch": 0.24388758639588345, "grad_norm": 1.0789842000925718, "learning_rate": 4.9806058903915564e-06, "loss": 0.5357, "step": 4005 }, { "epoch": 0.24394848217276133, "grad_norm": 0.9732498506005204, "learning_rate": 4.980595969027997e-06, "loss": 0.5447, "step": 4006 }, { "epoch": 0.2440093779496392, "grad_norm": 1.0748426903119561, "learning_rate": 4.980586045137258e-06, "loss": 0.5047, "step": 4007 }, { "epoch": 0.24407027372651707, "grad_norm": 1.0795414120277669, "learning_rate": 4.980576118719349e-06, "loss": 0.5538, "step": 4008 }, { "epoch": 0.24413116950339495, "grad_norm": 1.0659217326543589, "learning_rate": 4.980566189774281e-06, "loss": 0.4887, "step": 4009 }, { "epoch": 0.24419206528027282, "grad_norm": 1.1396392578667893, "learning_rate": 4.9805562583020635e-06, "loss": 0.5016, "step": 4010 }, { "epoch": 0.2442529610571507, "grad_norm": 0.9996244897587777, "learning_rate": 4.9805463243027075e-06, "loss": 0.5326, "step": 4011 }, { "epoch": 0.24431385683402856, "grad_norm": 1.0071131070996013, "learning_rate": 4.980536387776223e-06, "loss": 0.4641, "step": 4012 }, { "epoch": 0.24437475261090644, "grad_norm": 1.1866664007019112, "learning_rate": 4.980526448722618e-06, "loss": 0.4079, "step": 4013 }, { "epoch": 0.2444356483877843, "grad_norm": 1.0587619483146928, "learning_rate": 4.980516507141905e-06, "loss": 0.5803, "step": 4014 }, { "epoch": 0.24449654416466218, "grad_norm": 1.0919355368108241, "learning_rate": 4.980506563034093e-06, "loss": 0.4559, "step": 4015 }, { "epoch": 0.24455743994154006, "grad_norm": 1.0158099728579484, "learning_rate": 4.980496616399193e-06, "loss": 0.4985, "step": 4016 }, { "epoch": 0.24461833571841793, "grad_norm": 1.0740886439674215, "learning_rate": 4.980486667237214e-06, "loss": 0.5066, "step": 4017 }, { "epoch": 0.2446792314952958, "grad_norm": 1.0006847598784376, "learning_rate": 4.980476715548168e-06, "loss": 0.4898, "step": 4018 }, { "epoch": 0.24474012727217367, "grad_norm": 1.0621754487005695, "learning_rate": 4.980466761332062e-06, "loss": 0.468, "step": 4019 }, { "epoch": 0.24480102304905155, "grad_norm": 1.15891190995193, "learning_rate": 4.980456804588909e-06, "loss": 0.4464, "step": 4020 }, { "epoch": 0.24486191882592942, "grad_norm": 1.0019641333661502, "learning_rate": 4.980446845318719e-06, "loss": 0.4767, "step": 4021 }, { "epoch": 0.2449228146028073, "grad_norm": 1.0295919691410458, "learning_rate": 4.9804368835215e-06, "loss": 0.5165, "step": 4022 }, { "epoch": 0.24498371037968517, "grad_norm": 1.0369562867508109, "learning_rate": 4.980426919197264e-06, "loss": 0.4599, "step": 4023 }, { "epoch": 0.24504460615656304, "grad_norm": 1.1074672357947812, "learning_rate": 4.980416952346021e-06, "loss": 0.5096, "step": 4024 }, { "epoch": 0.2451055019334409, "grad_norm": 0.9789128940265874, "learning_rate": 4.98040698296778e-06, "loss": 0.4942, "step": 4025 }, { "epoch": 0.24516639771031878, "grad_norm": 1.0460970738651634, "learning_rate": 4.9803970110625534e-06, "loss": 0.4968, "step": 4026 }, { "epoch": 0.24522729348719666, "grad_norm": 1.0629216237523387, "learning_rate": 4.980387036630349e-06, "loss": 0.547, "step": 4027 }, { "epoch": 0.24528818926407453, "grad_norm": 1.164032218003646, "learning_rate": 4.9803770596711776e-06, "loss": 0.6029, "step": 4028 }, { "epoch": 0.2453490850409524, "grad_norm": 0.9663628459343706, "learning_rate": 4.980367080185049e-06, "loss": 0.5524, "step": 4029 }, { "epoch": 0.24540998081783028, "grad_norm": 1.1010770407895025, "learning_rate": 4.9803570981719754e-06, "loss": 0.4611, "step": 4030 }, { "epoch": 0.24547087659470815, "grad_norm": 1.083246705212952, "learning_rate": 4.980347113631965e-06, "loss": 0.4624, "step": 4031 }, { "epoch": 0.24553177237158602, "grad_norm": 1.0194927797668465, "learning_rate": 4.980337126565028e-06, "loss": 0.4913, "step": 4032 }, { "epoch": 0.2455926681484639, "grad_norm": 1.172362379969205, "learning_rate": 4.9803271369711755e-06, "loss": 0.4462, "step": 4033 }, { "epoch": 0.24565356392534177, "grad_norm": 1.0586378079941408, "learning_rate": 4.980317144850418e-06, "loss": 0.4474, "step": 4034 }, { "epoch": 0.24571445970221964, "grad_norm": 1.0170352631767654, "learning_rate": 4.980307150202763e-06, "loss": 0.4452, "step": 4035 }, { "epoch": 0.2457753554790975, "grad_norm": 1.003540808893636, "learning_rate": 4.9802971530282246e-06, "loss": 0.4932, "step": 4036 }, { "epoch": 0.24583625125597539, "grad_norm": 1.064182337339439, "learning_rate": 4.980287153326811e-06, "loss": 0.5131, "step": 4037 }, { "epoch": 0.24589714703285326, "grad_norm": 1.1401741818542015, "learning_rate": 4.980277151098531e-06, "loss": 0.5118, "step": 4038 }, { "epoch": 0.24595804280973116, "grad_norm": 1.080294713711817, "learning_rate": 4.980267146343397e-06, "loss": 0.4568, "step": 4039 }, { "epoch": 0.24601893858660903, "grad_norm": 0.9277651808981804, "learning_rate": 4.980257139061418e-06, "loss": 0.5858, "step": 4040 }, { "epoch": 0.2460798343634869, "grad_norm": 1.0105043748207745, "learning_rate": 4.980247129252606e-06, "loss": 0.4405, "step": 4041 }, { "epoch": 0.24614073014036478, "grad_norm": 1.1110149791379806, "learning_rate": 4.980237116916968e-06, "loss": 0.4843, "step": 4042 }, { "epoch": 0.24620162591724265, "grad_norm": 1.0384427803059315, "learning_rate": 4.980227102054516e-06, "loss": 0.5017, "step": 4043 }, { "epoch": 0.24626252169412052, "grad_norm": 1.22973896460569, "learning_rate": 4.980217084665262e-06, "loss": 0.544, "step": 4044 }, { "epoch": 0.2463234174709984, "grad_norm": 0.978521131571482, "learning_rate": 4.980207064749213e-06, "loss": 0.5043, "step": 4045 }, { "epoch": 0.24638431324787627, "grad_norm": 1.0010742465486158, "learning_rate": 4.980197042306381e-06, "loss": 0.4834, "step": 4046 }, { "epoch": 0.24644520902475414, "grad_norm": 1.0431143905293672, "learning_rate": 4.980187017336776e-06, "loss": 0.5018, "step": 4047 }, { "epoch": 0.24650610480163201, "grad_norm": 1.0715819186175437, "learning_rate": 4.980176989840407e-06, "loss": 0.4797, "step": 4048 }, { "epoch": 0.2465670005785099, "grad_norm": 1.0220762831111998, "learning_rate": 4.980166959817286e-06, "loss": 0.4825, "step": 4049 }, { "epoch": 0.24662789635538776, "grad_norm": 1.0437768033387822, "learning_rate": 4.980156927267423e-06, "loss": 0.4642, "step": 4050 }, { "epoch": 0.24668879213226563, "grad_norm": 1.061155408362368, "learning_rate": 4.9801468921908274e-06, "loss": 0.4727, "step": 4051 }, { "epoch": 0.2467496879091435, "grad_norm": 1.0317208885620504, "learning_rate": 4.98013685458751e-06, "loss": 0.4814, "step": 4052 }, { "epoch": 0.24681058368602138, "grad_norm": 1.0216354268933265, "learning_rate": 4.98012681445748e-06, "loss": 0.4962, "step": 4053 }, { "epoch": 0.24687147946289925, "grad_norm": 1.1471889571653067, "learning_rate": 4.9801167718007485e-06, "loss": 0.4634, "step": 4054 }, { "epoch": 0.24693237523977712, "grad_norm": 1.1510750703636465, "learning_rate": 4.980106726617325e-06, "loss": 0.4158, "step": 4055 }, { "epoch": 0.246993271016655, "grad_norm": 0.9341595212909969, "learning_rate": 4.980096678907221e-06, "loss": 0.4951, "step": 4056 }, { "epoch": 0.24705416679353287, "grad_norm": 1.045234948700589, "learning_rate": 4.980086628670446e-06, "loss": 0.4324, "step": 4057 }, { "epoch": 0.24711506257041074, "grad_norm": 1.1191113315707526, "learning_rate": 4.980076575907012e-06, "loss": 0.4584, "step": 4058 }, { "epoch": 0.24717595834728862, "grad_norm": 1.0945937111881174, "learning_rate": 4.980066520616925e-06, "loss": 0.4683, "step": 4059 }, { "epoch": 0.2472368541241665, "grad_norm": 1.0595285414507942, "learning_rate": 4.9800564628001994e-06, "loss": 0.4684, "step": 4060 }, { "epoch": 0.24729774990104436, "grad_norm": 1.000051272441026, "learning_rate": 4.980046402456842e-06, "loss": 0.5755, "step": 4061 }, { "epoch": 0.24735864567792223, "grad_norm": 0.9818512975848286, "learning_rate": 4.980036339586866e-06, "loss": 0.4791, "step": 4062 }, { "epoch": 0.2474195414548001, "grad_norm": 1.0681448766617214, "learning_rate": 4.980026274190281e-06, "loss": 0.4613, "step": 4063 }, { "epoch": 0.24748043723167798, "grad_norm": 1.0693337548766981, "learning_rate": 4.980016206267096e-06, "loss": 0.4851, "step": 4064 }, { "epoch": 0.24754133300855585, "grad_norm": 1.0036654605171769, "learning_rate": 4.980006135817323e-06, "loss": 0.4982, "step": 4065 }, { "epoch": 0.24760222878543373, "grad_norm": 1.028256514658881, "learning_rate": 4.979996062840971e-06, "loss": 0.4772, "step": 4066 }, { "epoch": 0.2476631245623116, "grad_norm": 1.076886769947766, "learning_rate": 4.97998598733805e-06, "loss": 0.4875, "step": 4067 }, { "epoch": 0.24772402033918947, "grad_norm": 1.0385852521376226, "learning_rate": 4.979975909308571e-06, "loss": 0.5058, "step": 4068 }, { "epoch": 0.24778491611606734, "grad_norm": 1.0700170905245607, "learning_rate": 4.979965828752545e-06, "loss": 0.4759, "step": 4069 }, { "epoch": 0.24784581189294522, "grad_norm": 0.9716376714722894, "learning_rate": 4.97995574566998e-06, "loss": 0.4914, "step": 4070 }, { "epoch": 0.2479067076698231, "grad_norm": 1.0828394714745335, "learning_rate": 4.9799456600608885e-06, "loss": 0.5114, "step": 4071 }, { "epoch": 0.24796760344670096, "grad_norm": 1.0152494712246694, "learning_rate": 4.979935571925279e-06, "loss": 0.5366, "step": 4072 }, { "epoch": 0.24802849922357884, "grad_norm": 1.0281693308243371, "learning_rate": 4.979925481263164e-06, "loss": 0.4745, "step": 4073 }, { "epoch": 0.2480893950004567, "grad_norm": 1.0646273844784562, "learning_rate": 4.979915388074552e-06, "loss": 0.4867, "step": 4074 }, { "epoch": 0.24815029077733458, "grad_norm": 1.1061261018372028, "learning_rate": 4.979905292359453e-06, "loss": 0.4927, "step": 4075 }, { "epoch": 0.24821118655421245, "grad_norm": 1.0417640668700485, "learning_rate": 4.9798951941178796e-06, "loss": 0.5214, "step": 4076 }, { "epoch": 0.24827208233109033, "grad_norm": 1.140714941493114, "learning_rate": 4.9798850933498386e-06, "loss": 0.4962, "step": 4077 }, { "epoch": 0.2483329781079682, "grad_norm": 1.0071443913580735, "learning_rate": 4.979874990055344e-06, "loss": 0.4742, "step": 4078 }, { "epoch": 0.24839387388484607, "grad_norm": 1.0299271579688773, "learning_rate": 4.979864884234403e-06, "loss": 0.5371, "step": 4079 }, { "epoch": 0.24845476966172397, "grad_norm": 1.0050134246702687, "learning_rate": 4.979854775887028e-06, "loss": 0.5459, "step": 4080 }, { "epoch": 0.24851566543860185, "grad_norm": 1.0678455697398428, "learning_rate": 4.979844665013228e-06, "loss": 0.4715, "step": 4081 }, { "epoch": 0.24857656121547972, "grad_norm": 1.0967388695385776, "learning_rate": 4.9798345516130145e-06, "loss": 0.5079, "step": 4082 }, { "epoch": 0.2486374569923576, "grad_norm": 0.9832118892987423, "learning_rate": 4.9798244356863965e-06, "loss": 0.5087, "step": 4083 }, { "epoch": 0.24869835276923546, "grad_norm": 1.091102193383833, "learning_rate": 4.979814317233386e-06, "loss": 0.5316, "step": 4084 }, { "epoch": 0.24875924854611334, "grad_norm": 1.0760202016016025, "learning_rate": 4.979804196253991e-06, "loss": 0.5174, "step": 4085 }, { "epoch": 0.2488201443229912, "grad_norm": 1.113949521304509, "learning_rate": 4.979794072748223e-06, "loss": 0.5546, "step": 4086 }, { "epoch": 0.24888104009986908, "grad_norm": 1.0308811635090351, "learning_rate": 4.979783946716093e-06, "loss": 0.4318, "step": 4087 }, { "epoch": 0.24894193587674696, "grad_norm": 1.0471032550218016, "learning_rate": 4.979773818157611e-06, "loss": 0.5334, "step": 4088 }, { "epoch": 0.24900283165362483, "grad_norm": 1.0185352393438665, "learning_rate": 4.979763687072786e-06, "loss": 0.4286, "step": 4089 }, { "epoch": 0.2490637274305027, "grad_norm": 1.107312594157445, "learning_rate": 4.979753553461629e-06, "loss": 0.4691, "step": 4090 }, { "epoch": 0.24912462320738057, "grad_norm": 1.0104684034902969, "learning_rate": 4.979743417324152e-06, "loss": 0.452, "step": 4091 }, { "epoch": 0.24918551898425845, "grad_norm": 1.1199953168193517, "learning_rate": 4.9797332786603636e-06, "loss": 0.5057, "step": 4092 }, { "epoch": 0.24924641476113632, "grad_norm": 0.9727778821403755, "learning_rate": 4.979723137470274e-06, "loss": 0.5081, "step": 4093 }, { "epoch": 0.2493073105380142, "grad_norm": 1.0691921342663646, "learning_rate": 4.979712993753894e-06, "loss": 0.4949, "step": 4094 }, { "epoch": 0.24936820631489207, "grad_norm": 1.0843819083071615, "learning_rate": 4.979702847511234e-06, "loss": 0.468, "step": 4095 }, { "epoch": 0.24942910209176994, "grad_norm": 1.0715450346901974, "learning_rate": 4.9796926987423044e-06, "loss": 0.5044, "step": 4096 }, { "epoch": 0.2494899978686478, "grad_norm": 1.0060816728733095, "learning_rate": 4.979682547447116e-06, "loss": 0.5073, "step": 4097 }, { "epoch": 0.24955089364552568, "grad_norm": 1.1190226358202244, "learning_rate": 4.979672393625678e-06, "loss": 0.4648, "step": 4098 }, { "epoch": 0.24961178942240356, "grad_norm": 1.0181212731833285, "learning_rate": 4.979662237278001e-06, "loss": 0.4981, "step": 4099 }, { "epoch": 0.24967268519928143, "grad_norm": 1.0828455774550572, "learning_rate": 4.9796520784040965e-06, "loss": 0.4842, "step": 4100 }, { "epoch": 0.2497335809761593, "grad_norm": 1.0778122058270494, "learning_rate": 4.9796419170039735e-06, "loss": 0.5259, "step": 4101 }, { "epoch": 0.24979447675303718, "grad_norm": 1.0315906124517946, "learning_rate": 4.9796317530776425e-06, "loss": 0.4723, "step": 4102 }, { "epoch": 0.24985537252991505, "grad_norm": 1.0926782001658024, "learning_rate": 4.979621586625115e-06, "loss": 0.456, "step": 4103 }, { "epoch": 0.24991626830679292, "grad_norm": 1.0432754744016668, "learning_rate": 4.9796114176464004e-06, "loss": 0.489, "step": 4104 }, { "epoch": 0.2499771640836708, "grad_norm": 1.033326490141144, "learning_rate": 4.97960124614151e-06, "loss": 0.4921, "step": 4105 }, { "epoch": 0.25003805986054867, "grad_norm": 1.096642719075634, "learning_rate": 4.979591072110452e-06, "loss": 0.4716, "step": 4106 }, { "epoch": 0.25009895563742657, "grad_norm": 0.9750718328363718, "learning_rate": 4.979580895553239e-06, "loss": 0.4655, "step": 4107 }, { "epoch": 0.2501598514143044, "grad_norm": 1.012688092888798, "learning_rate": 4.9795707164698795e-06, "loss": 0.4353, "step": 4108 }, { "epoch": 0.2502207471911823, "grad_norm": 1.0147710814499373, "learning_rate": 4.979560534860386e-06, "loss": 0.4684, "step": 4109 }, { "epoch": 0.25028164296806016, "grad_norm": 0.9918673151042803, "learning_rate": 4.979550350724767e-06, "loss": 0.5073, "step": 4110 }, { "epoch": 0.25034253874493806, "grad_norm": 1.109311329508295, "learning_rate": 4.9795401640630344e-06, "loss": 0.5044, "step": 4111 }, { "epoch": 0.2504034345218159, "grad_norm": 1.1718645033859496, "learning_rate": 4.979529974875198e-06, "loss": 0.4709, "step": 4112 }, { "epoch": 0.2504643302986938, "grad_norm": 1.0999434240423733, "learning_rate": 4.979519783161267e-06, "loss": 0.5104, "step": 4113 }, { "epoch": 0.25052522607557165, "grad_norm": 0.9788093189487905, "learning_rate": 4.979509588921253e-06, "loss": 0.5219, "step": 4114 }, { "epoch": 0.25058612185244955, "grad_norm": 1.0778914399727857, "learning_rate": 4.979499392155167e-06, "loss": 0.5075, "step": 4115 }, { "epoch": 0.2506470176293274, "grad_norm": 0.9919202284117123, "learning_rate": 4.979489192863018e-06, "loss": 0.5145, "step": 4116 }, { "epoch": 0.2507079134062053, "grad_norm": 1.0732303419814995, "learning_rate": 4.979478991044817e-06, "loss": 0.4183, "step": 4117 }, { "epoch": 0.25076880918308314, "grad_norm": 1.0511951353102449, "learning_rate": 4.979468786700575e-06, "loss": 0.5488, "step": 4118 }, { "epoch": 0.25082970495996104, "grad_norm": 1.0049120470257444, "learning_rate": 4.9794585798303e-06, "loss": 0.4664, "step": 4119 }, { "epoch": 0.2508906007368389, "grad_norm": 1.0715032549144652, "learning_rate": 4.979448370434005e-06, "loss": 0.4556, "step": 4120 }, { "epoch": 0.2509514965137168, "grad_norm": 1.019426803154069, "learning_rate": 4.9794381585117e-06, "loss": 0.4818, "step": 4121 }, { "epoch": 0.25101239229059463, "grad_norm": 1.048612656105177, "learning_rate": 4.979427944063395e-06, "loss": 0.5093, "step": 4122 }, { "epoch": 0.25107328806747253, "grad_norm": 1.0885206263313183, "learning_rate": 4.9794177270891e-06, "loss": 0.4505, "step": 4123 }, { "epoch": 0.2511341838443504, "grad_norm": 1.0424366941911747, "learning_rate": 4.979407507588826e-06, "loss": 0.5328, "step": 4124 }, { "epoch": 0.2511950796212283, "grad_norm": 1.1625526891895688, "learning_rate": 4.9793972855625835e-06, "loss": 0.4897, "step": 4125 }, { "epoch": 0.2512559753981061, "grad_norm": 1.2204232291472044, "learning_rate": 4.979387061010383e-06, "loss": 0.4635, "step": 4126 }, { "epoch": 0.251316871174984, "grad_norm": 1.0754352903023399, "learning_rate": 4.979376833932232e-06, "loss": 0.4943, "step": 4127 }, { "epoch": 0.25137776695186187, "grad_norm": 1.0110800797881907, "learning_rate": 4.979366604328146e-06, "loss": 0.5372, "step": 4128 }, { "epoch": 0.25143866272873977, "grad_norm": 1.0383897452970186, "learning_rate": 4.979356372198132e-06, "loss": 0.4356, "step": 4129 }, { "epoch": 0.2514995585056176, "grad_norm": 1.0997965841925141, "learning_rate": 4.979346137542201e-06, "loss": 0.4589, "step": 4130 }, { "epoch": 0.2515604542824955, "grad_norm": 1.081326626004732, "learning_rate": 4.9793359003603635e-06, "loss": 0.4116, "step": 4131 }, { "epoch": 0.25162135005937336, "grad_norm": 1.058970934680491, "learning_rate": 4.979325660652631e-06, "loss": 0.4512, "step": 4132 }, { "epoch": 0.25168224583625126, "grad_norm": 1.008470136479926, "learning_rate": 4.9793154184190125e-06, "loss": 0.5127, "step": 4133 }, { "epoch": 0.2517431416131291, "grad_norm": 1.1439014811908312, "learning_rate": 4.9793051736595184e-06, "loss": 0.4655, "step": 4134 }, { "epoch": 0.251804037390007, "grad_norm": 0.9328527247655396, "learning_rate": 4.979294926374161e-06, "loss": 0.5281, "step": 4135 }, { "epoch": 0.25186493316688485, "grad_norm": 1.0436449221328277, "learning_rate": 4.979284676562949e-06, "loss": 0.5128, "step": 4136 }, { "epoch": 0.25192582894376275, "grad_norm": 0.9914704300457361, "learning_rate": 4.979274424225893e-06, "loss": 0.4919, "step": 4137 }, { "epoch": 0.2519867247206406, "grad_norm": 1.1364053009868753, "learning_rate": 4.979264169363004e-06, "loss": 0.4957, "step": 4138 }, { "epoch": 0.2520476204975185, "grad_norm": 1.0104032715797329, "learning_rate": 4.979253911974293e-06, "loss": 0.4811, "step": 4139 }, { "epoch": 0.25210851627439634, "grad_norm": 1.0392181370191977, "learning_rate": 4.979243652059768e-06, "loss": 0.5528, "step": 4140 }, { "epoch": 0.25216941205127424, "grad_norm": 1.162146789235497, "learning_rate": 4.979233389619442e-06, "loss": 0.5166, "step": 4141 }, { "epoch": 0.25223030782815214, "grad_norm": 1.0341102367764374, "learning_rate": 4.979223124653325e-06, "loss": 0.5049, "step": 4142 }, { "epoch": 0.25229120360503, "grad_norm": 0.9692932494412276, "learning_rate": 4.979212857161427e-06, "loss": 0.5524, "step": 4143 }, { "epoch": 0.2523520993819079, "grad_norm": 1.1388945980509473, "learning_rate": 4.9792025871437575e-06, "loss": 0.4474, "step": 4144 }, { "epoch": 0.25241299515878574, "grad_norm": 1.030906906364773, "learning_rate": 4.979192314600328e-06, "loss": 0.4906, "step": 4145 }, { "epoch": 0.25247389093566364, "grad_norm": 0.9929995381984421, "learning_rate": 4.97918203953115e-06, "loss": 0.4742, "step": 4146 }, { "epoch": 0.2525347867125415, "grad_norm": 1.024009856756656, "learning_rate": 4.979171761936232e-06, "loss": 0.464, "step": 4147 }, { "epoch": 0.2525956824894194, "grad_norm": 1.1332398245652562, "learning_rate": 4.979161481815586e-06, "loss": 0.4617, "step": 4148 }, { "epoch": 0.2526565782662972, "grad_norm": 1.0533726541771493, "learning_rate": 4.979151199169222e-06, "loss": 0.518, "step": 4149 }, { "epoch": 0.2527174740431751, "grad_norm": 1.0708588601490654, "learning_rate": 4.979140913997149e-06, "loss": 0.4909, "step": 4150 }, { "epoch": 0.252778369820053, "grad_norm": 1.032916508991088, "learning_rate": 4.97913062629938e-06, "loss": 0.4884, "step": 4151 }, { "epoch": 0.2528392655969309, "grad_norm": 1.0566661494754832, "learning_rate": 4.979120336075924e-06, "loss": 0.5484, "step": 4152 }, { "epoch": 0.2529001613738087, "grad_norm": 1.0466549068352444, "learning_rate": 4.979110043326792e-06, "loss": 0.4934, "step": 4153 }, { "epoch": 0.2529610571506866, "grad_norm": 1.0472775919575747, "learning_rate": 4.979099748051993e-06, "loss": 0.5217, "step": 4154 }, { "epoch": 0.25302195292756446, "grad_norm": 1.0514932646442379, "learning_rate": 4.97908945025154e-06, "loss": 0.4807, "step": 4155 }, { "epoch": 0.25308284870444236, "grad_norm": 1.1061138303584577, "learning_rate": 4.979079149925442e-06, "loss": 0.5459, "step": 4156 }, { "epoch": 0.2531437444813202, "grad_norm": 1.0191603246198486, "learning_rate": 4.979068847073709e-06, "loss": 0.5288, "step": 4157 }, { "epoch": 0.2532046402581981, "grad_norm": 1.0003569480110428, "learning_rate": 4.979058541696352e-06, "loss": 0.5441, "step": 4158 }, { "epoch": 0.25326553603507596, "grad_norm": 0.9362808410261741, "learning_rate": 4.979048233793384e-06, "loss": 0.5071, "step": 4159 }, { "epoch": 0.25332643181195386, "grad_norm": 1.0742269446734907, "learning_rate": 4.979037923364811e-06, "loss": 0.4332, "step": 4160 }, { "epoch": 0.2533873275888317, "grad_norm": 0.9855766244969387, "learning_rate": 4.979027610410646e-06, "loss": 0.5086, "step": 4161 }, { "epoch": 0.2534482233657096, "grad_norm": 1.1278314384107002, "learning_rate": 4.979017294930899e-06, "loss": 0.4736, "step": 4162 }, { "epoch": 0.25350911914258745, "grad_norm": 0.9558370979447068, "learning_rate": 4.979006976925581e-06, "loss": 0.5234, "step": 4163 }, { "epoch": 0.25357001491946535, "grad_norm": 0.9746740877516773, "learning_rate": 4.978996656394703e-06, "loss": 0.4822, "step": 4164 }, { "epoch": 0.2536309106963432, "grad_norm": 1.1014775878261802, "learning_rate": 4.978986333338274e-06, "loss": 0.4491, "step": 4165 }, { "epoch": 0.2536918064732211, "grad_norm": 0.9569538547411932, "learning_rate": 4.9789760077563055e-06, "loss": 0.5367, "step": 4166 }, { "epoch": 0.25375270225009894, "grad_norm": 1.1140712564573125, "learning_rate": 4.9789656796488076e-06, "loss": 0.5159, "step": 4167 }, { "epoch": 0.25381359802697684, "grad_norm": 1.053877046817155, "learning_rate": 4.978955349015791e-06, "loss": 0.499, "step": 4168 }, { "epoch": 0.2538744938038547, "grad_norm": 1.0853577006147443, "learning_rate": 4.978945015857266e-06, "loss": 0.5109, "step": 4169 }, { "epoch": 0.2539353895807326, "grad_norm": 1.1016307401561245, "learning_rate": 4.978934680173244e-06, "loss": 0.4605, "step": 4170 }, { "epoch": 0.25399628535761043, "grad_norm": 0.9534127971337051, "learning_rate": 4.978924341963735e-06, "loss": 0.5075, "step": 4171 }, { "epoch": 0.25405718113448833, "grad_norm": 1.0781941642096409, "learning_rate": 4.978914001228748e-06, "loss": 0.5225, "step": 4172 }, { "epoch": 0.2541180769113662, "grad_norm": 1.08527085172192, "learning_rate": 4.978903657968297e-06, "loss": 0.4705, "step": 4173 }, { "epoch": 0.2541789726882441, "grad_norm": 1.0290740372083687, "learning_rate": 4.978893312182389e-06, "loss": 0.4714, "step": 4174 }, { "epoch": 0.2542398684651219, "grad_norm": 1.0492170381502892, "learning_rate": 4.978882963871037e-06, "loss": 0.5033, "step": 4175 }, { "epoch": 0.2543007642419998, "grad_norm": 1.0511507435786167, "learning_rate": 4.978872613034249e-06, "loss": 0.4911, "step": 4176 }, { "epoch": 0.25436166001887767, "grad_norm": 0.9820379609128026, "learning_rate": 4.978862259672039e-06, "loss": 0.4902, "step": 4177 }, { "epoch": 0.25442255579575557, "grad_norm": 1.0340677093724016, "learning_rate": 4.978851903784415e-06, "loss": 0.5229, "step": 4178 }, { "epoch": 0.2544834515726334, "grad_norm": 1.0103367798177947, "learning_rate": 4.978841545371388e-06, "loss": 0.474, "step": 4179 }, { "epoch": 0.2545443473495113, "grad_norm": 1.029827609507553, "learning_rate": 4.978831184432969e-06, "loss": 0.4959, "step": 4180 }, { "epoch": 0.25460524312638916, "grad_norm": 1.0942576000649247, "learning_rate": 4.978820820969168e-06, "loss": 0.4642, "step": 4181 }, { "epoch": 0.25466613890326706, "grad_norm": 1.1100581032080492, "learning_rate": 4.978810454979996e-06, "loss": 0.4515, "step": 4182 }, { "epoch": 0.25472703468014496, "grad_norm": 1.060117416015849, "learning_rate": 4.978800086465463e-06, "loss": 0.535, "step": 4183 }, { "epoch": 0.2547879304570228, "grad_norm": 1.0101689360513257, "learning_rate": 4.97878971542558e-06, "loss": 0.4896, "step": 4184 }, { "epoch": 0.2548488262339007, "grad_norm": 1.1296517723716737, "learning_rate": 4.978779341860359e-06, "loss": 0.4844, "step": 4185 }, { "epoch": 0.25490972201077855, "grad_norm": 1.0579622957646002, "learning_rate": 4.978768965769808e-06, "loss": 0.4353, "step": 4186 }, { "epoch": 0.25497061778765645, "grad_norm": 1.0614958001688264, "learning_rate": 4.978758587153939e-06, "loss": 0.4434, "step": 4187 }, { "epoch": 0.2550315135645343, "grad_norm": 0.9727906895567442, "learning_rate": 4.978748206012762e-06, "loss": 0.498, "step": 4188 }, { "epoch": 0.2550924093414122, "grad_norm": 1.0281678092083033, "learning_rate": 4.978737822346288e-06, "loss": 0.5253, "step": 4189 }, { "epoch": 0.25515330511829004, "grad_norm": 0.9999432270305109, "learning_rate": 4.978727436154528e-06, "loss": 0.4916, "step": 4190 }, { "epoch": 0.25521420089516794, "grad_norm": 1.0293004772282734, "learning_rate": 4.978717047437492e-06, "loss": 0.4947, "step": 4191 }, { "epoch": 0.2552750966720458, "grad_norm": 1.038356096631454, "learning_rate": 4.978706656195189e-06, "loss": 0.48, "step": 4192 }, { "epoch": 0.2553359924489237, "grad_norm": 0.9446618063275546, "learning_rate": 4.978696262427633e-06, "loss": 0.5548, "step": 4193 }, { "epoch": 0.25539688822580153, "grad_norm": 0.9958883782489781, "learning_rate": 4.978685866134831e-06, "loss": 0.4668, "step": 4194 }, { "epoch": 0.25545778400267943, "grad_norm": 1.0433187912463948, "learning_rate": 4.978675467316797e-06, "loss": 0.4805, "step": 4195 }, { "epoch": 0.2555186797795573, "grad_norm": 1.0541820555256631, "learning_rate": 4.97866506597354e-06, "loss": 0.4549, "step": 4196 }, { "epoch": 0.2555795755564352, "grad_norm": 0.9766950683850496, "learning_rate": 4.97865466210507e-06, "loss": 0.5005, "step": 4197 }, { "epoch": 0.255640471333313, "grad_norm": 0.9905923095647241, "learning_rate": 4.978644255711398e-06, "loss": 0.5438, "step": 4198 }, { "epoch": 0.2557013671101909, "grad_norm": 1.0931326311768408, "learning_rate": 4.978633846792534e-06, "loss": 0.4977, "step": 4199 }, { "epoch": 0.25576226288706877, "grad_norm": 1.2469322012662378, "learning_rate": 4.978623435348491e-06, "loss": 0.4039, "step": 4200 }, { "epoch": 0.25582315866394667, "grad_norm": 1.055044216967742, "learning_rate": 4.978613021379277e-06, "loss": 0.5135, "step": 4201 }, { "epoch": 0.2558840544408245, "grad_norm": 1.0291426875152827, "learning_rate": 4.978602604884904e-06, "loss": 0.5147, "step": 4202 }, { "epoch": 0.2559449502177024, "grad_norm": 1.0211735220211073, "learning_rate": 4.978592185865382e-06, "loss": 0.5193, "step": 4203 }, { "epoch": 0.25600584599458026, "grad_norm": 1.0602656642046662, "learning_rate": 4.978581764320723e-06, "loss": 0.5496, "step": 4204 }, { "epoch": 0.25606674177145816, "grad_norm": 0.9881742480427127, "learning_rate": 4.978571340250934e-06, "loss": 0.5129, "step": 4205 }, { "epoch": 0.256127637548336, "grad_norm": 1.0319056581591093, "learning_rate": 4.97856091365603e-06, "loss": 0.5508, "step": 4206 }, { "epoch": 0.2561885333252139, "grad_norm": 1.07999513234695, "learning_rate": 4.978550484536019e-06, "loss": 0.4807, "step": 4207 }, { "epoch": 0.25624942910209175, "grad_norm": 0.9803902791518019, "learning_rate": 4.9785400528909125e-06, "loss": 0.5661, "step": 4208 }, { "epoch": 0.25631032487896965, "grad_norm": 1.0810527048554208, "learning_rate": 4.97852961872072e-06, "loss": 0.5131, "step": 4209 }, { "epoch": 0.2563712206558475, "grad_norm": 1.1207671734520945, "learning_rate": 4.978519182025454e-06, "loss": 0.5026, "step": 4210 }, { "epoch": 0.2564321164327254, "grad_norm": 0.9689875556670429, "learning_rate": 4.978508742805124e-06, "loss": 0.5735, "step": 4211 }, { "epoch": 0.25649301220960324, "grad_norm": 0.941172409597495, "learning_rate": 4.978498301059741e-06, "loss": 0.5182, "step": 4212 }, { "epoch": 0.25655390798648114, "grad_norm": 1.071014205702067, "learning_rate": 4.978487856789315e-06, "loss": 0.502, "step": 4213 }, { "epoch": 0.256614803763359, "grad_norm": 1.0540079699414602, "learning_rate": 4.978477409993858e-06, "loss": 0.4867, "step": 4214 }, { "epoch": 0.2566756995402369, "grad_norm": 0.9637814721103485, "learning_rate": 4.978466960673379e-06, "loss": 0.4793, "step": 4215 }, { "epoch": 0.25673659531711474, "grad_norm": 1.0343710898755503, "learning_rate": 4.978456508827889e-06, "loss": 0.4816, "step": 4216 }, { "epoch": 0.25679749109399264, "grad_norm": 0.9320148273073705, "learning_rate": 4.9784460544573995e-06, "loss": 0.4705, "step": 4217 }, { "epoch": 0.2568583868708705, "grad_norm": 1.0242868107841634, "learning_rate": 4.978435597561921e-06, "loss": 0.5024, "step": 4218 }, { "epoch": 0.2569192826477484, "grad_norm": 1.1823610389442316, "learning_rate": 4.978425138141464e-06, "loss": 0.5353, "step": 4219 }, { "epoch": 0.2569801784246262, "grad_norm": 1.073732466317486, "learning_rate": 4.978414676196038e-06, "loss": 0.4986, "step": 4220 }, { "epoch": 0.2570410742015041, "grad_norm": 1.0030209351930515, "learning_rate": 4.9784042117256554e-06, "loss": 0.4656, "step": 4221 }, { "epoch": 0.257101969978382, "grad_norm": 1.0382458787150262, "learning_rate": 4.978393744730325e-06, "loss": 0.4876, "step": 4222 }, { "epoch": 0.2571628657552599, "grad_norm": 1.0490564102230748, "learning_rate": 4.97838327521006e-06, "loss": 0.4887, "step": 4223 }, { "epoch": 0.2572237615321378, "grad_norm": 1.033725005726756, "learning_rate": 4.978372803164869e-06, "loss": 0.5052, "step": 4224 }, { "epoch": 0.2572846573090156, "grad_norm": 1.0831191261186595, "learning_rate": 4.978362328594764e-06, "loss": 0.5021, "step": 4225 }, { "epoch": 0.2573455530858935, "grad_norm": 1.0971426837345069, "learning_rate": 4.978351851499754e-06, "loss": 0.4924, "step": 4226 }, { "epoch": 0.25740644886277136, "grad_norm": 1.0744029494279794, "learning_rate": 4.978341371879851e-06, "loss": 0.5027, "step": 4227 }, { "epoch": 0.25746734463964926, "grad_norm": 0.9827699685925225, "learning_rate": 4.978330889735065e-06, "loss": 0.4865, "step": 4228 }, { "epoch": 0.2575282404165271, "grad_norm": 0.9974188085635367, "learning_rate": 4.978320405065409e-06, "loss": 0.4585, "step": 4229 }, { "epoch": 0.257589136193405, "grad_norm": 0.978456155134026, "learning_rate": 4.978309917870889e-06, "loss": 0.569, "step": 4230 }, { "epoch": 0.25765003197028286, "grad_norm": 1.1579867021086172, "learning_rate": 4.978299428151521e-06, "loss": 0.474, "step": 4231 }, { "epoch": 0.25771092774716076, "grad_norm": 1.0473087383438258, "learning_rate": 4.978288935907311e-06, "loss": 0.4966, "step": 4232 }, { "epoch": 0.2577718235240386, "grad_norm": 0.9967552602289788, "learning_rate": 4.9782784411382725e-06, "loss": 0.5216, "step": 4233 }, { "epoch": 0.2578327193009165, "grad_norm": 1.0176393277804336, "learning_rate": 4.9782679438444145e-06, "loss": 0.4934, "step": 4234 }, { "epoch": 0.25789361507779435, "grad_norm": 1.1326064167984315, "learning_rate": 4.97825744402575e-06, "loss": 0.4598, "step": 4235 }, { "epoch": 0.25795451085467225, "grad_norm": 1.0306949749424994, "learning_rate": 4.9782469416822875e-06, "loss": 0.5326, "step": 4236 }, { "epoch": 0.2580154066315501, "grad_norm": 1.0748249851648723, "learning_rate": 4.978236436814039e-06, "loss": 0.5117, "step": 4237 }, { "epoch": 0.258076302408428, "grad_norm": 1.2044453051898507, "learning_rate": 4.978225929421015e-06, "loss": 0.4523, "step": 4238 }, { "epoch": 0.25813719818530584, "grad_norm": 1.108017369888361, "learning_rate": 4.978215419503225e-06, "loss": 0.4406, "step": 4239 }, { "epoch": 0.25819809396218374, "grad_norm": 0.9600632509785169, "learning_rate": 4.978204907060682e-06, "loss": 0.5853, "step": 4240 }, { "epoch": 0.2582589897390616, "grad_norm": 1.0761812498758252, "learning_rate": 4.978194392093394e-06, "loss": 0.4826, "step": 4241 }, { "epoch": 0.2583198855159395, "grad_norm": 1.0125908976507134, "learning_rate": 4.978183874601374e-06, "loss": 0.546, "step": 4242 }, { "epoch": 0.25838078129281733, "grad_norm": 1.073181298364281, "learning_rate": 4.978173354584631e-06, "loss": 0.4267, "step": 4243 }, { "epoch": 0.25844167706969523, "grad_norm": 1.0036312998609556, "learning_rate": 4.978162832043177e-06, "loss": 0.5088, "step": 4244 }, { "epoch": 0.2585025728465731, "grad_norm": 1.0487677187090338, "learning_rate": 4.9781523069770225e-06, "loss": 0.55, "step": 4245 }, { "epoch": 0.258563468623451, "grad_norm": 0.9755267068784758, "learning_rate": 4.9781417793861774e-06, "loss": 0.5813, "step": 4246 }, { "epoch": 0.2586243644003288, "grad_norm": 1.1448288695262951, "learning_rate": 4.978131249270653e-06, "loss": 0.4329, "step": 4247 }, { "epoch": 0.2586852601772067, "grad_norm": 1.0551150234867765, "learning_rate": 4.97812071663046e-06, "loss": 0.4371, "step": 4248 }, { "epoch": 0.25874615595408457, "grad_norm": 0.9624072199708438, "learning_rate": 4.97811018146561e-06, "loss": 0.4931, "step": 4249 }, { "epoch": 0.25880705173096247, "grad_norm": 0.9853927971698032, "learning_rate": 4.978099643776112e-06, "loss": 0.5128, "step": 4250 }, { "epoch": 0.2588679475078403, "grad_norm": 1.0725962901389934, "learning_rate": 4.978089103561977e-06, "loss": 0.5178, "step": 4251 }, { "epoch": 0.2589288432847182, "grad_norm": 0.9901615825689618, "learning_rate": 4.978078560823218e-06, "loss": 0.568, "step": 4252 }, { "epoch": 0.25898973906159606, "grad_norm": 1.0329609440910292, "learning_rate": 4.978068015559843e-06, "loss": 0.5103, "step": 4253 }, { "epoch": 0.25905063483847396, "grad_norm": 0.9533486786150255, "learning_rate": 4.978057467771864e-06, "loss": 0.4963, "step": 4254 }, { "epoch": 0.2591115306153518, "grad_norm": 1.0855233833630096, "learning_rate": 4.978046917459291e-06, "loss": 0.4526, "step": 4255 }, { "epoch": 0.2591724263922297, "grad_norm": 1.1810234774248214, "learning_rate": 4.978036364622137e-06, "loss": 0.515, "step": 4256 }, { "epoch": 0.25923332216910755, "grad_norm": 1.041638872334355, "learning_rate": 4.97802580926041e-06, "loss": 0.5419, "step": 4257 }, { "epoch": 0.25929421794598545, "grad_norm": 1.1540591418643193, "learning_rate": 4.978015251374122e-06, "loss": 0.524, "step": 4258 }, { "epoch": 0.2593551137228633, "grad_norm": 1.124239086945665, "learning_rate": 4.978004690963283e-06, "loss": 0.4644, "step": 4259 }, { "epoch": 0.2594160094997412, "grad_norm": 1.117926095729487, "learning_rate": 4.977994128027905e-06, "loss": 0.5884, "step": 4260 }, { "epoch": 0.25947690527661904, "grad_norm": 1.1238041370968503, "learning_rate": 4.977983562567998e-06, "loss": 0.3885, "step": 4261 }, { "epoch": 0.25953780105349694, "grad_norm": 1.0714017147236954, "learning_rate": 4.977972994583572e-06, "loss": 0.4952, "step": 4262 }, { "epoch": 0.2595986968303748, "grad_norm": 1.01552877879411, "learning_rate": 4.97796242407464e-06, "loss": 0.483, "step": 4263 }, { "epoch": 0.2596595926072527, "grad_norm": 0.942634612529687, "learning_rate": 4.97795185104121e-06, "loss": 0.4899, "step": 4264 }, { "epoch": 0.2597204883841306, "grad_norm": 1.09531801400908, "learning_rate": 4.977941275483296e-06, "loss": 0.5114, "step": 4265 }, { "epoch": 0.25978138416100843, "grad_norm": 1.0779535377756604, "learning_rate": 4.977930697400906e-06, "loss": 0.4039, "step": 4266 }, { "epoch": 0.25984227993788633, "grad_norm": 1.0136437141692827, "learning_rate": 4.977920116794051e-06, "loss": 0.4652, "step": 4267 }, { "epoch": 0.2599031757147642, "grad_norm": 1.0589417739299216, "learning_rate": 4.977909533662743e-06, "loss": 0.4684, "step": 4268 }, { "epoch": 0.2599640714916421, "grad_norm": 0.9587308548363149, "learning_rate": 4.9778989480069925e-06, "loss": 0.4878, "step": 4269 }, { "epoch": 0.2600249672685199, "grad_norm": 1.0157762348419908, "learning_rate": 4.97788835982681e-06, "loss": 0.5149, "step": 4270 }, { "epoch": 0.2600858630453978, "grad_norm": 1.1259763393611677, "learning_rate": 4.977877769122206e-06, "loss": 0.4843, "step": 4271 }, { "epoch": 0.26014675882227567, "grad_norm": 1.1188293093277009, "learning_rate": 4.977867175893192e-06, "loss": 0.4957, "step": 4272 }, { "epoch": 0.26020765459915357, "grad_norm": 1.1140873862102008, "learning_rate": 4.977856580139779e-06, "loss": 0.5025, "step": 4273 }, { "epoch": 0.2602685503760314, "grad_norm": 0.9636388380568758, "learning_rate": 4.977845981861976e-06, "loss": 0.4486, "step": 4274 }, { "epoch": 0.2603294461529093, "grad_norm": 0.9637820228761064, "learning_rate": 4.977835381059796e-06, "loss": 0.5167, "step": 4275 }, { "epoch": 0.26039034192978716, "grad_norm": 1.0768274451780855, "learning_rate": 4.977824777733249e-06, "loss": 0.4303, "step": 4276 }, { "epoch": 0.26045123770666506, "grad_norm": 1.1382926212249227, "learning_rate": 4.977814171882345e-06, "loss": 0.544, "step": 4277 }, { "epoch": 0.2605121334835429, "grad_norm": 1.0282627569036695, "learning_rate": 4.977803563507095e-06, "loss": 0.5232, "step": 4278 }, { "epoch": 0.2605730292604208, "grad_norm": 1.00510390184354, "learning_rate": 4.9777929526075105e-06, "loss": 0.4845, "step": 4279 }, { "epoch": 0.26063392503729865, "grad_norm": 1.067855329614217, "learning_rate": 4.977782339183603e-06, "loss": 0.5123, "step": 4280 }, { "epoch": 0.26069482081417655, "grad_norm": 1.038534839688906, "learning_rate": 4.977771723235382e-06, "loss": 0.4639, "step": 4281 }, { "epoch": 0.2607557165910544, "grad_norm": 1.0501360348326798, "learning_rate": 4.9777611047628586e-06, "loss": 0.4676, "step": 4282 }, { "epoch": 0.2608166123679323, "grad_norm": 1.065577098548266, "learning_rate": 4.977750483766043e-06, "loss": 0.4715, "step": 4283 }, { "epoch": 0.26087750814481014, "grad_norm": 1.0834853020486452, "learning_rate": 4.9777398602449475e-06, "loss": 0.4875, "step": 4284 }, { "epoch": 0.26093840392168804, "grad_norm": 0.978118767035738, "learning_rate": 4.977729234199582e-06, "loss": 0.4897, "step": 4285 }, { "epoch": 0.2609992996985659, "grad_norm": 1.0699002504174069, "learning_rate": 4.977718605629957e-06, "loss": 0.4324, "step": 4286 }, { "epoch": 0.2610601954754438, "grad_norm": 1.013377163353115, "learning_rate": 4.977707974536084e-06, "loss": 0.4707, "step": 4287 }, { "epoch": 0.26112109125232164, "grad_norm": 1.0045144261941663, "learning_rate": 4.977697340917974e-06, "loss": 0.4545, "step": 4288 }, { "epoch": 0.26118198702919954, "grad_norm": 1.0584961345851316, "learning_rate": 4.977686704775637e-06, "loss": 0.4983, "step": 4289 }, { "epoch": 0.2612428828060774, "grad_norm": 0.968778604362165, "learning_rate": 4.977676066109085e-06, "loss": 0.4988, "step": 4290 }, { "epoch": 0.2613037785829553, "grad_norm": 1.022396362376389, "learning_rate": 4.9776654249183274e-06, "loss": 0.5028, "step": 4291 }, { "epoch": 0.2613646743598331, "grad_norm": 1.0162845627740082, "learning_rate": 4.977654781203376e-06, "loss": 0.4666, "step": 4292 }, { "epoch": 0.261425570136711, "grad_norm": 1.0017807837817576, "learning_rate": 4.977644134964241e-06, "loss": 0.5125, "step": 4293 }, { "epoch": 0.2614864659135889, "grad_norm": 1.1348776154397495, "learning_rate": 4.977633486200935e-06, "loss": 0.4503, "step": 4294 }, { "epoch": 0.2615473616904668, "grad_norm": 1.1150880712971536, "learning_rate": 4.977622834913466e-06, "loss": 0.4216, "step": 4295 }, { "epoch": 0.2616082574673446, "grad_norm": 1.0894259943137183, "learning_rate": 4.977612181101848e-06, "loss": 0.4475, "step": 4296 }, { "epoch": 0.2616691532442225, "grad_norm": 0.940953715135138, "learning_rate": 4.977601524766088e-06, "loss": 0.5452, "step": 4297 }, { "epoch": 0.26173004902110036, "grad_norm": 0.9570907587309244, "learning_rate": 4.977590865906201e-06, "loss": 0.4962, "step": 4298 }, { "epoch": 0.26179094479797826, "grad_norm": 1.0007821943971995, "learning_rate": 4.977580204522195e-06, "loss": 0.4907, "step": 4299 }, { "epoch": 0.2618518405748561, "grad_norm": 1.0127129184215837, "learning_rate": 4.977569540614082e-06, "loss": 0.444, "step": 4300 }, { "epoch": 0.261912736351734, "grad_norm": 1.0142659358135995, "learning_rate": 4.9775588741818725e-06, "loss": 0.4765, "step": 4301 }, { "epoch": 0.26197363212861186, "grad_norm": 1.0181709861307093, "learning_rate": 4.977548205225578e-06, "loss": 0.4756, "step": 4302 }, { "epoch": 0.26203452790548976, "grad_norm": 1.0626007642696378, "learning_rate": 4.9775375337452084e-06, "loss": 0.4392, "step": 4303 }, { "epoch": 0.2620954236823676, "grad_norm": 1.0454049875565132, "learning_rate": 4.9775268597407755e-06, "loss": 0.4982, "step": 4304 }, { "epoch": 0.2621563194592455, "grad_norm": 1.0421598798185556, "learning_rate": 4.9775161832122895e-06, "loss": 0.4981, "step": 4305 }, { "epoch": 0.2622172152361234, "grad_norm": 1.0333363082363236, "learning_rate": 4.977505504159762e-06, "loss": 0.5367, "step": 4306 }, { "epoch": 0.26227811101300125, "grad_norm": 1.0731533518403562, "learning_rate": 4.977494822583203e-06, "loss": 0.527, "step": 4307 }, { "epoch": 0.26233900678987915, "grad_norm": 0.9551649506659368, "learning_rate": 4.977484138482623e-06, "loss": 0.5567, "step": 4308 }, { "epoch": 0.262399902566757, "grad_norm": 1.0640274248725643, "learning_rate": 4.977473451858035e-06, "loss": 0.4364, "step": 4309 }, { "epoch": 0.2624607983436349, "grad_norm": 0.9473526878786953, "learning_rate": 4.977462762709448e-06, "loss": 0.4354, "step": 4310 }, { "epoch": 0.26252169412051274, "grad_norm": 1.0361984120691552, "learning_rate": 4.9774520710368735e-06, "loss": 0.5468, "step": 4311 }, { "epoch": 0.26258258989739064, "grad_norm": 1.0394626294338078, "learning_rate": 4.977441376840322e-06, "loss": 0.4764, "step": 4312 }, { "epoch": 0.2626434856742685, "grad_norm": 1.0406970081999007, "learning_rate": 4.977430680119805e-06, "loss": 0.5059, "step": 4313 }, { "epoch": 0.2627043814511464, "grad_norm": 1.0363104108750198, "learning_rate": 4.977419980875333e-06, "loss": 0.4186, "step": 4314 }, { "epoch": 0.26276527722802423, "grad_norm": 1.0059821671402036, "learning_rate": 4.977409279106917e-06, "loss": 0.4361, "step": 4315 }, { "epoch": 0.26282617300490213, "grad_norm": 1.0802657802253421, "learning_rate": 4.977398574814568e-06, "loss": 0.4852, "step": 4316 }, { "epoch": 0.26288706878178, "grad_norm": 1.1456122633134054, "learning_rate": 4.977387867998297e-06, "loss": 0.5115, "step": 4317 }, { "epoch": 0.2629479645586579, "grad_norm": 1.045722282311691, "learning_rate": 4.977377158658115e-06, "loss": 0.4555, "step": 4318 }, { "epoch": 0.2630088603355357, "grad_norm": 1.0896920353135542, "learning_rate": 4.977366446794033e-06, "loss": 0.4239, "step": 4319 }, { "epoch": 0.2630697561124136, "grad_norm": 1.0699394954477637, "learning_rate": 4.977355732406061e-06, "loss": 0.4822, "step": 4320 }, { "epoch": 0.26313065188929147, "grad_norm": 0.9766821282349897, "learning_rate": 4.977345015494209e-06, "loss": 0.4862, "step": 4321 }, { "epoch": 0.26319154766616937, "grad_norm": 1.1053355692566227, "learning_rate": 4.9773342960584915e-06, "loss": 0.4851, "step": 4322 }, { "epoch": 0.2632524434430472, "grad_norm": 1.018503428640529, "learning_rate": 4.9773235740989164e-06, "loss": 0.5575, "step": 4323 }, { "epoch": 0.2633133392199251, "grad_norm": 1.0028411391887961, "learning_rate": 4.977312849615496e-06, "loss": 0.4936, "step": 4324 }, { "epoch": 0.26337423499680296, "grad_norm": 1.0559339974776063, "learning_rate": 4.9773021226082404e-06, "loss": 0.5517, "step": 4325 }, { "epoch": 0.26343513077368086, "grad_norm": 1.1000362626639208, "learning_rate": 4.977291393077161e-06, "loss": 0.519, "step": 4326 }, { "epoch": 0.2634960265505587, "grad_norm": 1.0210900857082925, "learning_rate": 4.977280661022269e-06, "loss": 0.5035, "step": 4327 }, { "epoch": 0.2635569223274366, "grad_norm": 1.0277690079920427, "learning_rate": 4.977269926443574e-06, "loss": 0.4654, "step": 4328 }, { "epoch": 0.26361781810431445, "grad_norm": 1.120759748383794, "learning_rate": 4.977259189341089e-06, "loss": 0.441, "step": 4329 }, { "epoch": 0.26367871388119235, "grad_norm": 0.9874544665210461, "learning_rate": 4.977248449714823e-06, "loss": 0.5062, "step": 4330 }, { "epoch": 0.2637396096580702, "grad_norm": 1.0561744958732404, "learning_rate": 4.977237707564788e-06, "loss": 0.4172, "step": 4331 }, { "epoch": 0.2638005054349481, "grad_norm": 0.986054957686327, "learning_rate": 4.977226962890995e-06, "loss": 0.5042, "step": 4332 }, { "epoch": 0.26386140121182594, "grad_norm": 1.0440583067792675, "learning_rate": 4.977216215693455e-06, "loss": 0.498, "step": 4333 }, { "epoch": 0.26392229698870384, "grad_norm": 1.0663844313234603, "learning_rate": 4.977205465972178e-06, "loss": 0.5236, "step": 4334 }, { "epoch": 0.2639831927655817, "grad_norm": 1.1234499157634448, "learning_rate": 4.977194713727176e-06, "loss": 0.4513, "step": 4335 }, { "epoch": 0.2640440885424596, "grad_norm": 1.1172306240221723, "learning_rate": 4.977183958958459e-06, "loss": 0.4015, "step": 4336 }, { "epoch": 0.26410498431933743, "grad_norm": 1.0161273181749533, "learning_rate": 4.977173201666039e-06, "loss": 0.5514, "step": 4337 }, { "epoch": 0.26416588009621533, "grad_norm": 0.9584198544726118, "learning_rate": 4.977162441849926e-06, "loss": 0.5385, "step": 4338 }, { "epoch": 0.2642267758730932, "grad_norm": 0.9640264754177881, "learning_rate": 4.977151679510132e-06, "loss": 0.4803, "step": 4339 }, { "epoch": 0.2642876716499711, "grad_norm": 1.032633862675662, "learning_rate": 4.977140914646667e-06, "loss": 0.4715, "step": 4340 }, { "epoch": 0.2643485674268489, "grad_norm": 1.0094407181409029, "learning_rate": 4.977130147259542e-06, "loss": 0.5096, "step": 4341 }, { "epoch": 0.2644094632037268, "grad_norm": 1.0082669080801097, "learning_rate": 4.977119377348769e-06, "loss": 0.4645, "step": 4342 }, { "epoch": 0.26447035898060467, "grad_norm": 1.027447285772334, "learning_rate": 4.977108604914358e-06, "loss": 0.5001, "step": 4343 }, { "epoch": 0.26453125475748257, "grad_norm": 1.0065727171629335, "learning_rate": 4.97709782995632e-06, "loss": 0.4945, "step": 4344 }, { "epoch": 0.2645921505343604, "grad_norm": 1.114292049934933, "learning_rate": 4.9770870524746665e-06, "loss": 0.4696, "step": 4345 }, { "epoch": 0.2646530463112383, "grad_norm": 1.0552448988805994, "learning_rate": 4.9770762724694075e-06, "loss": 0.4895, "step": 4346 }, { "epoch": 0.2647139420881162, "grad_norm": 1.1037957951529809, "learning_rate": 4.977065489940555e-06, "loss": 0.5408, "step": 4347 }, { "epoch": 0.26477483786499406, "grad_norm": 1.097299755292922, "learning_rate": 4.977054704888121e-06, "loss": 0.4763, "step": 4348 }, { "epoch": 0.26483573364187196, "grad_norm": 1.0743878331203938, "learning_rate": 4.977043917312114e-06, "loss": 0.4896, "step": 4349 }, { "epoch": 0.2648966294187498, "grad_norm": 1.0150852482870096, "learning_rate": 4.977033127212546e-06, "loss": 0.4762, "step": 4350 }, { "epoch": 0.2649575251956277, "grad_norm": 1.0086515029591878, "learning_rate": 4.977022334589429e-06, "loss": 0.4339, "step": 4351 }, { "epoch": 0.26501842097250555, "grad_norm": 0.9987272172733684, "learning_rate": 4.977011539442772e-06, "loss": 0.5528, "step": 4352 }, { "epoch": 0.26507931674938345, "grad_norm": 1.054165274425705, "learning_rate": 4.977000741772588e-06, "loss": 0.4715, "step": 4353 }, { "epoch": 0.2651402125262613, "grad_norm": 1.0419955603961981, "learning_rate": 4.976989941578887e-06, "loss": 0.5373, "step": 4354 }, { "epoch": 0.2652011083031392, "grad_norm": 1.0321080502177797, "learning_rate": 4.97697913886168e-06, "loss": 0.4814, "step": 4355 }, { "epoch": 0.26526200408001704, "grad_norm": 1.0810933561846277, "learning_rate": 4.976968333620979e-06, "loss": 0.4827, "step": 4356 }, { "epoch": 0.26532289985689494, "grad_norm": 1.0207253521093855, "learning_rate": 4.976957525856792e-06, "loss": 0.5221, "step": 4357 }, { "epoch": 0.2653837956337728, "grad_norm": 1.17321367054963, "learning_rate": 4.976946715569134e-06, "loss": 0.4556, "step": 4358 }, { "epoch": 0.2654446914106507, "grad_norm": 1.0147553963300695, "learning_rate": 4.976935902758013e-06, "loss": 0.571, "step": 4359 }, { "epoch": 0.26550558718752854, "grad_norm": 1.020235458825297, "learning_rate": 4.9769250874234426e-06, "loss": 0.451, "step": 4360 }, { "epoch": 0.26556648296440644, "grad_norm": 1.14808281358581, "learning_rate": 4.9769142695654315e-06, "loss": 0.4002, "step": 4361 }, { "epoch": 0.2656273787412843, "grad_norm": 1.0251636258269496, "learning_rate": 4.9769034491839915e-06, "loss": 0.5079, "step": 4362 }, { "epoch": 0.2656882745181622, "grad_norm": 1.1266061794202493, "learning_rate": 4.9768926262791345e-06, "loss": 0.4378, "step": 4363 }, { "epoch": 0.26574917029504, "grad_norm": 1.0865826008875394, "learning_rate": 4.97688180085087e-06, "loss": 0.54, "step": 4364 }, { "epoch": 0.2658100660719179, "grad_norm": 1.0139983573162212, "learning_rate": 4.97687097289921e-06, "loss": 0.5011, "step": 4365 }, { "epoch": 0.2658709618487958, "grad_norm": 0.9772287862586538, "learning_rate": 4.976860142424166e-06, "loss": 0.5427, "step": 4366 }, { "epoch": 0.2659318576256737, "grad_norm": 1.0268370817171697, "learning_rate": 4.976849309425749e-06, "loss": 0.4734, "step": 4367 }, { "epoch": 0.2659927534025515, "grad_norm": 1.0819112876359447, "learning_rate": 4.976838473903968e-06, "loss": 0.454, "step": 4368 }, { "epoch": 0.2660536491794294, "grad_norm": 1.0770013827128522, "learning_rate": 4.976827635858835e-06, "loss": 0.4988, "step": 4369 }, { "epoch": 0.26611454495630726, "grad_norm": 1.0540707284022202, "learning_rate": 4.976816795290363e-06, "loss": 0.4419, "step": 4370 }, { "epoch": 0.26617544073318516, "grad_norm": 1.1372541228406503, "learning_rate": 4.9768059521985605e-06, "loss": 0.4492, "step": 4371 }, { "epoch": 0.266236336510063, "grad_norm": 1.110961564554303, "learning_rate": 4.97679510658344e-06, "loss": 0.4698, "step": 4372 }, { "epoch": 0.2662972322869409, "grad_norm": 1.0070681981635305, "learning_rate": 4.976784258445012e-06, "loss": 0.4946, "step": 4373 }, { "epoch": 0.26635812806381876, "grad_norm": 1.0196780582753202, "learning_rate": 4.976773407783288e-06, "loss": 0.4841, "step": 4374 }, { "epoch": 0.26641902384069666, "grad_norm": 1.0420550140186162, "learning_rate": 4.976762554598279e-06, "loss": 0.4326, "step": 4375 }, { "epoch": 0.2664799196175745, "grad_norm": 1.0409552586153321, "learning_rate": 4.976751698889995e-06, "loss": 0.4636, "step": 4376 }, { "epoch": 0.2665408153944524, "grad_norm": 1.0165885898830007, "learning_rate": 4.976740840658448e-06, "loss": 0.4777, "step": 4377 }, { "epoch": 0.26660171117133025, "grad_norm": 1.0975648681363115, "learning_rate": 4.976729979903649e-06, "loss": 0.4756, "step": 4378 }, { "epoch": 0.26666260694820815, "grad_norm": 1.0695407608266332, "learning_rate": 4.976719116625609e-06, "loss": 0.4242, "step": 4379 }, { "epoch": 0.266723502725086, "grad_norm": 1.0340389145936968, "learning_rate": 4.9767082508243395e-06, "loss": 0.5151, "step": 4380 }, { "epoch": 0.2667843985019639, "grad_norm": 1.0068532637288095, "learning_rate": 4.976697382499851e-06, "loss": 0.4863, "step": 4381 }, { "epoch": 0.26684529427884174, "grad_norm": 1.0064594888792282, "learning_rate": 4.976686511652154e-06, "loss": 0.4761, "step": 4382 }, { "epoch": 0.26690619005571964, "grad_norm": 1.0349332565802773, "learning_rate": 4.976675638281261e-06, "loss": 0.5031, "step": 4383 }, { "epoch": 0.2669670858325975, "grad_norm": 1.0568372862624784, "learning_rate": 4.976664762387182e-06, "loss": 0.449, "step": 4384 }, { "epoch": 0.2670279816094754, "grad_norm": 1.1411673248029799, "learning_rate": 4.976653883969929e-06, "loss": 0.5113, "step": 4385 }, { "epoch": 0.26708887738635323, "grad_norm": 1.1340002754458671, "learning_rate": 4.9766430030295125e-06, "loss": 0.4356, "step": 4386 }, { "epoch": 0.26714977316323113, "grad_norm": 1.138906348827003, "learning_rate": 4.976632119565943e-06, "loss": 0.4357, "step": 4387 }, { "epoch": 0.26721066894010903, "grad_norm": 1.014261505582291, "learning_rate": 4.976621233579232e-06, "loss": 0.449, "step": 4388 }, { "epoch": 0.2672715647169869, "grad_norm": 1.050366961464326, "learning_rate": 4.976610345069391e-06, "loss": 0.5458, "step": 4389 }, { "epoch": 0.2673324604938648, "grad_norm": 1.0603582387501844, "learning_rate": 4.976599454036431e-06, "loss": 0.4201, "step": 4390 }, { "epoch": 0.2673933562707426, "grad_norm": 1.062619885434541, "learning_rate": 4.976588560480363e-06, "loss": 0.474, "step": 4391 }, { "epoch": 0.2674542520476205, "grad_norm": 1.0168994942941592, "learning_rate": 4.976577664401197e-06, "loss": 0.545, "step": 4392 }, { "epoch": 0.26751514782449837, "grad_norm": 0.9406511130832385, "learning_rate": 4.976566765798947e-06, "loss": 0.4931, "step": 4393 }, { "epoch": 0.26757604360137627, "grad_norm": 1.0113987021329334, "learning_rate": 4.9765558646736215e-06, "loss": 0.4644, "step": 4394 }, { "epoch": 0.2676369393782541, "grad_norm": 1.017110493534631, "learning_rate": 4.976544961025233e-06, "loss": 0.4504, "step": 4395 }, { "epoch": 0.267697835155132, "grad_norm": 1.0878250064427246, "learning_rate": 4.976534054853791e-06, "loss": 0.4683, "step": 4396 }, { "epoch": 0.26775873093200986, "grad_norm": 1.0258750537128132, "learning_rate": 4.976523146159308e-06, "loss": 0.4857, "step": 4397 }, { "epoch": 0.26781962670888776, "grad_norm": 1.0107425443028046, "learning_rate": 4.976512234941795e-06, "loss": 0.4608, "step": 4398 }, { "epoch": 0.2678805224857656, "grad_norm": 0.9745965241548192, "learning_rate": 4.9765013212012615e-06, "loss": 0.4939, "step": 4399 }, { "epoch": 0.2679414182626435, "grad_norm": 1.0459573942415852, "learning_rate": 4.976490404937721e-06, "loss": 0.4888, "step": 4400 }, { "epoch": 0.26800231403952135, "grad_norm": 1.019864783110917, "learning_rate": 4.9764794861511836e-06, "loss": 0.4607, "step": 4401 }, { "epoch": 0.26806320981639925, "grad_norm": 1.1156308737967318, "learning_rate": 4.97646856484166e-06, "loss": 0.5177, "step": 4402 }, { "epoch": 0.2681241055932771, "grad_norm": 1.0773627886316834, "learning_rate": 4.976457641009163e-06, "loss": 0.5894, "step": 4403 }, { "epoch": 0.268185001370155, "grad_norm": 1.1219653729338495, "learning_rate": 4.9764467146537e-06, "loss": 0.4862, "step": 4404 }, { "epoch": 0.26824589714703284, "grad_norm": 0.9912662278053233, "learning_rate": 4.976435785775287e-06, "loss": 0.4043, "step": 4405 }, { "epoch": 0.26830679292391074, "grad_norm": 0.9823827410536216, "learning_rate": 4.976424854373931e-06, "loss": 0.5294, "step": 4406 }, { "epoch": 0.2683676887007886, "grad_norm": 1.0657462305039678, "learning_rate": 4.976413920449646e-06, "loss": 0.5092, "step": 4407 }, { "epoch": 0.2684285844776665, "grad_norm": 0.9829879409521822, "learning_rate": 4.9764029840024404e-06, "loss": 0.4739, "step": 4408 }, { "epoch": 0.26848948025454433, "grad_norm": 0.9770474846633947, "learning_rate": 4.976392045032329e-06, "loss": 0.5325, "step": 4409 }, { "epoch": 0.26855037603142223, "grad_norm": 1.0558584206208204, "learning_rate": 4.9763811035393195e-06, "loss": 0.503, "step": 4410 }, { "epoch": 0.2686112718083001, "grad_norm": 1.0009338150408398, "learning_rate": 4.976370159523425e-06, "loss": 0.5022, "step": 4411 }, { "epoch": 0.268672167585178, "grad_norm": 1.1033997551796788, "learning_rate": 4.976359212984656e-06, "loss": 0.434, "step": 4412 }, { "epoch": 0.2687330633620558, "grad_norm": 1.1271835147992066, "learning_rate": 4.976348263923023e-06, "loss": 0.4828, "step": 4413 }, { "epoch": 0.2687939591389337, "grad_norm": 1.1134581710673057, "learning_rate": 4.976337312338539e-06, "loss": 0.5035, "step": 4414 }, { "epoch": 0.26885485491581157, "grad_norm": 1.0817602792966154, "learning_rate": 4.976326358231214e-06, "loss": 0.4897, "step": 4415 }, { "epoch": 0.26891575069268947, "grad_norm": 1.1142431907666506, "learning_rate": 4.976315401601058e-06, "loss": 0.4613, "step": 4416 }, { "epoch": 0.2689766464695673, "grad_norm": 1.097183010557061, "learning_rate": 4.976304442448085e-06, "loss": 0.4789, "step": 4417 }, { "epoch": 0.2690375422464452, "grad_norm": 0.9964858381201339, "learning_rate": 4.976293480772303e-06, "loss": 0.5264, "step": 4418 }, { "epoch": 0.26909843802332306, "grad_norm": 1.0682460826581925, "learning_rate": 4.976282516573725e-06, "loss": 0.4246, "step": 4419 }, { "epoch": 0.26915933380020096, "grad_norm": 1.0084504539440458, "learning_rate": 4.976271549852362e-06, "loss": 0.5377, "step": 4420 }, { "epoch": 0.2692202295770788, "grad_norm": 1.075596665143942, "learning_rate": 4.976260580608225e-06, "loss": 0.5317, "step": 4421 }, { "epoch": 0.2692811253539567, "grad_norm": 1.005750424669973, "learning_rate": 4.976249608841326e-06, "loss": 0.478, "step": 4422 }, { "epoch": 0.26934202113083455, "grad_norm": 1.0433690481972324, "learning_rate": 4.976238634551674e-06, "loss": 0.4681, "step": 4423 }, { "epoch": 0.26940291690771245, "grad_norm": 1.0288650069469758, "learning_rate": 4.9762276577392825e-06, "loss": 0.439, "step": 4424 }, { "epoch": 0.2694638126845903, "grad_norm": 0.9814101075371204, "learning_rate": 4.976216678404161e-06, "loss": 0.522, "step": 4425 }, { "epoch": 0.2695247084614682, "grad_norm": 1.0458428259281112, "learning_rate": 4.976205696546322e-06, "loss": 0.541, "step": 4426 }, { "epoch": 0.26958560423834604, "grad_norm": 1.079700616128, "learning_rate": 4.976194712165776e-06, "loss": 0.4967, "step": 4427 }, { "epoch": 0.26964650001522394, "grad_norm": 1.0272168999256106, "learning_rate": 4.976183725262535e-06, "loss": 0.4508, "step": 4428 }, { "epoch": 0.26970739579210185, "grad_norm": 1.059247503342494, "learning_rate": 4.976172735836608e-06, "loss": 0.4244, "step": 4429 }, { "epoch": 0.2697682915689797, "grad_norm": 0.956842662043063, "learning_rate": 4.9761617438880096e-06, "loss": 0.4745, "step": 4430 }, { "epoch": 0.2698291873458576, "grad_norm": 1.0662412094258193, "learning_rate": 4.9761507494167485e-06, "loss": 0.4084, "step": 4431 }, { "epoch": 0.26989008312273544, "grad_norm": 1.1112081527484614, "learning_rate": 4.976139752422835e-06, "loss": 0.5512, "step": 4432 }, { "epoch": 0.26995097889961334, "grad_norm": 0.970206655778794, "learning_rate": 4.9761287529062835e-06, "loss": 0.4458, "step": 4433 }, { "epoch": 0.2700118746764912, "grad_norm": 1.1203493732330476, "learning_rate": 4.9761177508671024e-06, "loss": 0.5057, "step": 4434 }, { "epoch": 0.2700727704533691, "grad_norm": 1.1457439143542683, "learning_rate": 4.976106746305305e-06, "loss": 0.59, "step": 4435 }, { "epoch": 0.2701336662302469, "grad_norm": 1.0613666475326669, "learning_rate": 4.976095739220901e-06, "loss": 0.4542, "step": 4436 }, { "epoch": 0.2701945620071248, "grad_norm": 1.0464802081409736, "learning_rate": 4.976084729613902e-06, "loss": 0.4988, "step": 4437 }, { "epoch": 0.2702554577840027, "grad_norm": 1.0564456692772686, "learning_rate": 4.97607371748432e-06, "loss": 0.5161, "step": 4438 }, { "epoch": 0.2703163535608806, "grad_norm": 1.072540959654046, "learning_rate": 4.976062702832165e-06, "loss": 0.4231, "step": 4439 }, { "epoch": 0.2703772493377584, "grad_norm": 0.9824362664896312, "learning_rate": 4.976051685657449e-06, "loss": 0.5169, "step": 4440 }, { "epoch": 0.2704381451146363, "grad_norm": 1.0788906647134073, "learning_rate": 4.976040665960182e-06, "loss": 0.4993, "step": 4441 }, { "epoch": 0.27049904089151416, "grad_norm": 1.082051669160645, "learning_rate": 4.9760296437403775e-06, "loss": 0.4676, "step": 4442 }, { "epoch": 0.27055993666839206, "grad_norm": 1.0389767547839033, "learning_rate": 4.976018618998045e-06, "loss": 0.4852, "step": 4443 }, { "epoch": 0.2706208324452699, "grad_norm": 1.086810987832344, "learning_rate": 4.976007591733196e-06, "loss": 0.4446, "step": 4444 }, { "epoch": 0.2706817282221478, "grad_norm": 1.0343512729732083, "learning_rate": 4.975996561945843e-06, "loss": 0.4739, "step": 4445 }, { "epoch": 0.27074262399902566, "grad_norm": 0.9537894730798898, "learning_rate": 4.975985529635995e-06, "loss": 0.4604, "step": 4446 }, { "epoch": 0.27080351977590356, "grad_norm": 1.0691686146989552, "learning_rate": 4.975974494803665e-06, "loss": 0.4685, "step": 4447 }, { "epoch": 0.2708644155527814, "grad_norm": 1.0913486304050501, "learning_rate": 4.975963457448863e-06, "loss": 0.4596, "step": 4448 }, { "epoch": 0.2709253113296593, "grad_norm": 1.0068630415888546, "learning_rate": 4.975952417571602e-06, "loss": 0.5686, "step": 4449 }, { "epoch": 0.27098620710653715, "grad_norm": 1.0215947306536421, "learning_rate": 4.975941375171892e-06, "loss": 0.4959, "step": 4450 }, { "epoch": 0.27104710288341505, "grad_norm": 1.0803173162270017, "learning_rate": 4.975930330249744e-06, "loss": 0.4797, "step": 4451 }, { "epoch": 0.2711079986602929, "grad_norm": 1.0426949842015742, "learning_rate": 4.9759192828051695e-06, "loss": 0.5442, "step": 4452 }, { "epoch": 0.2711688944371708, "grad_norm": 1.1503672825543385, "learning_rate": 4.97590823283818e-06, "loss": 0.447, "step": 4453 }, { "epoch": 0.27122979021404864, "grad_norm": 1.013373296147792, "learning_rate": 4.975897180348787e-06, "loss": 0.5112, "step": 4454 }, { "epoch": 0.27129068599092654, "grad_norm": 1.1027728736857432, "learning_rate": 4.975886125337001e-06, "loss": 0.4525, "step": 4455 }, { "epoch": 0.2713515817678044, "grad_norm": 0.9213065596685084, "learning_rate": 4.975875067802834e-06, "loss": 0.4908, "step": 4456 }, { "epoch": 0.2714124775446823, "grad_norm": 0.9858457999571967, "learning_rate": 4.9758640077462975e-06, "loss": 0.4937, "step": 4457 }, { "epoch": 0.27147337332156013, "grad_norm": 0.9929199108853294, "learning_rate": 4.975852945167401e-06, "loss": 0.5213, "step": 4458 }, { "epoch": 0.27153426909843803, "grad_norm": 1.0103972904654441, "learning_rate": 4.975841880066158e-06, "loss": 0.4619, "step": 4459 }, { "epoch": 0.2715951648753159, "grad_norm": 1.1867365452642582, "learning_rate": 4.975830812442578e-06, "loss": 0.4135, "step": 4460 }, { "epoch": 0.2716560606521938, "grad_norm": 1.0384594884672478, "learning_rate": 4.975819742296673e-06, "loss": 0.4933, "step": 4461 }, { "epoch": 0.2717169564290716, "grad_norm": 1.0068192119822117, "learning_rate": 4.975808669628454e-06, "loss": 0.4558, "step": 4462 }, { "epoch": 0.2717778522059495, "grad_norm": 0.9872926569081232, "learning_rate": 4.975797594437934e-06, "loss": 0.543, "step": 4463 }, { "epoch": 0.27183874798282737, "grad_norm": 1.0941928090846957, "learning_rate": 4.975786516725122e-06, "loss": 0.4674, "step": 4464 }, { "epoch": 0.27189964375970527, "grad_norm": 1.1215934125587363, "learning_rate": 4.97577543649003e-06, "loss": 0.4948, "step": 4465 }, { "epoch": 0.2719605395365831, "grad_norm": 1.0610809233759988, "learning_rate": 4.97576435373267e-06, "loss": 0.4614, "step": 4466 }, { "epoch": 0.272021435313461, "grad_norm": 1.0057548612492557, "learning_rate": 4.975753268453053e-06, "loss": 0.58, "step": 4467 }, { "epoch": 0.27208233109033886, "grad_norm": 1.0944960883320163, "learning_rate": 4.975742180651189e-06, "loss": 0.5156, "step": 4468 }, { "epoch": 0.27214322686721676, "grad_norm": 1.048478898087235, "learning_rate": 4.9757310903270915e-06, "loss": 0.5405, "step": 4469 }, { "epoch": 0.27220412264409466, "grad_norm": 1.1215155811322124, "learning_rate": 4.97571999748077e-06, "loss": 0.395, "step": 4470 }, { "epoch": 0.2722650184209725, "grad_norm": 1.0629578972070972, "learning_rate": 4.975708902112236e-06, "loss": 0.4951, "step": 4471 }, { "epoch": 0.2723259141978504, "grad_norm": 1.0102856157072042, "learning_rate": 4.975697804221502e-06, "loss": 0.5028, "step": 4472 }, { "epoch": 0.27238680997472825, "grad_norm": 1.0748887604062813, "learning_rate": 4.975686703808578e-06, "loss": 0.4456, "step": 4473 }, { "epoch": 0.27244770575160615, "grad_norm": 0.9946581293670533, "learning_rate": 4.9756756008734765e-06, "loss": 0.5488, "step": 4474 }, { "epoch": 0.272508601528484, "grad_norm": 1.0734974048784114, "learning_rate": 4.975664495416208e-06, "loss": 0.5052, "step": 4475 }, { "epoch": 0.2725694973053619, "grad_norm": 1.0072654775129275, "learning_rate": 4.975653387436784e-06, "loss": 0.4897, "step": 4476 }, { "epoch": 0.27263039308223974, "grad_norm": 0.9879429429312616, "learning_rate": 4.975642276935215e-06, "loss": 0.5322, "step": 4477 }, { "epoch": 0.27269128885911764, "grad_norm": 0.9897476303302353, "learning_rate": 4.975631163911514e-06, "loss": 0.5422, "step": 4478 }, { "epoch": 0.2727521846359955, "grad_norm": 1.1103495428579688, "learning_rate": 4.975620048365691e-06, "loss": 0.4914, "step": 4479 }, { "epoch": 0.2728130804128734, "grad_norm": 1.1414974600240053, "learning_rate": 4.975608930297758e-06, "loss": 0.4038, "step": 4480 }, { "epoch": 0.27287397618975123, "grad_norm": 1.0694029678091672, "learning_rate": 4.975597809707726e-06, "loss": 0.4682, "step": 4481 }, { "epoch": 0.27293487196662913, "grad_norm": 1.0842356050777733, "learning_rate": 4.975586686595606e-06, "loss": 0.4373, "step": 4482 }, { "epoch": 0.272995767743507, "grad_norm": 1.0574403291024899, "learning_rate": 4.97557556096141e-06, "loss": 0.5496, "step": 4483 }, { "epoch": 0.2730566635203849, "grad_norm": 1.018783336696444, "learning_rate": 4.97556443280515e-06, "loss": 0.5104, "step": 4484 }, { "epoch": 0.2731175592972627, "grad_norm": 1.0108983451075466, "learning_rate": 4.975553302126835e-06, "loss": 0.5155, "step": 4485 }, { "epoch": 0.2731784550741406, "grad_norm": 1.025428177881471, "learning_rate": 4.975542168926478e-06, "loss": 0.4805, "step": 4486 }, { "epoch": 0.27323935085101847, "grad_norm": 0.9457978776539547, "learning_rate": 4.975531033204091e-06, "loss": 0.4838, "step": 4487 }, { "epoch": 0.27330024662789637, "grad_norm": 1.138844057285715, "learning_rate": 4.9755198949596845e-06, "loss": 0.4427, "step": 4488 }, { "epoch": 0.2733611424047742, "grad_norm": 1.1383000024592502, "learning_rate": 4.975508754193269e-06, "loss": 0.388, "step": 4489 }, { "epoch": 0.2734220381816521, "grad_norm": 0.998222388175142, "learning_rate": 4.975497610904857e-06, "loss": 0.5157, "step": 4490 }, { "epoch": 0.27348293395852996, "grad_norm": 1.0095845720604335, "learning_rate": 4.97548646509446e-06, "loss": 0.4487, "step": 4491 }, { "epoch": 0.27354382973540786, "grad_norm": 1.0074487096671911, "learning_rate": 4.975475316762088e-06, "loss": 0.5181, "step": 4492 }, { "epoch": 0.2736047255122857, "grad_norm": 1.0431429768644922, "learning_rate": 4.975464165907754e-06, "loss": 0.4747, "step": 4493 }, { "epoch": 0.2736656212891636, "grad_norm": 1.1419738572300158, "learning_rate": 4.975453012531468e-06, "loss": 0.5005, "step": 4494 }, { "epoch": 0.27372651706604145, "grad_norm": 1.0343185248817903, "learning_rate": 4.975441856633242e-06, "loss": 0.4625, "step": 4495 }, { "epoch": 0.27378741284291935, "grad_norm": 0.9946557946683363, "learning_rate": 4.975430698213086e-06, "loss": 0.4766, "step": 4496 }, { "epoch": 0.2738483086197972, "grad_norm": 1.0474812329227, "learning_rate": 4.975419537271015e-06, "loss": 0.4548, "step": 4497 }, { "epoch": 0.2739092043966751, "grad_norm": 0.9489301512931004, "learning_rate": 4.9754083738070365e-06, "loss": 0.5525, "step": 4498 }, { "epoch": 0.27397010017355294, "grad_norm": 0.9692453461615222, "learning_rate": 4.975397207821164e-06, "loss": 0.4701, "step": 4499 }, { "epoch": 0.27403099595043084, "grad_norm": 1.0153891493953173, "learning_rate": 4.975386039313409e-06, "loss": 0.5228, "step": 4500 }, { "epoch": 0.2740918917273087, "grad_norm": 1.072565542563807, "learning_rate": 4.975374868283781e-06, "loss": 0.4811, "step": 4501 }, { "epoch": 0.2741527875041866, "grad_norm": 1.1572040245207893, "learning_rate": 4.9753636947322935e-06, "loss": 0.4042, "step": 4502 }, { "epoch": 0.27421368328106444, "grad_norm": 1.0534981826597418, "learning_rate": 4.975352518658956e-06, "loss": 0.4748, "step": 4503 }, { "epoch": 0.27427457905794234, "grad_norm": 1.1459813113207349, "learning_rate": 4.975341340063781e-06, "loss": 0.4426, "step": 4504 }, { "epoch": 0.2743354748348202, "grad_norm": 1.0087890415508596, "learning_rate": 4.97533015894678e-06, "loss": 0.508, "step": 4505 }, { "epoch": 0.2743963706116981, "grad_norm": 0.9553312958911184, "learning_rate": 4.975318975307965e-06, "loss": 0.5137, "step": 4506 }, { "epoch": 0.2744572663885759, "grad_norm": 1.042179807229924, "learning_rate": 4.975307789147345e-06, "loss": 0.4656, "step": 4507 }, { "epoch": 0.2745181621654538, "grad_norm": 1.049033759414271, "learning_rate": 4.975296600464933e-06, "loss": 0.456, "step": 4508 }, { "epoch": 0.2745790579423317, "grad_norm": 0.9821456174632861, "learning_rate": 4.975285409260741e-06, "loss": 0.5966, "step": 4509 }, { "epoch": 0.2746399537192096, "grad_norm": 1.0007379512346062, "learning_rate": 4.975274215534779e-06, "loss": 0.548, "step": 4510 }, { "epoch": 0.2747008494960875, "grad_norm": 1.0890924094953054, "learning_rate": 4.97526301928706e-06, "loss": 0.5104, "step": 4511 }, { "epoch": 0.2747617452729653, "grad_norm": 0.9266339541865934, "learning_rate": 4.975251820517594e-06, "loss": 0.5375, "step": 4512 }, { "epoch": 0.2748226410498432, "grad_norm": 1.1183458758568983, "learning_rate": 4.975240619226393e-06, "loss": 0.4478, "step": 4513 }, { "epoch": 0.27488353682672106, "grad_norm": 1.0053628935260082, "learning_rate": 4.975229415413467e-06, "loss": 0.4352, "step": 4514 }, { "epoch": 0.27494443260359896, "grad_norm": 1.03478844979292, "learning_rate": 4.9752182090788305e-06, "loss": 0.4888, "step": 4515 }, { "epoch": 0.2750053283804768, "grad_norm": 0.9893724614170148, "learning_rate": 4.975207000222493e-06, "loss": 0.5361, "step": 4516 }, { "epoch": 0.2750662241573547, "grad_norm": 1.1779556499017647, "learning_rate": 4.975195788844465e-06, "loss": 0.4826, "step": 4517 }, { "epoch": 0.27512711993423256, "grad_norm": 1.0476268180417607, "learning_rate": 4.97518457494476e-06, "loss": 0.4631, "step": 4518 }, { "epoch": 0.27518801571111046, "grad_norm": 1.178884416014151, "learning_rate": 4.975173358523387e-06, "loss": 0.5538, "step": 4519 }, { "epoch": 0.2752489114879883, "grad_norm": 1.1139300601536508, "learning_rate": 4.975162139580361e-06, "loss": 0.5183, "step": 4520 }, { "epoch": 0.2753098072648662, "grad_norm": 1.1238221489133138, "learning_rate": 4.97515091811569e-06, "loss": 0.4569, "step": 4521 }, { "epoch": 0.27537070304174405, "grad_norm": 1.0914274173155876, "learning_rate": 4.975139694129387e-06, "loss": 0.4458, "step": 4522 }, { "epoch": 0.27543159881862195, "grad_norm": 1.0329476485317803, "learning_rate": 4.975128467621463e-06, "loss": 0.5038, "step": 4523 }, { "epoch": 0.2754924945954998, "grad_norm": 0.931211401851377, "learning_rate": 4.9751172385919296e-06, "loss": 0.5629, "step": 4524 }, { "epoch": 0.2755533903723777, "grad_norm": 1.0348829529104817, "learning_rate": 4.975106007040798e-06, "loss": 0.47, "step": 4525 }, { "epoch": 0.27561428614925554, "grad_norm": 1.0094432060740046, "learning_rate": 4.97509477296808e-06, "loss": 0.4989, "step": 4526 }, { "epoch": 0.27567518192613344, "grad_norm": 0.9986655877443573, "learning_rate": 4.975083536373787e-06, "loss": 0.4786, "step": 4527 }, { "epoch": 0.2757360777030113, "grad_norm": 1.0281978840569816, "learning_rate": 4.97507229725793e-06, "loss": 0.4466, "step": 4528 }, { "epoch": 0.2757969734798892, "grad_norm": 1.0212676743550224, "learning_rate": 4.975061055620522e-06, "loss": 0.4868, "step": 4529 }, { "epoch": 0.27585786925676703, "grad_norm": 1.026343146217889, "learning_rate": 4.975049811461572e-06, "loss": 0.5076, "step": 4530 }, { "epoch": 0.27591876503364493, "grad_norm": 0.9353891155087947, "learning_rate": 4.975038564781093e-06, "loss": 0.5939, "step": 4531 }, { "epoch": 0.2759796608105228, "grad_norm": 1.066934029665653, "learning_rate": 4.975027315579096e-06, "loss": 0.4703, "step": 4532 }, { "epoch": 0.2760405565874007, "grad_norm": 1.0208765227067973, "learning_rate": 4.975016063855593e-06, "loss": 0.4518, "step": 4533 }, { "epoch": 0.2761014523642785, "grad_norm": 1.1329676531452095, "learning_rate": 4.975004809610595e-06, "loss": 0.4185, "step": 4534 }, { "epoch": 0.2761623481411564, "grad_norm": 1.0720534721928394, "learning_rate": 4.974993552844113e-06, "loss": 0.466, "step": 4535 }, { "epoch": 0.27622324391803427, "grad_norm": 1.0786891694593534, "learning_rate": 4.97498229355616e-06, "loss": 0.4909, "step": 4536 }, { "epoch": 0.27628413969491217, "grad_norm": 1.1212879032191305, "learning_rate": 4.974971031746746e-06, "loss": 0.4928, "step": 4537 }, { "epoch": 0.27634503547179, "grad_norm": 1.028677454906405, "learning_rate": 4.974959767415883e-06, "loss": 0.4307, "step": 4538 }, { "epoch": 0.2764059312486679, "grad_norm": 1.1351870470963519, "learning_rate": 4.9749485005635824e-06, "loss": 0.5501, "step": 4539 }, { "epoch": 0.27646682702554576, "grad_norm": 1.0686052551647105, "learning_rate": 4.974937231189856e-06, "loss": 0.4738, "step": 4540 }, { "epoch": 0.27652772280242366, "grad_norm": 0.9361727731960461, "learning_rate": 4.974925959294714e-06, "loss": 0.5297, "step": 4541 }, { "epoch": 0.2765886185793015, "grad_norm": 1.0698132580030086, "learning_rate": 4.974914684878171e-06, "loss": 0.5101, "step": 4542 }, { "epoch": 0.2766495143561794, "grad_norm": 1.060675256750301, "learning_rate": 4.974903407940235e-06, "loss": 0.5332, "step": 4543 }, { "epoch": 0.27671041013305725, "grad_norm": 1.1309975213652022, "learning_rate": 4.974892128480918e-06, "loss": 0.4687, "step": 4544 }, { "epoch": 0.27677130590993515, "grad_norm": 1.009036892808231, "learning_rate": 4.974880846500234e-06, "loss": 0.4748, "step": 4545 }, { "epoch": 0.276832201686813, "grad_norm": 1.0449230789285644, "learning_rate": 4.9748695619981915e-06, "loss": 0.4929, "step": 4546 }, { "epoch": 0.2768930974636909, "grad_norm": 1.0199089003497832, "learning_rate": 4.974858274974804e-06, "loss": 0.5441, "step": 4547 }, { "epoch": 0.27695399324056874, "grad_norm": 1.1434815154998659, "learning_rate": 4.9748469854300825e-06, "loss": 0.4949, "step": 4548 }, { "epoch": 0.27701488901744664, "grad_norm": 1.1440369611154046, "learning_rate": 4.974835693364038e-06, "loss": 0.4367, "step": 4549 }, { "epoch": 0.2770757847943245, "grad_norm": 1.0273528228783757, "learning_rate": 4.974824398776683e-06, "loss": 0.4976, "step": 4550 }, { "epoch": 0.2771366805712024, "grad_norm": 1.0716610519976235, "learning_rate": 4.974813101668027e-06, "loss": 0.4989, "step": 4551 }, { "epoch": 0.2771975763480803, "grad_norm": 1.0245322017150305, "learning_rate": 4.974801802038084e-06, "loss": 0.4785, "step": 4552 }, { "epoch": 0.27725847212495813, "grad_norm": 1.0798005224087914, "learning_rate": 4.974790499886864e-06, "loss": 0.4733, "step": 4553 }, { "epoch": 0.27731936790183603, "grad_norm": 1.005031619947371, "learning_rate": 4.974779195214379e-06, "loss": 0.5116, "step": 4554 }, { "epoch": 0.2773802636787139, "grad_norm": 1.0305962255213543, "learning_rate": 4.974767888020641e-06, "loss": 0.5732, "step": 4555 }, { "epoch": 0.2774411594555918, "grad_norm": 1.1116007690004073, "learning_rate": 4.9747565783056595e-06, "loss": 0.4098, "step": 4556 }, { "epoch": 0.2775020552324696, "grad_norm": 0.924419994776228, "learning_rate": 4.974745266069449e-06, "loss": 0.5045, "step": 4557 }, { "epoch": 0.2775629510093475, "grad_norm": 0.9868637309408224, "learning_rate": 4.974733951312018e-06, "loss": 0.4744, "step": 4558 }, { "epoch": 0.27762384678622537, "grad_norm": 0.9203293663841893, "learning_rate": 4.974722634033381e-06, "loss": 0.4877, "step": 4559 }, { "epoch": 0.27768474256310327, "grad_norm": 1.0688045640254733, "learning_rate": 4.974711314233547e-06, "loss": 0.4234, "step": 4560 }, { "epoch": 0.2777456383399811, "grad_norm": 0.9720535469061055, "learning_rate": 4.9746999919125286e-06, "loss": 0.4952, "step": 4561 }, { "epoch": 0.277806534116859, "grad_norm": 1.06935647498941, "learning_rate": 4.974688667070337e-06, "loss": 0.4465, "step": 4562 }, { "epoch": 0.27786742989373686, "grad_norm": 1.1243863428298293, "learning_rate": 4.974677339706985e-06, "loss": 0.5234, "step": 4563 }, { "epoch": 0.27792832567061476, "grad_norm": 1.0386550860335932, "learning_rate": 4.974666009822483e-06, "loss": 0.5367, "step": 4564 }, { "epoch": 0.2779892214474926, "grad_norm": 1.07606205646196, "learning_rate": 4.9746546774168425e-06, "loss": 0.457, "step": 4565 }, { "epoch": 0.2780501172243705, "grad_norm": 1.0714993195717417, "learning_rate": 4.974643342490075e-06, "loss": 0.5629, "step": 4566 }, { "epoch": 0.27811101300124835, "grad_norm": 1.212352818662428, "learning_rate": 4.974632005042193e-06, "loss": 0.4697, "step": 4567 }, { "epoch": 0.27817190877812625, "grad_norm": 1.1103089346093213, "learning_rate": 4.974620665073206e-06, "loss": 0.472, "step": 4568 }, { "epoch": 0.2782328045550041, "grad_norm": 1.0328659888414227, "learning_rate": 4.974609322583128e-06, "loss": 0.4625, "step": 4569 }, { "epoch": 0.278293700331882, "grad_norm": 1.0610389328029726, "learning_rate": 4.9745979775719695e-06, "loss": 0.4863, "step": 4570 }, { "epoch": 0.27835459610875984, "grad_norm": 0.9907113266279443, "learning_rate": 4.974586630039742e-06, "loss": 0.5188, "step": 4571 }, { "epoch": 0.27841549188563774, "grad_norm": 1.1154091729269733, "learning_rate": 4.974575279986457e-06, "loss": 0.4004, "step": 4572 }, { "epoch": 0.2784763876625156, "grad_norm": 1.0258034593453, "learning_rate": 4.974563927412126e-06, "loss": 0.5028, "step": 4573 }, { "epoch": 0.2785372834393935, "grad_norm": 1.0085041328201212, "learning_rate": 4.974552572316761e-06, "loss": 0.4478, "step": 4574 }, { "epoch": 0.27859817921627134, "grad_norm": 0.9781428028855677, "learning_rate": 4.974541214700373e-06, "loss": 0.5524, "step": 4575 }, { "epoch": 0.27865907499314924, "grad_norm": 1.083171713083971, "learning_rate": 4.974529854562974e-06, "loss": 0.4561, "step": 4576 }, { "epoch": 0.2787199707700271, "grad_norm": 0.9827390806844892, "learning_rate": 4.974518491904575e-06, "loss": 0.513, "step": 4577 }, { "epoch": 0.278780866546905, "grad_norm": 1.026292423857707, "learning_rate": 4.974507126725189e-06, "loss": 0.48, "step": 4578 }, { "epoch": 0.2788417623237828, "grad_norm": 1.1178729913558718, "learning_rate": 4.974495759024826e-06, "loss": 0.5243, "step": 4579 }, { "epoch": 0.2789026581006607, "grad_norm": 1.1612994353227497, "learning_rate": 4.974484388803498e-06, "loss": 0.4897, "step": 4580 }, { "epoch": 0.2789635538775386, "grad_norm": 1.0539582294263017, "learning_rate": 4.974473016061217e-06, "loss": 0.4803, "step": 4581 }, { "epoch": 0.2790244496544165, "grad_norm": 1.0696493958122695, "learning_rate": 4.974461640797993e-06, "loss": 0.4452, "step": 4582 }, { "epoch": 0.2790853454312943, "grad_norm": 1.0796209148549096, "learning_rate": 4.974450263013841e-06, "loss": 0.4821, "step": 4583 }, { "epoch": 0.2791462412081722, "grad_norm": 1.0730082544227209, "learning_rate": 4.974438882708769e-06, "loss": 0.4551, "step": 4584 }, { "epoch": 0.27920713698505006, "grad_norm": 0.9596530709790342, "learning_rate": 4.974427499882791e-06, "loss": 0.4954, "step": 4585 }, { "epoch": 0.27926803276192796, "grad_norm": 0.9680698427867895, "learning_rate": 4.974416114535918e-06, "loss": 0.5363, "step": 4586 }, { "epoch": 0.2793289285388058, "grad_norm": 0.9623275461777904, "learning_rate": 4.9744047266681605e-06, "loss": 0.4728, "step": 4587 }, { "epoch": 0.2793898243156837, "grad_norm": 0.9929946937987327, "learning_rate": 4.974393336279531e-06, "loss": 0.4405, "step": 4588 }, { "epoch": 0.27945072009256156, "grad_norm": 1.0794427799341026, "learning_rate": 4.974381943370041e-06, "loss": 0.4346, "step": 4589 }, { "epoch": 0.27951161586943946, "grad_norm": 1.0202960529196963, "learning_rate": 4.974370547939703e-06, "loss": 0.4557, "step": 4590 }, { "epoch": 0.2795725116463173, "grad_norm": 1.1149402707996452, "learning_rate": 4.9743591499885265e-06, "loss": 0.4533, "step": 4591 }, { "epoch": 0.2796334074231952, "grad_norm": 0.9852305906007178, "learning_rate": 4.974347749516525e-06, "loss": 0.5502, "step": 4592 }, { "epoch": 0.2796943032000731, "grad_norm": 1.1350702105853916, "learning_rate": 4.974336346523709e-06, "loss": 0.4456, "step": 4593 }, { "epoch": 0.27975519897695095, "grad_norm": 1.1021820770173585, "learning_rate": 4.974324941010091e-06, "loss": 0.3837, "step": 4594 }, { "epoch": 0.27981609475382885, "grad_norm": 1.082470539502784, "learning_rate": 4.974313532975682e-06, "loss": 0.443, "step": 4595 }, { "epoch": 0.2798769905307067, "grad_norm": 1.053272779692528, "learning_rate": 4.974302122420494e-06, "loss": 0.4887, "step": 4596 }, { "epoch": 0.2799378863075846, "grad_norm": 1.10596466745438, "learning_rate": 4.974290709344538e-06, "loss": 0.4987, "step": 4597 }, { "epoch": 0.27999878208446244, "grad_norm": 1.0473945581376343, "learning_rate": 4.974279293747827e-06, "loss": 0.474, "step": 4598 }, { "epoch": 0.28005967786134034, "grad_norm": 1.0201581560993809, "learning_rate": 4.974267875630371e-06, "loss": 0.5588, "step": 4599 }, { "epoch": 0.2801205736382182, "grad_norm": 1.0866793330344389, "learning_rate": 4.974256454992182e-06, "loss": 0.5499, "step": 4600 }, { "epoch": 0.2801814694150961, "grad_norm": 1.068825589242229, "learning_rate": 4.974245031833273e-06, "loss": 0.4945, "step": 4601 }, { "epoch": 0.28024236519197393, "grad_norm": 1.0237197198136996, "learning_rate": 4.974233606153654e-06, "loss": 0.4696, "step": 4602 }, { "epoch": 0.28030326096885183, "grad_norm": 1.0888122428385953, "learning_rate": 4.974222177953337e-06, "loss": 0.4627, "step": 4603 }, { "epoch": 0.2803641567457297, "grad_norm": 1.0707359070700317, "learning_rate": 4.974210747232334e-06, "loss": 0.5273, "step": 4604 }, { "epoch": 0.2804250525226076, "grad_norm": 1.0980437875655291, "learning_rate": 4.974199313990657e-06, "loss": 0.5327, "step": 4605 }, { "epoch": 0.2804859482994854, "grad_norm": 1.131239624917363, "learning_rate": 4.974187878228317e-06, "loss": 0.5368, "step": 4606 }, { "epoch": 0.2805468440763633, "grad_norm": 1.000346198465179, "learning_rate": 4.9741764399453255e-06, "loss": 0.5162, "step": 4607 }, { "epoch": 0.28060773985324117, "grad_norm": 1.0427021515391084, "learning_rate": 4.974164999141695e-06, "loss": 0.4481, "step": 4608 }, { "epoch": 0.28066863563011907, "grad_norm": 1.0223011866827316, "learning_rate": 4.974153555817436e-06, "loss": 0.4527, "step": 4609 }, { "epoch": 0.2807295314069969, "grad_norm": 0.9886757758868444, "learning_rate": 4.974142109972561e-06, "loss": 0.5339, "step": 4610 }, { "epoch": 0.2807904271838748, "grad_norm": 0.9910506688228174, "learning_rate": 4.974130661607082e-06, "loss": 0.486, "step": 4611 }, { "epoch": 0.28085132296075266, "grad_norm": 1.1011653632605596, "learning_rate": 4.974119210721009e-06, "loss": 0.4869, "step": 4612 }, { "epoch": 0.28091221873763056, "grad_norm": 1.0996542370440674, "learning_rate": 4.9741077573143556e-06, "loss": 0.4548, "step": 4613 }, { "epoch": 0.2809731145145084, "grad_norm": 1.1194430013200274, "learning_rate": 4.9740963013871326e-06, "loss": 0.4981, "step": 4614 }, { "epoch": 0.2810340102913863, "grad_norm": 0.9847304283512232, "learning_rate": 4.974084842939352e-06, "loss": 0.4917, "step": 4615 }, { "epoch": 0.28109490606826415, "grad_norm": 1.1291570175285386, "learning_rate": 4.9740733819710244e-06, "loss": 0.4281, "step": 4616 }, { "epoch": 0.28115580184514205, "grad_norm": 1.0390376706961921, "learning_rate": 4.974061918482163e-06, "loss": 0.4556, "step": 4617 }, { "epoch": 0.2812166976220199, "grad_norm": 0.9913756910496051, "learning_rate": 4.974050452472778e-06, "loss": 0.4544, "step": 4618 }, { "epoch": 0.2812775933988978, "grad_norm": 1.0541364262871262, "learning_rate": 4.974038983942883e-06, "loss": 0.4625, "step": 4619 }, { "epoch": 0.28133848917577564, "grad_norm": 1.155417036478199, "learning_rate": 4.974027512892487e-06, "loss": 0.5164, "step": 4620 }, { "epoch": 0.28139938495265354, "grad_norm": 1.0689193363124667, "learning_rate": 4.9740160393216044e-06, "loss": 0.4574, "step": 4621 }, { "epoch": 0.2814602807295314, "grad_norm": 1.033246874371159, "learning_rate": 4.9740045632302454e-06, "loss": 0.4683, "step": 4622 }, { "epoch": 0.2815211765064093, "grad_norm": 1.0436443237715383, "learning_rate": 4.973993084618422e-06, "loss": 0.5467, "step": 4623 }, { "epoch": 0.28158207228328713, "grad_norm": 1.0946314176605765, "learning_rate": 4.973981603486146e-06, "loss": 0.452, "step": 4624 }, { "epoch": 0.28164296806016503, "grad_norm": 1.087304093817119, "learning_rate": 4.973970119833429e-06, "loss": 0.4493, "step": 4625 }, { "epoch": 0.2817038638370429, "grad_norm": 0.9667378538899482, "learning_rate": 4.973958633660282e-06, "loss": 0.5557, "step": 4626 }, { "epoch": 0.2817647596139208, "grad_norm": 0.9766330285349364, "learning_rate": 4.9739471449667185e-06, "loss": 0.5205, "step": 4627 }, { "epoch": 0.2818256553907986, "grad_norm": 1.0663448103933508, "learning_rate": 4.973935653752748e-06, "loss": 0.406, "step": 4628 }, { "epoch": 0.2818865511676765, "grad_norm": 1.0515382056367437, "learning_rate": 4.973924160018384e-06, "loss": 0.5005, "step": 4629 }, { "epoch": 0.28194744694455437, "grad_norm": 1.0130909809785542, "learning_rate": 4.973912663763637e-06, "loss": 0.3989, "step": 4630 }, { "epoch": 0.28200834272143227, "grad_norm": 1.026796559734027, "learning_rate": 4.973901164988519e-06, "loss": 0.4731, "step": 4631 }, { "epoch": 0.2820692384983101, "grad_norm": 1.152524347426922, "learning_rate": 4.973889663693043e-06, "loss": 0.3822, "step": 4632 }, { "epoch": 0.282130134275188, "grad_norm": 1.0017076752125538, "learning_rate": 4.973878159877219e-06, "loss": 0.4934, "step": 4633 }, { "epoch": 0.2821910300520659, "grad_norm": 1.0388927262375904, "learning_rate": 4.973866653541059e-06, "loss": 0.4757, "step": 4634 }, { "epoch": 0.28225192582894376, "grad_norm": 1.0645883424660485, "learning_rate": 4.973855144684575e-06, "loss": 0.4805, "step": 4635 }, { "epoch": 0.28231282160582166, "grad_norm": 0.9439628919409012, "learning_rate": 4.97384363330778e-06, "loss": 0.4257, "step": 4636 }, { "epoch": 0.2823737173826995, "grad_norm": 1.0121190102895237, "learning_rate": 4.973832119410684e-06, "loss": 0.5217, "step": 4637 }, { "epoch": 0.2824346131595774, "grad_norm": 1.1420730778082306, "learning_rate": 4.973820602993299e-06, "loss": 0.4117, "step": 4638 }, { "epoch": 0.28249550893645525, "grad_norm": 1.048553229256215, "learning_rate": 4.9738090840556366e-06, "loss": 0.4687, "step": 4639 }, { "epoch": 0.28255640471333315, "grad_norm": 1.0081929820590643, "learning_rate": 4.97379756259771e-06, "loss": 0.4772, "step": 4640 }, { "epoch": 0.282617300490211, "grad_norm": 1.0891687628826903, "learning_rate": 4.973786038619529e-06, "loss": 0.4808, "step": 4641 }, { "epoch": 0.2826781962670889, "grad_norm": 1.073926738490453, "learning_rate": 4.973774512121106e-06, "loss": 0.4807, "step": 4642 }, { "epoch": 0.28273909204396674, "grad_norm": 1.0057425075213005, "learning_rate": 4.9737629831024535e-06, "loss": 0.4736, "step": 4643 }, { "epoch": 0.28279998782084464, "grad_norm": 1.0382081541340094, "learning_rate": 4.9737514515635834e-06, "loss": 0.4463, "step": 4644 }, { "epoch": 0.2828608835977225, "grad_norm": 1.0657272248471759, "learning_rate": 4.973739917504506e-06, "loss": 0.501, "step": 4645 }, { "epoch": 0.2829217793746004, "grad_norm": 0.9985653987865056, "learning_rate": 4.973728380925233e-06, "loss": 0.4546, "step": 4646 }, { "epoch": 0.28298267515147824, "grad_norm": 1.066880491907266, "learning_rate": 4.973716841825778e-06, "loss": 0.4843, "step": 4647 }, { "epoch": 0.28304357092835614, "grad_norm": 0.9983068339548419, "learning_rate": 4.973705300206151e-06, "loss": 0.546, "step": 4648 }, { "epoch": 0.283104466705234, "grad_norm": 0.990750672894937, "learning_rate": 4.973693756066366e-06, "loss": 0.5265, "step": 4649 }, { "epoch": 0.2831653624821119, "grad_norm": 1.063250874884526, "learning_rate": 4.9736822094064315e-06, "loss": 0.4556, "step": 4650 }, { "epoch": 0.2832262582589897, "grad_norm": 0.9803211366918892, "learning_rate": 4.973670660226362e-06, "loss": 0.5145, "step": 4651 }, { "epoch": 0.2832871540358676, "grad_norm": 1.048694899210618, "learning_rate": 4.973659108526167e-06, "loss": 0.5309, "step": 4652 }, { "epoch": 0.2833480498127455, "grad_norm": 1.0189837222152456, "learning_rate": 4.9736475543058615e-06, "loss": 0.4516, "step": 4653 }, { "epoch": 0.2834089455896234, "grad_norm": 1.05285072475693, "learning_rate": 4.973635997565454e-06, "loss": 0.4692, "step": 4654 }, { "epoch": 0.2834698413665012, "grad_norm": 1.1118935415768019, "learning_rate": 4.973624438304958e-06, "loss": 0.4648, "step": 4655 }, { "epoch": 0.2835307371433791, "grad_norm": 1.1028015286073, "learning_rate": 4.973612876524384e-06, "loss": 0.4893, "step": 4656 }, { "epoch": 0.28359163292025696, "grad_norm": 1.1658919995930832, "learning_rate": 4.973601312223746e-06, "loss": 0.4712, "step": 4657 }, { "epoch": 0.28365252869713486, "grad_norm": 1.0419561840124933, "learning_rate": 4.9735897454030535e-06, "loss": 0.5032, "step": 4658 }, { "epoch": 0.2837134244740127, "grad_norm": 1.007101935375357, "learning_rate": 4.973578176062319e-06, "loss": 0.499, "step": 4659 }, { "epoch": 0.2837743202508906, "grad_norm": 1.0406600213348685, "learning_rate": 4.973566604201555e-06, "loss": 0.5025, "step": 4660 }, { "epoch": 0.28383521602776846, "grad_norm": 1.088350375895477, "learning_rate": 4.973555029820773e-06, "loss": 0.4906, "step": 4661 }, { "epoch": 0.28389611180464636, "grad_norm": 1.0962362733100885, "learning_rate": 4.973543452919984e-06, "loss": 0.4687, "step": 4662 }, { "epoch": 0.2839570075815242, "grad_norm": 1.070420505585791, "learning_rate": 4.973531873499201e-06, "loss": 0.4662, "step": 4663 }, { "epoch": 0.2840179033584021, "grad_norm": 1.0185875932208615, "learning_rate": 4.973520291558434e-06, "loss": 0.4724, "step": 4664 }, { "epoch": 0.28407879913527995, "grad_norm": 1.0013579278526856, "learning_rate": 4.973508707097697e-06, "loss": 0.5003, "step": 4665 }, { "epoch": 0.28413969491215785, "grad_norm": 1.1279419409257365, "learning_rate": 4.973497120117e-06, "loss": 0.3769, "step": 4666 }, { "epoch": 0.2842005906890357, "grad_norm": 1.0530970500456756, "learning_rate": 4.973485530616357e-06, "loss": 0.5231, "step": 4667 }, { "epoch": 0.2842614864659136, "grad_norm": 0.9724607159765419, "learning_rate": 4.973473938595778e-06, "loss": 0.5297, "step": 4668 }, { "epoch": 0.28432238224279144, "grad_norm": 1.1531224031555793, "learning_rate": 4.973462344055274e-06, "loss": 0.4434, "step": 4669 }, { "epoch": 0.28438327801966934, "grad_norm": 1.0645034555021813, "learning_rate": 4.973450746994858e-06, "loss": 0.462, "step": 4670 }, { "epoch": 0.2844441737965472, "grad_norm": 1.2049028408549063, "learning_rate": 4.9734391474145425e-06, "loss": 0.4402, "step": 4671 }, { "epoch": 0.2845050695734251, "grad_norm": 1.1233107066322292, "learning_rate": 4.973427545314339e-06, "loss": 0.5186, "step": 4672 }, { "epoch": 0.28456596535030293, "grad_norm": 0.9398154591560276, "learning_rate": 4.973415940694258e-06, "loss": 0.485, "step": 4673 }, { "epoch": 0.28462686112718083, "grad_norm": 1.105104055204243, "learning_rate": 4.973404333554313e-06, "loss": 0.5331, "step": 4674 }, { "epoch": 0.28468775690405873, "grad_norm": 1.1359242049194815, "learning_rate": 4.973392723894515e-06, "loss": 0.4393, "step": 4675 }, { "epoch": 0.2847486526809366, "grad_norm": 1.0633411351122808, "learning_rate": 4.973381111714875e-06, "loss": 0.4402, "step": 4676 }, { "epoch": 0.2848095484578145, "grad_norm": 1.0391710569886838, "learning_rate": 4.973369497015407e-06, "loss": 0.4141, "step": 4677 }, { "epoch": 0.2848704442346923, "grad_norm": 1.1706556911644022, "learning_rate": 4.97335787979612e-06, "loss": 0.4214, "step": 4678 }, { "epoch": 0.2849313400115702, "grad_norm": 1.0418689594273873, "learning_rate": 4.973346260057029e-06, "loss": 0.5106, "step": 4679 }, { "epoch": 0.28499223578844807, "grad_norm": 1.0583510159711462, "learning_rate": 4.9733346377981435e-06, "loss": 0.4594, "step": 4680 }, { "epoch": 0.28505313156532597, "grad_norm": 1.1171088356710959, "learning_rate": 4.9733230130194765e-06, "loss": 0.4836, "step": 4681 }, { "epoch": 0.2851140273422038, "grad_norm": 1.155565368345447, "learning_rate": 4.973311385721039e-06, "loss": 0.3825, "step": 4682 }, { "epoch": 0.2851749231190817, "grad_norm": 0.9694443596982596, "learning_rate": 4.9732997559028435e-06, "loss": 0.4423, "step": 4683 }, { "epoch": 0.28523581889595956, "grad_norm": 1.0094346065310182, "learning_rate": 4.973288123564902e-06, "loss": 0.5395, "step": 4684 }, { "epoch": 0.28529671467283746, "grad_norm": 0.9810789295502576, "learning_rate": 4.973276488707225e-06, "loss": 0.4744, "step": 4685 }, { "epoch": 0.2853576104497153, "grad_norm": 1.1039947741116771, "learning_rate": 4.973264851329826e-06, "loss": 0.4326, "step": 4686 }, { "epoch": 0.2854185062265932, "grad_norm": 1.001397718424464, "learning_rate": 4.973253211432716e-06, "loss": 0.49, "step": 4687 }, { "epoch": 0.28547940200347105, "grad_norm": 1.1575953911228627, "learning_rate": 4.973241569015908e-06, "loss": 0.4736, "step": 4688 }, { "epoch": 0.28554029778034895, "grad_norm": 0.9602654236493253, "learning_rate": 4.973229924079412e-06, "loss": 0.4718, "step": 4689 }, { "epoch": 0.2856011935572268, "grad_norm": 0.9644655709567413, "learning_rate": 4.973218276623241e-06, "loss": 0.5089, "step": 4690 }, { "epoch": 0.2856620893341047, "grad_norm": 1.0551895350523648, "learning_rate": 4.973206626647405e-06, "loss": 0.4773, "step": 4691 }, { "epoch": 0.28572298511098254, "grad_norm": 1.0286427581142652, "learning_rate": 4.9731949741519195e-06, "loss": 0.4441, "step": 4692 }, { "epoch": 0.28578388088786044, "grad_norm": 1.0163658791390553, "learning_rate": 4.973183319136794e-06, "loss": 0.4767, "step": 4693 }, { "epoch": 0.2858447766647383, "grad_norm": 0.996080171327085, "learning_rate": 4.973171661602041e-06, "loss": 0.5078, "step": 4694 }, { "epoch": 0.2859056724416162, "grad_norm": 1.0732081602855155, "learning_rate": 4.973160001547671e-06, "loss": 0.4647, "step": 4695 }, { "epoch": 0.28596656821849403, "grad_norm": 1.020824221157327, "learning_rate": 4.973148338973699e-06, "loss": 0.4751, "step": 4696 }, { "epoch": 0.28602746399537193, "grad_norm": 1.0494388006649895, "learning_rate": 4.9731366738801335e-06, "loss": 0.45, "step": 4697 }, { "epoch": 0.2860883597722498, "grad_norm": 1.055545929817841, "learning_rate": 4.9731250062669875e-06, "loss": 0.471, "step": 4698 }, { "epoch": 0.2861492555491277, "grad_norm": 0.9867309154038033, "learning_rate": 4.973113336134273e-06, "loss": 0.4499, "step": 4699 }, { "epoch": 0.2862101513260055, "grad_norm": 1.0063646733575238, "learning_rate": 4.973101663482003e-06, "loss": 0.451, "step": 4700 }, { "epoch": 0.2862710471028834, "grad_norm": 1.0609078783150212, "learning_rate": 4.973089988310188e-06, "loss": 0.4531, "step": 4701 }, { "epoch": 0.28633194287976127, "grad_norm": 1.0730073786456584, "learning_rate": 4.973078310618841e-06, "loss": 0.5255, "step": 4702 }, { "epoch": 0.28639283865663917, "grad_norm": 1.1248862390305878, "learning_rate": 4.973066630407972e-06, "loss": 0.4439, "step": 4703 }, { "epoch": 0.286453734433517, "grad_norm": 0.9933135941911766, "learning_rate": 4.973054947677595e-06, "loss": 0.4675, "step": 4704 }, { "epoch": 0.2865146302103949, "grad_norm": 0.9414926372565166, "learning_rate": 4.973043262427721e-06, "loss": 0.5144, "step": 4705 }, { "epoch": 0.28657552598727276, "grad_norm": 0.9874056264991097, "learning_rate": 4.9730315746583615e-06, "loss": 0.4634, "step": 4706 }, { "epoch": 0.28663642176415066, "grad_norm": 1.128537110383014, "learning_rate": 4.973019884369529e-06, "loss": 0.4798, "step": 4707 }, { "epoch": 0.2866973175410285, "grad_norm": 1.0167722187253343, "learning_rate": 4.973008191561236e-06, "loss": 0.5504, "step": 4708 }, { "epoch": 0.2867582133179064, "grad_norm": 1.1185837411907142, "learning_rate": 4.972996496233493e-06, "loss": 0.4776, "step": 4709 }, { "epoch": 0.28681910909478425, "grad_norm": 1.077771661182427, "learning_rate": 4.972984798386312e-06, "loss": 0.4664, "step": 4710 }, { "epoch": 0.28688000487166215, "grad_norm": 1.0738304531420348, "learning_rate": 4.972973098019706e-06, "loss": 0.4411, "step": 4711 }, { "epoch": 0.28694090064854, "grad_norm": 0.9434725575289545, "learning_rate": 4.9729613951336875e-06, "loss": 0.4914, "step": 4712 }, { "epoch": 0.2870017964254179, "grad_norm": 1.0528966074546295, "learning_rate": 4.972949689728266e-06, "loss": 0.5273, "step": 4713 }, { "epoch": 0.28706269220229574, "grad_norm": 1.0252659699497773, "learning_rate": 4.9729379818034555e-06, "loss": 0.4478, "step": 4714 }, { "epoch": 0.28712358797917364, "grad_norm": 0.9975629182601389, "learning_rate": 4.972926271359266e-06, "loss": 0.4614, "step": 4715 }, { "epoch": 0.28718448375605155, "grad_norm": 1.0258591495356872, "learning_rate": 4.972914558395712e-06, "loss": 0.4686, "step": 4716 }, { "epoch": 0.2872453795329294, "grad_norm": 0.9962126040578033, "learning_rate": 4.9729028429128036e-06, "loss": 0.5112, "step": 4717 }, { "epoch": 0.2873062753098073, "grad_norm": 1.1329417152472516, "learning_rate": 4.972891124910553e-06, "loss": 0.4401, "step": 4718 }, { "epoch": 0.28736717108668514, "grad_norm": 0.9116977013754667, "learning_rate": 4.9728794043889725e-06, "loss": 0.4756, "step": 4719 }, { "epoch": 0.28742806686356304, "grad_norm": 0.9901742647889742, "learning_rate": 4.972867681348074e-06, "loss": 0.4991, "step": 4720 }, { "epoch": 0.2874889626404409, "grad_norm": 0.9837915727425174, "learning_rate": 4.972855955787869e-06, "loss": 0.5259, "step": 4721 }, { "epoch": 0.2875498584173188, "grad_norm": 1.0174789677409508, "learning_rate": 4.97284422770837e-06, "loss": 0.4911, "step": 4722 }, { "epoch": 0.2876107541941966, "grad_norm": 1.0731255807174873, "learning_rate": 4.972832497109589e-06, "loss": 0.4619, "step": 4723 }, { "epoch": 0.28767164997107453, "grad_norm": 0.9725578509437919, "learning_rate": 4.972820763991537e-06, "loss": 0.527, "step": 4724 }, { "epoch": 0.2877325457479524, "grad_norm": 1.0495665780666017, "learning_rate": 4.972809028354227e-06, "loss": 0.4909, "step": 4725 }, { "epoch": 0.2877934415248303, "grad_norm": 1.0423182443885124, "learning_rate": 4.9727972901976706e-06, "loss": 0.4518, "step": 4726 }, { "epoch": 0.2878543373017081, "grad_norm": 1.0886534859007742, "learning_rate": 4.972785549521879e-06, "loss": 0.4615, "step": 4727 }, { "epoch": 0.287915233078586, "grad_norm": 1.0700171890221695, "learning_rate": 4.972773806326866e-06, "loss": 0.4825, "step": 4728 }, { "epoch": 0.28797612885546386, "grad_norm": 1.0578715606430413, "learning_rate": 4.972762060612642e-06, "loss": 0.5314, "step": 4729 }, { "epoch": 0.28803702463234176, "grad_norm": 1.0201424455163153, "learning_rate": 4.9727503123792194e-06, "loss": 0.469, "step": 4730 }, { "epoch": 0.2880979204092196, "grad_norm": 1.1160743752889675, "learning_rate": 4.97273856162661e-06, "loss": 0.4889, "step": 4731 }, { "epoch": 0.2881588161860975, "grad_norm": 1.0659674382808033, "learning_rate": 4.972726808354826e-06, "loss": 0.4176, "step": 4732 }, { "epoch": 0.28821971196297536, "grad_norm": 1.0219002165435425, "learning_rate": 4.97271505256388e-06, "loss": 0.4327, "step": 4733 }, { "epoch": 0.28828060773985326, "grad_norm": 1.0640357157172742, "learning_rate": 4.972703294253783e-06, "loss": 0.4486, "step": 4734 }, { "epoch": 0.2883415035167311, "grad_norm": 0.9502939590049496, "learning_rate": 4.972691533424547e-06, "loss": 0.5425, "step": 4735 }, { "epoch": 0.288402399293609, "grad_norm": 0.9701555308105064, "learning_rate": 4.972679770076184e-06, "loss": 0.4585, "step": 4736 }, { "epoch": 0.28846329507048685, "grad_norm": 1.053332841307033, "learning_rate": 4.9726680042087075e-06, "loss": 0.5075, "step": 4737 }, { "epoch": 0.28852419084736475, "grad_norm": 1.0066423133391196, "learning_rate": 4.972656235822126e-06, "loss": 0.4624, "step": 4738 }, { "epoch": 0.2885850866242426, "grad_norm": 0.9849663383178267, "learning_rate": 4.972644464916457e-06, "loss": 0.475, "step": 4739 }, { "epoch": 0.2886459824011205, "grad_norm": 0.9309961440560315, "learning_rate": 4.972632691491706e-06, "loss": 0.4924, "step": 4740 }, { "epoch": 0.28870687817799834, "grad_norm": 0.9774073795962573, "learning_rate": 4.97262091554789e-06, "loss": 0.4413, "step": 4741 }, { "epoch": 0.28876777395487624, "grad_norm": 1.0909227621255146, "learning_rate": 4.9726091370850184e-06, "loss": 0.4851, "step": 4742 }, { "epoch": 0.2888286697317541, "grad_norm": 1.113155070574979, "learning_rate": 4.972597356103105e-06, "loss": 0.4505, "step": 4743 }, { "epoch": 0.288889565508632, "grad_norm": 1.023033233086492, "learning_rate": 4.97258557260216e-06, "loss": 0.4518, "step": 4744 }, { "epoch": 0.28895046128550983, "grad_norm": 1.1645962776536078, "learning_rate": 4.972573786582196e-06, "loss": 0.4551, "step": 4745 }, { "epoch": 0.28901135706238773, "grad_norm": 1.1200221221112634, "learning_rate": 4.972561998043226e-06, "loss": 0.5194, "step": 4746 }, { "epoch": 0.2890722528392656, "grad_norm": 1.040908297965486, "learning_rate": 4.972550206985261e-06, "loss": 0.463, "step": 4747 }, { "epoch": 0.2891331486161435, "grad_norm": 0.8915600745369252, "learning_rate": 4.972538413408313e-06, "loss": 0.5875, "step": 4748 }, { "epoch": 0.2891940443930213, "grad_norm": 1.0707615852962402, "learning_rate": 4.972526617312394e-06, "loss": 0.5364, "step": 4749 }, { "epoch": 0.2892549401698992, "grad_norm": 1.0698570697574992, "learning_rate": 4.972514818697517e-06, "loss": 0.4349, "step": 4750 }, { "epoch": 0.28931583594677707, "grad_norm": 0.982335570378541, "learning_rate": 4.972503017563693e-06, "loss": 0.5261, "step": 4751 }, { "epoch": 0.28937673172365497, "grad_norm": 1.0327696798469783, "learning_rate": 4.972491213910935e-06, "loss": 0.5493, "step": 4752 }, { "epoch": 0.2894376275005328, "grad_norm": 1.051675242759539, "learning_rate": 4.972479407739254e-06, "loss": 0.4846, "step": 4753 }, { "epoch": 0.2894985232774107, "grad_norm": 1.0031282073542507, "learning_rate": 4.972467599048661e-06, "loss": 0.5086, "step": 4754 }, { "epoch": 0.28955941905428856, "grad_norm": 1.044137618687895, "learning_rate": 4.972455787839171e-06, "loss": 0.4747, "step": 4755 }, { "epoch": 0.28962031483116646, "grad_norm": 1.0207176671493368, "learning_rate": 4.972443974110794e-06, "loss": 0.4231, "step": 4756 }, { "epoch": 0.28968121060804436, "grad_norm": 1.0011586841552058, "learning_rate": 4.972432157863543e-06, "loss": 0.495, "step": 4757 }, { "epoch": 0.2897421063849222, "grad_norm": 1.0774572599097945, "learning_rate": 4.9724203390974285e-06, "loss": 0.4689, "step": 4758 }, { "epoch": 0.2898030021618001, "grad_norm": 1.0609840365252408, "learning_rate": 4.972408517812463e-06, "loss": 0.4939, "step": 4759 }, { "epoch": 0.28986389793867795, "grad_norm": 1.0474812550029045, "learning_rate": 4.972396694008661e-06, "loss": 0.4502, "step": 4760 }, { "epoch": 0.28992479371555585, "grad_norm": 1.0430997808774765, "learning_rate": 4.972384867686032e-06, "loss": 0.4567, "step": 4761 }, { "epoch": 0.2899856894924337, "grad_norm": 0.9779573644339782, "learning_rate": 4.972373038844588e-06, "loss": 0.4804, "step": 4762 }, { "epoch": 0.2900465852693116, "grad_norm": 1.034753223786268, "learning_rate": 4.972361207484343e-06, "loss": 0.5065, "step": 4763 }, { "epoch": 0.29010748104618944, "grad_norm": 1.0891701448226778, "learning_rate": 4.972349373605306e-06, "loss": 0.4558, "step": 4764 }, { "epoch": 0.29016837682306734, "grad_norm": 1.0812175562328907, "learning_rate": 4.9723375372074925e-06, "loss": 0.4807, "step": 4765 }, { "epoch": 0.2902292725999452, "grad_norm": 1.0554342213807921, "learning_rate": 4.972325698290912e-06, "loss": 0.4815, "step": 4766 }, { "epoch": 0.2902901683768231, "grad_norm": 1.005379516266243, "learning_rate": 4.972313856855579e-06, "loss": 0.4711, "step": 4767 }, { "epoch": 0.29035106415370093, "grad_norm": 1.00752076055098, "learning_rate": 4.972302012901502e-06, "loss": 0.5696, "step": 4768 }, { "epoch": 0.29041195993057883, "grad_norm": 1.022231933994263, "learning_rate": 4.972290166428696e-06, "loss": 0.5062, "step": 4769 }, { "epoch": 0.2904728557074567, "grad_norm": 1.1480401985359985, "learning_rate": 4.972278317437172e-06, "loss": 0.5061, "step": 4770 }, { "epoch": 0.2905337514843346, "grad_norm": 1.0674872986785338, "learning_rate": 4.9722664659269425e-06, "loss": 0.4831, "step": 4771 }, { "epoch": 0.2905946472612124, "grad_norm": 0.9593935999866446, "learning_rate": 4.97225461189802e-06, "loss": 0.4284, "step": 4772 }, { "epoch": 0.2906555430380903, "grad_norm": 1.090204668501177, "learning_rate": 4.972242755350414e-06, "loss": 0.4231, "step": 4773 }, { "epoch": 0.29071643881496817, "grad_norm": 1.0287395195040085, "learning_rate": 4.9722308962841404e-06, "loss": 0.4961, "step": 4774 }, { "epoch": 0.29077733459184607, "grad_norm": 1.0340315535984126, "learning_rate": 4.9722190346992086e-06, "loss": 0.4385, "step": 4775 }, { "epoch": 0.2908382303687239, "grad_norm": 1.0598351307063223, "learning_rate": 4.972207170595631e-06, "loss": 0.4274, "step": 4776 }, { "epoch": 0.2908991261456018, "grad_norm": 1.0805359941856578, "learning_rate": 4.9721953039734205e-06, "loss": 0.4455, "step": 4777 }, { "epoch": 0.29096002192247966, "grad_norm": 1.0593504189174017, "learning_rate": 4.97218343483259e-06, "loss": 0.5208, "step": 4778 }, { "epoch": 0.29102091769935756, "grad_norm": 1.1214931935013783, "learning_rate": 4.972171563173148e-06, "loss": 0.4783, "step": 4779 }, { "epoch": 0.2910818134762354, "grad_norm": 0.9628611238140816, "learning_rate": 4.97215968899511e-06, "loss": 0.4854, "step": 4780 }, { "epoch": 0.2911427092531133, "grad_norm": 1.126221575836851, "learning_rate": 4.972147812298488e-06, "loss": 0.476, "step": 4781 }, { "epoch": 0.29120360502999115, "grad_norm": 1.0135910084410833, "learning_rate": 4.972135933083292e-06, "loss": 0.4892, "step": 4782 }, { "epoch": 0.29126450080686905, "grad_norm": 1.0229343489482596, "learning_rate": 4.972124051349536e-06, "loss": 0.4702, "step": 4783 }, { "epoch": 0.2913253965837469, "grad_norm": 1.1424463619979517, "learning_rate": 4.972112167097232e-06, "loss": 0.4367, "step": 4784 }, { "epoch": 0.2913862923606248, "grad_norm": 0.9914508699565783, "learning_rate": 4.972100280326391e-06, "loss": 0.4638, "step": 4785 }, { "epoch": 0.29144718813750264, "grad_norm": 1.0171157248670357, "learning_rate": 4.972088391037025e-06, "loss": 0.4387, "step": 4786 }, { "epoch": 0.29150808391438054, "grad_norm": 0.9784027906863423, "learning_rate": 4.972076499229147e-06, "loss": 0.4966, "step": 4787 }, { "epoch": 0.2915689796912584, "grad_norm": 1.0055150309818668, "learning_rate": 4.9720646049027696e-06, "loss": 0.5309, "step": 4788 }, { "epoch": 0.2916298754681363, "grad_norm": 1.0014871072057334, "learning_rate": 4.972052708057903e-06, "loss": 0.5574, "step": 4789 }, { "epoch": 0.29169077124501414, "grad_norm": 0.995324811172232, "learning_rate": 4.972040808694562e-06, "loss": 0.5252, "step": 4790 }, { "epoch": 0.29175166702189204, "grad_norm": 1.111665299270616, "learning_rate": 4.972028906812756e-06, "loss": 0.5209, "step": 4791 }, { "epoch": 0.2918125627987699, "grad_norm": 1.0747105124472116, "learning_rate": 4.972017002412499e-06, "loss": 0.5201, "step": 4792 }, { "epoch": 0.2918734585756478, "grad_norm": 1.0813534937886593, "learning_rate": 4.972005095493801e-06, "loss": 0.4594, "step": 4793 }, { "epoch": 0.2919343543525256, "grad_norm": 1.072709981955552, "learning_rate": 4.971993186056677e-06, "loss": 0.5882, "step": 4794 }, { "epoch": 0.2919952501294035, "grad_norm": 1.0736773102784318, "learning_rate": 4.971981274101138e-06, "loss": 0.4497, "step": 4795 }, { "epoch": 0.2920561459062814, "grad_norm": 1.0359312270416807, "learning_rate": 4.971969359627195e-06, "loss": 0.473, "step": 4796 }, { "epoch": 0.2921170416831593, "grad_norm": 1.05184395070069, "learning_rate": 4.971957442634862e-06, "loss": 0.4594, "step": 4797 }, { "epoch": 0.2921779374600372, "grad_norm": 1.040352866062599, "learning_rate": 4.9719455231241495e-06, "loss": 0.4206, "step": 4798 }, { "epoch": 0.292238833236915, "grad_norm": 1.0176778505756297, "learning_rate": 4.97193360109507e-06, "loss": 0.467, "step": 4799 }, { "epoch": 0.2922997290137929, "grad_norm": 1.051365992232664, "learning_rate": 4.9719216765476365e-06, "loss": 0.4812, "step": 4800 }, { "epoch": 0.29236062479067076, "grad_norm": 1.0416310771766466, "learning_rate": 4.97190974948186e-06, "loss": 0.4347, "step": 4801 }, { "epoch": 0.29242152056754867, "grad_norm": 1.1363797863306264, "learning_rate": 4.971897819897754e-06, "loss": 0.4592, "step": 4802 }, { "epoch": 0.2924824163444265, "grad_norm": 1.1133976442531408, "learning_rate": 4.97188588779533e-06, "loss": 0.4395, "step": 4803 }, { "epoch": 0.2925433121213044, "grad_norm": 0.9874988612944314, "learning_rate": 4.971873953174599e-06, "loss": 0.472, "step": 4804 }, { "epoch": 0.29260420789818226, "grad_norm": 1.0050341293871587, "learning_rate": 4.971862016035575e-06, "loss": 0.5105, "step": 4805 }, { "epoch": 0.29266510367506016, "grad_norm": 1.0327202732675025, "learning_rate": 4.971850076378269e-06, "loss": 0.5046, "step": 4806 }, { "epoch": 0.292725999451938, "grad_norm": 1.07862621025045, "learning_rate": 4.971838134202693e-06, "loss": 0.4708, "step": 4807 }, { "epoch": 0.2927868952288159, "grad_norm": 1.01748033454078, "learning_rate": 4.9718261895088606e-06, "loss": 0.5198, "step": 4808 }, { "epoch": 0.29284779100569375, "grad_norm": 1.0667496034583155, "learning_rate": 4.971814242296783e-06, "loss": 0.4643, "step": 4809 }, { "epoch": 0.29290868678257165, "grad_norm": 1.0902498964003675, "learning_rate": 4.971802292566473e-06, "loss": 0.473, "step": 4810 }, { "epoch": 0.2929695825594495, "grad_norm": 1.0222849067855517, "learning_rate": 4.971790340317941e-06, "loss": 0.4628, "step": 4811 }, { "epoch": 0.2930304783363274, "grad_norm": 1.1209966152087487, "learning_rate": 4.971778385551201e-06, "loss": 0.4671, "step": 4812 }, { "epoch": 0.29309137411320524, "grad_norm": 1.1039957858247014, "learning_rate": 4.971766428266265e-06, "loss": 0.457, "step": 4813 }, { "epoch": 0.29315226989008314, "grad_norm": 1.052150822024785, "learning_rate": 4.971754468463143e-06, "loss": 0.4371, "step": 4814 }, { "epoch": 0.293213165666961, "grad_norm": 1.0239998313858532, "learning_rate": 4.971742506141851e-06, "loss": 0.4692, "step": 4815 }, { "epoch": 0.2932740614438389, "grad_norm": 1.053941936512643, "learning_rate": 4.971730541302398e-06, "loss": 0.5143, "step": 4816 }, { "epoch": 0.29333495722071673, "grad_norm": 0.9948505417445542, "learning_rate": 4.971718573944798e-06, "loss": 0.4417, "step": 4817 }, { "epoch": 0.29339585299759463, "grad_norm": 0.9765091291099964, "learning_rate": 4.971706604069062e-06, "loss": 0.4976, "step": 4818 }, { "epoch": 0.2934567487744725, "grad_norm": 0.9537958749552902, "learning_rate": 4.971694631675202e-06, "loss": 0.5069, "step": 4819 }, { "epoch": 0.2935176445513504, "grad_norm": 1.06761782677484, "learning_rate": 4.971682656763232e-06, "loss": 0.4264, "step": 4820 }, { "epoch": 0.2935785403282282, "grad_norm": 1.0297210003996151, "learning_rate": 4.971670679333163e-06, "loss": 0.4511, "step": 4821 }, { "epoch": 0.2936394361051061, "grad_norm": 1.0394078793851205, "learning_rate": 4.9716586993850065e-06, "loss": 0.5085, "step": 4822 }, { "epoch": 0.29370033188198397, "grad_norm": 1.075192650029821, "learning_rate": 4.971646716918776e-06, "loss": 0.4515, "step": 4823 }, { "epoch": 0.29376122765886187, "grad_norm": 1.1831655892858455, "learning_rate": 4.971634731934484e-06, "loss": 0.4438, "step": 4824 }, { "epoch": 0.2938221234357397, "grad_norm": 1.1648850444769714, "learning_rate": 4.9716227444321404e-06, "loss": 0.536, "step": 4825 }, { "epoch": 0.2938830192126176, "grad_norm": 1.05762962989318, "learning_rate": 4.97161075441176e-06, "loss": 0.4844, "step": 4826 }, { "epoch": 0.29394391498949546, "grad_norm": 1.0995277772389986, "learning_rate": 4.9715987618733545e-06, "loss": 0.4554, "step": 4827 }, { "epoch": 0.29400481076637336, "grad_norm": 0.9919672859080824, "learning_rate": 4.971586766816934e-06, "loss": 0.5282, "step": 4828 }, { "epoch": 0.2940657065432512, "grad_norm": 1.0079271330534496, "learning_rate": 4.971574769242513e-06, "loss": 0.4553, "step": 4829 }, { "epoch": 0.2941266023201291, "grad_norm": 1.0718786386302184, "learning_rate": 4.9715627691501035e-06, "loss": 0.496, "step": 4830 }, { "epoch": 0.29418749809700695, "grad_norm": 1.0672204782586219, "learning_rate": 4.971550766539717e-06, "loss": 0.483, "step": 4831 }, { "epoch": 0.29424839387388485, "grad_norm": 0.9015131557870956, "learning_rate": 4.971538761411365e-06, "loss": 0.5412, "step": 4832 }, { "epoch": 0.2943092896507627, "grad_norm": 0.9246459182538004, "learning_rate": 4.971526753765063e-06, "loss": 0.5131, "step": 4833 }, { "epoch": 0.2943701854276406, "grad_norm": 1.1056043076485484, "learning_rate": 4.971514743600818e-06, "loss": 0.4715, "step": 4834 }, { "epoch": 0.29443108120451844, "grad_norm": 1.0393158368332567, "learning_rate": 4.971502730918647e-06, "loss": 0.5302, "step": 4835 }, { "epoch": 0.29449197698139634, "grad_norm": 1.0384521124702424, "learning_rate": 4.97149071571856e-06, "loss": 0.4186, "step": 4836 }, { "epoch": 0.2945528727582742, "grad_norm": 1.0757410077415215, "learning_rate": 4.971478698000569e-06, "loss": 0.498, "step": 4837 }, { "epoch": 0.2946137685351521, "grad_norm": 1.0723112617625008, "learning_rate": 4.971466677764688e-06, "loss": 0.5109, "step": 4838 }, { "epoch": 0.29467466431203, "grad_norm": 1.0252239066869873, "learning_rate": 4.971454655010928e-06, "loss": 0.5318, "step": 4839 }, { "epoch": 0.29473556008890783, "grad_norm": 1.1389200308018306, "learning_rate": 4.9714426297393e-06, "loss": 0.4988, "step": 4840 }, { "epoch": 0.29479645586578573, "grad_norm": 1.0016745137239622, "learning_rate": 4.9714306019498194e-06, "loss": 0.5087, "step": 4841 }, { "epoch": 0.2948573516426636, "grad_norm": 1.107955912530081, "learning_rate": 4.971418571642496e-06, "loss": 0.4666, "step": 4842 }, { "epoch": 0.2949182474195415, "grad_norm": 1.0156062092705862, "learning_rate": 4.971406538817343e-06, "loss": 0.5455, "step": 4843 }, { "epoch": 0.2949791431964193, "grad_norm": 0.9823986876375775, "learning_rate": 4.971394503474372e-06, "loss": 0.4908, "step": 4844 }, { "epoch": 0.2950400389732972, "grad_norm": 1.0844771717356114, "learning_rate": 4.971382465613595e-06, "loss": 0.5026, "step": 4845 }, { "epoch": 0.29510093475017507, "grad_norm": 0.9932265481016237, "learning_rate": 4.9713704252350255e-06, "loss": 0.4848, "step": 4846 }, { "epoch": 0.29516183052705297, "grad_norm": 0.9229270490735245, "learning_rate": 4.9713583823386755e-06, "loss": 0.4561, "step": 4847 }, { "epoch": 0.2952227263039308, "grad_norm": 1.0480023672081658, "learning_rate": 4.971346336924557e-06, "loss": 0.4709, "step": 4848 }, { "epoch": 0.2952836220808087, "grad_norm": 1.1553046809451752, "learning_rate": 4.971334288992682e-06, "loss": 0.4506, "step": 4849 }, { "epoch": 0.29534451785768656, "grad_norm": 1.0235689628937377, "learning_rate": 4.971322238543063e-06, "loss": 0.4662, "step": 4850 }, { "epoch": 0.29540541363456446, "grad_norm": 1.0030682011168108, "learning_rate": 4.971310185575712e-06, "loss": 0.5229, "step": 4851 }, { "epoch": 0.2954663094114423, "grad_norm": 0.9466202229837539, "learning_rate": 4.971298130090642e-06, "loss": 0.4742, "step": 4852 }, { "epoch": 0.2955272051883202, "grad_norm": 1.0174807641639394, "learning_rate": 4.971286072087865e-06, "loss": 0.4863, "step": 4853 }, { "epoch": 0.29558810096519805, "grad_norm": 1.0266351219169283, "learning_rate": 4.971274011567393e-06, "loss": 0.5345, "step": 4854 }, { "epoch": 0.29564899674207595, "grad_norm": 0.9985926109621863, "learning_rate": 4.971261948529238e-06, "loss": 0.4463, "step": 4855 }, { "epoch": 0.2957098925189538, "grad_norm": 1.085510585632155, "learning_rate": 4.971249882973413e-06, "loss": 0.4967, "step": 4856 }, { "epoch": 0.2957707882958317, "grad_norm": 1.0932477969532728, "learning_rate": 4.971237814899929e-06, "loss": 0.4515, "step": 4857 }, { "epoch": 0.29583168407270954, "grad_norm": 1.0030162121491937, "learning_rate": 4.971225744308801e-06, "loss": 0.4642, "step": 4858 }, { "epoch": 0.29589257984958744, "grad_norm": 1.009744697227879, "learning_rate": 4.971213671200039e-06, "loss": 0.5239, "step": 4859 }, { "epoch": 0.2959534756264653, "grad_norm": 1.0471457183020163, "learning_rate": 4.9712015955736555e-06, "loss": 0.4552, "step": 4860 }, { "epoch": 0.2960143714033432, "grad_norm": 1.0497935863716459, "learning_rate": 4.971189517429663e-06, "loss": 0.569, "step": 4861 }, { "epoch": 0.29607526718022104, "grad_norm": 1.007044464279966, "learning_rate": 4.971177436768076e-06, "loss": 0.4451, "step": 4862 }, { "epoch": 0.29613616295709894, "grad_norm": 0.9890551267158382, "learning_rate": 4.971165353588903e-06, "loss": 0.5338, "step": 4863 }, { "epoch": 0.2961970587339768, "grad_norm": 0.9431820076291278, "learning_rate": 4.971153267892158e-06, "loss": 0.5514, "step": 4864 }, { "epoch": 0.2962579545108547, "grad_norm": 1.033908603432793, "learning_rate": 4.9711411796778535e-06, "loss": 0.5008, "step": 4865 }, { "epoch": 0.2963188502877325, "grad_norm": 0.9447531498033565, "learning_rate": 4.971129088946002e-06, "loss": 0.5244, "step": 4866 }, { "epoch": 0.2963797460646104, "grad_norm": 1.061203322664377, "learning_rate": 4.971116995696616e-06, "loss": 0.4973, "step": 4867 }, { "epoch": 0.2964406418414883, "grad_norm": 0.9902563421174313, "learning_rate": 4.9711048999297066e-06, "loss": 0.4634, "step": 4868 }, { "epoch": 0.2965015376183662, "grad_norm": 1.0125656064119415, "learning_rate": 4.971092801645288e-06, "loss": 0.4714, "step": 4869 }, { "epoch": 0.296562433395244, "grad_norm": 1.0300337139781535, "learning_rate": 4.97108070084337e-06, "loss": 0.4046, "step": 4870 }, { "epoch": 0.2966233291721219, "grad_norm": 1.04022587714178, "learning_rate": 4.971068597523967e-06, "loss": 0.458, "step": 4871 }, { "epoch": 0.29668422494899976, "grad_norm": 1.0289249325119165, "learning_rate": 4.971056491687091e-06, "loss": 0.5, "step": 4872 }, { "epoch": 0.29674512072587766, "grad_norm": 1.102774062818966, "learning_rate": 4.971044383332754e-06, "loss": 0.4491, "step": 4873 }, { "epoch": 0.2968060165027555, "grad_norm": 1.115079665441173, "learning_rate": 4.971032272460968e-06, "loss": 0.4792, "step": 4874 }, { "epoch": 0.2968669122796334, "grad_norm": 0.9911162031642672, "learning_rate": 4.971020159071746e-06, "loss": 0.4984, "step": 4875 }, { "epoch": 0.29692780805651126, "grad_norm": 1.0555939149805413, "learning_rate": 4.971008043165099e-06, "loss": 0.4247, "step": 4876 }, { "epoch": 0.29698870383338916, "grad_norm": 0.8867457640478145, "learning_rate": 4.9709959247410416e-06, "loss": 0.5644, "step": 4877 }, { "epoch": 0.297049599610267, "grad_norm": 1.0220004366799313, "learning_rate": 4.970983803799585e-06, "loss": 0.5971, "step": 4878 }, { "epoch": 0.2971104953871449, "grad_norm": 1.029484631742006, "learning_rate": 4.97097168034074e-06, "loss": 0.424, "step": 4879 }, { "epoch": 0.2971713911640228, "grad_norm": 1.090916531850651, "learning_rate": 4.9709595543645214e-06, "loss": 0.4751, "step": 4880 }, { "epoch": 0.29723228694090065, "grad_norm": 1.0629882021911303, "learning_rate": 4.970947425870941e-06, "loss": 0.4762, "step": 4881 }, { "epoch": 0.29729318271777855, "grad_norm": 1.0265015947012308, "learning_rate": 4.970935294860011e-06, "loss": 0.4908, "step": 4882 }, { "epoch": 0.2973540784946564, "grad_norm": 1.0461443775660983, "learning_rate": 4.970923161331742e-06, "loss": 0.4458, "step": 4883 }, { "epoch": 0.2974149742715343, "grad_norm": 1.131969517283659, "learning_rate": 4.9709110252861495e-06, "loss": 0.4508, "step": 4884 }, { "epoch": 0.29747587004841214, "grad_norm": 1.0998958444008191, "learning_rate": 4.970898886723243e-06, "loss": 0.4972, "step": 4885 }, { "epoch": 0.29753676582529004, "grad_norm": 1.1282203045889958, "learning_rate": 4.970886745643037e-06, "loss": 0.4492, "step": 4886 }, { "epoch": 0.2975976616021679, "grad_norm": 1.1454009550033348, "learning_rate": 4.9708746020455425e-06, "loss": 0.4458, "step": 4887 }, { "epoch": 0.2976585573790458, "grad_norm": 1.0536151385086105, "learning_rate": 4.9708624559307715e-06, "loss": 0.4983, "step": 4888 }, { "epoch": 0.29771945315592363, "grad_norm": 1.0874530053775349, "learning_rate": 4.970850307298739e-06, "loss": 0.4378, "step": 4889 }, { "epoch": 0.29778034893280153, "grad_norm": 0.9943597894730497, "learning_rate": 4.970838156149454e-06, "loss": 0.4896, "step": 4890 }, { "epoch": 0.2978412447096794, "grad_norm": 1.007813531665907, "learning_rate": 4.970826002482932e-06, "loss": 0.4563, "step": 4891 }, { "epoch": 0.2979021404865573, "grad_norm": 1.086224912871932, "learning_rate": 4.970813846299182e-06, "loss": 0.4488, "step": 4892 }, { "epoch": 0.2979630362634351, "grad_norm": 1.0928734238732427, "learning_rate": 4.970801687598219e-06, "loss": 0.5447, "step": 4893 }, { "epoch": 0.298023932040313, "grad_norm": 0.9948436134611929, "learning_rate": 4.970789526380055e-06, "loss": 0.5704, "step": 4894 }, { "epoch": 0.29808482781719087, "grad_norm": 1.0808752379975455, "learning_rate": 4.970777362644701e-06, "loss": 0.4692, "step": 4895 }, { "epoch": 0.29814572359406877, "grad_norm": 1.077961080737291, "learning_rate": 4.9707651963921715e-06, "loss": 0.4707, "step": 4896 }, { "epoch": 0.2982066193709466, "grad_norm": 1.113027361491597, "learning_rate": 4.970753027622478e-06, "loss": 0.4322, "step": 4897 }, { "epoch": 0.2982675151478245, "grad_norm": 0.9511885397633729, "learning_rate": 4.970740856335632e-06, "loss": 0.5285, "step": 4898 }, { "epoch": 0.29832841092470236, "grad_norm": 0.9412862006638815, "learning_rate": 4.9707286825316455e-06, "loss": 0.5816, "step": 4899 }, { "epoch": 0.29838930670158026, "grad_norm": 1.0745105592253101, "learning_rate": 4.970716506210534e-06, "loss": 0.3787, "step": 4900 }, { "epoch": 0.2984502024784581, "grad_norm": 1.0376669266079135, "learning_rate": 4.970704327372306e-06, "loss": 0.4825, "step": 4901 }, { "epoch": 0.298511098255336, "grad_norm": 0.9746671124778157, "learning_rate": 4.970692146016978e-06, "loss": 0.462, "step": 4902 }, { "epoch": 0.29857199403221385, "grad_norm": 1.064091446651441, "learning_rate": 4.970679962144559e-06, "loss": 0.5118, "step": 4903 }, { "epoch": 0.29863288980909175, "grad_norm": 1.0020884533476857, "learning_rate": 4.970667775755062e-06, "loss": 0.4327, "step": 4904 }, { "epoch": 0.2986937855859696, "grad_norm": 1.1060257797771755, "learning_rate": 4.970655586848501e-06, "loss": 0.485, "step": 4905 }, { "epoch": 0.2987546813628475, "grad_norm": 1.0354379751158758, "learning_rate": 4.970643395424887e-06, "loss": 0.5292, "step": 4906 }, { "epoch": 0.29881557713972534, "grad_norm": 0.9672198935409871, "learning_rate": 4.970631201484233e-06, "loss": 0.5057, "step": 4907 }, { "epoch": 0.29887647291660324, "grad_norm": 1.025190500280572, "learning_rate": 4.970619005026552e-06, "loss": 0.4334, "step": 4908 }, { "epoch": 0.2989373686934811, "grad_norm": 0.9544513997733172, "learning_rate": 4.970606806051855e-06, "loss": 0.5025, "step": 4909 }, { "epoch": 0.298998264470359, "grad_norm": 1.0971734322360107, "learning_rate": 4.9705946045601545e-06, "loss": 0.4755, "step": 4910 }, { "epoch": 0.29905916024723683, "grad_norm": 1.0089385376374211, "learning_rate": 4.970582400551465e-06, "loss": 0.4129, "step": 4911 }, { "epoch": 0.29912005602411473, "grad_norm": 0.9970472842722582, "learning_rate": 4.970570194025797e-06, "loss": 0.544, "step": 4912 }, { "epoch": 0.2991809518009926, "grad_norm": 0.9367809987966663, "learning_rate": 4.9705579849831625e-06, "loss": 0.5503, "step": 4913 }, { "epoch": 0.2992418475778705, "grad_norm": 0.9665656727969053, "learning_rate": 4.970545773423576e-06, "loss": 0.5635, "step": 4914 }, { "epoch": 0.2993027433547483, "grad_norm": 1.0116630551454882, "learning_rate": 4.970533559347048e-06, "loss": 0.537, "step": 4915 }, { "epoch": 0.2993636391316262, "grad_norm": 1.0811467475488183, "learning_rate": 4.970521342753592e-06, "loss": 0.4906, "step": 4916 }, { "epoch": 0.29942453490850407, "grad_norm": 1.158031940201155, "learning_rate": 4.970509123643221e-06, "loss": 0.4508, "step": 4917 }, { "epoch": 0.29948543068538197, "grad_norm": 1.0032629738709806, "learning_rate": 4.970496902015946e-06, "loss": 0.5344, "step": 4918 }, { "epoch": 0.2995463264622598, "grad_norm": 1.0441834257734484, "learning_rate": 4.97048467787178e-06, "loss": 0.5096, "step": 4919 }, { "epoch": 0.2996072222391377, "grad_norm": 0.9461991098391789, "learning_rate": 4.970472451210736e-06, "loss": 0.4882, "step": 4920 }, { "epoch": 0.2996681180160156, "grad_norm": 1.041258229816044, "learning_rate": 4.970460222032826e-06, "loss": 0.4444, "step": 4921 }, { "epoch": 0.29972901379289346, "grad_norm": 1.0304099557873891, "learning_rate": 4.970447990338062e-06, "loss": 0.4045, "step": 4922 }, { "epoch": 0.29978990956977136, "grad_norm": 1.0571806200448772, "learning_rate": 4.970435756126457e-06, "loss": 0.5022, "step": 4923 }, { "epoch": 0.2998508053466492, "grad_norm": 0.9922260081628608, "learning_rate": 4.970423519398024e-06, "loss": 0.4612, "step": 4924 }, { "epoch": 0.2999117011235271, "grad_norm": 0.930098604083178, "learning_rate": 4.970411280152775e-06, "loss": 0.4313, "step": 4925 }, { "epoch": 0.29997259690040495, "grad_norm": 0.9896062587220559, "learning_rate": 4.970399038390722e-06, "loss": 0.4939, "step": 4926 }, { "epoch": 0.30003349267728285, "grad_norm": 1.0116079342050286, "learning_rate": 4.970386794111878e-06, "loss": 0.521, "step": 4927 }, { "epoch": 0.3000943884541607, "grad_norm": 1.0741947807389676, "learning_rate": 4.970374547316255e-06, "loss": 0.3916, "step": 4928 }, { "epoch": 0.3001552842310386, "grad_norm": 1.0367597529218504, "learning_rate": 4.970362298003866e-06, "loss": 0.5125, "step": 4929 }, { "epoch": 0.30021618000791644, "grad_norm": 0.996159428712316, "learning_rate": 4.970350046174722e-06, "loss": 0.5217, "step": 4930 }, { "epoch": 0.30027707578479435, "grad_norm": 1.0363403184846296, "learning_rate": 4.970337791828838e-06, "loss": 0.4369, "step": 4931 }, { "epoch": 0.3003379715616722, "grad_norm": 1.0768081556048565, "learning_rate": 4.970325534966225e-06, "loss": 0.4493, "step": 4932 }, { "epoch": 0.3003988673385501, "grad_norm": 1.0026265013282036, "learning_rate": 4.970313275586896e-06, "loss": 0.5137, "step": 4933 }, { "epoch": 0.30045976311542794, "grad_norm": 1.0987459436003237, "learning_rate": 4.970301013690863e-06, "loss": 0.5247, "step": 4934 }, { "epoch": 0.30052065889230584, "grad_norm": 1.0482526571114168, "learning_rate": 4.970288749278138e-06, "loss": 0.4666, "step": 4935 }, { "epoch": 0.3005815546691837, "grad_norm": 1.0986605778996261, "learning_rate": 4.970276482348735e-06, "loss": 0.4672, "step": 4936 }, { "epoch": 0.3006424504460616, "grad_norm": 0.9700473276413938, "learning_rate": 4.970264212902666e-06, "loss": 0.4612, "step": 4937 }, { "epoch": 0.3007033462229394, "grad_norm": 0.9351974785949779, "learning_rate": 4.9702519409399425e-06, "loss": 0.5214, "step": 4938 }, { "epoch": 0.30076424199981733, "grad_norm": 1.0746187056870868, "learning_rate": 4.9702396664605775e-06, "loss": 0.4145, "step": 4939 }, { "epoch": 0.3008251377766952, "grad_norm": 0.9674212275122395, "learning_rate": 4.9702273894645845e-06, "loss": 0.4984, "step": 4940 }, { "epoch": 0.3008860335535731, "grad_norm": 0.9683697142512124, "learning_rate": 4.970215109951974e-06, "loss": 0.381, "step": 4941 }, { "epoch": 0.3009469293304509, "grad_norm": 1.092221477305765, "learning_rate": 4.970202827922761e-06, "loss": 0.423, "step": 4942 }, { "epoch": 0.3010078251073288, "grad_norm": 1.0743770050663917, "learning_rate": 4.970190543376956e-06, "loss": 0.4397, "step": 4943 }, { "epoch": 0.30106872088420666, "grad_norm": 1.0914523101053157, "learning_rate": 4.970178256314572e-06, "loss": 0.4852, "step": 4944 }, { "epoch": 0.30112961666108456, "grad_norm": 1.1043565913553344, "learning_rate": 4.9701659667356226e-06, "loss": 0.5266, "step": 4945 }, { "epoch": 0.3011905124379624, "grad_norm": 0.9871830260563703, "learning_rate": 4.9701536746401195e-06, "loss": 0.5606, "step": 4946 }, { "epoch": 0.3012514082148403, "grad_norm": 1.0222764072237511, "learning_rate": 4.970141380028074e-06, "loss": 0.4598, "step": 4947 }, { "epoch": 0.30131230399171816, "grad_norm": 1.0538527754215283, "learning_rate": 4.9701290828995e-06, "loss": 0.4639, "step": 4948 }, { "epoch": 0.30137319976859606, "grad_norm": 1.0739045959355669, "learning_rate": 4.97011678325441e-06, "loss": 0.5091, "step": 4949 }, { "epoch": 0.3014340955454739, "grad_norm": 1.1058232656757216, "learning_rate": 4.9701044810928176e-06, "loss": 0.5602, "step": 4950 }, { "epoch": 0.3014949913223518, "grad_norm": 1.0530086276240438, "learning_rate": 4.970092176414733e-06, "loss": 0.4823, "step": 4951 }, { "epoch": 0.30155588709922965, "grad_norm": 0.9372584514495159, "learning_rate": 4.97007986922017e-06, "loss": 0.5124, "step": 4952 }, { "epoch": 0.30161678287610755, "grad_norm": 1.0559995320259994, "learning_rate": 4.970067559509141e-06, "loss": 0.4647, "step": 4953 }, { "epoch": 0.3016776786529854, "grad_norm": 1.0619723836633261, "learning_rate": 4.970055247281659e-06, "loss": 0.4502, "step": 4954 }, { "epoch": 0.3017385744298633, "grad_norm": 1.024638769021098, "learning_rate": 4.970042932537736e-06, "loss": 0.4919, "step": 4955 }, { "epoch": 0.30179947020674114, "grad_norm": 0.9967199226849933, "learning_rate": 4.9700306152773834e-06, "loss": 0.4502, "step": 4956 }, { "epoch": 0.30186036598361904, "grad_norm": 1.0785394208417376, "learning_rate": 4.970018295500616e-06, "loss": 0.4564, "step": 4957 }, { "epoch": 0.3019212617604969, "grad_norm": 1.000096409171083, "learning_rate": 4.970005973207445e-06, "loss": 0.5181, "step": 4958 }, { "epoch": 0.3019821575373748, "grad_norm": 0.9958971917404045, "learning_rate": 4.969993648397883e-06, "loss": 0.5094, "step": 4959 }, { "epoch": 0.30204305331425263, "grad_norm": 0.9940823611028659, "learning_rate": 4.969981321071944e-06, "loss": 0.4417, "step": 4960 }, { "epoch": 0.30210394909113053, "grad_norm": 1.0217535981750054, "learning_rate": 4.969968991229638e-06, "loss": 0.5152, "step": 4961 }, { "epoch": 0.30216484486800843, "grad_norm": 0.9855225430336216, "learning_rate": 4.969956658870979e-06, "loss": 0.5403, "step": 4962 }, { "epoch": 0.3022257406448863, "grad_norm": 0.9569724378388705, "learning_rate": 4.969944323995981e-06, "loss": 0.5128, "step": 4963 }, { "epoch": 0.3022866364217642, "grad_norm": 0.9639554113376013, "learning_rate": 4.969931986604654e-06, "loss": 0.5367, "step": 4964 }, { "epoch": 0.302347532198642, "grad_norm": 1.0579788801911894, "learning_rate": 4.9699196466970115e-06, "loss": 0.4801, "step": 4965 }, { "epoch": 0.3024084279755199, "grad_norm": 1.00629867912399, "learning_rate": 4.969907304273066e-06, "loss": 0.4452, "step": 4966 }, { "epoch": 0.30246932375239777, "grad_norm": 1.0670320992625448, "learning_rate": 4.969894959332832e-06, "loss": 0.4884, "step": 4967 }, { "epoch": 0.30253021952927567, "grad_norm": 0.9542363826120015, "learning_rate": 4.9698826118763184e-06, "loss": 0.5613, "step": 4968 }, { "epoch": 0.3025911153061535, "grad_norm": 1.115617210135161, "learning_rate": 4.96987026190354e-06, "loss": 0.5094, "step": 4969 }, { "epoch": 0.3026520110830314, "grad_norm": 1.1117591512215368, "learning_rate": 4.96985790941451e-06, "loss": 0.4503, "step": 4970 }, { "epoch": 0.30271290685990926, "grad_norm": 1.0806770180372833, "learning_rate": 4.96984555440924e-06, "loss": 0.4749, "step": 4971 }, { "epoch": 0.30277380263678716, "grad_norm": 1.0081326013919, "learning_rate": 4.969833196887742e-06, "loss": 0.4936, "step": 4972 }, { "epoch": 0.302834698413665, "grad_norm": 1.0186379491389177, "learning_rate": 4.96982083685003e-06, "loss": 0.5091, "step": 4973 }, { "epoch": 0.3028955941905429, "grad_norm": 1.100908921332688, "learning_rate": 4.969808474296115e-06, "loss": 0.4479, "step": 4974 }, { "epoch": 0.30295648996742075, "grad_norm": 1.07350988141062, "learning_rate": 4.969796109226012e-06, "loss": 0.4445, "step": 4975 }, { "epoch": 0.30301738574429865, "grad_norm": 0.9530436938468588, "learning_rate": 4.9697837416397306e-06, "loss": 0.5198, "step": 4976 }, { "epoch": 0.3030782815211765, "grad_norm": 0.9945665592491477, "learning_rate": 4.9697713715372856e-06, "loss": 0.4591, "step": 4977 }, { "epoch": 0.3031391772980544, "grad_norm": 1.0053297070215237, "learning_rate": 4.969758998918689e-06, "loss": 0.4552, "step": 4978 }, { "epoch": 0.30320007307493224, "grad_norm": 1.0870505843231055, "learning_rate": 4.969746623783952e-06, "loss": 0.4596, "step": 4979 }, { "epoch": 0.30326096885181014, "grad_norm": 1.0396522444392808, "learning_rate": 4.96973424613309e-06, "loss": 0.4567, "step": 4980 }, { "epoch": 0.303321864628688, "grad_norm": 1.0052590566589814, "learning_rate": 4.969721865966114e-06, "loss": 0.4319, "step": 4981 }, { "epoch": 0.3033827604055659, "grad_norm": 0.9317154800671471, "learning_rate": 4.969709483283036e-06, "loss": 0.4798, "step": 4982 }, { "epoch": 0.30344365618244373, "grad_norm": 1.1220444538016756, "learning_rate": 4.969697098083869e-06, "loss": 0.4959, "step": 4983 }, { "epoch": 0.30350455195932163, "grad_norm": 1.0554496777567757, "learning_rate": 4.969684710368627e-06, "loss": 0.4815, "step": 4984 }, { "epoch": 0.3035654477361995, "grad_norm": 1.056794190099181, "learning_rate": 4.9696723201373206e-06, "loss": 0.4307, "step": 4985 }, { "epoch": 0.3036263435130774, "grad_norm": 1.1441655192539453, "learning_rate": 4.969659927389965e-06, "loss": 0.4838, "step": 4986 }, { "epoch": 0.3036872392899552, "grad_norm": 1.0537504840483045, "learning_rate": 4.9696475321265695e-06, "loss": 0.4837, "step": 4987 }, { "epoch": 0.3037481350668331, "grad_norm": 1.1890437172975241, "learning_rate": 4.969635134347149e-06, "loss": 0.5165, "step": 4988 }, { "epoch": 0.30380903084371097, "grad_norm": 1.1490539826670503, "learning_rate": 4.969622734051716e-06, "loss": 0.4706, "step": 4989 }, { "epoch": 0.30386992662058887, "grad_norm": 0.9563754614759905, "learning_rate": 4.969610331240282e-06, "loss": 0.4848, "step": 4990 }, { "epoch": 0.3039308223974667, "grad_norm": 1.0951977915389661, "learning_rate": 4.96959792591286e-06, "loss": 0.4497, "step": 4991 }, { "epoch": 0.3039917181743446, "grad_norm": 1.0589506197677945, "learning_rate": 4.969585518069464e-06, "loss": 0.4564, "step": 4992 }, { "epoch": 0.30405261395122246, "grad_norm": 1.0671811817128787, "learning_rate": 4.969573107710105e-06, "loss": 0.461, "step": 4993 }, { "epoch": 0.30411350972810036, "grad_norm": 1.1065230531020118, "learning_rate": 4.9695606948347965e-06, "loss": 0.447, "step": 4994 }, { "epoch": 0.3041744055049782, "grad_norm": 0.9726336567875957, "learning_rate": 4.969548279443551e-06, "loss": 0.5581, "step": 4995 }, { "epoch": 0.3042353012818561, "grad_norm": 1.0260527294191515, "learning_rate": 4.969535861536381e-06, "loss": 0.4652, "step": 4996 }, { "epoch": 0.30429619705873395, "grad_norm": 1.0108517557901224, "learning_rate": 4.969523441113299e-06, "loss": 0.4707, "step": 4997 }, { "epoch": 0.30435709283561185, "grad_norm": 1.0445585005639557, "learning_rate": 4.969511018174318e-06, "loss": 0.4344, "step": 4998 }, { "epoch": 0.3044179886124897, "grad_norm": 1.0873952703230902, "learning_rate": 4.969498592719451e-06, "loss": 0.5257, "step": 4999 }, { "epoch": 0.3044788843893676, "grad_norm": 1.1057028032311849, "learning_rate": 4.969486164748709e-06, "loss": 0.4188, "step": 5000 }, { "epoch": 0.30453978016624544, "grad_norm": 1.0201680373179371, "learning_rate": 4.969473734262107e-06, "loss": 0.5477, "step": 5001 }, { "epoch": 0.30460067594312334, "grad_norm": 1.1364511722538246, "learning_rate": 4.969461301259656e-06, "loss": 0.4366, "step": 5002 }, { "epoch": 0.30466157172000125, "grad_norm": 0.9807959708660955, "learning_rate": 4.96944886574137e-06, "loss": 0.4909, "step": 5003 }, { "epoch": 0.3047224674968791, "grad_norm": 1.1101296552951776, "learning_rate": 4.969436427707259e-06, "loss": 0.4691, "step": 5004 }, { "epoch": 0.304783363273757, "grad_norm": 0.9424665522338885, "learning_rate": 4.969423987157339e-06, "loss": 0.6356, "step": 5005 }, { "epoch": 0.30484425905063484, "grad_norm": 1.0166249668896945, "learning_rate": 4.969411544091621e-06, "loss": 0.5134, "step": 5006 }, { "epoch": 0.30490515482751274, "grad_norm": 1.1648069619512398, "learning_rate": 4.969399098510117e-06, "loss": 0.4889, "step": 5007 }, { "epoch": 0.3049660506043906, "grad_norm": 0.9554607090158274, "learning_rate": 4.969386650412842e-06, "loss": 0.5166, "step": 5008 }, { "epoch": 0.3050269463812685, "grad_norm": 1.1020627120608435, "learning_rate": 4.969374199799806e-06, "loss": 0.4572, "step": 5009 }, { "epoch": 0.3050878421581463, "grad_norm": 0.9868726583114441, "learning_rate": 4.969361746671023e-06, "loss": 0.4702, "step": 5010 }, { "epoch": 0.30514873793502423, "grad_norm": 0.9377812862188304, "learning_rate": 4.969349291026506e-06, "loss": 0.5252, "step": 5011 }, { "epoch": 0.3052096337119021, "grad_norm": 1.0536112229525696, "learning_rate": 4.969336832866267e-06, "loss": 0.5047, "step": 5012 }, { "epoch": 0.30527052948878, "grad_norm": 0.9655928116633052, "learning_rate": 4.96932437219032e-06, "loss": 0.5467, "step": 5013 }, { "epoch": 0.3053314252656578, "grad_norm": 1.070286254947455, "learning_rate": 4.969311908998675e-06, "loss": 0.4305, "step": 5014 }, { "epoch": 0.3053923210425357, "grad_norm": 1.0855605670040693, "learning_rate": 4.969299443291347e-06, "loss": 0.4247, "step": 5015 }, { "epoch": 0.30545321681941356, "grad_norm": 1.1361712417855092, "learning_rate": 4.969286975068348e-06, "loss": 0.4928, "step": 5016 }, { "epoch": 0.30551411259629146, "grad_norm": 0.9662548616404487, "learning_rate": 4.969274504329691e-06, "loss": 0.6052, "step": 5017 }, { "epoch": 0.3055750083731693, "grad_norm": 1.0053558559868432, "learning_rate": 4.969262031075389e-06, "loss": 0.4718, "step": 5018 }, { "epoch": 0.3056359041500472, "grad_norm": 1.2701794717639687, "learning_rate": 4.969249555305453e-06, "loss": 0.387, "step": 5019 }, { "epoch": 0.30569679992692506, "grad_norm": 1.061238983425306, "learning_rate": 4.969237077019898e-06, "loss": 0.5114, "step": 5020 }, { "epoch": 0.30575769570380296, "grad_norm": 1.069934828866935, "learning_rate": 4.969224596218735e-06, "loss": 0.508, "step": 5021 }, { "epoch": 0.3058185914806808, "grad_norm": 1.1212024661436197, "learning_rate": 4.969212112901978e-06, "loss": 0.4915, "step": 5022 }, { "epoch": 0.3058794872575587, "grad_norm": 1.0801032555903776, "learning_rate": 4.969199627069638e-06, "loss": 0.4946, "step": 5023 }, { "epoch": 0.30594038303443655, "grad_norm": 1.0810401536120458, "learning_rate": 4.969187138721729e-06, "loss": 0.4711, "step": 5024 }, { "epoch": 0.30600127881131445, "grad_norm": 0.9587972296622962, "learning_rate": 4.969174647858264e-06, "loss": 0.5004, "step": 5025 }, { "epoch": 0.3060621745881923, "grad_norm": 1.0443569915971467, "learning_rate": 4.969162154479254e-06, "loss": 0.4568, "step": 5026 }, { "epoch": 0.3061230703650702, "grad_norm": 1.015833541196289, "learning_rate": 4.969149658584715e-06, "loss": 0.4753, "step": 5027 }, { "epoch": 0.30618396614194804, "grad_norm": 1.0575446519982281, "learning_rate": 4.969137160174656e-06, "loss": 0.4726, "step": 5028 }, { "epoch": 0.30624486191882594, "grad_norm": 0.8879352768580389, "learning_rate": 4.969124659249092e-06, "loss": 0.5069, "step": 5029 }, { "epoch": 0.3063057576957038, "grad_norm": 1.014302063898416, "learning_rate": 4.969112155808035e-06, "loss": 0.4967, "step": 5030 }, { "epoch": 0.3063666534725817, "grad_norm": 0.940337816097191, "learning_rate": 4.969099649851498e-06, "loss": 0.5277, "step": 5031 }, { "epoch": 0.30642754924945953, "grad_norm": 0.9723108242904902, "learning_rate": 4.969087141379494e-06, "loss": 0.513, "step": 5032 }, { "epoch": 0.30648844502633743, "grad_norm": 0.9740742373183502, "learning_rate": 4.969074630392035e-06, "loss": 0.4904, "step": 5033 }, { "epoch": 0.3065493408032153, "grad_norm": 1.0704686597381328, "learning_rate": 4.969062116889134e-06, "loss": 0.4263, "step": 5034 }, { "epoch": 0.3066102365800932, "grad_norm": 0.8754239257694846, "learning_rate": 4.969049600870803e-06, "loss": 0.5116, "step": 5035 }, { "epoch": 0.306671132356971, "grad_norm": 0.971147412670188, "learning_rate": 4.969037082337057e-06, "loss": 0.5095, "step": 5036 }, { "epoch": 0.3067320281338489, "grad_norm": 1.0838238313668744, "learning_rate": 4.969024561287906e-06, "loss": 0.5192, "step": 5037 }, { "epoch": 0.30679292391072677, "grad_norm": 1.11567003505524, "learning_rate": 4.969012037723365e-06, "loss": 0.47, "step": 5038 }, { "epoch": 0.30685381968760467, "grad_norm": 0.9959563481863839, "learning_rate": 4.968999511643447e-06, "loss": 0.4451, "step": 5039 }, { "epoch": 0.3069147154644825, "grad_norm": 1.0542652420655783, "learning_rate": 4.9689869830481615e-06, "loss": 0.4868, "step": 5040 }, { "epoch": 0.3069756112413604, "grad_norm": 1.0530860007088592, "learning_rate": 4.968974451937524e-06, "loss": 0.4623, "step": 5041 }, { "epoch": 0.30703650701823826, "grad_norm": 1.074190756999078, "learning_rate": 4.9689619183115475e-06, "loss": 0.5144, "step": 5042 }, { "epoch": 0.30709740279511616, "grad_norm": 1.050353344463161, "learning_rate": 4.968949382170243e-06, "loss": 0.5338, "step": 5043 }, { "epoch": 0.30715829857199406, "grad_norm": 1.0333597542937802, "learning_rate": 4.968936843513625e-06, "loss": 0.461, "step": 5044 }, { "epoch": 0.3072191943488719, "grad_norm": 0.980100562457861, "learning_rate": 4.968924302341705e-06, "loss": 0.4419, "step": 5045 }, { "epoch": 0.3072800901257498, "grad_norm": 0.9717956113468027, "learning_rate": 4.968911758654497e-06, "loss": 0.5354, "step": 5046 }, { "epoch": 0.30734098590262765, "grad_norm": 1.127850226045452, "learning_rate": 4.968899212452012e-06, "loss": 0.4106, "step": 5047 }, { "epoch": 0.30740188167950555, "grad_norm": 1.0591115216972031, "learning_rate": 4.968886663734264e-06, "loss": 0.4375, "step": 5048 }, { "epoch": 0.3074627774563834, "grad_norm": 1.027267476965639, "learning_rate": 4.968874112501265e-06, "loss": 0.5204, "step": 5049 }, { "epoch": 0.3075236732332613, "grad_norm": 1.0714324761912237, "learning_rate": 4.96886155875303e-06, "loss": 0.5085, "step": 5050 }, { "epoch": 0.30758456901013914, "grad_norm": 0.9752043858718045, "learning_rate": 4.968849002489568e-06, "loss": 0.5429, "step": 5051 }, { "epoch": 0.30764546478701704, "grad_norm": 1.137806290175147, "learning_rate": 4.968836443710896e-06, "loss": 0.516, "step": 5052 }, { "epoch": 0.3077063605638949, "grad_norm": 0.974064054812871, "learning_rate": 4.968823882417025e-06, "loss": 0.4885, "step": 5053 }, { "epoch": 0.3077672563407728, "grad_norm": 1.0232338716419167, "learning_rate": 4.968811318607966e-06, "loss": 0.509, "step": 5054 }, { "epoch": 0.30782815211765063, "grad_norm": 1.0083135216252797, "learning_rate": 4.9687987522837335e-06, "loss": 0.4285, "step": 5055 }, { "epoch": 0.30788904789452853, "grad_norm": 1.0845860705975552, "learning_rate": 4.968786183444341e-06, "loss": 0.4052, "step": 5056 }, { "epoch": 0.3079499436714064, "grad_norm": 1.0534882327030928, "learning_rate": 4.9687736120898e-06, "loss": 0.4867, "step": 5057 }, { "epoch": 0.3080108394482843, "grad_norm": 1.0667864091272876, "learning_rate": 4.968761038220124e-06, "loss": 0.5373, "step": 5058 }, { "epoch": 0.3080717352251621, "grad_norm": 1.0367658192750488, "learning_rate": 4.968748461835325e-06, "loss": 0.4686, "step": 5059 }, { "epoch": 0.30813263100204, "grad_norm": 1.0541846078424237, "learning_rate": 4.968735882935417e-06, "loss": 0.4511, "step": 5060 }, { "epoch": 0.30819352677891787, "grad_norm": 0.9888586037578275, "learning_rate": 4.968723301520412e-06, "loss": 0.4806, "step": 5061 }, { "epoch": 0.30825442255579577, "grad_norm": 1.1114018732140467, "learning_rate": 4.968710717590323e-06, "loss": 0.489, "step": 5062 }, { "epoch": 0.3083153183326736, "grad_norm": 1.0598257964304432, "learning_rate": 4.9686981311451625e-06, "loss": 0.4597, "step": 5063 }, { "epoch": 0.3083762141095515, "grad_norm": 1.0213098930937954, "learning_rate": 4.968685542184944e-06, "loss": 0.4477, "step": 5064 }, { "epoch": 0.30843710988642936, "grad_norm": 0.9507471240203729, "learning_rate": 4.9686729507096805e-06, "loss": 0.4955, "step": 5065 }, { "epoch": 0.30849800566330726, "grad_norm": 1.0526737689602832, "learning_rate": 4.9686603567193835e-06, "loss": 0.5024, "step": 5066 }, { "epoch": 0.3085589014401851, "grad_norm": 1.0513545801709065, "learning_rate": 4.968647760214067e-06, "loss": 0.4418, "step": 5067 }, { "epoch": 0.308619797217063, "grad_norm": 1.0477543196529404, "learning_rate": 4.968635161193744e-06, "loss": 0.5105, "step": 5068 }, { "epoch": 0.30868069299394085, "grad_norm": 1.0050112654618055, "learning_rate": 4.968622559658426e-06, "loss": 0.4538, "step": 5069 }, { "epoch": 0.30874158877081875, "grad_norm": 1.012554264613682, "learning_rate": 4.968609955608127e-06, "loss": 0.4636, "step": 5070 }, { "epoch": 0.3088024845476966, "grad_norm": 0.9837771697737474, "learning_rate": 4.96859734904286e-06, "loss": 0.5029, "step": 5071 }, { "epoch": 0.3088633803245745, "grad_norm": 1.0452234768330588, "learning_rate": 4.968584739962636e-06, "loss": 0.4956, "step": 5072 }, { "epoch": 0.30892427610145234, "grad_norm": 1.039993185266732, "learning_rate": 4.968572128367471e-06, "loss": 0.4769, "step": 5073 }, { "epoch": 0.30898517187833024, "grad_norm": 1.0826373627090515, "learning_rate": 4.968559514257375e-06, "loss": 0.5284, "step": 5074 }, { "epoch": 0.3090460676552081, "grad_norm": 1.1588836476979387, "learning_rate": 4.968546897632361e-06, "loss": 0.4498, "step": 5075 }, { "epoch": 0.309106963432086, "grad_norm": 1.000615609684844, "learning_rate": 4.968534278492444e-06, "loss": 0.5167, "step": 5076 }, { "epoch": 0.30916785920896384, "grad_norm": 1.0134349593726322, "learning_rate": 4.968521656837636e-06, "loss": 0.509, "step": 5077 }, { "epoch": 0.30922875498584174, "grad_norm": 1.0398419098266587, "learning_rate": 4.968509032667948e-06, "loss": 0.443, "step": 5078 }, { "epoch": 0.3092896507627196, "grad_norm": 0.9936108879203397, "learning_rate": 4.9684964059833954e-06, "loss": 0.558, "step": 5079 }, { "epoch": 0.3093505465395975, "grad_norm": 1.020787624313198, "learning_rate": 4.9684837767839895e-06, "loss": 0.5189, "step": 5080 }, { "epoch": 0.3094114423164753, "grad_norm": 0.9700081666130015, "learning_rate": 4.968471145069744e-06, "loss": 0.5096, "step": 5081 }, { "epoch": 0.3094723380933532, "grad_norm": 0.9979033864017931, "learning_rate": 4.968458510840671e-06, "loss": 0.4847, "step": 5082 }, { "epoch": 0.3095332338702311, "grad_norm": 0.9726461193713638, "learning_rate": 4.968445874096784e-06, "loss": 0.4918, "step": 5083 }, { "epoch": 0.309594129647109, "grad_norm": 1.0153879660121452, "learning_rate": 4.968433234838096e-06, "loss": 0.4582, "step": 5084 }, { "epoch": 0.3096550254239869, "grad_norm": 0.9955649452741717, "learning_rate": 4.968420593064619e-06, "loss": 0.4572, "step": 5085 }, { "epoch": 0.3097159212008647, "grad_norm": 0.9812778680233103, "learning_rate": 4.968407948776367e-06, "loss": 0.4824, "step": 5086 }, { "epoch": 0.3097768169777426, "grad_norm": 1.1208105316877057, "learning_rate": 4.968395301973351e-06, "loss": 0.4982, "step": 5087 }, { "epoch": 0.30983771275462046, "grad_norm": 0.9699231743767815, "learning_rate": 4.9683826526555865e-06, "loss": 0.5065, "step": 5088 }, { "epoch": 0.30989860853149837, "grad_norm": 1.0573450582967472, "learning_rate": 4.968370000823085e-06, "loss": 0.4841, "step": 5089 }, { "epoch": 0.3099595043083762, "grad_norm": 1.0157389475456855, "learning_rate": 4.968357346475859e-06, "loss": 0.4668, "step": 5090 }, { "epoch": 0.3100204000852541, "grad_norm": 1.1064073390383085, "learning_rate": 4.968344689613922e-06, "loss": 0.4344, "step": 5091 }, { "epoch": 0.31008129586213196, "grad_norm": 1.1153826298547915, "learning_rate": 4.968332030237287e-06, "loss": 0.4326, "step": 5092 }, { "epoch": 0.31014219163900986, "grad_norm": 1.0178356409543432, "learning_rate": 4.968319368345967e-06, "loss": 0.4453, "step": 5093 }, { "epoch": 0.3102030874158877, "grad_norm": 0.9964832654763887, "learning_rate": 4.9683067039399734e-06, "loss": 0.5119, "step": 5094 }, { "epoch": 0.3102639831927656, "grad_norm": 1.122896887707491, "learning_rate": 4.968294037019321e-06, "loss": 0.5246, "step": 5095 }, { "epoch": 0.31032487896964345, "grad_norm": 1.1399021266120526, "learning_rate": 4.968281367584021e-06, "loss": 0.4137, "step": 5096 }, { "epoch": 0.31038577474652135, "grad_norm": 1.031661005012351, "learning_rate": 4.968268695634089e-06, "loss": 0.4745, "step": 5097 }, { "epoch": 0.3104466705233992, "grad_norm": 1.0158564539496315, "learning_rate": 4.9682560211695345e-06, "loss": 0.4997, "step": 5098 }, { "epoch": 0.3105075663002771, "grad_norm": 0.9593264633507248, "learning_rate": 4.968243344190373e-06, "loss": 0.4777, "step": 5099 }, { "epoch": 0.31056846207715494, "grad_norm": 1.0480055508162904, "learning_rate": 4.968230664696616e-06, "loss": 0.4946, "step": 5100 }, { "epoch": 0.31062935785403284, "grad_norm": 1.032845949627706, "learning_rate": 4.968217982688277e-06, "loss": 0.5317, "step": 5101 }, { "epoch": 0.3106902536309107, "grad_norm": 1.133953215255313, "learning_rate": 4.968205298165369e-06, "loss": 0.5025, "step": 5102 }, { "epoch": 0.3107511494077886, "grad_norm": 1.0218764623507441, "learning_rate": 4.968192611127905e-06, "loss": 0.4897, "step": 5103 }, { "epoch": 0.31081204518466643, "grad_norm": 1.1106721040050749, "learning_rate": 4.968179921575897e-06, "loss": 0.4719, "step": 5104 }, { "epoch": 0.31087294096154433, "grad_norm": 1.0185108763173394, "learning_rate": 4.96816722950936e-06, "loss": 0.4818, "step": 5105 }, { "epoch": 0.3109338367384222, "grad_norm": 0.9494301165279092, "learning_rate": 4.968154534928305e-06, "loss": 0.5536, "step": 5106 }, { "epoch": 0.3109947325153001, "grad_norm": 1.0454517746604666, "learning_rate": 4.968141837832744e-06, "loss": 0.417, "step": 5107 }, { "epoch": 0.3110556282921779, "grad_norm": 1.0458485000057145, "learning_rate": 4.968129138222693e-06, "loss": 0.5538, "step": 5108 }, { "epoch": 0.3111165240690558, "grad_norm": 0.9817175401241943, "learning_rate": 4.9681164360981625e-06, "loss": 0.506, "step": 5109 }, { "epoch": 0.31117741984593367, "grad_norm": 1.064079450782691, "learning_rate": 4.968103731459166e-06, "loss": 0.4019, "step": 5110 }, { "epoch": 0.31123831562281157, "grad_norm": 0.9733204030831321, "learning_rate": 4.968091024305718e-06, "loss": 0.5133, "step": 5111 }, { "epoch": 0.3112992113996894, "grad_norm": 1.148137120243835, "learning_rate": 4.96807831463783e-06, "loss": 0.4694, "step": 5112 }, { "epoch": 0.3113601071765673, "grad_norm": 1.0348870612271959, "learning_rate": 4.968065602455514e-06, "loss": 0.4992, "step": 5113 }, { "epoch": 0.31142100295344516, "grad_norm": 1.063855979632825, "learning_rate": 4.9680528877587855e-06, "loss": 0.4604, "step": 5114 }, { "epoch": 0.31148189873032306, "grad_norm": 1.1151070492788793, "learning_rate": 4.968040170547655e-06, "loss": 0.4178, "step": 5115 }, { "epoch": 0.3115427945072009, "grad_norm": 1.034407546536507, "learning_rate": 4.968027450822136e-06, "loss": 0.5001, "step": 5116 }, { "epoch": 0.3116036902840788, "grad_norm": 1.053614645029064, "learning_rate": 4.9680147285822434e-06, "loss": 0.4987, "step": 5117 }, { "epoch": 0.31166458606095665, "grad_norm": 1.1884723976853817, "learning_rate": 4.968002003827988e-06, "loss": 0.4855, "step": 5118 }, { "epoch": 0.31172548183783455, "grad_norm": 1.0712409407223522, "learning_rate": 4.967989276559383e-06, "loss": 0.4187, "step": 5119 }, { "epoch": 0.3117863776147124, "grad_norm": 1.0925613597560053, "learning_rate": 4.967976546776442e-06, "loss": 0.4286, "step": 5120 }, { "epoch": 0.3118472733915903, "grad_norm": 1.1249124687109406, "learning_rate": 4.967963814479178e-06, "loss": 0.5909, "step": 5121 }, { "epoch": 0.31190816916846814, "grad_norm": 1.0960844276090487, "learning_rate": 4.967951079667604e-06, "loss": 0.4515, "step": 5122 }, { "epoch": 0.31196906494534604, "grad_norm": 1.066627781438501, "learning_rate": 4.967938342341734e-06, "loss": 0.4454, "step": 5123 }, { "epoch": 0.3120299607222239, "grad_norm": 1.0523619315652433, "learning_rate": 4.967925602501578e-06, "loss": 0.479, "step": 5124 }, { "epoch": 0.3120908564991018, "grad_norm": 0.9827881358594166, "learning_rate": 4.967912860147151e-06, "loss": 0.5163, "step": 5125 }, { "epoch": 0.3121517522759797, "grad_norm": 1.0217208418963493, "learning_rate": 4.9679001152784655e-06, "loss": 0.5244, "step": 5126 }, { "epoch": 0.31221264805285753, "grad_norm": 1.0079779412882612, "learning_rate": 4.967887367895535e-06, "loss": 0.483, "step": 5127 }, { "epoch": 0.31227354382973543, "grad_norm": 1.0092697160057587, "learning_rate": 4.9678746179983715e-06, "loss": 0.453, "step": 5128 }, { "epoch": 0.3123344396066133, "grad_norm": 1.0610488532758704, "learning_rate": 4.967861865586989e-06, "loss": 0.5073, "step": 5129 }, { "epoch": 0.3123953353834912, "grad_norm": 1.0575312906545655, "learning_rate": 4.967849110661401e-06, "loss": 0.4659, "step": 5130 }, { "epoch": 0.312456231160369, "grad_norm": 1.004383684049922, "learning_rate": 4.967836353221619e-06, "loss": 0.4958, "step": 5131 }, { "epoch": 0.3125171269372469, "grad_norm": 0.9821399252078046, "learning_rate": 4.967823593267657e-06, "loss": 0.4943, "step": 5132 }, { "epoch": 0.31257802271412477, "grad_norm": 1.032152585769433, "learning_rate": 4.967810830799527e-06, "loss": 0.4551, "step": 5133 }, { "epoch": 0.31263891849100267, "grad_norm": 0.9960900980174974, "learning_rate": 4.967798065817243e-06, "loss": 0.5007, "step": 5134 }, { "epoch": 0.3126998142678805, "grad_norm": 1.0701554295589961, "learning_rate": 4.967785298320817e-06, "loss": 0.4512, "step": 5135 }, { "epoch": 0.3127607100447584, "grad_norm": 1.0544001297742749, "learning_rate": 4.9677725283102635e-06, "loss": 0.4735, "step": 5136 }, { "epoch": 0.31282160582163626, "grad_norm": 1.021011333390711, "learning_rate": 4.967759755785594e-06, "loss": 0.5076, "step": 5137 }, { "epoch": 0.31288250159851416, "grad_norm": 1.039385029928438, "learning_rate": 4.967746980746823e-06, "loss": 0.4372, "step": 5138 }, { "epoch": 0.312943397375392, "grad_norm": 0.975909786746416, "learning_rate": 4.9677342031939625e-06, "loss": 0.5526, "step": 5139 }, { "epoch": 0.3130042931522699, "grad_norm": 0.9753685698177517, "learning_rate": 4.967721423127025e-06, "loss": 0.5066, "step": 5140 }, { "epoch": 0.31306518892914775, "grad_norm": 1.002098027327078, "learning_rate": 4.9677086405460244e-06, "loss": 0.4998, "step": 5141 }, { "epoch": 0.31312608470602565, "grad_norm": 1.0592575398938675, "learning_rate": 4.967695855450974e-06, "loss": 0.4155, "step": 5142 }, { "epoch": 0.3131869804829035, "grad_norm": 1.0743444798906776, "learning_rate": 4.967683067841887e-06, "loss": 0.422, "step": 5143 }, { "epoch": 0.3132478762597814, "grad_norm": 0.9861369460124829, "learning_rate": 4.967670277718774e-06, "loss": 0.4857, "step": 5144 }, { "epoch": 0.31330877203665924, "grad_norm": 1.0726165682837618, "learning_rate": 4.96765748508165e-06, "loss": 0.4527, "step": 5145 }, { "epoch": 0.31336966781353714, "grad_norm": 1.0947909078801936, "learning_rate": 4.9676446899305295e-06, "loss": 0.4085, "step": 5146 }, { "epoch": 0.313430563590415, "grad_norm": 1.0179623226618648, "learning_rate": 4.9676318922654234e-06, "loss": 0.4754, "step": 5147 }, { "epoch": 0.3134914593672929, "grad_norm": 1.0838382599504857, "learning_rate": 4.967619092086344e-06, "loss": 0.4496, "step": 5148 }, { "epoch": 0.31355235514417074, "grad_norm": 1.120816199417892, "learning_rate": 4.967606289393306e-06, "loss": 0.4312, "step": 5149 }, { "epoch": 0.31361325092104864, "grad_norm": 1.0244105066156357, "learning_rate": 4.967593484186323e-06, "loss": 0.3969, "step": 5150 }, { "epoch": 0.3136741466979265, "grad_norm": 1.0787757718424287, "learning_rate": 4.967580676465407e-06, "loss": 0.5213, "step": 5151 }, { "epoch": 0.3137350424748044, "grad_norm": 1.0717575960639651, "learning_rate": 4.967567866230571e-06, "loss": 0.516, "step": 5152 }, { "epoch": 0.3137959382516822, "grad_norm": 1.0765708026960146, "learning_rate": 4.967555053481827e-06, "loss": 0.4871, "step": 5153 }, { "epoch": 0.3138568340285601, "grad_norm": 0.9747025637472024, "learning_rate": 4.96754223821919e-06, "loss": 0.5041, "step": 5154 }, { "epoch": 0.313917729805438, "grad_norm": 1.0232788887105977, "learning_rate": 4.967529420442672e-06, "loss": 0.4866, "step": 5155 }, { "epoch": 0.3139786255823159, "grad_norm": 1.0560063321253057, "learning_rate": 4.967516600152287e-06, "loss": 0.4731, "step": 5156 }, { "epoch": 0.3140395213591937, "grad_norm": 1.1460914100110102, "learning_rate": 4.967503777348046e-06, "loss": 0.5381, "step": 5157 }, { "epoch": 0.3141004171360716, "grad_norm": 0.9913844222994829, "learning_rate": 4.967490952029965e-06, "loss": 0.5296, "step": 5158 }, { "epoch": 0.31416131291294946, "grad_norm": 1.0245303722363153, "learning_rate": 4.967478124198054e-06, "loss": 0.5088, "step": 5159 }, { "epoch": 0.31422220868982736, "grad_norm": 1.0087074505602667, "learning_rate": 4.967465293852329e-06, "loss": 0.5282, "step": 5160 }, { "epoch": 0.3142831044667052, "grad_norm": 1.10107940406176, "learning_rate": 4.967452460992802e-06, "loss": 0.4259, "step": 5161 }, { "epoch": 0.3143440002435831, "grad_norm": 1.0154212951906314, "learning_rate": 4.9674396256194845e-06, "loss": 0.4858, "step": 5162 }, { "epoch": 0.31440489602046096, "grad_norm": 1.1509542254432117, "learning_rate": 4.967426787732391e-06, "loss": 0.423, "step": 5163 }, { "epoch": 0.31446579179733886, "grad_norm": 1.0771217347106425, "learning_rate": 4.967413947331535e-06, "loss": 0.4695, "step": 5164 }, { "epoch": 0.3145266875742167, "grad_norm": 1.060254826777119, "learning_rate": 4.967401104416928e-06, "loss": 0.4748, "step": 5165 }, { "epoch": 0.3145875833510946, "grad_norm": 1.0237747775346377, "learning_rate": 4.9673882589885845e-06, "loss": 0.5199, "step": 5166 }, { "epoch": 0.3146484791279725, "grad_norm": 0.9954987116537547, "learning_rate": 4.967375411046518e-06, "loss": 0.4783, "step": 5167 }, { "epoch": 0.31470937490485035, "grad_norm": 1.0664643953101764, "learning_rate": 4.967362560590739e-06, "loss": 0.4752, "step": 5168 }, { "epoch": 0.31477027068172825, "grad_norm": 0.9759886291301523, "learning_rate": 4.967349707621264e-06, "loss": 0.4868, "step": 5169 }, { "epoch": 0.3148311664586061, "grad_norm": 1.0932100184401297, "learning_rate": 4.967336852138104e-06, "loss": 0.4455, "step": 5170 }, { "epoch": 0.314892062235484, "grad_norm": 1.0494460756915363, "learning_rate": 4.967323994141272e-06, "loss": 0.4364, "step": 5171 }, { "epoch": 0.31495295801236184, "grad_norm": 0.9736215824638996, "learning_rate": 4.9673111336307815e-06, "loss": 0.4842, "step": 5172 }, { "epoch": 0.31501385378923974, "grad_norm": 1.0507166522675302, "learning_rate": 4.967298270606646e-06, "loss": 0.5474, "step": 5173 }, { "epoch": 0.3150747495661176, "grad_norm": 0.9590860179643425, "learning_rate": 4.967285405068878e-06, "loss": 0.481, "step": 5174 }, { "epoch": 0.3151356453429955, "grad_norm": 1.0849201526111079, "learning_rate": 4.967272537017492e-06, "loss": 0.5294, "step": 5175 }, { "epoch": 0.31519654111987333, "grad_norm": 1.0213313137864841, "learning_rate": 4.967259666452499e-06, "loss": 0.4842, "step": 5176 }, { "epoch": 0.31525743689675123, "grad_norm": 1.0674407979718614, "learning_rate": 4.967246793373914e-06, "loss": 0.4931, "step": 5177 }, { "epoch": 0.3153183326736291, "grad_norm": 1.025745581144172, "learning_rate": 4.9672339177817485e-06, "loss": 0.4831, "step": 5178 }, { "epoch": 0.315379228450507, "grad_norm": 1.0603844106779188, "learning_rate": 4.967221039676016e-06, "loss": 0.4979, "step": 5179 }, { "epoch": 0.3154401242273848, "grad_norm": 0.9233946628773994, "learning_rate": 4.967208159056731e-06, "loss": 0.4885, "step": 5180 }, { "epoch": 0.3155010200042627, "grad_norm": 1.0014458175507566, "learning_rate": 4.967195275923905e-06, "loss": 0.5563, "step": 5181 }, { "epoch": 0.31556191578114057, "grad_norm": 0.9964051295852098, "learning_rate": 4.967182390277553e-06, "loss": 0.4737, "step": 5182 }, { "epoch": 0.31562281155801847, "grad_norm": 1.0367698456229089, "learning_rate": 4.967169502117685e-06, "loss": 0.4109, "step": 5183 }, { "epoch": 0.3156837073348963, "grad_norm": 0.9961950497326734, "learning_rate": 4.967156611444317e-06, "loss": 0.4956, "step": 5184 }, { "epoch": 0.3157446031117742, "grad_norm": 1.0139530269654236, "learning_rate": 4.967143718257461e-06, "loss": 0.464, "step": 5185 }, { "epoch": 0.31580549888865206, "grad_norm": 1.0147739040637103, "learning_rate": 4.9671308225571305e-06, "loss": 0.4849, "step": 5186 }, { "epoch": 0.31586639466552996, "grad_norm": 1.057885802561059, "learning_rate": 4.9671179243433385e-06, "loss": 0.4254, "step": 5187 }, { "epoch": 0.3159272904424078, "grad_norm": 1.013440651821097, "learning_rate": 4.967105023616097e-06, "loss": 0.4657, "step": 5188 }, { "epoch": 0.3159881862192857, "grad_norm": 1.1951178228896153, "learning_rate": 4.967092120375422e-06, "loss": 0.4406, "step": 5189 }, { "epoch": 0.31604908199616355, "grad_norm": 1.016042717519446, "learning_rate": 4.967079214621323e-06, "loss": 0.4075, "step": 5190 }, { "epoch": 0.31610997777304145, "grad_norm": 1.0347652402172578, "learning_rate": 4.967066306353816e-06, "loss": 0.4536, "step": 5191 }, { "epoch": 0.3161708735499193, "grad_norm": 1.0853364831517276, "learning_rate": 4.967053395572913e-06, "loss": 0.4254, "step": 5192 }, { "epoch": 0.3162317693267972, "grad_norm": 1.1021755350750566, "learning_rate": 4.9670404822786266e-06, "loss": 0.4185, "step": 5193 }, { "epoch": 0.31629266510367504, "grad_norm": 1.0347735606678234, "learning_rate": 4.967027566470972e-06, "loss": 0.5126, "step": 5194 }, { "epoch": 0.31635356088055294, "grad_norm": 1.073878364720785, "learning_rate": 4.96701464814996e-06, "loss": 0.4959, "step": 5195 }, { "epoch": 0.3164144566574308, "grad_norm": 0.9737813411200141, "learning_rate": 4.9670017273156045e-06, "loss": 0.4686, "step": 5196 }, { "epoch": 0.3164753524343087, "grad_norm": 1.0282399951065826, "learning_rate": 4.96698880396792e-06, "loss": 0.5368, "step": 5197 }, { "epoch": 0.31653624821118653, "grad_norm": 0.993455119842797, "learning_rate": 4.966975878106918e-06, "loss": 0.516, "step": 5198 }, { "epoch": 0.31659714398806443, "grad_norm": 1.0767053677612632, "learning_rate": 4.9669629497326126e-06, "loss": 0.4495, "step": 5199 }, { "epoch": 0.3166580397649423, "grad_norm": 1.1250318652652074, "learning_rate": 4.966950018845016e-06, "loss": 0.5841, "step": 5200 }, { "epoch": 0.3167189355418202, "grad_norm": 1.0580688737587123, "learning_rate": 4.966937085444142e-06, "loss": 0.5384, "step": 5201 }, { "epoch": 0.316779831318698, "grad_norm": 1.0396226465300666, "learning_rate": 4.966924149530005e-06, "loss": 0.4715, "step": 5202 }, { "epoch": 0.3168407270955759, "grad_norm": 1.0241968073086807, "learning_rate": 4.9669112111026154e-06, "loss": 0.5179, "step": 5203 }, { "epoch": 0.31690162287245377, "grad_norm": 0.9896675493427827, "learning_rate": 4.966898270161988e-06, "loss": 0.4685, "step": 5204 }, { "epoch": 0.31696251864933167, "grad_norm": 0.9761343316758749, "learning_rate": 4.9668853267081375e-06, "loss": 0.4637, "step": 5205 }, { "epoch": 0.3170234144262095, "grad_norm": 1.0495267309248282, "learning_rate": 4.966872380741074e-06, "loss": 0.482, "step": 5206 }, { "epoch": 0.3170843102030874, "grad_norm": 1.0766868032854158, "learning_rate": 4.966859432260813e-06, "loss": 0.42, "step": 5207 }, { "epoch": 0.3171452059799653, "grad_norm": 1.0263533337914674, "learning_rate": 4.966846481267367e-06, "loss": 0.5597, "step": 5208 }, { "epoch": 0.31720610175684316, "grad_norm": 1.031027424343123, "learning_rate": 4.966833527760749e-06, "loss": 0.4957, "step": 5209 }, { "epoch": 0.31726699753372106, "grad_norm": 1.0356210009409395, "learning_rate": 4.9668205717409715e-06, "loss": 0.5433, "step": 5210 }, { "epoch": 0.3173278933105989, "grad_norm": 1.002360090822641, "learning_rate": 4.966807613208049e-06, "loss": 0.4447, "step": 5211 }, { "epoch": 0.3173887890874768, "grad_norm": 0.9490554765320061, "learning_rate": 4.966794652161995e-06, "loss": 0.5111, "step": 5212 }, { "epoch": 0.31744968486435465, "grad_norm": 0.9913620172122898, "learning_rate": 4.966781688602821e-06, "loss": 0.494, "step": 5213 }, { "epoch": 0.31751058064123255, "grad_norm": 1.1134517565072355, "learning_rate": 4.966768722530541e-06, "loss": 0.458, "step": 5214 }, { "epoch": 0.3175714764181104, "grad_norm": 1.128614289283085, "learning_rate": 4.966755753945168e-06, "loss": 0.527, "step": 5215 }, { "epoch": 0.3176323721949883, "grad_norm": 1.0461813842968541, "learning_rate": 4.966742782846717e-06, "loss": 0.5075, "step": 5216 }, { "epoch": 0.31769326797186614, "grad_norm": 0.9111136105332313, "learning_rate": 4.966729809235199e-06, "loss": 0.5192, "step": 5217 }, { "epoch": 0.31775416374874405, "grad_norm": 1.0140206842036108, "learning_rate": 4.966716833110627e-06, "loss": 0.5137, "step": 5218 }, { "epoch": 0.3178150595256219, "grad_norm": 1.1004716199105786, "learning_rate": 4.966703854473016e-06, "loss": 0.4183, "step": 5219 }, { "epoch": 0.3178759553024998, "grad_norm": 1.0680326300216205, "learning_rate": 4.966690873322379e-06, "loss": 0.52, "step": 5220 }, { "epoch": 0.31793685107937764, "grad_norm": 1.1294550521149913, "learning_rate": 4.966677889658727e-06, "loss": 0.4894, "step": 5221 }, { "epoch": 0.31799774685625554, "grad_norm": 0.9989332897459482, "learning_rate": 4.966664903482077e-06, "loss": 0.4466, "step": 5222 }, { "epoch": 0.3180586426331334, "grad_norm": 1.0663824450711468, "learning_rate": 4.966651914792438e-06, "loss": 0.4638, "step": 5223 }, { "epoch": 0.3181195384100113, "grad_norm": 0.9978886955754611, "learning_rate": 4.966638923589826e-06, "loss": 0.5356, "step": 5224 }, { "epoch": 0.3181804341868891, "grad_norm": 0.9647445514580674, "learning_rate": 4.966625929874254e-06, "loss": 0.501, "step": 5225 }, { "epoch": 0.31824132996376703, "grad_norm": 1.1570338910985234, "learning_rate": 4.966612933645735e-06, "loss": 0.4763, "step": 5226 }, { "epoch": 0.3183022257406449, "grad_norm": 1.029038009408433, "learning_rate": 4.966599934904281e-06, "loss": 0.4855, "step": 5227 }, { "epoch": 0.3183631215175228, "grad_norm": 1.1025559631038306, "learning_rate": 4.966586933649907e-06, "loss": 0.4692, "step": 5228 }, { "epoch": 0.3184240172944006, "grad_norm": 1.0797213652909536, "learning_rate": 4.966573929882625e-06, "loss": 0.5103, "step": 5229 }, { "epoch": 0.3184849130712785, "grad_norm": 1.1191169021320893, "learning_rate": 4.96656092360245e-06, "loss": 0.4976, "step": 5230 }, { "epoch": 0.31854580884815636, "grad_norm": 0.9840511039611001, "learning_rate": 4.966547914809393e-06, "loss": 0.5079, "step": 5231 }, { "epoch": 0.31860670462503426, "grad_norm": 0.9762653856323117, "learning_rate": 4.966534903503469e-06, "loss": 0.482, "step": 5232 }, { "epoch": 0.3186676004019121, "grad_norm": 1.0087535788662907, "learning_rate": 4.96652188968469e-06, "loss": 0.4537, "step": 5233 }, { "epoch": 0.31872849617879, "grad_norm": 1.0025825450585968, "learning_rate": 4.966508873353069e-06, "loss": 0.4183, "step": 5234 }, { "epoch": 0.31878939195566786, "grad_norm": 0.9670296322272832, "learning_rate": 4.966495854508621e-06, "loss": 0.5255, "step": 5235 }, { "epoch": 0.31885028773254576, "grad_norm": 1.0929291076365029, "learning_rate": 4.9664828331513585e-06, "loss": 0.4897, "step": 5236 }, { "epoch": 0.3189111835094236, "grad_norm": 1.0674021647660434, "learning_rate": 4.966469809281294e-06, "loss": 0.4615, "step": 5237 }, { "epoch": 0.3189720792863015, "grad_norm": 1.1161719536886323, "learning_rate": 4.966456782898441e-06, "loss": 0.4645, "step": 5238 }, { "epoch": 0.31903297506317935, "grad_norm": 1.009593753939317, "learning_rate": 4.9664437540028135e-06, "loss": 0.5382, "step": 5239 }, { "epoch": 0.31909387084005725, "grad_norm": 1.025123557208146, "learning_rate": 4.966430722594424e-06, "loss": 0.4427, "step": 5240 }, { "epoch": 0.3191547666169351, "grad_norm": 1.143765240046962, "learning_rate": 4.966417688673287e-06, "loss": 0.5381, "step": 5241 }, { "epoch": 0.319215662393813, "grad_norm": 1.0561533678595723, "learning_rate": 4.966404652239415e-06, "loss": 0.5628, "step": 5242 }, { "epoch": 0.31927655817069084, "grad_norm": 1.025167043313535, "learning_rate": 4.9663916132928205e-06, "loss": 0.4419, "step": 5243 }, { "epoch": 0.31933745394756874, "grad_norm": 1.0549666885150353, "learning_rate": 4.9663785718335176e-06, "loss": 0.4255, "step": 5244 }, { "epoch": 0.3193983497244466, "grad_norm": 1.0856236094064156, "learning_rate": 4.966365527861519e-06, "loss": 0.439, "step": 5245 }, { "epoch": 0.3194592455013245, "grad_norm": 1.15186293614338, "learning_rate": 4.96635248137684e-06, "loss": 0.5097, "step": 5246 }, { "epoch": 0.31952014127820233, "grad_norm": 1.0176238180586639, "learning_rate": 4.966339432379491e-06, "loss": 0.4666, "step": 5247 }, { "epoch": 0.31958103705508023, "grad_norm": 1.1417038735488148, "learning_rate": 4.9663263808694876e-06, "loss": 0.4531, "step": 5248 }, { "epoch": 0.31964193283195813, "grad_norm": 1.0007650137426092, "learning_rate": 4.966313326846842e-06, "loss": 0.4612, "step": 5249 }, { "epoch": 0.319702828608836, "grad_norm": 0.9898036601313632, "learning_rate": 4.966300270311567e-06, "loss": 0.4463, "step": 5250 }, { "epoch": 0.3197637243857139, "grad_norm": 0.9564685110755365, "learning_rate": 4.966287211263678e-06, "loss": 0.5054, "step": 5251 }, { "epoch": 0.3198246201625917, "grad_norm": 0.9770561361436657, "learning_rate": 4.966274149703185e-06, "loss": 0.4654, "step": 5252 }, { "epoch": 0.3198855159394696, "grad_norm": 0.9772496025927199, "learning_rate": 4.966261085630104e-06, "loss": 0.5085, "step": 5253 }, { "epoch": 0.31994641171634747, "grad_norm": 1.0741625834500306, "learning_rate": 4.966248019044447e-06, "loss": 0.4822, "step": 5254 }, { "epoch": 0.32000730749322537, "grad_norm": 1.068926811779887, "learning_rate": 4.966234949946228e-06, "loss": 0.4671, "step": 5255 }, { "epoch": 0.3200682032701032, "grad_norm": 0.968541046503007, "learning_rate": 4.966221878335461e-06, "loss": 0.4737, "step": 5256 }, { "epoch": 0.3201290990469811, "grad_norm": 0.9821241799272363, "learning_rate": 4.966208804212157e-06, "loss": 0.4644, "step": 5257 }, { "epoch": 0.32018999482385896, "grad_norm": 0.9798028953200881, "learning_rate": 4.966195727576332e-06, "loss": 0.4217, "step": 5258 }, { "epoch": 0.32025089060073686, "grad_norm": 1.048403243728136, "learning_rate": 4.966182648427997e-06, "loss": 0.4725, "step": 5259 }, { "epoch": 0.3203117863776147, "grad_norm": 1.0648190038950944, "learning_rate": 4.966169566767168e-06, "loss": 0.4249, "step": 5260 }, { "epoch": 0.3203726821544926, "grad_norm": 1.0419075697536015, "learning_rate": 4.966156482593856e-06, "loss": 0.4533, "step": 5261 }, { "epoch": 0.32043357793137045, "grad_norm": 0.9828492802872503, "learning_rate": 4.966143395908074e-06, "loss": 0.5005, "step": 5262 }, { "epoch": 0.32049447370824835, "grad_norm": 1.1086211802171517, "learning_rate": 4.966130306709837e-06, "loss": 0.4951, "step": 5263 }, { "epoch": 0.3205553694851262, "grad_norm": 1.061070651711483, "learning_rate": 4.966117214999157e-06, "loss": 0.495, "step": 5264 }, { "epoch": 0.3206162652620041, "grad_norm": 1.0764906693215783, "learning_rate": 4.966104120776049e-06, "loss": 0.4396, "step": 5265 }, { "epoch": 0.32067716103888194, "grad_norm": 1.111089952714541, "learning_rate": 4.9660910240405265e-06, "loss": 0.4162, "step": 5266 }, { "epoch": 0.32073805681575984, "grad_norm": 1.0201253099730319, "learning_rate": 4.966077924792601e-06, "loss": 0.5672, "step": 5267 }, { "epoch": 0.3207989525926377, "grad_norm": 1.1863800859546674, "learning_rate": 4.966064823032285e-06, "loss": 0.4919, "step": 5268 }, { "epoch": 0.3208598483695156, "grad_norm": 0.9702455405691068, "learning_rate": 4.966051718759595e-06, "loss": 0.5095, "step": 5269 }, { "epoch": 0.32092074414639343, "grad_norm": 1.0361345843297132, "learning_rate": 4.966038611974542e-06, "loss": 0.4029, "step": 5270 }, { "epoch": 0.32098163992327133, "grad_norm": 0.9872437037376248, "learning_rate": 4.966025502677141e-06, "loss": 0.4718, "step": 5271 }, { "epoch": 0.3210425357001492, "grad_norm": 0.9829421450411355, "learning_rate": 4.966012390867404e-06, "loss": 0.4704, "step": 5272 }, { "epoch": 0.3211034314770271, "grad_norm": 1.0313116473488897, "learning_rate": 4.965999276545344e-06, "loss": 0.5036, "step": 5273 }, { "epoch": 0.3211643272539049, "grad_norm": 0.9768598128963751, "learning_rate": 4.9659861597109764e-06, "loss": 0.5167, "step": 5274 }, { "epoch": 0.3212252230307828, "grad_norm": 0.9619050468890626, "learning_rate": 4.965973040364313e-06, "loss": 0.4939, "step": 5275 }, { "epoch": 0.32128611880766067, "grad_norm": 0.9890244680008914, "learning_rate": 4.965959918505368e-06, "loss": 0.4962, "step": 5276 }, { "epoch": 0.32134701458453857, "grad_norm": 0.9743627367802712, "learning_rate": 4.965946794134153e-06, "loss": 0.4483, "step": 5277 }, { "epoch": 0.3214079103614164, "grad_norm": 1.0020512047248733, "learning_rate": 4.965933667250683e-06, "loss": 0.5542, "step": 5278 }, { "epoch": 0.3214688061382943, "grad_norm": 1.0179662819624535, "learning_rate": 4.965920537854973e-06, "loss": 0.5295, "step": 5279 }, { "epoch": 0.32152970191517216, "grad_norm": 1.1371379918934763, "learning_rate": 4.965907405947033e-06, "loss": 0.4577, "step": 5280 }, { "epoch": 0.32159059769205006, "grad_norm": 1.0756820680722052, "learning_rate": 4.965894271526877e-06, "loss": 0.4688, "step": 5281 }, { "epoch": 0.3216514934689279, "grad_norm": 1.0193450225007683, "learning_rate": 4.96588113459452e-06, "loss": 0.4601, "step": 5282 }, { "epoch": 0.3217123892458058, "grad_norm": 0.9924944930726552, "learning_rate": 4.965867995149974e-06, "loss": 0.4626, "step": 5283 }, { "epoch": 0.32177328502268365, "grad_norm": 0.985337322923826, "learning_rate": 4.965854853193254e-06, "loss": 0.426, "step": 5284 }, { "epoch": 0.32183418079956155, "grad_norm": 1.0695885892582881, "learning_rate": 4.965841708724372e-06, "loss": 0.4779, "step": 5285 }, { "epoch": 0.3218950765764394, "grad_norm": 1.0538907059785014, "learning_rate": 4.965828561743341e-06, "loss": 0.5583, "step": 5286 }, { "epoch": 0.3219559723533173, "grad_norm": 1.1126699506278155, "learning_rate": 4.965815412250176e-06, "loss": 0.4254, "step": 5287 }, { "epoch": 0.32201686813019514, "grad_norm": 0.9950146090409945, "learning_rate": 4.965802260244889e-06, "loss": 0.5558, "step": 5288 }, { "epoch": 0.32207776390707304, "grad_norm": 0.9452268100931123, "learning_rate": 4.965789105727494e-06, "loss": 0.4676, "step": 5289 }, { "epoch": 0.32213865968395095, "grad_norm": 1.0053869812239304, "learning_rate": 4.965775948698005e-06, "loss": 0.4563, "step": 5290 }, { "epoch": 0.3221995554608288, "grad_norm": 0.9610110891673767, "learning_rate": 4.965762789156434e-06, "loss": 0.5339, "step": 5291 }, { "epoch": 0.3222604512377067, "grad_norm": 1.0587284397526124, "learning_rate": 4.965749627102795e-06, "loss": 0.5871, "step": 5292 }, { "epoch": 0.32232134701458454, "grad_norm": 0.972153841318407, "learning_rate": 4.965736462537102e-06, "loss": 0.4952, "step": 5293 }, { "epoch": 0.32238224279146244, "grad_norm": 1.070224980729414, "learning_rate": 4.965723295459367e-06, "loss": 0.4721, "step": 5294 }, { "epoch": 0.3224431385683403, "grad_norm": 0.9643074738844868, "learning_rate": 4.965710125869606e-06, "loss": 0.4769, "step": 5295 }, { "epoch": 0.3225040343452182, "grad_norm": 1.1419679449094908, "learning_rate": 4.9656969537678295e-06, "loss": 0.4254, "step": 5296 }, { "epoch": 0.322564930122096, "grad_norm": 1.0764453189450254, "learning_rate": 4.965683779154053e-06, "loss": 0.4374, "step": 5297 }, { "epoch": 0.32262582589897393, "grad_norm": 1.056704701081694, "learning_rate": 4.965670602028289e-06, "loss": 0.4288, "step": 5298 }, { "epoch": 0.3226867216758518, "grad_norm": 1.0610201679692526, "learning_rate": 4.9656574223905505e-06, "loss": 0.4075, "step": 5299 }, { "epoch": 0.3227476174527297, "grad_norm": 1.117401846272463, "learning_rate": 4.965644240240852e-06, "loss": 0.4105, "step": 5300 }, { "epoch": 0.3228085132296075, "grad_norm": 1.0307577306493014, "learning_rate": 4.965631055579206e-06, "loss": 0.4711, "step": 5301 }, { "epoch": 0.3228694090064854, "grad_norm": 1.0700149978808409, "learning_rate": 4.965617868405627e-06, "loss": 0.5088, "step": 5302 }, { "epoch": 0.32293030478336326, "grad_norm": 0.9520757970127525, "learning_rate": 4.965604678720128e-06, "loss": 0.4448, "step": 5303 }, { "epoch": 0.32299120056024117, "grad_norm": 1.0613253632885506, "learning_rate": 4.96559148652272e-06, "loss": 0.5065, "step": 5304 }, { "epoch": 0.323052096337119, "grad_norm": 1.0565151070456502, "learning_rate": 4.965578291813421e-06, "loss": 0.4871, "step": 5305 }, { "epoch": 0.3231129921139969, "grad_norm": 1.0640647937929493, "learning_rate": 4.9655650945922405e-06, "loss": 0.511, "step": 5306 }, { "epoch": 0.32317388789087476, "grad_norm": 1.0819492954087684, "learning_rate": 4.965551894859195e-06, "loss": 0.4456, "step": 5307 }, { "epoch": 0.32323478366775266, "grad_norm": 1.0198192745648615, "learning_rate": 4.965538692614296e-06, "loss": 0.4916, "step": 5308 }, { "epoch": 0.3232956794446305, "grad_norm": 1.1016141622231534, "learning_rate": 4.965525487857557e-06, "loss": 0.477, "step": 5309 }, { "epoch": 0.3233565752215084, "grad_norm": 1.077204908433035, "learning_rate": 4.965512280588992e-06, "loss": 0.481, "step": 5310 }, { "epoch": 0.32341747099838625, "grad_norm": 1.043149138754214, "learning_rate": 4.9654990708086144e-06, "loss": 0.4458, "step": 5311 }, { "epoch": 0.32347836677526415, "grad_norm": 0.9855101405555371, "learning_rate": 4.965485858516438e-06, "loss": 0.4582, "step": 5312 }, { "epoch": 0.323539262552142, "grad_norm": 1.0765677385199608, "learning_rate": 4.965472643712476e-06, "loss": 0.4472, "step": 5313 }, { "epoch": 0.3236001583290199, "grad_norm": 1.1872396982243296, "learning_rate": 4.965459426396741e-06, "loss": 0.3781, "step": 5314 }, { "epoch": 0.32366105410589774, "grad_norm": 1.1200319154638796, "learning_rate": 4.965446206569248e-06, "loss": 0.4386, "step": 5315 }, { "epoch": 0.32372194988277564, "grad_norm": 0.9977326650695858, "learning_rate": 4.9654329842300086e-06, "loss": 0.4871, "step": 5316 }, { "epoch": 0.3237828456596535, "grad_norm": 1.0099712277387412, "learning_rate": 4.965419759379038e-06, "loss": 0.4685, "step": 5317 }, { "epoch": 0.3238437414365314, "grad_norm": 1.1268878314238044, "learning_rate": 4.965406532016349e-06, "loss": 0.4252, "step": 5318 }, { "epoch": 0.32390463721340923, "grad_norm": 0.9835658880865078, "learning_rate": 4.965393302141955e-06, "loss": 0.461, "step": 5319 }, { "epoch": 0.32396553299028713, "grad_norm": 1.0878997959472292, "learning_rate": 4.96538006975587e-06, "loss": 0.4924, "step": 5320 }, { "epoch": 0.324026428767165, "grad_norm": 0.9921212242171656, "learning_rate": 4.965366834858107e-06, "loss": 0.4598, "step": 5321 }, { "epoch": 0.3240873245440429, "grad_norm": 0.9783533620252665, "learning_rate": 4.9653535974486785e-06, "loss": 0.4669, "step": 5322 }, { "epoch": 0.3241482203209207, "grad_norm": 1.0069718686664204, "learning_rate": 4.965340357527599e-06, "loss": 0.4498, "step": 5323 }, { "epoch": 0.3242091160977986, "grad_norm": 1.031074544643116, "learning_rate": 4.965327115094883e-06, "loss": 0.4873, "step": 5324 }, { "epoch": 0.32427001187467647, "grad_norm": 0.9720821887839692, "learning_rate": 4.965313870150543e-06, "loss": 0.4784, "step": 5325 }, { "epoch": 0.32433090765155437, "grad_norm": 1.080914841018448, "learning_rate": 4.965300622694592e-06, "loss": 0.4335, "step": 5326 }, { "epoch": 0.3243918034284322, "grad_norm": 1.1547449342331648, "learning_rate": 4.965287372727044e-06, "loss": 0.4447, "step": 5327 }, { "epoch": 0.3244526992053101, "grad_norm": 0.988456558822998, "learning_rate": 4.965274120247913e-06, "loss": 0.5163, "step": 5328 }, { "epoch": 0.32451359498218796, "grad_norm": 0.9846730006081421, "learning_rate": 4.965260865257211e-06, "loss": 0.4846, "step": 5329 }, { "epoch": 0.32457449075906586, "grad_norm": 0.9886267070484621, "learning_rate": 4.965247607754953e-06, "loss": 0.5415, "step": 5330 }, { "epoch": 0.32463538653594376, "grad_norm": 1.0616080706166082, "learning_rate": 4.965234347741153e-06, "loss": 0.4372, "step": 5331 }, { "epoch": 0.3246962823128216, "grad_norm": 0.9987927420750937, "learning_rate": 4.965221085215822e-06, "loss": 0.4569, "step": 5332 }, { "epoch": 0.3247571780896995, "grad_norm": 1.1556869754195997, "learning_rate": 4.965207820178976e-06, "loss": 0.4332, "step": 5333 }, { "epoch": 0.32481807386657735, "grad_norm": 1.0192941584090898, "learning_rate": 4.965194552630626e-06, "loss": 0.5315, "step": 5334 }, { "epoch": 0.32487896964345525, "grad_norm": 1.073933369757585, "learning_rate": 4.965181282570788e-06, "loss": 0.4076, "step": 5335 }, { "epoch": 0.3249398654203331, "grad_norm": 1.0615701894643383, "learning_rate": 4.965168009999475e-06, "loss": 0.5207, "step": 5336 }, { "epoch": 0.325000761197211, "grad_norm": 0.984261603503034, "learning_rate": 4.9651547349166995e-06, "loss": 0.4913, "step": 5337 }, { "epoch": 0.32506165697408884, "grad_norm": 1.1139616091061613, "learning_rate": 4.9651414573224765e-06, "loss": 0.4661, "step": 5338 }, { "epoch": 0.32512255275096674, "grad_norm": 1.094189127583266, "learning_rate": 4.965128177216818e-06, "loss": 0.5265, "step": 5339 }, { "epoch": 0.3251834485278446, "grad_norm": 1.008719583973579, "learning_rate": 4.965114894599738e-06, "loss": 0.4561, "step": 5340 }, { "epoch": 0.3252443443047225, "grad_norm": 1.0722981632029192, "learning_rate": 4.96510160947125e-06, "loss": 0.4262, "step": 5341 }, { "epoch": 0.32530524008160033, "grad_norm": 1.0179378811840754, "learning_rate": 4.965088321831368e-06, "loss": 0.4908, "step": 5342 }, { "epoch": 0.32536613585847823, "grad_norm": 1.0327603279127926, "learning_rate": 4.9650750316801055e-06, "loss": 0.4315, "step": 5343 }, { "epoch": 0.3254270316353561, "grad_norm": 1.0780054161332941, "learning_rate": 4.965061739017476e-06, "loss": 0.5261, "step": 5344 }, { "epoch": 0.325487927412234, "grad_norm": 0.9890190782699287, "learning_rate": 4.965048443843492e-06, "loss": 0.5124, "step": 5345 }, { "epoch": 0.3255488231891118, "grad_norm": 1.101394270623113, "learning_rate": 4.965035146158168e-06, "loss": 0.4496, "step": 5346 }, { "epoch": 0.3256097189659897, "grad_norm": 1.0851466227412605, "learning_rate": 4.965021845961518e-06, "loss": 0.4856, "step": 5347 }, { "epoch": 0.32567061474286757, "grad_norm": 1.1513990287261306, "learning_rate": 4.965008543253555e-06, "loss": 0.4532, "step": 5348 }, { "epoch": 0.32573151051974547, "grad_norm": 1.0386476694136604, "learning_rate": 4.964995238034293e-06, "loss": 0.5142, "step": 5349 }, { "epoch": 0.3257924062966233, "grad_norm": 0.950925600667484, "learning_rate": 4.9649819303037445e-06, "loss": 0.4809, "step": 5350 }, { "epoch": 0.3258533020735012, "grad_norm": 1.0129824416937943, "learning_rate": 4.964968620061923e-06, "loss": 0.4814, "step": 5351 }, { "epoch": 0.32591419785037906, "grad_norm": 0.9279766887430841, "learning_rate": 4.964955307308844e-06, "loss": 0.4539, "step": 5352 }, { "epoch": 0.32597509362725696, "grad_norm": 1.1061614525701533, "learning_rate": 4.964941992044519e-06, "loss": 0.4349, "step": 5353 }, { "epoch": 0.3260359894041348, "grad_norm": 1.0897452988626748, "learning_rate": 4.964928674268963e-06, "loss": 0.4311, "step": 5354 }, { "epoch": 0.3260968851810127, "grad_norm": 1.0547096849660595, "learning_rate": 4.964915353982188e-06, "loss": 0.4275, "step": 5355 }, { "epoch": 0.32615778095789055, "grad_norm": 1.0057016679472364, "learning_rate": 4.964902031184209e-06, "loss": 0.4835, "step": 5356 }, { "epoch": 0.32621867673476845, "grad_norm": 1.0545616558165871, "learning_rate": 4.964888705875039e-06, "loss": 0.5273, "step": 5357 }, { "epoch": 0.3262795725116463, "grad_norm": 1.0155965948594166, "learning_rate": 4.964875378054691e-06, "loss": 0.4924, "step": 5358 }, { "epoch": 0.3263404682885242, "grad_norm": 1.040208702741414, "learning_rate": 4.96486204772318e-06, "loss": 0.4501, "step": 5359 }, { "epoch": 0.32640136406540204, "grad_norm": 1.021457205364964, "learning_rate": 4.964848714880519e-06, "loss": 0.5381, "step": 5360 }, { "epoch": 0.32646225984227994, "grad_norm": 1.0641789508140373, "learning_rate": 4.964835379526721e-06, "loss": 0.4833, "step": 5361 }, { "epoch": 0.3265231556191578, "grad_norm": 1.0576304485658274, "learning_rate": 4.9648220416618e-06, "loss": 0.4407, "step": 5362 }, { "epoch": 0.3265840513960357, "grad_norm": 1.0751180401059997, "learning_rate": 4.964808701285769e-06, "loss": 0.4613, "step": 5363 }, { "epoch": 0.32664494717291354, "grad_norm": 1.0772977097087046, "learning_rate": 4.964795358398643e-06, "loss": 0.5177, "step": 5364 }, { "epoch": 0.32670584294979144, "grad_norm": 1.0782283784960045, "learning_rate": 4.964782013000434e-06, "loss": 0.436, "step": 5365 }, { "epoch": 0.3267667387266693, "grad_norm": 0.9558280746938584, "learning_rate": 4.9647686650911564e-06, "loss": 0.5557, "step": 5366 }, { "epoch": 0.3268276345035472, "grad_norm": 1.1076045154840382, "learning_rate": 4.9647553146708245e-06, "loss": 0.4184, "step": 5367 }, { "epoch": 0.326888530280425, "grad_norm": 1.1327151340657735, "learning_rate": 4.964741961739451e-06, "loss": 0.4176, "step": 5368 }, { "epoch": 0.3269494260573029, "grad_norm": 0.8830045005616807, "learning_rate": 4.964728606297049e-06, "loss": 0.4958, "step": 5369 }, { "epoch": 0.3270103218341808, "grad_norm": 0.9075753978306286, "learning_rate": 4.964715248343633e-06, "loss": 0.4596, "step": 5370 }, { "epoch": 0.3270712176110587, "grad_norm": 0.9818494618398115, "learning_rate": 4.964701887879217e-06, "loss": 0.4984, "step": 5371 }, { "epoch": 0.3271321133879366, "grad_norm": 1.0503757624137744, "learning_rate": 4.9646885249038125e-06, "loss": 0.4976, "step": 5372 }, { "epoch": 0.3271930091648144, "grad_norm": 0.9731003496914412, "learning_rate": 4.964675159417435e-06, "loss": 0.4822, "step": 5373 }, { "epoch": 0.3272539049416923, "grad_norm": 1.1027689165334487, "learning_rate": 4.964661791420099e-06, "loss": 0.4568, "step": 5374 }, { "epoch": 0.32731480071857016, "grad_norm": 1.0858673668462981, "learning_rate": 4.9646484209118155e-06, "loss": 0.4582, "step": 5375 }, { "epoch": 0.32737569649544807, "grad_norm": 1.028278038283817, "learning_rate": 4.9646350478925996e-06, "loss": 0.5011, "step": 5376 }, { "epoch": 0.3274365922723259, "grad_norm": 1.088005083893556, "learning_rate": 4.9646216723624654e-06, "loss": 0.4059, "step": 5377 }, { "epoch": 0.3274974880492038, "grad_norm": 0.947120316717683, "learning_rate": 4.964608294321425e-06, "loss": 0.5386, "step": 5378 }, { "epoch": 0.32755838382608166, "grad_norm": 0.9470164285235089, "learning_rate": 4.964594913769493e-06, "loss": 0.47, "step": 5379 }, { "epoch": 0.32761927960295956, "grad_norm": 1.1868135286086066, "learning_rate": 4.964581530706683e-06, "loss": 0.4866, "step": 5380 }, { "epoch": 0.3276801753798374, "grad_norm": 1.001401552103552, "learning_rate": 4.964568145133009e-06, "loss": 0.4338, "step": 5381 }, { "epoch": 0.3277410711567153, "grad_norm": 1.0315774635152515, "learning_rate": 4.964554757048485e-06, "loss": 0.4845, "step": 5382 }, { "epoch": 0.32780196693359315, "grad_norm": 1.0442480601581638, "learning_rate": 4.964541366453123e-06, "loss": 0.447, "step": 5383 }, { "epoch": 0.32786286271047105, "grad_norm": 1.1397749467727523, "learning_rate": 4.964527973346937e-06, "loss": 0.4735, "step": 5384 }, { "epoch": 0.3279237584873489, "grad_norm": 1.0626966001195395, "learning_rate": 4.964514577729942e-06, "loss": 0.489, "step": 5385 }, { "epoch": 0.3279846542642268, "grad_norm": 1.0722669429586085, "learning_rate": 4.9645011796021504e-06, "loss": 0.4748, "step": 5386 }, { "epoch": 0.32804555004110464, "grad_norm": 0.9462916754987175, "learning_rate": 4.964487778963576e-06, "loss": 0.5013, "step": 5387 }, { "epoch": 0.32810644581798254, "grad_norm": 0.9956001805486306, "learning_rate": 4.964474375814233e-06, "loss": 0.4569, "step": 5388 }, { "epoch": 0.3281673415948604, "grad_norm": 1.137134913352318, "learning_rate": 4.964460970154135e-06, "loss": 0.536, "step": 5389 }, { "epoch": 0.3282282373717383, "grad_norm": 0.9837019148836657, "learning_rate": 4.964447561983295e-06, "loss": 0.4917, "step": 5390 }, { "epoch": 0.32828913314861613, "grad_norm": 1.0730390825775635, "learning_rate": 4.964434151301727e-06, "loss": 0.455, "step": 5391 }, { "epoch": 0.32835002892549403, "grad_norm": 0.9270143503860433, "learning_rate": 4.964420738109444e-06, "loss": 0.5408, "step": 5392 }, { "epoch": 0.3284109247023719, "grad_norm": 1.1396571593796767, "learning_rate": 4.964407322406462e-06, "loss": 0.4307, "step": 5393 }, { "epoch": 0.3284718204792498, "grad_norm": 1.0743446899044908, "learning_rate": 4.964393904192792e-06, "loss": 0.3741, "step": 5394 }, { "epoch": 0.3285327162561276, "grad_norm": 1.1047715492085728, "learning_rate": 4.964380483468449e-06, "loss": 0.454, "step": 5395 }, { "epoch": 0.3285936120330055, "grad_norm": 0.9806631932585276, "learning_rate": 4.964367060233446e-06, "loss": 0.5139, "step": 5396 }, { "epoch": 0.32865450780988337, "grad_norm": 0.9338535706512983, "learning_rate": 4.964353634487797e-06, "loss": 0.5112, "step": 5397 }, { "epoch": 0.32871540358676127, "grad_norm": 1.123641408760829, "learning_rate": 4.964340206231517e-06, "loss": 0.4356, "step": 5398 }, { "epoch": 0.3287762993636391, "grad_norm": 0.9743659064129739, "learning_rate": 4.964326775464617e-06, "loss": 0.4592, "step": 5399 }, { "epoch": 0.328837195140517, "grad_norm": 1.0349769667503785, "learning_rate": 4.964313342187113e-06, "loss": 0.5403, "step": 5400 }, { "epoch": 0.32889809091739486, "grad_norm": 1.1339998390696693, "learning_rate": 4.964299906399018e-06, "loss": 0.5128, "step": 5401 }, { "epoch": 0.32895898669427276, "grad_norm": 1.1184052542292011, "learning_rate": 4.964286468100345e-06, "loss": 0.4683, "step": 5402 }, { "epoch": 0.3290198824711506, "grad_norm": 0.9714638785079127, "learning_rate": 4.964273027291108e-06, "loss": 0.5015, "step": 5403 }, { "epoch": 0.3290807782480285, "grad_norm": 1.0568907775863363, "learning_rate": 4.964259583971321e-06, "loss": 0.483, "step": 5404 }, { "epoch": 0.32914167402490635, "grad_norm": 1.117095373015537, "learning_rate": 4.964246138140998e-06, "loss": 0.4285, "step": 5405 }, { "epoch": 0.32920256980178425, "grad_norm": 0.9697678735348156, "learning_rate": 4.9642326898001515e-06, "loss": 0.5114, "step": 5406 }, { "epoch": 0.3292634655786621, "grad_norm": 0.9712316104822428, "learning_rate": 4.964219238948797e-06, "loss": 0.4441, "step": 5407 }, { "epoch": 0.32932436135554, "grad_norm": 1.0388912220808995, "learning_rate": 4.964205785586946e-06, "loss": 0.4579, "step": 5408 }, { "epoch": 0.32938525713241784, "grad_norm": 1.0309979453948228, "learning_rate": 4.964192329714614e-06, "loss": 0.4698, "step": 5409 }, { "epoch": 0.32944615290929574, "grad_norm": 0.9597145294836522, "learning_rate": 4.964178871331815e-06, "loss": 0.4822, "step": 5410 }, { "epoch": 0.3295070486861736, "grad_norm": 0.9624164536638524, "learning_rate": 4.964165410438561e-06, "loss": 0.501, "step": 5411 }, { "epoch": 0.3295679444630515, "grad_norm": 1.0763367020611825, "learning_rate": 4.964151947034866e-06, "loss": 0.4181, "step": 5412 }, { "epoch": 0.3296288402399294, "grad_norm": 1.0062063974384863, "learning_rate": 4.964138481120744e-06, "loss": 0.4763, "step": 5413 }, { "epoch": 0.32968973601680723, "grad_norm": 1.1178471707010247, "learning_rate": 4.9641250126962096e-06, "loss": 0.3912, "step": 5414 }, { "epoch": 0.32975063179368513, "grad_norm": 1.033427396122199, "learning_rate": 4.964111541761276e-06, "loss": 0.4271, "step": 5415 }, { "epoch": 0.329811527570563, "grad_norm": 0.9866621834527847, "learning_rate": 4.964098068315957e-06, "loss": 0.4995, "step": 5416 }, { "epoch": 0.3298724233474409, "grad_norm": 1.0528661614638362, "learning_rate": 4.964084592360266e-06, "loss": 0.4794, "step": 5417 }, { "epoch": 0.3299333191243187, "grad_norm": 1.0273846240964828, "learning_rate": 4.964071113894216e-06, "loss": 0.4941, "step": 5418 }, { "epoch": 0.3299942149011966, "grad_norm": 1.0541288170644354, "learning_rate": 4.964057632917822e-06, "loss": 0.4453, "step": 5419 }, { "epoch": 0.33005511067807447, "grad_norm": 1.0649541000167286, "learning_rate": 4.964044149431098e-06, "loss": 0.4539, "step": 5420 }, { "epoch": 0.33011600645495237, "grad_norm": 1.0364344678781128, "learning_rate": 4.964030663434056e-06, "loss": 0.4992, "step": 5421 }, { "epoch": 0.3301769022318302, "grad_norm": 0.9806860632941039, "learning_rate": 4.964017174926712e-06, "loss": 0.4538, "step": 5422 }, { "epoch": 0.3302377980087081, "grad_norm": 1.0229303168272106, "learning_rate": 4.964003683909077e-06, "loss": 0.4968, "step": 5423 }, { "epoch": 0.33029869378558596, "grad_norm": 1.0792256940812617, "learning_rate": 4.963990190381167e-06, "loss": 0.4384, "step": 5424 }, { "epoch": 0.33035958956246386, "grad_norm": 1.042252819269745, "learning_rate": 4.963976694342996e-06, "loss": 0.3868, "step": 5425 }, { "epoch": 0.3304204853393417, "grad_norm": 1.0559778282782248, "learning_rate": 4.963963195794575e-06, "loss": 0.4702, "step": 5426 }, { "epoch": 0.3304813811162196, "grad_norm": 1.0870539336646599, "learning_rate": 4.963949694735921e-06, "loss": 0.51, "step": 5427 }, { "epoch": 0.33054227689309745, "grad_norm": 1.0072136530574587, "learning_rate": 4.963936191167046e-06, "loss": 0.4827, "step": 5428 }, { "epoch": 0.33060317266997535, "grad_norm": 1.1170090963847326, "learning_rate": 4.963922685087963e-06, "loss": 0.4239, "step": 5429 }, { "epoch": 0.3306640684468532, "grad_norm": 1.0481183750689573, "learning_rate": 4.963909176498688e-06, "loss": 0.4437, "step": 5430 }, { "epoch": 0.3307249642237311, "grad_norm": 0.8880822790510647, "learning_rate": 4.963895665399233e-06, "loss": 0.5046, "step": 5431 }, { "epoch": 0.33078586000060894, "grad_norm": 1.0035844730962415, "learning_rate": 4.963882151789612e-06, "loss": 0.4857, "step": 5432 }, { "epoch": 0.33084675577748685, "grad_norm": 1.0290637350324672, "learning_rate": 4.9638686356698394e-06, "loss": 0.4359, "step": 5433 }, { "epoch": 0.3309076515543647, "grad_norm": 1.0819670890670623, "learning_rate": 4.963855117039929e-06, "loss": 0.4514, "step": 5434 }, { "epoch": 0.3309685473312426, "grad_norm": 1.080207468945585, "learning_rate": 4.963841595899895e-06, "loss": 0.4598, "step": 5435 }, { "epoch": 0.33102944310812044, "grad_norm": 1.0308081078963887, "learning_rate": 4.9638280722497485e-06, "loss": 0.5477, "step": 5436 }, { "epoch": 0.33109033888499834, "grad_norm": 0.9613796086822501, "learning_rate": 4.963814546089506e-06, "loss": 0.5367, "step": 5437 }, { "epoch": 0.3311512346618762, "grad_norm": 1.0010787298460404, "learning_rate": 4.963801017419181e-06, "loss": 0.5394, "step": 5438 }, { "epoch": 0.3312121304387541, "grad_norm": 1.0956372680802082, "learning_rate": 4.963787486238786e-06, "loss": 0.5289, "step": 5439 }, { "epoch": 0.3312730262156319, "grad_norm": 0.992601990764145, "learning_rate": 4.9637739525483354e-06, "loss": 0.4492, "step": 5440 }, { "epoch": 0.33133392199250983, "grad_norm": 1.1226320864084574, "learning_rate": 4.963760416347844e-06, "loss": 0.4303, "step": 5441 }, { "epoch": 0.3313948177693877, "grad_norm": 1.0782019358011363, "learning_rate": 4.963746877637325e-06, "loss": 0.492, "step": 5442 }, { "epoch": 0.3314557135462656, "grad_norm": 0.9484354428199512, "learning_rate": 4.96373333641679e-06, "loss": 0.4472, "step": 5443 }, { "epoch": 0.3315166093231434, "grad_norm": 1.066198859636273, "learning_rate": 4.963719792686255e-06, "loss": 0.5247, "step": 5444 }, { "epoch": 0.3315775051000213, "grad_norm": 1.0880514686525211, "learning_rate": 4.9637062464457354e-06, "loss": 0.4862, "step": 5445 }, { "epoch": 0.33163840087689916, "grad_norm": 0.911019691347691, "learning_rate": 4.963692697695242e-06, "loss": 0.5239, "step": 5446 }, { "epoch": 0.33169929665377706, "grad_norm": 1.0820910881678925, "learning_rate": 4.96367914643479e-06, "loss": 0.4286, "step": 5447 }, { "epoch": 0.3317601924306549, "grad_norm": 1.1776323043445802, "learning_rate": 4.9636655926643924e-06, "loss": 0.4, "step": 5448 }, { "epoch": 0.3318210882075328, "grad_norm": 1.1294972148065272, "learning_rate": 4.963652036384063e-06, "loss": 0.45, "step": 5449 }, { "epoch": 0.33188198398441066, "grad_norm": 0.9890870749237546, "learning_rate": 4.9636384775938175e-06, "loss": 0.4509, "step": 5450 }, { "epoch": 0.33194287976128856, "grad_norm": 0.994416354369958, "learning_rate": 4.9636249162936676e-06, "loss": 0.53, "step": 5451 }, { "epoch": 0.3320037755381664, "grad_norm": 1.068848024002241, "learning_rate": 4.963611352483629e-06, "loss": 0.3888, "step": 5452 }, { "epoch": 0.3320646713150443, "grad_norm": 1.023899277228583, "learning_rate": 4.9635977861637124e-06, "loss": 0.4777, "step": 5453 }, { "epoch": 0.3321255670919222, "grad_norm": 1.1046941304248536, "learning_rate": 4.963584217333934e-06, "loss": 0.5565, "step": 5454 }, { "epoch": 0.33218646286880005, "grad_norm": 1.08951279230911, "learning_rate": 4.963570645994309e-06, "loss": 0.581, "step": 5455 }, { "epoch": 0.33224735864567795, "grad_norm": 1.0719677464544386, "learning_rate": 4.963557072144848e-06, "loss": 0.41, "step": 5456 }, { "epoch": 0.3323082544225558, "grad_norm": 1.094704639846148, "learning_rate": 4.963543495785566e-06, "loss": 0.4453, "step": 5457 }, { "epoch": 0.3323691501994337, "grad_norm": 1.0042156682781147, "learning_rate": 4.9635299169164775e-06, "loss": 0.436, "step": 5458 }, { "epoch": 0.33243004597631154, "grad_norm": 1.026486183088245, "learning_rate": 4.963516335537596e-06, "loss": 0.415, "step": 5459 }, { "epoch": 0.33249094175318944, "grad_norm": 1.046028918069639, "learning_rate": 4.963502751648935e-06, "loss": 0.4124, "step": 5460 }, { "epoch": 0.3325518375300673, "grad_norm": 1.021622403480536, "learning_rate": 4.9634891652505095e-06, "loss": 0.4716, "step": 5461 }, { "epoch": 0.3326127333069452, "grad_norm": 0.967656841791181, "learning_rate": 4.963475576342332e-06, "loss": 0.4586, "step": 5462 }, { "epoch": 0.33267362908382303, "grad_norm": 1.1391238058271225, "learning_rate": 4.963461984924417e-06, "loss": 0.4351, "step": 5463 }, { "epoch": 0.33273452486070093, "grad_norm": 1.0398187860315462, "learning_rate": 4.9634483909967775e-06, "loss": 0.4196, "step": 5464 }, { "epoch": 0.3327954206375788, "grad_norm": 0.98932566230204, "learning_rate": 4.963434794559428e-06, "loss": 0.4763, "step": 5465 }, { "epoch": 0.3328563164144567, "grad_norm": 1.0757058210340913, "learning_rate": 4.963421195612383e-06, "loss": 0.4943, "step": 5466 }, { "epoch": 0.3329172121913345, "grad_norm": 1.057375107313085, "learning_rate": 4.963407594155655e-06, "loss": 0.4846, "step": 5467 }, { "epoch": 0.3329781079682124, "grad_norm": 0.9857885324239551, "learning_rate": 4.9633939901892596e-06, "loss": 0.5008, "step": 5468 }, { "epoch": 0.33303900374509027, "grad_norm": 1.0999194030465618, "learning_rate": 4.963380383713209e-06, "loss": 0.4573, "step": 5469 }, { "epoch": 0.33309989952196817, "grad_norm": 0.9870979836576529, "learning_rate": 4.963366774727517e-06, "loss": 0.5592, "step": 5470 }, { "epoch": 0.333160795298846, "grad_norm": 1.0810344232376545, "learning_rate": 4.963353163232199e-06, "loss": 0.4939, "step": 5471 }, { "epoch": 0.3332216910757239, "grad_norm": 1.0433660697769396, "learning_rate": 4.963339549227268e-06, "loss": 0.5012, "step": 5472 }, { "epoch": 0.33328258685260176, "grad_norm": 1.0559575031849227, "learning_rate": 4.963325932712738e-06, "loss": 0.4193, "step": 5473 }, { "epoch": 0.33334348262947966, "grad_norm": 0.9953519095696776, "learning_rate": 4.963312313688622e-06, "loss": 0.4959, "step": 5474 }, { "epoch": 0.3334043784063575, "grad_norm": 1.0241206205944402, "learning_rate": 4.963298692154935e-06, "loss": 0.5166, "step": 5475 }, { "epoch": 0.3334652741832354, "grad_norm": 1.0239230951437497, "learning_rate": 4.963285068111691e-06, "loss": 0.4584, "step": 5476 }, { "epoch": 0.33352616996011325, "grad_norm": 1.1678809764886993, "learning_rate": 4.9632714415589024e-06, "loss": 0.4169, "step": 5477 }, { "epoch": 0.33358706573699115, "grad_norm": 1.0385473588197178, "learning_rate": 4.963257812496584e-06, "loss": 0.4141, "step": 5478 }, { "epoch": 0.333647961513869, "grad_norm": 1.0220960096496718, "learning_rate": 4.9632441809247515e-06, "loss": 0.5284, "step": 5479 }, { "epoch": 0.3337088572907469, "grad_norm": 1.0432802573727098, "learning_rate": 4.963230546843416e-06, "loss": 0.4795, "step": 5480 }, { "epoch": 0.33376975306762474, "grad_norm": 1.0830698283878704, "learning_rate": 4.963216910252592e-06, "loss": 0.4329, "step": 5481 }, { "epoch": 0.33383064884450264, "grad_norm": 1.0762261321415172, "learning_rate": 4.963203271152294e-06, "loss": 0.4501, "step": 5482 }, { "epoch": 0.3338915446213805, "grad_norm": 1.1168518251486845, "learning_rate": 4.963189629542536e-06, "loss": 0.5113, "step": 5483 }, { "epoch": 0.3339524403982584, "grad_norm": 1.0288510755370004, "learning_rate": 4.963175985423332e-06, "loss": 0.4346, "step": 5484 }, { "epoch": 0.33401333617513623, "grad_norm": 1.0437154257323538, "learning_rate": 4.9631623387946945e-06, "loss": 0.4529, "step": 5485 }, { "epoch": 0.33407423195201413, "grad_norm": 1.0223367476402339, "learning_rate": 4.963148689656639e-06, "loss": 0.5127, "step": 5486 }, { "epoch": 0.334135127728892, "grad_norm": 1.079032480092341, "learning_rate": 4.963135038009179e-06, "loss": 0.5195, "step": 5487 }, { "epoch": 0.3341960235057699, "grad_norm": 0.9846920424435558, "learning_rate": 4.963121383852327e-06, "loss": 0.5396, "step": 5488 }, { "epoch": 0.3342569192826477, "grad_norm": 0.9694738074810227, "learning_rate": 4.9631077271861e-06, "loss": 0.5246, "step": 5489 }, { "epoch": 0.3343178150595256, "grad_norm": 1.0507225517804835, "learning_rate": 4.963094068010509e-06, "loss": 0.401, "step": 5490 }, { "epoch": 0.33437871083640347, "grad_norm": 1.002495150312498, "learning_rate": 4.963080406325569e-06, "loss": 0.4704, "step": 5491 }, { "epoch": 0.33443960661328137, "grad_norm": 0.9606246703400887, "learning_rate": 4.963066742131294e-06, "loss": 0.4853, "step": 5492 }, { "epoch": 0.3345005023901592, "grad_norm": 1.0825687954548766, "learning_rate": 4.963053075427698e-06, "loss": 0.451, "step": 5493 }, { "epoch": 0.3345613981670371, "grad_norm": 1.0962474530407518, "learning_rate": 4.963039406214795e-06, "loss": 0.4985, "step": 5494 }, { "epoch": 0.334622293943915, "grad_norm": 0.9938268046395152, "learning_rate": 4.963025734492598e-06, "loss": 0.4989, "step": 5495 }, { "epoch": 0.33468318972079286, "grad_norm": 0.9238709635140457, "learning_rate": 4.963012060261122e-06, "loss": 0.4748, "step": 5496 }, { "epoch": 0.33474408549767076, "grad_norm": 0.9664590296366121, "learning_rate": 4.96299838352038e-06, "loss": 0.5354, "step": 5497 }, { "epoch": 0.3348049812745486, "grad_norm": 1.0527483460761131, "learning_rate": 4.9629847042703875e-06, "loss": 0.5154, "step": 5498 }, { "epoch": 0.3348658770514265, "grad_norm": 1.0692439011714907, "learning_rate": 4.962971022511156e-06, "loss": 0.4907, "step": 5499 }, { "epoch": 0.33492677282830435, "grad_norm": 1.0589933375752845, "learning_rate": 4.962957338242702e-06, "loss": 0.4557, "step": 5500 }, { "epoch": 0.33498766860518225, "grad_norm": 1.027313891252013, "learning_rate": 4.962943651465038e-06, "loss": 0.506, "step": 5501 }, { "epoch": 0.3350485643820601, "grad_norm": 1.0271901038231992, "learning_rate": 4.962929962178178e-06, "loss": 0.494, "step": 5502 }, { "epoch": 0.335109460158938, "grad_norm": 0.959753186141344, "learning_rate": 4.962916270382135e-06, "loss": 0.449, "step": 5503 }, { "epoch": 0.33517035593581584, "grad_norm": 0.9914862276363401, "learning_rate": 4.962902576076926e-06, "loss": 0.4826, "step": 5504 }, { "epoch": 0.33523125171269375, "grad_norm": 1.0831841027974176, "learning_rate": 4.962888879262562e-06, "loss": 0.4667, "step": 5505 }, { "epoch": 0.3352921474895716, "grad_norm": 1.0001744999974964, "learning_rate": 4.962875179939059e-06, "loss": 0.5162, "step": 5506 }, { "epoch": 0.3353530432664495, "grad_norm": 1.0142209829484197, "learning_rate": 4.962861478106429e-06, "loss": 0.4651, "step": 5507 }, { "epoch": 0.33541393904332734, "grad_norm": 1.1116097413931931, "learning_rate": 4.9628477737646875e-06, "loss": 0.4644, "step": 5508 }, { "epoch": 0.33547483482020524, "grad_norm": 1.1061175440939948, "learning_rate": 4.962834066913848e-06, "loss": 0.3786, "step": 5509 }, { "epoch": 0.3355357305970831, "grad_norm": 1.02906196609887, "learning_rate": 4.962820357553923e-06, "loss": 0.4542, "step": 5510 }, { "epoch": 0.335596626373961, "grad_norm": 1.0422831138291224, "learning_rate": 4.962806645684929e-06, "loss": 0.4747, "step": 5511 }, { "epoch": 0.3356575221508388, "grad_norm": 0.9681455111410203, "learning_rate": 4.962792931306879e-06, "loss": 0.4611, "step": 5512 }, { "epoch": 0.33571841792771673, "grad_norm": 1.0970848977689245, "learning_rate": 4.9627792144197865e-06, "loss": 0.4016, "step": 5513 }, { "epoch": 0.3357793137045946, "grad_norm": 1.0270202703251063, "learning_rate": 4.962765495023666e-06, "loss": 0.5064, "step": 5514 }, { "epoch": 0.3358402094814725, "grad_norm": 1.0254995249268837, "learning_rate": 4.96275177311853e-06, "loss": 0.4702, "step": 5515 }, { "epoch": 0.3359011052583503, "grad_norm": 1.0180901319268902, "learning_rate": 4.962738048704395e-06, "loss": 0.4932, "step": 5516 }, { "epoch": 0.3359620010352282, "grad_norm": 1.1175326342045697, "learning_rate": 4.962724321781274e-06, "loss": 0.5194, "step": 5517 }, { "epoch": 0.33602289681210606, "grad_norm": 1.0450951741461172, "learning_rate": 4.962710592349179e-06, "loss": 0.5675, "step": 5518 }, { "epoch": 0.33608379258898397, "grad_norm": 1.0221441558580742, "learning_rate": 4.9626968604081276e-06, "loss": 0.4613, "step": 5519 }, { "epoch": 0.3361446883658618, "grad_norm": 0.9942915142332897, "learning_rate": 4.96268312595813e-06, "loss": 0.4139, "step": 5520 }, { "epoch": 0.3362055841427397, "grad_norm": 1.1729353931758837, "learning_rate": 4.962669388999203e-06, "loss": 0.4896, "step": 5521 }, { "epoch": 0.33626647991961756, "grad_norm": 1.0883732962307489, "learning_rate": 4.962655649531359e-06, "loss": 0.4794, "step": 5522 }, { "epoch": 0.33632737569649546, "grad_norm": 1.0254852366577396, "learning_rate": 4.962641907554614e-06, "loss": 0.45, "step": 5523 }, { "epoch": 0.3363882714733733, "grad_norm": 1.0045212636591576, "learning_rate": 4.9626281630689785e-06, "loss": 0.4477, "step": 5524 }, { "epoch": 0.3364491672502512, "grad_norm": 0.9887324057258692, "learning_rate": 4.962614416074471e-06, "loss": 0.4243, "step": 5525 }, { "epoch": 0.33651006302712905, "grad_norm": 0.9768461626998454, "learning_rate": 4.962600666571101e-06, "loss": 0.5187, "step": 5526 }, { "epoch": 0.33657095880400695, "grad_norm": 1.0554313198084948, "learning_rate": 4.962586914558885e-06, "loss": 0.4331, "step": 5527 }, { "epoch": 0.3366318545808848, "grad_norm": 1.0673360824592304, "learning_rate": 4.9625731600378376e-06, "loss": 0.4843, "step": 5528 }, { "epoch": 0.3366927503577627, "grad_norm": 1.0521973535555944, "learning_rate": 4.962559403007972e-06, "loss": 0.4557, "step": 5529 }, { "epoch": 0.33675364613464054, "grad_norm": 1.0934680922278455, "learning_rate": 4.962545643469302e-06, "loss": 0.4363, "step": 5530 }, { "epoch": 0.33681454191151844, "grad_norm": 1.0085175347878144, "learning_rate": 4.962531881421841e-06, "loss": 0.5079, "step": 5531 }, { "epoch": 0.3368754376883963, "grad_norm": 1.0404968686041185, "learning_rate": 4.962518116865604e-06, "loss": 0.4483, "step": 5532 }, { "epoch": 0.3369363334652742, "grad_norm": 1.0310382237979936, "learning_rate": 4.9625043498006045e-06, "loss": 0.4745, "step": 5533 }, { "epoch": 0.33699722924215203, "grad_norm": 1.0413296318529957, "learning_rate": 4.962490580226857e-06, "loss": 0.4773, "step": 5534 }, { "epoch": 0.33705812501902993, "grad_norm": 1.014905820907169, "learning_rate": 4.962476808144375e-06, "loss": 0.5304, "step": 5535 }, { "epoch": 0.33711902079590783, "grad_norm": 1.1095458098426498, "learning_rate": 4.962463033553173e-06, "loss": 0.5041, "step": 5536 }, { "epoch": 0.3371799165727857, "grad_norm": 1.084200056462048, "learning_rate": 4.962449256453265e-06, "loss": 0.4252, "step": 5537 }, { "epoch": 0.3372408123496636, "grad_norm": 1.0963605753865362, "learning_rate": 4.962435476844665e-06, "loss": 0.4345, "step": 5538 }, { "epoch": 0.3373017081265414, "grad_norm": 1.0315776212690646, "learning_rate": 4.962421694727387e-06, "loss": 0.546, "step": 5539 }, { "epoch": 0.3373626039034193, "grad_norm": 1.0922348913790492, "learning_rate": 4.962407910101445e-06, "loss": 0.4893, "step": 5540 }, { "epoch": 0.33742349968029717, "grad_norm": 0.9654528251870261, "learning_rate": 4.962394122966852e-06, "loss": 0.6033, "step": 5541 }, { "epoch": 0.33748439545717507, "grad_norm": 0.993438182086029, "learning_rate": 4.962380333323624e-06, "loss": 0.5223, "step": 5542 }, { "epoch": 0.3375452912340529, "grad_norm": 1.0590263899349757, "learning_rate": 4.962366541171775e-06, "loss": 0.5032, "step": 5543 }, { "epoch": 0.3376061870109308, "grad_norm": 1.072597367105068, "learning_rate": 4.962352746511316e-06, "loss": 0.4818, "step": 5544 }, { "epoch": 0.33766708278780866, "grad_norm": 0.9960144763519876, "learning_rate": 4.9623389493422645e-06, "loss": 0.4623, "step": 5545 }, { "epoch": 0.33772797856468656, "grad_norm": 1.0953880954161435, "learning_rate": 4.962325149664633e-06, "loss": 0.427, "step": 5546 }, { "epoch": 0.3377888743415644, "grad_norm": 0.9846365091281676, "learning_rate": 4.962311347478437e-06, "loss": 0.5004, "step": 5547 }, { "epoch": 0.3378497701184423, "grad_norm": 0.9949995475042839, "learning_rate": 4.962297542783688e-06, "loss": 0.4812, "step": 5548 }, { "epoch": 0.33791066589532015, "grad_norm": 1.0370429277594186, "learning_rate": 4.962283735580402e-06, "loss": 0.5085, "step": 5549 }, { "epoch": 0.33797156167219805, "grad_norm": 1.0859098641791944, "learning_rate": 4.962269925868592e-06, "loss": 0.4408, "step": 5550 }, { "epoch": 0.3380324574490759, "grad_norm": 1.0384870391780494, "learning_rate": 4.962256113648273e-06, "loss": 0.431, "step": 5551 }, { "epoch": 0.3380933532259538, "grad_norm": 1.0175394710184629, "learning_rate": 4.962242298919459e-06, "loss": 0.4756, "step": 5552 }, { "epoch": 0.33815424900283164, "grad_norm": 1.0786890827509201, "learning_rate": 4.962228481682163e-06, "loss": 0.4692, "step": 5553 }, { "epoch": 0.33821514477970954, "grad_norm": 1.0232071129618827, "learning_rate": 4.962214661936399e-06, "loss": 0.4591, "step": 5554 }, { "epoch": 0.3382760405565874, "grad_norm": 1.0623207792519755, "learning_rate": 4.962200839682184e-06, "loss": 0.4971, "step": 5555 }, { "epoch": 0.3383369363334653, "grad_norm": 0.9703087954547531, "learning_rate": 4.962187014919529e-06, "loss": 0.5453, "step": 5556 }, { "epoch": 0.33839783211034313, "grad_norm": 1.1001028552047638, "learning_rate": 4.962173187648449e-06, "loss": 0.4244, "step": 5557 }, { "epoch": 0.33845872788722103, "grad_norm": 1.1124683735891283, "learning_rate": 4.962159357868958e-06, "loss": 0.4616, "step": 5558 }, { "epoch": 0.3385196236640989, "grad_norm": 0.981638287156133, "learning_rate": 4.96214552558107e-06, "loss": 0.5994, "step": 5559 }, { "epoch": 0.3385805194409768, "grad_norm": 1.069777206265205, "learning_rate": 4.9621316907848005e-06, "loss": 0.4735, "step": 5560 }, { "epoch": 0.3386414152178546, "grad_norm": 1.0527493086400335, "learning_rate": 4.9621178534801616e-06, "loss": 0.4905, "step": 5561 }, { "epoch": 0.3387023109947325, "grad_norm": 1.069528426473951, "learning_rate": 4.962104013667168e-06, "loss": 0.4497, "step": 5562 }, { "epoch": 0.33876320677161037, "grad_norm": 1.107800298861036, "learning_rate": 4.9620901713458346e-06, "loss": 0.4431, "step": 5563 }, { "epoch": 0.33882410254848827, "grad_norm": 0.9253220320227247, "learning_rate": 4.9620763265161745e-06, "loss": 0.464, "step": 5564 }, { "epoch": 0.3388849983253661, "grad_norm": 0.9487566967567834, "learning_rate": 4.9620624791782024e-06, "loss": 0.4906, "step": 5565 }, { "epoch": 0.338945894102244, "grad_norm": 1.0055018201093775, "learning_rate": 4.962048629331933e-06, "loss": 0.4729, "step": 5566 }, { "epoch": 0.33900678987912186, "grad_norm": 0.9820888440397437, "learning_rate": 4.962034776977378e-06, "loss": 0.4875, "step": 5567 }, { "epoch": 0.33906768565599976, "grad_norm": 0.9723027549575624, "learning_rate": 4.962020922114554e-06, "loss": 0.4523, "step": 5568 }, { "epoch": 0.3391285814328776, "grad_norm": 1.108546411289178, "learning_rate": 4.962007064743474e-06, "loss": 0.4299, "step": 5569 }, { "epoch": 0.3391894772097555, "grad_norm": 1.0422890020414157, "learning_rate": 4.961993204864153e-06, "loss": 0.4966, "step": 5570 }, { "epoch": 0.33925037298663335, "grad_norm": 1.085296619340662, "learning_rate": 4.961979342476604e-06, "loss": 0.5023, "step": 5571 }, { "epoch": 0.33931126876351125, "grad_norm": 1.0529649088549593, "learning_rate": 4.961965477580842e-06, "loss": 0.485, "step": 5572 }, { "epoch": 0.3393721645403891, "grad_norm": 1.0283427403485526, "learning_rate": 4.9619516101768805e-06, "loss": 0.4629, "step": 5573 }, { "epoch": 0.339433060317267, "grad_norm": 1.0584310698189001, "learning_rate": 4.961937740264734e-06, "loss": 0.4209, "step": 5574 }, { "epoch": 0.33949395609414484, "grad_norm": 0.9624264252813494, "learning_rate": 4.961923867844417e-06, "loss": 0.4988, "step": 5575 }, { "epoch": 0.33955485187102274, "grad_norm": 0.99323746098715, "learning_rate": 4.961909992915942e-06, "loss": 0.4972, "step": 5576 }, { "epoch": 0.33961574764790065, "grad_norm": 1.0380606362015534, "learning_rate": 4.961896115479325e-06, "loss": 0.4491, "step": 5577 }, { "epoch": 0.3396766434247785, "grad_norm": 1.0031794340890787, "learning_rate": 4.961882235534579e-06, "loss": 0.496, "step": 5578 }, { "epoch": 0.3397375392016564, "grad_norm": 1.0957758457425746, "learning_rate": 4.961868353081719e-06, "loss": 0.4489, "step": 5579 }, { "epoch": 0.33979843497853424, "grad_norm": 1.0221330393120551, "learning_rate": 4.961854468120758e-06, "loss": 0.4336, "step": 5580 }, { "epoch": 0.33985933075541214, "grad_norm": 1.0619400509002852, "learning_rate": 4.961840580651712e-06, "loss": 0.4991, "step": 5581 }, { "epoch": 0.33992022653229, "grad_norm": 1.05542987533867, "learning_rate": 4.961826690674594e-06, "loss": 0.4383, "step": 5582 }, { "epoch": 0.3399811223091679, "grad_norm": 1.015524801434588, "learning_rate": 4.9618127981894174e-06, "loss": 0.4037, "step": 5583 }, { "epoch": 0.3400420180860457, "grad_norm": 1.1299523854036753, "learning_rate": 4.961798903196197e-06, "loss": 0.4936, "step": 5584 }, { "epoch": 0.34010291386292363, "grad_norm": 0.9663474635114704, "learning_rate": 4.9617850056949475e-06, "loss": 0.4695, "step": 5585 }, { "epoch": 0.3401638096398015, "grad_norm": 1.061756275473981, "learning_rate": 4.961771105685682e-06, "loss": 0.466, "step": 5586 }, { "epoch": 0.3402247054166794, "grad_norm": 0.987591954888018, "learning_rate": 4.961757203168416e-06, "loss": 0.4688, "step": 5587 }, { "epoch": 0.3402856011935572, "grad_norm": 1.0015294950416767, "learning_rate": 4.9617432981431626e-06, "loss": 0.4567, "step": 5588 }, { "epoch": 0.3403464969704351, "grad_norm": 1.1170674558823095, "learning_rate": 4.961729390609936e-06, "loss": 0.4753, "step": 5589 }, { "epoch": 0.34040739274731296, "grad_norm": 1.1729381293389722, "learning_rate": 4.961715480568752e-06, "loss": 0.4766, "step": 5590 }, { "epoch": 0.34046828852419087, "grad_norm": 1.0215106213239569, "learning_rate": 4.961701568019622e-06, "loss": 0.4295, "step": 5591 }, { "epoch": 0.3405291843010687, "grad_norm": 1.023329040385912, "learning_rate": 4.961687652962562e-06, "loss": 0.495, "step": 5592 }, { "epoch": 0.3405900800779466, "grad_norm": 0.9593049934470675, "learning_rate": 4.961673735397587e-06, "loss": 0.5607, "step": 5593 }, { "epoch": 0.34065097585482446, "grad_norm": 0.9298767115242889, "learning_rate": 4.961659815324708e-06, "loss": 0.4724, "step": 5594 }, { "epoch": 0.34071187163170236, "grad_norm": 1.0121578000942217, "learning_rate": 4.961645892743942e-06, "loss": 0.4806, "step": 5595 }, { "epoch": 0.3407727674085802, "grad_norm": 1.0336142986235242, "learning_rate": 4.9616319676553025e-06, "loss": 0.461, "step": 5596 }, { "epoch": 0.3408336631854581, "grad_norm": 1.0255784611557572, "learning_rate": 4.961618040058803e-06, "loss": 0.4358, "step": 5597 }, { "epoch": 0.34089455896233595, "grad_norm": 0.9979684092275197, "learning_rate": 4.961604109954459e-06, "loss": 0.4598, "step": 5598 }, { "epoch": 0.34095545473921385, "grad_norm": 0.9958950510636841, "learning_rate": 4.961590177342284e-06, "loss": 0.5286, "step": 5599 }, { "epoch": 0.3410163505160917, "grad_norm": 0.9745449150922775, "learning_rate": 4.961576242222291e-06, "loss": 0.456, "step": 5600 }, { "epoch": 0.3410772462929696, "grad_norm": 1.0260590815321449, "learning_rate": 4.9615623045944965e-06, "loss": 0.4676, "step": 5601 }, { "epoch": 0.34113814206984744, "grad_norm": 1.0842244711815199, "learning_rate": 4.961548364458913e-06, "loss": 0.5256, "step": 5602 }, { "epoch": 0.34119903784672534, "grad_norm": 1.0316298614161954, "learning_rate": 4.961534421815556e-06, "loss": 0.4878, "step": 5603 }, { "epoch": 0.3412599336236032, "grad_norm": 1.1487780507952183, "learning_rate": 4.961520476664437e-06, "loss": 0.5012, "step": 5604 }, { "epoch": 0.3413208294004811, "grad_norm": 1.0443431021798713, "learning_rate": 4.961506529005574e-06, "loss": 0.4873, "step": 5605 }, { "epoch": 0.34138172517735893, "grad_norm": 1.0655039628310419, "learning_rate": 4.9614925788389775e-06, "loss": 0.4312, "step": 5606 }, { "epoch": 0.34144262095423683, "grad_norm": 1.0064817281735896, "learning_rate": 4.961478626164665e-06, "loss": 0.4442, "step": 5607 }, { "epoch": 0.3415035167311147, "grad_norm": 1.1743422755999875, "learning_rate": 4.961464670982649e-06, "loss": 0.4294, "step": 5608 }, { "epoch": 0.3415644125079926, "grad_norm": 1.0966685459104737, "learning_rate": 4.961450713292943e-06, "loss": 0.3855, "step": 5609 }, { "epoch": 0.3416253082848704, "grad_norm": 1.0150917467026392, "learning_rate": 4.961436753095563e-06, "loss": 0.4759, "step": 5610 }, { "epoch": 0.3416862040617483, "grad_norm": 0.9801768867768611, "learning_rate": 4.961422790390523e-06, "loss": 0.446, "step": 5611 }, { "epoch": 0.34174709983862617, "grad_norm": 1.0725737275865193, "learning_rate": 4.961408825177836e-06, "loss": 0.4373, "step": 5612 }, { "epoch": 0.34180799561550407, "grad_norm": 1.0480673162240057, "learning_rate": 4.961394857457517e-06, "loss": 0.4836, "step": 5613 }, { "epoch": 0.3418688913923819, "grad_norm": 1.1027977224534764, "learning_rate": 4.96138088722958e-06, "loss": 0.4574, "step": 5614 }, { "epoch": 0.3419297871692598, "grad_norm": 1.130673200431905, "learning_rate": 4.961366914494039e-06, "loss": 0.4035, "step": 5615 }, { "epoch": 0.34199068294613766, "grad_norm": 0.9849183726980638, "learning_rate": 4.96135293925091e-06, "loss": 0.4955, "step": 5616 }, { "epoch": 0.34205157872301556, "grad_norm": 1.045965158223417, "learning_rate": 4.961338961500204e-06, "loss": 0.4505, "step": 5617 }, { "epoch": 0.34211247449989346, "grad_norm": 0.9975112023473243, "learning_rate": 4.961324981241937e-06, "loss": 0.4591, "step": 5618 }, { "epoch": 0.3421733702767713, "grad_norm": 1.0586182199500822, "learning_rate": 4.9613109984761245e-06, "loss": 0.4756, "step": 5619 }, { "epoch": 0.3422342660536492, "grad_norm": 1.0256741785020347, "learning_rate": 4.961297013202779e-06, "loss": 0.4275, "step": 5620 }, { "epoch": 0.34229516183052705, "grad_norm": 0.9846891669603921, "learning_rate": 4.9612830254219156e-06, "loss": 0.4681, "step": 5621 }, { "epoch": 0.34235605760740495, "grad_norm": 1.0090202661632917, "learning_rate": 4.961269035133548e-06, "loss": 0.4691, "step": 5622 }, { "epoch": 0.3424169533842828, "grad_norm": 0.9905174371914138, "learning_rate": 4.9612550423376904e-06, "loss": 0.4731, "step": 5623 }, { "epoch": 0.3424778491611607, "grad_norm": 0.9531418191249211, "learning_rate": 4.961241047034358e-06, "loss": 0.5425, "step": 5624 }, { "epoch": 0.34253874493803854, "grad_norm": 0.9279481362300305, "learning_rate": 4.9612270492235636e-06, "loss": 0.4718, "step": 5625 }, { "epoch": 0.34259964071491644, "grad_norm": 1.0862052341421298, "learning_rate": 4.961213048905322e-06, "loss": 0.4284, "step": 5626 }, { "epoch": 0.3426605364917943, "grad_norm": 0.916370633544873, "learning_rate": 4.961199046079649e-06, "loss": 0.4743, "step": 5627 }, { "epoch": 0.3427214322686722, "grad_norm": 1.0138858166364146, "learning_rate": 4.961185040746557e-06, "loss": 0.5828, "step": 5628 }, { "epoch": 0.34278232804555003, "grad_norm": 1.0202718224692027, "learning_rate": 4.96117103290606e-06, "loss": 0.5379, "step": 5629 }, { "epoch": 0.34284322382242793, "grad_norm": 1.0343804047555951, "learning_rate": 4.961157022558174e-06, "loss": 0.4568, "step": 5630 }, { "epoch": 0.3429041195993058, "grad_norm": 1.1155331769684116, "learning_rate": 4.961143009702912e-06, "loss": 0.4833, "step": 5631 }, { "epoch": 0.3429650153761837, "grad_norm": 1.0354984791495347, "learning_rate": 4.961128994340289e-06, "loss": 0.5245, "step": 5632 }, { "epoch": 0.3430259111530615, "grad_norm": 0.9740000093623228, "learning_rate": 4.961114976470319e-06, "loss": 0.5669, "step": 5633 }, { "epoch": 0.3430868069299394, "grad_norm": 1.0151826179857024, "learning_rate": 4.961100956093016e-06, "loss": 0.4942, "step": 5634 }, { "epoch": 0.34314770270681727, "grad_norm": 1.0080470599195328, "learning_rate": 4.961086933208395e-06, "loss": 0.465, "step": 5635 }, { "epoch": 0.34320859848369517, "grad_norm": 1.0318118824119142, "learning_rate": 4.961072907816469e-06, "loss": 0.427, "step": 5636 }, { "epoch": 0.343269494260573, "grad_norm": 0.9875664878180967, "learning_rate": 4.961058879917254e-06, "loss": 0.4264, "step": 5637 }, { "epoch": 0.3433303900374509, "grad_norm": 0.9578899588734768, "learning_rate": 4.9610448495107625e-06, "loss": 0.5521, "step": 5638 }, { "epoch": 0.34339128581432876, "grad_norm": 1.05420091933313, "learning_rate": 4.96103081659701e-06, "loss": 0.4763, "step": 5639 }, { "epoch": 0.34345218159120666, "grad_norm": 1.0349023744875652, "learning_rate": 4.961016781176011e-06, "loss": 0.5043, "step": 5640 }, { "epoch": 0.3435130773680845, "grad_norm": 1.0619941791693353, "learning_rate": 4.961002743247778e-06, "loss": 0.4812, "step": 5641 }, { "epoch": 0.3435739731449624, "grad_norm": 1.0731964729291112, "learning_rate": 4.960988702812328e-06, "loss": 0.4655, "step": 5642 }, { "epoch": 0.34363486892184025, "grad_norm": 1.014331556778732, "learning_rate": 4.9609746598696725e-06, "loss": 0.5042, "step": 5643 }, { "epoch": 0.34369576469871815, "grad_norm": 0.9868360407811839, "learning_rate": 4.960960614419829e-06, "loss": 0.4958, "step": 5644 }, { "epoch": 0.343756660475596, "grad_norm": 1.0809061114853022, "learning_rate": 4.960946566462809e-06, "loss": 0.4879, "step": 5645 }, { "epoch": 0.3438175562524739, "grad_norm": 0.9935008868849504, "learning_rate": 4.960932515998627e-06, "loss": 0.4904, "step": 5646 }, { "epoch": 0.34387845202935174, "grad_norm": 1.0673034950317544, "learning_rate": 4.960918463027299e-06, "loss": 0.4159, "step": 5647 }, { "epoch": 0.34393934780622964, "grad_norm": 0.974466060089914, "learning_rate": 4.960904407548837e-06, "loss": 0.4361, "step": 5648 }, { "epoch": 0.3440002435831075, "grad_norm": 1.0167017874111102, "learning_rate": 4.960890349563259e-06, "loss": 0.4721, "step": 5649 }, { "epoch": 0.3440611393599854, "grad_norm": 1.0613997743337014, "learning_rate": 4.9608762890705766e-06, "loss": 0.5159, "step": 5650 }, { "epoch": 0.34412203513686324, "grad_norm": 1.004023828700836, "learning_rate": 4.960862226070804e-06, "loss": 0.4968, "step": 5651 }, { "epoch": 0.34418293091374114, "grad_norm": 0.9775736840899638, "learning_rate": 4.960848160563956e-06, "loss": 0.5332, "step": 5652 }, { "epoch": 0.344243826690619, "grad_norm": 0.9985045631810867, "learning_rate": 4.9608340925500475e-06, "loss": 0.5184, "step": 5653 }, { "epoch": 0.3443047224674969, "grad_norm": 0.923574054457734, "learning_rate": 4.960820022029092e-06, "loss": 0.5591, "step": 5654 }, { "epoch": 0.3443656182443747, "grad_norm": 1.0216207850905528, "learning_rate": 4.960805949001104e-06, "loss": 0.4944, "step": 5655 }, { "epoch": 0.3444265140212526, "grad_norm": 1.0041097731700754, "learning_rate": 4.960791873466099e-06, "loss": 0.5156, "step": 5656 }, { "epoch": 0.3444874097981305, "grad_norm": 1.032719814141192, "learning_rate": 4.96077779542409e-06, "loss": 0.4354, "step": 5657 }, { "epoch": 0.3445483055750084, "grad_norm": 1.0743660596059932, "learning_rate": 4.960763714875091e-06, "loss": 0.5199, "step": 5658 }, { "epoch": 0.3446092013518863, "grad_norm": 0.9966300902164862, "learning_rate": 4.960749631819118e-06, "loss": 0.5035, "step": 5659 }, { "epoch": 0.3446700971287641, "grad_norm": 1.1555321367694555, "learning_rate": 4.960735546256183e-06, "loss": 0.4497, "step": 5660 }, { "epoch": 0.344730992905642, "grad_norm": 0.9918514713904871, "learning_rate": 4.9607214581863035e-06, "loss": 0.4709, "step": 5661 }, { "epoch": 0.34479188868251986, "grad_norm": 0.9987306927661563, "learning_rate": 4.960707367609492e-06, "loss": 0.4519, "step": 5662 }, { "epoch": 0.34485278445939777, "grad_norm": 1.0940217911128838, "learning_rate": 4.9606932745257616e-06, "loss": 0.4128, "step": 5663 }, { "epoch": 0.3449136802362756, "grad_norm": 1.1165815690080039, "learning_rate": 4.96067917893513e-06, "loss": 0.5675, "step": 5664 }, { "epoch": 0.3449745760131535, "grad_norm": 1.0324173832993693, "learning_rate": 4.960665080837608e-06, "loss": 0.4196, "step": 5665 }, { "epoch": 0.34503547179003136, "grad_norm": 1.0136229581496292, "learning_rate": 4.960650980233212e-06, "loss": 0.4778, "step": 5666 }, { "epoch": 0.34509636756690926, "grad_norm": 1.160777096600884, "learning_rate": 4.960636877121957e-06, "loss": 0.4243, "step": 5667 }, { "epoch": 0.3451572633437871, "grad_norm": 0.9889867512317283, "learning_rate": 4.960622771503854e-06, "loss": 0.4209, "step": 5668 }, { "epoch": 0.345218159120665, "grad_norm": 1.023075860982401, "learning_rate": 4.960608663378922e-06, "loss": 0.4136, "step": 5669 }, { "epoch": 0.34527905489754285, "grad_norm": 1.0524811911416028, "learning_rate": 4.960594552747172e-06, "loss": 0.4621, "step": 5670 }, { "epoch": 0.34533995067442075, "grad_norm": 1.0295215205010304, "learning_rate": 4.96058043960862e-06, "loss": 0.4204, "step": 5671 }, { "epoch": 0.3454008464512986, "grad_norm": 0.9229953517575232, "learning_rate": 4.960566323963279e-06, "loss": 0.5216, "step": 5672 }, { "epoch": 0.3454617422281765, "grad_norm": 1.0804878771593358, "learning_rate": 4.960552205811164e-06, "loss": 0.5068, "step": 5673 }, { "epoch": 0.34552263800505434, "grad_norm": 1.2100715262384056, "learning_rate": 4.960538085152291e-06, "loss": 0.4159, "step": 5674 }, { "epoch": 0.34558353378193224, "grad_norm": 1.1576596188348447, "learning_rate": 4.960523961986672e-06, "loss": 0.4367, "step": 5675 }, { "epoch": 0.3456444295588101, "grad_norm": 1.0486615824620424, "learning_rate": 4.960509836314322e-06, "loss": 0.4639, "step": 5676 }, { "epoch": 0.345705325335688, "grad_norm": 1.117942636130738, "learning_rate": 4.960495708135257e-06, "loss": 0.4514, "step": 5677 }, { "epoch": 0.34576622111256583, "grad_norm": 1.0934865049733116, "learning_rate": 4.960481577449489e-06, "loss": 0.5083, "step": 5678 }, { "epoch": 0.34582711688944373, "grad_norm": 1.0023496549396533, "learning_rate": 4.960467444257033e-06, "loss": 0.4396, "step": 5679 }, { "epoch": 0.3458880126663216, "grad_norm": 1.0453654522991345, "learning_rate": 4.960453308557906e-06, "loss": 0.4881, "step": 5680 }, { "epoch": 0.3459489084431995, "grad_norm": 0.9772829635813838, "learning_rate": 4.960439170352118e-06, "loss": 0.5267, "step": 5681 }, { "epoch": 0.3460098042200773, "grad_norm": 1.0089227243252519, "learning_rate": 4.960425029639687e-06, "loss": 0.4637, "step": 5682 }, { "epoch": 0.3460706999969552, "grad_norm": 0.9624478226480379, "learning_rate": 4.960410886420625e-06, "loss": 0.4134, "step": 5683 }, { "epoch": 0.34613159577383307, "grad_norm": 0.9275128420210255, "learning_rate": 4.9603967406949495e-06, "loss": 0.5378, "step": 5684 }, { "epoch": 0.34619249155071097, "grad_norm": 0.9956527276118627, "learning_rate": 4.960382592462672e-06, "loss": 0.5263, "step": 5685 }, { "epoch": 0.3462533873275888, "grad_norm": 1.0148816202869815, "learning_rate": 4.960368441723808e-06, "loss": 0.4665, "step": 5686 }, { "epoch": 0.3463142831044667, "grad_norm": 1.0065839277913275, "learning_rate": 4.960354288478371e-06, "loss": 0.4151, "step": 5687 }, { "epoch": 0.34637517888134456, "grad_norm": 0.9965703626913762, "learning_rate": 4.9603401327263766e-06, "loss": 0.4389, "step": 5688 }, { "epoch": 0.34643607465822246, "grad_norm": 1.0036181489623104, "learning_rate": 4.960325974467839e-06, "loss": 0.4367, "step": 5689 }, { "epoch": 0.3464969704351003, "grad_norm": 0.9404187418381386, "learning_rate": 4.960311813702773e-06, "loss": 0.468, "step": 5690 }, { "epoch": 0.3465578662119782, "grad_norm": 1.1916480016027462, "learning_rate": 4.960297650431192e-06, "loss": 0.4135, "step": 5691 }, { "epoch": 0.34661876198885605, "grad_norm": 1.0224795366854924, "learning_rate": 4.96028348465311e-06, "loss": 0.4666, "step": 5692 }, { "epoch": 0.34667965776573395, "grad_norm": 0.9889907338598624, "learning_rate": 4.960269316368543e-06, "loss": 0.4931, "step": 5693 }, { "epoch": 0.3467405535426118, "grad_norm": 1.1108683280749272, "learning_rate": 4.960255145577505e-06, "loss": 0.4215, "step": 5694 }, { "epoch": 0.3468014493194897, "grad_norm": 1.1071219017619884, "learning_rate": 4.960240972280011e-06, "loss": 0.4472, "step": 5695 }, { "epoch": 0.34686234509636754, "grad_norm": 1.1282742104845087, "learning_rate": 4.960226796476073e-06, "loss": 0.3933, "step": 5696 }, { "epoch": 0.34692324087324544, "grad_norm": 1.1075404835902984, "learning_rate": 4.960212618165709e-06, "loss": 0.4583, "step": 5697 }, { "epoch": 0.3469841366501233, "grad_norm": 1.1116122039272691, "learning_rate": 4.96019843734893e-06, "loss": 0.5328, "step": 5698 }, { "epoch": 0.3470450324270012, "grad_norm": 0.9217011815339985, "learning_rate": 4.9601842540257526e-06, "loss": 0.4653, "step": 5699 }, { "epoch": 0.3471059282038791, "grad_norm": 1.0535260701902927, "learning_rate": 4.9601700681961896e-06, "loss": 0.4345, "step": 5700 }, { "epoch": 0.34716682398075693, "grad_norm": 0.9362950464501706, "learning_rate": 4.960155879860258e-06, "loss": 0.4385, "step": 5701 }, { "epoch": 0.34722771975763483, "grad_norm": 0.9356506497247293, "learning_rate": 4.96014168901797e-06, "loss": 0.5069, "step": 5702 }, { "epoch": 0.3472886155345127, "grad_norm": 0.9902603646381289, "learning_rate": 4.96012749566934e-06, "loss": 0.4851, "step": 5703 }, { "epoch": 0.3473495113113906, "grad_norm": 1.0292583811286604, "learning_rate": 4.960113299814384e-06, "loss": 0.4764, "step": 5704 }, { "epoch": 0.3474104070882684, "grad_norm": 1.025857166693417, "learning_rate": 4.960099101453116e-06, "loss": 0.4585, "step": 5705 }, { "epoch": 0.3474713028651463, "grad_norm": 1.0280971457704164, "learning_rate": 4.96008490058555e-06, "loss": 0.411, "step": 5706 }, { "epoch": 0.34753219864202417, "grad_norm": 1.0392252099942065, "learning_rate": 4.960070697211701e-06, "loss": 0.5064, "step": 5707 }, { "epoch": 0.34759309441890207, "grad_norm": 0.9781378285298085, "learning_rate": 4.960056491331583e-06, "loss": 0.463, "step": 5708 }, { "epoch": 0.3476539901957799, "grad_norm": 1.0328726950891054, "learning_rate": 4.96004228294521e-06, "loss": 0.4445, "step": 5709 }, { "epoch": 0.3477148859726578, "grad_norm": 1.0503879307668702, "learning_rate": 4.9600280720525975e-06, "loss": 0.4459, "step": 5710 }, { "epoch": 0.34777578174953566, "grad_norm": 0.965358957823711, "learning_rate": 4.9600138586537595e-06, "loss": 0.4461, "step": 5711 }, { "epoch": 0.34783667752641356, "grad_norm": 1.1426253572654612, "learning_rate": 4.95999964274871e-06, "loss": 0.4468, "step": 5712 }, { "epoch": 0.3478975733032914, "grad_norm": 1.0010894020433327, "learning_rate": 4.959985424337464e-06, "loss": 0.4505, "step": 5713 }, { "epoch": 0.3479584690801693, "grad_norm": 1.0956194348160897, "learning_rate": 4.9599712034200375e-06, "loss": 0.4717, "step": 5714 }, { "epoch": 0.34801936485704715, "grad_norm": 0.9932082966721392, "learning_rate": 4.959956979996442e-06, "loss": 0.4164, "step": 5715 }, { "epoch": 0.34808026063392505, "grad_norm": 1.1718896757585866, "learning_rate": 4.959942754066694e-06, "loss": 0.4279, "step": 5716 }, { "epoch": 0.3481411564108029, "grad_norm": 1.0855481587546072, "learning_rate": 4.959928525630808e-06, "loss": 0.4999, "step": 5717 }, { "epoch": 0.3482020521876808, "grad_norm": 1.028419086348198, "learning_rate": 4.959914294688797e-06, "loss": 0.5112, "step": 5718 }, { "epoch": 0.34826294796455864, "grad_norm": 1.0176693392470961, "learning_rate": 4.959900061240677e-06, "loss": 0.5188, "step": 5719 }, { "epoch": 0.34832384374143655, "grad_norm": 1.0220073784281685, "learning_rate": 4.959885825286462e-06, "loss": 0.426, "step": 5720 }, { "epoch": 0.3483847395183144, "grad_norm": 1.061960759379996, "learning_rate": 4.959871586826166e-06, "loss": 0.4764, "step": 5721 }, { "epoch": 0.3484456352951923, "grad_norm": 0.9283718037103699, "learning_rate": 4.959857345859804e-06, "loss": 0.5389, "step": 5722 }, { "epoch": 0.34850653107207014, "grad_norm": 1.1635623162157163, "learning_rate": 4.959843102387391e-06, "loss": 0.4511, "step": 5723 }, { "epoch": 0.34856742684894804, "grad_norm": 0.9881200507994939, "learning_rate": 4.95982885640894e-06, "loss": 0.497, "step": 5724 }, { "epoch": 0.3486283226258259, "grad_norm": 1.1225750254237012, "learning_rate": 4.959814607924468e-06, "loss": 0.4682, "step": 5725 }, { "epoch": 0.3486892184027038, "grad_norm": 1.0054768693201863, "learning_rate": 4.959800356933987e-06, "loss": 0.4167, "step": 5726 }, { "epoch": 0.3487501141795816, "grad_norm": 0.9440241898611947, "learning_rate": 4.9597861034375124e-06, "loss": 0.496, "step": 5727 }, { "epoch": 0.34881100995645953, "grad_norm": 1.1928984138330974, "learning_rate": 4.959771847435059e-06, "loss": 0.4457, "step": 5728 }, { "epoch": 0.3488719057333374, "grad_norm": 1.003615962700787, "learning_rate": 4.959757588926642e-06, "loss": 0.4507, "step": 5729 }, { "epoch": 0.3489328015102153, "grad_norm": 1.032212559849315, "learning_rate": 4.959743327912274e-06, "loss": 0.4756, "step": 5730 }, { "epoch": 0.3489936972870931, "grad_norm": 0.9533832621902606, "learning_rate": 4.959729064391972e-06, "loss": 0.4804, "step": 5731 }, { "epoch": 0.349054593063971, "grad_norm": 1.01980927652901, "learning_rate": 4.959714798365748e-06, "loss": 0.4225, "step": 5732 }, { "epoch": 0.34911548884084886, "grad_norm": 1.0262226854397782, "learning_rate": 4.9597005298336175e-06, "loss": 0.4545, "step": 5733 }, { "epoch": 0.34917638461772676, "grad_norm": 0.9638389569729167, "learning_rate": 4.959686258795596e-06, "loss": 0.5079, "step": 5734 }, { "epoch": 0.3492372803946046, "grad_norm": 1.0259882669534421, "learning_rate": 4.959671985251697e-06, "loss": 0.4428, "step": 5735 }, { "epoch": 0.3492981761714825, "grad_norm": 1.085503716431905, "learning_rate": 4.959657709201935e-06, "loss": 0.5251, "step": 5736 }, { "epoch": 0.34935907194836036, "grad_norm": 1.0068612525470007, "learning_rate": 4.959643430646326e-06, "loss": 0.5312, "step": 5737 }, { "epoch": 0.34941996772523826, "grad_norm": 1.022567661574769, "learning_rate": 4.959629149584883e-06, "loss": 0.5777, "step": 5738 }, { "epoch": 0.3494808635021161, "grad_norm": 0.9841091286510206, "learning_rate": 4.95961486601762e-06, "loss": 0.4298, "step": 5739 }, { "epoch": 0.349541759278994, "grad_norm": 0.9716153451005802, "learning_rate": 4.959600579944553e-06, "loss": 0.4698, "step": 5740 }, { "epoch": 0.3496026550558719, "grad_norm": 1.0073133319487793, "learning_rate": 4.959586291365696e-06, "loss": 0.4678, "step": 5741 }, { "epoch": 0.34966355083274975, "grad_norm": 1.0060876512794463, "learning_rate": 4.959572000281064e-06, "loss": 0.4198, "step": 5742 }, { "epoch": 0.34972444660962765, "grad_norm": 1.0899541469149239, "learning_rate": 4.959557706690671e-06, "loss": 0.4173, "step": 5743 }, { "epoch": 0.3497853423865055, "grad_norm": 1.0653359371349262, "learning_rate": 4.959543410594532e-06, "loss": 0.4405, "step": 5744 }, { "epoch": 0.3498462381633834, "grad_norm": 1.0426734887392606, "learning_rate": 4.95952911199266e-06, "loss": 0.4563, "step": 5745 }, { "epoch": 0.34990713394026124, "grad_norm": 1.015641237606067, "learning_rate": 4.959514810885072e-06, "loss": 0.4967, "step": 5746 }, { "epoch": 0.34996802971713914, "grad_norm": 1.072761470336275, "learning_rate": 4.9595005072717815e-06, "loss": 0.496, "step": 5747 }, { "epoch": 0.350028925494017, "grad_norm": 0.9827751685332833, "learning_rate": 4.959486201152803e-06, "loss": 0.5152, "step": 5748 }, { "epoch": 0.3500898212708949, "grad_norm": 1.09392015966596, "learning_rate": 4.959471892528151e-06, "loss": 0.5136, "step": 5749 }, { "epoch": 0.35015071704777273, "grad_norm": 0.9968944583252702, "learning_rate": 4.95945758139784e-06, "loss": 0.4502, "step": 5750 }, { "epoch": 0.35021161282465063, "grad_norm": 1.0782845028477257, "learning_rate": 4.959443267761886e-06, "loss": 0.4837, "step": 5751 }, { "epoch": 0.3502725086015285, "grad_norm": 1.0970497043931706, "learning_rate": 4.9594289516203e-06, "loss": 0.5353, "step": 5752 }, { "epoch": 0.3503334043784064, "grad_norm": 1.0612421716639384, "learning_rate": 4.9594146329731e-06, "loss": 0.469, "step": 5753 }, { "epoch": 0.3503943001552842, "grad_norm": 1.1095159828830734, "learning_rate": 4.9594003118203e-06, "loss": 0.4698, "step": 5754 }, { "epoch": 0.3504551959321621, "grad_norm": 0.8824566643327829, "learning_rate": 4.959385988161914e-06, "loss": 0.5397, "step": 5755 }, { "epoch": 0.35051609170903997, "grad_norm": 0.9516776107329558, "learning_rate": 4.959371661997956e-06, "loss": 0.5067, "step": 5756 }, { "epoch": 0.35057698748591787, "grad_norm": 0.9872365455371239, "learning_rate": 4.959357333328442e-06, "loss": 0.528, "step": 5757 }, { "epoch": 0.3506378832627957, "grad_norm": 1.1557805577736708, "learning_rate": 4.959343002153385e-06, "loss": 0.5139, "step": 5758 }, { "epoch": 0.3506987790396736, "grad_norm": 1.1113972547059587, "learning_rate": 4.959328668472801e-06, "loss": 0.4952, "step": 5759 }, { "epoch": 0.35075967481655146, "grad_norm": 1.0037757119469608, "learning_rate": 4.9593143322867046e-06, "loss": 0.5121, "step": 5760 }, { "epoch": 0.35082057059342936, "grad_norm": 1.074127398095707, "learning_rate": 4.959299993595109e-06, "loss": 0.4739, "step": 5761 }, { "epoch": 0.3508814663703072, "grad_norm": 0.9672592946902565, "learning_rate": 4.95928565239803e-06, "loss": 0.474, "step": 5762 }, { "epoch": 0.3509423621471851, "grad_norm": 0.9176553143674648, "learning_rate": 4.9592713086954824e-06, "loss": 0.4978, "step": 5763 }, { "epoch": 0.35100325792406295, "grad_norm": 1.0235887818533327, "learning_rate": 4.95925696248748e-06, "loss": 0.5063, "step": 5764 }, { "epoch": 0.35106415370094085, "grad_norm": 1.0355108771081158, "learning_rate": 4.9592426137740375e-06, "loss": 0.4973, "step": 5765 }, { "epoch": 0.3511250494778187, "grad_norm": 1.0868327951935244, "learning_rate": 4.95922826255517e-06, "loss": 0.442, "step": 5766 }, { "epoch": 0.3511859452546966, "grad_norm": 0.9943808499495443, "learning_rate": 4.959213908830892e-06, "loss": 0.5062, "step": 5767 }, { "epoch": 0.35124684103157444, "grad_norm": 1.0091813973218928, "learning_rate": 4.959199552601217e-06, "loss": 0.5116, "step": 5768 }, { "epoch": 0.35130773680845234, "grad_norm": 1.0203425801653885, "learning_rate": 4.959185193866161e-06, "loss": 0.5121, "step": 5769 }, { "epoch": 0.3513686325853302, "grad_norm": 1.11459253870132, "learning_rate": 4.95917083262574e-06, "loss": 0.492, "step": 5770 }, { "epoch": 0.3514295283622081, "grad_norm": 0.9990058670329051, "learning_rate": 4.9591564688799655e-06, "loss": 0.4879, "step": 5771 }, { "epoch": 0.35149042413908593, "grad_norm": 1.1077713452197215, "learning_rate": 4.9591421026288535e-06, "loss": 0.4592, "step": 5772 }, { "epoch": 0.35155131991596383, "grad_norm": 1.149168380532495, "learning_rate": 4.959127733872419e-06, "loss": 0.4174, "step": 5773 }, { "epoch": 0.3516122156928417, "grad_norm": 0.9341431746724786, "learning_rate": 4.959113362610676e-06, "loss": 0.5192, "step": 5774 }, { "epoch": 0.3516731114697196, "grad_norm": 1.0733498007118232, "learning_rate": 4.9590989888436395e-06, "loss": 0.4049, "step": 5775 }, { "epoch": 0.3517340072465974, "grad_norm": 1.1435281853543617, "learning_rate": 4.959084612571323e-06, "loss": 0.4904, "step": 5776 }, { "epoch": 0.3517949030234753, "grad_norm": 1.0227590791259593, "learning_rate": 4.959070233793743e-06, "loss": 0.4812, "step": 5777 }, { "epoch": 0.35185579880035317, "grad_norm": 0.9915332243407481, "learning_rate": 4.959055852510914e-06, "loss": 0.5722, "step": 5778 }, { "epoch": 0.35191669457723107, "grad_norm": 1.0291565911177785, "learning_rate": 4.95904146872285e-06, "loss": 0.4638, "step": 5779 }, { "epoch": 0.3519775903541089, "grad_norm": 1.045572065162049, "learning_rate": 4.959027082429565e-06, "loss": 0.3721, "step": 5780 }, { "epoch": 0.3520384861309868, "grad_norm": 0.939145688087458, "learning_rate": 4.959012693631075e-06, "loss": 0.489, "step": 5781 }, { "epoch": 0.3520993819078647, "grad_norm": 1.0354191048711159, "learning_rate": 4.958998302327394e-06, "loss": 0.4825, "step": 5782 }, { "epoch": 0.35216027768474256, "grad_norm": 0.9369769537465518, "learning_rate": 4.958983908518536e-06, "loss": 0.429, "step": 5783 }, { "epoch": 0.35222117346162046, "grad_norm": 0.9706822409635127, "learning_rate": 4.958969512204517e-06, "loss": 0.4796, "step": 5784 }, { "epoch": 0.3522820692384983, "grad_norm": 0.917289769046061, "learning_rate": 4.95895511338535e-06, "loss": 0.5213, "step": 5785 }, { "epoch": 0.3523429650153762, "grad_norm": 1.0470036822451954, "learning_rate": 4.958940712061051e-06, "loss": 0.5088, "step": 5786 }, { "epoch": 0.35240386079225405, "grad_norm": 1.0563567434829375, "learning_rate": 4.958926308231635e-06, "loss": 0.4855, "step": 5787 }, { "epoch": 0.35246475656913195, "grad_norm": 1.1407818060378725, "learning_rate": 4.958911901897115e-06, "loss": 0.4277, "step": 5788 }, { "epoch": 0.3525256523460098, "grad_norm": 1.0143565966791586, "learning_rate": 4.958897493057507e-06, "loss": 0.5061, "step": 5789 }, { "epoch": 0.3525865481228877, "grad_norm": 1.1845835356507584, "learning_rate": 4.9588830817128265e-06, "loss": 0.3821, "step": 5790 }, { "epoch": 0.35264744389976554, "grad_norm": 0.9801943779325643, "learning_rate": 4.958868667863086e-06, "loss": 0.5043, "step": 5791 }, { "epoch": 0.35270833967664345, "grad_norm": 1.063502689389363, "learning_rate": 4.958854251508301e-06, "loss": 0.5099, "step": 5792 }, { "epoch": 0.3527692354535213, "grad_norm": 1.0864233367652305, "learning_rate": 4.958839832648487e-06, "loss": 0.4546, "step": 5793 }, { "epoch": 0.3528301312303992, "grad_norm": 1.0554420087824048, "learning_rate": 4.958825411283658e-06, "loss": 0.4402, "step": 5794 }, { "epoch": 0.35289102700727704, "grad_norm": 0.9555852888090719, "learning_rate": 4.958810987413828e-06, "loss": 0.4644, "step": 5795 }, { "epoch": 0.35295192278415494, "grad_norm": 1.0803828855612032, "learning_rate": 4.958796561039014e-06, "loss": 0.4347, "step": 5796 }, { "epoch": 0.3530128185610328, "grad_norm": 1.0269595084933079, "learning_rate": 4.958782132159228e-06, "loss": 0.4503, "step": 5797 }, { "epoch": 0.3530737143379107, "grad_norm": 1.0500386790331417, "learning_rate": 4.958767700774486e-06, "loss": 0.45, "step": 5798 }, { "epoch": 0.3531346101147885, "grad_norm": 0.9927885055534464, "learning_rate": 4.958753266884803e-06, "loss": 0.5282, "step": 5799 }, { "epoch": 0.35319550589166643, "grad_norm": 1.1094743893515495, "learning_rate": 4.958738830490193e-06, "loss": 0.4271, "step": 5800 }, { "epoch": 0.3532564016685443, "grad_norm": 0.9892202154896496, "learning_rate": 4.958724391590671e-06, "loss": 0.4822, "step": 5801 }, { "epoch": 0.3533172974454222, "grad_norm": 1.024965452247808, "learning_rate": 4.958709950186253e-06, "loss": 0.4534, "step": 5802 }, { "epoch": 0.3533781932223, "grad_norm": 1.1534593107672708, "learning_rate": 4.95869550627695e-06, "loss": 0.4392, "step": 5803 }, { "epoch": 0.3534390889991779, "grad_norm": 0.8905709010255881, "learning_rate": 4.958681059862781e-06, "loss": 0.4984, "step": 5804 }, { "epoch": 0.35349998477605576, "grad_norm": 1.0194239868218125, "learning_rate": 4.958666610943758e-06, "loss": 0.4286, "step": 5805 }, { "epoch": 0.35356088055293367, "grad_norm": 1.0081393567153918, "learning_rate": 4.958652159519896e-06, "loss": 0.4657, "step": 5806 }, { "epoch": 0.3536217763298115, "grad_norm": 1.0615990008753917, "learning_rate": 4.958637705591211e-06, "loss": 0.4956, "step": 5807 }, { "epoch": 0.3536826721066894, "grad_norm": 1.0766787303443972, "learning_rate": 4.958623249157718e-06, "loss": 0.4099, "step": 5808 }, { "epoch": 0.35374356788356726, "grad_norm": 1.002634271266341, "learning_rate": 4.95860879021943e-06, "loss": 0.48, "step": 5809 }, { "epoch": 0.35380446366044516, "grad_norm": 1.091201787901894, "learning_rate": 4.9585943287763625e-06, "loss": 0.4733, "step": 5810 }, { "epoch": 0.353865359437323, "grad_norm": 1.031762857670462, "learning_rate": 4.95857986482853e-06, "loss": 0.4952, "step": 5811 }, { "epoch": 0.3539262552142009, "grad_norm": 0.9891647247891716, "learning_rate": 4.958565398375947e-06, "loss": 0.4236, "step": 5812 }, { "epoch": 0.35398715099107875, "grad_norm": 1.0209388457909196, "learning_rate": 4.9585509294186295e-06, "loss": 0.4106, "step": 5813 }, { "epoch": 0.35404804676795665, "grad_norm": 1.1386349403009695, "learning_rate": 4.958536457956591e-06, "loss": 0.4293, "step": 5814 }, { "epoch": 0.3541089425448345, "grad_norm": 1.0565524029271371, "learning_rate": 4.958521983989847e-06, "loss": 0.453, "step": 5815 }, { "epoch": 0.3541698383217124, "grad_norm": 1.068358881615297, "learning_rate": 4.9585075075184115e-06, "loss": 0.5053, "step": 5816 }, { "epoch": 0.35423073409859024, "grad_norm": 1.0022441318320334, "learning_rate": 4.958493028542299e-06, "loss": 0.4499, "step": 5817 }, { "epoch": 0.35429162987546814, "grad_norm": 1.1444720478209816, "learning_rate": 4.958478547061526e-06, "loss": 0.4881, "step": 5818 }, { "epoch": 0.354352525652346, "grad_norm": 1.0149181365284936, "learning_rate": 4.9584640630761066e-06, "loss": 0.55, "step": 5819 }, { "epoch": 0.3544134214292239, "grad_norm": 1.0907623858407682, "learning_rate": 4.958449576586054e-06, "loss": 0.422, "step": 5820 }, { "epoch": 0.35447431720610173, "grad_norm": 0.9901606957254873, "learning_rate": 4.9584350875913854e-06, "loss": 0.5226, "step": 5821 }, { "epoch": 0.35453521298297963, "grad_norm": 1.0566735381028693, "learning_rate": 4.958420596092113e-06, "loss": 0.4576, "step": 5822 }, { "epoch": 0.35459610875985753, "grad_norm": 1.113906757411018, "learning_rate": 4.958406102088253e-06, "loss": 0.4568, "step": 5823 }, { "epoch": 0.3546570045367354, "grad_norm": 0.9879151231720895, "learning_rate": 4.95839160557982e-06, "loss": 0.4293, "step": 5824 }, { "epoch": 0.3547179003136133, "grad_norm": 1.0056470562277258, "learning_rate": 4.95837710656683e-06, "loss": 0.4639, "step": 5825 }, { "epoch": 0.3547787960904911, "grad_norm": 1.0008365360185616, "learning_rate": 4.958362605049295e-06, "loss": 0.5364, "step": 5826 }, { "epoch": 0.354839691867369, "grad_norm": 0.9719234095444994, "learning_rate": 4.958348101027232e-06, "loss": 0.4231, "step": 5827 }, { "epoch": 0.35490058764424687, "grad_norm": 0.9959546951866003, "learning_rate": 4.958333594500654e-06, "loss": 0.4415, "step": 5828 }, { "epoch": 0.35496148342112477, "grad_norm": 0.9918594752136062, "learning_rate": 4.958319085469578e-06, "loss": 0.4973, "step": 5829 }, { "epoch": 0.3550223791980026, "grad_norm": 0.915755926993677, "learning_rate": 4.958304573934017e-06, "loss": 0.5565, "step": 5830 }, { "epoch": 0.3550832749748805, "grad_norm": 1.0460126732799004, "learning_rate": 4.9582900598939874e-06, "loss": 0.4983, "step": 5831 }, { "epoch": 0.35514417075175836, "grad_norm": 0.922635357327736, "learning_rate": 4.958275543349501e-06, "loss": 0.4742, "step": 5832 }, { "epoch": 0.35520506652863626, "grad_norm": 1.0413815637127861, "learning_rate": 4.958261024300577e-06, "loss": 0.4961, "step": 5833 }, { "epoch": 0.3552659623055141, "grad_norm": 1.0001349248573175, "learning_rate": 4.958246502747226e-06, "loss": 0.4509, "step": 5834 }, { "epoch": 0.355326858082392, "grad_norm": 0.9623168925560192, "learning_rate": 4.958231978689465e-06, "loss": 0.5614, "step": 5835 }, { "epoch": 0.35538775385926985, "grad_norm": 1.1469875781620058, "learning_rate": 4.9582174521273095e-06, "loss": 0.4463, "step": 5836 }, { "epoch": 0.35544864963614775, "grad_norm": 0.9587321610466831, "learning_rate": 4.958202923060772e-06, "loss": 0.463, "step": 5837 }, { "epoch": 0.3555095454130256, "grad_norm": 1.0598560657927787, "learning_rate": 4.958188391489869e-06, "loss": 0.5083, "step": 5838 }, { "epoch": 0.3555704411899035, "grad_norm": 0.9499490475287559, "learning_rate": 4.958173857414615e-06, "loss": 0.4736, "step": 5839 }, { "epoch": 0.35563133696678134, "grad_norm": 1.0367236745305477, "learning_rate": 4.958159320835023e-06, "loss": 0.4283, "step": 5840 }, { "epoch": 0.35569223274365924, "grad_norm": 0.9668126320429052, "learning_rate": 4.95814478175111e-06, "loss": 0.5009, "step": 5841 }, { "epoch": 0.3557531285205371, "grad_norm": 0.9602854132970177, "learning_rate": 4.958130240162891e-06, "loss": 0.5055, "step": 5842 }, { "epoch": 0.355814024297415, "grad_norm": 0.9878791948395165, "learning_rate": 4.95811569607038e-06, "loss": 0.4559, "step": 5843 }, { "epoch": 0.35587492007429283, "grad_norm": 1.0240913335563584, "learning_rate": 4.958101149473591e-06, "loss": 0.4832, "step": 5844 }, { "epoch": 0.35593581585117073, "grad_norm": 1.1151448624456637, "learning_rate": 4.95808660037254e-06, "loss": 0.4577, "step": 5845 }, { "epoch": 0.3559967116280486, "grad_norm": 1.0637491124643705, "learning_rate": 4.958072048767243e-06, "loss": 0.4481, "step": 5846 }, { "epoch": 0.3560576074049265, "grad_norm": 1.10364794145267, "learning_rate": 4.958057494657711e-06, "loss": 0.5197, "step": 5847 }, { "epoch": 0.3561185031818043, "grad_norm": 0.9576133491012419, "learning_rate": 4.958042938043962e-06, "loss": 0.5178, "step": 5848 }, { "epoch": 0.3561793989586822, "grad_norm": 1.0760492963852222, "learning_rate": 4.95802837892601e-06, "loss": 0.4917, "step": 5849 }, { "epoch": 0.35624029473556007, "grad_norm": 1.0367700574232612, "learning_rate": 4.958013817303871e-06, "loss": 0.5404, "step": 5850 }, { "epoch": 0.35630119051243797, "grad_norm": 1.053555659651629, "learning_rate": 4.957999253177557e-06, "loss": 0.4468, "step": 5851 }, { "epoch": 0.3563620862893158, "grad_norm": 1.0442939819887946, "learning_rate": 4.9579846865470845e-06, "loss": 0.4519, "step": 5852 }, { "epoch": 0.3564229820661937, "grad_norm": 1.0204468980682833, "learning_rate": 4.957970117412468e-06, "loss": 0.5102, "step": 5853 }, { "epoch": 0.35648387784307156, "grad_norm": 1.0682810745415983, "learning_rate": 4.957955545773724e-06, "loss": 0.435, "step": 5854 }, { "epoch": 0.35654477361994946, "grad_norm": 1.040436312857044, "learning_rate": 4.957940971630866e-06, "loss": 0.4493, "step": 5855 }, { "epoch": 0.3566056693968273, "grad_norm": 1.1527778248080882, "learning_rate": 4.957926394983908e-06, "loss": 0.5319, "step": 5856 }, { "epoch": 0.3566665651737052, "grad_norm": 1.0840078558014805, "learning_rate": 4.957911815832865e-06, "loss": 0.4274, "step": 5857 }, { "epoch": 0.35672746095058305, "grad_norm": 1.0958302519497423, "learning_rate": 4.9578972341777544e-06, "loss": 0.4354, "step": 5858 }, { "epoch": 0.35678835672746095, "grad_norm": 0.9617409420146571, "learning_rate": 4.957882650018588e-06, "loss": 0.5326, "step": 5859 }, { "epoch": 0.3568492525043388, "grad_norm": 1.0476802978180613, "learning_rate": 4.957868063355382e-06, "loss": 0.4547, "step": 5860 }, { "epoch": 0.3569101482812167, "grad_norm": 1.0283087315519477, "learning_rate": 4.9578534741881514e-06, "loss": 0.4609, "step": 5861 }, { "epoch": 0.35697104405809454, "grad_norm": 1.0909251695228843, "learning_rate": 4.957838882516911e-06, "loss": 0.4631, "step": 5862 }, { "epoch": 0.35703193983497244, "grad_norm": 1.11477768071488, "learning_rate": 4.957824288341675e-06, "loss": 0.4779, "step": 5863 }, { "epoch": 0.35709283561185035, "grad_norm": 1.0802016087609425, "learning_rate": 4.9578096916624584e-06, "loss": 0.4671, "step": 5864 }, { "epoch": 0.3571537313887282, "grad_norm": 0.9580290017027775, "learning_rate": 4.957795092479276e-06, "loss": 0.5113, "step": 5865 }, { "epoch": 0.3572146271656061, "grad_norm": 1.0112167303461215, "learning_rate": 4.957780490792145e-06, "loss": 0.4752, "step": 5866 }, { "epoch": 0.35727552294248394, "grad_norm": 1.1463074162289153, "learning_rate": 4.957765886601076e-06, "loss": 0.3739, "step": 5867 }, { "epoch": 0.35733641871936184, "grad_norm": 0.9852992350315424, "learning_rate": 4.957751279906088e-06, "loss": 0.4712, "step": 5868 }, { "epoch": 0.3573973144962397, "grad_norm": 1.0597298246839522, "learning_rate": 4.957736670707193e-06, "loss": 0.4082, "step": 5869 }, { "epoch": 0.3574582102731176, "grad_norm": 1.036383031105337, "learning_rate": 4.957722059004408e-06, "loss": 0.453, "step": 5870 }, { "epoch": 0.3575191060499954, "grad_norm": 1.0157959772736564, "learning_rate": 4.957707444797746e-06, "loss": 0.5129, "step": 5871 }, { "epoch": 0.35758000182687333, "grad_norm": 0.9954113804903888, "learning_rate": 4.957692828087223e-06, "loss": 0.4758, "step": 5872 }, { "epoch": 0.3576408976037512, "grad_norm": 1.0486211271368207, "learning_rate": 4.957678208872854e-06, "loss": 0.4482, "step": 5873 }, { "epoch": 0.3577017933806291, "grad_norm": 1.0919893504218119, "learning_rate": 4.957663587154653e-06, "loss": 0.4534, "step": 5874 }, { "epoch": 0.3577626891575069, "grad_norm": 1.0218558136429827, "learning_rate": 4.957648962932635e-06, "loss": 0.4999, "step": 5875 }, { "epoch": 0.3578235849343848, "grad_norm": 0.982785928377609, "learning_rate": 4.9576343362068165e-06, "loss": 0.5139, "step": 5876 }, { "epoch": 0.35788448071126266, "grad_norm": 1.0088206267948443, "learning_rate": 4.95761970697721e-06, "loss": 0.4172, "step": 5877 }, { "epoch": 0.35794537648814057, "grad_norm": 0.9669306785284851, "learning_rate": 4.957605075243833e-06, "loss": 0.4672, "step": 5878 }, { "epoch": 0.3580062722650184, "grad_norm": 0.9708568619355379, "learning_rate": 4.9575904410066985e-06, "loss": 0.4848, "step": 5879 }, { "epoch": 0.3580671680418963, "grad_norm": 0.9762028652241797, "learning_rate": 4.9575758042658215e-06, "loss": 0.4817, "step": 5880 }, { "epoch": 0.35812806381877416, "grad_norm": 1.0297337999808611, "learning_rate": 4.9575611650212176e-06, "loss": 0.5041, "step": 5881 }, { "epoch": 0.35818895959565206, "grad_norm": 1.0710275766550497, "learning_rate": 4.957546523272902e-06, "loss": 0.489, "step": 5882 }, { "epoch": 0.3582498553725299, "grad_norm": 1.0105072600979745, "learning_rate": 4.957531879020888e-06, "loss": 0.4489, "step": 5883 }, { "epoch": 0.3583107511494078, "grad_norm": 1.0257201529159878, "learning_rate": 4.9575172322651935e-06, "loss": 0.4916, "step": 5884 }, { "epoch": 0.35837164692628565, "grad_norm": 1.026349944225731, "learning_rate": 4.95750258300583e-06, "loss": 0.4709, "step": 5885 }, { "epoch": 0.35843254270316355, "grad_norm": 1.0441627146862915, "learning_rate": 4.957487931242814e-06, "loss": 0.4669, "step": 5886 }, { "epoch": 0.3584934384800414, "grad_norm": 0.9887847953389347, "learning_rate": 4.957473276976161e-06, "loss": 0.4836, "step": 5887 }, { "epoch": 0.3585543342569193, "grad_norm": 1.0970195300581362, "learning_rate": 4.957458620205885e-06, "loss": 0.4464, "step": 5888 }, { "epoch": 0.35861523003379714, "grad_norm": 1.1252875438933703, "learning_rate": 4.957443960932001e-06, "loss": 0.4573, "step": 5889 }, { "epoch": 0.35867612581067504, "grad_norm": 1.059057300506433, "learning_rate": 4.957429299154525e-06, "loss": 0.3822, "step": 5890 }, { "epoch": 0.3587370215875529, "grad_norm": 0.9952782178007933, "learning_rate": 4.957414634873469e-06, "loss": 0.4964, "step": 5891 }, { "epoch": 0.3587979173644308, "grad_norm": 1.0432259786015223, "learning_rate": 4.957399968088853e-06, "loss": 0.4186, "step": 5892 }, { "epoch": 0.35885881314130863, "grad_norm": 1.0519930358030436, "learning_rate": 4.9573852988006875e-06, "loss": 0.4921, "step": 5893 }, { "epoch": 0.35891970891818653, "grad_norm": 1.0053258016141355, "learning_rate": 4.957370627008989e-06, "loss": 0.4484, "step": 5894 }, { "epoch": 0.3589806046950644, "grad_norm": 1.020268077113043, "learning_rate": 4.957355952713772e-06, "loss": 0.4695, "step": 5895 }, { "epoch": 0.3590415004719423, "grad_norm": 0.9997404981494037, "learning_rate": 4.957341275915053e-06, "loss": 0.517, "step": 5896 }, { "epoch": 0.3591023962488201, "grad_norm": 1.0143904090800628, "learning_rate": 4.957326596612845e-06, "loss": 0.3909, "step": 5897 }, { "epoch": 0.359163292025698, "grad_norm": 0.9973650985281276, "learning_rate": 4.957311914807164e-06, "loss": 0.5095, "step": 5898 }, { "epoch": 0.35922418780257587, "grad_norm": 1.0274053871835211, "learning_rate": 4.9572972304980235e-06, "loss": 0.5233, "step": 5899 }, { "epoch": 0.35928508357945377, "grad_norm": 0.9770482494953905, "learning_rate": 4.957282543685441e-06, "loss": 0.4522, "step": 5900 }, { "epoch": 0.3593459793563316, "grad_norm": 1.0679404824122398, "learning_rate": 4.9572678543694305e-06, "loss": 0.4161, "step": 5901 }, { "epoch": 0.3594068751332095, "grad_norm": 1.084481851770722, "learning_rate": 4.957253162550006e-06, "loss": 0.4829, "step": 5902 }, { "epoch": 0.35946777091008736, "grad_norm": 0.9951622234708237, "learning_rate": 4.957238468227183e-06, "loss": 0.4336, "step": 5903 }, { "epoch": 0.35952866668696526, "grad_norm": 1.0325057298968152, "learning_rate": 4.957223771400977e-06, "loss": 0.4713, "step": 5904 }, { "epoch": 0.35958956246384316, "grad_norm": 0.9891865094815111, "learning_rate": 4.957209072071402e-06, "loss": 0.4927, "step": 5905 }, { "epoch": 0.359650458240721, "grad_norm": 1.0428126020874233, "learning_rate": 4.957194370238473e-06, "loss": 0.4297, "step": 5906 }, { "epoch": 0.3597113540175989, "grad_norm": 1.0278613965896726, "learning_rate": 4.957179665902206e-06, "loss": 0.4625, "step": 5907 }, { "epoch": 0.35977224979447675, "grad_norm": 1.0280263358718158, "learning_rate": 4.957164959062616e-06, "loss": 0.4736, "step": 5908 }, { "epoch": 0.35983314557135465, "grad_norm": 1.0602336656236964, "learning_rate": 4.957150249719716e-06, "loss": 0.5164, "step": 5909 }, { "epoch": 0.3598940413482325, "grad_norm": 1.0546492461331096, "learning_rate": 4.957135537873524e-06, "loss": 0.4243, "step": 5910 }, { "epoch": 0.3599549371251104, "grad_norm": 1.0677673155637681, "learning_rate": 4.957120823524053e-06, "loss": 0.4208, "step": 5911 }, { "epoch": 0.36001583290198824, "grad_norm": 1.0344323393209731, "learning_rate": 4.957106106671318e-06, "loss": 0.4674, "step": 5912 }, { "epoch": 0.36007672867886614, "grad_norm": 0.985177967980951, "learning_rate": 4.957091387315334e-06, "loss": 0.5185, "step": 5913 }, { "epoch": 0.360137624455744, "grad_norm": 0.9915911080927179, "learning_rate": 4.957076665456117e-06, "loss": 0.5027, "step": 5914 }, { "epoch": 0.3601985202326219, "grad_norm": 1.0913910746436848, "learning_rate": 4.957061941093681e-06, "loss": 0.4411, "step": 5915 }, { "epoch": 0.36025941600949973, "grad_norm": 1.1780768474503296, "learning_rate": 4.957047214228042e-06, "loss": 0.4678, "step": 5916 }, { "epoch": 0.36032031178637763, "grad_norm": 0.9855363408467105, "learning_rate": 4.957032484859214e-06, "loss": 0.4825, "step": 5917 }, { "epoch": 0.3603812075632555, "grad_norm": 0.9639579403525318, "learning_rate": 4.957017752987212e-06, "loss": 0.4485, "step": 5918 }, { "epoch": 0.3604421033401334, "grad_norm": 1.085007529057979, "learning_rate": 4.957003018612052e-06, "loss": 0.4171, "step": 5919 }, { "epoch": 0.3605029991170112, "grad_norm": 1.0193559091832947, "learning_rate": 4.956988281733748e-06, "loss": 0.4327, "step": 5920 }, { "epoch": 0.3605638948938891, "grad_norm": 1.0616037944886991, "learning_rate": 4.956973542352316e-06, "loss": 0.4418, "step": 5921 }, { "epoch": 0.36062479067076697, "grad_norm": 0.9602900213930493, "learning_rate": 4.9569588004677695e-06, "loss": 0.5582, "step": 5922 }, { "epoch": 0.36068568644764487, "grad_norm": 1.1299709087996834, "learning_rate": 4.9569440560801256e-06, "loss": 0.4631, "step": 5923 }, { "epoch": 0.3607465822245227, "grad_norm": 0.9877585563639111, "learning_rate": 4.956929309189397e-06, "loss": 0.4663, "step": 5924 }, { "epoch": 0.3608074780014006, "grad_norm": 1.0526525284122275, "learning_rate": 4.956914559795601e-06, "loss": 0.4537, "step": 5925 }, { "epoch": 0.36086837377827846, "grad_norm": 0.9971557306201985, "learning_rate": 4.956899807898751e-06, "loss": 0.4661, "step": 5926 }, { "epoch": 0.36092926955515636, "grad_norm": 0.9939795988172753, "learning_rate": 4.956885053498862e-06, "loss": 0.5441, "step": 5927 }, { "epoch": 0.3609901653320342, "grad_norm": 1.01095820218077, "learning_rate": 4.95687029659595e-06, "loss": 0.4489, "step": 5928 }, { "epoch": 0.3610510611089121, "grad_norm": 1.043583789849136, "learning_rate": 4.95685553719003e-06, "loss": 0.4151, "step": 5929 }, { "epoch": 0.36111195688578995, "grad_norm": 1.0294594766415035, "learning_rate": 4.956840775281117e-06, "loss": 0.4502, "step": 5930 }, { "epoch": 0.36117285266266785, "grad_norm": 1.0995680493589644, "learning_rate": 4.956826010869224e-06, "loss": 0.4109, "step": 5931 }, { "epoch": 0.3612337484395457, "grad_norm": 1.0974756502020424, "learning_rate": 4.95681124395437e-06, "loss": 0.4954, "step": 5932 }, { "epoch": 0.3612946442164236, "grad_norm": 1.048101158622435, "learning_rate": 4.956796474536566e-06, "loss": 0.506, "step": 5933 }, { "epoch": 0.36135553999330144, "grad_norm": 1.127577634585211, "learning_rate": 4.95678170261583e-06, "loss": 0.4771, "step": 5934 }, { "epoch": 0.36141643577017935, "grad_norm": 1.0314552170178095, "learning_rate": 4.956766928192175e-06, "loss": 0.4116, "step": 5935 }, { "epoch": 0.3614773315470572, "grad_norm": 1.0705066783684076, "learning_rate": 4.956752151265617e-06, "loss": 0.4073, "step": 5936 }, { "epoch": 0.3615382273239351, "grad_norm": 0.9840030042519587, "learning_rate": 4.956737371836172e-06, "loss": 0.4869, "step": 5937 }, { "epoch": 0.36159912310081294, "grad_norm": 1.0446227760409263, "learning_rate": 4.956722589903853e-06, "loss": 0.4484, "step": 5938 }, { "epoch": 0.36166001887769084, "grad_norm": 1.0987168159869072, "learning_rate": 4.956707805468677e-06, "loss": 0.4669, "step": 5939 }, { "epoch": 0.3617209146545687, "grad_norm": 1.0578534812732725, "learning_rate": 4.956693018530657e-06, "loss": 0.4622, "step": 5940 }, { "epoch": 0.3617818104314466, "grad_norm": 0.983878117425382, "learning_rate": 4.95667822908981e-06, "loss": 0.5055, "step": 5941 }, { "epoch": 0.3618427062083244, "grad_norm": 1.0774122997102273, "learning_rate": 4.956663437146149e-06, "loss": 0.441, "step": 5942 }, { "epoch": 0.36190360198520233, "grad_norm": 0.9521728231100681, "learning_rate": 4.956648642699693e-06, "loss": 0.4861, "step": 5943 }, { "epoch": 0.3619644977620802, "grad_norm": 0.9183915096261009, "learning_rate": 4.9566338457504525e-06, "loss": 0.504, "step": 5944 }, { "epoch": 0.3620253935389581, "grad_norm": 1.058534718060545, "learning_rate": 4.956619046298444e-06, "loss": 0.471, "step": 5945 }, { "epoch": 0.362086289315836, "grad_norm": 1.1738857336130304, "learning_rate": 4.956604244343684e-06, "loss": 0.4049, "step": 5946 }, { "epoch": 0.3621471850927138, "grad_norm": 0.9804009311101864, "learning_rate": 4.9565894398861875e-06, "loss": 0.481, "step": 5947 }, { "epoch": 0.3622080808695917, "grad_norm": 1.038915448254017, "learning_rate": 4.9565746329259675e-06, "loss": 0.3975, "step": 5948 }, { "epoch": 0.36226897664646956, "grad_norm": 1.1301154290974451, "learning_rate": 4.956559823463041e-06, "loss": 0.4331, "step": 5949 }, { "epoch": 0.36232987242334747, "grad_norm": 1.02384675174766, "learning_rate": 4.9565450114974224e-06, "loss": 0.4588, "step": 5950 }, { "epoch": 0.3623907682002253, "grad_norm": 1.0812968585719447, "learning_rate": 4.956530197029127e-06, "loss": 0.4273, "step": 5951 }, { "epoch": 0.3624516639771032, "grad_norm": 1.0427850818041422, "learning_rate": 4.9565153800581685e-06, "loss": 0.5305, "step": 5952 }, { "epoch": 0.36251255975398106, "grad_norm": 1.1440136500790477, "learning_rate": 4.956500560584565e-06, "loss": 0.4043, "step": 5953 }, { "epoch": 0.36257345553085896, "grad_norm": 1.0986524171727288, "learning_rate": 4.9564857386083285e-06, "loss": 0.413, "step": 5954 }, { "epoch": 0.3626343513077368, "grad_norm": 1.0234778749836893, "learning_rate": 4.956470914129475e-06, "loss": 0.4574, "step": 5955 }, { "epoch": 0.3626952470846147, "grad_norm": 1.023343561098871, "learning_rate": 4.956456087148022e-06, "loss": 0.502, "step": 5956 }, { "epoch": 0.36275614286149255, "grad_norm": 1.0805837502807247, "learning_rate": 4.956441257663981e-06, "loss": 0.417, "step": 5957 }, { "epoch": 0.36281703863837045, "grad_norm": 1.0675542983078854, "learning_rate": 4.956426425677369e-06, "loss": 0.4407, "step": 5958 }, { "epoch": 0.3628779344152483, "grad_norm": 1.1287329944727151, "learning_rate": 4.9564115911882005e-06, "loss": 0.4814, "step": 5959 }, { "epoch": 0.3629388301921262, "grad_norm": 0.9876383151567801, "learning_rate": 4.956396754196492e-06, "loss": 0.4352, "step": 5960 }, { "epoch": 0.36299972596900404, "grad_norm": 1.067772991433254, "learning_rate": 4.956381914702256e-06, "loss": 0.4523, "step": 5961 }, { "epoch": 0.36306062174588194, "grad_norm": 0.8623014141089806, "learning_rate": 4.95636707270551e-06, "loss": 0.5631, "step": 5962 }, { "epoch": 0.3631215175227598, "grad_norm": 0.9841535355028717, "learning_rate": 4.956352228206269e-06, "loss": 0.4709, "step": 5963 }, { "epoch": 0.3631824132996377, "grad_norm": 1.0352731938351536, "learning_rate": 4.956337381204547e-06, "loss": 0.5257, "step": 5964 }, { "epoch": 0.36324330907651553, "grad_norm": 1.019479988963735, "learning_rate": 4.956322531700359e-06, "loss": 0.4511, "step": 5965 }, { "epoch": 0.36330420485339343, "grad_norm": 0.947186149821031, "learning_rate": 4.956307679693721e-06, "loss": 0.4594, "step": 5966 }, { "epoch": 0.3633651006302713, "grad_norm": 0.9860605755331532, "learning_rate": 4.956292825184647e-06, "loss": 0.5054, "step": 5967 }, { "epoch": 0.3634259964071492, "grad_norm": 1.0246779620366455, "learning_rate": 4.956277968173155e-06, "loss": 0.5152, "step": 5968 }, { "epoch": 0.363486892184027, "grad_norm": 1.0529668846959945, "learning_rate": 4.956263108659256e-06, "loss": 0.449, "step": 5969 }, { "epoch": 0.3635477879609049, "grad_norm": 0.9493275473743187, "learning_rate": 4.956248246642968e-06, "loss": 0.4919, "step": 5970 }, { "epoch": 0.36360868373778277, "grad_norm": 0.9768143101890996, "learning_rate": 4.956233382124306e-06, "loss": 0.5518, "step": 5971 }, { "epoch": 0.36366957951466067, "grad_norm": 1.1833682463256738, "learning_rate": 4.956218515103283e-06, "loss": 0.3932, "step": 5972 }, { "epoch": 0.3637304752915385, "grad_norm": 1.0201007541003366, "learning_rate": 4.9562036455799165e-06, "loss": 0.4542, "step": 5973 }, { "epoch": 0.3637913710684164, "grad_norm": 1.034413844084137, "learning_rate": 4.956188773554221e-06, "loss": 0.4675, "step": 5974 }, { "epoch": 0.36385226684529426, "grad_norm": 1.0167314745307234, "learning_rate": 4.956173899026212e-06, "loss": 0.4901, "step": 5975 }, { "epoch": 0.36391316262217216, "grad_norm": 1.0073842995215903, "learning_rate": 4.956159021995903e-06, "loss": 0.4073, "step": 5976 }, { "epoch": 0.36397405839905, "grad_norm": 1.024695318359912, "learning_rate": 4.95614414246331e-06, "loss": 0.4977, "step": 5977 }, { "epoch": 0.3640349541759279, "grad_norm": 1.0150093288673316, "learning_rate": 4.956129260428449e-06, "loss": 0.5101, "step": 5978 }, { "epoch": 0.36409584995280575, "grad_norm": 1.0179480388545166, "learning_rate": 4.956114375891335e-06, "loss": 0.3947, "step": 5979 }, { "epoch": 0.36415674572968365, "grad_norm": 1.0016715084236207, "learning_rate": 4.956099488851982e-06, "loss": 0.4469, "step": 5980 }, { "epoch": 0.3642176415065615, "grad_norm": 0.9754138383953196, "learning_rate": 4.956084599310407e-06, "loss": 0.4588, "step": 5981 }, { "epoch": 0.3642785372834394, "grad_norm": 1.0712835761578299, "learning_rate": 4.956069707266623e-06, "loss": 0.4862, "step": 5982 }, { "epoch": 0.36433943306031724, "grad_norm": 1.091722692119818, "learning_rate": 4.956054812720646e-06, "loss": 0.4276, "step": 5983 }, { "epoch": 0.36440032883719514, "grad_norm": 0.9838303738300328, "learning_rate": 4.956039915672492e-06, "loss": 0.4839, "step": 5984 }, { "epoch": 0.364461224614073, "grad_norm": 1.0320232625659702, "learning_rate": 4.956025016122176e-06, "loss": 0.4403, "step": 5985 }, { "epoch": 0.3645221203909509, "grad_norm": 1.0298611050228113, "learning_rate": 4.956010114069712e-06, "loss": 0.4566, "step": 5986 }, { "epoch": 0.3645830161678288, "grad_norm": 0.9812459930684417, "learning_rate": 4.955995209515117e-06, "loss": 0.4977, "step": 5987 }, { "epoch": 0.36464391194470663, "grad_norm": 0.9667187988899829, "learning_rate": 4.9559803024584045e-06, "loss": 0.5351, "step": 5988 }, { "epoch": 0.36470480772158453, "grad_norm": 1.1037723351518227, "learning_rate": 4.95596539289959e-06, "loss": 0.49, "step": 5989 }, { "epoch": 0.3647657034984624, "grad_norm": 1.0186171287888655, "learning_rate": 4.955950480838689e-06, "loss": 0.456, "step": 5990 }, { "epoch": 0.3648265992753403, "grad_norm": 0.9995478932858419, "learning_rate": 4.955935566275717e-06, "loss": 0.4914, "step": 5991 }, { "epoch": 0.3648874950522181, "grad_norm": 0.9940440476334215, "learning_rate": 4.955920649210689e-06, "loss": 0.4102, "step": 5992 }, { "epoch": 0.364948390829096, "grad_norm": 1.0086267726181692, "learning_rate": 4.95590572964362e-06, "loss": 0.4235, "step": 5993 }, { "epoch": 0.36500928660597387, "grad_norm": 0.9654380608696763, "learning_rate": 4.955890807574525e-06, "loss": 0.4388, "step": 5994 }, { "epoch": 0.36507018238285177, "grad_norm": 1.0325453798195798, "learning_rate": 4.9558758830034205e-06, "loss": 0.4888, "step": 5995 }, { "epoch": 0.3651310781597296, "grad_norm": 0.9358713903856345, "learning_rate": 4.955860955930319e-06, "loss": 0.4719, "step": 5996 }, { "epoch": 0.3651919739366075, "grad_norm": 1.0536437970184025, "learning_rate": 4.955846026355239e-06, "loss": 0.4528, "step": 5997 }, { "epoch": 0.36525286971348536, "grad_norm": 1.0938475489143789, "learning_rate": 4.955831094278194e-06, "loss": 0.457, "step": 5998 }, { "epoch": 0.36531376549036326, "grad_norm": 1.116730665115478, "learning_rate": 4.9558161596991985e-06, "loss": 0.4586, "step": 5999 }, { "epoch": 0.3653746612672411, "grad_norm": 0.9505505325234672, "learning_rate": 4.955801222618269e-06, "loss": 0.4008, "step": 6000 }, { "epoch": 0.365435557044119, "grad_norm": 1.021256397977252, "learning_rate": 4.95578628303542e-06, "loss": 0.5127, "step": 6001 }, { "epoch": 0.36549645282099685, "grad_norm": 0.9759332871170654, "learning_rate": 4.955771340950667e-06, "loss": 0.4663, "step": 6002 }, { "epoch": 0.36555734859787475, "grad_norm": 1.0590600732059887, "learning_rate": 4.955756396364026e-06, "loss": 0.4634, "step": 6003 }, { "epoch": 0.3656182443747526, "grad_norm": 1.0997285392935592, "learning_rate": 4.955741449275511e-06, "loss": 0.5017, "step": 6004 }, { "epoch": 0.3656791401516305, "grad_norm": 1.1000296468542754, "learning_rate": 4.955726499685137e-06, "loss": 0.4298, "step": 6005 }, { "epoch": 0.36574003592850834, "grad_norm": 0.9606171714324424, "learning_rate": 4.955711547592921e-06, "loss": 0.4857, "step": 6006 }, { "epoch": 0.36580093170538625, "grad_norm": 0.919421173462739, "learning_rate": 4.9556965929988765e-06, "loss": 0.5104, "step": 6007 }, { "epoch": 0.3658618274822641, "grad_norm": 1.0373120712610218, "learning_rate": 4.955681635903019e-06, "loss": 0.4738, "step": 6008 }, { "epoch": 0.365922723259142, "grad_norm": 0.9709169043290344, "learning_rate": 4.955666676305365e-06, "loss": 0.4863, "step": 6009 }, { "epoch": 0.36598361903601984, "grad_norm": 1.0685762786283473, "learning_rate": 4.955651714205928e-06, "loss": 0.472, "step": 6010 }, { "epoch": 0.36604451481289774, "grad_norm": 1.0657628988556218, "learning_rate": 4.955636749604725e-06, "loss": 0.5078, "step": 6011 }, { "epoch": 0.3661054105897756, "grad_norm": 0.9785638903386237, "learning_rate": 4.95562178250177e-06, "loss": 0.4608, "step": 6012 }, { "epoch": 0.3661663063666535, "grad_norm": 1.1537497740726192, "learning_rate": 4.955606812897078e-06, "loss": 0.3935, "step": 6013 }, { "epoch": 0.3662272021435313, "grad_norm": 1.0416295782592686, "learning_rate": 4.955591840790665e-06, "loss": 0.5162, "step": 6014 }, { "epoch": 0.36628809792040923, "grad_norm": 1.0631604355628037, "learning_rate": 4.955576866182547e-06, "loss": 0.4686, "step": 6015 }, { "epoch": 0.3663489936972871, "grad_norm": 1.0502663742363367, "learning_rate": 4.955561889072737e-06, "loss": 0.4213, "step": 6016 }, { "epoch": 0.366409889474165, "grad_norm": 1.0149225982633372, "learning_rate": 4.955546909461253e-06, "loss": 0.4587, "step": 6017 }, { "epoch": 0.3664707852510428, "grad_norm": 1.0271081565307223, "learning_rate": 4.955531927348107e-06, "loss": 0.5337, "step": 6018 }, { "epoch": 0.3665316810279207, "grad_norm": 1.1108051625370898, "learning_rate": 4.955516942733318e-06, "loss": 0.4563, "step": 6019 }, { "epoch": 0.36659257680479856, "grad_norm": 0.9519553958321096, "learning_rate": 4.955501955616898e-06, "loss": 0.4804, "step": 6020 }, { "epoch": 0.36665347258167647, "grad_norm": 1.0456210389853222, "learning_rate": 4.955486965998865e-06, "loss": 0.4787, "step": 6021 }, { "epoch": 0.3667143683585543, "grad_norm": 1.0253843107756424, "learning_rate": 4.955471973879231e-06, "loss": 0.4328, "step": 6022 }, { "epoch": 0.3667752641354322, "grad_norm": 1.0423304462864216, "learning_rate": 4.955456979258016e-06, "loss": 0.4682, "step": 6023 }, { "epoch": 0.36683615991231006, "grad_norm": 1.047818811220035, "learning_rate": 4.95544198213523e-06, "loss": 0.5409, "step": 6024 }, { "epoch": 0.36689705568918796, "grad_norm": 1.0606418688587953, "learning_rate": 4.955426982510891e-06, "loss": 0.5127, "step": 6025 }, { "epoch": 0.3669579514660658, "grad_norm": 0.9867698889774916, "learning_rate": 4.955411980385015e-06, "loss": 0.4608, "step": 6026 }, { "epoch": 0.3670188472429437, "grad_norm": 1.0099545253258095, "learning_rate": 4.9553969757576165e-06, "loss": 0.4698, "step": 6027 }, { "epoch": 0.3670797430198216, "grad_norm": 1.0705889010400158, "learning_rate": 4.95538196862871e-06, "loss": 0.4076, "step": 6028 }, { "epoch": 0.36714063879669945, "grad_norm": 1.024103218441094, "learning_rate": 4.955366958998312e-06, "loss": 0.4874, "step": 6029 }, { "epoch": 0.36720153457357735, "grad_norm": 0.9852691860321268, "learning_rate": 4.955351946866436e-06, "loss": 0.4724, "step": 6030 }, { "epoch": 0.3672624303504552, "grad_norm": 1.0402369169787309, "learning_rate": 4.9553369322331e-06, "loss": 0.452, "step": 6031 }, { "epoch": 0.3673233261273331, "grad_norm": 0.9553346835539287, "learning_rate": 4.955321915098317e-06, "loss": 0.4913, "step": 6032 }, { "epoch": 0.36738422190421094, "grad_norm": 0.9794896006905828, "learning_rate": 4.955306895462102e-06, "loss": 0.5166, "step": 6033 }, { "epoch": 0.36744511768108884, "grad_norm": 1.0365308352216436, "learning_rate": 4.955291873324473e-06, "loss": 0.4651, "step": 6034 }, { "epoch": 0.3675060134579667, "grad_norm": 1.09533701446043, "learning_rate": 4.955276848685443e-06, "loss": 0.4701, "step": 6035 }, { "epoch": 0.3675669092348446, "grad_norm": 1.0814475241467412, "learning_rate": 4.9552618215450285e-06, "loss": 0.5105, "step": 6036 }, { "epoch": 0.36762780501172243, "grad_norm": 1.0344096176145035, "learning_rate": 4.955246791903243e-06, "loss": 0.5064, "step": 6037 }, { "epoch": 0.36768870078860033, "grad_norm": 1.0290174303916395, "learning_rate": 4.9552317597601055e-06, "loss": 0.4078, "step": 6038 }, { "epoch": 0.3677495965654782, "grad_norm": 1.1745850195385723, "learning_rate": 4.955216725115627e-06, "loss": 0.4817, "step": 6039 }, { "epoch": 0.3678104923423561, "grad_norm": 0.97557027350353, "learning_rate": 4.955201687969825e-06, "loss": 0.4909, "step": 6040 }, { "epoch": 0.3678713881192339, "grad_norm": 0.9680485546357052, "learning_rate": 4.955186648322715e-06, "loss": 0.4914, "step": 6041 }, { "epoch": 0.3679322838961118, "grad_norm": 0.8846316862344714, "learning_rate": 4.955171606174312e-06, "loss": 0.5009, "step": 6042 }, { "epoch": 0.36799317967298967, "grad_norm": 0.9674266886887368, "learning_rate": 4.95515656152463e-06, "loss": 0.4967, "step": 6043 }, { "epoch": 0.36805407544986757, "grad_norm": 1.0134008747659073, "learning_rate": 4.955141514373687e-06, "loss": 0.3898, "step": 6044 }, { "epoch": 0.3681149712267454, "grad_norm": 1.0433509500993623, "learning_rate": 4.9551264647214955e-06, "loss": 0.437, "step": 6045 }, { "epoch": 0.3681758670036233, "grad_norm": 0.9983442725998677, "learning_rate": 4.955111412568073e-06, "loss": 0.5487, "step": 6046 }, { "epoch": 0.36823676278050116, "grad_norm": 1.0066431644756593, "learning_rate": 4.955096357913435e-06, "loss": 0.512, "step": 6047 }, { "epoch": 0.36829765855737906, "grad_norm": 1.1210776266589981, "learning_rate": 4.9550813007575945e-06, "loss": 0.4774, "step": 6048 }, { "epoch": 0.3683585543342569, "grad_norm": 1.0949529535010654, "learning_rate": 4.955066241100569e-06, "loss": 0.4192, "step": 6049 }, { "epoch": 0.3684194501111348, "grad_norm": 0.9638401208699781, "learning_rate": 4.955051178942372e-06, "loss": 0.5206, "step": 6050 }, { "epoch": 0.36848034588801265, "grad_norm": 1.028199744820986, "learning_rate": 4.95503611428302e-06, "loss": 0.4653, "step": 6051 }, { "epoch": 0.36854124166489055, "grad_norm": 1.1405147084861502, "learning_rate": 4.95502104712253e-06, "loss": 0.4159, "step": 6052 }, { "epoch": 0.3686021374417684, "grad_norm": 1.1301073887584574, "learning_rate": 4.955005977460914e-06, "loss": 0.4956, "step": 6053 }, { "epoch": 0.3686630332186463, "grad_norm": 1.0975708534220976, "learning_rate": 4.954990905298189e-06, "loss": 0.4949, "step": 6054 }, { "epoch": 0.36872392899552414, "grad_norm": 0.9895300878603349, "learning_rate": 4.954975830634371e-06, "loss": 0.4879, "step": 6055 }, { "epoch": 0.36878482477240204, "grad_norm": 1.0745962331105468, "learning_rate": 4.954960753469474e-06, "loss": 0.457, "step": 6056 }, { "epoch": 0.3688457205492799, "grad_norm": 1.0059322602475251, "learning_rate": 4.954945673803515e-06, "loss": 0.5096, "step": 6057 }, { "epoch": 0.3689066163261578, "grad_norm": 1.0084416368074998, "learning_rate": 4.9549305916365075e-06, "loss": 0.4301, "step": 6058 }, { "epoch": 0.36896751210303563, "grad_norm": 1.0305587135721996, "learning_rate": 4.954915506968469e-06, "loss": 0.4747, "step": 6059 }, { "epoch": 0.36902840787991353, "grad_norm": 0.9716687695603177, "learning_rate": 4.9549004197994125e-06, "loss": 0.4863, "step": 6060 }, { "epoch": 0.3690893036567914, "grad_norm": 0.9810671103624888, "learning_rate": 4.954885330129354e-06, "loss": 0.4467, "step": 6061 }, { "epoch": 0.3691501994336693, "grad_norm": 1.034810106768394, "learning_rate": 4.95487023795831e-06, "loss": 0.4752, "step": 6062 }, { "epoch": 0.3692110952105471, "grad_norm": 1.0378566595676086, "learning_rate": 4.954855143286295e-06, "loss": 0.4306, "step": 6063 }, { "epoch": 0.369271990987425, "grad_norm": 1.0567889337948464, "learning_rate": 4.954840046113325e-06, "loss": 0.3971, "step": 6064 }, { "epoch": 0.36933288676430287, "grad_norm": 1.1027685733653938, "learning_rate": 4.954824946439415e-06, "loss": 0.4378, "step": 6065 }, { "epoch": 0.36939378254118077, "grad_norm": 0.9590174783990574, "learning_rate": 4.95480984426458e-06, "loss": 0.5305, "step": 6066 }, { "epoch": 0.3694546783180586, "grad_norm": 1.0847789332032582, "learning_rate": 4.954794739588836e-06, "loss": 0.4464, "step": 6067 }, { "epoch": 0.3695155740949365, "grad_norm": 1.0942638428917637, "learning_rate": 4.954779632412198e-06, "loss": 0.4889, "step": 6068 }, { "epoch": 0.3695764698718144, "grad_norm": 0.9424948764647954, "learning_rate": 4.954764522734682e-06, "loss": 0.4215, "step": 6069 }, { "epoch": 0.36963736564869226, "grad_norm": 1.0054119313548815, "learning_rate": 4.954749410556302e-06, "loss": 0.5487, "step": 6070 }, { "epoch": 0.36969826142557016, "grad_norm": 1.1102618596128098, "learning_rate": 4.954734295877075e-06, "loss": 0.4143, "step": 6071 }, { "epoch": 0.369759157202448, "grad_norm": 1.0259845404721895, "learning_rate": 4.954719178697016e-06, "loss": 0.4459, "step": 6072 }, { "epoch": 0.3698200529793259, "grad_norm": 1.0364802762629588, "learning_rate": 4.95470405901614e-06, "loss": 0.5384, "step": 6073 }, { "epoch": 0.36988094875620375, "grad_norm": 1.1429468762962434, "learning_rate": 4.954688936834462e-06, "loss": 0.4409, "step": 6074 }, { "epoch": 0.36994184453308165, "grad_norm": 1.072767632031658, "learning_rate": 4.954673812151999e-06, "loss": 0.4664, "step": 6075 }, { "epoch": 0.3700027403099595, "grad_norm": 1.0869173396947693, "learning_rate": 4.954658684968764e-06, "loss": 0.4754, "step": 6076 }, { "epoch": 0.3700636360868374, "grad_norm": 0.9948745705375578, "learning_rate": 4.954643555284775e-06, "loss": 0.4835, "step": 6077 }, { "epoch": 0.37012453186371524, "grad_norm": 1.0322567953581128, "learning_rate": 4.954628423100045e-06, "loss": 0.4464, "step": 6078 }, { "epoch": 0.37018542764059315, "grad_norm": 1.0684679895885498, "learning_rate": 4.954613288414591e-06, "loss": 0.5106, "step": 6079 }, { "epoch": 0.370246323417471, "grad_norm": 1.0232438934766566, "learning_rate": 4.954598151228429e-06, "loss": 0.4597, "step": 6080 }, { "epoch": 0.3703072191943489, "grad_norm": 0.9034695952972958, "learning_rate": 4.954583011541573e-06, "loss": 0.4753, "step": 6081 }, { "epoch": 0.37036811497122674, "grad_norm": 1.0449125507087937, "learning_rate": 4.954567869354038e-06, "loss": 0.4922, "step": 6082 }, { "epoch": 0.37042901074810464, "grad_norm": 1.058438369347969, "learning_rate": 4.954552724665841e-06, "loss": 0.4701, "step": 6083 }, { "epoch": 0.3704899065249825, "grad_norm": 0.9660142148584423, "learning_rate": 4.9545375774769964e-06, "loss": 0.4915, "step": 6084 }, { "epoch": 0.3705508023018604, "grad_norm": 0.9410817689930323, "learning_rate": 4.95452242778752e-06, "loss": 0.5209, "step": 6085 }, { "epoch": 0.3706116980787382, "grad_norm": 0.9858879722914147, "learning_rate": 4.9545072755974276e-06, "loss": 0.4638, "step": 6086 }, { "epoch": 0.37067259385561613, "grad_norm": 1.0017233899074118, "learning_rate": 4.9544921209067335e-06, "loss": 0.4854, "step": 6087 }, { "epoch": 0.370733489632494, "grad_norm": 1.0060773089033128, "learning_rate": 4.954476963715454e-06, "loss": 0.4412, "step": 6088 }, { "epoch": 0.3707943854093719, "grad_norm": 1.0502068690349289, "learning_rate": 4.954461804023605e-06, "loss": 0.4349, "step": 6089 }, { "epoch": 0.3708552811862497, "grad_norm": 1.0363591502693998, "learning_rate": 4.954446641831201e-06, "loss": 0.4911, "step": 6090 }, { "epoch": 0.3709161769631276, "grad_norm": 1.0238895045994616, "learning_rate": 4.954431477138257e-06, "loss": 0.4598, "step": 6091 }, { "epoch": 0.37097707274000546, "grad_norm": 0.9526263054408701, "learning_rate": 4.954416309944791e-06, "loss": 0.5171, "step": 6092 }, { "epoch": 0.37103796851688337, "grad_norm": 0.997042799459519, "learning_rate": 4.954401140250816e-06, "loss": 0.4822, "step": 6093 }, { "epoch": 0.3710988642937612, "grad_norm": 1.1046195987917462, "learning_rate": 4.9543859680563475e-06, "loss": 0.3984, "step": 6094 }, { "epoch": 0.3711597600706391, "grad_norm": 0.9597612087611525, "learning_rate": 4.954370793361402e-06, "loss": 0.5071, "step": 6095 }, { "epoch": 0.37122065584751696, "grad_norm": 1.0755943660287053, "learning_rate": 4.954355616165994e-06, "loss": 0.5206, "step": 6096 }, { "epoch": 0.37128155162439486, "grad_norm": 1.0621742565773706, "learning_rate": 4.9543404364701404e-06, "loss": 0.4729, "step": 6097 }, { "epoch": 0.3713424474012727, "grad_norm": 1.0538214593528707, "learning_rate": 4.954325254273855e-06, "loss": 0.4323, "step": 6098 }, { "epoch": 0.3714033431781506, "grad_norm": 0.9873853518348582, "learning_rate": 4.954310069577155e-06, "loss": 0.5291, "step": 6099 }, { "epoch": 0.37146423895502845, "grad_norm": 1.0598009226958216, "learning_rate": 4.954294882380054e-06, "loss": 0.3927, "step": 6100 }, { "epoch": 0.37152513473190635, "grad_norm": 1.063536912977621, "learning_rate": 4.9542796926825685e-06, "loss": 0.5062, "step": 6101 }, { "epoch": 0.3715860305087842, "grad_norm": 1.0735508768594233, "learning_rate": 4.954264500484714e-06, "loss": 0.4916, "step": 6102 }, { "epoch": 0.3716469262856621, "grad_norm": 1.0194401851895718, "learning_rate": 4.954249305786507e-06, "loss": 0.4428, "step": 6103 }, { "epoch": 0.37170782206253994, "grad_norm": 1.0560808791635838, "learning_rate": 4.954234108587961e-06, "loss": 0.4506, "step": 6104 }, { "epoch": 0.37176871783941784, "grad_norm": 1.0159919928390322, "learning_rate": 4.954218908889092e-06, "loss": 0.47, "step": 6105 }, { "epoch": 0.3718296136162957, "grad_norm": 1.0820125867904415, "learning_rate": 4.954203706689916e-06, "loss": 0.4268, "step": 6106 }, { "epoch": 0.3718905093931736, "grad_norm": 1.01980879642156, "learning_rate": 4.954188501990448e-06, "loss": 0.4527, "step": 6107 }, { "epoch": 0.37195140517005143, "grad_norm": 0.9241601317563577, "learning_rate": 4.954173294790704e-06, "loss": 0.5504, "step": 6108 }, { "epoch": 0.37201230094692933, "grad_norm": 1.0373536617585366, "learning_rate": 4.9541580850907e-06, "loss": 0.5363, "step": 6109 }, { "epoch": 0.37207319672380723, "grad_norm": 1.0140116317667849, "learning_rate": 4.9541428728904495e-06, "loss": 0.4757, "step": 6110 }, { "epoch": 0.3721340925006851, "grad_norm": 1.0990217645759348, "learning_rate": 4.95412765818997e-06, "loss": 0.4849, "step": 6111 }, { "epoch": 0.372194988277563, "grad_norm": 1.0212356605158104, "learning_rate": 4.954112440989276e-06, "loss": 0.5498, "step": 6112 }, { "epoch": 0.3722558840544408, "grad_norm": 1.047226500152906, "learning_rate": 4.954097221288383e-06, "loss": 0.4602, "step": 6113 }, { "epoch": 0.3723167798313187, "grad_norm": 1.0981836497191761, "learning_rate": 4.954081999087308e-06, "loss": 0.4812, "step": 6114 }, { "epoch": 0.37237767560819657, "grad_norm": 1.073866617456297, "learning_rate": 4.954066774386064e-06, "loss": 0.6086, "step": 6115 }, { "epoch": 0.37243857138507447, "grad_norm": 0.9990920889602521, "learning_rate": 4.954051547184669e-06, "loss": 0.449, "step": 6116 }, { "epoch": 0.3724994671619523, "grad_norm": 1.0541604918254546, "learning_rate": 4.9540363174831356e-06, "loss": 0.4687, "step": 6117 }, { "epoch": 0.3725603629388302, "grad_norm": 1.1229637065959743, "learning_rate": 4.954021085281482e-06, "loss": 0.434, "step": 6118 }, { "epoch": 0.37262125871570806, "grad_norm": 1.0202659804724348, "learning_rate": 4.954005850579723e-06, "loss": 0.4851, "step": 6119 }, { "epoch": 0.37268215449258596, "grad_norm": 1.041344192943728, "learning_rate": 4.953990613377873e-06, "loss": 0.4382, "step": 6120 }, { "epoch": 0.3727430502694638, "grad_norm": 1.0027217709813083, "learning_rate": 4.953975373675949e-06, "loss": 0.5181, "step": 6121 }, { "epoch": 0.3728039460463417, "grad_norm": 1.0310060595052497, "learning_rate": 4.953960131473966e-06, "loss": 0.4167, "step": 6122 }, { "epoch": 0.37286484182321955, "grad_norm": 1.1895355606761902, "learning_rate": 4.95394488677194e-06, "loss": 0.4363, "step": 6123 }, { "epoch": 0.37292573760009745, "grad_norm": 0.9094697475379501, "learning_rate": 4.953929639569885e-06, "loss": 0.5366, "step": 6124 }, { "epoch": 0.3729866333769753, "grad_norm": 1.0389224536890518, "learning_rate": 4.953914389867818e-06, "loss": 0.4928, "step": 6125 }, { "epoch": 0.3730475291538532, "grad_norm": 1.0267695025354686, "learning_rate": 4.953899137665753e-06, "loss": 0.4427, "step": 6126 }, { "epoch": 0.37310842493073104, "grad_norm": 1.0473766467288306, "learning_rate": 4.953883882963708e-06, "loss": 0.5087, "step": 6127 }, { "epoch": 0.37316932070760894, "grad_norm": 1.0243769315825, "learning_rate": 4.953868625761696e-06, "loss": 0.556, "step": 6128 }, { "epoch": 0.3732302164844868, "grad_norm": 0.9466014485031404, "learning_rate": 4.953853366059734e-06, "loss": 0.5144, "step": 6129 }, { "epoch": 0.3732911122613647, "grad_norm": 1.0017801704081422, "learning_rate": 4.9538381038578374e-06, "loss": 0.4799, "step": 6130 }, { "epoch": 0.37335200803824253, "grad_norm": 1.0363506130719404, "learning_rate": 4.953822839156022e-06, "loss": 0.4601, "step": 6131 }, { "epoch": 0.37341290381512043, "grad_norm": 0.9763303598556224, "learning_rate": 4.953807571954302e-06, "loss": 0.4925, "step": 6132 }, { "epoch": 0.3734737995919983, "grad_norm": 0.9516255623893157, "learning_rate": 4.953792302252695e-06, "loss": 0.5199, "step": 6133 }, { "epoch": 0.3735346953688762, "grad_norm": 0.9014335626651085, "learning_rate": 4.953777030051215e-06, "loss": 0.5183, "step": 6134 }, { "epoch": 0.373595591145754, "grad_norm": 1.0050082648600522, "learning_rate": 4.953761755349877e-06, "loss": 0.4341, "step": 6135 }, { "epoch": 0.3736564869226319, "grad_norm": 0.9825530707025701, "learning_rate": 4.953746478148698e-06, "loss": 0.4474, "step": 6136 }, { "epoch": 0.37371738269950977, "grad_norm": 0.9302173059085405, "learning_rate": 4.953731198447693e-06, "loss": 0.5643, "step": 6137 }, { "epoch": 0.37377827847638767, "grad_norm": 0.9803722319527821, "learning_rate": 4.953715916246878e-06, "loss": 0.4923, "step": 6138 }, { "epoch": 0.3738391742532655, "grad_norm": 0.9957650385078488, "learning_rate": 4.9537006315462684e-06, "loss": 0.4892, "step": 6139 }, { "epoch": 0.3739000700301434, "grad_norm": 1.0974340599194536, "learning_rate": 4.95368534434588e-06, "loss": 0.4103, "step": 6140 }, { "epoch": 0.37396096580702126, "grad_norm": 1.073060455757614, "learning_rate": 4.953670054645728e-06, "loss": 0.4801, "step": 6141 }, { "epoch": 0.37402186158389916, "grad_norm": 1.0647491116141334, "learning_rate": 4.953654762445826e-06, "loss": 0.4984, "step": 6142 }, { "epoch": 0.374082757360777, "grad_norm": 1.0290826558041104, "learning_rate": 4.953639467746193e-06, "loss": 0.4676, "step": 6143 }, { "epoch": 0.3741436531376549, "grad_norm": 1.046587490271967, "learning_rate": 4.953624170546843e-06, "loss": 0.4253, "step": 6144 }, { "epoch": 0.37420454891453275, "grad_norm": 1.0033067829455395, "learning_rate": 4.953608870847792e-06, "loss": 0.4652, "step": 6145 }, { "epoch": 0.37426544469141065, "grad_norm": 1.1079004201652993, "learning_rate": 4.953593568649056e-06, "loss": 0.435, "step": 6146 }, { "epoch": 0.3743263404682885, "grad_norm": 0.9643262574794592, "learning_rate": 4.953578263950648e-06, "loss": 0.5506, "step": 6147 }, { "epoch": 0.3743872362451664, "grad_norm": 0.9575902076910581, "learning_rate": 4.953562956752586e-06, "loss": 0.4709, "step": 6148 }, { "epoch": 0.37444813202204424, "grad_norm": 1.0589348702647976, "learning_rate": 4.953547647054886e-06, "loss": 0.4923, "step": 6149 }, { "epoch": 0.37450902779892215, "grad_norm": 1.068619495962225, "learning_rate": 4.953532334857562e-06, "loss": 0.4658, "step": 6150 }, { "epoch": 0.37456992357580005, "grad_norm": 1.1161213603072657, "learning_rate": 4.953517020160631e-06, "loss": 0.5561, "step": 6151 }, { "epoch": 0.3746308193526779, "grad_norm": 1.0509426105898099, "learning_rate": 4.953501702964108e-06, "loss": 0.4808, "step": 6152 }, { "epoch": 0.3746917151295558, "grad_norm": 0.9832603466438111, "learning_rate": 4.953486383268007e-06, "loss": 0.4736, "step": 6153 }, { "epoch": 0.37475261090643364, "grad_norm": 1.0271924230370846, "learning_rate": 4.953471061072346e-06, "loss": 0.4857, "step": 6154 }, { "epoch": 0.37481350668331154, "grad_norm": 1.0317837385936457, "learning_rate": 4.953455736377139e-06, "loss": 0.4438, "step": 6155 }, { "epoch": 0.3748744024601894, "grad_norm": 1.02294520616393, "learning_rate": 4.953440409182403e-06, "loss": 0.4781, "step": 6156 }, { "epoch": 0.3749352982370673, "grad_norm": 1.0095291472368366, "learning_rate": 4.953425079488153e-06, "loss": 0.4298, "step": 6157 }, { "epoch": 0.3749961940139451, "grad_norm": 1.0888590612633355, "learning_rate": 4.953409747294404e-06, "loss": 0.4343, "step": 6158 }, { "epoch": 0.37505708979082303, "grad_norm": 0.9842790028670121, "learning_rate": 4.953394412601173e-06, "loss": 0.5136, "step": 6159 }, { "epoch": 0.3751179855677009, "grad_norm": 0.9562141188727699, "learning_rate": 4.9533790754084735e-06, "loss": 0.4466, "step": 6160 }, { "epoch": 0.3751788813445788, "grad_norm": 0.9984343361121111, "learning_rate": 4.953363735716323e-06, "loss": 0.5395, "step": 6161 }, { "epoch": 0.3752397771214566, "grad_norm": 1.0240308269306835, "learning_rate": 4.953348393524737e-06, "loss": 0.458, "step": 6162 }, { "epoch": 0.3753006728983345, "grad_norm": 0.9852447625744957, "learning_rate": 4.95333304883373e-06, "loss": 0.4641, "step": 6163 }, { "epoch": 0.37536156867521236, "grad_norm": 0.9741788854259957, "learning_rate": 4.953317701643319e-06, "loss": 0.5103, "step": 6164 }, { "epoch": 0.37542246445209027, "grad_norm": 1.0355692747788448, "learning_rate": 4.953302351953519e-06, "loss": 0.5272, "step": 6165 }, { "epoch": 0.3754833602289681, "grad_norm": 1.0457964105727904, "learning_rate": 4.953286999764345e-06, "loss": 0.4951, "step": 6166 }, { "epoch": 0.375544256005846, "grad_norm": 1.0890380484400917, "learning_rate": 4.953271645075814e-06, "loss": 0.4346, "step": 6167 }, { "epoch": 0.37560515178272386, "grad_norm": 1.072500356743128, "learning_rate": 4.9532562878879396e-06, "loss": 0.4383, "step": 6168 }, { "epoch": 0.37566604755960176, "grad_norm": 0.9970849884038931, "learning_rate": 4.953240928200739e-06, "loss": 0.4644, "step": 6169 }, { "epoch": 0.3757269433364796, "grad_norm": 0.9980886541314371, "learning_rate": 4.9532255660142285e-06, "loss": 0.4869, "step": 6170 }, { "epoch": 0.3757878391133575, "grad_norm": 1.0309199997376262, "learning_rate": 4.953210201328421e-06, "loss": 0.4262, "step": 6171 }, { "epoch": 0.37584873489023535, "grad_norm": 0.8990704806792376, "learning_rate": 4.953194834143336e-06, "loss": 0.4966, "step": 6172 }, { "epoch": 0.37590963066711325, "grad_norm": 1.0757119196155462, "learning_rate": 4.953179464458986e-06, "loss": 0.4052, "step": 6173 }, { "epoch": 0.3759705264439911, "grad_norm": 1.0643230192229978, "learning_rate": 4.953164092275387e-06, "loss": 0.444, "step": 6174 }, { "epoch": 0.376031422220869, "grad_norm": 0.980050619842328, "learning_rate": 4.953148717592558e-06, "loss": 0.4971, "step": 6175 }, { "epoch": 0.37609231799774684, "grad_norm": 1.013853357514565, "learning_rate": 4.95313334041051e-06, "loss": 0.5215, "step": 6176 }, { "epoch": 0.37615321377462474, "grad_norm": 0.9844636395894656, "learning_rate": 4.9531179607292615e-06, "loss": 0.4188, "step": 6177 }, { "epoch": 0.3762141095515026, "grad_norm": 1.020164356858937, "learning_rate": 4.953102578548826e-06, "loss": 0.4529, "step": 6178 }, { "epoch": 0.3762750053283805, "grad_norm": 1.0136758977238693, "learning_rate": 4.953087193869222e-06, "loss": 0.5333, "step": 6179 }, { "epoch": 0.37633590110525833, "grad_norm": 1.045925776525004, "learning_rate": 4.953071806690464e-06, "loss": 0.4543, "step": 6180 }, { "epoch": 0.37639679688213623, "grad_norm": 1.0945586473812847, "learning_rate": 4.953056417012566e-06, "loss": 0.4479, "step": 6181 }, { "epoch": 0.3764576926590141, "grad_norm": 0.9811205125559901, "learning_rate": 4.953041024835547e-06, "loss": 0.4765, "step": 6182 }, { "epoch": 0.376518588435892, "grad_norm": 1.0656309261242096, "learning_rate": 4.95302563015942e-06, "loss": 0.4847, "step": 6183 }, { "epoch": 0.3765794842127698, "grad_norm": 0.9913339404445668, "learning_rate": 4.953010232984201e-06, "loss": 0.4827, "step": 6184 }, { "epoch": 0.3766403799896477, "grad_norm": 0.9852353912959414, "learning_rate": 4.9529948333099065e-06, "loss": 0.491, "step": 6185 }, { "epoch": 0.37670127576652557, "grad_norm": 0.9703577965585669, "learning_rate": 4.952979431136552e-06, "loss": 0.4628, "step": 6186 }, { "epoch": 0.37676217154340347, "grad_norm": 1.0577392062727597, "learning_rate": 4.952964026464153e-06, "loss": 0.4791, "step": 6187 }, { "epoch": 0.3768230673202813, "grad_norm": 1.1478201272336088, "learning_rate": 4.952948619292726e-06, "loss": 0.4543, "step": 6188 }, { "epoch": 0.3768839630971592, "grad_norm": 1.0693702257778634, "learning_rate": 4.952933209622284e-06, "loss": 0.4169, "step": 6189 }, { "epoch": 0.37694485887403706, "grad_norm": 0.9955803189377422, "learning_rate": 4.952917797452846e-06, "loss": 0.4691, "step": 6190 }, { "epoch": 0.37700575465091496, "grad_norm": 0.995354876690036, "learning_rate": 4.952902382784426e-06, "loss": 0.4667, "step": 6191 }, { "epoch": 0.37706665042779286, "grad_norm": 0.9577729107918449, "learning_rate": 4.952886965617041e-06, "loss": 0.5257, "step": 6192 }, { "epoch": 0.3771275462046707, "grad_norm": 1.0989628830497782, "learning_rate": 4.952871545950705e-06, "loss": 0.4851, "step": 6193 }, { "epoch": 0.3771884419815486, "grad_norm": 1.0154341430773257, "learning_rate": 4.952856123785434e-06, "loss": 0.4332, "step": 6194 }, { "epoch": 0.37724933775842645, "grad_norm": 1.1597653654831614, "learning_rate": 4.952840699121245e-06, "loss": 0.4601, "step": 6195 }, { "epoch": 0.37731023353530435, "grad_norm": 1.0350233295971298, "learning_rate": 4.952825271958152e-06, "loss": 0.5036, "step": 6196 }, { "epoch": 0.3773711293121822, "grad_norm": 1.1035056635057086, "learning_rate": 4.952809842296172e-06, "loss": 0.4948, "step": 6197 }, { "epoch": 0.3774320250890601, "grad_norm": 1.0234245508532396, "learning_rate": 4.952794410135321e-06, "loss": 0.4698, "step": 6198 }, { "epoch": 0.37749292086593794, "grad_norm": 1.0629453313845538, "learning_rate": 4.952778975475613e-06, "loss": 0.4815, "step": 6199 }, { "epoch": 0.37755381664281584, "grad_norm": 1.0705433424571682, "learning_rate": 4.9527635383170655e-06, "loss": 0.4697, "step": 6200 }, { "epoch": 0.3776147124196937, "grad_norm": 1.049011210470263, "learning_rate": 4.952748098659693e-06, "loss": 0.4231, "step": 6201 }, { "epoch": 0.3776756081965716, "grad_norm": 0.972325848312185, "learning_rate": 4.952732656503512e-06, "loss": 0.4466, "step": 6202 }, { "epoch": 0.37773650397344943, "grad_norm": 1.0795183826554908, "learning_rate": 4.952717211848538e-06, "loss": 0.4424, "step": 6203 }, { "epoch": 0.37779739975032733, "grad_norm": 1.0008056586509155, "learning_rate": 4.9527017646947865e-06, "loss": 0.5545, "step": 6204 }, { "epoch": 0.3778582955272052, "grad_norm": 1.0817458904496795, "learning_rate": 4.952686315042274e-06, "loss": 0.4345, "step": 6205 }, { "epoch": 0.3779191913040831, "grad_norm": 1.050696133224878, "learning_rate": 4.952670862891015e-06, "loss": 0.5068, "step": 6206 }, { "epoch": 0.3779800870809609, "grad_norm": 1.0929178505966588, "learning_rate": 4.952655408241026e-06, "loss": 0.4426, "step": 6207 }, { "epoch": 0.3780409828578388, "grad_norm": 1.0153995707253862, "learning_rate": 4.952639951092323e-06, "loss": 0.4602, "step": 6208 }, { "epoch": 0.37810187863471667, "grad_norm": 0.996852187375733, "learning_rate": 4.952624491444921e-06, "loss": 0.5184, "step": 6209 }, { "epoch": 0.37816277441159457, "grad_norm": 0.9686057163343564, "learning_rate": 4.952609029298837e-06, "loss": 0.4386, "step": 6210 }, { "epoch": 0.3782236701884724, "grad_norm": 1.0037116449006789, "learning_rate": 4.9525935646540845e-06, "loss": 0.4522, "step": 6211 }, { "epoch": 0.3782845659653503, "grad_norm": 0.9243195603940001, "learning_rate": 4.9525780975106815e-06, "loss": 0.4753, "step": 6212 }, { "epoch": 0.37834546174222816, "grad_norm": 1.1284643716511435, "learning_rate": 4.952562627868643e-06, "loss": 0.5019, "step": 6213 }, { "epoch": 0.37840635751910606, "grad_norm": 1.073720050997467, "learning_rate": 4.952547155727985e-06, "loss": 0.506, "step": 6214 }, { "epoch": 0.3784672532959839, "grad_norm": 1.0095802267605534, "learning_rate": 4.952531681088722e-06, "loss": 0.4704, "step": 6215 }, { "epoch": 0.3785281490728618, "grad_norm": 1.0455801742115363, "learning_rate": 4.952516203950872e-06, "loss": 0.4576, "step": 6216 }, { "epoch": 0.37858904484973965, "grad_norm": 1.0704037789117022, "learning_rate": 4.952500724314448e-06, "loss": 0.5675, "step": 6217 }, { "epoch": 0.37864994062661755, "grad_norm": 0.993369759369137, "learning_rate": 4.9524852421794686e-06, "loss": 0.5043, "step": 6218 }, { "epoch": 0.3787108364034954, "grad_norm": 0.9372756117925566, "learning_rate": 4.952469757545947e-06, "loss": 0.4908, "step": 6219 }, { "epoch": 0.3787717321803733, "grad_norm": 1.00421733561364, "learning_rate": 4.952454270413901e-06, "loss": 0.4742, "step": 6220 }, { "epoch": 0.37883262795725114, "grad_norm": 0.9897462864340599, "learning_rate": 4.952438780783346e-06, "loss": 0.4487, "step": 6221 }, { "epoch": 0.37889352373412905, "grad_norm": 0.9802841160126721, "learning_rate": 4.9524232886542965e-06, "loss": 0.5085, "step": 6222 }, { "epoch": 0.3789544195110069, "grad_norm": 1.08209453123268, "learning_rate": 4.952407794026769e-06, "loss": 0.5276, "step": 6223 }, { "epoch": 0.3790153152878848, "grad_norm": 1.1109795394218331, "learning_rate": 4.95239229690078e-06, "loss": 0.4576, "step": 6224 }, { "epoch": 0.37907621106476264, "grad_norm": 1.0333893349874026, "learning_rate": 4.952376797276345e-06, "loss": 0.4184, "step": 6225 }, { "epoch": 0.37913710684164054, "grad_norm": 0.9752766358523337, "learning_rate": 4.952361295153478e-06, "loss": 0.4997, "step": 6226 }, { "epoch": 0.3791980026185184, "grad_norm": 0.9972305524623175, "learning_rate": 4.952345790532198e-06, "loss": 0.5858, "step": 6227 }, { "epoch": 0.3792588983953963, "grad_norm": 0.9446620691671298, "learning_rate": 4.9523302834125184e-06, "loss": 0.4564, "step": 6228 }, { "epoch": 0.3793197941722741, "grad_norm": 1.042712257849186, "learning_rate": 4.952314773794455e-06, "loss": 0.435, "step": 6229 }, { "epoch": 0.37938068994915203, "grad_norm": 1.0900231308235555, "learning_rate": 4.952299261678026e-06, "loss": 0.4128, "step": 6230 }, { "epoch": 0.3794415857260299, "grad_norm": 0.9164856881570241, "learning_rate": 4.952283747063244e-06, "loss": 0.5018, "step": 6231 }, { "epoch": 0.3795024815029078, "grad_norm": 1.1361870099637192, "learning_rate": 4.952268229950127e-06, "loss": 0.417, "step": 6232 }, { "epoch": 0.3795633772797857, "grad_norm": 0.9626721398291819, "learning_rate": 4.95225271033869e-06, "loss": 0.5219, "step": 6233 }, { "epoch": 0.3796242730566635, "grad_norm": 0.9727525931375427, "learning_rate": 4.952237188228949e-06, "loss": 0.4977, "step": 6234 }, { "epoch": 0.3796851688335414, "grad_norm": 1.004405098320813, "learning_rate": 4.952221663620919e-06, "loss": 0.4724, "step": 6235 }, { "epoch": 0.37974606461041926, "grad_norm": 1.1164785614518142, "learning_rate": 4.9522061365146174e-06, "loss": 0.4137, "step": 6236 }, { "epoch": 0.37980696038729717, "grad_norm": 0.9804789548605373, "learning_rate": 4.952190606910059e-06, "loss": 0.4662, "step": 6237 }, { "epoch": 0.379867856164175, "grad_norm": 1.13653909086519, "learning_rate": 4.9521750748072595e-06, "loss": 0.4249, "step": 6238 }, { "epoch": 0.3799287519410529, "grad_norm": 1.0929730281563852, "learning_rate": 4.952159540206235e-06, "loss": 0.4921, "step": 6239 }, { "epoch": 0.37998964771793076, "grad_norm": 1.0281427120240756, "learning_rate": 4.952144003107002e-06, "loss": 0.4553, "step": 6240 }, { "epoch": 0.38005054349480866, "grad_norm": 1.0070830411846645, "learning_rate": 4.952128463509575e-06, "loss": 0.5227, "step": 6241 }, { "epoch": 0.3801114392716865, "grad_norm": 1.0930901440538727, "learning_rate": 4.952112921413971e-06, "loss": 0.4669, "step": 6242 }, { "epoch": 0.3801723350485644, "grad_norm": 1.02836947665105, "learning_rate": 4.952097376820205e-06, "loss": 0.4578, "step": 6243 }, { "epoch": 0.38023323082544225, "grad_norm": 1.0892749352566953, "learning_rate": 4.952081829728293e-06, "loss": 0.4743, "step": 6244 }, { "epoch": 0.38029412660232015, "grad_norm": 1.0153884488125613, "learning_rate": 4.952066280138251e-06, "loss": 0.4645, "step": 6245 }, { "epoch": 0.380355022379198, "grad_norm": 0.9864106958562094, "learning_rate": 4.952050728050095e-06, "loss": 0.4205, "step": 6246 }, { "epoch": 0.3804159181560759, "grad_norm": 1.0049757359824008, "learning_rate": 4.952035173463842e-06, "loss": 0.4135, "step": 6247 }, { "epoch": 0.38047681393295374, "grad_norm": 1.0159484148363829, "learning_rate": 4.952019616379504e-06, "loss": 0.4321, "step": 6248 }, { "epoch": 0.38053770970983164, "grad_norm": 1.0696473176536077, "learning_rate": 4.952004056797102e-06, "loss": 0.4201, "step": 6249 }, { "epoch": 0.3805986054867095, "grad_norm": 1.1040639358735755, "learning_rate": 4.951988494716648e-06, "loss": 0.4271, "step": 6250 }, { "epoch": 0.3806595012635874, "grad_norm": 1.118637791045271, "learning_rate": 4.951972930138158e-06, "loss": 0.3868, "step": 6251 }, { "epoch": 0.38072039704046523, "grad_norm": 1.0435345437374535, "learning_rate": 4.95195736306165e-06, "loss": 0.4849, "step": 6252 }, { "epoch": 0.38078129281734313, "grad_norm": 1.0614130094816925, "learning_rate": 4.951941793487139e-06, "loss": 0.4447, "step": 6253 }, { "epoch": 0.380842188594221, "grad_norm": 1.069316871908195, "learning_rate": 4.9519262214146405e-06, "loss": 0.4898, "step": 6254 }, { "epoch": 0.3809030843710989, "grad_norm": 1.026078165766074, "learning_rate": 4.951910646844171e-06, "loss": 0.4809, "step": 6255 }, { "epoch": 0.3809639801479767, "grad_norm": 1.0666980292771364, "learning_rate": 4.951895069775745e-06, "loss": 0.4211, "step": 6256 }, { "epoch": 0.3810248759248546, "grad_norm": 1.161233580882707, "learning_rate": 4.9518794902093805e-06, "loss": 0.5046, "step": 6257 }, { "epoch": 0.38108577170173247, "grad_norm": 1.036297906163775, "learning_rate": 4.9518639081450916e-06, "loss": 0.4653, "step": 6258 }, { "epoch": 0.38114666747861037, "grad_norm": 1.0521314296015953, "learning_rate": 4.951848323582894e-06, "loss": 0.4273, "step": 6259 }, { "epoch": 0.3812075632554882, "grad_norm": 1.0364005615682215, "learning_rate": 4.951832736522805e-06, "loss": 0.4899, "step": 6260 }, { "epoch": 0.3812684590323661, "grad_norm": 1.0431333054210508, "learning_rate": 4.95181714696484e-06, "loss": 0.5022, "step": 6261 }, { "epoch": 0.38132935480924396, "grad_norm": 0.991350302050278, "learning_rate": 4.951801554909014e-06, "loss": 0.4713, "step": 6262 }, { "epoch": 0.38139025058612186, "grad_norm": 1.0995484496965902, "learning_rate": 4.9517859603553435e-06, "loss": 0.4642, "step": 6263 }, { "epoch": 0.3814511463629997, "grad_norm": 0.9327002905026383, "learning_rate": 4.951770363303845e-06, "loss": 0.4621, "step": 6264 }, { "epoch": 0.3815120421398776, "grad_norm": 1.1065400786897703, "learning_rate": 4.951754763754534e-06, "loss": 0.4296, "step": 6265 }, { "epoch": 0.38157293791675545, "grad_norm": 0.9693962913893135, "learning_rate": 4.9517391617074254e-06, "loss": 0.5042, "step": 6266 }, { "epoch": 0.38163383369363335, "grad_norm": 0.9612616425016631, "learning_rate": 4.9517235571625365e-06, "loss": 0.5007, "step": 6267 }, { "epoch": 0.3816947294705112, "grad_norm": 0.9885319349198695, "learning_rate": 4.951707950119882e-06, "loss": 0.4903, "step": 6268 }, { "epoch": 0.3817556252473891, "grad_norm": 1.0732904246142683, "learning_rate": 4.951692340579479e-06, "loss": 0.3523, "step": 6269 }, { "epoch": 0.38181652102426694, "grad_norm": 1.0934070778918439, "learning_rate": 4.951676728541343e-06, "loss": 0.4431, "step": 6270 }, { "epoch": 0.38187741680114484, "grad_norm": 1.0561758063332374, "learning_rate": 4.951661114005489e-06, "loss": 0.4692, "step": 6271 }, { "epoch": 0.3819383125780227, "grad_norm": 1.0886785864989046, "learning_rate": 4.951645496971934e-06, "loss": 0.439, "step": 6272 }, { "epoch": 0.3819992083549006, "grad_norm": 1.0224700171846646, "learning_rate": 4.9516298774406935e-06, "loss": 0.556, "step": 6273 }, { "epoch": 0.3820601041317785, "grad_norm": 1.0112538637726123, "learning_rate": 4.951614255411784e-06, "loss": 0.4427, "step": 6274 }, { "epoch": 0.38212099990865633, "grad_norm": 1.0183300223259741, "learning_rate": 4.95159863088522e-06, "loss": 0.4656, "step": 6275 }, { "epoch": 0.38218189568553423, "grad_norm": 0.9815627274042307, "learning_rate": 4.951583003861019e-06, "loss": 0.4641, "step": 6276 }, { "epoch": 0.3822427914624121, "grad_norm": 1.0536320380956545, "learning_rate": 4.951567374339196e-06, "loss": 0.461, "step": 6277 }, { "epoch": 0.38230368723929, "grad_norm": 0.9870150930796193, "learning_rate": 4.951551742319767e-06, "loss": 0.4815, "step": 6278 }, { "epoch": 0.3823645830161678, "grad_norm": 1.0477554406875553, "learning_rate": 4.951536107802748e-06, "loss": 0.4596, "step": 6279 }, { "epoch": 0.3824254787930457, "grad_norm": 1.0427985948656824, "learning_rate": 4.951520470788155e-06, "loss": 0.451, "step": 6280 }, { "epoch": 0.38248637456992357, "grad_norm": 0.9687833709778435, "learning_rate": 4.951504831276004e-06, "loss": 0.4649, "step": 6281 }, { "epoch": 0.38254727034680147, "grad_norm": 1.1079216903732936, "learning_rate": 4.951489189266311e-06, "loss": 0.5008, "step": 6282 }, { "epoch": 0.3826081661236793, "grad_norm": 1.0733837330347158, "learning_rate": 4.951473544759091e-06, "loss": 0.4135, "step": 6283 }, { "epoch": 0.3826690619005572, "grad_norm": 1.1374250415957654, "learning_rate": 4.951457897754362e-06, "loss": 0.4503, "step": 6284 }, { "epoch": 0.38272995767743506, "grad_norm": 1.0541976972930818, "learning_rate": 4.951442248252138e-06, "loss": 0.4179, "step": 6285 }, { "epoch": 0.38279085345431296, "grad_norm": 0.9703335909200909, "learning_rate": 4.9514265962524345e-06, "loss": 0.4916, "step": 6286 }, { "epoch": 0.3828517492311908, "grad_norm": 1.1470399838132395, "learning_rate": 4.95141094175527e-06, "loss": 0.3831, "step": 6287 }, { "epoch": 0.3829126450080687, "grad_norm": 1.0352791791056986, "learning_rate": 4.951395284760659e-06, "loss": 0.4925, "step": 6288 }, { "epoch": 0.38297354078494655, "grad_norm": 1.0032438516201547, "learning_rate": 4.951379625268617e-06, "loss": 0.4574, "step": 6289 }, { "epoch": 0.38303443656182445, "grad_norm": 0.9575547639593858, "learning_rate": 4.9513639632791604e-06, "loss": 0.5458, "step": 6290 }, { "epoch": 0.3830953323387023, "grad_norm": 1.0581680895226544, "learning_rate": 4.951348298792305e-06, "loss": 0.4311, "step": 6291 }, { "epoch": 0.3831562281155802, "grad_norm": 1.0363448371098116, "learning_rate": 4.9513326318080676e-06, "loss": 0.3808, "step": 6292 }, { "epoch": 0.38321712389245804, "grad_norm": 0.9293598243125225, "learning_rate": 4.951316962326463e-06, "loss": 0.5046, "step": 6293 }, { "epoch": 0.38327801966933595, "grad_norm": 0.9725648967626845, "learning_rate": 4.951301290347508e-06, "loss": 0.4771, "step": 6294 }, { "epoch": 0.3833389154462138, "grad_norm": 0.9759156124826759, "learning_rate": 4.951285615871218e-06, "loss": 0.449, "step": 6295 }, { "epoch": 0.3833998112230917, "grad_norm": 1.0099389898525486, "learning_rate": 4.951269938897608e-06, "loss": 0.44, "step": 6296 }, { "epoch": 0.38346070699996954, "grad_norm": 0.8882066173676824, "learning_rate": 4.951254259426697e-06, "loss": 0.488, "step": 6297 }, { "epoch": 0.38352160277684744, "grad_norm": 0.9538309096341363, "learning_rate": 4.951238577458498e-06, "loss": 0.5115, "step": 6298 }, { "epoch": 0.3835824985537253, "grad_norm": 0.9577774296060666, "learning_rate": 4.951222892993028e-06, "loss": 0.4848, "step": 6299 }, { "epoch": 0.3836433943306032, "grad_norm": 1.031842149859397, "learning_rate": 4.951207206030304e-06, "loss": 0.4174, "step": 6300 }, { "epoch": 0.383704290107481, "grad_norm": 1.0099961951670025, "learning_rate": 4.951191516570341e-06, "loss": 0.4345, "step": 6301 }, { "epoch": 0.38376518588435893, "grad_norm": 0.9994833194291857, "learning_rate": 4.951175824613154e-06, "loss": 0.4523, "step": 6302 }, { "epoch": 0.3838260816612368, "grad_norm": 0.975923365231895, "learning_rate": 4.951160130158761e-06, "loss": 0.5299, "step": 6303 }, { "epoch": 0.3838869774381147, "grad_norm": 1.0167478766946068, "learning_rate": 4.951144433207177e-06, "loss": 0.4806, "step": 6304 }, { "epoch": 0.3839478732149925, "grad_norm": 0.9591071841807449, "learning_rate": 4.951128733758417e-06, "loss": 0.4393, "step": 6305 }, { "epoch": 0.3840087689918704, "grad_norm": 0.9020663304136843, "learning_rate": 4.951113031812499e-06, "loss": 0.5134, "step": 6306 }, { "epoch": 0.38406966476874826, "grad_norm": 1.0174936306932998, "learning_rate": 4.951097327369438e-06, "loss": 0.5146, "step": 6307 }, { "epoch": 0.38413056054562617, "grad_norm": 1.1060376082039234, "learning_rate": 4.951081620429249e-06, "loss": 0.4308, "step": 6308 }, { "epoch": 0.384191456322504, "grad_norm": 1.127919647077675, "learning_rate": 4.95106591099195e-06, "loss": 0.3983, "step": 6309 }, { "epoch": 0.3842523520993819, "grad_norm": 1.0464443322218624, "learning_rate": 4.951050199057555e-06, "loss": 0.4463, "step": 6310 }, { "epoch": 0.38431324787625976, "grad_norm": 0.9897857095101403, "learning_rate": 4.9510344846260826e-06, "loss": 0.4453, "step": 6311 }, { "epoch": 0.38437414365313766, "grad_norm": 1.062831658290762, "learning_rate": 4.9510187676975466e-06, "loss": 0.5417, "step": 6312 }, { "epoch": 0.3844350394300155, "grad_norm": 0.9826009044801425, "learning_rate": 4.9510030482719625e-06, "loss": 0.4641, "step": 6313 }, { "epoch": 0.3844959352068934, "grad_norm": 1.0826786330717952, "learning_rate": 4.950987326349348e-06, "loss": 0.3838, "step": 6314 }, { "epoch": 0.3845568309837713, "grad_norm": 1.0299401644359154, "learning_rate": 4.95097160192972e-06, "loss": 0.4427, "step": 6315 }, { "epoch": 0.38461772676064915, "grad_norm": 1.0499529938159415, "learning_rate": 4.9509558750130916e-06, "loss": 0.4765, "step": 6316 }, { "epoch": 0.38467862253752705, "grad_norm": 0.9357582353703262, "learning_rate": 4.950940145599481e-06, "loss": 0.4818, "step": 6317 }, { "epoch": 0.3847395183144049, "grad_norm": 0.9677693747364238, "learning_rate": 4.950924413688903e-06, "loss": 0.4947, "step": 6318 }, { "epoch": 0.3848004140912828, "grad_norm": 1.0017366576068247, "learning_rate": 4.950908679281374e-06, "loss": 0.4579, "step": 6319 }, { "epoch": 0.38486130986816064, "grad_norm": 1.0031319154965612, "learning_rate": 4.9508929423769105e-06, "loss": 0.4868, "step": 6320 }, { "epoch": 0.38492220564503854, "grad_norm": 0.9774797267008417, "learning_rate": 4.950877202975528e-06, "loss": 0.4838, "step": 6321 }, { "epoch": 0.3849831014219164, "grad_norm": 1.0841681249180901, "learning_rate": 4.950861461077244e-06, "loss": 0.4935, "step": 6322 }, { "epoch": 0.3850439971987943, "grad_norm": 1.0737697935394341, "learning_rate": 4.950845716682072e-06, "loss": 0.4739, "step": 6323 }, { "epoch": 0.38510489297567213, "grad_norm": 1.0567879435946645, "learning_rate": 4.95082996979003e-06, "loss": 0.4831, "step": 6324 }, { "epoch": 0.38516578875255003, "grad_norm": 0.9938692628286614, "learning_rate": 4.950814220401133e-06, "loss": 0.4326, "step": 6325 }, { "epoch": 0.3852266845294279, "grad_norm": 0.9233785259969335, "learning_rate": 4.9507984685153976e-06, "loss": 0.4525, "step": 6326 }, { "epoch": 0.3852875803063058, "grad_norm": 0.9988075676640437, "learning_rate": 4.950782714132839e-06, "loss": 0.4367, "step": 6327 }, { "epoch": 0.3853484760831836, "grad_norm": 1.0425921576268418, "learning_rate": 4.950766957253475e-06, "loss": 0.5339, "step": 6328 }, { "epoch": 0.3854093718600615, "grad_norm": 1.069788153121016, "learning_rate": 4.95075119787732e-06, "loss": 0.4592, "step": 6329 }, { "epoch": 0.38547026763693937, "grad_norm": 1.0299339714363, "learning_rate": 4.950735436004391e-06, "loss": 0.4523, "step": 6330 }, { "epoch": 0.38553116341381727, "grad_norm": 1.0589802274255886, "learning_rate": 4.950719671634704e-06, "loss": 0.4117, "step": 6331 }, { "epoch": 0.3855920591906951, "grad_norm": 1.0133058708597933, "learning_rate": 4.950703904768274e-06, "loss": 0.4576, "step": 6332 }, { "epoch": 0.385652954967573, "grad_norm": 0.9746412170036715, "learning_rate": 4.950688135405117e-06, "loss": 0.4607, "step": 6333 }, { "epoch": 0.38571385074445086, "grad_norm": 1.0293865916261402, "learning_rate": 4.950672363545252e-06, "loss": 0.4659, "step": 6334 }, { "epoch": 0.38577474652132876, "grad_norm": 0.9617523538421171, "learning_rate": 4.950656589188692e-06, "loss": 0.4994, "step": 6335 }, { "epoch": 0.3858356422982066, "grad_norm": 1.021319383343782, "learning_rate": 4.950640812335453e-06, "loss": 0.5104, "step": 6336 }, { "epoch": 0.3858965380750845, "grad_norm": 1.09580280013842, "learning_rate": 4.950625032985553e-06, "loss": 0.4486, "step": 6337 }, { "epoch": 0.38595743385196235, "grad_norm": 1.0415210951526357, "learning_rate": 4.950609251139007e-06, "loss": 0.4334, "step": 6338 }, { "epoch": 0.38601832962884025, "grad_norm": 0.9862360800425973, "learning_rate": 4.950593466795831e-06, "loss": 0.5197, "step": 6339 }, { "epoch": 0.3860792254057181, "grad_norm": 0.9834832456924228, "learning_rate": 4.950577679956042e-06, "loss": 0.4651, "step": 6340 }, { "epoch": 0.386140121182596, "grad_norm": 0.9984921965084532, "learning_rate": 4.950561890619655e-06, "loss": 0.4542, "step": 6341 }, { "epoch": 0.38620101695947384, "grad_norm": 1.0178287250763662, "learning_rate": 4.9505460987866865e-06, "loss": 0.4449, "step": 6342 }, { "epoch": 0.38626191273635174, "grad_norm": 0.9604318383867301, "learning_rate": 4.950530304457153e-06, "loss": 0.4541, "step": 6343 }, { "epoch": 0.3863228085132296, "grad_norm": 1.0549476877572637, "learning_rate": 4.95051450763107e-06, "loss": 0.4043, "step": 6344 }, { "epoch": 0.3863837042901075, "grad_norm": 1.0326286140729468, "learning_rate": 4.950498708308453e-06, "loss": 0.5147, "step": 6345 }, { "epoch": 0.38644460006698533, "grad_norm": 1.1162201919780466, "learning_rate": 4.95048290648932e-06, "loss": 0.52, "step": 6346 }, { "epoch": 0.38650549584386323, "grad_norm": 1.034397085970403, "learning_rate": 4.950467102173685e-06, "loss": 0.4726, "step": 6347 }, { "epoch": 0.3865663916207411, "grad_norm": 0.9652204857462271, "learning_rate": 4.950451295361566e-06, "loss": 0.4378, "step": 6348 }, { "epoch": 0.386627287397619, "grad_norm": 1.0750926888151, "learning_rate": 4.950435486052977e-06, "loss": 0.4595, "step": 6349 }, { "epoch": 0.3866881831744968, "grad_norm": 1.0465697294244056, "learning_rate": 4.950419674247936e-06, "loss": 0.3774, "step": 6350 }, { "epoch": 0.3867490789513747, "grad_norm": 1.043732891998748, "learning_rate": 4.950403859946459e-06, "loss": 0.4025, "step": 6351 }, { "epoch": 0.38680997472825257, "grad_norm": 1.1313624927388177, "learning_rate": 4.95038804314856e-06, "loss": 0.397, "step": 6352 }, { "epoch": 0.38687087050513047, "grad_norm": 0.9773335860486658, "learning_rate": 4.950372223854257e-06, "loss": 0.477, "step": 6353 }, { "epoch": 0.3869317662820083, "grad_norm": 1.0442801353347437, "learning_rate": 4.9503564020635665e-06, "loss": 0.4611, "step": 6354 }, { "epoch": 0.3869926620588862, "grad_norm": 1.0109439780256262, "learning_rate": 4.950340577776503e-06, "loss": 0.4819, "step": 6355 }, { "epoch": 0.3870535578357641, "grad_norm": 0.9846806036666983, "learning_rate": 4.950324750993084e-06, "loss": 0.5232, "step": 6356 }, { "epoch": 0.38711445361264196, "grad_norm": 1.0129264953085642, "learning_rate": 4.950308921713326e-06, "loss": 0.4405, "step": 6357 }, { "epoch": 0.38717534938951986, "grad_norm": 0.9584811355138483, "learning_rate": 4.9502930899372425e-06, "loss": 0.4713, "step": 6358 }, { "epoch": 0.3872362451663977, "grad_norm": 1.0314283740941406, "learning_rate": 4.950277255664852e-06, "loss": 0.4829, "step": 6359 }, { "epoch": 0.3872971409432756, "grad_norm": 1.0528286439091543, "learning_rate": 4.9502614188961705e-06, "loss": 0.4391, "step": 6360 }, { "epoch": 0.38735803672015345, "grad_norm": 0.9153242588790159, "learning_rate": 4.950245579631213e-06, "loss": 0.4926, "step": 6361 }, { "epoch": 0.38741893249703135, "grad_norm": 0.9491558865287852, "learning_rate": 4.950229737869996e-06, "loss": 0.5056, "step": 6362 }, { "epoch": 0.3874798282739092, "grad_norm": 0.9722872635357207, "learning_rate": 4.950213893612537e-06, "loss": 0.5005, "step": 6363 }, { "epoch": 0.3875407240507871, "grad_norm": 1.0194452022904033, "learning_rate": 4.95019804685885e-06, "loss": 0.4566, "step": 6364 }, { "epoch": 0.38760161982766494, "grad_norm": 1.1100276754679128, "learning_rate": 4.9501821976089525e-06, "loss": 0.4949, "step": 6365 }, { "epoch": 0.38766251560454285, "grad_norm": 1.0017967062357778, "learning_rate": 4.9501663458628604e-06, "loss": 0.59, "step": 6366 }, { "epoch": 0.3877234113814207, "grad_norm": 1.0242852998016498, "learning_rate": 4.95015049162059e-06, "loss": 0.5047, "step": 6367 }, { "epoch": 0.3877843071582986, "grad_norm": 0.9705342051729012, "learning_rate": 4.950134634882156e-06, "loss": 0.5191, "step": 6368 }, { "epoch": 0.38784520293517644, "grad_norm": 1.0516550382813907, "learning_rate": 4.950118775647577e-06, "loss": 0.4498, "step": 6369 }, { "epoch": 0.38790609871205434, "grad_norm": 0.9790921764229468, "learning_rate": 4.9501029139168676e-06, "loss": 0.4741, "step": 6370 }, { "epoch": 0.3879669944889322, "grad_norm": 1.0218587945704436, "learning_rate": 4.950087049690044e-06, "loss": 0.4473, "step": 6371 }, { "epoch": 0.3880278902658101, "grad_norm": 1.108322294893642, "learning_rate": 4.950071182967124e-06, "loss": 0.4343, "step": 6372 }, { "epoch": 0.3880887860426879, "grad_norm": 1.0207817249666158, "learning_rate": 4.950055313748121e-06, "loss": 0.4173, "step": 6373 }, { "epoch": 0.38814968181956583, "grad_norm": 1.1128852110451526, "learning_rate": 4.950039442033053e-06, "loss": 0.4209, "step": 6374 }, { "epoch": 0.3882105775964437, "grad_norm": 1.0307718920415765, "learning_rate": 4.950023567821936e-06, "loss": 0.4645, "step": 6375 }, { "epoch": 0.3882714733733216, "grad_norm": 1.092118192468334, "learning_rate": 4.950007691114785e-06, "loss": 0.383, "step": 6376 }, { "epoch": 0.3883323691501994, "grad_norm": 0.9781980319924812, "learning_rate": 4.949991811911618e-06, "loss": 0.4395, "step": 6377 }, { "epoch": 0.3883932649270773, "grad_norm": 1.0055344303853269, "learning_rate": 4.949975930212449e-06, "loss": 0.4991, "step": 6378 }, { "epoch": 0.38845416070395516, "grad_norm": 0.9903163766491825, "learning_rate": 4.9499600460172966e-06, "loss": 0.5108, "step": 6379 }, { "epoch": 0.38851505648083307, "grad_norm": 1.0658710892624426, "learning_rate": 4.949944159326177e-06, "loss": 0.4793, "step": 6380 }, { "epoch": 0.3885759522577109, "grad_norm": 0.955866144066806, "learning_rate": 4.949928270139102e-06, "loss": 0.4597, "step": 6381 }, { "epoch": 0.3886368480345888, "grad_norm": 1.037838939103643, "learning_rate": 4.949912378456095e-06, "loss": 0.468, "step": 6382 }, { "epoch": 0.38869774381146666, "grad_norm": 1.089090201834181, "learning_rate": 4.9498964842771655e-06, "loss": 0.4768, "step": 6383 }, { "epoch": 0.38875863958834456, "grad_norm": 1.028548741793015, "learning_rate": 4.9498805876023326e-06, "loss": 0.4874, "step": 6384 }, { "epoch": 0.3888195353652224, "grad_norm": 1.0300341607630452, "learning_rate": 4.949864688431613e-06, "loss": 0.4334, "step": 6385 }, { "epoch": 0.3888804311421003, "grad_norm": 1.0343773590551648, "learning_rate": 4.949848786765022e-06, "loss": 0.4085, "step": 6386 }, { "epoch": 0.38894132691897815, "grad_norm": 1.020244767064479, "learning_rate": 4.949832882602575e-06, "loss": 0.4477, "step": 6387 }, { "epoch": 0.38900222269585605, "grad_norm": 1.0818046106205355, "learning_rate": 4.949816975944291e-06, "loss": 0.4636, "step": 6388 }, { "epoch": 0.3890631184727339, "grad_norm": 1.0761303694228894, "learning_rate": 4.949801066790183e-06, "loss": 0.4394, "step": 6389 }, { "epoch": 0.3891240142496118, "grad_norm": 0.9858558044605464, "learning_rate": 4.9497851551402696e-06, "loss": 0.4851, "step": 6390 }, { "epoch": 0.38918491002648964, "grad_norm": 1.1734249088344224, "learning_rate": 4.949769240994566e-06, "loss": 0.3898, "step": 6391 }, { "epoch": 0.38924580580336754, "grad_norm": 1.001976606781902, "learning_rate": 4.9497533243530875e-06, "loss": 0.5069, "step": 6392 }, { "epoch": 0.3893067015802454, "grad_norm": 0.983230641995478, "learning_rate": 4.949737405215851e-06, "loss": 0.4726, "step": 6393 }, { "epoch": 0.3893675973571233, "grad_norm": 1.1193679350367232, "learning_rate": 4.949721483582874e-06, "loss": 0.4416, "step": 6394 }, { "epoch": 0.38942849313400113, "grad_norm": 1.0463244515416639, "learning_rate": 4.949705559454171e-06, "loss": 0.513, "step": 6395 }, { "epoch": 0.38948938891087903, "grad_norm": 1.036486722908571, "learning_rate": 4.949689632829759e-06, "loss": 0.4364, "step": 6396 }, { "epoch": 0.38955028468775693, "grad_norm": 0.9283724485925434, "learning_rate": 4.9496737037096554e-06, "loss": 0.4897, "step": 6397 }, { "epoch": 0.3896111804646348, "grad_norm": 1.0854169975371695, "learning_rate": 4.949657772093874e-06, "loss": 0.4051, "step": 6398 }, { "epoch": 0.3896720762415127, "grad_norm": 0.9436845055650138, "learning_rate": 4.949641837982432e-06, "loss": 0.4693, "step": 6399 }, { "epoch": 0.3897329720183905, "grad_norm": 0.9743088924783367, "learning_rate": 4.949625901375346e-06, "loss": 0.4314, "step": 6400 }, { "epoch": 0.3897938677952684, "grad_norm": 0.9513981680753608, "learning_rate": 4.949609962272632e-06, "loss": 0.4555, "step": 6401 }, { "epoch": 0.38985476357214627, "grad_norm": 1.0289160525081091, "learning_rate": 4.949594020674307e-06, "loss": 0.4491, "step": 6402 }, { "epoch": 0.38991565934902417, "grad_norm": 1.0359937213846744, "learning_rate": 4.949578076580386e-06, "loss": 0.4131, "step": 6403 }, { "epoch": 0.389976555125902, "grad_norm": 1.095500343064474, "learning_rate": 4.9495621299908856e-06, "loss": 0.4535, "step": 6404 }, { "epoch": 0.3900374509027799, "grad_norm": 0.9804538295758262, "learning_rate": 4.949546180905823e-06, "loss": 0.4709, "step": 6405 }, { "epoch": 0.39009834667965776, "grad_norm": 1.0587418875725434, "learning_rate": 4.949530229325213e-06, "loss": 0.4586, "step": 6406 }, { "epoch": 0.39015924245653566, "grad_norm": 0.9981034345597596, "learning_rate": 4.949514275249073e-06, "loss": 0.4476, "step": 6407 }, { "epoch": 0.3902201382334135, "grad_norm": 1.1667540784634831, "learning_rate": 4.949498318677418e-06, "loss": 0.4337, "step": 6408 }, { "epoch": 0.3902810340102914, "grad_norm": 1.0526914219028676, "learning_rate": 4.949482359610266e-06, "loss": 0.4729, "step": 6409 }, { "epoch": 0.39034192978716925, "grad_norm": 1.002888721275547, "learning_rate": 4.949466398047631e-06, "loss": 0.4699, "step": 6410 }, { "epoch": 0.39040282556404715, "grad_norm": 1.1457258667360766, "learning_rate": 4.949450433989532e-06, "loss": 0.425, "step": 6411 }, { "epoch": 0.390463721340925, "grad_norm": 0.9057428941329452, "learning_rate": 4.949434467435983e-06, "loss": 0.5117, "step": 6412 }, { "epoch": 0.3905246171178029, "grad_norm": 1.0717151246082979, "learning_rate": 4.9494184983870004e-06, "loss": 0.4373, "step": 6413 }, { "epoch": 0.39058551289468074, "grad_norm": 1.0859096426886905, "learning_rate": 4.949402526842603e-06, "loss": 0.4339, "step": 6414 }, { "epoch": 0.39064640867155864, "grad_norm": 0.9693654325716548, "learning_rate": 4.949386552802804e-06, "loss": 0.4326, "step": 6415 }, { "epoch": 0.3907073044484365, "grad_norm": 1.039531561933885, "learning_rate": 4.949370576267621e-06, "loss": 0.4725, "step": 6416 }, { "epoch": 0.3907682002253144, "grad_norm": 1.018161470092905, "learning_rate": 4.94935459723707e-06, "loss": 0.4976, "step": 6417 }, { "epoch": 0.39082909600219223, "grad_norm": 1.0508811763691752, "learning_rate": 4.949338615711168e-06, "loss": 0.4201, "step": 6418 }, { "epoch": 0.39088999177907013, "grad_norm": 1.0394987893515955, "learning_rate": 4.949322631689931e-06, "loss": 0.4268, "step": 6419 }, { "epoch": 0.390950887555948, "grad_norm": 1.1179017800443736, "learning_rate": 4.949306645173374e-06, "loss": 0.4208, "step": 6420 }, { "epoch": 0.3910117833328259, "grad_norm": 1.0867508673843966, "learning_rate": 4.949290656161515e-06, "loss": 0.4548, "step": 6421 }, { "epoch": 0.3910726791097037, "grad_norm": 1.0371169986256918, "learning_rate": 4.94927466465437e-06, "loss": 0.4749, "step": 6422 }, { "epoch": 0.3911335748865816, "grad_norm": 1.0223176413706252, "learning_rate": 4.949258670651954e-06, "loss": 0.4958, "step": 6423 }, { "epoch": 0.39119447066345947, "grad_norm": 1.0710793600640567, "learning_rate": 4.949242674154285e-06, "loss": 0.4748, "step": 6424 }, { "epoch": 0.39125536644033737, "grad_norm": 1.0320421846257315, "learning_rate": 4.949226675161378e-06, "loss": 0.5239, "step": 6425 }, { "epoch": 0.3913162622172152, "grad_norm": 0.9808642727280072, "learning_rate": 4.949210673673249e-06, "loss": 0.5171, "step": 6426 }, { "epoch": 0.3913771579940931, "grad_norm": 0.9638762169362743, "learning_rate": 4.949194669689916e-06, "loss": 0.4602, "step": 6427 }, { "epoch": 0.39143805377097096, "grad_norm": 1.10705159905279, "learning_rate": 4.949178663211395e-06, "loss": 0.461, "step": 6428 }, { "epoch": 0.39149894954784886, "grad_norm": 0.9874469001262415, "learning_rate": 4.9491626542377006e-06, "loss": 0.4886, "step": 6429 }, { "epoch": 0.3915598453247267, "grad_norm": 1.0263978920176118, "learning_rate": 4.94914664276885e-06, "loss": 0.4554, "step": 6430 }, { "epoch": 0.3916207411016046, "grad_norm": 1.0402655802576883, "learning_rate": 4.949130628804861e-06, "loss": 0.4102, "step": 6431 }, { "epoch": 0.39168163687848245, "grad_norm": 1.0351270535429105, "learning_rate": 4.949114612345748e-06, "loss": 0.4439, "step": 6432 }, { "epoch": 0.39174253265536035, "grad_norm": 1.0198294751986667, "learning_rate": 4.949098593391528e-06, "loss": 0.4814, "step": 6433 }, { "epoch": 0.3918034284322382, "grad_norm": 1.0967396881767202, "learning_rate": 4.949082571942218e-06, "loss": 0.5059, "step": 6434 }, { "epoch": 0.3918643242091161, "grad_norm": 0.9887629801354777, "learning_rate": 4.9490665479978316e-06, "loss": 0.4539, "step": 6435 }, { "epoch": 0.39192521998599394, "grad_norm": 1.0604697240470458, "learning_rate": 4.949050521558388e-06, "loss": 0.4268, "step": 6436 }, { "epoch": 0.39198611576287185, "grad_norm": 1.0236796330147706, "learning_rate": 4.949034492623904e-06, "loss": 0.4788, "step": 6437 }, { "epoch": 0.39204701153974975, "grad_norm": 1.017613259220687, "learning_rate": 4.949018461194393e-06, "loss": 0.4482, "step": 6438 }, { "epoch": 0.3921079073166276, "grad_norm": 1.0358924502190139, "learning_rate": 4.949002427269873e-06, "loss": 0.5405, "step": 6439 }, { "epoch": 0.3921688030935055, "grad_norm": 1.032433050517823, "learning_rate": 4.948986390850361e-06, "loss": 0.4395, "step": 6440 }, { "epoch": 0.39222969887038334, "grad_norm": 1.1115396199919994, "learning_rate": 4.948970351935872e-06, "loss": 0.4314, "step": 6441 }, { "epoch": 0.39229059464726124, "grad_norm": 1.000528830011874, "learning_rate": 4.948954310526423e-06, "loss": 0.502, "step": 6442 }, { "epoch": 0.3923514904241391, "grad_norm": 1.0782873772755406, "learning_rate": 4.948938266622031e-06, "loss": 0.4496, "step": 6443 }, { "epoch": 0.392412386201017, "grad_norm": 0.9672023743986548, "learning_rate": 4.948922220222712e-06, "loss": 0.4377, "step": 6444 }, { "epoch": 0.39247328197789483, "grad_norm": 1.0346246655089741, "learning_rate": 4.94890617132848e-06, "loss": 0.4615, "step": 6445 }, { "epoch": 0.39253417775477273, "grad_norm": 1.0878804057998182, "learning_rate": 4.948890119939355e-06, "loss": 0.409, "step": 6446 }, { "epoch": 0.3925950735316506, "grad_norm": 1.0045287329813242, "learning_rate": 4.94887406605535e-06, "loss": 0.4279, "step": 6447 }, { "epoch": 0.3926559693085285, "grad_norm": 1.0318972399000113, "learning_rate": 4.948858009676485e-06, "loss": 0.4857, "step": 6448 }, { "epoch": 0.3927168650854063, "grad_norm": 1.0838431529349921, "learning_rate": 4.948841950802773e-06, "loss": 0.3708, "step": 6449 }, { "epoch": 0.3927777608622842, "grad_norm": 0.9717468363514001, "learning_rate": 4.9488258894342324e-06, "loss": 0.4311, "step": 6450 }, { "epoch": 0.39283865663916206, "grad_norm": 0.9703269931960113, "learning_rate": 4.9488098255708785e-06, "loss": 0.5271, "step": 6451 }, { "epoch": 0.39289955241603997, "grad_norm": 1.1114263463520497, "learning_rate": 4.948793759212728e-06, "loss": 0.4279, "step": 6452 }, { "epoch": 0.3929604481929178, "grad_norm": 1.0678365255039153, "learning_rate": 4.948777690359797e-06, "loss": 0.5031, "step": 6453 }, { "epoch": 0.3930213439697957, "grad_norm": 0.998497825416097, "learning_rate": 4.948761619012103e-06, "loss": 0.5006, "step": 6454 }, { "epoch": 0.39308223974667356, "grad_norm": 1.031057566662535, "learning_rate": 4.948745545169661e-06, "loss": 0.3981, "step": 6455 }, { "epoch": 0.39314313552355146, "grad_norm": 0.9623529938978084, "learning_rate": 4.948729468832488e-06, "loss": 0.4629, "step": 6456 }, { "epoch": 0.3932040313004293, "grad_norm": 1.0377313859569048, "learning_rate": 4.948713390000602e-06, "loss": 0.3808, "step": 6457 }, { "epoch": 0.3932649270773072, "grad_norm": 1.0244824541916986, "learning_rate": 4.948697308674015e-06, "loss": 0.5736, "step": 6458 }, { "epoch": 0.39332582285418505, "grad_norm": 1.0785960886965278, "learning_rate": 4.948681224852747e-06, "loss": 0.4555, "step": 6459 }, { "epoch": 0.39338671863106295, "grad_norm": 1.0511672935768919, "learning_rate": 4.948665138536815e-06, "loss": 0.4254, "step": 6460 }, { "epoch": 0.3934476144079408, "grad_norm": 1.0624724139093964, "learning_rate": 4.9486490497262315e-06, "loss": 0.4677, "step": 6461 }, { "epoch": 0.3935085101848187, "grad_norm": 0.9718296701232991, "learning_rate": 4.948632958421017e-06, "loss": 0.4242, "step": 6462 }, { "epoch": 0.39356940596169654, "grad_norm": 0.9544938235410488, "learning_rate": 4.948616864621185e-06, "loss": 0.4776, "step": 6463 }, { "epoch": 0.39363030173857444, "grad_norm": 1.1054803517763, "learning_rate": 4.9486007683267546e-06, "loss": 0.4477, "step": 6464 }, { "epoch": 0.3936911975154523, "grad_norm": 1.0140846009746516, "learning_rate": 4.948584669537739e-06, "loss": 0.4943, "step": 6465 }, { "epoch": 0.3937520932923302, "grad_norm": 1.0220391721938995, "learning_rate": 4.948568568254157e-06, "loss": 0.4761, "step": 6466 }, { "epoch": 0.39381298906920803, "grad_norm": 1.042525990063406, "learning_rate": 4.948552464476024e-06, "loss": 0.4675, "step": 6467 }, { "epoch": 0.39387388484608593, "grad_norm": 0.9845955242898616, "learning_rate": 4.9485363582033575e-06, "loss": 0.527, "step": 6468 }, { "epoch": 0.3939347806229638, "grad_norm": 1.0368760717175556, "learning_rate": 4.948520249436171e-06, "loss": 0.4646, "step": 6469 }, { "epoch": 0.3939956763998417, "grad_norm": 1.001248633990056, "learning_rate": 4.948504138174486e-06, "loss": 0.4583, "step": 6470 }, { "epoch": 0.3940565721767195, "grad_norm": 1.009651585270744, "learning_rate": 4.948488024418314e-06, "loss": 0.4492, "step": 6471 }, { "epoch": 0.3941174679535974, "grad_norm": 1.1302571377132657, "learning_rate": 4.948471908167674e-06, "loss": 0.4013, "step": 6472 }, { "epoch": 0.39417836373047527, "grad_norm": 1.0719566729986971, "learning_rate": 4.94845578942258e-06, "loss": 0.5387, "step": 6473 }, { "epoch": 0.39423925950735317, "grad_norm": 1.029262966180084, "learning_rate": 4.948439668183052e-06, "loss": 0.4762, "step": 6474 }, { "epoch": 0.394300155284231, "grad_norm": 1.0974460904626753, "learning_rate": 4.948423544449104e-06, "loss": 0.4118, "step": 6475 }, { "epoch": 0.3943610510611089, "grad_norm": 1.0277007191096985, "learning_rate": 4.948407418220753e-06, "loss": 0.4624, "step": 6476 }, { "epoch": 0.39442194683798676, "grad_norm": 1.025700176627893, "learning_rate": 4.9483912894980155e-06, "loss": 0.556, "step": 6477 }, { "epoch": 0.39448284261486466, "grad_norm": 0.9266151296256855, "learning_rate": 4.948375158280908e-06, "loss": 0.5032, "step": 6478 }, { "epoch": 0.39454373839174256, "grad_norm": 1.0949692238261866, "learning_rate": 4.948359024569446e-06, "loss": 0.358, "step": 6479 }, { "epoch": 0.3946046341686204, "grad_norm": 1.010789416719517, "learning_rate": 4.948342888363648e-06, "loss": 0.4303, "step": 6480 }, { "epoch": 0.3946655299454983, "grad_norm": 1.0073926061873475, "learning_rate": 4.9483267496635276e-06, "loss": 0.4613, "step": 6481 }, { "epoch": 0.39472642572237615, "grad_norm": 1.0830390168258455, "learning_rate": 4.948310608469105e-06, "loss": 0.4169, "step": 6482 }, { "epoch": 0.39478732149925405, "grad_norm": 0.9082412609995786, "learning_rate": 4.948294464780392e-06, "loss": 0.5287, "step": 6483 }, { "epoch": 0.3948482172761319, "grad_norm": 1.0174505966043244, "learning_rate": 4.948278318597409e-06, "loss": 0.5159, "step": 6484 }, { "epoch": 0.3949091130530098, "grad_norm": 1.1654913854715987, "learning_rate": 4.948262169920171e-06, "loss": 0.3722, "step": 6485 }, { "epoch": 0.39497000882988764, "grad_norm": 1.057110028290338, "learning_rate": 4.9482460187486935e-06, "loss": 0.4169, "step": 6486 }, { "epoch": 0.39503090460676554, "grad_norm": 1.0008736666228037, "learning_rate": 4.948229865082994e-06, "loss": 0.4602, "step": 6487 }, { "epoch": 0.3950918003836434, "grad_norm": 1.0239879164830714, "learning_rate": 4.948213708923089e-06, "loss": 0.4862, "step": 6488 }, { "epoch": 0.3951526961605213, "grad_norm": 1.0323246004984061, "learning_rate": 4.948197550268996e-06, "loss": 0.4918, "step": 6489 }, { "epoch": 0.39521359193739913, "grad_norm": 1.0584163038383547, "learning_rate": 4.948181389120729e-06, "loss": 0.4552, "step": 6490 }, { "epoch": 0.39527448771427703, "grad_norm": 1.022820022150606, "learning_rate": 4.948165225478305e-06, "loss": 0.4897, "step": 6491 }, { "epoch": 0.3953353834911549, "grad_norm": 1.0847522775517304, "learning_rate": 4.9481490593417425e-06, "loss": 0.3986, "step": 6492 }, { "epoch": 0.3953962792680328, "grad_norm": 1.0461923331357679, "learning_rate": 4.948132890711056e-06, "loss": 0.4575, "step": 6493 }, { "epoch": 0.3954571750449106, "grad_norm": 1.0337714357752312, "learning_rate": 4.948116719586263e-06, "loss": 0.5078, "step": 6494 }, { "epoch": 0.3955180708217885, "grad_norm": 1.0266052119871885, "learning_rate": 4.948100545967379e-06, "loss": 0.4351, "step": 6495 }, { "epoch": 0.39557896659866637, "grad_norm": 1.019261509570704, "learning_rate": 4.948084369854422e-06, "loss": 0.4864, "step": 6496 }, { "epoch": 0.39563986237554427, "grad_norm": 0.9803086986371645, "learning_rate": 4.948068191247407e-06, "loss": 0.488, "step": 6497 }, { "epoch": 0.3957007581524221, "grad_norm": 1.0788566782239037, "learning_rate": 4.948052010146351e-06, "loss": 0.4293, "step": 6498 }, { "epoch": 0.3957616539293, "grad_norm": 1.0734299986468638, "learning_rate": 4.94803582655127e-06, "loss": 0.452, "step": 6499 }, { "epoch": 0.39582254970617786, "grad_norm": 0.975914232009204, "learning_rate": 4.948019640462182e-06, "loss": 0.4881, "step": 6500 }, { "epoch": 0.39588344548305576, "grad_norm": 1.091114477557011, "learning_rate": 4.948003451879102e-06, "loss": 0.4268, "step": 6501 }, { "epoch": 0.3959443412599336, "grad_norm": 1.088198886869426, "learning_rate": 4.947987260802047e-06, "loss": 0.5054, "step": 6502 }, { "epoch": 0.3960052370368115, "grad_norm": 0.9995289795792761, "learning_rate": 4.947971067231033e-06, "loss": 0.4368, "step": 6503 }, { "epoch": 0.39606613281368935, "grad_norm": 0.97425331421946, "learning_rate": 4.947954871166077e-06, "loss": 0.5129, "step": 6504 }, { "epoch": 0.39612702859056725, "grad_norm": 0.9426490347654777, "learning_rate": 4.947938672607197e-06, "loss": 0.4706, "step": 6505 }, { "epoch": 0.3961879243674451, "grad_norm": 1.0429396081061701, "learning_rate": 4.947922471554406e-06, "loss": 0.521, "step": 6506 }, { "epoch": 0.396248820144323, "grad_norm": 0.9947111676712126, "learning_rate": 4.947906268007724e-06, "loss": 0.4804, "step": 6507 }, { "epoch": 0.39630971592120084, "grad_norm": 1.0823182885660778, "learning_rate": 4.947890061967165e-06, "loss": 0.519, "step": 6508 }, { "epoch": 0.39637061169807875, "grad_norm": 1.0430001834039864, "learning_rate": 4.947873853432746e-06, "loss": 0.4785, "step": 6509 }, { "epoch": 0.3964315074749566, "grad_norm": 1.1019163182565934, "learning_rate": 4.947857642404485e-06, "loss": 0.4758, "step": 6510 }, { "epoch": 0.3964924032518345, "grad_norm": 1.1026050417291915, "learning_rate": 4.9478414288823985e-06, "loss": 0.4557, "step": 6511 }, { "epoch": 0.39655329902871234, "grad_norm": 0.9975771259242836, "learning_rate": 4.9478252128665e-06, "loss": 0.4943, "step": 6512 }, { "epoch": 0.39661419480559024, "grad_norm": 1.0954940384131266, "learning_rate": 4.94780899435681e-06, "loss": 0.4081, "step": 6513 }, { "epoch": 0.3966750905824681, "grad_norm": 1.1051175777140128, "learning_rate": 4.947792773353342e-06, "loss": 0.4302, "step": 6514 }, { "epoch": 0.396735986359346, "grad_norm": 0.9684373798013078, "learning_rate": 4.947776549856113e-06, "loss": 0.475, "step": 6515 }, { "epoch": 0.3967968821362238, "grad_norm": 1.0816205751732373, "learning_rate": 4.9477603238651404e-06, "loss": 0.4666, "step": 6516 }, { "epoch": 0.39685777791310173, "grad_norm": 0.9950805926758415, "learning_rate": 4.947744095380441e-06, "loss": 0.4685, "step": 6517 }, { "epoch": 0.3969186736899796, "grad_norm": 0.9447651986656109, "learning_rate": 4.947727864402031e-06, "loss": 0.565, "step": 6518 }, { "epoch": 0.3969795694668575, "grad_norm": 0.945473546250564, "learning_rate": 4.947711630929926e-06, "loss": 0.4739, "step": 6519 }, { "epoch": 0.3970404652437354, "grad_norm": 1.0322168569104946, "learning_rate": 4.947695394964145e-06, "loss": 0.4722, "step": 6520 }, { "epoch": 0.3971013610206132, "grad_norm": 1.0327284307238587, "learning_rate": 4.9476791565047015e-06, "loss": 0.4081, "step": 6521 }, { "epoch": 0.3971622567974911, "grad_norm": 1.0108242143617996, "learning_rate": 4.947662915551613e-06, "loss": 0.4372, "step": 6522 }, { "epoch": 0.39722315257436897, "grad_norm": 0.9655296518886991, "learning_rate": 4.947646672104896e-06, "loss": 0.4924, "step": 6523 }, { "epoch": 0.39728404835124687, "grad_norm": 1.0367814808441709, "learning_rate": 4.947630426164568e-06, "loss": 0.4642, "step": 6524 }, { "epoch": 0.3973449441281247, "grad_norm": 1.0845735298863415, "learning_rate": 4.947614177730646e-06, "loss": 0.4651, "step": 6525 }, { "epoch": 0.3974058399050026, "grad_norm": 1.0014622626455083, "learning_rate": 4.947597926803145e-06, "loss": 0.4873, "step": 6526 }, { "epoch": 0.39746673568188046, "grad_norm": 1.031301034757973, "learning_rate": 4.947581673382081e-06, "loss": 0.4359, "step": 6527 }, { "epoch": 0.39752763145875836, "grad_norm": 0.9543975107479361, "learning_rate": 4.947565417467473e-06, "loss": 0.414, "step": 6528 }, { "epoch": 0.3975885272356362, "grad_norm": 0.9785787544688984, "learning_rate": 4.947549159059336e-06, "loss": 0.4852, "step": 6529 }, { "epoch": 0.3976494230125141, "grad_norm": 1.0724041661849852, "learning_rate": 4.947532898157687e-06, "loss": 0.4991, "step": 6530 }, { "epoch": 0.39771031878939195, "grad_norm": 1.0087176660349486, "learning_rate": 4.947516634762542e-06, "loss": 0.4831, "step": 6531 }, { "epoch": 0.39777121456626985, "grad_norm": 1.112213087054264, "learning_rate": 4.947500368873918e-06, "loss": 0.3793, "step": 6532 }, { "epoch": 0.3978321103431477, "grad_norm": 1.0394408909255153, "learning_rate": 4.947484100491831e-06, "loss": 0.5601, "step": 6533 }, { "epoch": 0.3978930061200256, "grad_norm": 1.0222896474340062, "learning_rate": 4.947467829616299e-06, "loss": 0.4509, "step": 6534 }, { "epoch": 0.39795390189690344, "grad_norm": 1.0262778525328218, "learning_rate": 4.947451556247337e-06, "loss": 0.4982, "step": 6535 }, { "epoch": 0.39801479767378134, "grad_norm": 1.1346300440773969, "learning_rate": 4.947435280384962e-06, "loss": 0.4761, "step": 6536 }, { "epoch": 0.3980756934506592, "grad_norm": 1.0583747872859475, "learning_rate": 4.947419002029192e-06, "loss": 0.4967, "step": 6537 }, { "epoch": 0.3981365892275371, "grad_norm": 1.0352071227161725, "learning_rate": 4.947402721180041e-06, "loss": 0.4668, "step": 6538 }, { "epoch": 0.39819748500441493, "grad_norm": 0.9971490688662645, "learning_rate": 4.947386437837528e-06, "loss": 0.4535, "step": 6539 }, { "epoch": 0.39825838078129283, "grad_norm": 1.0056813649165552, "learning_rate": 4.947370152001668e-06, "loss": 0.4367, "step": 6540 }, { "epoch": 0.3983192765581707, "grad_norm": 0.9983936379206212, "learning_rate": 4.947353863672479e-06, "loss": 0.4827, "step": 6541 }, { "epoch": 0.3983801723350486, "grad_norm": 1.0004451122499012, "learning_rate": 4.947337572849976e-06, "loss": 0.469, "step": 6542 }, { "epoch": 0.3984410681119264, "grad_norm": 0.9961731750853224, "learning_rate": 4.947321279534177e-06, "loss": 0.4654, "step": 6543 }, { "epoch": 0.3985019638888043, "grad_norm": 1.06412289240787, "learning_rate": 4.9473049837250975e-06, "loss": 0.4592, "step": 6544 }, { "epoch": 0.39856285966568217, "grad_norm": 1.1695403901109822, "learning_rate": 4.9472886854227545e-06, "loss": 0.4161, "step": 6545 }, { "epoch": 0.39862375544256007, "grad_norm": 1.0529119675251486, "learning_rate": 4.947272384627164e-06, "loss": 0.5033, "step": 6546 }, { "epoch": 0.3986846512194379, "grad_norm": 1.0583170759372587, "learning_rate": 4.947256081338345e-06, "loss": 0.4516, "step": 6547 }, { "epoch": 0.3987455469963158, "grad_norm": 1.012833920961575, "learning_rate": 4.947239775556311e-06, "loss": 0.4518, "step": 6548 }, { "epoch": 0.39880644277319366, "grad_norm": 1.0027157767339108, "learning_rate": 4.947223467281081e-06, "loss": 0.4234, "step": 6549 }, { "epoch": 0.39886733855007156, "grad_norm": 0.9966675929842216, "learning_rate": 4.947207156512669e-06, "loss": 0.4248, "step": 6550 }, { "epoch": 0.3989282343269494, "grad_norm": 1.0755477766885828, "learning_rate": 4.947190843251095e-06, "loss": 0.3812, "step": 6551 }, { "epoch": 0.3989891301038273, "grad_norm": 0.9059389918218469, "learning_rate": 4.947174527496373e-06, "loss": 0.5247, "step": 6552 }, { "epoch": 0.39905002588070515, "grad_norm": 1.0218955694648424, "learning_rate": 4.9471582092485205e-06, "loss": 0.4089, "step": 6553 }, { "epoch": 0.39911092165758305, "grad_norm": 0.9083334547219732, "learning_rate": 4.947141888507554e-06, "loss": 0.4877, "step": 6554 }, { "epoch": 0.3991718174344609, "grad_norm": 1.0789965061790998, "learning_rate": 4.947125565273491e-06, "loss": 0.4174, "step": 6555 }, { "epoch": 0.3992327132113388, "grad_norm": 0.9567252750359567, "learning_rate": 4.947109239546346e-06, "loss": 0.3761, "step": 6556 }, { "epoch": 0.39929360898821664, "grad_norm": 0.9413218537559443, "learning_rate": 4.947092911326138e-06, "loss": 0.4639, "step": 6557 }, { "epoch": 0.39935450476509454, "grad_norm": 0.9667361585119515, "learning_rate": 4.947076580612882e-06, "loss": 0.495, "step": 6558 }, { "epoch": 0.3994154005419724, "grad_norm": 1.017271073449504, "learning_rate": 4.947060247406595e-06, "loss": 0.4085, "step": 6559 }, { "epoch": 0.3994762963188503, "grad_norm": 1.0556644535546538, "learning_rate": 4.947043911707295e-06, "loss": 0.4487, "step": 6560 }, { "epoch": 0.3995371920957282, "grad_norm": 1.0455232907619971, "learning_rate": 4.947027573514998e-06, "loss": 0.4676, "step": 6561 }, { "epoch": 0.39959808787260603, "grad_norm": 0.9615664279043101, "learning_rate": 4.947011232829718e-06, "loss": 0.4539, "step": 6562 }, { "epoch": 0.39965898364948393, "grad_norm": 0.9710643008091684, "learning_rate": 4.946994889651475e-06, "loss": 0.5044, "step": 6563 }, { "epoch": 0.3997198794263618, "grad_norm": 1.034732298267663, "learning_rate": 4.946978543980286e-06, "loss": 0.4517, "step": 6564 }, { "epoch": 0.3997807752032397, "grad_norm": 1.0793708730920715, "learning_rate": 4.946962195816164e-06, "loss": 0.4123, "step": 6565 }, { "epoch": 0.3998416709801175, "grad_norm": 0.9822606611698914, "learning_rate": 4.946945845159128e-06, "loss": 0.4616, "step": 6566 }, { "epoch": 0.3999025667569954, "grad_norm": 0.9827930553491175, "learning_rate": 4.946929492009194e-06, "loss": 0.4642, "step": 6567 }, { "epoch": 0.39996346253387327, "grad_norm": 0.9557909714089836, "learning_rate": 4.946913136366382e-06, "loss": 0.5097, "step": 6568 }, { "epoch": 0.40002435831075117, "grad_norm": 0.9786406107816, "learning_rate": 4.946896778230703e-06, "loss": 0.4888, "step": 6569 }, { "epoch": 0.400085254087629, "grad_norm": 0.9946275718825446, "learning_rate": 4.9468804176021765e-06, "loss": 0.5047, "step": 6570 }, { "epoch": 0.4001461498645069, "grad_norm": 1.017472922671421, "learning_rate": 4.94686405448082e-06, "loss": 0.4509, "step": 6571 }, { "epoch": 0.40020704564138476, "grad_norm": 1.076557742801801, "learning_rate": 4.946847688866649e-06, "loss": 0.4215, "step": 6572 }, { "epoch": 0.40026794141826266, "grad_norm": 1.031094833928564, "learning_rate": 4.946831320759681e-06, "loss": 0.4383, "step": 6573 }, { "epoch": 0.4003288371951405, "grad_norm": 1.1200673021004341, "learning_rate": 4.946814950159932e-06, "loss": 0.4872, "step": 6574 }, { "epoch": 0.4003897329720184, "grad_norm": 1.0947191191573395, "learning_rate": 4.946798577067418e-06, "loss": 0.5046, "step": 6575 }, { "epoch": 0.40045062874889625, "grad_norm": 0.9869059891992266, "learning_rate": 4.9467822014821565e-06, "loss": 0.443, "step": 6576 }, { "epoch": 0.40051152452577415, "grad_norm": 0.9977131179648576, "learning_rate": 4.946765823404165e-06, "loss": 0.4572, "step": 6577 }, { "epoch": 0.400572420302652, "grad_norm": 1.0774977066962765, "learning_rate": 4.946749442833459e-06, "loss": 0.4235, "step": 6578 }, { "epoch": 0.4006333160795299, "grad_norm": 1.0132843445102016, "learning_rate": 4.946733059770056e-06, "loss": 0.4904, "step": 6579 }, { "epoch": 0.40069421185640774, "grad_norm": 1.0300050884502108, "learning_rate": 4.946716674213971e-06, "loss": 0.4562, "step": 6580 }, { "epoch": 0.40075510763328565, "grad_norm": 0.9928883885837788, "learning_rate": 4.946700286165222e-06, "loss": 0.4675, "step": 6581 }, { "epoch": 0.4008160034101635, "grad_norm": 1.0812247191648359, "learning_rate": 4.946683895623827e-06, "loss": 0.4643, "step": 6582 }, { "epoch": 0.4008768991870414, "grad_norm": 1.0254633552400654, "learning_rate": 4.9466675025898005e-06, "loss": 0.48, "step": 6583 }, { "epoch": 0.40093779496391924, "grad_norm": 1.0442349981247927, "learning_rate": 4.94665110706316e-06, "loss": 0.4824, "step": 6584 }, { "epoch": 0.40099869074079714, "grad_norm": 1.0534345517378687, "learning_rate": 4.946634709043923e-06, "loss": 0.4728, "step": 6585 }, { "epoch": 0.401059586517675, "grad_norm": 1.0179866935000745, "learning_rate": 4.946618308532104e-06, "loss": 0.4838, "step": 6586 }, { "epoch": 0.4011204822945529, "grad_norm": 1.0219996799198927, "learning_rate": 4.946601905527722e-06, "loss": 0.4232, "step": 6587 }, { "epoch": 0.4011813780714307, "grad_norm": 1.0911595192766976, "learning_rate": 4.946585500030793e-06, "loss": 0.45, "step": 6588 }, { "epoch": 0.40124227384830863, "grad_norm": 1.0568195371895879, "learning_rate": 4.946569092041333e-06, "loss": 0.4834, "step": 6589 }, { "epoch": 0.4013031696251865, "grad_norm": 1.0779564100385264, "learning_rate": 4.946552681559359e-06, "loss": 0.4421, "step": 6590 }, { "epoch": 0.4013640654020644, "grad_norm": 0.9076087489357724, "learning_rate": 4.946536268584889e-06, "loss": 0.547, "step": 6591 }, { "epoch": 0.4014249611789422, "grad_norm": 1.0013712185686476, "learning_rate": 4.946519853117938e-06, "loss": 0.4742, "step": 6592 }, { "epoch": 0.4014858569558201, "grad_norm": 1.0445008374273772, "learning_rate": 4.946503435158524e-06, "loss": 0.4672, "step": 6593 }, { "epoch": 0.40154675273269796, "grad_norm": 1.0652395859907484, "learning_rate": 4.946487014706662e-06, "loss": 0.4163, "step": 6594 }, { "epoch": 0.40160764850957587, "grad_norm": 1.057977246071148, "learning_rate": 4.946470591762371e-06, "loss": 0.467, "step": 6595 }, { "epoch": 0.4016685442864537, "grad_norm": 1.038058716012395, "learning_rate": 4.946454166325666e-06, "loss": 0.408, "step": 6596 }, { "epoch": 0.4017294400633316, "grad_norm": 1.0495072795066371, "learning_rate": 4.946437738396566e-06, "loss": 0.4936, "step": 6597 }, { "epoch": 0.40179033584020946, "grad_norm": 1.1129283343785081, "learning_rate": 4.946421307975084e-06, "loss": 0.4123, "step": 6598 }, { "epoch": 0.40185123161708736, "grad_norm": 1.0199263742751334, "learning_rate": 4.946404875061239e-06, "loss": 0.4216, "step": 6599 }, { "epoch": 0.4019121273939652, "grad_norm": 1.0416929879566055, "learning_rate": 4.9463884396550486e-06, "loss": 0.4883, "step": 6600 }, { "epoch": 0.4019730231708431, "grad_norm": 1.0611144923758355, "learning_rate": 4.9463720017565285e-06, "loss": 0.4913, "step": 6601 }, { "epoch": 0.402033918947721, "grad_norm": 1.117211279988071, "learning_rate": 4.946355561365695e-06, "loss": 0.4386, "step": 6602 }, { "epoch": 0.40209481472459885, "grad_norm": 1.0684428075005177, "learning_rate": 4.946339118482565e-06, "loss": 0.4591, "step": 6603 }, { "epoch": 0.40215571050147675, "grad_norm": 1.0089313704796288, "learning_rate": 4.946322673107156e-06, "loss": 0.5375, "step": 6604 }, { "epoch": 0.4022166062783546, "grad_norm": 0.9772857440522033, "learning_rate": 4.946306225239485e-06, "loss": 0.4488, "step": 6605 }, { "epoch": 0.4022775020552325, "grad_norm": 1.064018442010904, "learning_rate": 4.9462897748795666e-06, "loss": 0.4478, "step": 6606 }, { "epoch": 0.40233839783211034, "grad_norm": 1.1279416930762094, "learning_rate": 4.946273322027421e-06, "loss": 0.4664, "step": 6607 }, { "epoch": 0.40239929360898824, "grad_norm": 0.9446393817596883, "learning_rate": 4.946256866683061e-06, "loss": 0.4936, "step": 6608 }, { "epoch": 0.4024601893858661, "grad_norm": 0.9511819359215096, "learning_rate": 4.946240408846506e-06, "loss": 0.5084, "step": 6609 }, { "epoch": 0.402521085162744, "grad_norm": 0.9780463089957568, "learning_rate": 4.946223948517773e-06, "loss": 0.4419, "step": 6610 }, { "epoch": 0.40258198093962183, "grad_norm": 1.0351771317542822, "learning_rate": 4.946207485696877e-06, "loss": 0.4497, "step": 6611 }, { "epoch": 0.40264287671649973, "grad_norm": 0.9895463320503008, "learning_rate": 4.946191020383836e-06, "loss": 0.4747, "step": 6612 }, { "epoch": 0.4027037724933776, "grad_norm": 0.901757878967526, "learning_rate": 4.946174552578666e-06, "loss": 0.5089, "step": 6613 }, { "epoch": 0.4027646682702555, "grad_norm": 0.9348675005204007, "learning_rate": 4.946158082281385e-06, "loss": 0.4814, "step": 6614 }, { "epoch": 0.4028255640471333, "grad_norm": 1.1255618337858617, "learning_rate": 4.946141609492008e-06, "loss": 0.4494, "step": 6615 }, { "epoch": 0.4028864598240112, "grad_norm": 0.949678088250914, "learning_rate": 4.946125134210553e-06, "loss": 0.4807, "step": 6616 }, { "epoch": 0.40294735560088907, "grad_norm": 0.9730130298022076, "learning_rate": 4.946108656437038e-06, "loss": 0.4346, "step": 6617 }, { "epoch": 0.40300825137776697, "grad_norm": 0.975727488943458, "learning_rate": 4.946092176171476e-06, "loss": 0.446, "step": 6618 }, { "epoch": 0.4030691471546448, "grad_norm": 1.0943043002863264, "learning_rate": 4.946075693413888e-06, "loss": 0.4146, "step": 6619 }, { "epoch": 0.4031300429315227, "grad_norm": 1.0347636305902441, "learning_rate": 4.946059208164288e-06, "loss": 0.4164, "step": 6620 }, { "epoch": 0.40319093870840056, "grad_norm": 1.1078003332288346, "learning_rate": 4.9460427204226946e-06, "loss": 0.3673, "step": 6621 }, { "epoch": 0.40325183448527846, "grad_norm": 1.0474121368054397, "learning_rate": 4.946026230189123e-06, "loss": 0.4206, "step": 6622 }, { "epoch": 0.4033127302621563, "grad_norm": 1.1336112493673625, "learning_rate": 4.946009737463591e-06, "loss": 0.4085, "step": 6623 }, { "epoch": 0.4033736260390342, "grad_norm": 1.1136706699490937, "learning_rate": 4.945993242246115e-06, "loss": 0.3558, "step": 6624 }, { "epoch": 0.40343452181591205, "grad_norm": 1.040069305863878, "learning_rate": 4.945976744536712e-06, "loss": 0.4446, "step": 6625 }, { "epoch": 0.40349541759278995, "grad_norm": 0.958122681359473, "learning_rate": 4.945960244335399e-06, "loss": 0.4332, "step": 6626 }, { "epoch": 0.4035563133696678, "grad_norm": 1.0219220070206931, "learning_rate": 4.945943741642192e-06, "loss": 0.4443, "step": 6627 }, { "epoch": 0.4036172091465457, "grad_norm": 1.045838610606303, "learning_rate": 4.945927236457109e-06, "loss": 0.4227, "step": 6628 }, { "epoch": 0.40367810492342354, "grad_norm": 1.0869177408032331, "learning_rate": 4.945910728780166e-06, "loss": 0.4242, "step": 6629 }, { "epoch": 0.40373900070030144, "grad_norm": 0.9392177538505984, "learning_rate": 4.945894218611379e-06, "loss": 0.4752, "step": 6630 }, { "epoch": 0.4037998964771793, "grad_norm": 0.9931698642359261, "learning_rate": 4.945877705950768e-06, "loss": 0.4577, "step": 6631 }, { "epoch": 0.4038607922540572, "grad_norm": 1.016312175392132, "learning_rate": 4.945861190798346e-06, "loss": 0.46, "step": 6632 }, { "epoch": 0.40392168803093503, "grad_norm": 0.9874197612195236, "learning_rate": 4.945844673154132e-06, "loss": 0.4628, "step": 6633 }, { "epoch": 0.40398258380781293, "grad_norm": 1.0498423490693753, "learning_rate": 4.945828153018142e-06, "loss": 0.4437, "step": 6634 }, { "epoch": 0.4040434795846908, "grad_norm": 1.0167677565796143, "learning_rate": 4.945811630390393e-06, "loss": 0.4927, "step": 6635 }, { "epoch": 0.4041043753615687, "grad_norm": 1.0483591050859034, "learning_rate": 4.945795105270902e-06, "loss": 0.4683, "step": 6636 }, { "epoch": 0.4041652711384465, "grad_norm": 0.9775478297340271, "learning_rate": 4.945778577659685e-06, "loss": 0.4235, "step": 6637 }, { "epoch": 0.4042261669153244, "grad_norm": 0.9430243492567629, "learning_rate": 4.945762047556762e-06, "loss": 0.457, "step": 6638 }, { "epoch": 0.40428706269220227, "grad_norm": 1.010269492211598, "learning_rate": 4.945745514962146e-06, "loss": 0.454, "step": 6639 }, { "epoch": 0.40434795846908017, "grad_norm": 0.9789429300109349, "learning_rate": 4.945728979875855e-06, "loss": 0.454, "step": 6640 }, { "epoch": 0.404408854245958, "grad_norm": 1.0583166536783295, "learning_rate": 4.9457124422979065e-06, "loss": 0.4697, "step": 6641 }, { "epoch": 0.4044697500228359, "grad_norm": 1.0851364361235794, "learning_rate": 4.9456959022283166e-06, "loss": 0.4221, "step": 6642 }, { "epoch": 0.4045306457997138, "grad_norm": 0.9893949395400436, "learning_rate": 4.945679359667104e-06, "loss": 0.4824, "step": 6643 }, { "epoch": 0.40459154157659166, "grad_norm": 1.0627961645400779, "learning_rate": 4.945662814614283e-06, "loss": 0.5258, "step": 6644 }, { "epoch": 0.40465243735346956, "grad_norm": 1.0985016720425447, "learning_rate": 4.945646267069872e-06, "loss": 0.4977, "step": 6645 }, { "epoch": 0.4047133331303474, "grad_norm": 1.0211739669414928, "learning_rate": 4.945629717033887e-06, "loss": 0.5105, "step": 6646 }, { "epoch": 0.4047742289072253, "grad_norm": 1.0160885515459321, "learning_rate": 4.945613164506346e-06, "loss": 0.5045, "step": 6647 }, { "epoch": 0.40483512468410315, "grad_norm": 1.035994695189384, "learning_rate": 4.945596609487264e-06, "loss": 0.4594, "step": 6648 }, { "epoch": 0.40489602046098105, "grad_norm": 1.006656094810818, "learning_rate": 4.94558005197666e-06, "loss": 0.4802, "step": 6649 }, { "epoch": 0.4049569162378589, "grad_norm": 0.981285954460504, "learning_rate": 4.945563491974549e-06, "loss": 0.5065, "step": 6650 }, { "epoch": 0.4050178120147368, "grad_norm": 1.0379616041949076, "learning_rate": 4.945546929480949e-06, "loss": 0.4966, "step": 6651 }, { "epoch": 0.40507870779161465, "grad_norm": 1.106957255623764, "learning_rate": 4.945530364495878e-06, "loss": 0.4899, "step": 6652 }, { "epoch": 0.40513960356849255, "grad_norm": 1.0787633871045479, "learning_rate": 4.94551379701935e-06, "loss": 0.5257, "step": 6653 }, { "epoch": 0.4052004993453704, "grad_norm": 0.950382297989184, "learning_rate": 4.945497227051383e-06, "loss": 0.5004, "step": 6654 }, { "epoch": 0.4052613951222483, "grad_norm": 1.0506229565878198, "learning_rate": 4.945480654591995e-06, "loss": 0.4222, "step": 6655 }, { "epoch": 0.40532229089912614, "grad_norm": 1.0075961222832215, "learning_rate": 4.945464079641202e-06, "loss": 0.4278, "step": 6656 }, { "epoch": 0.40538318667600404, "grad_norm": 1.0817210996558646, "learning_rate": 4.945447502199022e-06, "loss": 0.5138, "step": 6657 }, { "epoch": 0.4054440824528819, "grad_norm": 1.0209670782847522, "learning_rate": 4.94543092226547e-06, "loss": 0.4512, "step": 6658 }, { "epoch": 0.4055049782297598, "grad_norm": 1.0768190041842782, "learning_rate": 4.945414339840564e-06, "loss": 0.5006, "step": 6659 }, { "epoch": 0.40556587400663763, "grad_norm": 0.9657491906021435, "learning_rate": 4.945397754924321e-06, "loss": 0.4847, "step": 6660 }, { "epoch": 0.40562676978351553, "grad_norm": 1.0149772317483845, "learning_rate": 4.945381167516757e-06, "loss": 0.4168, "step": 6661 }, { "epoch": 0.4056876655603934, "grad_norm": 1.0810540713682428, "learning_rate": 4.94536457761789e-06, "loss": 0.4643, "step": 6662 }, { "epoch": 0.4057485613372713, "grad_norm": 1.0867155193472318, "learning_rate": 4.9453479852277365e-06, "loss": 0.4654, "step": 6663 }, { "epoch": 0.4058094571141491, "grad_norm": 1.1262435647496907, "learning_rate": 4.945331390346313e-06, "loss": 0.4161, "step": 6664 }, { "epoch": 0.405870352891027, "grad_norm": 1.0477130097695773, "learning_rate": 4.945314792973637e-06, "loss": 0.4756, "step": 6665 }, { "epoch": 0.40593124866790486, "grad_norm": 1.109142833103786, "learning_rate": 4.945298193109724e-06, "loss": 0.4108, "step": 6666 }, { "epoch": 0.40599214444478277, "grad_norm": 1.005426170608154, "learning_rate": 4.945281590754594e-06, "loss": 0.4809, "step": 6667 }, { "epoch": 0.4060530402216606, "grad_norm": 1.0471041202980755, "learning_rate": 4.94526498590826e-06, "loss": 0.463, "step": 6668 }, { "epoch": 0.4061139359985385, "grad_norm": 0.994416248324322, "learning_rate": 4.945248378570742e-06, "loss": 0.5132, "step": 6669 }, { "epoch": 0.40617483177541636, "grad_norm": 0.9873409029533523, "learning_rate": 4.945231768742056e-06, "loss": 0.4913, "step": 6670 }, { "epoch": 0.40623572755229426, "grad_norm": 0.9981060158355257, "learning_rate": 4.945215156422218e-06, "loss": 0.5431, "step": 6671 }, { "epoch": 0.4062966233291721, "grad_norm": 1.0421756792110932, "learning_rate": 4.9451985416112454e-06, "loss": 0.4517, "step": 6672 }, { "epoch": 0.40635751910605, "grad_norm": 1.0193830552198533, "learning_rate": 4.945181924309157e-06, "loss": 0.5006, "step": 6673 }, { "epoch": 0.40641841488292785, "grad_norm": 1.0844356886310536, "learning_rate": 4.945165304515967e-06, "loss": 0.4078, "step": 6674 }, { "epoch": 0.40647931065980575, "grad_norm": 1.0345165481787382, "learning_rate": 4.945148682231693e-06, "loss": 0.4778, "step": 6675 }, { "epoch": 0.4065402064366836, "grad_norm": 1.004329960288904, "learning_rate": 4.9451320574563526e-06, "loss": 0.4706, "step": 6676 }, { "epoch": 0.4066011022135615, "grad_norm": 0.9520204509209097, "learning_rate": 4.945115430189963e-06, "loss": 0.4113, "step": 6677 }, { "epoch": 0.40666199799043934, "grad_norm": 0.94849453574712, "learning_rate": 4.945098800432539e-06, "loss": 0.5866, "step": 6678 }, { "epoch": 0.40672289376731724, "grad_norm": 1.0860889128365272, "learning_rate": 4.945082168184101e-06, "loss": 0.4677, "step": 6679 }, { "epoch": 0.4067837895441951, "grad_norm": 0.9869910608487658, "learning_rate": 4.945065533444664e-06, "loss": 0.5197, "step": 6680 }, { "epoch": 0.406844685321073, "grad_norm": 1.0089254919242165, "learning_rate": 4.945048896214245e-06, "loss": 0.4742, "step": 6681 }, { "epoch": 0.40690558109795083, "grad_norm": 1.0084734913950713, "learning_rate": 4.94503225649286e-06, "loss": 0.426, "step": 6682 }, { "epoch": 0.40696647687482873, "grad_norm": 1.0329446804307516, "learning_rate": 4.945015614280528e-06, "loss": 0.4366, "step": 6683 }, { "epoch": 0.40702737265170663, "grad_norm": 1.0046084576683327, "learning_rate": 4.944998969577264e-06, "loss": 0.4574, "step": 6684 }, { "epoch": 0.4070882684285845, "grad_norm": 1.0281620903084059, "learning_rate": 4.944982322383087e-06, "loss": 0.4489, "step": 6685 }, { "epoch": 0.4071491642054624, "grad_norm": 0.8914379802002593, "learning_rate": 4.944965672698012e-06, "loss": 0.4759, "step": 6686 }, { "epoch": 0.4072100599823402, "grad_norm": 1.0370100421389123, "learning_rate": 4.944949020522057e-06, "loss": 0.4885, "step": 6687 }, { "epoch": 0.4072709557592181, "grad_norm": 0.983616919601663, "learning_rate": 4.944932365855239e-06, "loss": 0.4582, "step": 6688 }, { "epoch": 0.40733185153609597, "grad_norm": 1.0435684997000871, "learning_rate": 4.9449157086975745e-06, "loss": 0.4575, "step": 6689 }, { "epoch": 0.40739274731297387, "grad_norm": 1.0865145845359823, "learning_rate": 4.9448990490490814e-06, "loss": 0.4582, "step": 6690 }, { "epoch": 0.4074536430898517, "grad_norm": 1.0742096092797522, "learning_rate": 4.944882386909775e-06, "loss": 0.4259, "step": 6691 }, { "epoch": 0.4075145388667296, "grad_norm": 0.930384760634701, "learning_rate": 4.944865722279674e-06, "loss": 0.4871, "step": 6692 }, { "epoch": 0.40757543464360746, "grad_norm": 1.001946029292173, "learning_rate": 4.9448490551587935e-06, "loss": 0.4468, "step": 6693 }, { "epoch": 0.40763633042048536, "grad_norm": 0.9554432733694705, "learning_rate": 4.9448323855471525e-06, "loss": 0.525, "step": 6694 }, { "epoch": 0.4076972261973632, "grad_norm": 1.0815075819730753, "learning_rate": 4.944815713444767e-06, "loss": 0.5249, "step": 6695 }, { "epoch": 0.4077581219742411, "grad_norm": 1.0293090460657082, "learning_rate": 4.944799038851654e-06, "loss": 0.397, "step": 6696 }, { "epoch": 0.40781901775111895, "grad_norm": 0.9303032125507049, "learning_rate": 4.944782361767831e-06, "loss": 0.4801, "step": 6697 }, { "epoch": 0.40787991352799685, "grad_norm": 1.0268888776749006, "learning_rate": 4.944765682193314e-06, "loss": 0.4628, "step": 6698 }, { "epoch": 0.4079408093048747, "grad_norm": 1.0188565943642405, "learning_rate": 4.944749000128121e-06, "loss": 0.4924, "step": 6699 }, { "epoch": 0.4080017050817526, "grad_norm": 1.020215858203693, "learning_rate": 4.944732315572268e-06, "loss": 0.4415, "step": 6700 }, { "epoch": 0.40806260085863044, "grad_norm": 0.9840132129306629, "learning_rate": 4.944715628525773e-06, "loss": 0.4504, "step": 6701 }, { "epoch": 0.40812349663550834, "grad_norm": 1.0068266591093407, "learning_rate": 4.944698938988652e-06, "loss": 0.4229, "step": 6702 }, { "epoch": 0.4081843924123862, "grad_norm": 0.952035795783395, "learning_rate": 4.944682246960924e-06, "loss": 0.5029, "step": 6703 }, { "epoch": 0.4082452881892641, "grad_norm": 0.9858671268977195, "learning_rate": 4.9446655524426025e-06, "loss": 0.4646, "step": 6704 }, { "epoch": 0.40830618396614193, "grad_norm": 0.923908915817451, "learning_rate": 4.944648855433708e-06, "loss": 0.4739, "step": 6705 }, { "epoch": 0.40836707974301983, "grad_norm": 1.1185319364167279, "learning_rate": 4.944632155934255e-06, "loss": 0.475, "step": 6706 }, { "epoch": 0.4084279755198977, "grad_norm": 1.036283228280885, "learning_rate": 4.944615453944262e-06, "loss": 0.4348, "step": 6707 }, { "epoch": 0.4084888712967756, "grad_norm": 1.1145052772211916, "learning_rate": 4.944598749463747e-06, "loss": 0.4593, "step": 6708 }, { "epoch": 0.4085497670736534, "grad_norm": 0.9485359611084758, "learning_rate": 4.944582042492724e-06, "loss": 0.4693, "step": 6709 }, { "epoch": 0.4086106628505313, "grad_norm": 1.0668005966206968, "learning_rate": 4.944565333031212e-06, "loss": 0.4394, "step": 6710 }, { "epoch": 0.40867155862740917, "grad_norm": 1.0222293631590265, "learning_rate": 4.944548621079228e-06, "loss": 0.4047, "step": 6711 }, { "epoch": 0.40873245440428707, "grad_norm": 0.9643308499331188, "learning_rate": 4.944531906636788e-06, "loss": 0.5091, "step": 6712 }, { "epoch": 0.4087933501811649, "grad_norm": 1.060432369802787, "learning_rate": 4.94451518970391e-06, "loss": 0.537, "step": 6713 }, { "epoch": 0.4088542459580428, "grad_norm": 0.928959985487889, "learning_rate": 4.944498470280611e-06, "loss": 0.5001, "step": 6714 }, { "epoch": 0.40891514173492066, "grad_norm": 1.0875724566304659, "learning_rate": 4.944481748366907e-06, "loss": 0.4202, "step": 6715 }, { "epoch": 0.40897603751179856, "grad_norm": 1.0836018331340507, "learning_rate": 4.944465023962817e-06, "loss": 0.4505, "step": 6716 }, { "epoch": 0.4090369332886764, "grad_norm": 0.8545310963817723, "learning_rate": 4.944448297068356e-06, "loss": 0.4559, "step": 6717 }, { "epoch": 0.4090978290655543, "grad_norm": 1.060080489617337, "learning_rate": 4.944431567683542e-06, "loss": 0.4538, "step": 6718 }, { "epoch": 0.40915872484243215, "grad_norm": 1.0103559856620417, "learning_rate": 4.944414835808392e-06, "loss": 0.4304, "step": 6719 }, { "epoch": 0.40921962061931005, "grad_norm": 1.002989135687286, "learning_rate": 4.944398101442924e-06, "loss": 0.4599, "step": 6720 }, { "epoch": 0.4092805163961879, "grad_norm": 1.0818826984455117, "learning_rate": 4.944381364587153e-06, "loss": 0.5167, "step": 6721 }, { "epoch": 0.4093414121730658, "grad_norm": 1.0876267794527799, "learning_rate": 4.944364625241097e-06, "loss": 0.4367, "step": 6722 }, { "epoch": 0.40940230794994364, "grad_norm": 1.0098676137137774, "learning_rate": 4.944347883404774e-06, "loss": 0.4264, "step": 6723 }, { "epoch": 0.40946320372682155, "grad_norm": 1.0568717965761136, "learning_rate": 4.944331139078199e-06, "loss": 0.4143, "step": 6724 }, { "epoch": 0.40952409950369945, "grad_norm": 1.0904362054010797, "learning_rate": 4.944314392261391e-06, "loss": 0.4755, "step": 6725 }, { "epoch": 0.4095849952805773, "grad_norm": 0.9696689941460934, "learning_rate": 4.944297642954366e-06, "loss": 0.4925, "step": 6726 }, { "epoch": 0.4096458910574552, "grad_norm": 1.0666180087146333, "learning_rate": 4.944280891157141e-06, "loss": 0.4656, "step": 6727 }, { "epoch": 0.40970678683433304, "grad_norm": 1.0098686920821747, "learning_rate": 4.944264136869735e-06, "loss": 0.5508, "step": 6728 }, { "epoch": 0.40976768261121094, "grad_norm": 1.0529423991921798, "learning_rate": 4.944247380092162e-06, "loss": 0.4087, "step": 6729 }, { "epoch": 0.4098285783880888, "grad_norm": 0.999879308634886, "learning_rate": 4.9442306208244414e-06, "loss": 0.4793, "step": 6730 }, { "epoch": 0.4098894741649667, "grad_norm": 0.9924622734971119, "learning_rate": 4.944213859066589e-06, "loss": 0.4379, "step": 6731 }, { "epoch": 0.40995036994184453, "grad_norm": 1.0217386197670701, "learning_rate": 4.9441970948186225e-06, "loss": 0.4708, "step": 6732 }, { "epoch": 0.41001126571872243, "grad_norm": 1.029719685727215, "learning_rate": 4.944180328080559e-06, "loss": 0.4419, "step": 6733 }, { "epoch": 0.4100721614956003, "grad_norm": 1.0478672141657515, "learning_rate": 4.944163558852415e-06, "loss": 0.4492, "step": 6734 }, { "epoch": 0.4101330572724782, "grad_norm": 1.1029547167888019, "learning_rate": 4.944146787134208e-06, "loss": 0.43, "step": 6735 }, { "epoch": 0.410193953049356, "grad_norm": 1.0169160993904964, "learning_rate": 4.944130012925956e-06, "loss": 0.4127, "step": 6736 }, { "epoch": 0.4102548488262339, "grad_norm": 0.9794905116700386, "learning_rate": 4.944113236227675e-06, "loss": 0.4116, "step": 6737 }, { "epoch": 0.41031574460311176, "grad_norm": 1.070837862594222, "learning_rate": 4.944096457039381e-06, "loss": 0.4608, "step": 6738 }, { "epoch": 0.41037664037998967, "grad_norm": 1.0050328706507772, "learning_rate": 4.944079675361093e-06, "loss": 0.468, "step": 6739 }, { "epoch": 0.4104375361568675, "grad_norm": 0.981759634667728, "learning_rate": 4.944062891192827e-06, "loss": 0.4507, "step": 6740 }, { "epoch": 0.4104984319337454, "grad_norm": 1.0380821496662191, "learning_rate": 4.944046104534601e-06, "loss": 0.525, "step": 6741 }, { "epoch": 0.41055932771062326, "grad_norm": 1.0823745979913144, "learning_rate": 4.944029315386432e-06, "loss": 0.4262, "step": 6742 }, { "epoch": 0.41062022348750116, "grad_norm": 1.0273293293480372, "learning_rate": 4.944012523748336e-06, "loss": 0.3945, "step": 6743 }, { "epoch": 0.410681119264379, "grad_norm": 1.0720322012909538, "learning_rate": 4.943995729620331e-06, "loss": 0.4662, "step": 6744 }, { "epoch": 0.4107420150412569, "grad_norm": 1.002043929288563, "learning_rate": 4.9439789330024345e-06, "loss": 0.4669, "step": 6745 }, { "epoch": 0.41080291081813475, "grad_norm": 1.0974571996476026, "learning_rate": 4.943962133894663e-06, "loss": 0.5056, "step": 6746 }, { "epoch": 0.41086380659501265, "grad_norm": 0.9691172780268781, "learning_rate": 4.943945332297033e-06, "loss": 0.4896, "step": 6747 }, { "epoch": 0.4109247023718905, "grad_norm": 1.0721424785622715, "learning_rate": 4.943928528209563e-06, "loss": 0.4711, "step": 6748 }, { "epoch": 0.4109855981487684, "grad_norm": 0.9851322487791622, "learning_rate": 4.943911721632269e-06, "loss": 0.4567, "step": 6749 }, { "epoch": 0.41104649392564624, "grad_norm": 0.9483531647279703, "learning_rate": 4.94389491256517e-06, "loss": 0.4561, "step": 6750 }, { "epoch": 0.41110738970252414, "grad_norm": 1.0018603564018513, "learning_rate": 4.943878101008279e-06, "loss": 0.4796, "step": 6751 }, { "epoch": 0.411168285479402, "grad_norm": 0.8903500295075826, "learning_rate": 4.943861286961618e-06, "loss": 0.49, "step": 6752 }, { "epoch": 0.4112291812562799, "grad_norm": 1.0532833745481989, "learning_rate": 4.943844470425201e-06, "loss": 0.4194, "step": 6753 }, { "epoch": 0.41129007703315773, "grad_norm": 0.9819534586629612, "learning_rate": 4.943827651399046e-06, "loss": 0.4155, "step": 6754 }, { "epoch": 0.41135097281003563, "grad_norm": 0.9947032617392229, "learning_rate": 4.94381082988317e-06, "loss": 0.402, "step": 6755 }, { "epoch": 0.4114118685869135, "grad_norm": 1.0198809970215537, "learning_rate": 4.9437940058775915e-06, "loss": 0.4541, "step": 6756 }, { "epoch": 0.4114727643637914, "grad_norm": 0.9197697888585517, "learning_rate": 4.943777179382326e-06, "loss": 0.441, "step": 6757 }, { "epoch": 0.4115336601406692, "grad_norm": 1.0174090870619086, "learning_rate": 4.94376035039739e-06, "loss": 0.408, "step": 6758 }, { "epoch": 0.4115945559175471, "grad_norm": 0.9534368364997908, "learning_rate": 4.943743518922803e-06, "loss": 0.5241, "step": 6759 }, { "epoch": 0.41165545169442497, "grad_norm": 0.9868723331644971, "learning_rate": 4.94372668495858e-06, "loss": 0.4207, "step": 6760 }, { "epoch": 0.41171634747130287, "grad_norm": 0.9477300980189315, "learning_rate": 4.94370984850474e-06, "loss": 0.4881, "step": 6761 }, { "epoch": 0.4117772432481807, "grad_norm": 0.998744230796042, "learning_rate": 4.943693009561299e-06, "loss": 0.4873, "step": 6762 }, { "epoch": 0.4118381390250586, "grad_norm": 0.9980137023069672, "learning_rate": 4.9436761681282735e-06, "loss": 0.422, "step": 6763 }, { "epoch": 0.41189903480193646, "grad_norm": 1.0557998313884067, "learning_rate": 4.9436593242056825e-06, "loss": 0.4402, "step": 6764 }, { "epoch": 0.41195993057881436, "grad_norm": 0.9786107629308654, "learning_rate": 4.943642477793541e-06, "loss": 0.4734, "step": 6765 }, { "epoch": 0.41202082635569226, "grad_norm": 1.0635073938989958, "learning_rate": 4.943625628891869e-06, "loss": 0.4209, "step": 6766 }, { "epoch": 0.4120817221325701, "grad_norm": 1.0447689119438484, "learning_rate": 4.9436087775006816e-06, "loss": 0.4064, "step": 6767 }, { "epoch": 0.412142617909448, "grad_norm": 0.9885624959591489, "learning_rate": 4.943591923619996e-06, "loss": 0.4632, "step": 6768 }, { "epoch": 0.41220351368632585, "grad_norm": 1.0321444833467972, "learning_rate": 4.94357506724983e-06, "loss": 0.448, "step": 6769 }, { "epoch": 0.41226440946320375, "grad_norm": 1.0567984266604329, "learning_rate": 4.9435582083902e-06, "loss": 0.4499, "step": 6770 }, { "epoch": 0.4123253052400816, "grad_norm": 1.0333362181084311, "learning_rate": 4.943541347041123e-06, "loss": 0.414, "step": 6771 }, { "epoch": 0.4123862010169595, "grad_norm": 1.0434836377333123, "learning_rate": 4.943524483202619e-06, "loss": 0.4301, "step": 6772 }, { "epoch": 0.41244709679383734, "grad_norm": 0.9670221315085933, "learning_rate": 4.943507616874701e-06, "loss": 0.47, "step": 6773 }, { "epoch": 0.41250799257071524, "grad_norm": 1.0166455582346872, "learning_rate": 4.943490748057389e-06, "loss": 0.4613, "step": 6774 }, { "epoch": 0.4125688883475931, "grad_norm": 0.9180969882589104, "learning_rate": 4.9434738767507e-06, "loss": 0.4868, "step": 6775 }, { "epoch": 0.412629784124471, "grad_norm": 0.9478919982040631, "learning_rate": 4.94345700295465e-06, "loss": 0.4407, "step": 6776 }, { "epoch": 0.41269067990134883, "grad_norm": 1.0460595085776514, "learning_rate": 4.943440126669257e-06, "loss": 0.4544, "step": 6777 }, { "epoch": 0.41275157567822673, "grad_norm": 0.9847697783376154, "learning_rate": 4.943423247894538e-06, "loss": 0.4867, "step": 6778 }, { "epoch": 0.4128124714551046, "grad_norm": 1.062858694834803, "learning_rate": 4.9434063666305106e-06, "loss": 0.461, "step": 6779 }, { "epoch": 0.4128733672319825, "grad_norm": 0.9821259254032865, "learning_rate": 4.943389482877191e-06, "loss": 0.4395, "step": 6780 }, { "epoch": 0.4129342630088603, "grad_norm": 1.1248902069158755, "learning_rate": 4.943372596634597e-06, "loss": 0.4419, "step": 6781 }, { "epoch": 0.4129951587857382, "grad_norm": 0.9042316403213582, "learning_rate": 4.943355707902746e-06, "loss": 0.4689, "step": 6782 }, { "epoch": 0.41305605456261607, "grad_norm": 0.9654124460514951, "learning_rate": 4.943338816681656e-06, "loss": 0.517, "step": 6783 }, { "epoch": 0.41311695033949397, "grad_norm": 1.1803449653262577, "learning_rate": 4.943321922971341e-06, "loss": 0.4234, "step": 6784 }, { "epoch": 0.4131778461163718, "grad_norm": 1.1612485629910922, "learning_rate": 4.9433050267718215e-06, "loss": 0.4124, "step": 6785 }, { "epoch": 0.4132387418932497, "grad_norm": 1.049631664908007, "learning_rate": 4.943288128083114e-06, "loss": 0.4193, "step": 6786 }, { "epoch": 0.41329963767012756, "grad_norm": 0.9914567144909291, "learning_rate": 4.943271226905235e-06, "loss": 0.4787, "step": 6787 }, { "epoch": 0.41336053344700546, "grad_norm": 1.0608861050554201, "learning_rate": 4.943254323238203e-06, "loss": 0.4741, "step": 6788 }, { "epoch": 0.4134214292238833, "grad_norm": 1.060794531458104, "learning_rate": 4.943237417082033e-06, "loss": 0.4717, "step": 6789 }, { "epoch": 0.4134823250007612, "grad_norm": 0.973501865922472, "learning_rate": 4.9432205084367435e-06, "loss": 0.4749, "step": 6790 }, { "epoch": 0.41354322077763905, "grad_norm": 1.0646456301480747, "learning_rate": 4.943203597302353e-06, "loss": 0.4778, "step": 6791 }, { "epoch": 0.41360411655451695, "grad_norm": 0.9572461253627881, "learning_rate": 4.943186683678877e-06, "loss": 0.4704, "step": 6792 }, { "epoch": 0.4136650123313948, "grad_norm": 1.1471744644673219, "learning_rate": 4.943169767566332e-06, "loss": 0.464, "step": 6793 }, { "epoch": 0.4137259081082727, "grad_norm": 1.0336118734251631, "learning_rate": 4.943152848964737e-06, "loss": 0.4629, "step": 6794 }, { "epoch": 0.41378680388515054, "grad_norm": 1.039273175867859, "learning_rate": 4.94313592787411e-06, "loss": 0.4621, "step": 6795 }, { "epoch": 0.41384769966202845, "grad_norm": 1.008522401387392, "learning_rate": 4.943119004294465e-06, "loss": 0.4359, "step": 6796 }, { "epoch": 0.4139085954389063, "grad_norm": 0.979222050372445, "learning_rate": 4.943102078225822e-06, "loss": 0.4275, "step": 6797 }, { "epoch": 0.4139694912157842, "grad_norm": 1.00341617713044, "learning_rate": 4.943085149668198e-06, "loss": 0.5462, "step": 6798 }, { "epoch": 0.41403038699266204, "grad_norm": 0.9552965251448519, "learning_rate": 4.9430682186216085e-06, "loss": 0.4704, "step": 6799 }, { "epoch": 0.41409128276953994, "grad_norm": 1.0681545147264424, "learning_rate": 4.943051285086073e-06, "loss": 0.3974, "step": 6800 }, { "epoch": 0.4141521785464178, "grad_norm": 0.9940082916902812, "learning_rate": 4.943034349061607e-06, "loss": 0.451, "step": 6801 }, { "epoch": 0.4142130743232957, "grad_norm": 1.0132671724440077, "learning_rate": 4.943017410548229e-06, "loss": 0.4883, "step": 6802 }, { "epoch": 0.4142739701001735, "grad_norm": 1.0009226612245565, "learning_rate": 4.943000469545955e-06, "loss": 0.4889, "step": 6803 }, { "epoch": 0.41433486587705143, "grad_norm": 1.0748639109544533, "learning_rate": 4.9429835260548036e-06, "loss": 0.4712, "step": 6804 }, { "epoch": 0.4143957616539293, "grad_norm": 0.9690104264775442, "learning_rate": 4.942966580074791e-06, "loss": 0.4734, "step": 6805 }, { "epoch": 0.4144566574308072, "grad_norm": 1.1059848947799513, "learning_rate": 4.942949631605934e-06, "loss": 0.4701, "step": 6806 }, { "epoch": 0.4145175532076851, "grad_norm": 0.952757488334652, "learning_rate": 4.942932680648252e-06, "loss": 0.4841, "step": 6807 }, { "epoch": 0.4145784489845629, "grad_norm": 1.0099677128591031, "learning_rate": 4.9429157272017596e-06, "loss": 0.5375, "step": 6808 }, { "epoch": 0.4146393447614408, "grad_norm": 0.9829177374626441, "learning_rate": 4.942898771266477e-06, "loss": 0.4869, "step": 6809 }, { "epoch": 0.41470024053831867, "grad_norm": 1.057961497046725, "learning_rate": 4.9428818128424185e-06, "loss": 0.4214, "step": 6810 }, { "epoch": 0.41476113631519657, "grad_norm": 0.9912656397236677, "learning_rate": 4.942864851929604e-06, "loss": 0.46, "step": 6811 }, { "epoch": 0.4148220320920744, "grad_norm": 1.028369446250695, "learning_rate": 4.942847888528048e-06, "loss": 0.4767, "step": 6812 }, { "epoch": 0.4148829278689523, "grad_norm": 0.9862082543533841, "learning_rate": 4.942830922637771e-06, "loss": 0.4169, "step": 6813 }, { "epoch": 0.41494382364583016, "grad_norm": 1.1074348089556743, "learning_rate": 4.942813954258788e-06, "loss": 0.3985, "step": 6814 }, { "epoch": 0.41500471942270806, "grad_norm": 1.0161963103919882, "learning_rate": 4.942796983391117e-06, "loss": 0.4254, "step": 6815 }, { "epoch": 0.4150656151995859, "grad_norm": 1.0244669601342575, "learning_rate": 4.942780010034775e-06, "loss": 0.4026, "step": 6816 }, { "epoch": 0.4151265109764638, "grad_norm": 1.0917856802311112, "learning_rate": 4.94276303418978e-06, "loss": 0.4105, "step": 6817 }, { "epoch": 0.41518740675334165, "grad_norm": 1.052161868649712, "learning_rate": 4.942746055856149e-06, "loss": 0.4321, "step": 6818 }, { "epoch": 0.41524830253021955, "grad_norm": 0.9107145635290448, "learning_rate": 4.942729075033899e-06, "loss": 0.4737, "step": 6819 }, { "epoch": 0.4153091983070974, "grad_norm": 1.039918216975511, "learning_rate": 4.942712091723047e-06, "loss": 0.4918, "step": 6820 }, { "epoch": 0.4153700940839753, "grad_norm": 1.0525197649763014, "learning_rate": 4.94269510592361e-06, "loss": 0.5322, "step": 6821 }, { "epoch": 0.41543098986085314, "grad_norm": 0.9659388209789216, "learning_rate": 4.942678117635606e-06, "loss": 0.4363, "step": 6822 }, { "epoch": 0.41549188563773104, "grad_norm": 1.0603329134559794, "learning_rate": 4.942661126859054e-06, "loss": 0.4179, "step": 6823 }, { "epoch": 0.4155527814146089, "grad_norm": 1.0984792136856611, "learning_rate": 4.942644133593968e-06, "loss": 0.4103, "step": 6824 }, { "epoch": 0.4156136771914868, "grad_norm": 1.0759904219541907, "learning_rate": 4.942627137840368e-06, "loss": 0.4679, "step": 6825 }, { "epoch": 0.41567457296836463, "grad_norm": 1.0273048519290848, "learning_rate": 4.94261013959827e-06, "loss": 0.4636, "step": 6826 }, { "epoch": 0.41573546874524253, "grad_norm": 1.162891098539938, "learning_rate": 4.9425931388676914e-06, "loss": 0.3908, "step": 6827 }, { "epoch": 0.4157963645221204, "grad_norm": 1.0041962584079447, "learning_rate": 4.9425761356486505e-06, "loss": 0.4085, "step": 6828 }, { "epoch": 0.4158572602989983, "grad_norm": 0.9740106675948086, "learning_rate": 4.942559129941162e-06, "loss": 0.51, "step": 6829 }, { "epoch": 0.4159181560758761, "grad_norm": 1.0861354330503674, "learning_rate": 4.942542121745246e-06, "loss": 0.426, "step": 6830 }, { "epoch": 0.415979051852754, "grad_norm": 0.9454357756083693, "learning_rate": 4.942525111060919e-06, "loss": 0.5018, "step": 6831 }, { "epoch": 0.41603994762963187, "grad_norm": 0.9722476886051543, "learning_rate": 4.9425080978881985e-06, "loss": 0.4901, "step": 6832 }, { "epoch": 0.41610084340650977, "grad_norm": 0.8960286819501887, "learning_rate": 4.942491082227101e-06, "loss": 0.4933, "step": 6833 }, { "epoch": 0.4161617391833876, "grad_norm": 1.0008648144708634, "learning_rate": 4.942474064077645e-06, "loss": 0.4902, "step": 6834 }, { "epoch": 0.4162226349602655, "grad_norm": 0.9464946889453728, "learning_rate": 4.942457043439847e-06, "loss": 0.4705, "step": 6835 }, { "epoch": 0.41628353073714336, "grad_norm": 0.9656297672802567, "learning_rate": 4.9424400203137234e-06, "loss": 0.5228, "step": 6836 }, { "epoch": 0.41634442651402126, "grad_norm": 0.9919261750370889, "learning_rate": 4.942422994699294e-06, "loss": 0.4433, "step": 6837 }, { "epoch": 0.4164053222908991, "grad_norm": 0.9873219164221029, "learning_rate": 4.942405966596575e-06, "loss": 0.476, "step": 6838 }, { "epoch": 0.416466218067777, "grad_norm": 0.987957985416052, "learning_rate": 4.942388936005583e-06, "loss": 0.4979, "step": 6839 }, { "epoch": 0.41652711384465485, "grad_norm": 1.0404752038064775, "learning_rate": 4.942371902926336e-06, "loss": 0.5145, "step": 6840 }, { "epoch": 0.41658800962153275, "grad_norm": 1.0538313397133223, "learning_rate": 4.942354867358851e-06, "loss": 0.517, "step": 6841 }, { "epoch": 0.4166489053984106, "grad_norm": 1.0383410983474008, "learning_rate": 4.9423378293031455e-06, "loss": 0.3913, "step": 6842 }, { "epoch": 0.4167098011752885, "grad_norm": 1.123362470464405, "learning_rate": 4.942320788759238e-06, "loss": 0.4412, "step": 6843 }, { "epoch": 0.41677069695216634, "grad_norm": 1.0249139330998889, "learning_rate": 4.9423037457271435e-06, "loss": 0.4932, "step": 6844 }, { "epoch": 0.41683159272904424, "grad_norm": 1.0656893583718048, "learning_rate": 4.942286700206882e-06, "loss": 0.4442, "step": 6845 }, { "epoch": 0.4168924885059221, "grad_norm": 1.1644674614728867, "learning_rate": 4.942269652198469e-06, "loss": 0.4233, "step": 6846 }, { "epoch": 0.4169533842828, "grad_norm": 0.9538522630605323, "learning_rate": 4.942252601701922e-06, "loss": 0.5007, "step": 6847 }, { "epoch": 0.4170142800596779, "grad_norm": 1.0962449387433535, "learning_rate": 4.9422355487172595e-06, "loss": 0.4241, "step": 6848 }, { "epoch": 0.41707517583655573, "grad_norm": 0.9817802894654833, "learning_rate": 4.9422184932444985e-06, "loss": 0.456, "step": 6849 }, { "epoch": 0.41713607161343363, "grad_norm": 0.9000607449586668, "learning_rate": 4.942201435283655e-06, "loss": 0.5276, "step": 6850 }, { "epoch": 0.4171969673903115, "grad_norm": 1.0604764998010032, "learning_rate": 4.942184374834748e-06, "loss": 0.4852, "step": 6851 }, { "epoch": 0.4172578631671894, "grad_norm": 0.9968369625823899, "learning_rate": 4.942167311897794e-06, "loss": 0.4403, "step": 6852 }, { "epoch": 0.4173187589440672, "grad_norm": 1.0704684099916066, "learning_rate": 4.942150246472811e-06, "loss": 0.4229, "step": 6853 }, { "epoch": 0.4173796547209451, "grad_norm": 0.9809211314189279, "learning_rate": 4.942133178559817e-06, "loss": 0.433, "step": 6854 }, { "epoch": 0.41744055049782297, "grad_norm": 0.9346671139997385, "learning_rate": 4.942116108158827e-06, "loss": 0.5152, "step": 6855 }, { "epoch": 0.41750144627470087, "grad_norm": 1.045172133262651, "learning_rate": 4.942099035269861e-06, "loss": 0.4717, "step": 6856 }, { "epoch": 0.4175623420515787, "grad_norm": 1.0250211935850575, "learning_rate": 4.9420819598929345e-06, "loss": 0.4345, "step": 6857 }, { "epoch": 0.4176232378284566, "grad_norm": 1.0511953046137035, "learning_rate": 4.942064882028066e-06, "loss": 0.35, "step": 6858 }, { "epoch": 0.41768413360533446, "grad_norm": 1.062879784129, "learning_rate": 4.942047801675273e-06, "loss": 0.4061, "step": 6859 }, { "epoch": 0.41774502938221236, "grad_norm": 1.0613837309883836, "learning_rate": 4.9420307188345715e-06, "loss": 0.4147, "step": 6860 }, { "epoch": 0.4178059251590902, "grad_norm": 1.0941500369068267, "learning_rate": 4.942013633505981e-06, "loss": 0.4336, "step": 6861 }, { "epoch": 0.4178668209359681, "grad_norm": 0.974718384931401, "learning_rate": 4.941996545689517e-06, "loss": 0.5506, "step": 6862 }, { "epoch": 0.41792771671284595, "grad_norm": 1.063171870687541, "learning_rate": 4.941979455385198e-06, "loss": 0.4875, "step": 6863 }, { "epoch": 0.41798861248972385, "grad_norm": 0.9846422099923258, "learning_rate": 4.9419623625930416e-06, "loss": 0.4058, "step": 6864 }, { "epoch": 0.4180495082666017, "grad_norm": 1.0885060770262147, "learning_rate": 4.9419452673130634e-06, "loss": 0.4565, "step": 6865 }, { "epoch": 0.4181104040434796, "grad_norm": 1.0423094319467916, "learning_rate": 4.9419281695452835e-06, "loss": 0.4278, "step": 6866 }, { "epoch": 0.41817129982035744, "grad_norm": 1.0407434778162934, "learning_rate": 4.941911069289717e-06, "loss": 0.503, "step": 6867 }, { "epoch": 0.41823219559723535, "grad_norm": 0.9965874931382847, "learning_rate": 4.941893966546382e-06, "loss": 0.5091, "step": 6868 }, { "epoch": 0.4182930913741132, "grad_norm": 0.9963306773458843, "learning_rate": 4.941876861315297e-06, "loss": 0.5021, "step": 6869 }, { "epoch": 0.4183539871509911, "grad_norm": 1.0250793043329531, "learning_rate": 4.941859753596478e-06, "loss": 0.4978, "step": 6870 }, { "epoch": 0.41841488292786894, "grad_norm": 1.0078232991129537, "learning_rate": 4.941842643389944e-06, "loss": 0.4764, "step": 6871 }, { "epoch": 0.41847577870474684, "grad_norm": 1.0162420226564404, "learning_rate": 4.9418255306957104e-06, "loss": 0.4625, "step": 6872 }, { "epoch": 0.4185366744816247, "grad_norm": 0.9460217016201417, "learning_rate": 4.941808415513797e-06, "loss": 0.524, "step": 6873 }, { "epoch": 0.4185975702585026, "grad_norm": 1.0549058703695566, "learning_rate": 4.941791297844219e-06, "loss": 0.4452, "step": 6874 }, { "epoch": 0.4186584660353804, "grad_norm": 1.2020984150361766, "learning_rate": 4.941774177686995e-06, "loss": 0.4219, "step": 6875 }, { "epoch": 0.41871936181225833, "grad_norm": 1.0007596780315, "learning_rate": 4.941757055042142e-06, "loss": 0.503, "step": 6876 }, { "epoch": 0.4187802575891362, "grad_norm": 1.1181984840185075, "learning_rate": 4.941739929909678e-06, "loss": 0.4766, "step": 6877 }, { "epoch": 0.4188411533660141, "grad_norm": 0.912080109012682, "learning_rate": 4.94172280228962e-06, "loss": 0.4962, "step": 6878 }, { "epoch": 0.4189020491428919, "grad_norm": 0.9655020385409282, "learning_rate": 4.941705672181986e-06, "loss": 0.4699, "step": 6879 }, { "epoch": 0.4189629449197698, "grad_norm": 0.9687992523380886, "learning_rate": 4.941688539586792e-06, "loss": 0.5059, "step": 6880 }, { "epoch": 0.41902384069664766, "grad_norm": 1.0538788123897214, "learning_rate": 4.941671404504057e-06, "loss": 0.419, "step": 6881 }, { "epoch": 0.41908473647352557, "grad_norm": 1.0622840658163935, "learning_rate": 4.941654266933798e-06, "loss": 0.5385, "step": 6882 }, { "epoch": 0.4191456322504034, "grad_norm": 1.0499347185917822, "learning_rate": 4.9416371268760324e-06, "loss": 0.4334, "step": 6883 }, { "epoch": 0.4192065280272813, "grad_norm": 0.9712858806753586, "learning_rate": 4.941619984330778e-06, "loss": 0.5458, "step": 6884 }, { "epoch": 0.41926742380415916, "grad_norm": 0.9886011600022616, "learning_rate": 4.941602839298052e-06, "loss": 0.4035, "step": 6885 }, { "epoch": 0.41932831958103706, "grad_norm": 1.0383942830119033, "learning_rate": 4.941585691777871e-06, "loss": 0.5407, "step": 6886 }, { "epoch": 0.4193892153579149, "grad_norm": 0.9732462017570155, "learning_rate": 4.941568541770254e-06, "loss": 0.4953, "step": 6887 }, { "epoch": 0.4194501111347928, "grad_norm": 1.0264314182407677, "learning_rate": 4.941551389275217e-06, "loss": 0.5197, "step": 6888 }, { "epoch": 0.4195110069116707, "grad_norm": 1.1401794132365943, "learning_rate": 4.941534234292779e-06, "loss": 0.499, "step": 6889 }, { "epoch": 0.41957190268854855, "grad_norm": 1.106085455370546, "learning_rate": 4.941517076822956e-06, "loss": 0.4266, "step": 6890 }, { "epoch": 0.41963279846542645, "grad_norm": 1.0667242305949913, "learning_rate": 4.941499916865766e-06, "loss": 0.4282, "step": 6891 }, { "epoch": 0.4196936942423043, "grad_norm": 0.9990849518386831, "learning_rate": 4.941482754421228e-06, "loss": 0.4242, "step": 6892 }, { "epoch": 0.4197545900191822, "grad_norm": 1.105274589788735, "learning_rate": 4.941465589489357e-06, "loss": 0.3902, "step": 6893 }, { "epoch": 0.41981548579606004, "grad_norm": 0.9879110956221834, "learning_rate": 4.941448422070172e-06, "loss": 0.4571, "step": 6894 }, { "epoch": 0.41987638157293794, "grad_norm": 1.0690678568181395, "learning_rate": 4.94143125216369e-06, "loss": 0.3889, "step": 6895 }, { "epoch": 0.4199372773498158, "grad_norm": 1.023216283616811, "learning_rate": 4.941414079769928e-06, "loss": 0.4716, "step": 6896 }, { "epoch": 0.4199981731266937, "grad_norm": 1.0209390256409066, "learning_rate": 4.941396904888904e-06, "loss": 0.4626, "step": 6897 }, { "epoch": 0.42005906890357153, "grad_norm": 1.0705466584749161, "learning_rate": 4.941379727520637e-06, "loss": 0.42, "step": 6898 }, { "epoch": 0.42011996468044943, "grad_norm": 1.067935802391655, "learning_rate": 4.941362547665142e-06, "loss": 0.4499, "step": 6899 }, { "epoch": 0.4201808604573273, "grad_norm": 1.127342237934069, "learning_rate": 4.941345365322438e-06, "loss": 0.4114, "step": 6900 }, { "epoch": 0.4202417562342052, "grad_norm": 1.094638635822157, "learning_rate": 4.941328180492542e-06, "loss": 0.47, "step": 6901 }, { "epoch": 0.420302652011083, "grad_norm": 1.0499743193556885, "learning_rate": 4.941310993175472e-06, "loss": 0.4324, "step": 6902 }, { "epoch": 0.4203635477879609, "grad_norm": 1.1380361237811498, "learning_rate": 4.9412938033712445e-06, "loss": 0.458, "step": 6903 }, { "epoch": 0.42042444356483877, "grad_norm": 1.0659975519755147, "learning_rate": 4.941276611079878e-06, "loss": 0.4193, "step": 6904 }, { "epoch": 0.42048533934171667, "grad_norm": 1.051110424902886, "learning_rate": 4.941259416301389e-06, "loss": 0.4484, "step": 6905 }, { "epoch": 0.4205462351185945, "grad_norm": 1.0082776263554576, "learning_rate": 4.941242219035797e-06, "loss": 0.4079, "step": 6906 }, { "epoch": 0.4206071308954724, "grad_norm": 0.9628877538210004, "learning_rate": 4.941225019283117e-06, "loss": 0.5375, "step": 6907 }, { "epoch": 0.42066802667235026, "grad_norm": 0.9846569445600125, "learning_rate": 4.941207817043367e-06, "loss": 0.4459, "step": 6908 }, { "epoch": 0.42072892244922816, "grad_norm": 0.9875306726484664, "learning_rate": 4.941190612316567e-06, "loss": 0.4539, "step": 6909 }, { "epoch": 0.420789818226106, "grad_norm": 0.9892958157818199, "learning_rate": 4.941173405102732e-06, "loss": 0.5263, "step": 6910 }, { "epoch": 0.4208507140029839, "grad_norm": 1.0099534363314326, "learning_rate": 4.9411561954018805e-06, "loss": 0.4468, "step": 6911 }, { "epoch": 0.42091160977986175, "grad_norm": 0.9642688490727765, "learning_rate": 4.94113898321403e-06, "loss": 0.3988, "step": 6912 }, { "epoch": 0.42097250555673965, "grad_norm": 1.0131563727076405, "learning_rate": 4.941121768539197e-06, "loss": 0.3994, "step": 6913 }, { "epoch": 0.4210334013336175, "grad_norm": 1.0816180077408069, "learning_rate": 4.941104551377401e-06, "loss": 0.4549, "step": 6914 }, { "epoch": 0.4210942971104954, "grad_norm": 1.0201244990317855, "learning_rate": 4.9410873317286574e-06, "loss": 0.445, "step": 6915 }, { "epoch": 0.42115519288737324, "grad_norm": 0.9551500950919936, "learning_rate": 4.941070109592986e-06, "loss": 0.4816, "step": 6916 }, { "epoch": 0.42121608866425114, "grad_norm": 1.0950605847046275, "learning_rate": 4.941052884970401e-06, "loss": 0.4703, "step": 6917 }, { "epoch": 0.421276984441129, "grad_norm": 0.9294230563678175, "learning_rate": 4.941035657860924e-06, "loss": 0.4971, "step": 6918 }, { "epoch": 0.4213378802180069, "grad_norm": 0.9465827844722455, "learning_rate": 4.94101842826457e-06, "loss": 0.4762, "step": 6919 }, { "epoch": 0.42139877599488473, "grad_norm": 1.0101937414633266, "learning_rate": 4.941001196181358e-06, "loss": 0.5217, "step": 6920 }, { "epoch": 0.42145967177176263, "grad_norm": 0.9511398627331545, "learning_rate": 4.940983961611303e-06, "loss": 0.5055, "step": 6921 }, { "epoch": 0.4215205675486405, "grad_norm": 1.0522016635259785, "learning_rate": 4.940966724554426e-06, "loss": 0.4067, "step": 6922 }, { "epoch": 0.4215814633255184, "grad_norm": 1.011642882444406, "learning_rate": 4.9409494850107415e-06, "loss": 0.4376, "step": 6923 }, { "epoch": 0.4216423591023962, "grad_norm": 1.1592787021280624, "learning_rate": 4.940932242980269e-06, "loss": 0.4919, "step": 6924 }, { "epoch": 0.4217032548792741, "grad_norm": 1.0414739667384096, "learning_rate": 4.9409149984630255e-06, "loss": 0.4616, "step": 6925 }, { "epoch": 0.42176415065615197, "grad_norm": 1.0225456730650626, "learning_rate": 4.940897751459029e-06, "loss": 0.4917, "step": 6926 }, { "epoch": 0.42182504643302987, "grad_norm": 1.0549180102049043, "learning_rate": 4.940880501968296e-06, "loss": 0.4267, "step": 6927 }, { "epoch": 0.4218859422099077, "grad_norm": 0.9937189901096996, "learning_rate": 4.940863249990844e-06, "loss": 0.5189, "step": 6928 }, { "epoch": 0.4219468379867856, "grad_norm": 0.933368787936607, "learning_rate": 4.940845995526692e-06, "loss": 0.5361, "step": 6929 }, { "epoch": 0.4220077337636635, "grad_norm": 0.9688773327842239, "learning_rate": 4.940828738575857e-06, "loss": 0.4893, "step": 6930 }, { "epoch": 0.42206862954054136, "grad_norm": 0.9901015755147361, "learning_rate": 4.940811479138357e-06, "loss": 0.456, "step": 6931 }, { "epoch": 0.42212952531741926, "grad_norm": 0.9130600547781128, "learning_rate": 4.9407942172142086e-06, "loss": 0.4943, "step": 6932 }, { "epoch": 0.4221904210942971, "grad_norm": 1.0213028245959568, "learning_rate": 4.940776952803429e-06, "loss": 0.444, "step": 6933 }, { "epoch": 0.422251316871175, "grad_norm": 1.0325883463393994, "learning_rate": 4.940759685906037e-06, "loss": 0.4367, "step": 6934 }, { "epoch": 0.42231221264805285, "grad_norm": 1.0502623858798636, "learning_rate": 4.94074241652205e-06, "loss": 0.4313, "step": 6935 }, { "epoch": 0.42237310842493075, "grad_norm": 1.0103354402243365, "learning_rate": 4.940725144651486e-06, "loss": 0.46, "step": 6936 }, { "epoch": 0.4224340042018086, "grad_norm": 1.1111605491441987, "learning_rate": 4.940707870294361e-06, "loss": 0.4144, "step": 6937 }, { "epoch": 0.4224948999786865, "grad_norm": 0.9398467260123436, "learning_rate": 4.940690593450694e-06, "loss": 0.4687, "step": 6938 }, { "epoch": 0.42255579575556435, "grad_norm": 1.0778569928292203, "learning_rate": 4.940673314120502e-06, "loss": 0.4953, "step": 6939 }, { "epoch": 0.42261669153244225, "grad_norm": 1.0190092085487956, "learning_rate": 4.9406560323038025e-06, "loss": 0.4664, "step": 6940 }, { "epoch": 0.4226775873093201, "grad_norm": 1.0850365984810622, "learning_rate": 4.940638748000613e-06, "loss": 0.3761, "step": 6941 }, { "epoch": 0.422738483086198, "grad_norm": 1.0182462937170973, "learning_rate": 4.940621461210952e-06, "loss": 0.4922, "step": 6942 }, { "epoch": 0.42279937886307584, "grad_norm": 1.036285186292872, "learning_rate": 4.940604171934838e-06, "loss": 0.473, "step": 6943 }, { "epoch": 0.42286027463995374, "grad_norm": 0.9966408759446921, "learning_rate": 4.940586880172285e-06, "loss": 0.4845, "step": 6944 }, { "epoch": 0.4229211704168316, "grad_norm": 1.1042813444911208, "learning_rate": 4.940569585923314e-06, "loss": 0.4452, "step": 6945 }, { "epoch": 0.4229820661937095, "grad_norm": 0.9462982840365924, "learning_rate": 4.940552289187941e-06, "loss": 0.471, "step": 6946 }, { "epoch": 0.42304296197058733, "grad_norm": 1.0263902240352818, "learning_rate": 4.940534989966184e-06, "loss": 0.4618, "step": 6947 }, { "epoch": 0.42310385774746523, "grad_norm": 1.0240783833293283, "learning_rate": 4.9405176882580615e-06, "loss": 0.4582, "step": 6948 }, { "epoch": 0.4231647535243431, "grad_norm": 0.9655430895600866, "learning_rate": 4.94050038406359e-06, "loss": 0.4545, "step": 6949 }, { "epoch": 0.423225649301221, "grad_norm": 1.0040303291766122, "learning_rate": 4.940483077382788e-06, "loss": 0.5301, "step": 6950 }, { "epoch": 0.4232865450780988, "grad_norm": 1.023317926634678, "learning_rate": 4.9404657682156706e-06, "loss": 0.5076, "step": 6951 }, { "epoch": 0.4233474408549767, "grad_norm": 0.939752325179063, "learning_rate": 4.940448456562259e-06, "loss": 0.4691, "step": 6952 }, { "epoch": 0.42340833663185456, "grad_norm": 0.9526210286356107, "learning_rate": 4.940431142422569e-06, "loss": 0.4963, "step": 6953 }, { "epoch": 0.42346923240873247, "grad_norm": 1.016852620299676, "learning_rate": 4.940413825796618e-06, "loss": 0.46, "step": 6954 }, { "epoch": 0.4235301281856103, "grad_norm": 1.0694550369532958, "learning_rate": 4.940396506684425e-06, "loss": 0.4286, "step": 6955 }, { "epoch": 0.4235910239624882, "grad_norm": 0.9734987843897014, "learning_rate": 4.940379185086006e-06, "loss": 0.4699, "step": 6956 }, { "epoch": 0.42365191973936606, "grad_norm": 0.9699360162739222, "learning_rate": 4.94036186100138e-06, "loss": 0.4282, "step": 6957 }, { "epoch": 0.42371281551624396, "grad_norm": 0.9924920003044363, "learning_rate": 4.940344534430564e-06, "loss": 0.4784, "step": 6958 }, { "epoch": 0.4237737112931218, "grad_norm": 1.0577513434377066, "learning_rate": 4.940327205373575e-06, "loss": 0.427, "step": 6959 }, { "epoch": 0.4238346070699997, "grad_norm": 1.053350188417864, "learning_rate": 4.9403098738304325e-06, "loss": 0.4944, "step": 6960 }, { "epoch": 0.42389550284687755, "grad_norm": 1.0888298051671244, "learning_rate": 4.940292539801152e-06, "loss": 0.4796, "step": 6961 }, { "epoch": 0.42395639862375545, "grad_norm": 1.0266124968848056, "learning_rate": 4.940275203285752e-06, "loss": 0.4347, "step": 6962 }, { "epoch": 0.4240172944006333, "grad_norm": 1.0185203262010492, "learning_rate": 4.940257864284251e-06, "loss": 0.4149, "step": 6963 }, { "epoch": 0.4240781901775112, "grad_norm": 1.0345933575748654, "learning_rate": 4.940240522796667e-06, "loss": 0.4982, "step": 6964 }, { "epoch": 0.42413908595438904, "grad_norm": 1.0865316898672372, "learning_rate": 4.940223178823015e-06, "loss": 0.4263, "step": 6965 }, { "epoch": 0.42419998173126694, "grad_norm": 1.0813336133565872, "learning_rate": 4.940205832363315e-06, "loss": 0.4009, "step": 6966 }, { "epoch": 0.4242608775081448, "grad_norm": 1.1460867232664789, "learning_rate": 4.940188483417584e-06, "loss": 0.4624, "step": 6967 }, { "epoch": 0.4243217732850227, "grad_norm": 1.0807137576078003, "learning_rate": 4.940171131985841e-06, "loss": 0.3855, "step": 6968 }, { "epoch": 0.42438266906190053, "grad_norm": 1.037062783983756, "learning_rate": 4.9401537780681e-06, "loss": 0.4516, "step": 6969 }, { "epoch": 0.42444356483877843, "grad_norm": 0.9823518976166427, "learning_rate": 4.940136421664382e-06, "loss": 0.4968, "step": 6970 }, { "epoch": 0.42450446061565633, "grad_norm": 1.1052043429512128, "learning_rate": 4.940119062774704e-06, "loss": 0.4663, "step": 6971 }, { "epoch": 0.4245653563925342, "grad_norm": 1.0081892111525674, "learning_rate": 4.9401017013990835e-06, "loss": 0.4321, "step": 6972 }, { "epoch": 0.4246262521694121, "grad_norm": 1.05698260687011, "learning_rate": 4.940084337537539e-06, "loss": 0.4531, "step": 6973 }, { "epoch": 0.4246871479462899, "grad_norm": 1.0508671002882937, "learning_rate": 4.940066971190086e-06, "loss": 0.4412, "step": 6974 }, { "epoch": 0.4247480437231678, "grad_norm": 1.0355220017765956, "learning_rate": 4.9400496023567435e-06, "loss": 0.4368, "step": 6975 }, { "epoch": 0.42480893950004567, "grad_norm": 0.9359909007787997, "learning_rate": 4.94003223103753e-06, "loss": 0.5068, "step": 6976 }, { "epoch": 0.42486983527692357, "grad_norm": 1.0488866243554347, "learning_rate": 4.9400148572324616e-06, "loss": 0.4409, "step": 6977 }, { "epoch": 0.4249307310538014, "grad_norm": 1.1119019771236567, "learning_rate": 4.939997480941557e-06, "loss": 0.4512, "step": 6978 }, { "epoch": 0.4249916268306793, "grad_norm": 1.0475749867905801, "learning_rate": 4.9399801021648345e-06, "loss": 0.4189, "step": 6979 }, { "epoch": 0.42505252260755716, "grad_norm": 1.0425743986077347, "learning_rate": 4.93996272090231e-06, "loss": 0.4665, "step": 6980 }, { "epoch": 0.42511341838443506, "grad_norm": 1.036238305388462, "learning_rate": 4.939945337154002e-06, "loss": 0.4562, "step": 6981 }, { "epoch": 0.4251743141613129, "grad_norm": 1.0276347560595651, "learning_rate": 4.939927950919929e-06, "loss": 0.445, "step": 6982 }, { "epoch": 0.4252352099381908, "grad_norm": 0.9397353233528682, "learning_rate": 4.939910562200109e-06, "loss": 0.432, "step": 6983 }, { "epoch": 0.42529610571506865, "grad_norm": 0.9975536852744393, "learning_rate": 4.939893170994558e-06, "loss": 0.4674, "step": 6984 }, { "epoch": 0.42535700149194655, "grad_norm": 0.9982259401448434, "learning_rate": 4.939875777303293e-06, "loss": 0.4532, "step": 6985 }, { "epoch": 0.4254178972688244, "grad_norm": 1.0871870478115369, "learning_rate": 4.939858381126335e-06, "loss": 0.4633, "step": 6986 }, { "epoch": 0.4254787930457023, "grad_norm": 1.031815737756965, "learning_rate": 4.939840982463701e-06, "loss": 0.4845, "step": 6987 }, { "epoch": 0.42553968882258014, "grad_norm": 0.9954404292900767, "learning_rate": 4.939823581315406e-06, "loss": 0.4531, "step": 6988 }, { "epoch": 0.42560058459945804, "grad_norm": 0.9461123495623435, "learning_rate": 4.93980617768147e-06, "loss": 0.4805, "step": 6989 }, { "epoch": 0.4256614803763359, "grad_norm": 1.061790910746563, "learning_rate": 4.9397887715619095e-06, "loss": 0.4138, "step": 6990 }, { "epoch": 0.4257223761532138, "grad_norm": 1.0331710756406984, "learning_rate": 4.939771362956744e-06, "loss": 0.4417, "step": 6991 }, { "epoch": 0.42578327193009163, "grad_norm": 0.9834644011403237, "learning_rate": 4.93975395186599e-06, "loss": 0.5487, "step": 6992 }, { "epoch": 0.42584416770696953, "grad_norm": 1.0399382430146316, "learning_rate": 4.939736538289665e-06, "loss": 0.4075, "step": 6993 }, { "epoch": 0.4259050634838474, "grad_norm": 1.0008168972534452, "learning_rate": 4.9397191222277875e-06, "loss": 0.4554, "step": 6994 }, { "epoch": 0.4259659592607253, "grad_norm": 1.0289114825750996, "learning_rate": 4.939701703680374e-06, "loss": 0.4397, "step": 6995 }, { "epoch": 0.4260268550376031, "grad_norm": 1.0160030388953158, "learning_rate": 4.939684282647444e-06, "loss": 0.4397, "step": 6996 }, { "epoch": 0.426087750814481, "grad_norm": 0.9449815308929997, "learning_rate": 4.939666859129015e-06, "loss": 0.5058, "step": 6997 }, { "epoch": 0.42614864659135887, "grad_norm": 0.9928772780151173, "learning_rate": 4.939649433125102e-06, "loss": 0.4974, "step": 6998 }, { "epoch": 0.42620954236823677, "grad_norm": 1.0461101072063617, "learning_rate": 4.939632004635727e-06, "loss": 0.4282, "step": 6999 }, { "epoch": 0.4262704381451146, "grad_norm": 1.1414301276456822, "learning_rate": 4.939614573660905e-06, "loss": 0.3853, "step": 7000 }, { "epoch": 0.4263313339219925, "grad_norm": 1.0132488989882424, "learning_rate": 4.939597140200654e-06, "loss": 0.4472, "step": 7001 }, { "epoch": 0.42639222969887036, "grad_norm": 1.0871401730293724, "learning_rate": 4.939579704254992e-06, "loss": 0.4561, "step": 7002 }, { "epoch": 0.42645312547574826, "grad_norm": 0.9156798432951566, "learning_rate": 4.939562265823938e-06, "loss": 0.4602, "step": 7003 }, { "epoch": 0.4265140212526261, "grad_norm": 1.007524054944773, "learning_rate": 4.9395448249075076e-06, "loss": 0.462, "step": 7004 }, { "epoch": 0.426574917029504, "grad_norm": 0.9845664493797267, "learning_rate": 4.93952738150572e-06, "loss": 0.4913, "step": 7005 }, { "epoch": 0.42663581280638185, "grad_norm": 1.010788130061817, "learning_rate": 4.9395099356185924e-06, "loss": 0.4069, "step": 7006 }, { "epoch": 0.42669670858325975, "grad_norm": 0.9786437593105952, "learning_rate": 4.939492487246142e-06, "loss": 0.4602, "step": 7007 }, { "epoch": 0.4267576043601376, "grad_norm": 0.9709057479772282, "learning_rate": 4.9394750363883896e-06, "loss": 0.4298, "step": 7008 }, { "epoch": 0.4268185001370155, "grad_norm": 1.0288104877823887, "learning_rate": 4.939457583045349e-06, "loss": 0.4191, "step": 7009 }, { "epoch": 0.42687939591389334, "grad_norm": 1.015892927957105, "learning_rate": 4.93944012721704e-06, "loss": 0.4367, "step": 7010 }, { "epoch": 0.42694029169077125, "grad_norm": 0.9799115205592929, "learning_rate": 4.939422668903481e-06, "loss": 0.4556, "step": 7011 }, { "epoch": 0.42700118746764915, "grad_norm": 1.0076651290312408, "learning_rate": 4.939405208104688e-06, "loss": 0.5135, "step": 7012 }, { "epoch": 0.427062083244527, "grad_norm": 1.049547453357106, "learning_rate": 4.93938774482068e-06, "loss": 0.494, "step": 7013 }, { "epoch": 0.4271229790214049, "grad_norm": 1.0812495410903404, "learning_rate": 4.939370279051475e-06, "loss": 0.4653, "step": 7014 }, { "epoch": 0.42718387479828274, "grad_norm": 0.9357447919133179, "learning_rate": 4.9393528107970896e-06, "loss": 0.5002, "step": 7015 }, { "epoch": 0.42724477057516064, "grad_norm": 1.0776502412126743, "learning_rate": 4.9393353400575425e-06, "loss": 0.483, "step": 7016 }, { "epoch": 0.4273056663520385, "grad_norm": 1.1827740899761419, "learning_rate": 4.939317866832852e-06, "loss": 0.4777, "step": 7017 }, { "epoch": 0.4273665621289164, "grad_norm": 0.988918966312077, "learning_rate": 4.939300391123033e-06, "loss": 0.4941, "step": 7018 }, { "epoch": 0.42742745790579423, "grad_norm": 1.0050840318116836, "learning_rate": 4.939282912928107e-06, "loss": 0.5217, "step": 7019 }, { "epoch": 0.42748835368267213, "grad_norm": 0.959543566063092, "learning_rate": 4.93926543224809e-06, "loss": 0.4435, "step": 7020 }, { "epoch": 0.42754924945955, "grad_norm": 1.0258445364224773, "learning_rate": 4.939247949083e-06, "loss": 0.4912, "step": 7021 }, { "epoch": 0.4276101452364279, "grad_norm": 1.10453499998866, "learning_rate": 4.939230463432856e-06, "loss": 0.4586, "step": 7022 }, { "epoch": 0.4276710410133057, "grad_norm": 1.1633509632478276, "learning_rate": 4.939212975297674e-06, "loss": 0.4827, "step": 7023 }, { "epoch": 0.4277319367901836, "grad_norm": 1.0373306803719449, "learning_rate": 4.9391954846774716e-06, "loss": 0.4251, "step": 7024 }, { "epoch": 0.42779283256706147, "grad_norm": 1.0670837195703933, "learning_rate": 4.9391779915722686e-06, "loss": 0.3951, "step": 7025 }, { "epoch": 0.42785372834393937, "grad_norm": 0.9892444512888067, "learning_rate": 4.939160495982082e-06, "loss": 0.5216, "step": 7026 }, { "epoch": 0.4279146241208172, "grad_norm": 0.975094729194285, "learning_rate": 4.939142997906928e-06, "loss": 0.4477, "step": 7027 }, { "epoch": 0.4279755198976951, "grad_norm": 1.128155909276017, "learning_rate": 4.939125497346827e-06, "loss": 0.4278, "step": 7028 }, { "epoch": 0.42803641567457296, "grad_norm": 0.9663970108909329, "learning_rate": 4.939107994301795e-06, "loss": 0.4687, "step": 7029 }, { "epoch": 0.42809731145145086, "grad_norm": 1.1018415609152197, "learning_rate": 4.939090488771851e-06, "loss": 0.4554, "step": 7030 }, { "epoch": 0.4281582072283287, "grad_norm": 1.0320985947704786, "learning_rate": 4.9390729807570125e-06, "loss": 0.5207, "step": 7031 }, { "epoch": 0.4282191030052066, "grad_norm": 1.0480163601673853, "learning_rate": 4.939055470257297e-06, "loss": 0.5109, "step": 7032 }, { "epoch": 0.42827999878208445, "grad_norm": 0.919250292677916, "learning_rate": 4.939037957272722e-06, "loss": 0.484, "step": 7033 }, { "epoch": 0.42834089455896235, "grad_norm": 1.080636214689831, "learning_rate": 4.939020441803306e-06, "loss": 0.4524, "step": 7034 }, { "epoch": 0.4284017903358402, "grad_norm": 1.0291258677070394, "learning_rate": 4.939002923849067e-06, "loss": 0.4291, "step": 7035 }, { "epoch": 0.4284626861127181, "grad_norm": 0.9974736867058733, "learning_rate": 4.9389854034100225e-06, "loss": 0.4401, "step": 7036 }, { "epoch": 0.42852358188959594, "grad_norm": 1.0208852748916548, "learning_rate": 4.93896788048619e-06, "loss": 0.4536, "step": 7037 }, { "epoch": 0.42858447766647384, "grad_norm": 1.0826261654472114, "learning_rate": 4.938950355077588e-06, "loss": 0.395, "step": 7038 }, { "epoch": 0.4286453734433517, "grad_norm": 0.9741257489773556, "learning_rate": 4.938932827184234e-06, "loss": 0.4231, "step": 7039 }, { "epoch": 0.4287062692202296, "grad_norm": 1.115673413612196, "learning_rate": 4.938915296806146e-06, "loss": 0.3191, "step": 7040 }, { "epoch": 0.42876716499710743, "grad_norm": 1.047165031527899, "learning_rate": 4.938897763943342e-06, "loss": 0.4989, "step": 7041 }, { "epoch": 0.42882806077398533, "grad_norm": 1.0994218651925871, "learning_rate": 4.9388802285958395e-06, "loss": 0.4177, "step": 7042 }, { "epoch": 0.4288889565508632, "grad_norm": 0.9783260455830464, "learning_rate": 4.938862690763656e-06, "loss": 0.4223, "step": 7043 }, { "epoch": 0.4289498523277411, "grad_norm": 0.9410314266824901, "learning_rate": 4.9388451504468104e-06, "loss": 0.4797, "step": 7044 }, { "epoch": 0.4290107481046189, "grad_norm": 1.0506841370610267, "learning_rate": 4.9388276076453204e-06, "loss": 0.4284, "step": 7045 }, { "epoch": 0.4290716438814968, "grad_norm": 1.1415820217103596, "learning_rate": 4.938810062359203e-06, "loss": 0.4177, "step": 7046 }, { "epoch": 0.42913253965837467, "grad_norm": 1.045195023465164, "learning_rate": 4.938792514588478e-06, "loss": 0.4635, "step": 7047 }, { "epoch": 0.42919343543525257, "grad_norm": 1.0570571583024042, "learning_rate": 4.9387749643331595e-06, "loss": 0.4932, "step": 7048 }, { "epoch": 0.4292543312121304, "grad_norm": 1.1010580995669788, "learning_rate": 4.938757411593269e-06, "loss": 0.3948, "step": 7049 }, { "epoch": 0.4293152269890083, "grad_norm": 0.9869463240108883, "learning_rate": 4.938739856368823e-06, "loss": 0.4858, "step": 7050 }, { "epoch": 0.42937612276588616, "grad_norm": 1.0118590730723809, "learning_rate": 4.93872229865984e-06, "loss": 0.4482, "step": 7051 }, { "epoch": 0.42943701854276406, "grad_norm": 1.020531129344489, "learning_rate": 4.938704738466337e-06, "loss": 0.4023, "step": 7052 }, { "epoch": 0.42949791431964196, "grad_norm": 1.022125571620922, "learning_rate": 4.938687175788332e-06, "loss": 0.4691, "step": 7053 }, { "epoch": 0.4295588100965198, "grad_norm": 0.9438495236002634, "learning_rate": 4.938669610625844e-06, "loss": 0.4883, "step": 7054 }, { "epoch": 0.4296197058733977, "grad_norm": 0.9244424379671831, "learning_rate": 4.93865204297889e-06, "loss": 0.4896, "step": 7055 }, { "epoch": 0.42968060165027555, "grad_norm": 1.0258114350264504, "learning_rate": 4.938634472847488e-06, "loss": 0.4078, "step": 7056 }, { "epoch": 0.42974149742715345, "grad_norm": 1.024124548343945, "learning_rate": 4.938616900231655e-06, "loss": 0.4585, "step": 7057 }, { "epoch": 0.4298023932040313, "grad_norm": 1.0145381530997983, "learning_rate": 4.938599325131411e-06, "loss": 0.4264, "step": 7058 }, { "epoch": 0.4298632889809092, "grad_norm": 0.9960456051855848, "learning_rate": 4.938581747546772e-06, "loss": 0.4768, "step": 7059 }, { "epoch": 0.42992418475778704, "grad_norm": 1.0615262590762542, "learning_rate": 4.938564167477756e-06, "loss": 0.5009, "step": 7060 }, { "epoch": 0.42998508053466494, "grad_norm": 0.9334120670965571, "learning_rate": 4.938546584924383e-06, "loss": 0.4297, "step": 7061 }, { "epoch": 0.4300459763115428, "grad_norm": 0.900686769691645, "learning_rate": 4.938528999886668e-06, "loss": 0.5149, "step": 7062 }, { "epoch": 0.4301068720884207, "grad_norm": 1.0058545884451588, "learning_rate": 4.938511412364631e-06, "loss": 0.4695, "step": 7063 }, { "epoch": 0.43016776786529853, "grad_norm": 0.9317177303969639, "learning_rate": 4.93849382235829e-06, "loss": 0.4507, "step": 7064 }, { "epoch": 0.43022866364217643, "grad_norm": 1.0119626154968677, "learning_rate": 4.938476229867661e-06, "loss": 0.4751, "step": 7065 }, { "epoch": 0.4302895594190543, "grad_norm": 0.9542196061948863, "learning_rate": 4.938458634892764e-06, "loss": 0.4637, "step": 7066 }, { "epoch": 0.4303504551959322, "grad_norm": 0.9909306481961384, "learning_rate": 4.938441037433615e-06, "loss": 0.4667, "step": 7067 }, { "epoch": 0.43041135097281, "grad_norm": 1.0925770574580504, "learning_rate": 4.938423437490234e-06, "loss": 0.4681, "step": 7068 }, { "epoch": 0.4304722467496879, "grad_norm": 0.9455375216037856, "learning_rate": 4.938405835062638e-06, "loss": 0.462, "step": 7069 }, { "epoch": 0.43053314252656577, "grad_norm": 1.043306104927692, "learning_rate": 4.938388230150845e-06, "loss": 0.4522, "step": 7070 }, { "epoch": 0.43059403830344367, "grad_norm": 1.046471829822673, "learning_rate": 4.938370622754871e-06, "loss": 0.4018, "step": 7071 }, { "epoch": 0.4306549340803215, "grad_norm": 0.9750519760104542, "learning_rate": 4.938353012874737e-06, "loss": 0.444, "step": 7072 }, { "epoch": 0.4307158298571994, "grad_norm": 1.0094488131082406, "learning_rate": 4.93833540051046e-06, "loss": 0.414, "step": 7073 }, { "epoch": 0.43077672563407726, "grad_norm": 1.0360162073716277, "learning_rate": 4.9383177856620565e-06, "loss": 0.4056, "step": 7074 }, { "epoch": 0.43083762141095516, "grad_norm": 1.0513415709796703, "learning_rate": 4.938300168329546e-06, "loss": 0.4048, "step": 7075 }, { "epoch": 0.430898517187833, "grad_norm": 1.0814690150944943, "learning_rate": 4.938282548512947e-06, "loss": 0.4047, "step": 7076 }, { "epoch": 0.4309594129647109, "grad_norm": 0.9744654361607861, "learning_rate": 4.938264926212275e-06, "loss": 0.4442, "step": 7077 }, { "epoch": 0.43102030874158875, "grad_norm": 1.0310410107901322, "learning_rate": 4.93824730142755e-06, "loss": 0.5355, "step": 7078 }, { "epoch": 0.43108120451846665, "grad_norm": 0.9898391366501592, "learning_rate": 4.9382296741587885e-06, "loss": 0.4513, "step": 7079 }, { "epoch": 0.4311421002953445, "grad_norm": 1.0758256026831732, "learning_rate": 4.9382120444060105e-06, "loss": 0.4865, "step": 7080 }, { "epoch": 0.4312029960722224, "grad_norm": 1.1210735686802078, "learning_rate": 4.938194412169233e-06, "loss": 0.4859, "step": 7081 }, { "epoch": 0.43126389184910024, "grad_norm": 0.9350051929199468, "learning_rate": 4.938176777448472e-06, "loss": 0.4947, "step": 7082 }, { "epoch": 0.43132478762597815, "grad_norm": 0.9611407998792437, "learning_rate": 4.938159140243749e-06, "loss": 0.4854, "step": 7083 }, { "epoch": 0.431385683402856, "grad_norm": 0.9451077421184301, "learning_rate": 4.938141500555079e-06, "loss": 0.4819, "step": 7084 }, { "epoch": 0.4314465791797339, "grad_norm": 1.0636730032304016, "learning_rate": 4.938123858382482e-06, "loss": 0.4743, "step": 7085 }, { "epoch": 0.43150747495661174, "grad_norm": 1.0837472605590237, "learning_rate": 4.938106213725974e-06, "loss": 0.4348, "step": 7086 }, { "epoch": 0.43156837073348964, "grad_norm": 1.0207173981345758, "learning_rate": 4.938088566585575e-06, "loss": 0.4961, "step": 7087 }, { "epoch": 0.4316292665103675, "grad_norm": 1.0745283895063125, "learning_rate": 4.938070916961302e-06, "loss": 0.4249, "step": 7088 }, { "epoch": 0.4316901622872454, "grad_norm": 1.002591862767097, "learning_rate": 4.938053264853172e-06, "loss": 0.487, "step": 7089 }, { "epoch": 0.4317510580641232, "grad_norm": 0.8909262595633126, "learning_rate": 4.938035610261206e-06, "loss": 0.4699, "step": 7090 }, { "epoch": 0.43181195384100113, "grad_norm": 1.0114105696667288, "learning_rate": 4.938017953185417e-06, "loss": 0.5073, "step": 7091 }, { "epoch": 0.431872849617879, "grad_norm": 0.981222062755004, "learning_rate": 4.938000293625829e-06, "loss": 0.5417, "step": 7092 }, { "epoch": 0.4319337453947569, "grad_norm": 1.0385996015307957, "learning_rate": 4.937982631582456e-06, "loss": 0.4599, "step": 7093 }, { "epoch": 0.4319946411716348, "grad_norm": 1.038099459853274, "learning_rate": 4.937964967055317e-06, "loss": 0.5162, "step": 7094 }, { "epoch": 0.4320555369485126, "grad_norm": 1.06028020528734, "learning_rate": 4.937947300044429e-06, "loss": 0.4552, "step": 7095 }, { "epoch": 0.4321164327253905, "grad_norm": 1.001448843544287, "learning_rate": 4.937929630549812e-06, "loss": 0.4439, "step": 7096 }, { "epoch": 0.43217732850226837, "grad_norm": 1.0250595034826888, "learning_rate": 4.937911958571483e-06, "loss": 0.4475, "step": 7097 }, { "epoch": 0.43223822427914627, "grad_norm": 1.089475594478964, "learning_rate": 4.937894284109459e-06, "loss": 0.4648, "step": 7098 }, { "epoch": 0.4322991200560241, "grad_norm": 1.0957134463833573, "learning_rate": 4.93787660716376e-06, "loss": 0.4614, "step": 7099 }, { "epoch": 0.432360015832902, "grad_norm": 1.021985553124796, "learning_rate": 4.9378589277344025e-06, "loss": 0.4668, "step": 7100 }, { "epoch": 0.43242091160977986, "grad_norm": 1.1135632458497244, "learning_rate": 4.937841245821405e-06, "loss": 0.4224, "step": 7101 }, { "epoch": 0.43248180738665776, "grad_norm": 0.9931257958147263, "learning_rate": 4.937823561424786e-06, "loss": 0.4186, "step": 7102 }, { "epoch": 0.4325427031635356, "grad_norm": 1.041509459659007, "learning_rate": 4.937805874544564e-06, "loss": 0.4598, "step": 7103 }, { "epoch": 0.4326035989404135, "grad_norm": 0.9980360074141074, "learning_rate": 4.9377881851807545e-06, "loss": 0.4812, "step": 7104 }, { "epoch": 0.43266449471729135, "grad_norm": 0.9994704626844962, "learning_rate": 4.937770493333377e-06, "loss": 0.4748, "step": 7105 }, { "epoch": 0.43272539049416925, "grad_norm": 1.1272729232202325, "learning_rate": 4.9377527990024496e-06, "loss": 0.4199, "step": 7106 }, { "epoch": 0.4327862862710471, "grad_norm": 1.0944139497897314, "learning_rate": 4.937735102187991e-06, "loss": 0.4408, "step": 7107 }, { "epoch": 0.432847182047925, "grad_norm": 1.029679406756991, "learning_rate": 4.937717402890019e-06, "loss": 0.5163, "step": 7108 }, { "epoch": 0.43290807782480284, "grad_norm": 0.9499059092808482, "learning_rate": 4.93769970110855e-06, "loss": 0.4656, "step": 7109 }, { "epoch": 0.43296897360168074, "grad_norm": 1.1086874523553165, "learning_rate": 4.937681996843604e-06, "loss": 0.4515, "step": 7110 }, { "epoch": 0.4330298693785586, "grad_norm": 0.9631739919881378, "learning_rate": 4.937664290095198e-06, "loss": 0.5049, "step": 7111 }, { "epoch": 0.4330907651554365, "grad_norm": 1.055631652184079, "learning_rate": 4.93764658086335e-06, "loss": 0.4644, "step": 7112 }, { "epoch": 0.43315166093231433, "grad_norm": 1.0281573128798953, "learning_rate": 4.937628869148079e-06, "loss": 0.4316, "step": 7113 }, { "epoch": 0.43321255670919223, "grad_norm": 1.0434776608518104, "learning_rate": 4.937611154949401e-06, "loss": 0.4324, "step": 7114 }, { "epoch": 0.4332734524860701, "grad_norm": 1.032575094663712, "learning_rate": 4.937593438267337e-06, "loss": 0.4465, "step": 7115 }, { "epoch": 0.433334348262948, "grad_norm": 0.9960139920846053, "learning_rate": 4.937575719101903e-06, "loss": 0.4775, "step": 7116 }, { "epoch": 0.4333952440398258, "grad_norm": 0.9876161841400812, "learning_rate": 4.937557997453118e-06, "loss": 0.4448, "step": 7117 }, { "epoch": 0.4334561398167037, "grad_norm": 0.9876500196684243, "learning_rate": 4.937540273320998e-06, "loss": 0.4888, "step": 7118 }, { "epoch": 0.43351703559358157, "grad_norm": 1.068044536534033, "learning_rate": 4.937522546705564e-06, "loss": 0.4212, "step": 7119 }, { "epoch": 0.43357793137045947, "grad_norm": 1.0103701515483334, "learning_rate": 4.937504817606832e-06, "loss": 0.4818, "step": 7120 }, { "epoch": 0.4336388271473373, "grad_norm": 0.9583081254053605, "learning_rate": 4.937487086024821e-06, "loss": 0.4835, "step": 7121 }, { "epoch": 0.4336997229242152, "grad_norm": 1.0574715654278497, "learning_rate": 4.9374693519595495e-06, "loss": 0.458, "step": 7122 }, { "epoch": 0.43376061870109306, "grad_norm": 1.0104196517506814, "learning_rate": 4.937451615411034e-06, "loss": 0.438, "step": 7123 }, { "epoch": 0.43382151447797096, "grad_norm": 1.080897204134669, "learning_rate": 4.937433876379294e-06, "loss": 0.4884, "step": 7124 }, { "epoch": 0.4338824102548488, "grad_norm": 1.0463319778066493, "learning_rate": 4.937416134864347e-06, "loss": 0.5322, "step": 7125 }, { "epoch": 0.4339433060317267, "grad_norm": 1.0300216660184212, "learning_rate": 4.937398390866211e-06, "loss": 0.4583, "step": 7126 }, { "epoch": 0.43400420180860455, "grad_norm": 1.10495803879854, "learning_rate": 4.9373806443849045e-06, "loss": 0.4302, "step": 7127 }, { "epoch": 0.43406509758548245, "grad_norm": 1.0546796925582107, "learning_rate": 4.937362895420445e-06, "loss": 0.4467, "step": 7128 }, { "epoch": 0.4341259933623603, "grad_norm": 0.9038056004819819, "learning_rate": 4.937345143972851e-06, "loss": 0.491, "step": 7129 }, { "epoch": 0.4341868891392382, "grad_norm": 0.9847286495927526, "learning_rate": 4.9373273900421405e-06, "loss": 0.4733, "step": 7130 }, { "epoch": 0.43424778491611604, "grad_norm": 1.0392213564169699, "learning_rate": 4.937309633628331e-06, "loss": 0.3771, "step": 7131 }, { "epoch": 0.43430868069299394, "grad_norm": 1.0839067393012354, "learning_rate": 4.937291874731441e-06, "loss": 0.4596, "step": 7132 }, { "epoch": 0.4343695764698718, "grad_norm": 1.0022123441914526, "learning_rate": 4.93727411335149e-06, "loss": 0.4679, "step": 7133 }, { "epoch": 0.4344304722467497, "grad_norm": 1.0456732221606384, "learning_rate": 4.937256349488493e-06, "loss": 0.4628, "step": 7134 }, { "epoch": 0.4344913680236276, "grad_norm": 1.0818087495984272, "learning_rate": 4.937238583142472e-06, "loss": 0.4391, "step": 7135 }, { "epoch": 0.43455226380050543, "grad_norm": 0.9542866460723721, "learning_rate": 4.937220814313441e-06, "loss": 0.4264, "step": 7136 }, { "epoch": 0.43461315957738333, "grad_norm": 1.0429017390458684, "learning_rate": 4.937203043001421e-06, "loss": 0.431, "step": 7137 }, { "epoch": 0.4346740553542612, "grad_norm": 1.0534520453253942, "learning_rate": 4.937185269206429e-06, "loss": 0.4842, "step": 7138 }, { "epoch": 0.4347349511311391, "grad_norm": 1.1381490184625294, "learning_rate": 4.937167492928484e-06, "loss": 0.3983, "step": 7139 }, { "epoch": 0.4347958469080169, "grad_norm": 1.0748994133462277, "learning_rate": 4.937149714167603e-06, "loss": 0.3898, "step": 7140 }, { "epoch": 0.4348567426848948, "grad_norm": 1.0231879655903215, "learning_rate": 4.9371319329238045e-06, "loss": 0.5229, "step": 7141 }, { "epoch": 0.43491763846177267, "grad_norm": 1.000445344457481, "learning_rate": 4.937114149197106e-06, "loss": 0.4402, "step": 7142 }, { "epoch": 0.43497853423865057, "grad_norm": 1.0995731814675522, "learning_rate": 4.937096362987528e-06, "loss": 0.441, "step": 7143 }, { "epoch": 0.4350394300155284, "grad_norm": 0.9941223416568419, "learning_rate": 4.937078574295085e-06, "loss": 0.4522, "step": 7144 }, { "epoch": 0.4351003257924063, "grad_norm": 1.0353178014160083, "learning_rate": 4.937060783119798e-06, "loss": 0.4671, "step": 7145 }, { "epoch": 0.43516122156928416, "grad_norm": 1.0942088113085093, "learning_rate": 4.937042989461684e-06, "loss": 0.3881, "step": 7146 }, { "epoch": 0.43522211734616206, "grad_norm": 0.9841655784277671, "learning_rate": 4.937025193320762e-06, "loss": 0.4676, "step": 7147 }, { "epoch": 0.4352830131230399, "grad_norm": 1.0228441162541126, "learning_rate": 4.937007394697048e-06, "loss": 0.44, "step": 7148 }, { "epoch": 0.4353439088999178, "grad_norm": 1.0599002253958922, "learning_rate": 4.936989593590562e-06, "loss": 0.4843, "step": 7149 }, { "epoch": 0.43540480467679565, "grad_norm": 1.0445870357288252, "learning_rate": 4.936971790001322e-06, "loss": 0.3858, "step": 7150 }, { "epoch": 0.43546570045367355, "grad_norm": 1.0187340694127824, "learning_rate": 4.936953983929346e-06, "loss": 0.3981, "step": 7151 }, { "epoch": 0.4355265962305514, "grad_norm": 1.0087027700608464, "learning_rate": 4.936936175374652e-06, "loss": 0.4597, "step": 7152 }, { "epoch": 0.4355874920074293, "grad_norm": 1.0345240063179189, "learning_rate": 4.9369183643372566e-06, "loss": 0.4461, "step": 7153 }, { "epoch": 0.43564838778430715, "grad_norm": 1.0704355390742406, "learning_rate": 4.936900550817181e-06, "loss": 0.4531, "step": 7154 }, { "epoch": 0.43570928356118505, "grad_norm": 1.078352773229725, "learning_rate": 4.936882734814441e-06, "loss": 0.3979, "step": 7155 }, { "epoch": 0.4357701793380629, "grad_norm": 1.088381708106692, "learning_rate": 4.936864916329056e-06, "loss": 0.4411, "step": 7156 }, { "epoch": 0.4358310751149408, "grad_norm": 0.9887209002503845, "learning_rate": 4.936847095361044e-06, "loss": 0.4778, "step": 7157 }, { "epoch": 0.43589197089181864, "grad_norm": 1.0452793698523841, "learning_rate": 4.936829271910421e-06, "loss": 0.4359, "step": 7158 }, { "epoch": 0.43595286666869654, "grad_norm": 0.8962171297415206, "learning_rate": 4.936811445977209e-06, "loss": 0.4463, "step": 7159 }, { "epoch": 0.4360137624455744, "grad_norm": 1.0433837634886487, "learning_rate": 4.9367936175614235e-06, "loss": 0.4706, "step": 7160 }, { "epoch": 0.4360746582224523, "grad_norm": 0.9979812537217342, "learning_rate": 4.936775786663084e-06, "loss": 0.4139, "step": 7161 }, { "epoch": 0.43613555399933013, "grad_norm": 1.092315302636205, "learning_rate": 4.936757953282207e-06, "loss": 0.4066, "step": 7162 }, { "epoch": 0.43619644977620803, "grad_norm": 0.9506331849030417, "learning_rate": 4.9367401174188115e-06, "loss": 0.4844, "step": 7163 }, { "epoch": 0.4362573455530859, "grad_norm": 0.993679120924777, "learning_rate": 4.9367222790729165e-06, "loss": 0.4197, "step": 7164 }, { "epoch": 0.4363182413299638, "grad_norm": 1.0041227154625225, "learning_rate": 4.936704438244539e-06, "loss": 0.4434, "step": 7165 }, { "epoch": 0.4363791371068416, "grad_norm": 1.0668273479298038, "learning_rate": 4.936686594933699e-06, "loss": 0.4385, "step": 7166 }, { "epoch": 0.4364400328837195, "grad_norm": 1.1055154774973153, "learning_rate": 4.936668749140412e-06, "loss": 0.5047, "step": 7167 }, { "epoch": 0.43650092866059736, "grad_norm": 1.045481017536549, "learning_rate": 4.936650900864698e-06, "loss": 0.5166, "step": 7168 }, { "epoch": 0.43656182443747527, "grad_norm": 1.1268498257145763, "learning_rate": 4.936633050106574e-06, "loss": 0.4728, "step": 7169 }, { "epoch": 0.4366227202143531, "grad_norm": 0.987426318748915, "learning_rate": 4.93661519686606e-06, "loss": 0.4913, "step": 7170 }, { "epoch": 0.436683615991231, "grad_norm": 1.0029035149527672, "learning_rate": 4.936597341143172e-06, "loss": 0.5284, "step": 7171 }, { "epoch": 0.43674451176810886, "grad_norm": 1.1662672609119245, "learning_rate": 4.936579482937931e-06, "loss": 0.4409, "step": 7172 }, { "epoch": 0.43680540754498676, "grad_norm": 1.0668791488875486, "learning_rate": 4.936561622250352e-06, "loss": 0.4519, "step": 7173 }, { "epoch": 0.4368663033218646, "grad_norm": 1.0053309065504548, "learning_rate": 4.936543759080455e-06, "loss": 0.4368, "step": 7174 }, { "epoch": 0.4369271990987425, "grad_norm": 1.0482231232373325, "learning_rate": 4.936525893428258e-06, "loss": 0.4934, "step": 7175 }, { "epoch": 0.4369880948756204, "grad_norm": 0.9671227301366416, "learning_rate": 4.936508025293779e-06, "loss": 0.4621, "step": 7176 }, { "epoch": 0.43704899065249825, "grad_norm": 0.9267957636017232, "learning_rate": 4.936490154677036e-06, "loss": 0.4814, "step": 7177 }, { "epoch": 0.43710988642937615, "grad_norm": 0.9981997568321205, "learning_rate": 4.936472281578047e-06, "loss": 0.4838, "step": 7178 }, { "epoch": 0.437170782206254, "grad_norm": 1.0785307981781407, "learning_rate": 4.936454405996832e-06, "loss": 0.4347, "step": 7179 }, { "epoch": 0.4372316779831319, "grad_norm": 0.9728557170372407, "learning_rate": 4.936436527933408e-06, "loss": 0.4831, "step": 7180 }, { "epoch": 0.43729257376000974, "grad_norm": 1.0352113688882916, "learning_rate": 4.936418647387792e-06, "loss": 0.5007, "step": 7181 }, { "epoch": 0.43735346953688764, "grad_norm": 0.9690031655265869, "learning_rate": 4.936400764360004e-06, "loss": 0.3916, "step": 7182 }, { "epoch": 0.4374143653137655, "grad_norm": 1.0310503252623207, "learning_rate": 4.936382878850061e-06, "loss": 0.4876, "step": 7183 }, { "epoch": 0.4374752610906434, "grad_norm": 1.037427995809394, "learning_rate": 4.936364990857983e-06, "loss": 0.4498, "step": 7184 }, { "epoch": 0.43753615686752123, "grad_norm": 1.1295536169305984, "learning_rate": 4.936347100383786e-06, "loss": 0.3962, "step": 7185 }, { "epoch": 0.43759705264439913, "grad_norm": 0.9930325715715437, "learning_rate": 4.936329207427489e-06, "loss": 0.4374, "step": 7186 }, { "epoch": 0.437657948421277, "grad_norm": 0.9697658784871676, "learning_rate": 4.936311311989111e-06, "loss": 0.5, "step": 7187 }, { "epoch": 0.4377188441981549, "grad_norm": 1.1303722125677462, "learning_rate": 4.9362934140686695e-06, "loss": 0.4517, "step": 7188 }, { "epoch": 0.4377797399750327, "grad_norm": 0.9691531038673106, "learning_rate": 4.936275513666183e-06, "loss": 0.4261, "step": 7189 }, { "epoch": 0.4378406357519106, "grad_norm": 1.0629793003859291, "learning_rate": 4.93625761078167e-06, "loss": 0.4431, "step": 7190 }, { "epoch": 0.43790153152878847, "grad_norm": 1.0349330836770902, "learning_rate": 4.9362397054151476e-06, "loss": 0.4626, "step": 7191 }, { "epoch": 0.43796242730566637, "grad_norm": 1.048817896677172, "learning_rate": 4.936221797566636e-06, "loss": 0.4892, "step": 7192 }, { "epoch": 0.4380233230825442, "grad_norm": 1.121295311179444, "learning_rate": 4.936203887236151e-06, "loss": 0.3971, "step": 7193 }, { "epoch": 0.4380842188594221, "grad_norm": 1.0662702267845394, "learning_rate": 4.9361859744237126e-06, "loss": 0.45, "step": 7194 }, { "epoch": 0.43814511463629996, "grad_norm": 1.1005947748928273, "learning_rate": 4.936168059129339e-06, "loss": 0.4183, "step": 7195 }, { "epoch": 0.43820601041317786, "grad_norm": 0.9789167929872251, "learning_rate": 4.936150141353047e-06, "loss": 0.4119, "step": 7196 }, { "epoch": 0.4382669061900557, "grad_norm": 1.0940556702760498, "learning_rate": 4.936132221094857e-06, "loss": 0.4404, "step": 7197 }, { "epoch": 0.4383278019669336, "grad_norm": 1.0527833425271735, "learning_rate": 4.936114298354786e-06, "loss": 0.4573, "step": 7198 }, { "epoch": 0.43838869774381145, "grad_norm": 1.0065547709341545, "learning_rate": 4.9360963731328515e-06, "loss": 0.4219, "step": 7199 }, { "epoch": 0.43844959352068935, "grad_norm": 0.954669757886232, "learning_rate": 4.9360784454290735e-06, "loss": 0.4299, "step": 7200 }, { "epoch": 0.4385104892975672, "grad_norm": 1.0280405412167883, "learning_rate": 4.9360605152434695e-06, "loss": 0.4603, "step": 7201 }, { "epoch": 0.4385713850744451, "grad_norm": 1.0437248658431337, "learning_rate": 4.936042582576057e-06, "loss": 0.4543, "step": 7202 }, { "epoch": 0.43863228085132294, "grad_norm": 0.9619793927138537, "learning_rate": 4.9360246474268555e-06, "loss": 0.4374, "step": 7203 }, { "epoch": 0.43869317662820084, "grad_norm": 1.0626300043110717, "learning_rate": 4.936006709795883e-06, "loss": 0.4924, "step": 7204 }, { "epoch": 0.4387540724050787, "grad_norm": 1.0816309333946716, "learning_rate": 4.935988769683157e-06, "loss": 0.4228, "step": 7205 }, { "epoch": 0.4388149681819566, "grad_norm": 0.9673930963357514, "learning_rate": 4.935970827088696e-06, "loss": 0.4527, "step": 7206 }, { "epoch": 0.43887586395883443, "grad_norm": 1.0682925071184193, "learning_rate": 4.935952882012519e-06, "loss": 0.423, "step": 7207 }, { "epoch": 0.43893675973571233, "grad_norm": 1.032240291367969, "learning_rate": 4.935934934454644e-06, "loss": 0.462, "step": 7208 }, { "epoch": 0.4389976555125902, "grad_norm": 1.0415584351563743, "learning_rate": 4.935916984415089e-06, "loss": 0.4683, "step": 7209 }, { "epoch": 0.4390585512894681, "grad_norm": 0.9724967979983258, "learning_rate": 4.935899031893873e-06, "loss": 0.4272, "step": 7210 }, { "epoch": 0.4391194470663459, "grad_norm": 1.1418079767400402, "learning_rate": 4.935881076891013e-06, "loss": 0.3959, "step": 7211 }, { "epoch": 0.4391803428432238, "grad_norm": 1.0189868568635436, "learning_rate": 4.935863119406528e-06, "loss": 0.4802, "step": 7212 }, { "epoch": 0.43924123862010167, "grad_norm": 0.9818597928960978, "learning_rate": 4.935845159440435e-06, "loss": 0.4793, "step": 7213 }, { "epoch": 0.43930213439697957, "grad_norm": 0.9181510147765516, "learning_rate": 4.935827196992756e-06, "loss": 0.4552, "step": 7214 }, { "epoch": 0.4393630301738574, "grad_norm": 1.1066069877537608, "learning_rate": 4.935809232063505e-06, "loss": 0.4433, "step": 7215 }, { "epoch": 0.4394239259507353, "grad_norm": 0.981504036068618, "learning_rate": 4.935791264652704e-06, "loss": 0.4417, "step": 7216 }, { "epoch": 0.4394848217276132, "grad_norm": 0.9979948817766211, "learning_rate": 4.935773294760369e-06, "loss": 0.4908, "step": 7217 }, { "epoch": 0.43954571750449106, "grad_norm": 0.9124366034028293, "learning_rate": 4.935755322386517e-06, "loss": 0.5051, "step": 7218 }, { "epoch": 0.43960661328136896, "grad_norm": 1.0597469747462063, "learning_rate": 4.93573734753117e-06, "loss": 0.4416, "step": 7219 }, { "epoch": 0.4396675090582468, "grad_norm": 1.056285256349971, "learning_rate": 4.935719370194344e-06, "loss": 0.3897, "step": 7220 }, { "epoch": 0.4397284048351247, "grad_norm": 1.1765673109023065, "learning_rate": 4.935701390376057e-06, "loss": 0.4886, "step": 7221 }, { "epoch": 0.43978930061200255, "grad_norm": 1.034978892894336, "learning_rate": 4.9356834080763295e-06, "loss": 0.4227, "step": 7222 }, { "epoch": 0.43985019638888045, "grad_norm": 1.0463863572630805, "learning_rate": 4.935665423295177e-06, "loss": 0.412, "step": 7223 }, { "epoch": 0.4399110921657583, "grad_norm": 0.9686586931233305, "learning_rate": 4.9356474360326204e-06, "loss": 0.43, "step": 7224 }, { "epoch": 0.4399719879426362, "grad_norm": 1.0622755281453584, "learning_rate": 4.935629446288676e-06, "loss": 0.4799, "step": 7225 }, { "epoch": 0.44003288371951405, "grad_norm": 1.0320147104850963, "learning_rate": 4.935611454063364e-06, "loss": 0.4375, "step": 7226 }, { "epoch": 0.44009377949639195, "grad_norm": 0.9745034858177507, "learning_rate": 4.9355934593567e-06, "loss": 0.4211, "step": 7227 }, { "epoch": 0.4401546752732698, "grad_norm": 1.1221695585023308, "learning_rate": 4.935575462168705e-06, "loss": 0.428, "step": 7228 }, { "epoch": 0.4402155710501477, "grad_norm": 0.9455276973337272, "learning_rate": 4.935557462499396e-06, "loss": 0.5145, "step": 7229 }, { "epoch": 0.44027646682702554, "grad_norm": 0.9939544126851209, "learning_rate": 4.935539460348793e-06, "loss": 0.4706, "step": 7230 }, { "epoch": 0.44033736260390344, "grad_norm": 1.1837427424144529, "learning_rate": 4.935521455716912e-06, "loss": 0.4222, "step": 7231 }, { "epoch": 0.4403982583807813, "grad_norm": 1.0943471616089913, "learning_rate": 4.935503448603772e-06, "loss": 0.4218, "step": 7232 }, { "epoch": 0.4404591541576592, "grad_norm": 1.060743223269677, "learning_rate": 4.935485439009392e-06, "loss": 0.4804, "step": 7233 }, { "epoch": 0.44052004993453703, "grad_norm": 1.0901607468779393, "learning_rate": 4.93546742693379e-06, "loss": 0.4651, "step": 7234 }, { "epoch": 0.44058094571141493, "grad_norm": 1.0579541650059776, "learning_rate": 4.935449412376985e-06, "loss": 0.4449, "step": 7235 }, { "epoch": 0.4406418414882928, "grad_norm": 1.0074546239209186, "learning_rate": 4.935431395338994e-06, "loss": 0.4901, "step": 7236 }, { "epoch": 0.4407027372651707, "grad_norm": 0.9787527195166797, "learning_rate": 4.9354133758198365e-06, "loss": 0.4207, "step": 7237 }, { "epoch": 0.4407636330420485, "grad_norm": 0.9987710653268562, "learning_rate": 4.935395353819531e-06, "loss": 0.4728, "step": 7238 }, { "epoch": 0.4408245288189264, "grad_norm": 1.0397361189019958, "learning_rate": 4.9353773293380945e-06, "loss": 0.503, "step": 7239 }, { "epoch": 0.44088542459580427, "grad_norm": 1.0714125721108727, "learning_rate": 4.935359302375547e-06, "loss": 0.4311, "step": 7240 }, { "epoch": 0.44094632037268217, "grad_norm": 0.9328561512269657, "learning_rate": 4.9353412729319054e-06, "loss": 0.5172, "step": 7241 }, { "epoch": 0.44100721614956, "grad_norm": 1.0970715116289957, "learning_rate": 4.935323241007189e-06, "loss": 0.4041, "step": 7242 }, { "epoch": 0.4410681119264379, "grad_norm": 1.1222463345439022, "learning_rate": 4.935305206601415e-06, "loss": 0.4134, "step": 7243 }, { "epoch": 0.44112900770331576, "grad_norm": 1.0283764620826261, "learning_rate": 4.935287169714604e-06, "loss": 0.4262, "step": 7244 }, { "epoch": 0.44118990348019366, "grad_norm": 0.9915555241159775, "learning_rate": 4.935269130346772e-06, "loss": 0.4731, "step": 7245 }, { "epoch": 0.4412507992570715, "grad_norm": 1.006928867150729, "learning_rate": 4.93525108849794e-06, "loss": 0.4542, "step": 7246 }, { "epoch": 0.4413116950339494, "grad_norm": 0.9480828485734594, "learning_rate": 4.935233044168123e-06, "loss": 0.4606, "step": 7247 }, { "epoch": 0.44137259081082725, "grad_norm": 1.0545184174735345, "learning_rate": 4.935214997357343e-06, "loss": 0.4338, "step": 7248 }, { "epoch": 0.44143348658770515, "grad_norm": 1.048325753737733, "learning_rate": 4.9351969480656155e-06, "loss": 0.4684, "step": 7249 }, { "epoch": 0.441494382364583, "grad_norm": 1.1162227191266734, "learning_rate": 4.93517889629296e-06, "loss": 0.397, "step": 7250 }, { "epoch": 0.4415552781414609, "grad_norm": 1.0175302705806375, "learning_rate": 4.9351608420393945e-06, "loss": 0.4383, "step": 7251 }, { "epoch": 0.44161617391833874, "grad_norm": 0.980201468796836, "learning_rate": 4.935142785304939e-06, "loss": 0.5573, "step": 7252 }, { "epoch": 0.44167706969521664, "grad_norm": 0.9212483096615662, "learning_rate": 4.93512472608961e-06, "loss": 0.4731, "step": 7253 }, { "epoch": 0.4417379654720945, "grad_norm": 0.9399981971024571, "learning_rate": 4.9351066643934265e-06, "loss": 0.5709, "step": 7254 }, { "epoch": 0.4417988612489724, "grad_norm": 1.0339409178673993, "learning_rate": 4.9350886002164065e-06, "loss": 0.5009, "step": 7255 }, { "epoch": 0.44185975702585023, "grad_norm": 0.99834007858473, "learning_rate": 4.935070533558569e-06, "loss": 0.4185, "step": 7256 }, { "epoch": 0.44192065280272813, "grad_norm": 1.0624000752243477, "learning_rate": 4.935052464419933e-06, "loss": 0.4682, "step": 7257 }, { "epoch": 0.44198154857960603, "grad_norm": 0.973687227151754, "learning_rate": 4.935034392800516e-06, "loss": 0.5543, "step": 7258 }, { "epoch": 0.4420424443564839, "grad_norm": 1.0068182193734654, "learning_rate": 4.9350163187003365e-06, "loss": 0.4552, "step": 7259 }, { "epoch": 0.4421033401333618, "grad_norm": 0.9843116036250437, "learning_rate": 4.934998242119412e-06, "loss": 0.4141, "step": 7260 }, { "epoch": 0.4421642359102396, "grad_norm": 1.027228857976661, "learning_rate": 4.934980163057763e-06, "loss": 0.4377, "step": 7261 }, { "epoch": 0.4422251316871175, "grad_norm": 0.9952628868174551, "learning_rate": 4.934962081515406e-06, "loss": 0.45, "step": 7262 }, { "epoch": 0.44228602746399537, "grad_norm": 1.0404973070159145, "learning_rate": 4.934943997492362e-06, "loss": 0.4257, "step": 7263 }, { "epoch": 0.44234692324087327, "grad_norm": 1.0497437472552167, "learning_rate": 4.934925910988646e-06, "loss": 0.4401, "step": 7264 }, { "epoch": 0.4424078190177511, "grad_norm": 1.0203829488020648, "learning_rate": 4.934907822004279e-06, "loss": 0.4826, "step": 7265 }, { "epoch": 0.442468714794629, "grad_norm": 0.965779806275302, "learning_rate": 4.934889730539278e-06, "loss": 0.468, "step": 7266 }, { "epoch": 0.44252961057150686, "grad_norm": 1.1110817052238522, "learning_rate": 4.9348716365936625e-06, "loss": 0.4116, "step": 7267 }, { "epoch": 0.44259050634838476, "grad_norm": 1.0502052498915118, "learning_rate": 4.9348535401674495e-06, "loss": 0.4554, "step": 7268 }, { "epoch": 0.4426514021252626, "grad_norm": 0.9995581945257171, "learning_rate": 4.934835441260659e-06, "loss": 0.4096, "step": 7269 }, { "epoch": 0.4427122979021405, "grad_norm": 0.9411506518630881, "learning_rate": 4.934817339873309e-06, "loss": 0.46, "step": 7270 }, { "epoch": 0.44277319367901835, "grad_norm": 1.0338899812622364, "learning_rate": 4.934799236005417e-06, "loss": 0.4019, "step": 7271 }, { "epoch": 0.44283408945589625, "grad_norm": 1.0467458656485693, "learning_rate": 4.934781129657002e-06, "loss": 0.4861, "step": 7272 }, { "epoch": 0.4428949852327741, "grad_norm": 0.9814579153885988, "learning_rate": 4.934763020828084e-06, "loss": 0.4664, "step": 7273 }, { "epoch": 0.442955881009652, "grad_norm": 1.001744679966709, "learning_rate": 4.934744909518679e-06, "loss": 0.4201, "step": 7274 }, { "epoch": 0.44301677678652984, "grad_norm": 1.1118500804279066, "learning_rate": 4.934726795728806e-06, "loss": 0.4945, "step": 7275 }, { "epoch": 0.44307767256340774, "grad_norm": 0.984748827990905, "learning_rate": 4.934708679458486e-06, "loss": 0.4588, "step": 7276 }, { "epoch": 0.4431385683402856, "grad_norm": 1.111309715531734, "learning_rate": 4.934690560707733e-06, "loss": 0.4331, "step": 7277 }, { "epoch": 0.4431994641171635, "grad_norm": 1.0880103374217203, "learning_rate": 4.934672439476569e-06, "loss": 0.4029, "step": 7278 }, { "epoch": 0.44326035989404133, "grad_norm": 1.0024598913450125, "learning_rate": 4.934654315765012e-06, "loss": 0.4102, "step": 7279 }, { "epoch": 0.44332125567091923, "grad_norm": 0.9812509377506362, "learning_rate": 4.934636189573079e-06, "loss": 0.4824, "step": 7280 }, { "epoch": 0.4433821514477971, "grad_norm": 0.986229534933318, "learning_rate": 4.934618060900789e-06, "loss": 0.4734, "step": 7281 }, { "epoch": 0.443443047224675, "grad_norm": 1.0394075385850179, "learning_rate": 4.934599929748161e-06, "loss": 0.4671, "step": 7282 }, { "epoch": 0.4435039430015528, "grad_norm": 0.9701527796737699, "learning_rate": 4.934581796115213e-06, "loss": 0.4522, "step": 7283 }, { "epoch": 0.4435648387784307, "grad_norm": 0.9287471671589884, "learning_rate": 4.934563660001964e-06, "loss": 0.4317, "step": 7284 }, { "epoch": 0.44362573455530857, "grad_norm": 0.992955101088432, "learning_rate": 4.934545521408433e-06, "loss": 0.4483, "step": 7285 }, { "epoch": 0.44368663033218647, "grad_norm": 1.0800469122952792, "learning_rate": 4.934527380334636e-06, "loss": 0.4826, "step": 7286 }, { "epoch": 0.4437475261090643, "grad_norm": 1.019098113015284, "learning_rate": 4.934509236780593e-06, "loss": 0.4416, "step": 7287 }, { "epoch": 0.4438084218859422, "grad_norm": 0.9956572795705897, "learning_rate": 4.9344910907463246e-06, "loss": 0.4239, "step": 7288 }, { "epoch": 0.44386931766282006, "grad_norm": 1.0032293861974197, "learning_rate": 4.934472942231846e-06, "loss": 0.4889, "step": 7289 }, { "epoch": 0.44393021343969796, "grad_norm": 1.0791753977792846, "learning_rate": 4.934454791237177e-06, "loss": 0.4341, "step": 7290 }, { "epoch": 0.4439911092165758, "grad_norm": 1.0351698214727922, "learning_rate": 4.934436637762337e-06, "loss": 0.372, "step": 7291 }, { "epoch": 0.4440520049934537, "grad_norm": 0.997017010764517, "learning_rate": 4.934418481807342e-06, "loss": 0.4451, "step": 7292 }, { "epoch": 0.44411290077033155, "grad_norm": 1.003930700282819, "learning_rate": 4.934400323372213e-06, "loss": 0.5023, "step": 7293 }, { "epoch": 0.44417379654720945, "grad_norm": 1.0858477059927678, "learning_rate": 4.9343821624569675e-06, "loss": 0.4453, "step": 7294 }, { "epoch": 0.4442346923240873, "grad_norm": 0.9669911845327068, "learning_rate": 4.9343639990616234e-06, "loss": 0.4515, "step": 7295 }, { "epoch": 0.4442955881009652, "grad_norm": 1.0738477376536049, "learning_rate": 4.934345833186201e-06, "loss": 0.4189, "step": 7296 }, { "epoch": 0.44435648387784304, "grad_norm": 0.9888363452631758, "learning_rate": 4.934327664830717e-06, "loss": 0.4929, "step": 7297 }, { "epoch": 0.44441737965472095, "grad_norm": 1.0163203845157402, "learning_rate": 4.934309493995191e-06, "loss": 0.4685, "step": 7298 }, { "epoch": 0.44447827543159885, "grad_norm": 0.9834149795001147, "learning_rate": 4.934291320679641e-06, "loss": 0.4571, "step": 7299 }, { "epoch": 0.4445391712084767, "grad_norm": 1.0112421769174458, "learning_rate": 4.934273144884085e-06, "loss": 0.4842, "step": 7300 }, { "epoch": 0.4446000669853546, "grad_norm": 1.0251551149354612, "learning_rate": 4.934254966608543e-06, "loss": 0.4434, "step": 7301 }, { "epoch": 0.44466096276223244, "grad_norm": 1.0385533563452014, "learning_rate": 4.934236785853032e-06, "loss": 0.45, "step": 7302 }, { "epoch": 0.44472185853911034, "grad_norm": 1.0272465991243627, "learning_rate": 4.9342186026175705e-06, "loss": 0.4941, "step": 7303 }, { "epoch": 0.4447827543159882, "grad_norm": 1.0091444629616264, "learning_rate": 4.93420041690218e-06, "loss": 0.4544, "step": 7304 }, { "epoch": 0.4448436500928661, "grad_norm": 1.1017991407737997, "learning_rate": 4.934182228706875e-06, "loss": 0.4521, "step": 7305 }, { "epoch": 0.44490454586974393, "grad_norm": 0.9520079579142149, "learning_rate": 4.934164038031676e-06, "loss": 0.4144, "step": 7306 }, { "epoch": 0.44496544164662183, "grad_norm": 0.9853790608542587, "learning_rate": 4.934145844876601e-06, "loss": 0.4546, "step": 7307 }, { "epoch": 0.4450263374234997, "grad_norm": 0.9927983734101863, "learning_rate": 4.934127649241669e-06, "loss": 0.4899, "step": 7308 }, { "epoch": 0.4450872332003776, "grad_norm": 1.065237586880423, "learning_rate": 4.934109451126899e-06, "loss": 0.4616, "step": 7309 }, { "epoch": 0.4451481289772554, "grad_norm": 1.0599718971366985, "learning_rate": 4.934091250532308e-06, "loss": 0.4483, "step": 7310 }, { "epoch": 0.4452090247541333, "grad_norm": 1.038722053106346, "learning_rate": 4.934073047457915e-06, "loss": 0.4764, "step": 7311 }, { "epoch": 0.44526992053101117, "grad_norm": 1.034930102970806, "learning_rate": 4.934054841903741e-06, "loss": 0.496, "step": 7312 }, { "epoch": 0.44533081630788907, "grad_norm": 1.0263504429633639, "learning_rate": 4.934036633869801e-06, "loss": 0.4985, "step": 7313 }, { "epoch": 0.4453917120847669, "grad_norm": 0.954428480082317, "learning_rate": 4.934018423356116e-06, "loss": 0.4612, "step": 7314 }, { "epoch": 0.4454526078616448, "grad_norm": 1.0380811013425528, "learning_rate": 4.934000210362703e-06, "loss": 0.4657, "step": 7315 }, { "epoch": 0.44551350363852266, "grad_norm": 0.9495157565936104, "learning_rate": 4.933981994889581e-06, "loss": 0.4711, "step": 7316 }, { "epoch": 0.44557439941540056, "grad_norm": 1.0904241048974326, "learning_rate": 4.933963776936769e-06, "loss": 0.3848, "step": 7317 }, { "epoch": 0.4456352951922784, "grad_norm": 1.0373619497252407, "learning_rate": 4.933945556504285e-06, "loss": 0.4438, "step": 7318 }, { "epoch": 0.4456961909691563, "grad_norm": 1.0336632995772932, "learning_rate": 4.933927333592149e-06, "loss": 0.3951, "step": 7319 }, { "epoch": 0.44575708674603415, "grad_norm": 1.0799052088267815, "learning_rate": 4.933909108200377e-06, "loss": 0.4595, "step": 7320 }, { "epoch": 0.44581798252291205, "grad_norm": 1.0099203850808276, "learning_rate": 4.9338908803289896e-06, "loss": 0.4398, "step": 7321 }, { "epoch": 0.4458788782997899, "grad_norm": 1.1121078312599473, "learning_rate": 4.933872649978005e-06, "loss": 0.425, "step": 7322 }, { "epoch": 0.4459397740766678, "grad_norm": 1.0883079788901158, "learning_rate": 4.933854417147441e-06, "loss": 0.4651, "step": 7323 }, { "epoch": 0.44600066985354564, "grad_norm": 0.935454529114295, "learning_rate": 4.9338361818373174e-06, "loss": 0.4861, "step": 7324 }, { "epoch": 0.44606156563042354, "grad_norm": 1.0128105645785883, "learning_rate": 4.9338179440476515e-06, "loss": 0.4766, "step": 7325 }, { "epoch": 0.4461224614073014, "grad_norm": 0.9554429473832693, "learning_rate": 4.9337997037784635e-06, "loss": 0.4631, "step": 7326 }, { "epoch": 0.4461833571841793, "grad_norm": 1.0575443557323876, "learning_rate": 4.9337814610297695e-06, "loss": 0.4145, "step": 7327 }, { "epoch": 0.44624425296105713, "grad_norm": 0.9671778998589864, "learning_rate": 4.933763215801591e-06, "loss": 0.465, "step": 7328 }, { "epoch": 0.44630514873793503, "grad_norm": 1.0531062950625234, "learning_rate": 4.933744968093944e-06, "loss": 0.4208, "step": 7329 }, { "epoch": 0.4463660445148129, "grad_norm": 1.034118447337677, "learning_rate": 4.933726717906849e-06, "loss": 0.4414, "step": 7330 }, { "epoch": 0.4464269402916908, "grad_norm": 1.023714250189287, "learning_rate": 4.933708465240323e-06, "loss": 0.4582, "step": 7331 }, { "epoch": 0.4464878360685686, "grad_norm": 0.985682795812779, "learning_rate": 4.933690210094386e-06, "loss": 0.429, "step": 7332 }, { "epoch": 0.4465487318454465, "grad_norm": 0.9788396541696571, "learning_rate": 4.933671952469056e-06, "loss": 0.5453, "step": 7333 }, { "epoch": 0.44660962762232437, "grad_norm": 1.0665741088419052, "learning_rate": 4.933653692364352e-06, "loss": 0.4297, "step": 7334 }, { "epoch": 0.44667052339920227, "grad_norm": 1.0167996962054406, "learning_rate": 4.933635429780291e-06, "loss": 0.4956, "step": 7335 }, { "epoch": 0.4467314191760801, "grad_norm": 1.0956211682592765, "learning_rate": 4.933617164716894e-06, "loss": 0.3805, "step": 7336 }, { "epoch": 0.446792314952958, "grad_norm": 1.0044193106606811, "learning_rate": 4.933598897174178e-06, "loss": 0.4546, "step": 7337 }, { "epoch": 0.44685321072983586, "grad_norm": 0.8911059740857264, "learning_rate": 4.933580627152162e-06, "loss": 0.5478, "step": 7338 }, { "epoch": 0.44691410650671376, "grad_norm": 1.0516666623508266, "learning_rate": 4.9335623546508645e-06, "loss": 0.4319, "step": 7339 }, { "epoch": 0.44697500228359166, "grad_norm": 1.1005769286440323, "learning_rate": 4.933544079670304e-06, "loss": 0.4622, "step": 7340 }, { "epoch": 0.4470358980604695, "grad_norm": 1.0804731155690794, "learning_rate": 4.9335258022105e-06, "loss": 0.4825, "step": 7341 }, { "epoch": 0.4470967938373474, "grad_norm": 1.011506050620435, "learning_rate": 4.93350752227147e-06, "loss": 0.4792, "step": 7342 }, { "epoch": 0.44715768961422525, "grad_norm": 1.0197844438136592, "learning_rate": 4.933489239853234e-06, "loss": 0.4629, "step": 7343 }, { "epoch": 0.44721858539110315, "grad_norm": 1.06995840354818, "learning_rate": 4.933470954955809e-06, "loss": 0.491, "step": 7344 }, { "epoch": 0.447279481167981, "grad_norm": 1.0575762770118746, "learning_rate": 4.933452667579215e-06, "loss": 0.498, "step": 7345 }, { "epoch": 0.4473403769448589, "grad_norm": 1.0357950807121394, "learning_rate": 4.933434377723469e-06, "loss": 0.4427, "step": 7346 }, { "epoch": 0.44740127272173674, "grad_norm": 0.9570855681319796, "learning_rate": 4.9334160853885915e-06, "loss": 0.4354, "step": 7347 }, { "epoch": 0.44746216849861464, "grad_norm": 1.1157934181011848, "learning_rate": 4.9333977905746e-06, "loss": 0.4187, "step": 7348 }, { "epoch": 0.4475230642754925, "grad_norm": 0.9729921894865784, "learning_rate": 4.933379493281514e-06, "loss": 0.4602, "step": 7349 }, { "epoch": 0.4475839600523704, "grad_norm": 1.0449068895370273, "learning_rate": 4.933361193509351e-06, "loss": 0.3993, "step": 7350 }, { "epoch": 0.44764485582924823, "grad_norm": 1.0273846338227244, "learning_rate": 4.93334289125813e-06, "loss": 0.4268, "step": 7351 }, { "epoch": 0.44770575160612613, "grad_norm": 0.9884138865101164, "learning_rate": 4.93332458652787e-06, "loss": 0.4483, "step": 7352 }, { "epoch": 0.447766647383004, "grad_norm": 1.0281421547585625, "learning_rate": 4.93330627931859e-06, "loss": 0.444, "step": 7353 }, { "epoch": 0.4478275431598819, "grad_norm": 1.1575063893309854, "learning_rate": 4.9332879696303074e-06, "loss": 0.3997, "step": 7354 }, { "epoch": 0.4478884389367597, "grad_norm": 1.024448311510132, "learning_rate": 4.933269657463041e-06, "loss": 0.4614, "step": 7355 }, { "epoch": 0.4479493347136376, "grad_norm": 1.008793560689516, "learning_rate": 4.933251342816811e-06, "loss": 0.4771, "step": 7356 }, { "epoch": 0.44801023049051547, "grad_norm": 1.0806585620957594, "learning_rate": 4.933233025691636e-06, "loss": 0.4117, "step": 7357 }, { "epoch": 0.44807112626739337, "grad_norm": 1.0498729405285028, "learning_rate": 4.933214706087533e-06, "loss": 0.4597, "step": 7358 }, { "epoch": 0.4481320220442712, "grad_norm": 1.052800215888779, "learning_rate": 4.933196384004521e-06, "loss": 0.4693, "step": 7359 }, { "epoch": 0.4481929178211491, "grad_norm": 1.001559399962401, "learning_rate": 4.933178059442619e-06, "loss": 0.4292, "step": 7360 }, { "epoch": 0.44825381359802696, "grad_norm": 1.1343155857473608, "learning_rate": 4.9331597324018465e-06, "loss": 0.4597, "step": 7361 }, { "epoch": 0.44831470937490486, "grad_norm": 1.0199986625525408, "learning_rate": 4.933141402882221e-06, "loss": 0.489, "step": 7362 }, { "epoch": 0.4483756051517827, "grad_norm": 1.0554293629555378, "learning_rate": 4.9331230708837614e-06, "loss": 0.411, "step": 7363 }, { "epoch": 0.4484365009286606, "grad_norm": 0.9396617443827018, "learning_rate": 4.933104736406487e-06, "loss": 0.4759, "step": 7364 }, { "epoch": 0.44849739670553845, "grad_norm": 0.9522359679257205, "learning_rate": 4.933086399450417e-06, "loss": 0.4548, "step": 7365 }, { "epoch": 0.44855829248241635, "grad_norm": 0.9864468913911251, "learning_rate": 4.933068060015568e-06, "loss": 0.4564, "step": 7366 }, { "epoch": 0.4486191882592942, "grad_norm": 0.9759141361861251, "learning_rate": 4.93304971810196e-06, "loss": 0.4664, "step": 7367 }, { "epoch": 0.4486800840361721, "grad_norm": 1.0393496508651203, "learning_rate": 4.933031373709611e-06, "loss": 0.4052, "step": 7368 }, { "epoch": 0.44874097981304994, "grad_norm": 1.0948315726222801, "learning_rate": 4.933013026838542e-06, "loss": 0.4644, "step": 7369 }, { "epoch": 0.44880187558992785, "grad_norm": 0.9850671821355472, "learning_rate": 4.932994677488769e-06, "loss": 0.4178, "step": 7370 }, { "epoch": 0.4488627713668057, "grad_norm": 1.0140755182510848, "learning_rate": 4.932976325660311e-06, "loss": 0.4368, "step": 7371 }, { "epoch": 0.4489236671436836, "grad_norm": 0.9858879986588635, "learning_rate": 4.9329579713531875e-06, "loss": 0.3999, "step": 7372 }, { "epoch": 0.44898456292056144, "grad_norm": 1.0578891250550975, "learning_rate": 4.932939614567417e-06, "loss": 0.4774, "step": 7373 }, { "epoch": 0.44904545869743934, "grad_norm": 1.0497506030507293, "learning_rate": 4.932921255303018e-06, "loss": 0.4742, "step": 7374 }, { "epoch": 0.4491063544743172, "grad_norm": 0.9740334260044125, "learning_rate": 4.932902893560011e-06, "loss": 0.4694, "step": 7375 }, { "epoch": 0.4491672502511951, "grad_norm": 1.0023026894570104, "learning_rate": 4.932884529338411e-06, "loss": 0.469, "step": 7376 }, { "epoch": 0.4492281460280729, "grad_norm": 1.0883474015768375, "learning_rate": 4.93286616263824e-06, "loss": 0.5051, "step": 7377 }, { "epoch": 0.44928904180495083, "grad_norm": 0.984819282909335, "learning_rate": 4.932847793459515e-06, "loss": 0.5028, "step": 7378 }, { "epoch": 0.4493499375818287, "grad_norm": 0.9321292941557476, "learning_rate": 4.932829421802256e-06, "loss": 0.4777, "step": 7379 }, { "epoch": 0.4494108333587066, "grad_norm": 1.040894490390894, "learning_rate": 4.932811047666481e-06, "loss": 0.4315, "step": 7380 }, { "epoch": 0.4494717291355845, "grad_norm": 0.9587327130572632, "learning_rate": 4.9327926710522075e-06, "loss": 0.4477, "step": 7381 }, { "epoch": 0.4495326249124623, "grad_norm": 1.0072849984966425, "learning_rate": 4.932774291959456e-06, "loss": 0.4022, "step": 7382 }, { "epoch": 0.4495935206893402, "grad_norm": 0.9710007579410049, "learning_rate": 4.932755910388244e-06, "loss": 0.4978, "step": 7383 }, { "epoch": 0.44965441646621807, "grad_norm": 1.019490889778817, "learning_rate": 4.932737526338592e-06, "loss": 0.4681, "step": 7384 }, { "epoch": 0.44971531224309597, "grad_norm": 1.004426210696819, "learning_rate": 4.932719139810518e-06, "loss": 0.4405, "step": 7385 }, { "epoch": 0.4497762080199738, "grad_norm": 1.0103786058658966, "learning_rate": 4.932700750804039e-06, "loss": 0.4533, "step": 7386 }, { "epoch": 0.4498371037968517, "grad_norm": 1.032088413317692, "learning_rate": 4.932682359319175e-06, "loss": 0.4447, "step": 7387 }, { "epoch": 0.44989799957372956, "grad_norm": 0.9599091748829901, "learning_rate": 4.932663965355945e-06, "loss": 0.4709, "step": 7388 }, { "epoch": 0.44995889535060746, "grad_norm": 1.0541048425774984, "learning_rate": 4.932645568914368e-06, "loss": 0.4279, "step": 7389 }, { "epoch": 0.4500197911274853, "grad_norm": 0.953646309566318, "learning_rate": 4.932627169994462e-06, "loss": 0.4288, "step": 7390 }, { "epoch": 0.4500806869043632, "grad_norm": 0.990383763401407, "learning_rate": 4.9326087685962464e-06, "loss": 0.4812, "step": 7391 }, { "epoch": 0.45014158268124105, "grad_norm": 1.0816636635080314, "learning_rate": 4.932590364719739e-06, "loss": 0.4106, "step": 7392 }, { "epoch": 0.45020247845811895, "grad_norm": 1.0516646062964516, "learning_rate": 4.93257195836496e-06, "loss": 0.4602, "step": 7393 }, { "epoch": 0.4502633742349968, "grad_norm": 0.9922952796717079, "learning_rate": 4.9325535495319265e-06, "loss": 0.4789, "step": 7394 }, { "epoch": 0.4503242700118747, "grad_norm": 1.066287826136348, "learning_rate": 4.932535138220658e-06, "loss": 0.4563, "step": 7395 }, { "epoch": 0.45038516578875254, "grad_norm": 0.9866583131117493, "learning_rate": 4.9325167244311735e-06, "loss": 0.4569, "step": 7396 }, { "epoch": 0.45044606156563044, "grad_norm": 1.046049212308855, "learning_rate": 4.932498308163492e-06, "loss": 0.435, "step": 7397 }, { "epoch": 0.4505069573425083, "grad_norm": 1.0174775064348012, "learning_rate": 4.9324798894176304e-06, "loss": 0.4134, "step": 7398 }, { "epoch": 0.4505678531193862, "grad_norm": 0.9928422339472004, "learning_rate": 4.93246146819361e-06, "loss": 0.4897, "step": 7399 }, { "epoch": 0.45062874889626403, "grad_norm": 1.0225066656987338, "learning_rate": 4.932443044491449e-06, "loss": 0.438, "step": 7400 }, { "epoch": 0.45068964467314193, "grad_norm": 1.0333694042270587, "learning_rate": 4.932424618311164e-06, "loss": 0.4288, "step": 7401 }, { "epoch": 0.4507505404500198, "grad_norm": 1.044552232029265, "learning_rate": 4.932406189652776e-06, "loss": 0.4699, "step": 7402 }, { "epoch": 0.4508114362268977, "grad_norm": 1.1133610355051136, "learning_rate": 4.932387758516303e-06, "loss": 0.4369, "step": 7403 }, { "epoch": 0.4508723320037755, "grad_norm": 1.034803014701502, "learning_rate": 4.9323693249017645e-06, "loss": 0.5156, "step": 7404 }, { "epoch": 0.4509332277806534, "grad_norm": 0.958011165186229, "learning_rate": 4.932350888809178e-06, "loss": 0.4394, "step": 7405 }, { "epoch": 0.45099412355753127, "grad_norm": 1.0424422644344555, "learning_rate": 4.932332450238564e-06, "loss": 0.4388, "step": 7406 }, { "epoch": 0.45105501933440917, "grad_norm": 0.9361314976681343, "learning_rate": 4.932314009189939e-06, "loss": 0.496, "step": 7407 }, { "epoch": 0.451115915111287, "grad_norm": 1.0161443015567817, "learning_rate": 4.932295565663324e-06, "loss": 0.4873, "step": 7408 }, { "epoch": 0.4511768108881649, "grad_norm": 0.9881028396273039, "learning_rate": 4.9322771196587366e-06, "loss": 0.4673, "step": 7409 }, { "epoch": 0.45123770666504276, "grad_norm": 1.0295053595321386, "learning_rate": 4.9322586711761954e-06, "loss": 0.4377, "step": 7410 }, { "epoch": 0.45129860244192066, "grad_norm": 1.0196667018598555, "learning_rate": 4.93224022021572e-06, "loss": 0.4031, "step": 7411 }, { "epoch": 0.4513594982187985, "grad_norm": 1.1143769930354233, "learning_rate": 4.932221766777329e-06, "loss": 0.488, "step": 7412 }, { "epoch": 0.4514203939956764, "grad_norm": 0.9793706253849925, "learning_rate": 4.9322033108610415e-06, "loss": 0.4515, "step": 7413 }, { "epoch": 0.45148128977255425, "grad_norm": 0.9171379828777257, "learning_rate": 4.932184852466875e-06, "loss": 0.5312, "step": 7414 }, { "epoch": 0.45154218554943215, "grad_norm": 1.1238103757092197, "learning_rate": 4.932166391594849e-06, "loss": 0.3837, "step": 7415 }, { "epoch": 0.45160308132631, "grad_norm": 1.0453290304203857, "learning_rate": 4.932147928244983e-06, "loss": 0.4052, "step": 7416 }, { "epoch": 0.4516639771031879, "grad_norm": 1.0435206731007836, "learning_rate": 4.932129462417295e-06, "loss": 0.3995, "step": 7417 }, { "epoch": 0.45172487288006574, "grad_norm": 0.9692313671030741, "learning_rate": 4.932110994111804e-06, "loss": 0.4601, "step": 7418 }, { "epoch": 0.45178576865694364, "grad_norm": 1.0920226357206932, "learning_rate": 4.932092523328529e-06, "loss": 0.4426, "step": 7419 }, { "epoch": 0.4518466644338215, "grad_norm": 0.9827759654288231, "learning_rate": 4.932074050067489e-06, "loss": 0.4769, "step": 7420 }, { "epoch": 0.4519075602106994, "grad_norm": 0.9604123447303515, "learning_rate": 4.932055574328703e-06, "loss": 0.5136, "step": 7421 }, { "epoch": 0.4519684559875773, "grad_norm": 1.1061074096492636, "learning_rate": 4.932037096112188e-06, "loss": 0.4086, "step": 7422 }, { "epoch": 0.45202935176445513, "grad_norm": 0.9119312571408844, "learning_rate": 4.932018615417965e-06, "loss": 0.4664, "step": 7423 }, { "epoch": 0.45209024754133303, "grad_norm": 1.0956459662832594, "learning_rate": 4.932000132246052e-06, "loss": 0.4722, "step": 7424 }, { "epoch": 0.4521511433182109, "grad_norm": 0.9765489660554908, "learning_rate": 4.931981646596467e-06, "loss": 0.5148, "step": 7425 }, { "epoch": 0.4522120390950888, "grad_norm": 0.9482616089848922, "learning_rate": 4.93196315846923e-06, "loss": 0.501, "step": 7426 }, { "epoch": 0.4522729348719666, "grad_norm": 0.9219764286645625, "learning_rate": 4.93194466786436e-06, "loss": 0.5119, "step": 7427 }, { "epoch": 0.4523338306488445, "grad_norm": 0.8977244425429998, "learning_rate": 4.9319261747818745e-06, "loss": 0.5052, "step": 7428 }, { "epoch": 0.45239472642572237, "grad_norm": 1.0632710897612336, "learning_rate": 4.9319076792217945e-06, "loss": 0.5076, "step": 7429 }, { "epoch": 0.45245562220260027, "grad_norm": 1.0322116319178465, "learning_rate": 4.931889181184136e-06, "loss": 0.4614, "step": 7430 }, { "epoch": 0.4525165179794781, "grad_norm": 0.967841519970793, "learning_rate": 4.931870680668921e-06, "loss": 0.4332, "step": 7431 }, { "epoch": 0.452577413756356, "grad_norm": 1.0185679294404733, "learning_rate": 4.931852177676165e-06, "loss": 0.4894, "step": 7432 }, { "epoch": 0.45263830953323386, "grad_norm": 1.0243172122855078, "learning_rate": 4.93183367220589e-06, "loss": 0.4414, "step": 7433 }, { "epoch": 0.45269920531011176, "grad_norm": 1.0415089280291496, "learning_rate": 4.9318151642581124e-06, "loss": 0.474, "step": 7434 }, { "epoch": 0.4527601010869896, "grad_norm": 1.0532491504784331, "learning_rate": 4.931796653832852e-06, "loss": 0.433, "step": 7435 }, { "epoch": 0.4528209968638675, "grad_norm": 1.0852362332552357, "learning_rate": 4.931778140930129e-06, "loss": 0.5107, "step": 7436 }, { "epoch": 0.45288189264074535, "grad_norm": 1.1313698331925133, "learning_rate": 4.93175962554996e-06, "loss": 0.5146, "step": 7437 }, { "epoch": 0.45294278841762325, "grad_norm": 0.9112947023860014, "learning_rate": 4.931741107692365e-06, "loss": 0.4847, "step": 7438 }, { "epoch": 0.4530036841945011, "grad_norm": 1.0283730301138805, "learning_rate": 4.931722587357363e-06, "loss": 0.4956, "step": 7439 }, { "epoch": 0.453064579971379, "grad_norm": 0.9670402875596027, "learning_rate": 4.931704064544972e-06, "loss": 0.4187, "step": 7440 }, { "epoch": 0.45312547574825685, "grad_norm": 1.050040178071287, "learning_rate": 4.931685539255212e-06, "loss": 0.4466, "step": 7441 }, { "epoch": 0.45318637152513475, "grad_norm": 1.0079823213233787, "learning_rate": 4.9316670114881015e-06, "loss": 0.3866, "step": 7442 }, { "epoch": 0.4532472673020126, "grad_norm": 0.977790453270162, "learning_rate": 4.931648481243658e-06, "loss": 0.4157, "step": 7443 }, { "epoch": 0.4533081630788905, "grad_norm": 0.9531603672688395, "learning_rate": 4.931629948521903e-06, "loss": 0.5459, "step": 7444 }, { "epoch": 0.45336905885576834, "grad_norm": 0.9825180514462307, "learning_rate": 4.9316114133228535e-06, "loss": 0.4478, "step": 7445 }, { "epoch": 0.45342995463264624, "grad_norm": 1.057454661908373, "learning_rate": 4.931592875646528e-06, "loss": 0.3496, "step": 7446 }, { "epoch": 0.4534908504095241, "grad_norm": 1.001964031632285, "learning_rate": 4.931574335492947e-06, "loss": 0.4253, "step": 7447 }, { "epoch": 0.453551746186402, "grad_norm": 1.0576911617806861, "learning_rate": 4.93155579286213e-06, "loss": 0.5164, "step": 7448 }, { "epoch": 0.45361264196327983, "grad_norm": 1.117385416761931, "learning_rate": 4.9315372477540925e-06, "loss": 0.3975, "step": 7449 }, { "epoch": 0.45367353774015773, "grad_norm": 1.0122672243821254, "learning_rate": 4.931518700168856e-06, "loss": 0.4614, "step": 7450 }, { "epoch": 0.4537344335170356, "grad_norm": 1.045692578117085, "learning_rate": 4.931500150106438e-06, "loss": 0.513, "step": 7451 }, { "epoch": 0.4537953292939135, "grad_norm": 1.0322849987095075, "learning_rate": 4.931481597566859e-06, "loss": 0.3914, "step": 7452 }, { "epoch": 0.4538562250707913, "grad_norm": 1.0999710079983553, "learning_rate": 4.931463042550137e-06, "loss": 0.4317, "step": 7453 }, { "epoch": 0.4539171208476692, "grad_norm": 1.0173154975137277, "learning_rate": 4.9314444850562915e-06, "loss": 0.5106, "step": 7454 }, { "epoch": 0.45397801662454706, "grad_norm": 1.0924462945813784, "learning_rate": 4.931425925085341e-06, "loss": 0.3801, "step": 7455 }, { "epoch": 0.45403891240142497, "grad_norm": 0.9627186875424879, "learning_rate": 4.9314073626373035e-06, "loss": 0.4619, "step": 7456 }, { "epoch": 0.4540998081783028, "grad_norm": 1.0202912697824114, "learning_rate": 4.931388797712199e-06, "loss": 0.4955, "step": 7457 }, { "epoch": 0.4541607039551807, "grad_norm": 1.0384823720395626, "learning_rate": 4.931370230310046e-06, "loss": 0.4189, "step": 7458 }, { "epoch": 0.45422159973205856, "grad_norm": 1.1297759875148612, "learning_rate": 4.931351660430864e-06, "loss": 0.5056, "step": 7459 }, { "epoch": 0.45428249550893646, "grad_norm": 0.9786160384520766, "learning_rate": 4.931333088074671e-06, "loss": 0.4992, "step": 7460 }, { "epoch": 0.4543433912858143, "grad_norm": 1.0102342748821773, "learning_rate": 4.931314513241487e-06, "loss": 0.4572, "step": 7461 }, { "epoch": 0.4544042870626922, "grad_norm": 0.9803542341789964, "learning_rate": 4.9312959359313295e-06, "loss": 0.4224, "step": 7462 }, { "epoch": 0.4544651828395701, "grad_norm": 1.062361032753291, "learning_rate": 4.931277356144219e-06, "loss": 0.5363, "step": 7463 }, { "epoch": 0.45452607861644795, "grad_norm": 0.9520614847969547, "learning_rate": 4.931258773880173e-06, "loss": 0.4115, "step": 7464 }, { "epoch": 0.45458697439332585, "grad_norm": 1.0743819299433177, "learning_rate": 4.931240189139212e-06, "loss": 0.3898, "step": 7465 }, { "epoch": 0.4546478701702037, "grad_norm": 1.04325207532882, "learning_rate": 4.9312216019213535e-06, "loss": 0.4217, "step": 7466 }, { "epoch": 0.4547087659470816, "grad_norm": 1.0684224933553412, "learning_rate": 4.931203012226617e-06, "loss": 0.439, "step": 7467 }, { "epoch": 0.45476966172395944, "grad_norm": 0.9999332256504392, "learning_rate": 4.931184420055022e-06, "loss": 0.4613, "step": 7468 }, { "epoch": 0.45483055750083734, "grad_norm": 0.9560195846115628, "learning_rate": 4.9311658254065855e-06, "loss": 0.4641, "step": 7469 }, { "epoch": 0.4548914532777152, "grad_norm": 0.9593908031029864, "learning_rate": 4.931147228281329e-06, "loss": 0.443, "step": 7470 }, { "epoch": 0.4549523490545931, "grad_norm": 1.0649298515788572, "learning_rate": 4.931128628679269e-06, "loss": 0.4253, "step": 7471 }, { "epoch": 0.45501324483147093, "grad_norm": 1.0768283177028168, "learning_rate": 4.931110026600428e-06, "loss": 0.3643, "step": 7472 }, { "epoch": 0.45507414060834883, "grad_norm": 0.9892102656019351, "learning_rate": 4.9310914220448204e-06, "loss": 0.4667, "step": 7473 }, { "epoch": 0.4551350363852267, "grad_norm": 1.055975054613745, "learning_rate": 4.931072815012468e-06, "loss": 0.398, "step": 7474 }, { "epoch": 0.4551959321621046, "grad_norm": 0.9885264764814726, "learning_rate": 4.931054205503389e-06, "loss": 0.4848, "step": 7475 }, { "epoch": 0.4552568279389824, "grad_norm": 0.9233090031968517, "learning_rate": 4.931035593517604e-06, "loss": 0.4957, "step": 7476 }, { "epoch": 0.4553177237158603, "grad_norm": 1.1333029128908831, "learning_rate": 4.931016979055129e-06, "loss": 0.3885, "step": 7477 }, { "epoch": 0.45537861949273817, "grad_norm": 0.9874134045451549, "learning_rate": 4.930998362115985e-06, "loss": 0.4461, "step": 7478 }, { "epoch": 0.45543951526961607, "grad_norm": 1.0504876720381844, "learning_rate": 4.930979742700189e-06, "loss": 0.4515, "step": 7479 }, { "epoch": 0.4555004110464939, "grad_norm": 1.0180022271555333, "learning_rate": 4.930961120807763e-06, "loss": 0.4745, "step": 7480 }, { "epoch": 0.4555613068233718, "grad_norm": 1.0594802464631854, "learning_rate": 4.930942496438725e-06, "loss": 0.4482, "step": 7481 }, { "epoch": 0.45562220260024966, "grad_norm": 1.0491069043792614, "learning_rate": 4.930923869593091e-06, "loss": 0.4397, "step": 7482 }, { "epoch": 0.45568309837712756, "grad_norm": 1.1734696150189163, "learning_rate": 4.930905240270884e-06, "loss": 0.4539, "step": 7483 }, { "epoch": 0.4557439941540054, "grad_norm": 0.9836566056564968, "learning_rate": 4.930886608472121e-06, "loss": 0.5187, "step": 7484 }, { "epoch": 0.4558048899308833, "grad_norm": 0.9885284381198015, "learning_rate": 4.930867974196821e-06, "loss": 0.5666, "step": 7485 }, { "epoch": 0.45586578570776115, "grad_norm": 0.9818507192199726, "learning_rate": 4.930849337445003e-06, "loss": 0.4527, "step": 7486 }, { "epoch": 0.45592668148463905, "grad_norm": 0.9728924064122573, "learning_rate": 4.930830698216687e-06, "loss": 0.4337, "step": 7487 }, { "epoch": 0.4559875772615169, "grad_norm": 1.0033425341363154, "learning_rate": 4.930812056511891e-06, "loss": 0.5056, "step": 7488 }, { "epoch": 0.4560484730383948, "grad_norm": 0.936444447247266, "learning_rate": 4.930793412330633e-06, "loss": 0.5234, "step": 7489 }, { "epoch": 0.45610936881527264, "grad_norm": 1.1374258775125845, "learning_rate": 4.930774765672935e-06, "loss": 0.4548, "step": 7490 }, { "epoch": 0.45617026459215054, "grad_norm": 1.0031159258031455, "learning_rate": 4.930756116538814e-06, "loss": 0.4104, "step": 7491 }, { "epoch": 0.4562311603690284, "grad_norm": 0.9582834435979617, "learning_rate": 4.9307374649282876e-06, "loss": 0.4305, "step": 7492 }, { "epoch": 0.4562920561459063, "grad_norm": 0.9715925603774189, "learning_rate": 4.930718810841378e-06, "loss": 0.3838, "step": 7493 }, { "epoch": 0.45635295192278413, "grad_norm": 1.0539054527374427, "learning_rate": 4.930700154278102e-06, "loss": 0.3982, "step": 7494 }, { "epoch": 0.45641384769966203, "grad_norm": 0.9949069057557306, "learning_rate": 4.93068149523848e-06, "loss": 0.5215, "step": 7495 }, { "epoch": 0.4564747434765399, "grad_norm": 0.9943428022957613, "learning_rate": 4.930662833722529e-06, "loss": 0.3905, "step": 7496 }, { "epoch": 0.4565356392534178, "grad_norm": 1.132542891858728, "learning_rate": 4.93064416973027e-06, "loss": 0.5353, "step": 7497 }, { "epoch": 0.4565965350302956, "grad_norm": 0.8884026275770566, "learning_rate": 4.930625503261721e-06, "loss": 0.4737, "step": 7498 }, { "epoch": 0.4566574308071735, "grad_norm": 1.1078601586549588, "learning_rate": 4.930606834316901e-06, "loss": 0.464, "step": 7499 }, { "epoch": 0.45671832658405137, "grad_norm": 0.9734168930640096, "learning_rate": 4.9305881628958296e-06, "loss": 0.4716, "step": 7500 }, { "epoch": 0.45677922236092927, "grad_norm": 1.0921746107630095, "learning_rate": 4.930569488998526e-06, "loss": 0.4642, "step": 7501 }, { "epoch": 0.4568401181378071, "grad_norm": 1.0521411894396548, "learning_rate": 4.9305508126250075e-06, "loss": 0.4966, "step": 7502 }, { "epoch": 0.456901013914685, "grad_norm": 1.0380368809115907, "learning_rate": 4.930532133775295e-06, "loss": 0.4385, "step": 7503 }, { "epoch": 0.4569619096915629, "grad_norm": 1.0325931273850661, "learning_rate": 4.930513452449407e-06, "loss": 0.4297, "step": 7504 }, { "epoch": 0.45702280546844076, "grad_norm": 1.0622764401700204, "learning_rate": 4.930494768647363e-06, "loss": 0.4514, "step": 7505 }, { "epoch": 0.45708370124531866, "grad_norm": 0.9832442275245167, "learning_rate": 4.93047608236918e-06, "loss": 0.4627, "step": 7506 }, { "epoch": 0.4571445970221965, "grad_norm": 1.058689960736043, "learning_rate": 4.930457393614879e-06, "loss": 0.441, "step": 7507 }, { "epoch": 0.4572054927990744, "grad_norm": 0.9752313915726505, "learning_rate": 4.930438702384479e-06, "loss": 0.4501, "step": 7508 }, { "epoch": 0.45726638857595225, "grad_norm": 1.0314483096204048, "learning_rate": 4.930420008677999e-06, "loss": 0.3729, "step": 7509 }, { "epoch": 0.45732728435283015, "grad_norm": 1.0916503272328653, "learning_rate": 4.930401312495457e-06, "loss": 0.3906, "step": 7510 }, { "epoch": 0.457388180129708, "grad_norm": 0.9630200016680743, "learning_rate": 4.9303826138368726e-06, "loss": 0.4432, "step": 7511 }, { "epoch": 0.4574490759065859, "grad_norm": 1.0242004615436482, "learning_rate": 4.9303639127022655e-06, "loss": 0.4591, "step": 7512 }, { "epoch": 0.45750997168346375, "grad_norm": 1.0014878244502106, "learning_rate": 4.930345209091654e-06, "loss": 0.5157, "step": 7513 }, { "epoch": 0.45757086746034165, "grad_norm": 1.0521044078066697, "learning_rate": 4.930326503005057e-06, "loss": 0.4461, "step": 7514 }, { "epoch": 0.4576317632372195, "grad_norm": 1.042549987169014, "learning_rate": 4.9303077944424936e-06, "loss": 0.4541, "step": 7515 }, { "epoch": 0.4576926590140974, "grad_norm": 1.0316256782952804, "learning_rate": 4.930289083403984e-06, "loss": 0.4364, "step": 7516 }, { "epoch": 0.45775355479097524, "grad_norm": 0.9317699256616583, "learning_rate": 4.930270369889546e-06, "loss": 0.5461, "step": 7517 }, { "epoch": 0.45781445056785314, "grad_norm": 1.081722536214359, "learning_rate": 4.930251653899198e-06, "loss": 0.4996, "step": 7518 }, { "epoch": 0.457875346344731, "grad_norm": 0.9841456108840312, "learning_rate": 4.930232935432961e-06, "loss": 0.5144, "step": 7519 }, { "epoch": 0.4579362421216089, "grad_norm": 0.9855558336315361, "learning_rate": 4.930214214490854e-06, "loss": 0.4514, "step": 7520 }, { "epoch": 0.45799713789848673, "grad_norm": 1.075516890838076, "learning_rate": 4.9301954910728945e-06, "loss": 0.4113, "step": 7521 }, { "epoch": 0.45805803367536463, "grad_norm": 1.0163754333723383, "learning_rate": 4.930176765179103e-06, "loss": 0.5286, "step": 7522 }, { "epoch": 0.4581189294522425, "grad_norm": 0.9462027308074071, "learning_rate": 4.930158036809497e-06, "loss": 0.4974, "step": 7523 }, { "epoch": 0.4581798252291204, "grad_norm": 1.0379358895183994, "learning_rate": 4.930139305964097e-06, "loss": 0.3762, "step": 7524 }, { "epoch": 0.4582407210059982, "grad_norm": 1.0017991463997684, "learning_rate": 4.930120572642922e-06, "loss": 0.5435, "step": 7525 }, { "epoch": 0.4583016167828761, "grad_norm": 0.9084664066250825, "learning_rate": 4.930101836845989e-06, "loss": 0.4791, "step": 7526 }, { "epoch": 0.45836251255975397, "grad_norm": 1.1231264444624431, "learning_rate": 4.930083098573321e-06, "loss": 0.4402, "step": 7527 }, { "epoch": 0.45842340833663187, "grad_norm": 1.0453918044836152, "learning_rate": 4.9300643578249345e-06, "loss": 0.4928, "step": 7528 }, { "epoch": 0.4584843041135097, "grad_norm": 0.9772756452333127, "learning_rate": 4.930045614600848e-06, "loss": 0.48, "step": 7529 }, { "epoch": 0.4585451998903876, "grad_norm": 1.0424289519188832, "learning_rate": 4.930026868901082e-06, "loss": 0.3834, "step": 7530 }, { "epoch": 0.45860609566726546, "grad_norm": 1.0546189956073915, "learning_rate": 4.930008120725655e-06, "loss": 0.4572, "step": 7531 }, { "epoch": 0.45866699144414336, "grad_norm": 1.0503025611051562, "learning_rate": 4.929989370074586e-06, "loss": 0.4158, "step": 7532 }, { "epoch": 0.4587278872210212, "grad_norm": 1.0168427918556346, "learning_rate": 4.929970616947895e-06, "loss": 0.5061, "step": 7533 }, { "epoch": 0.4587887829978991, "grad_norm": 1.1095434703367997, "learning_rate": 4.9299518613456e-06, "loss": 0.3918, "step": 7534 }, { "epoch": 0.45884967877477695, "grad_norm": 1.2100016338819057, "learning_rate": 4.92993310326772e-06, "loss": 0.4674, "step": 7535 }, { "epoch": 0.45891057455165485, "grad_norm": 1.0597040902343557, "learning_rate": 4.9299143427142755e-06, "loss": 0.4364, "step": 7536 }, { "epoch": 0.4589714703285327, "grad_norm": 1.005481209558817, "learning_rate": 4.9298955796852846e-06, "loss": 0.4347, "step": 7537 }, { "epoch": 0.4590323661054106, "grad_norm": 1.0551208181648255, "learning_rate": 4.929876814180767e-06, "loss": 0.4534, "step": 7538 }, { "epoch": 0.45909326188228844, "grad_norm": 1.001615128724044, "learning_rate": 4.9298580462007405e-06, "loss": 0.4149, "step": 7539 }, { "epoch": 0.45915415765916634, "grad_norm": 0.9213864998245573, "learning_rate": 4.9298392757452264e-06, "loss": 0.4411, "step": 7540 }, { "epoch": 0.4592150534360442, "grad_norm": 0.9986711055397143, "learning_rate": 4.929820502814242e-06, "loss": 0.4809, "step": 7541 }, { "epoch": 0.4592759492129221, "grad_norm": 1.1508463533482525, "learning_rate": 4.929801727407806e-06, "loss": 0.3933, "step": 7542 }, { "epoch": 0.45933684498979993, "grad_norm": 0.9751892808769443, "learning_rate": 4.929782949525939e-06, "loss": 0.4488, "step": 7543 }, { "epoch": 0.45939774076667783, "grad_norm": 1.026229353312782, "learning_rate": 4.92976416916866e-06, "loss": 0.3533, "step": 7544 }, { "epoch": 0.45945863654355573, "grad_norm": 0.9889928925079071, "learning_rate": 4.929745386335989e-06, "loss": 0.43, "step": 7545 }, { "epoch": 0.4595195323204336, "grad_norm": 0.9465705149328636, "learning_rate": 4.929726601027942e-06, "loss": 0.4665, "step": 7546 }, { "epoch": 0.4595804280973115, "grad_norm": 1.0659465063617921, "learning_rate": 4.929707813244541e-06, "loss": 0.4881, "step": 7547 }, { "epoch": 0.4596413238741893, "grad_norm": 1.0159503366589955, "learning_rate": 4.929689022985804e-06, "loss": 0.504, "step": 7548 }, { "epoch": 0.4597022196510672, "grad_norm": 1.064733707484081, "learning_rate": 4.929670230251749e-06, "loss": 0.4358, "step": 7549 }, { "epoch": 0.45976311542794507, "grad_norm": 1.1028307437732447, "learning_rate": 4.929651435042399e-06, "loss": 0.4905, "step": 7550 }, { "epoch": 0.45982401120482297, "grad_norm": 1.0022974802994955, "learning_rate": 4.929632637357769e-06, "loss": 0.4857, "step": 7551 }, { "epoch": 0.4598849069817008, "grad_norm": 1.0726958745313238, "learning_rate": 4.92961383719788e-06, "loss": 0.4775, "step": 7552 }, { "epoch": 0.4599458027585787, "grad_norm": 0.9027288811766855, "learning_rate": 4.9295950345627515e-06, "loss": 0.4907, "step": 7553 }, { "epoch": 0.46000669853545656, "grad_norm": 1.0499487469923832, "learning_rate": 4.929576229452402e-06, "loss": 0.433, "step": 7554 }, { "epoch": 0.46006759431233446, "grad_norm": 1.091029088690275, "learning_rate": 4.92955742186685e-06, "loss": 0.4296, "step": 7555 }, { "epoch": 0.4601284900892123, "grad_norm": 0.9995079711512255, "learning_rate": 4.9295386118061166e-06, "loss": 0.5194, "step": 7556 }, { "epoch": 0.4601893858660902, "grad_norm": 1.009060149415835, "learning_rate": 4.929519799270218e-06, "loss": 0.4091, "step": 7557 }, { "epoch": 0.46025028164296805, "grad_norm": 0.9136975817981404, "learning_rate": 4.929500984259177e-06, "loss": 0.4338, "step": 7558 }, { "epoch": 0.46031117741984595, "grad_norm": 1.0826718735467324, "learning_rate": 4.92948216677301e-06, "loss": 0.4252, "step": 7559 }, { "epoch": 0.4603720731967238, "grad_norm": 0.9604009851000546, "learning_rate": 4.929463346811737e-06, "loss": 0.499, "step": 7560 }, { "epoch": 0.4604329689736017, "grad_norm": 0.9813974724640105, "learning_rate": 4.929444524375378e-06, "loss": 0.4392, "step": 7561 }, { "epoch": 0.46049386475047954, "grad_norm": 1.063118380434815, "learning_rate": 4.92942569946395e-06, "loss": 0.3963, "step": 7562 }, { "epoch": 0.46055476052735744, "grad_norm": 1.07046367103653, "learning_rate": 4.929406872077475e-06, "loss": 0.5129, "step": 7563 }, { "epoch": 0.4606156563042353, "grad_norm": 1.086895600592622, "learning_rate": 4.92938804221597e-06, "loss": 0.4056, "step": 7564 }, { "epoch": 0.4606765520811132, "grad_norm": 1.0139457428090042, "learning_rate": 4.929369209879455e-06, "loss": 0.5269, "step": 7565 }, { "epoch": 0.46073744785799103, "grad_norm": 0.9702604452889886, "learning_rate": 4.92935037506795e-06, "loss": 0.4919, "step": 7566 }, { "epoch": 0.46079834363486893, "grad_norm": 1.002226044609161, "learning_rate": 4.9293315377814725e-06, "loss": 0.5121, "step": 7567 }, { "epoch": 0.4608592394117468, "grad_norm": 1.0095442036299604, "learning_rate": 4.929312698020042e-06, "loss": 0.4493, "step": 7568 }, { "epoch": 0.4609201351886247, "grad_norm": 0.9902589957371549, "learning_rate": 4.929293855783679e-06, "loss": 0.4065, "step": 7569 }, { "epoch": 0.4609810309655025, "grad_norm": 1.0753203389561983, "learning_rate": 4.929275011072402e-06, "loss": 0.3933, "step": 7570 }, { "epoch": 0.4610419267423804, "grad_norm": 1.0495891809228035, "learning_rate": 4.92925616388623e-06, "loss": 0.483, "step": 7571 }, { "epoch": 0.46110282251925827, "grad_norm": 0.9734117552229143, "learning_rate": 4.929237314225183e-06, "loss": 0.4319, "step": 7572 }, { "epoch": 0.46116371829613617, "grad_norm": 1.079311241081342, "learning_rate": 4.9292184620892786e-06, "loss": 0.3836, "step": 7573 }, { "epoch": 0.461224614073014, "grad_norm": 0.9409308280817809, "learning_rate": 4.929199607478537e-06, "loss": 0.5032, "step": 7574 }, { "epoch": 0.4612855098498919, "grad_norm": 1.0099444043043042, "learning_rate": 4.9291807503929776e-06, "loss": 0.4243, "step": 7575 }, { "epoch": 0.46134640562676976, "grad_norm": 1.0210989065420248, "learning_rate": 4.92916189083262e-06, "loss": 0.4747, "step": 7576 }, { "epoch": 0.46140730140364766, "grad_norm": 1.0916214177184014, "learning_rate": 4.929143028797482e-06, "loss": 0.4182, "step": 7577 }, { "epoch": 0.4614681971805255, "grad_norm": 0.9855177019503384, "learning_rate": 4.929124164287583e-06, "loss": 0.4252, "step": 7578 }, { "epoch": 0.4615290929574034, "grad_norm": 1.0521792637314435, "learning_rate": 4.929105297302944e-06, "loss": 0.435, "step": 7579 }, { "epoch": 0.46158998873428125, "grad_norm": 1.0669719071455088, "learning_rate": 4.929086427843582e-06, "loss": 0.4312, "step": 7580 }, { "epoch": 0.46165088451115915, "grad_norm": 1.041478762960134, "learning_rate": 4.929067555909518e-06, "loss": 0.432, "step": 7581 }, { "epoch": 0.461711780288037, "grad_norm": 1.1269842749148524, "learning_rate": 4.92904868150077e-06, "loss": 0.4099, "step": 7582 }, { "epoch": 0.4617726760649149, "grad_norm": 0.9919979384020785, "learning_rate": 4.929029804617358e-06, "loss": 0.4825, "step": 7583 }, { "epoch": 0.46183357184179274, "grad_norm": 1.1441739722244624, "learning_rate": 4.929010925259301e-06, "loss": 0.4338, "step": 7584 }, { "epoch": 0.46189446761867065, "grad_norm": 1.0084421410109006, "learning_rate": 4.928992043426618e-06, "loss": 0.4043, "step": 7585 }, { "epoch": 0.46195536339554855, "grad_norm": 0.9952459549276826, "learning_rate": 4.928973159119329e-06, "loss": 0.465, "step": 7586 }, { "epoch": 0.4620162591724264, "grad_norm": 1.0610623597593538, "learning_rate": 4.928954272337452e-06, "loss": 0.4116, "step": 7587 }, { "epoch": 0.4620771549493043, "grad_norm": 1.022572383288637, "learning_rate": 4.928935383081006e-06, "loss": 0.4482, "step": 7588 }, { "epoch": 0.46213805072618214, "grad_norm": 0.9876568878205371, "learning_rate": 4.928916491350013e-06, "loss": 0.4329, "step": 7589 }, { "epoch": 0.46219894650306004, "grad_norm": 1.0641490164862422, "learning_rate": 4.9288975971444894e-06, "loss": 0.502, "step": 7590 }, { "epoch": 0.4622598422799379, "grad_norm": 1.1784223183164226, "learning_rate": 4.928878700464455e-06, "loss": 0.4455, "step": 7591 }, { "epoch": 0.4623207380568158, "grad_norm": 1.0820989709108406, "learning_rate": 4.92885980130993e-06, "loss": 0.4191, "step": 7592 }, { "epoch": 0.46238163383369363, "grad_norm": 0.9310370197444274, "learning_rate": 4.9288408996809336e-06, "loss": 0.3919, "step": 7593 }, { "epoch": 0.46244252961057153, "grad_norm": 0.9157950556278837, "learning_rate": 4.928821995577484e-06, "loss": 0.4315, "step": 7594 }, { "epoch": 0.4625034253874494, "grad_norm": 0.9975233751837584, "learning_rate": 4.928803088999601e-06, "loss": 0.489, "step": 7595 }, { "epoch": 0.4625643211643273, "grad_norm": 0.9878521013166861, "learning_rate": 4.928784179947304e-06, "loss": 0.4533, "step": 7596 }, { "epoch": 0.4626252169412051, "grad_norm": 1.0894898551273833, "learning_rate": 4.928765268420612e-06, "loss": 0.5034, "step": 7597 }, { "epoch": 0.462686112718083, "grad_norm": 0.9652295741886595, "learning_rate": 4.928746354419545e-06, "loss": 0.4752, "step": 7598 }, { "epoch": 0.46274700849496087, "grad_norm": 1.1210953404120552, "learning_rate": 4.928727437944121e-06, "loss": 0.4572, "step": 7599 }, { "epoch": 0.46280790427183877, "grad_norm": 0.9735352425991124, "learning_rate": 4.9287085189943605e-06, "loss": 0.3999, "step": 7600 }, { "epoch": 0.4628688000487166, "grad_norm": 1.0989988935035802, "learning_rate": 4.928689597570282e-06, "loss": 0.4177, "step": 7601 }, { "epoch": 0.4629296958255945, "grad_norm": 0.9810649435574759, "learning_rate": 4.928670673671905e-06, "loss": 0.4834, "step": 7602 }, { "epoch": 0.46299059160247236, "grad_norm": 1.1010718065408838, "learning_rate": 4.928651747299249e-06, "loss": 0.3738, "step": 7603 }, { "epoch": 0.46305148737935026, "grad_norm": 1.1191444521343479, "learning_rate": 4.928632818452333e-06, "loss": 0.4334, "step": 7604 }, { "epoch": 0.4631123831562281, "grad_norm": 1.0705104233656986, "learning_rate": 4.928613887131177e-06, "loss": 0.4947, "step": 7605 }, { "epoch": 0.463173278933106, "grad_norm": 1.0019640845066566, "learning_rate": 4.928594953335799e-06, "loss": 0.4177, "step": 7606 }, { "epoch": 0.46323417470998385, "grad_norm": 1.1232584033667814, "learning_rate": 4.928576017066219e-06, "loss": 0.4671, "step": 7607 }, { "epoch": 0.46329507048686175, "grad_norm": 1.0469750915513945, "learning_rate": 4.928557078322456e-06, "loss": 0.4169, "step": 7608 }, { "epoch": 0.4633559662637396, "grad_norm": 1.0667094113904936, "learning_rate": 4.928538137104531e-06, "loss": 0.4514, "step": 7609 }, { "epoch": 0.4634168620406175, "grad_norm": 1.0185845055250407, "learning_rate": 4.92851919341246e-06, "loss": 0.4763, "step": 7610 }, { "epoch": 0.46347775781749534, "grad_norm": 1.0163390101870884, "learning_rate": 4.928500247246265e-06, "loss": 0.4885, "step": 7611 }, { "epoch": 0.46353865359437324, "grad_norm": 1.000844439630944, "learning_rate": 4.928481298605964e-06, "loss": 0.499, "step": 7612 }, { "epoch": 0.4635995493712511, "grad_norm": 0.9347967910835702, "learning_rate": 4.9284623474915775e-06, "loss": 0.4572, "step": 7613 }, { "epoch": 0.463660445148129, "grad_norm": 1.162218432974772, "learning_rate": 4.928443393903123e-06, "loss": 0.3837, "step": 7614 }, { "epoch": 0.46372134092500683, "grad_norm": 0.950265645036933, "learning_rate": 4.928424437840621e-06, "loss": 0.5138, "step": 7615 }, { "epoch": 0.46378223670188473, "grad_norm": 1.0622084155210925, "learning_rate": 4.928405479304092e-06, "loss": 0.3995, "step": 7616 }, { "epoch": 0.4638431324787626, "grad_norm": 0.978875377452531, "learning_rate": 4.9283865182935525e-06, "loss": 0.4737, "step": 7617 }, { "epoch": 0.4639040282556405, "grad_norm": 1.0044154964693548, "learning_rate": 4.928367554809024e-06, "loss": 0.411, "step": 7618 }, { "epoch": 0.4639649240325183, "grad_norm": 1.0953620104459298, "learning_rate": 4.928348588850525e-06, "loss": 0.432, "step": 7619 }, { "epoch": 0.4640258198093962, "grad_norm": 0.9916408321402969, "learning_rate": 4.9283296204180745e-06, "loss": 0.4389, "step": 7620 }, { "epoch": 0.46408671558627407, "grad_norm": 0.9056091409857854, "learning_rate": 4.9283106495116925e-06, "loss": 0.6078, "step": 7621 }, { "epoch": 0.46414761136315197, "grad_norm": 1.01892139871481, "learning_rate": 4.928291676131398e-06, "loss": 0.5061, "step": 7622 }, { "epoch": 0.4642085071400298, "grad_norm": 1.0450195982574246, "learning_rate": 4.928272700277211e-06, "loss": 0.4001, "step": 7623 }, { "epoch": 0.4642694029169077, "grad_norm": 0.9437841633790067, "learning_rate": 4.9282537219491504e-06, "loss": 0.4521, "step": 7624 }, { "epoch": 0.46433029869378556, "grad_norm": 1.1311030365603758, "learning_rate": 4.928234741147236e-06, "loss": 0.4161, "step": 7625 }, { "epoch": 0.46439119447066346, "grad_norm": 1.0367859273636353, "learning_rate": 4.928215757871484e-06, "loss": 0.4314, "step": 7626 }, { "epoch": 0.46445209024754136, "grad_norm": 1.088875636571779, "learning_rate": 4.928196772121918e-06, "loss": 0.4296, "step": 7627 }, { "epoch": 0.4645129860244192, "grad_norm": 0.9750620581644552, "learning_rate": 4.9281777838985555e-06, "loss": 0.4804, "step": 7628 }, { "epoch": 0.4645738818012971, "grad_norm": 1.2127363814913221, "learning_rate": 4.928158793201416e-06, "loss": 0.4695, "step": 7629 }, { "epoch": 0.46463477757817495, "grad_norm": 1.0291111438993001, "learning_rate": 4.9281398000305184e-06, "loss": 0.4278, "step": 7630 }, { "epoch": 0.46469567335505285, "grad_norm": 0.9958459690235238, "learning_rate": 4.928120804385883e-06, "loss": 0.4542, "step": 7631 }, { "epoch": 0.4647565691319307, "grad_norm": 1.0617515484327118, "learning_rate": 4.928101806267528e-06, "loss": 0.4504, "step": 7632 }, { "epoch": 0.4648174649088086, "grad_norm": 0.8938063506599627, "learning_rate": 4.928082805675474e-06, "loss": 0.4782, "step": 7633 }, { "epoch": 0.46487836068568644, "grad_norm": 1.0376757294131043, "learning_rate": 4.928063802609739e-06, "loss": 0.4083, "step": 7634 }, { "epoch": 0.46493925646256434, "grad_norm": 1.0361247554989692, "learning_rate": 4.928044797070343e-06, "loss": 0.409, "step": 7635 }, { "epoch": 0.4650001522394422, "grad_norm": 1.0296844075198965, "learning_rate": 4.928025789057307e-06, "loss": 0.4084, "step": 7636 }, { "epoch": 0.4650610480163201, "grad_norm": 0.9827526782293275, "learning_rate": 4.928006778570648e-06, "loss": 0.4348, "step": 7637 }, { "epoch": 0.46512194379319793, "grad_norm": 1.0025267208509367, "learning_rate": 4.927987765610385e-06, "loss": 0.4899, "step": 7638 }, { "epoch": 0.46518283957007583, "grad_norm": 1.1348953214451187, "learning_rate": 4.927968750176539e-06, "loss": 0.4442, "step": 7639 }, { "epoch": 0.4652437353469537, "grad_norm": 1.0620135017751582, "learning_rate": 4.92794973226913e-06, "loss": 0.4245, "step": 7640 }, { "epoch": 0.4653046311238316, "grad_norm": 0.9752909043449395, "learning_rate": 4.927930711888176e-06, "loss": 0.4163, "step": 7641 }, { "epoch": 0.4653655269007094, "grad_norm": 0.9893467950217454, "learning_rate": 4.927911689033696e-06, "loss": 0.4523, "step": 7642 }, { "epoch": 0.4654264226775873, "grad_norm": 0.9484197625549362, "learning_rate": 4.92789266370571e-06, "loss": 0.4133, "step": 7643 }, { "epoch": 0.46548731845446517, "grad_norm": 1.0912158110834504, "learning_rate": 4.927873635904238e-06, "loss": 0.4157, "step": 7644 }, { "epoch": 0.46554821423134307, "grad_norm": 0.9783675976642013, "learning_rate": 4.927854605629298e-06, "loss": 0.5595, "step": 7645 }, { "epoch": 0.4656091100082209, "grad_norm": 1.0453667302348142, "learning_rate": 4.927835572880911e-06, "loss": 0.4233, "step": 7646 }, { "epoch": 0.4656700057850988, "grad_norm": 1.0463317975892357, "learning_rate": 4.927816537659096e-06, "loss": 0.4264, "step": 7647 }, { "epoch": 0.46573090156197666, "grad_norm": 1.0362478179955625, "learning_rate": 4.927797499963871e-06, "loss": 0.391, "step": 7648 }, { "epoch": 0.46579179733885456, "grad_norm": 1.1584774111363845, "learning_rate": 4.927778459795257e-06, "loss": 0.4581, "step": 7649 }, { "epoch": 0.4658526931157324, "grad_norm": 1.0322993677950245, "learning_rate": 4.9277594171532724e-06, "loss": 0.4274, "step": 7650 }, { "epoch": 0.4659135888926103, "grad_norm": 0.9380571129069819, "learning_rate": 4.9277403720379375e-06, "loss": 0.422, "step": 7651 }, { "epoch": 0.46597448466948815, "grad_norm": 1.0561504981801202, "learning_rate": 4.927721324449271e-06, "loss": 0.4032, "step": 7652 }, { "epoch": 0.46603538044636605, "grad_norm": 1.0264559572876004, "learning_rate": 4.927702274387291e-06, "loss": 0.4815, "step": 7653 }, { "epoch": 0.4660962762232439, "grad_norm": 1.0897660760935657, "learning_rate": 4.9276832218520206e-06, "loss": 0.3849, "step": 7654 }, { "epoch": 0.4661571720001218, "grad_norm": 1.0144308599617984, "learning_rate": 4.927664166843476e-06, "loss": 0.4325, "step": 7655 }, { "epoch": 0.46621806777699965, "grad_norm": 1.0680879975034205, "learning_rate": 4.927645109361678e-06, "loss": 0.4681, "step": 7656 }, { "epoch": 0.46627896355387755, "grad_norm": 1.0643753542929504, "learning_rate": 4.927626049406645e-06, "loss": 0.4791, "step": 7657 }, { "epoch": 0.4663398593307554, "grad_norm": 1.041630881783114, "learning_rate": 4.927606986978397e-06, "loss": 0.4575, "step": 7658 }, { "epoch": 0.4664007551076333, "grad_norm": 0.9490433529765393, "learning_rate": 4.927587922076955e-06, "loss": 0.4392, "step": 7659 }, { "epoch": 0.46646165088451114, "grad_norm": 1.0892995676588761, "learning_rate": 4.927568854702335e-06, "loss": 0.4078, "step": 7660 }, { "epoch": 0.46652254666138904, "grad_norm": 1.0835261110195402, "learning_rate": 4.92754978485456e-06, "loss": 0.4069, "step": 7661 }, { "epoch": 0.4665834424382669, "grad_norm": 1.0466853767161652, "learning_rate": 4.927530712533646e-06, "loss": 0.3695, "step": 7662 }, { "epoch": 0.4666443382151448, "grad_norm": 1.161681575239277, "learning_rate": 4.927511637739615e-06, "loss": 0.3776, "step": 7663 }, { "epoch": 0.46670523399202263, "grad_norm": 0.9614058024255653, "learning_rate": 4.927492560472485e-06, "loss": 0.474, "step": 7664 }, { "epoch": 0.46676612976890053, "grad_norm": 1.0030665227967253, "learning_rate": 4.927473480732278e-06, "loss": 0.4642, "step": 7665 }, { "epoch": 0.4668270255457784, "grad_norm": 1.0909636455942067, "learning_rate": 4.92745439851901e-06, "loss": 0.4355, "step": 7666 }, { "epoch": 0.4668879213226563, "grad_norm": 0.9828793290015077, "learning_rate": 4.927435313832702e-06, "loss": 0.4861, "step": 7667 }, { "epoch": 0.4669488170995342, "grad_norm": 0.9645753765687112, "learning_rate": 4.927416226673373e-06, "loss": 0.5025, "step": 7668 }, { "epoch": 0.467009712876412, "grad_norm": 1.0001272006325554, "learning_rate": 4.9273971370410435e-06, "loss": 0.4013, "step": 7669 }, { "epoch": 0.4670706086532899, "grad_norm": 1.0598383538279728, "learning_rate": 4.9273780449357326e-06, "loss": 0.4149, "step": 7670 }, { "epoch": 0.46713150443016777, "grad_norm": 0.9705183100673982, "learning_rate": 4.927358950357458e-06, "loss": 0.4911, "step": 7671 }, { "epoch": 0.46719240020704567, "grad_norm": 0.9481058903755106, "learning_rate": 4.927339853306242e-06, "loss": 0.4494, "step": 7672 }, { "epoch": 0.4672532959839235, "grad_norm": 1.0534525574238431, "learning_rate": 4.927320753782102e-06, "loss": 0.3668, "step": 7673 }, { "epoch": 0.4673141917608014, "grad_norm": 0.9119961102369724, "learning_rate": 4.927301651785058e-06, "loss": 0.4719, "step": 7674 }, { "epoch": 0.46737508753767926, "grad_norm": 1.0035231314059332, "learning_rate": 4.92728254731513e-06, "loss": 0.443, "step": 7675 }, { "epoch": 0.46743598331455716, "grad_norm": 0.9597924529555051, "learning_rate": 4.927263440372336e-06, "loss": 0.4735, "step": 7676 }, { "epoch": 0.467496879091435, "grad_norm": 1.0289945849676323, "learning_rate": 4.927244330956697e-06, "loss": 0.45, "step": 7677 }, { "epoch": 0.4675577748683129, "grad_norm": 1.1087502836640106, "learning_rate": 4.927225219068232e-06, "loss": 0.461, "step": 7678 }, { "epoch": 0.46761867064519075, "grad_norm": 0.9601914447040139, "learning_rate": 4.927206104706961e-06, "loss": 0.4925, "step": 7679 }, { "epoch": 0.46767956642206865, "grad_norm": 1.0415008268147596, "learning_rate": 4.9271869878729025e-06, "loss": 0.4777, "step": 7680 }, { "epoch": 0.4677404621989465, "grad_norm": 0.9774495266855375, "learning_rate": 4.927167868566076e-06, "loss": 0.4352, "step": 7681 }, { "epoch": 0.4678013579758244, "grad_norm": 0.9954536929571373, "learning_rate": 4.927148746786502e-06, "loss": 0.433, "step": 7682 }, { "epoch": 0.46786225375270224, "grad_norm": 1.0383783750403672, "learning_rate": 4.927129622534199e-06, "loss": 0.4341, "step": 7683 }, { "epoch": 0.46792314952958014, "grad_norm": 0.9623783943662301, "learning_rate": 4.927110495809186e-06, "loss": 0.4313, "step": 7684 }, { "epoch": 0.467984045306458, "grad_norm": 0.9626287332961333, "learning_rate": 4.927091366611484e-06, "loss": 0.4678, "step": 7685 }, { "epoch": 0.4680449410833359, "grad_norm": 0.9463294696084522, "learning_rate": 4.9270722349411126e-06, "loss": 0.4759, "step": 7686 }, { "epoch": 0.46810583686021373, "grad_norm": 0.9912633668094067, "learning_rate": 4.927053100798089e-06, "loss": 0.4768, "step": 7687 }, { "epoch": 0.46816673263709163, "grad_norm": 1.094998328358252, "learning_rate": 4.927033964182435e-06, "loss": 0.4405, "step": 7688 }, { "epoch": 0.4682276284139695, "grad_norm": 1.0351095714685652, "learning_rate": 4.927014825094169e-06, "loss": 0.412, "step": 7689 }, { "epoch": 0.4682885241908474, "grad_norm": 0.9522854295881967, "learning_rate": 4.926995683533311e-06, "loss": 0.4874, "step": 7690 }, { "epoch": 0.4683494199677252, "grad_norm": 1.0528573951797966, "learning_rate": 4.92697653949988e-06, "loss": 0.4949, "step": 7691 }, { "epoch": 0.4684103157446031, "grad_norm": 0.9915743604893201, "learning_rate": 4.926957392993896e-06, "loss": 0.4997, "step": 7692 }, { "epoch": 0.46847121152148097, "grad_norm": 1.010807176759435, "learning_rate": 4.9269382440153775e-06, "loss": 0.4521, "step": 7693 }, { "epoch": 0.46853210729835887, "grad_norm": 1.0180361052187372, "learning_rate": 4.926919092564346e-06, "loss": 0.4433, "step": 7694 }, { "epoch": 0.4685930030752367, "grad_norm": 1.0754533858727, "learning_rate": 4.926899938640819e-06, "loss": 0.3937, "step": 7695 }, { "epoch": 0.4686538988521146, "grad_norm": 1.0563565926871157, "learning_rate": 4.926880782244817e-06, "loss": 0.4895, "step": 7696 }, { "epoch": 0.46871479462899246, "grad_norm": 0.9576807171010912, "learning_rate": 4.926861623376359e-06, "loss": 0.505, "step": 7697 }, { "epoch": 0.46877569040587036, "grad_norm": 1.0309468632104022, "learning_rate": 4.926842462035465e-06, "loss": 0.403, "step": 7698 }, { "epoch": 0.4688365861827482, "grad_norm": 0.9482427636023683, "learning_rate": 4.926823298222155e-06, "loss": 0.4676, "step": 7699 }, { "epoch": 0.4688974819596261, "grad_norm": 0.9661802435079537, "learning_rate": 4.9268041319364464e-06, "loss": 0.44, "step": 7700 }, { "epoch": 0.46895837773650395, "grad_norm": 1.0036840594080843, "learning_rate": 4.926784963178361e-06, "loss": 0.5273, "step": 7701 }, { "epoch": 0.46901927351338185, "grad_norm": 1.0503811653194584, "learning_rate": 4.926765791947918e-06, "loss": 0.4265, "step": 7702 }, { "epoch": 0.4690801692902597, "grad_norm": 1.0128933333243577, "learning_rate": 4.9267466182451365e-06, "loss": 0.4256, "step": 7703 }, { "epoch": 0.4691410650671376, "grad_norm": 1.0034273654911956, "learning_rate": 4.926727442070036e-06, "loss": 0.4633, "step": 7704 }, { "epoch": 0.46920196084401544, "grad_norm": 0.9788428313812036, "learning_rate": 4.926708263422635e-06, "loss": 0.4663, "step": 7705 }, { "epoch": 0.46926285662089334, "grad_norm": 1.040998371945736, "learning_rate": 4.9266890823029555e-06, "loss": 0.4415, "step": 7706 }, { "epoch": 0.4693237523977712, "grad_norm": 1.0293363235921387, "learning_rate": 4.926669898711014e-06, "loss": 0.4858, "step": 7707 }, { "epoch": 0.4693846481746491, "grad_norm": 1.0175238090395105, "learning_rate": 4.926650712646833e-06, "loss": 0.3608, "step": 7708 }, { "epoch": 0.469445543951527, "grad_norm": 1.0622884548575686, "learning_rate": 4.92663152411043e-06, "loss": 0.439, "step": 7709 }, { "epoch": 0.46950643972840483, "grad_norm": 0.9466761591449251, "learning_rate": 4.926612333101825e-06, "loss": 0.4369, "step": 7710 }, { "epoch": 0.46956733550528273, "grad_norm": 1.049271839896156, "learning_rate": 4.92659313962104e-06, "loss": 0.4313, "step": 7711 }, { "epoch": 0.4696282312821606, "grad_norm": 1.0284185742092373, "learning_rate": 4.92657394366809e-06, "loss": 0.4061, "step": 7712 }, { "epoch": 0.4696891270590385, "grad_norm": 1.0507145401824207, "learning_rate": 4.926554745242998e-06, "loss": 0.4237, "step": 7713 }, { "epoch": 0.4697500228359163, "grad_norm": 1.0201326925088912, "learning_rate": 4.926535544345781e-06, "loss": 0.4936, "step": 7714 }, { "epoch": 0.4698109186127942, "grad_norm": 1.133385676535608, "learning_rate": 4.926516340976462e-06, "loss": 0.4436, "step": 7715 }, { "epoch": 0.46987181438967207, "grad_norm": 1.0529822116228675, "learning_rate": 4.926497135135057e-06, "loss": 0.4274, "step": 7716 }, { "epoch": 0.46993271016654997, "grad_norm": 1.1169899381801027, "learning_rate": 4.926477926821588e-06, "loss": 0.5441, "step": 7717 }, { "epoch": 0.4699936059434278, "grad_norm": 1.0129149760901277, "learning_rate": 4.926458716036073e-06, "loss": 0.4685, "step": 7718 }, { "epoch": 0.4700545017203057, "grad_norm": 0.9646583623965019, "learning_rate": 4.926439502778534e-06, "loss": 0.4521, "step": 7719 }, { "epoch": 0.47011539749718356, "grad_norm": 1.1466650027470413, "learning_rate": 4.926420287048988e-06, "loss": 0.4758, "step": 7720 }, { "epoch": 0.47017629327406146, "grad_norm": 1.0151899693450965, "learning_rate": 4.926401068847455e-06, "loss": 0.4368, "step": 7721 }, { "epoch": 0.4702371890509393, "grad_norm": 0.9726014318216967, "learning_rate": 4.9263818481739545e-06, "loss": 0.4926, "step": 7722 }, { "epoch": 0.4702980848278172, "grad_norm": 1.021411817995389, "learning_rate": 4.926362625028508e-06, "loss": 0.4757, "step": 7723 }, { "epoch": 0.47035898060469505, "grad_norm": 1.0254683664811686, "learning_rate": 4.926343399411133e-06, "loss": 0.4694, "step": 7724 }, { "epoch": 0.47041987638157295, "grad_norm": 1.0354215358283432, "learning_rate": 4.92632417132185e-06, "loss": 0.4122, "step": 7725 }, { "epoch": 0.4704807721584508, "grad_norm": 0.9443556291955643, "learning_rate": 4.926304940760677e-06, "loss": 0.4332, "step": 7726 }, { "epoch": 0.4705416679353287, "grad_norm": 0.9652615479526111, "learning_rate": 4.926285707727637e-06, "loss": 0.4397, "step": 7727 }, { "epoch": 0.47060256371220655, "grad_norm": 1.031656062840175, "learning_rate": 4.926266472222747e-06, "loss": 0.5471, "step": 7728 }, { "epoch": 0.47066345948908445, "grad_norm": 0.9836783348151777, "learning_rate": 4.926247234246026e-06, "loss": 0.4411, "step": 7729 }, { "epoch": 0.4707243552659623, "grad_norm": 1.1320949952925659, "learning_rate": 4.9262279937974965e-06, "loss": 0.3705, "step": 7730 }, { "epoch": 0.4707852510428402, "grad_norm": 0.9627823226980137, "learning_rate": 4.926208750877176e-06, "loss": 0.4423, "step": 7731 }, { "epoch": 0.47084614681971804, "grad_norm": 1.1372338024307416, "learning_rate": 4.926189505485084e-06, "loss": 0.4021, "step": 7732 }, { "epoch": 0.47090704259659594, "grad_norm": 0.9572550362731801, "learning_rate": 4.92617025762124e-06, "loss": 0.4488, "step": 7733 }, { "epoch": 0.4709679383734738, "grad_norm": 1.1560003747873562, "learning_rate": 4.926151007285665e-06, "loss": 0.4598, "step": 7734 }, { "epoch": 0.4710288341503517, "grad_norm": 1.0360169995901114, "learning_rate": 4.926131754478378e-06, "loss": 0.4783, "step": 7735 }, { "epoch": 0.47108972992722953, "grad_norm": 1.119948431459298, "learning_rate": 4.926112499199397e-06, "loss": 0.3681, "step": 7736 }, { "epoch": 0.47115062570410743, "grad_norm": 1.006681334954362, "learning_rate": 4.926093241448744e-06, "loss": 0.4279, "step": 7737 }, { "epoch": 0.4712115214809853, "grad_norm": 0.9687544473564933, "learning_rate": 4.926073981226438e-06, "loss": 0.4839, "step": 7738 }, { "epoch": 0.4712724172578632, "grad_norm": 0.992418803740307, "learning_rate": 4.926054718532498e-06, "loss": 0.4579, "step": 7739 }, { "epoch": 0.471333313034741, "grad_norm": 0.9822768862577064, "learning_rate": 4.9260354533669445e-06, "loss": 0.4906, "step": 7740 }, { "epoch": 0.4713942088116189, "grad_norm": 1.0150658394682686, "learning_rate": 4.926016185729795e-06, "loss": 0.4437, "step": 7741 }, { "epoch": 0.47145510458849677, "grad_norm": 0.9837951078577479, "learning_rate": 4.9259969156210715e-06, "loss": 0.4485, "step": 7742 }, { "epoch": 0.47151600036537467, "grad_norm": 1.0639008860255574, "learning_rate": 4.925977643040793e-06, "loss": 0.4487, "step": 7743 }, { "epoch": 0.4715768961422525, "grad_norm": 0.9526577567380115, "learning_rate": 4.925958367988979e-06, "loss": 0.5094, "step": 7744 }, { "epoch": 0.4716377919191304, "grad_norm": 1.0462308286694084, "learning_rate": 4.925939090465649e-06, "loss": 0.4386, "step": 7745 }, { "epoch": 0.47169868769600826, "grad_norm": 0.893477644574072, "learning_rate": 4.925919810470822e-06, "loss": 0.5628, "step": 7746 }, { "epoch": 0.47175958347288616, "grad_norm": 0.9515201982012085, "learning_rate": 4.925900528004519e-06, "loss": 0.525, "step": 7747 }, { "epoch": 0.471820479249764, "grad_norm": 1.0717786029547074, "learning_rate": 4.9258812430667584e-06, "loss": 0.4573, "step": 7748 }, { "epoch": 0.4718813750266419, "grad_norm": 1.115333596998544, "learning_rate": 4.925861955657561e-06, "loss": 0.3795, "step": 7749 }, { "epoch": 0.4719422708035198, "grad_norm": 0.9870915216516557, "learning_rate": 4.925842665776946e-06, "loss": 0.4354, "step": 7750 }, { "epoch": 0.47200316658039765, "grad_norm": 1.0777863389604139, "learning_rate": 4.925823373424932e-06, "loss": 0.4354, "step": 7751 }, { "epoch": 0.47206406235727555, "grad_norm": 1.0393061721189427, "learning_rate": 4.92580407860154e-06, "loss": 0.4248, "step": 7752 }, { "epoch": 0.4721249581341534, "grad_norm": 0.949481627016758, "learning_rate": 4.9257847813067895e-06, "loss": 0.5284, "step": 7753 }, { "epoch": 0.4721858539110313, "grad_norm": 1.0403917593860008, "learning_rate": 4.9257654815407e-06, "loss": 0.4602, "step": 7754 }, { "epoch": 0.47224674968790914, "grad_norm": 1.0734734112865407, "learning_rate": 4.92574617930329e-06, "loss": 0.4274, "step": 7755 }, { "epoch": 0.47230764546478704, "grad_norm": 0.9362964867235002, "learning_rate": 4.9257268745945815e-06, "loss": 0.4444, "step": 7756 }, { "epoch": 0.4723685412416649, "grad_norm": 1.0171912785952155, "learning_rate": 4.925707567414592e-06, "loss": 0.5178, "step": 7757 }, { "epoch": 0.4724294370185428, "grad_norm": 0.9670750519677896, "learning_rate": 4.925688257763344e-06, "loss": 0.5288, "step": 7758 }, { "epoch": 0.47249033279542063, "grad_norm": 1.0180554882550479, "learning_rate": 4.925668945640854e-06, "loss": 0.4256, "step": 7759 }, { "epoch": 0.47255122857229853, "grad_norm": 0.9689095889624965, "learning_rate": 4.925649631047142e-06, "loss": 0.4728, "step": 7760 }, { "epoch": 0.4726121243491764, "grad_norm": 1.0857811287994044, "learning_rate": 4.92563031398223e-06, "loss": 0.4719, "step": 7761 }, { "epoch": 0.4726730201260543, "grad_norm": 1.0120909227529948, "learning_rate": 4.925610994446135e-06, "loss": 0.4004, "step": 7762 }, { "epoch": 0.4727339159029321, "grad_norm": 0.9966617597265285, "learning_rate": 4.9255916724388784e-06, "loss": 0.4588, "step": 7763 }, { "epoch": 0.47279481167981, "grad_norm": 0.9624118166109759, "learning_rate": 4.925572347960479e-06, "loss": 0.512, "step": 7764 }, { "epoch": 0.47285570745668787, "grad_norm": 0.9001970005886655, "learning_rate": 4.925553021010958e-06, "loss": 0.4627, "step": 7765 }, { "epoch": 0.47291660323356577, "grad_norm": 1.0737295209833155, "learning_rate": 4.925533691590333e-06, "loss": 0.4736, "step": 7766 }, { "epoch": 0.4729774990104436, "grad_norm": 1.089498915002385, "learning_rate": 4.925514359698626e-06, "loss": 0.4196, "step": 7767 }, { "epoch": 0.4730383947873215, "grad_norm": 0.9892429052431857, "learning_rate": 4.925495025335853e-06, "loss": 0.5289, "step": 7768 }, { "epoch": 0.47309929056419936, "grad_norm": 1.0684740194268285, "learning_rate": 4.9254756885020386e-06, "loss": 0.3718, "step": 7769 }, { "epoch": 0.47316018634107726, "grad_norm": 0.993038851819892, "learning_rate": 4.925456349197198e-06, "loss": 0.4426, "step": 7770 }, { "epoch": 0.4732210821179551, "grad_norm": 1.0115755435156408, "learning_rate": 4.925437007421354e-06, "loss": 0.5306, "step": 7771 }, { "epoch": 0.473281977894833, "grad_norm": 1.026962188252694, "learning_rate": 4.925417663174526e-06, "loss": 0.5068, "step": 7772 }, { "epoch": 0.47334287367171085, "grad_norm": 1.0069287075586042, "learning_rate": 4.925398316456732e-06, "loss": 0.4971, "step": 7773 }, { "epoch": 0.47340376944858875, "grad_norm": 1.0470998399547213, "learning_rate": 4.925378967267993e-06, "loss": 0.428, "step": 7774 }, { "epoch": 0.4734646652254666, "grad_norm": 0.9500340542869317, "learning_rate": 4.925359615608328e-06, "loss": 0.4229, "step": 7775 }, { "epoch": 0.4735255610023445, "grad_norm": 1.010664310597724, "learning_rate": 4.9253402614777566e-06, "loss": 0.4055, "step": 7776 }, { "epoch": 0.47358645677922234, "grad_norm": 0.9208917385519862, "learning_rate": 4.925320904876299e-06, "loss": 0.4092, "step": 7777 }, { "epoch": 0.47364735255610024, "grad_norm": 1.0155265562757188, "learning_rate": 4.925301545803976e-06, "loss": 0.4575, "step": 7778 }, { "epoch": 0.4737082483329781, "grad_norm": 1.04914739105807, "learning_rate": 4.9252821842608055e-06, "loss": 0.4249, "step": 7779 }, { "epoch": 0.473769144109856, "grad_norm": 0.9429747632709398, "learning_rate": 4.925262820246808e-06, "loss": 0.4494, "step": 7780 }, { "epoch": 0.47383003988673383, "grad_norm": 1.1349626237761383, "learning_rate": 4.925243453762003e-06, "loss": 0.4192, "step": 7781 }, { "epoch": 0.47389093566361173, "grad_norm": 1.0329739891357557, "learning_rate": 4.9252240848064105e-06, "loss": 0.4187, "step": 7782 }, { "epoch": 0.4739518314404896, "grad_norm": 1.068266495376303, "learning_rate": 4.92520471338005e-06, "loss": 0.4552, "step": 7783 }, { "epoch": 0.4740127272173675, "grad_norm": 0.9801043271400582, "learning_rate": 4.925185339482942e-06, "loss": 0.4789, "step": 7784 }, { "epoch": 0.4740736229942453, "grad_norm": 1.0228860236485937, "learning_rate": 4.925165963115104e-06, "loss": 0.5032, "step": 7785 }, { "epoch": 0.4741345187711232, "grad_norm": 0.9503651870575994, "learning_rate": 4.925146584276559e-06, "loss": 0.4757, "step": 7786 }, { "epoch": 0.47419541454800107, "grad_norm": 0.9743408872452328, "learning_rate": 4.925127202967325e-06, "loss": 0.4682, "step": 7787 }, { "epoch": 0.47425631032487897, "grad_norm": 0.9759899180080719, "learning_rate": 4.925107819187421e-06, "loss": 0.3926, "step": 7788 }, { "epoch": 0.4743172061017568, "grad_norm": 0.9716325107241462, "learning_rate": 4.925088432936868e-06, "loss": 0.4965, "step": 7789 }, { "epoch": 0.4743781018786347, "grad_norm": 1.0856680063408937, "learning_rate": 4.925069044215685e-06, "loss": 0.4215, "step": 7790 }, { "epoch": 0.4744389976555126, "grad_norm": 1.0149887712881516, "learning_rate": 4.925049653023892e-06, "loss": 0.4195, "step": 7791 }, { "epoch": 0.47449989343239046, "grad_norm": 1.0476744258508803, "learning_rate": 4.9250302593615095e-06, "loss": 0.4361, "step": 7792 }, { "epoch": 0.47456078920926836, "grad_norm": 0.9994981610750653, "learning_rate": 4.925010863228556e-06, "loss": 0.4294, "step": 7793 }, { "epoch": 0.4746216849861462, "grad_norm": 1.066529338140254, "learning_rate": 4.924991464625052e-06, "loss": 0.4569, "step": 7794 }, { "epoch": 0.4746825807630241, "grad_norm": 0.929313357118677, "learning_rate": 4.924972063551017e-06, "loss": 0.4796, "step": 7795 }, { "epoch": 0.47474347653990195, "grad_norm": 0.9629881553772892, "learning_rate": 4.924952660006471e-06, "loss": 0.4406, "step": 7796 }, { "epoch": 0.47480437231677985, "grad_norm": 1.0091275201765249, "learning_rate": 4.924933253991434e-06, "loss": 0.4462, "step": 7797 }, { "epoch": 0.4748652680936577, "grad_norm": 0.9765013792836544, "learning_rate": 4.924913845505925e-06, "loss": 0.4217, "step": 7798 }, { "epoch": 0.4749261638705356, "grad_norm": 1.053575559951427, "learning_rate": 4.924894434549964e-06, "loss": 0.4386, "step": 7799 }, { "epoch": 0.47498705964741345, "grad_norm": 0.9587020906790237, "learning_rate": 4.924875021123572e-06, "loss": 0.463, "step": 7800 }, { "epoch": 0.47504795542429135, "grad_norm": 0.9102038153747581, "learning_rate": 4.9248556052267675e-06, "loss": 0.5019, "step": 7801 }, { "epoch": 0.4751088512011692, "grad_norm": 1.1049008472912878, "learning_rate": 4.9248361868595695e-06, "loss": 0.3609, "step": 7802 }, { "epoch": 0.4751697469780471, "grad_norm": 1.043541981583164, "learning_rate": 4.924816766021999e-06, "loss": 0.4939, "step": 7803 }, { "epoch": 0.47523064275492494, "grad_norm": 0.9225950429123436, "learning_rate": 4.9247973427140765e-06, "loss": 0.457, "step": 7804 }, { "epoch": 0.47529153853180284, "grad_norm": 1.079885270712922, "learning_rate": 4.92477791693582e-06, "loss": 0.4096, "step": 7805 }, { "epoch": 0.4753524343086807, "grad_norm": 1.0095968486999618, "learning_rate": 4.92475848868725e-06, "loss": 0.4824, "step": 7806 }, { "epoch": 0.4754133300855586, "grad_norm": 0.9874366680328446, "learning_rate": 4.9247390579683875e-06, "loss": 0.528, "step": 7807 }, { "epoch": 0.47547422586243643, "grad_norm": 1.081383162255722, "learning_rate": 4.9247196247792505e-06, "loss": 0.4856, "step": 7808 }, { "epoch": 0.47553512163931433, "grad_norm": 1.0731889420457679, "learning_rate": 4.92470018911986e-06, "loss": 0.4167, "step": 7809 }, { "epoch": 0.4755960174161922, "grad_norm": 0.9982155939879239, "learning_rate": 4.924680750990235e-06, "loss": 0.5294, "step": 7810 }, { "epoch": 0.4756569131930701, "grad_norm": 0.9746235069099861, "learning_rate": 4.924661310390396e-06, "loss": 0.4508, "step": 7811 }, { "epoch": 0.4757178089699479, "grad_norm": 0.9908791459392685, "learning_rate": 4.924641867320362e-06, "loss": 0.438, "step": 7812 }, { "epoch": 0.4757787047468258, "grad_norm": 0.9561970839451989, "learning_rate": 4.924622421780153e-06, "loss": 0.4612, "step": 7813 }, { "epoch": 0.47583960052370367, "grad_norm": 0.9665653687865994, "learning_rate": 4.924602973769789e-06, "loss": 0.4991, "step": 7814 }, { "epoch": 0.47590049630058157, "grad_norm": 0.989005236095111, "learning_rate": 4.924583523289291e-06, "loss": 0.5615, "step": 7815 }, { "epoch": 0.4759613920774594, "grad_norm": 0.9682715211191737, "learning_rate": 4.924564070338677e-06, "loss": 0.4157, "step": 7816 }, { "epoch": 0.4760222878543373, "grad_norm": 1.0883486848145243, "learning_rate": 4.924544614917969e-06, "loss": 0.337, "step": 7817 }, { "epoch": 0.47608318363121516, "grad_norm": 1.011174711334035, "learning_rate": 4.9245251570271835e-06, "loss": 0.4857, "step": 7818 }, { "epoch": 0.47614407940809306, "grad_norm": 0.9394792611853726, "learning_rate": 4.9245056966663425e-06, "loss": 0.4412, "step": 7819 }, { "epoch": 0.4762049751849709, "grad_norm": 0.9770173386997126, "learning_rate": 4.924486233835466e-06, "loss": 0.4379, "step": 7820 }, { "epoch": 0.4762658709618488, "grad_norm": 0.9982410460307748, "learning_rate": 4.924466768534574e-06, "loss": 0.5312, "step": 7821 }, { "epoch": 0.47632676673872665, "grad_norm": 1.1834540640760616, "learning_rate": 4.924447300763684e-06, "loss": 0.3715, "step": 7822 }, { "epoch": 0.47638766251560455, "grad_norm": 1.0331772957957757, "learning_rate": 4.9244278305228186e-06, "loss": 0.4121, "step": 7823 }, { "epoch": 0.4764485582924824, "grad_norm": 1.0147257890600725, "learning_rate": 4.924408357811996e-06, "loss": 0.446, "step": 7824 }, { "epoch": 0.4765094540693603, "grad_norm": 0.9452575635936044, "learning_rate": 4.924388882631237e-06, "loss": 0.5244, "step": 7825 }, { "epoch": 0.47657034984623814, "grad_norm": 1.0066106727172548, "learning_rate": 4.924369404980561e-06, "loss": 0.456, "step": 7826 }, { "epoch": 0.47663124562311604, "grad_norm": 1.1051012229939703, "learning_rate": 4.924349924859987e-06, "loss": 0.476, "step": 7827 }, { "epoch": 0.4766921413999939, "grad_norm": 1.0648419925777408, "learning_rate": 4.9243304422695374e-06, "loss": 0.4327, "step": 7828 }, { "epoch": 0.4767530371768718, "grad_norm": 1.0142298930498597, "learning_rate": 4.924310957209228e-06, "loss": 0.4278, "step": 7829 }, { "epoch": 0.47681393295374963, "grad_norm": 1.0532335206180257, "learning_rate": 4.924291469679083e-06, "loss": 0.408, "step": 7830 }, { "epoch": 0.47687482873062753, "grad_norm": 0.9756109566293438, "learning_rate": 4.924271979679119e-06, "loss": 0.5007, "step": 7831 }, { "epoch": 0.47693572450750543, "grad_norm": 1.012016040774282, "learning_rate": 4.9242524872093575e-06, "loss": 0.4141, "step": 7832 }, { "epoch": 0.4769966202843833, "grad_norm": 1.068312841801762, "learning_rate": 4.924232992269819e-06, "loss": 0.4693, "step": 7833 }, { "epoch": 0.4770575160612612, "grad_norm": 0.9828228611873799, "learning_rate": 4.924213494860521e-06, "loss": 0.4872, "step": 7834 }, { "epoch": 0.477118411838139, "grad_norm": 1.0955454204726225, "learning_rate": 4.924193994981485e-06, "loss": 0.4367, "step": 7835 }, { "epoch": 0.4771793076150169, "grad_norm": 1.0058604196175607, "learning_rate": 4.9241744926327315e-06, "loss": 0.4601, "step": 7836 }, { "epoch": 0.47724020339189477, "grad_norm": 1.0756495307780478, "learning_rate": 4.924154987814278e-06, "loss": 0.4524, "step": 7837 }, { "epoch": 0.47730109916877267, "grad_norm": 1.1146701611586034, "learning_rate": 4.924135480526146e-06, "loss": 0.5111, "step": 7838 }, { "epoch": 0.4773619949456505, "grad_norm": 1.025221167196256, "learning_rate": 4.924115970768356e-06, "loss": 0.5002, "step": 7839 }, { "epoch": 0.4774228907225284, "grad_norm": 0.9647559158481427, "learning_rate": 4.924096458540927e-06, "loss": 0.4874, "step": 7840 }, { "epoch": 0.47748378649940626, "grad_norm": 1.035138945727463, "learning_rate": 4.9240769438438785e-06, "loss": 0.5147, "step": 7841 }, { "epoch": 0.47754468227628416, "grad_norm": 1.1406159559528635, "learning_rate": 4.924057426677231e-06, "loss": 0.4346, "step": 7842 }, { "epoch": 0.477605578053162, "grad_norm": 1.0013573823825817, "learning_rate": 4.924037907041004e-06, "loss": 0.4629, "step": 7843 }, { "epoch": 0.4776664738300399, "grad_norm": 1.103614890509314, "learning_rate": 4.924018384935217e-06, "loss": 0.4007, "step": 7844 }, { "epoch": 0.47772736960691775, "grad_norm": 1.0356693897839127, "learning_rate": 4.923998860359892e-06, "loss": 0.4751, "step": 7845 }, { "epoch": 0.47778826538379565, "grad_norm": 0.901229651308117, "learning_rate": 4.923979333315046e-06, "loss": 0.4984, "step": 7846 }, { "epoch": 0.4778491611606735, "grad_norm": 0.9739306183076951, "learning_rate": 4.923959803800701e-06, "loss": 0.5115, "step": 7847 }, { "epoch": 0.4779100569375514, "grad_norm": 0.9514378991340509, "learning_rate": 4.923940271816876e-06, "loss": 0.4879, "step": 7848 }, { "epoch": 0.47797095271442924, "grad_norm": 0.9488148363461857, "learning_rate": 4.923920737363591e-06, "loss": 0.4686, "step": 7849 }, { "epoch": 0.47803184849130714, "grad_norm": 1.0033302033389198, "learning_rate": 4.923901200440866e-06, "loss": 0.4578, "step": 7850 }, { "epoch": 0.478092744268185, "grad_norm": 1.049315169645408, "learning_rate": 4.92388166104872e-06, "loss": 0.4004, "step": 7851 }, { "epoch": 0.4781536400450629, "grad_norm": 1.0706657059044082, "learning_rate": 4.923862119187175e-06, "loss": 0.4567, "step": 7852 }, { "epoch": 0.47821453582194073, "grad_norm": 1.0886684377502163, "learning_rate": 4.923842574856249e-06, "loss": 0.425, "step": 7853 }, { "epoch": 0.47827543159881863, "grad_norm": 1.0527286758684709, "learning_rate": 4.923823028055963e-06, "loss": 0.4584, "step": 7854 }, { "epoch": 0.4783363273756965, "grad_norm": 1.1751317684616565, "learning_rate": 4.923803478786336e-06, "loss": 0.4502, "step": 7855 }, { "epoch": 0.4783972231525744, "grad_norm": 0.9912939882228421, "learning_rate": 4.923783927047389e-06, "loss": 0.469, "step": 7856 }, { "epoch": 0.4784581189294522, "grad_norm": 0.9864251930832775, "learning_rate": 4.92376437283914e-06, "loss": 0.449, "step": 7857 }, { "epoch": 0.4785190147063301, "grad_norm": 0.9569558581210431, "learning_rate": 4.923744816161612e-06, "loss": 0.4898, "step": 7858 }, { "epoch": 0.47857991048320797, "grad_norm": 1.028970603476688, "learning_rate": 4.923725257014822e-06, "loss": 0.4894, "step": 7859 }, { "epoch": 0.47864080626008587, "grad_norm": 1.0625748050054888, "learning_rate": 4.9237056953987916e-06, "loss": 0.4678, "step": 7860 }, { "epoch": 0.4787017020369637, "grad_norm": 1.0678047291285526, "learning_rate": 4.92368613131354e-06, "loss": 0.3885, "step": 7861 }, { "epoch": 0.4787625978138416, "grad_norm": 1.0585342749505084, "learning_rate": 4.9236665647590874e-06, "loss": 0.4159, "step": 7862 }, { "epoch": 0.47882349359071946, "grad_norm": 1.065197045642679, "learning_rate": 4.923646995735453e-06, "loss": 0.4147, "step": 7863 }, { "epoch": 0.47888438936759736, "grad_norm": 0.9960969524117911, "learning_rate": 4.923627424242659e-06, "loss": 0.4179, "step": 7864 }, { "epoch": 0.4789452851444752, "grad_norm": 1.1464200660436568, "learning_rate": 4.923607850280723e-06, "loss": 0.4558, "step": 7865 }, { "epoch": 0.4790061809213531, "grad_norm": 1.09375029063638, "learning_rate": 4.923588273849665e-06, "loss": 0.4398, "step": 7866 }, { "epoch": 0.47906707669823095, "grad_norm": 1.040824827206896, "learning_rate": 4.923568694949507e-06, "loss": 0.4103, "step": 7867 }, { "epoch": 0.47912797247510885, "grad_norm": 0.998775706236044, "learning_rate": 4.923549113580267e-06, "loss": 0.4311, "step": 7868 }, { "epoch": 0.4791888682519867, "grad_norm": 1.1314448509269035, "learning_rate": 4.923529529741965e-06, "loss": 0.4146, "step": 7869 }, { "epoch": 0.4792497640288646, "grad_norm": 0.9432483194717016, "learning_rate": 4.9235099434346225e-06, "loss": 0.5085, "step": 7870 }, { "epoch": 0.47931065980574245, "grad_norm": 0.9863752728617716, "learning_rate": 4.923490354658258e-06, "loss": 0.5114, "step": 7871 }, { "epoch": 0.47937155558262035, "grad_norm": 1.0558458624143452, "learning_rate": 4.923470763412892e-06, "loss": 0.4175, "step": 7872 }, { "epoch": 0.47943245135949825, "grad_norm": 1.0294227874959367, "learning_rate": 4.923451169698544e-06, "loss": 0.4768, "step": 7873 }, { "epoch": 0.4794933471363761, "grad_norm": 1.0390912647741328, "learning_rate": 4.923431573515234e-06, "loss": 0.4202, "step": 7874 }, { "epoch": 0.479554242913254, "grad_norm": 0.9756542426658389, "learning_rate": 4.923411974862983e-06, "loss": 0.4772, "step": 7875 }, { "epoch": 0.47961513869013184, "grad_norm": 1.0077304300998033, "learning_rate": 4.92339237374181e-06, "loss": 0.4435, "step": 7876 }, { "epoch": 0.47967603446700974, "grad_norm": 1.0563469741227343, "learning_rate": 4.923372770151735e-06, "loss": 0.4201, "step": 7877 }, { "epoch": 0.4797369302438876, "grad_norm": 1.0220057016281017, "learning_rate": 4.923353164092779e-06, "loss": 0.4656, "step": 7878 }, { "epoch": 0.4797978260207655, "grad_norm": 1.012410455358937, "learning_rate": 4.923333555564961e-06, "loss": 0.3816, "step": 7879 }, { "epoch": 0.47985872179764333, "grad_norm": 1.0606797352226653, "learning_rate": 4.923313944568301e-06, "loss": 0.4383, "step": 7880 }, { "epoch": 0.47991961757452123, "grad_norm": 1.0535545012653686, "learning_rate": 4.923294331102819e-06, "loss": 0.5192, "step": 7881 }, { "epoch": 0.4799805133513991, "grad_norm": 0.982587903395987, "learning_rate": 4.923274715168535e-06, "loss": 0.4337, "step": 7882 }, { "epoch": 0.480041409128277, "grad_norm": 1.019434951889444, "learning_rate": 4.923255096765469e-06, "loss": 0.4442, "step": 7883 }, { "epoch": 0.4801023049051548, "grad_norm": 0.9972563483514296, "learning_rate": 4.9232354758936415e-06, "loss": 0.3927, "step": 7884 }, { "epoch": 0.4801632006820327, "grad_norm": 1.0352109061086123, "learning_rate": 4.923215852553072e-06, "loss": 0.3827, "step": 7885 }, { "epoch": 0.48022409645891057, "grad_norm": 1.0410378042838015, "learning_rate": 4.923196226743781e-06, "loss": 0.4139, "step": 7886 }, { "epoch": 0.48028499223578847, "grad_norm": 1.0554983447814543, "learning_rate": 4.9231765984657866e-06, "loss": 0.4023, "step": 7887 }, { "epoch": 0.4803458880126663, "grad_norm": 1.0165822172630277, "learning_rate": 4.9231569677191115e-06, "loss": 0.5247, "step": 7888 }, { "epoch": 0.4804067837895442, "grad_norm": 0.9837561501983513, "learning_rate": 4.923137334503774e-06, "loss": 0.4747, "step": 7889 }, { "epoch": 0.48046767956642206, "grad_norm": 0.9784759590367564, "learning_rate": 4.923117698819795e-06, "loss": 0.4484, "step": 7890 }, { "epoch": 0.48052857534329996, "grad_norm": 0.9658029965958851, "learning_rate": 4.923098060667193e-06, "loss": 0.4797, "step": 7891 }, { "epoch": 0.4805894711201778, "grad_norm": 1.0501407103536022, "learning_rate": 4.92307842004599e-06, "loss": 0.4234, "step": 7892 }, { "epoch": 0.4806503668970557, "grad_norm": 1.0480612055237402, "learning_rate": 4.923058776956205e-06, "loss": 0.4004, "step": 7893 }, { "epoch": 0.48071126267393355, "grad_norm": 0.9853367210086413, "learning_rate": 4.923039131397859e-06, "loss": 0.5272, "step": 7894 }, { "epoch": 0.48077215845081145, "grad_norm": 0.9643620351291262, "learning_rate": 4.92301948337097e-06, "loss": 0.4449, "step": 7895 }, { "epoch": 0.4808330542276893, "grad_norm": 1.025990693418907, "learning_rate": 4.922999832875559e-06, "loss": 0.4862, "step": 7896 }, { "epoch": 0.4808939500045672, "grad_norm": 0.9218246073732708, "learning_rate": 4.922980179911646e-06, "loss": 0.4807, "step": 7897 }, { "epoch": 0.48095484578144504, "grad_norm": 1.0748152230463843, "learning_rate": 4.922960524479252e-06, "loss": 0.4776, "step": 7898 }, { "epoch": 0.48101574155832294, "grad_norm": 1.0416168583769394, "learning_rate": 4.922940866578395e-06, "loss": 0.4524, "step": 7899 }, { "epoch": 0.4810766373352008, "grad_norm": 1.1152748754660538, "learning_rate": 4.922921206209098e-06, "loss": 0.3931, "step": 7900 }, { "epoch": 0.4811375331120787, "grad_norm": 1.113954344124321, "learning_rate": 4.9229015433713775e-06, "loss": 0.3787, "step": 7901 }, { "epoch": 0.48119842888895653, "grad_norm": 0.950233073059309, "learning_rate": 4.922881878065257e-06, "loss": 0.5285, "step": 7902 }, { "epoch": 0.48125932466583443, "grad_norm": 0.9735875391570572, "learning_rate": 4.922862210290753e-06, "loss": 0.4997, "step": 7903 }, { "epoch": 0.4813202204427123, "grad_norm": 1.0038621293031798, "learning_rate": 4.922842540047888e-06, "loss": 0.4674, "step": 7904 }, { "epoch": 0.4813811162195902, "grad_norm": 1.0259213342883926, "learning_rate": 4.922822867336682e-06, "loss": 0.4136, "step": 7905 }, { "epoch": 0.481442011996468, "grad_norm": 1.057572075381844, "learning_rate": 4.922803192157154e-06, "loss": 0.4193, "step": 7906 }, { "epoch": 0.4815029077733459, "grad_norm": 1.0486919384583269, "learning_rate": 4.922783514509324e-06, "loss": 0.4928, "step": 7907 }, { "epoch": 0.48156380355022377, "grad_norm": 0.997656033443066, "learning_rate": 4.9227638343932134e-06, "loss": 0.477, "step": 7908 }, { "epoch": 0.48162469932710167, "grad_norm": 0.97036659352707, "learning_rate": 4.922744151808841e-06, "loss": 0.4477, "step": 7909 }, { "epoch": 0.4816855951039795, "grad_norm": 1.0927254671134752, "learning_rate": 4.9227244667562275e-06, "loss": 0.4511, "step": 7910 }, { "epoch": 0.4817464908808574, "grad_norm": 0.9891435077760861, "learning_rate": 4.922704779235392e-06, "loss": 0.4425, "step": 7911 }, { "epoch": 0.48180738665773526, "grad_norm": 1.0237762323793513, "learning_rate": 4.922685089246356e-06, "loss": 0.433, "step": 7912 }, { "epoch": 0.48186828243461316, "grad_norm": 0.962276916317085, "learning_rate": 4.922665396789138e-06, "loss": 0.504, "step": 7913 }, { "epoch": 0.48192917821149106, "grad_norm": 1.034595092271063, "learning_rate": 4.92264570186376e-06, "loss": 0.4528, "step": 7914 }, { "epoch": 0.4819900739883689, "grad_norm": 1.0804426991594718, "learning_rate": 4.9226260044702405e-06, "loss": 0.4481, "step": 7915 }, { "epoch": 0.4820509697652468, "grad_norm": 1.0128760106259007, "learning_rate": 4.922606304608599e-06, "loss": 0.4265, "step": 7916 }, { "epoch": 0.48211186554212465, "grad_norm": 0.9494994184209335, "learning_rate": 4.922586602278857e-06, "loss": 0.4495, "step": 7917 }, { "epoch": 0.48217276131900255, "grad_norm": 0.9782591099078753, "learning_rate": 4.922566897481035e-06, "loss": 0.5181, "step": 7918 }, { "epoch": 0.4822336570958804, "grad_norm": 1.0967188623609754, "learning_rate": 4.922547190215151e-06, "loss": 0.4658, "step": 7919 }, { "epoch": 0.4822945528727583, "grad_norm": 1.0096062766855762, "learning_rate": 4.922527480481227e-06, "loss": 0.459, "step": 7920 }, { "epoch": 0.48235544864963614, "grad_norm": 1.0886457061784531, "learning_rate": 4.922507768279283e-06, "loss": 0.4479, "step": 7921 }, { "epoch": 0.48241634442651404, "grad_norm": 1.10100575342885, "learning_rate": 4.922488053609338e-06, "loss": 0.3825, "step": 7922 }, { "epoch": 0.4824772402033919, "grad_norm": 1.0741119647976012, "learning_rate": 4.922468336471412e-06, "loss": 0.3835, "step": 7923 }, { "epoch": 0.4825381359802698, "grad_norm": 1.0873006902207674, "learning_rate": 4.922448616865525e-06, "loss": 0.4642, "step": 7924 }, { "epoch": 0.48259903175714763, "grad_norm": 1.0974956763657853, "learning_rate": 4.922428894791699e-06, "loss": 0.4615, "step": 7925 }, { "epoch": 0.48265992753402553, "grad_norm": 1.0040826452044647, "learning_rate": 4.922409170249952e-06, "loss": 0.4693, "step": 7926 }, { "epoch": 0.4827208233109034, "grad_norm": 0.9921562449181646, "learning_rate": 4.922389443240306e-06, "loss": 0.5262, "step": 7927 }, { "epoch": 0.4827817190877813, "grad_norm": 0.9862638137260942, "learning_rate": 4.922369713762779e-06, "loss": 0.3964, "step": 7928 }, { "epoch": 0.4828426148646591, "grad_norm": 1.1053665400020976, "learning_rate": 4.922349981817393e-06, "loss": 0.4081, "step": 7929 }, { "epoch": 0.482903510641537, "grad_norm": 1.0117168529619478, "learning_rate": 4.9223302474041664e-06, "loss": 0.4727, "step": 7930 }, { "epoch": 0.48296440641841487, "grad_norm": 1.0535484838524576, "learning_rate": 4.9223105105231204e-06, "loss": 0.4622, "step": 7931 }, { "epoch": 0.48302530219529277, "grad_norm": 1.0233342370922682, "learning_rate": 4.922290771174275e-06, "loss": 0.4771, "step": 7932 }, { "epoch": 0.4830861979721706, "grad_norm": 1.0793297446876529, "learning_rate": 4.92227102935765e-06, "loss": 0.4318, "step": 7933 }, { "epoch": 0.4831470937490485, "grad_norm": 0.9829002043413015, "learning_rate": 4.9222512850732655e-06, "loss": 0.4368, "step": 7934 }, { "epoch": 0.48320798952592636, "grad_norm": 1.1107457879530838, "learning_rate": 4.9222315383211414e-06, "loss": 0.4309, "step": 7935 }, { "epoch": 0.48326888530280426, "grad_norm": 1.0121964804552517, "learning_rate": 4.922211789101299e-06, "loss": 0.4738, "step": 7936 }, { "epoch": 0.4833297810796821, "grad_norm": 1.0193175313572862, "learning_rate": 4.922192037413757e-06, "loss": 0.4547, "step": 7937 }, { "epoch": 0.48339067685656, "grad_norm": 0.982600459038201, "learning_rate": 4.922172283258536e-06, "loss": 0.4339, "step": 7938 }, { "epoch": 0.48345157263343785, "grad_norm": 0.9854948460403133, "learning_rate": 4.922152526635656e-06, "loss": 0.463, "step": 7939 }, { "epoch": 0.48351246841031575, "grad_norm": 1.020066827575857, "learning_rate": 4.922132767545138e-06, "loss": 0.4729, "step": 7940 }, { "epoch": 0.4835733641871936, "grad_norm": 0.9033682970621506, "learning_rate": 4.922113005987001e-06, "loss": 0.4624, "step": 7941 }, { "epoch": 0.4836342599640715, "grad_norm": 0.9636125307591183, "learning_rate": 4.9220932419612666e-06, "loss": 0.4428, "step": 7942 }, { "epoch": 0.48369515574094935, "grad_norm": 1.1197644814616747, "learning_rate": 4.922073475467953e-06, "loss": 0.4261, "step": 7943 }, { "epoch": 0.48375605151782725, "grad_norm": 1.0247417378541759, "learning_rate": 4.922053706507082e-06, "loss": 0.523, "step": 7944 }, { "epoch": 0.4838169472947051, "grad_norm": 0.9794658360629525, "learning_rate": 4.922033935078672e-06, "loss": 0.4265, "step": 7945 }, { "epoch": 0.483877843071583, "grad_norm": 1.0347247697944977, "learning_rate": 4.922014161182745e-06, "loss": 0.4623, "step": 7946 }, { "epoch": 0.48393873884846084, "grad_norm": 0.980059293928669, "learning_rate": 4.92199438481932e-06, "loss": 0.4277, "step": 7947 }, { "epoch": 0.48399963462533874, "grad_norm": 0.975003791967864, "learning_rate": 4.921974605988418e-06, "loss": 0.3828, "step": 7948 }, { "epoch": 0.4840605304022166, "grad_norm": 0.9351412627305169, "learning_rate": 4.921954824690058e-06, "loss": 0.4645, "step": 7949 }, { "epoch": 0.4841214261790945, "grad_norm": 1.0823464672155862, "learning_rate": 4.9219350409242615e-06, "loss": 0.4899, "step": 7950 }, { "epoch": 0.48418232195597233, "grad_norm": 0.9845235159648198, "learning_rate": 4.9219152546910475e-06, "loss": 0.4547, "step": 7951 }, { "epoch": 0.48424321773285023, "grad_norm": 1.1136223173417257, "learning_rate": 4.921895465990436e-06, "loss": 0.4401, "step": 7952 }, { "epoch": 0.4843041135097281, "grad_norm": 0.9742489774588703, "learning_rate": 4.9218756748224486e-06, "loss": 0.4628, "step": 7953 }, { "epoch": 0.484365009286606, "grad_norm": 1.0309690689819015, "learning_rate": 4.921855881187104e-06, "loss": 0.4847, "step": 7954 }, { "epoch": 0.4844259050634839, "grad_norm": 0.9568106475994315, "learning_rate": 4.921836085084424e-06, "loss": 0.4893, "step": 7955 }, { "epoch": 0.4844868008403617, "grad_norm": 1.0237703878164413, "learning_rate": 4.921816286514427e-06, "loss": 0.4647, "step": 7956 }, { "epoch": 0.4845476966172396, "grad_norm": 1.012699575184095, "learning_rate": 4.9217964854771335e-06, "loss": 0.4609, "step": 7957 }, { "epoch": 0.48460859239411747, "grad_norm": 1.0540713888785154, "learning_rate": 4.9217766819725645e-06, "loss": 0.3774, "step": 7958 }, { "epoch": 0.48466948817099537, "grad_norm": 1.146130131286072, "learning_rate": 4.92175687600074e-06, "loss": 0.421, "step": 7959 }, { "epoch": 0.4847303839478732, "grad_norm": 0.94137690640435, "learning_rate": 4.92173706756168e-06, "loss": 0.4549, "step": 7960 }, { "epoch": 0.4847912797247511, "grad_norm": 1.0637084758050934, "learning_rate": 4.921717256655404e-06, "loss": 0.4453, "step": 7961 }, { "epoch": 0.48485217550162896, "grad_norm": 1.1546618636732184, "learning_rate": 4.921697443281933e-06, "loss": 0.4739, "step": 7962 }, { "epoch": 0.48491307127850686, "grad_norm": 0.9792338277182128, "learning_rate": 4.921677627441287e-06, "loss": 0.4074, "step": 7963 }, { "epoch": 0.4849739670553847, "grad_norm": 1.0206599422489484, "learning_rate": 4.921657809133487e-06, "loss": 0.4661, "step": 7964 }, { "epoch": 0.4850348628322626, "grad_norm": 1.0144802303807454, "learning_rate": 4.921637988358552e-06, "loss": 0.4313, "step": 7965 }, { "epoch": 0.48509575860914045, "grad_norm": 1.1039674176391412, "learning_rate": 4.921618165116502e-06, "loss": 0.4153, "step": 7966 }, { "epoch": 0.48515665438601835, "grad_norm": 1.019893603968001, "learning_rate": 4.921598339407359e-06, "loss": 0.4533, "step": 7967 }, { "epoch": 0.4852175501628962, "grad_norm": 0.9428209112300382, "learning_rate": 4.92157851123114e-06, "loss": 0.4846, "step": 7968 }, { "epoch": 0.4852784459397741, "grad_norm": 0.9872696093579632, "learning_rate": 4.9215586805878685e-06, "loss": 0.4866, "step": 7969 }, { "epoch": 0.48533934171665194, "grad_norm": 0.9417583852811761, "learning_rate": 4.9215388474775625e-06, "loss": 0.5044, "step": 7970 }, { "epoch": 0.48540023749352984, "grad_norm": 1.141420263801147, "learning_rate": 4.921519011900244e-06, "loss": 0.3828, "step": 7971 }, { "epoch": 0.4854611332704077, "grad_norm": 1.051340934903097, "learning_rate": 4.921499173855932e-06, "loss": 0.5194, "step": 7972 }, { "epoch": 0.4855220290472856, "grad_norm": 0.9908905632877723, "learning_rate": 4.921479333344647e-06, "loss": 0.4342, "step": 7973 }, { "epoch": 0.48558292482416343, "grad_norm": 1.0011673432853476, "learning_rate": 4.921459490366409e-06, "loss": 0.407, "step": 7974 }, { "epoch": 0.48564382060104133, "grad_norm": 1.0951426784345395, "learning_rate": 4.921439644921238e-06, "loss": 0.4361, "step": 7975 }, { "epoch": 0.4857047163779192, "grad_norm": 0.9488355706300599, "learning_rate": 4.921419797009156e-06, "loss": 0.481, "step": 7976 }, { "epoch": 0.4857656121547971, "grad_norm": 1.0361587797132952, "learning_rate": 4.921399946630181e-06, "loss": 0.4131, "step": 7977 }, { "epoch": 0.4858265079316749, "grad_norm": 1.0700163177716675, "learning_rate": 4.9213800937843336e-06, "loss": 0.4492, "step": 7978 }, { "epoch": 0.4858874037085528, "grad_norm": 1.0210193020178382, "learning_rate": 4.9213602384716355e-06, "loss": 0.4565, "step": 7979 }, { "epoch": 0.48594829948543067, "grad_norm": 0.9775229118105335, "learning_rate": 4.921340380692105e-06, "loss": 0.4373, "step": 7980 }, { "epoch": 0.48600919526230857, "grad_norm": 1.055355479585573, "learning_rate": 4.9213205204457635e-06, "loss": 0.3894, "step": 7981 }, { "epoch": 0.4860700910391864, "grad_norm": 0.9951700075618828, "learning_rate": 4.921300657732632e-06, "loss": 0.4398, "step": 7982 }, { "epoch": 0.4861309868160643, "grad_norm": 1.0054379686572856, "learning_rate": 4.921280792552728e-06, "loss": 0.522, "step": 7983 }, { "epoch": 0.48619188259294216, "grad_norm": 1.0807861038525592, "learning_rate": 4.9212609249060745e-06, "loss": 0.4038, "step": 7984 }, { "epoch": 0.48625277836982006, "grad_norm": 1.0232286765378233, "learning_rate": 4.92124105479269e-06, "loss": 0.4687, "step": 7985 }, { "epoch": 0.4863136741466979, "grad_norm": 1.013742788583379, "learning_rate": 4.921221182212596e-06, "loss": 0.472, "step": 7986 }, { "epoch": 0.4863745699235758, "grad_norm": 1.0486032539845596, "learning_rate": 4.921201307165813e-06, "loss": 0.3924, "step": 7987 }, { "epoch": 0.48643546570045365, "grad_norm": 1.0274657954816304, "learning_rate": 4.92118142965236e-06, "loss": 0.4823, "step": 7988 }, { "epoch": 0.48649636147733155, "grad_norm": 1.056918451773835, "learning_rate": 4.921161549672257e-06, "loss": 0.48, "step": 7989 }, { "epoch": 0.4865572572542094, "grad_norm": 1.062226007709204, "learning_rate": 4.921141667225525e-06, "loss": 0.4237, "step": 7990 }, { "epoch": 0.4866181530310873, "grad_norm": 0.9846895147137211, "learning_rate": 4.921121782312185e-06, "loss": 0.4432, "step": 7991 }, { "epoch": 0.48667904880796514, "grad_norm": 1.1226596774636786, "learning_rate": 4.921101894932255e-06, "loss": 0.4541, "step": 7992 }, { "epoch": 0.48673994458484304, "grad_norm": 0.9629659979917229, "learning_rate": 4.921082005085758e-06, "loss": 0.4598, "step": 7993 }, { "epoch": 0.4868008403617209, "grad_norm": 0.9164007033590652, "learning_rate": 4.921062112772713e-06, "loss": 0.4888, "step": 7994 }, { "epoch": 0.4868617361385988, "grad_norm": 0.8941515397298405, "learning_rate": 4.92104221799314e-06, "loss": 0.551, "step": 7995 }, { "epoch": 0.4869226319154767, "grad_norm": 1.0022750263006093, "learning_rate": 4.92102232074706e-06, "loss": 0.3735, "step": 7996 }, { "epoch": 0.48698352769235453, "grad_norm": 1.0089537295625515, "learning_rate": 4.921002421034492e-06, "loss": 0.4162, "step": 7997 }, { "epoch": 0.48704442346923243, "grad_norm": 1.057787796714543, "learning_rate": 4.920982518855457e-06, "loss": 0.3961, "step": 7998 }, { "epoch": 0.4871053192461103, "grad_norm": 0.9977302033781488, "learning_rate": 4.920962614209975e-06, "loss": 0.4299, "step": 7999 }, { "epoch": 0.4871662150229882, "grad_norm": 1.0020680499173515, "learning_rate": 4.9209427070980676e-06, "loss": 0.4731, "step": 8000 }, { "epoch": 0.487227110799866, "grad_norm": 1.0709666632629997, "learning_rate": 4.920922797519754e-06, "loss": 0.4621, "step": 8001 }, { "epoch": 0.4872880065767439, "grad_norm": 1.0080617782377896, "learning_rate": 4.920902885475054e-06, "loss": 0.5048, "step": 8002 }, { "epoch": 0.48734890235362177, "grad_norm": 1.0112079367883446, "learning_rate": 4.920882970963989e-06, "loss": 0.5761, "step": 8003 }, { "epoch": 0.48740979813049967, "grad_norm": 1.008416969047499, "learning_rate": 4.9208630539865785e-06, "loss": 0.4133, "step": 8004 }, { "epoch": 0.4874706939073775, "grad_norm": 0.995478563914207, "learning_rate": 4.9208431345428435e-06, "loss": 0.4179, "step": 8005 }, { "epoch": 0.4875315896842554, "grad_norm": 1.0003357637162318, "learning_rate": 4.920823212632803e-06, "loss": 0.4939, "step": 8006 }, { "epoch": 0.48759248546113326, "grad_norm": 0.9835572417096737, "learning_rate": 4.9208032882564795e-06, "loss": 0.4581, "step": 8007 }, { "epoch": 0.48765338123801116, "grad_norm": 1.0155853236204315, "learning_rate": 4.920783361413891e-06, "loss": 0.4896, "step": 8008 }, { "epoch": 0.487714277014889, "grad_norm": 1.028835160197496, "learning_rate": 4.920763432105059e-06, "loss": 0.5508, "step": 8009 }, { "epoch": 0.4877751727917669, "grad_norm": 1.0450611019841203, "learning_rate": 4.920743500330003e-06, "loss": 0.4605, "step": 8010 }, { "epoch": 0.48783606856864475, "grad_norm": 0.9565060507582658, "learning_rate": 4.920723566088744e-06, "loss": 0.437, "step": 8011 }, { "epoch": 0.48789696434552265, "grad_norm": 1.088734881646107, "learning_rate": 4.9207036293813026e-06, "loss": 0.4037, "step": 8012 }, { "epoch": 0.4879578601224005, "grad_norm": 1.1322386613920252, "learning_rate": 4.9206836902076985e-06, "loss": 0.453, "step": 8013 }, { "epoch": 0.4880187558992784, "grad_norm": 1.0494468425418582, "learning_rate": 4.920663748567952e-06, "loss": 0.4546, "step": 8014 }, { "epoch": 0.48807965167615625, "grad_norm": 0.931701498021204, "learning_rate": 4.9206438044620845e-06, "loss": 0.4252, "step": 8015 }, { "epoch": 0.48814054745303415, "grad_norm": 1.0429978544273575, "learning_rate": 4.920623857890114e-06, "loss": 0.4427, "step": 8016 }, { "epoch": 0.488201443229912, "grad_norm": 0.9313364420329617, "learning_rate": 4.920603908852063e-06, "loss": 0.4496, "step": 8017 }, { "epoch": 0.4882623390067899, "grad_norm": 1.0620471949480574, "learning_rate": 4.920583957347951e-06, "loss": 0.3813, "step": 8018 }, { "epoch": 0.48832323478366774, "grad_norm": 1.038858768194379, "learning_rate": 4.920564003377799e-06, "loss": 0.4627, "step": 8019 }, { "epoch": 0.48838413056054564, "grad_norm": 1.0220912629193024, "learning_rate": 4.920544046941626e-06, "loss": 0.5205, "step": 8020 }, { "epoch": 0.4884450263374235, "grad_norm": 0.9875015018540917, "learning_rate": 4.920524088039452e-06, "loss": 0.4526, "step": 8021 }, { "epoch": 0.4885059221143014, "grad_norm": 1.0379279305164748, "learning_rate": 4.9205041266713e-06, "loss": 0.4752, "step": 8022 }, { "epoch": 0.48856681789117923, "grad_norm": 0.9365778429382883, "learning_rate": 4.920484162837188e-06, "loss": 0.4747, "step": 8023 }, { "epoch": 0.48862771366805713, "grad_norm": 0.9571074726097896, "learning_rate": 4.920464196537138e-06, "loss": 0.4742, "step": 8024 }, { "epoch": 0.488688609444935, "grad_norm": 1.0347596720019425, "learning_rate": 4.920444227771168e-06, "loss": 0.5049, "step": 8025 }, { "epoch": 0.4887495052218129, "grad_norm": 1.0654210190093547, "learning_rate": 4.920424256539301e-06, "loss": 0.4631, "step": 8026 }, { "epoch": 0.4888104009986907, "grad_norm": 0.9159655943941315, "learning_rate": 4.920404282841556e-06, "loss": 0.4714, "step": 8027 }, { "epoch": 0.4888712967755686, "grad_norm": 0.9985151889739446, "learning_rate": 4.9203843066779524e-06, "loss": 0.4396, "step": 8028 }, { "epoch": 0.48893219255244647, "grad_norm": 0.9441608371001387, "learning_rate": 4.920364328048512e-06, "loss": 0.4671, "step": 8029 }, { "epoch": 0.48899308832932437, "grad_norm": 0.9327922066741289, "learning_rate": 4.920344346953255e-06, "loss": 0.4462, "step": 8030 }, { "epoch": 0.4890539841062022, "grad_norm": 0.9859026724634845, "learning_rate": 4.920324363392201e-06, "loss": 0.4854, "step": 8031 }, { "epoch": 0.4891148798830801, "grad_norm": 1.016732243714942, "learning_rate": 4.920304377365371e-06, "loss": 0.4327, "step": 8032 }, { "epoch": 0.48917577565995796, "grad_norm": 0.9280353992351276, "learning_rate": 4.920284388872786e-06, "loss": 0.4894, "step": 8033 }, { "epoch": 0.48923667143683586, "grad_norm": 1.049832329509541, "learning_rate": 4.920264397914465e-06, "loss": 0.4702, "step": 8034 }, { "epoch": 0.4892975672137137, "grad_norm": 1.0038479692032838, "learning_rate": 4.920244404490429e-06, "loss": 0.3902, "step": 8035 }, { "epoch": 0.4893584629905916, "grad_norm": 1.1355634063794144, "learning_rate": 4.920224408600698e-06, "loss": 0.3989, "step": 8036 }, { "epoch": 0.4894193587674695, "grad_norm": 1.1194513614812003, "learning_rate": 4.920204410245294e-06, "loss": 0.4502, "step": 8037 }, { "epoch": 0.48948025454434735, "grad_norm": 0.9565423245759755, "learning_rate": 4.920184409424234e-06, "loss": 0.4429, "step": 8038 }, { "epoch": 0.48954115032122525, "grad_norm": 0.9160655407848018, "learning_rate": 4.920164406137542e-06, "loss": 0.4506, "step": 8039 }, { "epoch": 0.4896020460981031, "grad_norm": 1.08957844015387, "learning_rate": 4.920144400385236e-06, "loss": 0.4261, "step": 8040 }, { "epoch": 0.489662941874981, "grad_norm": 1.0861452112358896, "learning_rate": 4.920124392167338e-06, "loss": 0.3868, "step": 8041 }, { "epoch": 0.48972383765185884, "grad_norm": 1.0264672085058304, "learning_rate": 4.920104381483867e-06, "loss": 0.4727, "step": 8042 }, { "epoch": 0.48978473342873674, "grad_norm": 0.9026135435420843, "learning_rate": 4.9200843683348446e-06, "loss": 0.503, "step": 8043 }, { "epoch": 0.4898456292056146, "grad_norm": 1.0385045836468512, "learning_rate": 4.92006435272029e-06, "loss": 0.434, "step": 8044 }, { "epoch": 0.4899065249824925, "grad_norm": 1.074960486874617, "learning_rate": 4.9200443346402235e-06, "loss": 0.4677, "step": 8045 }, { "epoch": 0.48996742075937033, "grad_norm": 1.1396488737696826, "learning_rate": 4.920024314094667e-06, "loss": 0.4076, "step": 8046 }, { "epoch": 0.49002831653624823, "grad_norm": 1.0087960716999669, "learning_rate": 4.92000429108364e-06, "loss": 0.4715, "step": 8047 }, { "epoch": 0.4900892123131261, "grad_norm": 1.0804118681311017, "learning_rate": 4.919984265607163e-06, "loss": 0.4288, "step": 8048 }, { "epoch": 0.490150108090004, "grad_norm": 1.0907159838100127, "learning_rate": 4.9199642376652555e-06, "loss": 0.396, "step": 8049 }, { "epoch": 0.4902110038668818, "grad_norm": 1.004740332843962, "learning_rate": 4.919944207257939e-06, "loss": 0.4962, "step": 8050 }, { "epoch": 0.4902718996437597, "grad_norm": 1.0138651237532124, "learning_rate": 4.919924174385235e-06, "loss": 0.4034, "step": 8051 }, { "epoch": 0.49033279542063757, "grad_norm": 0.9694637976437604, "learning_rate": 4.919904139047161e-06, "loss": 0.4605, "step": 8052 }, { "epoch": 0.49039369119751547, "grad_norm": 1.034784180853784, "learning_rate": 4.9198841012437404e-06, "loss": 0.3999, "step": 8053 }, { "epoch": 0.4904545869743933, "grad_norm": 0.984529937508583, "learning_rate": 4.9198640609749914e-06, "loss": 0.4564, "step": 8054 }, { "epoch": 0.4905154827512712, "grad_norm": 0.9594560793883391, "learning_rate": 4.919844018240934e-06, "loss": 0.4697, "step": 8055 }, { "epoch": 0.49057637852814906, "grad_norm": 1.0413165533246962, "learning_rate": 4.919823973041592e-06, "loss": 0.4786, "step": 8056 }, { "epoch": 0.49063727430502696, "grad_norm": 1.013063647999685, "learning_rate": 4.919803925376983e-06, "loss": 0.486, "step": 8057 }, { "epoch": 0.4906981700819048, "grad_norm": 1.0079955392914566, "learning_rate": 4.919783875247127e-06, "loss": 0.4332, "step": 8058 }, { "epoch": 0.4907590658587827, "grad_norm": 1.0180684053629159, "learning_rate": 4.919763822652047e-06, "loss": 0.45, "step": 8059 }, { "epoch": 0.49081996163566055, "grad_norm": 1.0017973175459582, "learning_rate": 4.919743767591761e-06, "loss": 0.4243, "step": 8060 }, { "epoch": 0.49088085741253845, "grad_norm": 1.0970847601040552, "learning_rate": 4.91972371006629e-06, "loss": 0.3721, "step": 8061 }, { "epoch": 0.4909417531894163, "grad_norm": 1.084820527115071, "learning_rate": 4.919703650075655e-06, "loss": 0.4333, "step": 8062 }, { "epoch": 0.4910026489662942, "grad_norm": 1.0546884248865018, "learning_rate": 4.919683587619877e-06, "loss": 0.4134, "step": 8063 }, { "epoch": 0.49106354474317204, "grad_norm": 0.9535205026260998, "learning_rate": 4.919663522698975e-06, "loss": 0.4485, "step": 8064 }, { "epoch": 0.49112444052004994, "grad_norm": 0.9655064828575778, "learning_rate": 4.919643455312971e-06, "loss": 0.4708, "step": 8065 }, { "epoch": 0.4911853362969278, "grad_norm": 1.0159595213059351, "learning_rate": 4.9196233854618836e-06, "loss": 0.4662, "step": 8066 }, { "epoch": 0.4912462320738057, "grad_norm": 1.0238250873934178, "learning_rate": 4.9196033131457345e-06, "loss": 0.4473, "step": 8067 }, { "epoch": 0.49130712785068353, "grad_norm": 1.0508095589744568, "learning_rate": 4.919583238364543e-06, "loss": 0.4326, "step": 8068 }, { "epoch": 0.49136802362756143, "grad_norm": 0.9602103110086964, "learning_rate": 4.919563161118332e-06, "loss": 0.4399, "step": 8069 }, { "epoch": 0.4914289194044393, "grad_norm": 1.084631688693081, "learning_rate": 4.91954308140712e-06, "loss": 0.4614, "step": 8070 }, { "epoch": 0.4914898151813172, "grad_norm": 1.0137383357942342, "learning_rate": 4.919522999230927e-06, "loss": 0.3403, "step": 8071 }, { "epoch": 0.491550710958195, "grad_norm": 1.112084809382153, "learning_rate": 4.919502914589774e-06, "loss": 0.5026, "step": 8072 }, { "epoch": 0.4916116067350729, "grad_norm": 1.0375613155320973, "learning_rate": 4.9194828274836835e-06, "loss": 0.4953, "step": 8073 }, { "epoch": 0.49167250251195077, "grad_norm": 1.0408901959003212, "learning_rate": 4.919462737912673e-06, "loss": 0.4469, "step": 8074 }, { "epoch": 0.49173339828882867, "grad_norm": 0.9748469525468232, "learning_rate": 4.919442645876764e-06, "loss": 0.4996, "step": 8075 }, { "epoch": 0.4917942940657065, "grad_norm": 0.9684985926461172, "learning_rate": 4.919422551375978e-06, "loss": 0.4577, "step": 8076 }, { "epoch": 0.4918551898425844, "grad_norm": 0.9724524290113563, "learning_rate": 4.919402454410334e-06, "loss": 0.4783, "step": 8077 }, { "epoch": 0.4919160856194623, "grad_norm": 1.089651621742167, "learning_rate": 4.919382354979853e-06, "loss": 0.4295, "step": 8078 }, { "epoch": 0.49197698139634016, "grad_norm": 1.089079732592264, "learning_rate": 4.919362253084555e-06, "loss": 0.4471, "step": 8079 }, { "epoch": 0.49203787717321806, "grad_norm": 0.9920573749595991, "learning_rate": 4.919342148724462e-06, "loss": 0.4551, "step": 8080 }, { "epoch": 0.4920987729500959, "grad_norm": 0.946541720536682, "learning_rate": 4.919322041899593e-06, "loss": 0.4318, "step": 8081 }, { "epoch": 0.4921596687269738, "grad_norm": 1.0659633141740823, "learning_rate": 4.919301932609969e-06, "loss": 0.4152, "step": 8082 }, { "epoch": 0.49222056450385165, "grad_norm": 1.0826836250575225, "learning_rate": 4.919281820855611e-06, "loss": 0.4343, "step": 8083 }, { "epoch": 0.49228146028072955, "grad_norm": 1.0524243974760075, "learning_rate": 4.919261706636539e-06, "loss": 0.4758, "step": 8084 }, { "epoch": 0.4923423560576074, "grad_norm": 0.9896917227028547, "learning_rate": 4.9192415899527726e-06, "loss": 0.4914, "step": 8085 }, { "epoch": 0.4924032518344853, "grad_norm": 1.067385802454045, "learning_rate": 4.919221470804334e-06, "loss": 0.5249, "step": 8086 }, { "epoch": 0.49246414761136315, "grad_norm": 0.9741054559193126, "learning_rate": 4.919201349191242e-06, "loss": 0.4697, "step": 8087 }, { "epoch": 0.49252504338824105, "grad_norm": 1.0082826218215133, "learning_rate": 4.919181225113519e-06, "loss": 0.4116, "step": 8088 }, { "epoch": 0.4925859391651189, "grad_norm": 1.0417836051219382, "learning_rate": 4.9191610985711835e-06, "loss": 0.4345, "step": 8089 }, { "epoch": 0.4926468349419968, "grad_norm": 0.9843690676505404, "learning_rate": 4.919140969564258e-06, "loss": 0.4422, "step": 8090 }, { "epoch": 0.49270773071887464, "grad_norm": 1.0415321701925813, "learning_rate": 4.919120838092762e-06, "loss": 0.3559, "step": 8091 }, { "epoch": 0.49276862649575254, "grad_norm": 1.0027228944425317, "learning_rate": 4.919100704156715e-06, "loss": 0.4646, "step": 8092 }, { "epoch": 0.4928295222726304, "grad_norm": 0.9877650370402005, "learning_rate": 4.919080567756138e-06, "loss": 0.4123, "step": 8093 }, { "epoch": 0.4928904180495083, "grad_norm": 1.0174534841170109, "learning_rate": 4.919060428891053e-06, "loss": 0.4202, "step": 8094 }, { "epoch": 0.49295131382638613, "grad_norm": 1.0617537433375441, "learning_rate": 4.9190402875614795e-06, "loss": 0.4106, "step": 8095 }, { "epoch": 0.49301220960326403, "grad_norm": 1.0366636543083951, "learning_rate": 4.919020143767439e-06, "loss": 0.4621, "step": 8096 }, { "epoch": 0.4930731053801419, "grad_norm": 1.0017159616171059, "learning_rate": 4.91899999750895e-06, "loss": 0.4428, "step": 8097 }, { "epoch": 0.4931340011570198, "grad_norm": 0.9884960148010516, "learning_rate": 4.9189798487860334e-06, "loss": 0.5088, "step": 8098 }, { "epoch": 0.4931948969338976, "grad_norm": 1.0289836124844878, "learning_rate": 4.918959697598711e-06, "loss": 0.483, "step": 8099 }, { "epoch": 0.4932557927107755, "grad_norm": 1.1156220007915405, "learning_rate": 4.918939543947004e-06, "loss": 0.4546, "step": 8100 }, { "epoch": 0.49331668848765337, "grad_norm": 1.0504200230737526, "learning_rate": 4.91891938783093e-06, "loss": 0.4777, "step": 8101 }, { "epoch": 0.49337758426453127, "grad_norm": 1.0014002158327706, "learning_rate": 4.918899229250513e-06, "loss": 0.4287, "step": 8102 }, { "epoch": 0.4934384800414091, "grad_norm": 0.9949501430201955, "learning_rate": 4.91887906820577e-06, "loss": 0.4559, "step": 8103 }, { "epoch": 0.493499375818287, "grad_norm": 0.9963877649177929, "learning_rate": 4.918858904696723e-06, "loss": 0.4233, "step": 8104 }, { "epoch": 0.49356027159516486, "grad_norm": 0.8991892506626378, "learning_rate": 4.918838738723394e-06, "loss": 0.5124, "step": 8105 }, { "epoch": 0.49362116737204276, "grad_norm": 1.006919810493341, "learning_rate": 4.918818570285802e-06, "loss": 0.4498, "step": 8106 }, { "epoch": 0.4936820631489206, "grad_norm": 1.0105523869628354, "learning_rate": 4.9187983993839685e-06, "loss": 0.398, "step": 8107 }, { "epoch": 0.4937429589257985, "grad_norm": 1.095360357612358, "learning_rate": 4.918778226017913e-06, "loss": 0.3874, "step": 8108 }, { "epoch": 0.49380385470267635, "grad_norm": 0.9735703889704923, "learning_rate": 4.918758050187656e-06, "loss": 0.4086, "step": 8109 }, { "epoch": 0.49386475047955425, "grad_norm": 0.9714167635014171, "learning_rate": 4.918737871893219e-06, "loss": 0.4421, "step": 8110 }, { "epoch": 0.4939256462564321, "grad_norm": 1.006417389422371, "learning_rate": 4.918717691134622e-06, "loss": 0.4604, "step": 8111 }, { "epoch": 0.49398654203331, "grad_norm": 0.9071787944093037, "learning_rate": 4.918697507911887e-06, "loss": 0.4403, "step": 8112 }, { "epoch": 0.49404743781018784, "grad_norm": 1.0723694785000542, "learning_rate": 4.918677322225032e-06, "loss": 0.429, "step": 8113 }, { "epoch": 0.49410833358706574, "grad_norm": 1.0871159414746923, "learning_rate": 4.918657134074079e-06, "loss": 0.5409, "step": 8114 }, { "epoch": 0.4941692293639436, "grad_norm": 0.9941480989079814, "learning_rate": 4.918636943459048e-06, "loss": 0.4448, "step": 8115 }, { "epoch": 0.4942301251408215, "grad_norm": 0.9071400587030436, "learning_rate": 4.918616750379961e-06, "loss": 0.4548, "step": 8116 }, { "epoch": 0.49429102091769933, "grad_norm": 0.9915316150366994, "learning_rate": 4.918596554836837e-06, "loss": 0.4504, "step": 8117 }, { "epoch": 0.49435191669457723, "grad_norm": 0.9958739855011819, "learning_rate": 4.9185763568296976e-06, "loss": 0.4852, "step": 8118 }, { "epoch": 0.49441281247145513, "grad_norm": 1.004390416669053, "learning_rate": 4.918556156358561e-06, "loss": 0.5132, "step": 8119 }, { "epoch": 0.494473708248333, "grad_norm": 0.9568226733259297, "learning_rate": 4.918535953423452e-06, "loss": 0.4257, "step": 8120 }, { "epoch": 0.4945346040252109, "grad_norm": 1.0472644042802433, "learning_rate": 4.918515748024388e-06, "loss": 0.4567, "step": 8121 }, { "epoch": 0.4945954998020887, "grad_norm": 1.0600347806100552, "learning_rate": 4.91849554016139e-06, "loss": 0.4704, "step": 8122 }, { "epoch": 0.4946563955789666, "grad_norm": 1.0247981039141725, "learning_rate": 4.918475329834479e-06, "loss": 0.4496, "step": 8123 }, { "epoch": 0.49471729135584447, "grad_norm": 0.9868859029251281, "learning_rate": 4.9184551170436765e-06, "loss": 0.4985, "step": 8124 }, { "epoch": 0.49477818713272237, "grad_norm": 1.0961287814016782, "learning_rate": 4.918434901789002e-06, "loss": 0.4002, "step": 8125 }, { "epoch": 0.4948390829096002, "grad_norm": 0.9537477151440101, "learning_rate": 4.918414684070476e-06, "loss": 0.4924, "step": 8126 }, { "epoch": 0.4948999786864781, "grad_norm": 0.9270651620884479, "learning_rate": 4.918394463888119e-06, "loss": 0.4482, "step": 8127 }, { "epoch": 0.49496087446335596, "grad_norm": 1.0046207622390604, "learning_rate": 4.918374241241953e-06, "loss": 0.4242, "step": 8128 }, { "epoch": 0.49502177024023386, "grad_norm": 1.0630493193830604, "learning_rate": 4.918354016131997e-06, "loss": 0.4321, "step": 8129 }, { "epoch": 0.4950826660171117, "grad_norm": 0.9818218296382605, "learning_rate": 4.918333788558272e-06, "loss": 0.4433, "step": 8130 }, { "epoch": 0.4951435617939896, "grad_norm": 0.9682117054559934, "learning_rate": 4.9183135585207985e-06, "loss": 0.4565, "step": 8131 }, { "epoch": 0.49520445757086745, "grad_norm": 1.0063793473447191, "learning_rate": 4.918293326019598e-06, "loss": 0.4598, "step": 8132 }, { "epoch": 0.49526535334774535, "grad_norm": 0.9672511209836169, "learning_rate": 4.91827309105469e-06, "loss": 0.4993, "step": 8133 }, { "epoch": 0.4953262491246232, "grad_norm": 1.0502608466022711, "learning_rate": 4.918252853626097e-06, "loss": 0.5022, "step": 8134 }, { "epoch": 0.4953871449015011, "grad_norm": 1.036154425598418, "learning_rate": 4.918232613733837e-06, "loss": 0.4627, "step": 8135 }, { "epoch": 0.49544804067837894, "grad_norm": 1.0233377861058675, "learning_rate": 4.918212371377933e-06, "loss": 0.42, "step": 8136 }, { "epoch": 0.49550893645525684, "grad_norm": 1.1578974463534433, "learning_rate": 4.918192126558403e-06, "loss": 0.3813, "step": 8137 }, { "epoch": 0.4955698322321347, "grad_norm": 0.9768691011506934, "learning_rate": 4.918171879275271e-06, "loss": 0.4481, "step": 8138 }, { "epoch": 0.4956307280090126, "grad_norm": 1.0222166124137368, "learning_rate": 4.918151629528554e-06, "loss": 0.479, "step": 8139 }, { "epoch": 0.49569162378589043, "grad_norm": 1.0486015114532554, "learning_rate": 4.918131377318275e-06, "loss": 0.5014, "step": 8140 }, { "epoch": 0.49575251956276833, "grad_norm": 1.083065564536226, "learning_rate": 4.918111122644455e-06, "loss": 0.495, "step": 8141 }, { "epoch": 0.4958134153396462, "grad_norm": 0.9658712785337827, "learning_rate": 4.9180908655071125e-06, "loss": 0.4671, "step": 8142 }, { "epoch": 0.4958743111165241, "grad_norm": 0.9586686270142962, "learning_rate": 4.91807060590627e-06, "loss": 0.4345, "step": 8143 }, { "epoch": 0.4959352068934019, "grad_norm": 0.9636944709875712, "learning_rate": 4.918050343841946e-06, "loss": 0.3878, "step": 8144 }, { "epoch": 0.4959961026702798, "grad_norm": 0.9479035699755927, "learning_rate": 4.918030079314164e-06, "loss": 0.5078, "step": 8145 }, { "epoch": 0.49605699844715767, "grad_norm": 1.041170738275936, "learning_rate": 4.918009812322942e-06, "loss": 0.4046, "step": 8146 }, { "epoch": 0.49611789422403557, "grad_norm": 1.0311371362680355, "learning_rate": 4.917989542868303e-06, "loss": 0.5028, "step": 8147 }, { "epoch": 0.4961787900009134, "grad_norm": 1.1122042232241365, "learning_rate": 4.917969270950267e-06, "loss": 0.4345, "step": 8148 }, { "epoch": 0.4962396857777913, "grad_norm": 0.9773813606675987, "learning_rate": 4.917948996568853e-06, "loss": 0.4218, "step": 8149 }, { "epoch": 0.49630058155466916, "grad_norm": 1.0576642782837606, "learning_rate": 4.917928719724083e-06, "loss": 0.3931, "step": 8150 }, { "epoch": 0.49636147733154706, "grad_norm": 1.074174695441281, "learning_rate": 4.917908440415978e-06, "loss": 0.4624, "step": 8151 }, { "epoch": 0.4964223731084249, "grad_norm": 1.093246308757962, "learning_rate": 4.917888158644558e-06, "loss": 0.4661, "step": 8152 }, { "epoch": 0.4964832688853028, "grad_norm": 1.0829162427095747, "learning_rate": 4.917867874409844e-06, "loss": 0.3768, "step": 8153 }, { "epoch": 0.49654416466218065, "grad_norm": 1.0070691378189665, "learning_rate": 4.9178475877118555e-06, "loss": 0.464, "step": 8154 }, { "epoch": 0.49660506043905855, "grad_norm": 1.058318008237389, "learning_rate": 4.917827298550616e-06, "loss": 0.4267, "step": 8155 }, { "epoch": 0.4966659562159364, "grad_norm": 0.9544272375538142, "learning_rate": 4.917807006926142e-06, "loss": 0.4709, "step": 8156 }, { "epoch": 0.4967268519928143, "grad_norm": 0.9673105475254751, "learning_rate": 4.917786712838458e-06, "loss": 0.4533, "step": 8157 }, { "epoch": 0.49678774776969215, "grad_norm": 0.9828223951159409, "learning_rate": 4.917766416287583e-06, "loss": 0.4132, "step": 8158 }, { "epoch": 0.49684864354657005, "grad_norm": 0.9169751641210473, "learning_rate": 4.917746117273537e-06, "loss": 0.4081, "step": 8159 }, { "epoch": 0.49690953932344795, "grad_norm": 0.9970867319160535, "learning_rate": 4.917725815796343e-06, "loss": 0.4079, "step": 8160 }, { "epoch": 0.4969704351003258, "grad_norm": 1.090067192557687, "learning_rate": 4.917705511856019e-06, "loss": 0.3956, "step": 8161 }, { "epoch": 0.4970313308772037, "grad_norm": 1.0189859434240494, "learning_rate": 4.9176852054525874e-06, "loss": 0.4442, "step": 8162 }, { "epoch": 0.49709222665408154, "grad_norm": 1.0835303949174442, "learning_rate": 4.917664896586068e-06, "loss": 0.4399, "step": 8163 }, { "epoch": 0.49715312243095944, "grad_norm": 1.0590645815824062, "learning_rate": 4.917644585256483e-06, "loss": 0.4457, "step": 8164 }, { "epoch": 0.4972140182078373, "grad_norm": 1.0462459021004604, "learning_rate": 4.91762427146385e-06, "loss": 0.4732, "step": 8165 }, { "epoch": 0.4972749139847152, "grad_norm": 1.0165255182493564, "learning_rate": 4.917603955208193e-06, "loss": 0.4655, "step": 8166 }, { "epoch": 0.49733580976159303, "grad_norm": 1.0217482896945707, "learning_rate": 4.917583636489531e-06, "loss": 0.415, "step": 8167 }, { "epoch": 0.49739670553847093, "grad_norm": 1.0160453796250852, "learning_rate": 4.917563315307886e-06, "loss": 0.4078, "step": 8168 }, { "epoch": 0.4974576013153488, "grad_norm": 1.0034090458548997, "learning_rate": 4.9175429916632765e-06, "loss": 0.4685, "step": 8169 }, { "epoch": 0.4975184970922267, "grad_norm": 0.9420605582108704, "learning_rate": 4.917522665555725e-06, "loss": 0.4316, "step": 8170 }, { "epoch": 0.4975793928691045, "grad_norm": 1.0526562349653095, "learning_rate": 4.917502336985252e-06, "loss": 0.4594, "step": 8171 }, { "epoch": 0.4976402886459824, "grad_norm": 1.0129020893325242, "learning_rate": 4.917482005951877e-06, "loss": 0.3971, "step": 8172 }, { "epoch": 0.49770118442286027, "grad_norm": 0.9672689563154542, "learning_rate": 4.917461672455621e-06, "loss": 0.4365, "step": 8173 }, { "epoch": 0.49776208019973817, "grad_norm": 0.971674287864659, "learning_rate": 4.917441336496507e-06, "loss": 0.4402, "step": 8174 }, { "epoch": 0.497822975976616, "grad_norm": 1.0233035933510897, "learning_rate": 4.917420998074553e-06, "loss": 0.4579, "step": 8175 }, { "epoch": 0.4978838717534939, "grad_norm": 1.0094057829336482, "learning_rate": 4.917400657189782e-06, "loss": 0.4694, "step": 8176 }, { "epoch": 0.49794476753037176, "grad_norm": 0.9620899579678347, "learning_rate": 4.917380313842211e-06, "loss": 0.4536, "step": 8177 }, { "epoch": 0.49800566330724966, "grad_norm": 1.1018083141226889, "learning_rate": 4.9173599680318656e-06, "loss": 0.3739, "step": 8178 }, { "epoch": 0.4980665590841275, "grad_norm": 0.9449979623939699, "learning_rate": 4.917339619758763e-06, "loss": 0.4461, "step": 8179 }, { "epoch": 0.4981274548610054, "grad_norm": 1.1063965958529518, "learning_rate": 4.917319269022926e-06, "loss": 0.3829, "step": 8180 }, { "epoch": 0.49818835063788325, "grad_norm": 1.04554551795822, "learning_rate": 4.917298915824373e-06, "loss": 0.4151, "step": 8181 }, { "epoch": 0.49824924641476115, "grad_norm": 0.984836427139911, "learning_rate": 4.917278560163127e-06, "loss": 0.4118, "step": 8182 }, { "epoch": 0.498310142191639, "grad_norm": 1.0127643156207746, "learning_rate": 4.917258202039208e-06, "loss": 0.4529, "step": 8183 }, { "epoch": 0.4983710379685169, "grad_norm": 0.9841168354639265, "learning_rate": 4.917237841452636e-06, "loss": 0.4692, "step": 8184 }, { "epoch": 0.49843193374539474, "grad_norm": 1.0990110245599027, "learning_rate": 4.917217478403433e-06, "loss": 0.4236, "step": 8185 }, { "epoch": 0.49849282952227264, "grad_norm": 1.0343184279129185, "learning_rate": 4.917197112891619e-06, "loss": 0.4334, "step": 8186 }, { "epoch": 0.4985537252991505, "grad_norm": 0.9594152447720348, "learning_rate": 4.9171767449172135e-06, "loss": 0.4733, "step": 8187 }, { "epoch": 0.4986146210760284, "grad_norm": 0.9689518930331954, "learning_rate": 4.91715637448024e-06, "loss": 0.456, "step": 8188 }, { "epoch": 0.49867551685290623, "grad_norm": 0.950243010042346, "learning_rate": 4.917136001580718e-06, "loss": 0.4518, "step": 8189 }, { "epoch": 0.49873641262978413, "grad_norm": 1.1122961928059587, "learning_rate": 4.917115626218667e-06, "loss": 0.4422, "step": 8190 }, { "epoch": 0.498797308406662, "grad_norm": 0.944733457422201, "learning_rate": 4.91709524839411e-06, "loss": 0.4939, "step": 8191 }, { "epoch": 0.4988582041835399, "grad_norm": 1.119157341153504, "learning_rate": 4.917074868107066e-06, "loss": 0.4298, "step": 8192 }, { "epoch": 0.4989190999604177, "grad_norm": 1.009924609283643, "learning_rate": 4.917054485357556e-06, "loss": 0.4431, "step": 8193 }, { "epoch": 0.4989799957372956, "grad_norm": 1.006448993308707, "learning_rate": 4.917034100145602e-06, "loss": 0.4411, "step": 8194 }, { "epoch": 0.49904089151417347, "grad_norm": 0.9907666183702191, "learning_rate": 4.917013712471223e-06, "loss": 0.4486, "step": 8195 }, { "epoch": 0.49910178729105137, "grad_norm": 1.034966824030098, "learning_rate": 4.9169933223344414e-06, "loss": 0.4532, "step": 8196 }, { "epoch": 0.4991626830679292, "grad_norm": 0.9579517025431222, "learning_rate": 4.916972929735277e-06, "loss": 0.478, "step": 8197 }, { "epoch": 0.4992235788448071, "grad_norm": 1.0961163345691023, "learning_rate": 4.916952534673751e-06, "loss": 0.4506, "step": 8198 }, { "epoch": 0.49928447462168496, "grad_norm": 0.9385516994565956, "learning_rate": 4.916932137149884e-06, "loss": 0.4537, "step": 8199 }, { "epoch": 0.49934537039856286, "grad_norm": 1.0775490835277353, "learning_rate": 4.916911737163697e-06, "loss": 0.4721, "step": 8200 }, { "epoch": 0.49940626617544076, "grad_norm": 1.1363332661261607, "learning_rate": 4.916891334715209e-06, "loss": 0.4869, "step": 8201 }, { "epoch": 0.4994671619523186, "grad_norm": 1.007338522020527, "learning_rate": 4.9168709298044435e-06, "loss": 0.3878, "step": 8202 }, { "epoch": 0.4995280577291965, "grad_norm": 1.0444620808301754, "learning_rate": 4.916850522431421e-06, "loss": 0.4575, "step": 8203 }, { "epoch": 0.49958895350607435, "grad_norm": 1.1001685314112895, "learning_rate": 4.9168301125961605e-06, "loss": 0.4805, "step": 8204 }, { "epoch": 0.49964984928295225, "grad_norm": 1.0739547325909828, "learning_rate": 4.916809700298683e-06, "loss": 0.4642, "step": 8205 }, { "epoch": 0.4997107450598301, "grad_norm": 0.9829807041441971, "learning_rate": 4.916789285539012e-06, "loss": 0.5093, "step": 8206 }, { "epoch": 0.499771640836708, "grad_norm": 0.9978110054093994, "learning_rate": 4.916768868317165e-06, "loss": 0.3949, "step": 8207 }, { "epoch": 0.49983253661358584, "grad_norm": 0.9455366354666822, "learning_rate": 4.916748448633164e-06, "loss": 0.439, "step": 8208 }, { "epoch": 0.49989343239046374, "grad_norm": 0.984081038402461, "learning_rate": 4.91672802648703e-06, "loss": 0.4903, "step": 8209 }, { "epoch": 0.4999543281673416, "grad_norm": 1.0409040967347063, "learning_rate": 4.916707601878784e-06, "loss": 0.4805, "step": 8210 }, { "epoch": 0.5000152239442195, "grad_norm": 1.0480250458757248, "learning_rate": 4.9166871748084465e-06, "loss": 0.4206, "step": 8211 }, { "epoch": 0.5000761197210973, "grad_norm": 1.0145994317566482, "learning_rate": 4.9166667452760386e-06, "loss": 0.4063, "step": 8212 }, { "epoch": 0.5001370154979752, "grad_norm": 0.9394175919420614, "learning_rate": 4.916646313281581e-06, "loss": 0.474, "step": 8213 }, { "epoch": 0.5001979112748531, "grad_norm": 0.9274721105867872, "learning_rate": 4.9166258788250945e-06, "loss": 0.4957, "step": 8214 }, { "epoch": 0.500258807051731, "grad_norm": 1.0036903485637496, "learning_rate": 4.916605441906599e-06, "loss": 0.469, "step": 8215 }, { "epoch": 0.5003197028286088, "grad_norm": 0.9999253042644379, "learning_rate": 4.916585002526116e-06, "loss": 0.4386, "step": 8216 }, { "epoch": 0.5003805986054867, "grad_norm": 0.9868294279755585, "learning_rate": 4.916564560683667e-06, "loss": 0.4961, "step": 8217 }, { "epoch": 0.5004414943823646, "grad_norm": 1.0022694750048855, "learning_rate": 4.916544116379272e-06, "loss": 0.4428, "step": 8218 }, { "epoch": 0.5005023901592425, "grad_norm": 0.9998850947992919, "learning_rate": 4.9165236696129525e-06, "loss": 0.4757, "step": 8219 }, { "epoch": 0.5005632859361203, "grad_norm": 1.0351620842097746, "learning_rate": 4.916503220384729e-06, "loss": 0.4338, "step": 8220 }, { "epoch": 0.5006241817129982, "grad_norm": 0.9965977713668277, "learning_rate": 4.916482768694622e-06, "loss": 0.4024, "step": 8221 }, { "epoch": 0.5006850774898761, "grad_norm": 1.0679580412654521, "learning_rate": 4.916462314542652e-06, "loss": 0.4832, "step": 8222 }, { "epoch": 0.500745973266754, "grad_norm": 1.0147994144366952, "learning_rate": 4.916441857928841e-06, "loss": 0.4707, "step": 8223 }, { "epoch": 0.5008068690436318, "grad_norm": 1.0384837162024914, "learning_rate": 4.9164213988532095e-06, "loss": 0.45, "step": 8224 }, { "epoch": 0.5008677648205097, "grad_norm": 0.9233647113003438, "learning_rate": 4.916400937315778e-06, "loss": 0.4857, "step": 8225 }, { "epoch": 0.5009286605973876, "grad_norm": 1.0481836201791592, "learning_rate": 4.916380473316567e-06, "loss": 0.4157, "step": 8226 }, { "epoch": 0.5009895563742655, "grad_norm": 1.0579267945372604, "learning_rate": 4.916360006855598e-06, "loss": 0.4348, "step": 8227 }, { "epoch": 0.5010504521511433, "grad_norm": 1.0276274343940126, "learning_rate": 4.916339537932892e-06, "loss": 0.4083, "step": 8228 }, { "epoch": 0.5011113479280211, "grad_norm": 0.976714312949374, "learning_rate": 4.916319066548469e-06, "loss": 0.4709, "step": 8229 }, { "epoch": 0.5011722437048991, "grad_norm": 0.9893852784867486, "learning_rate": 4.916298592702351e-06, "loss": 0.4093, "step": 8230 }, { "epoch": 0.501233139481777, "grad_norm": 0.9823610273146445, "learning_rate": 4.916278116394559e-06, "loss": 0.4406, "step": 8231 }, { "epoch": 0.5012940352586548, "grad_norm": 0.9764931398563241, "learning_rate": 4.9162576376251105e-06, "loss": 0.488, "step": 8232 }, { "epoch": 0.5013549310355326, "grad_norm": 1.0032445283560945, "learning_rate": 4.9162371563940306e-06, "loss": 0.4642, "step": 8233 }, { "epoch": 0.5014158268124106, "grad_norm": 1.0735056912489804, "learning_rate": 4.916216672701338e-06, "loss": 0.5199, "step": 8234 }, { "epoch": 0.5014767225892884, "grad_norm": 0.9775604638835911, "learning_rate": 4.916196186547055e-06, "loss": 0.5136, "step": 8235 }, { "epoch": 0.5015376183661663, "grad_norm": 1.1143816282517427, "learning_rate": 4.916175697931202e-06, "loss": 0.4157, "step": 8236 }, { "epoch": 0.5015985141430441, "grad_norm": 0.9992515736209108, "learning_rate": 4.916155206853797e-06, "loss": 0.4728, "step": 8237 }, { "epoch": 0.5016594099199221, "grad_norm": 1.0393543395726315, "learning_rate": 4.916134713314865e-06, "loss": 0.5082, "step": 8238 }, { "epoch": 0.5017203056967999, "grad_norm": 1.0358413755037308, "learning_rate": 4.916114217314425e-06, "loss": 0.4426, "step": 8239 }, { "epoch": 0.5017812014736778, "grad_norm": 1.0484381639354292, "learning_rate": 4.9160937188524985e-06, "loss": 0.5234, "step": 8240 }, { "epoch": 0.5018420972505556, "grad_norm": 0.9875411352258142, "learning_rate": 4.916073217929105e-06, "loss": 0.4476, "step": 8241 }, { "epoch": 0.5019029930274336, "grad_norm": 0.9395806925342817, "learning_rate": 4.916052714544267e-06, "loss": 0.3989, "step": 8242 }, { "epoch": 0.5019638888043114, "grad_norm": 1.0254407524947904, "learning_rate": 4.916032208698005e-06, "loss": 0.476, "step": 8243 }, { "epoch": 0.5020247845811893, "grad_norm": 1.1529773228645783, "learning_rate": 4.916011700390339e-06, "loss": 0.5255, "step": 8244 }, { "epoch": 0.5020856803580672, "grad_norm": 1.0536446660788277, "learning_rate": 4.915991189621291e-06, "loss": 0.438, "step": 8245 }, { "epoch": 0.5021465761349451, "grad_norm": 1.0152265406425665, "learning_rate": 4.91597067639088e-06, "loss": 0.4866, "step": 8246 }, { "epoch": 0.5022074719118229, "grad_norm": 1.0316557456143374, "learning_rate": 4.91595016069913e-06, "loss": 0.3939, "step": 8247 }, { "epoch": 0.5022683676887008, "grad_norm": 1.0144372551832406, "learning_rate": 4.91592964254606e-06, "loss": 0.4971, "step": 8248 }, { "epoch": 0.5023292634655787, "grad_norm": 1.0400045879322612, "learning_rate": 4.91590912193169e-06, "loss": 0.4425, "step": 8249 }, { "epoch": 0.5023901592424566, "grad_norm": 1.0461969911883602, "learning_rate": 4.915888598856043e-06, "loss": 0.3637, "step": 8250 }, { "epoch": 0.5024510550193344, "grad_norm": 1.0019643185015186, "learning_rate": 4.915868073319139e-06, "loss": 0.4897, "step": 8251 }, { "epoch": 0.5025119507962122, "grad_norm": 1.0717368881923035, "learning_rate": 4.915847545320998e-06, "loss": 0.4, "step": 8252 }, { "epoch": 0.5025728465730902, "grad_norm": 0.9394574510048871, "learning_rate": 4.915827014861642e-06, "loss": 0.4667, "step": 8253 }, { "epoch": 0.502633742349968, "grad_norm": 1.0615033974577013, "learning_rate": 4.915806481941092e-06, "loss": 0.4117, "step": 8254 }, { "epoch": 0.5026946381268459, "grad_norm": 0.9915078331065026, "learning_rate": 4.915785946559368e-06, "loss": 0.4582, "step": 8255 }, { "epoch": 0.5027555339037237, "grad_norm": 1.0126725443286524, "learning_rate": 4.915765408716493e-06, "loss": 0.3915, "step": 8256 }, { "epoch": 0.5028164296806017, "grad_norm": 0.9862259775354372, "learning_rate": 4.915744868412485e-06, "loss": 0.4092, "step": 8257 }, { "epoch": 0.5028773254574795, "grad_norm": 0.9171979317808733, "learning_rate": 4.915724325647366e-06, "loss": 0.4162, "step": 8258 }, { "epoch": 0.5029382212343574, "grad_norm": 0.9507235284111752, "learning_rate": 4.915703780421158e-06, "loss": 0.5114, "step": 8259 }, { "epoch": 0.5029991170112352, "grad_norm": 1.0803325461191755, "learning_rate": 4.9156832327338824e-06, "loss": 0.4487, "step": 8260 }, { "epoch": 0.5030600127881132, "grad_norm": 1.0627254688448204, "learning_rate": 4.915662682585557e-06, "loss": 0.4602, "step": 8261 }, { "epoch": 0.503120908564991, "grad_norm": 1.0029364352551888, "learning_rate": 4.915642129976206e-06, "loss": 0.4287, "step": 8262 }, { "epoch": 0.5031818043418689, "grad_norm": 1.0888711217012454, "learning_rate": 4.915621574905848e-06, "loss": 0.3784, "step": 8263 }, { "epoch": 0.5032427001187467, "grad_norm": 1.107091429760823, "learning_rate": 4.915601017374505e-06, "loss": 0.4159, "step": 8264 }, { "epoch": 0.5033035958956247, "grad_norm": 1.0304781192306083, "learning_rate": 4.915580457382199e-06, "loss": 0.3973, "step": 8265 }, { "epoch": 0.5033644916725025, "grad_norm": 0.9796838278486356, "learning_rate": 4.915559894928949e-06, "loss": 0.4607, "step": 8266 }, { "epoch": 0.5034253874493804, "grad_norm": 1.0754241880028292, "learning_rate": 4.915539330014777e-06, "loss": 0.3989, "step": 8267 }, { "epoch": 0.5034862832262582, "grad_norm": 1.0463408956625047, "learning_rate": 4.915518762639704e-06, "loss": 0.3612, "step": 8268 }, { "epoch": 0.5035471790031362, "grad_norm": 1.0381687053039401, "learning_rate": 4.91549819280375e-06, "loss": 0.4584, "step": 8269 }, { "epoch": 0.503608074780014, "grad_norm": 1.0366912361771785, "learning_rate": 4.915477620506938e-06, "loss": 0.3887, "step": 8270 }, { "epoch": 0.5036689705568919, "grad_norm": 1.0807263963187934, "learning_rate": 4.915457045749286e-06, "loss": 0.3983, "step": 8271 }, { "epoch": 0.5037298663337697, "grad_norm": 1.0357646327027121, "learning_rate": 4.915436468530818e-06, "loss": 0.4657, "step": 8272 }, { "epoch": 0.5037907621106477, "grad_norm": 1.1299981560299117, "learning_rate": 4.915415888851552e-06, "loss": 0.406, "step": 8273 }, { "epoch": 0.5038516578875255, "grad_norm": 1.07981096573318, "learning_rate": 4.915395306711512e-06, "loss": 0.4186, "step": 8274 }, { "epoch": 0.5039125536644034, "grad_norm": 1.0338958248376084, "learning_rate": 4.9153747221107165e-06, "loss": 0.4468, "step": 8275 }, { "epoch": 0.5039734494412812, "grad_norm": 1.1255851702874975, "learning_rate": 4.915354135049188e-06, "loss": 0.3992, "step": 8276 }, { "epoch": 0.5040343452181592, "grad_norm": 1.0460064955506139, "learning_rate": 4.915333545526947e-06, "loss": 0.3929, "step": 8277 }, { "epoch": 0.504095240995037, "grad_norm": 0.9324358832034654, "learning_rate": 4.915312953544014e-06, "loss": 0.4956, "step": 8278 }, { "epoch": 0.5041561367719148, "grad_norm": 1.040378827469332, "learning_rate": 4.91529235910041e-06, "loss": 0.444, "step": 8279 }, { "epoch": 0.5042170325487927, "grad_norm": 0.9537687603045987, "learning_rate": 4.915271762196157e-06, "loss": 0.4431, "step": 8280 }, { "epoch": 0.5042779283256706, "grad_norm": 0.9896045444587342, "learning_rate": 4.915251162831275e-06, "loss": 0.4772, "step": 8281 }, { "epoch": 0.5043388241025485, "grad_norm": 1.006162136359954, "learning_rate": 4.915230561005786e-06, "loss": 0.4556, "step": 8282 }, { "epoch": 0.5043997198794263, "grad_norm": 1.0402607912122777, "learning_rate": 4.91520995671971e-06, "loss": 0.4673, "step": 8283 }, { "epoch": 0.5044606156563043, "grad_norm": 0.9849612284226241, "learning_rate": 4.915189349973067e-06, "loss": 0.4218, "step": 8284 }, { "epoch": 0.5045215114331821, "grad_norm": 1.026237572462583, "learning_rate": 4.915168740765881e-06, "loss": 0.3869, "step": 8285 }, { "epoch": 0.50458240721006, "grad_norm": 1.0563493524306387, "learning_rate": 4.9151481290981704e-06, "loss": 0.4623, "step": 8286 }, { "epoch": 0.5046433029869378, "grad_norm": 1.063708049463425, "learning_rate": 4.915127514969958e-06, "loss": 0.3717, "step": 8287 }, { "epoch": 0.5047041987638158, "grad_norm": 0.9688402312337515, "learning_rate": 4.915106898381263e-06, "loss": 0.4437, "step": 8288 }, { "epoch": 0.5047650945406936, "grad_norm": 1.0898900656281534, "learning_rate": 4.915086279332108e-06, "loss": 0.4593, "step": 8289 }, { "epoch": 0.5048259903175715, "grad_norm": 1.0204725616634112, "learning_rate": 4.915065657822513e-06, "loss": 0.4422, "step": 8290 }, { "epoch": 0.5048868860944493, "grad_norm": 1.060951258884856, "learning_rate": 4.9150450338524994e-06, "loss": 0.4453, "step": 8291 }, { "epoch": 0.5049477818713273, "grad_norm": 1.0488727367199226, "learning_rate": 4.915024407422088e-06, "loss": 0.4869, "step": 8292 }, { "epoch": 0.5050086776482051, "grad_norm": 0.9973367304505518, "learning_rate": 4.9150037785313e-06, "loss": 0.4751, "step": 8293 }, { "epoch": 0.505069573425083, "grad_norm": 1.0041781806411556, "learning_rate": 4.914983147180157e-06, "loss": 0.4784, "step": 8294 }, { "epoch": 0.5051304692019608, "grad_norm": 1.0053949992239375, "learning_rate": 4.914962513368678e-06, "loss": 0.4112, "step": 8295 }, { "epoch": 0.5051913649788388, "grad_norm": 1.0102482771945485, "learning_rate": 4.914941877096886e-06, "loss": 0.4358, "step": 8296 }, { "epoch": 0.5052522607557166, "grad_norm": 0.9400136516164629, "learning_rate": 4.914921238364801e-06, "loss": 0.4362, "step": 8297 }, { "epoch": 0.5053131565325945, "grad_norm": 1.0658411930712786, "learning_rate": 4.914900597172445e-06, "loss": 0.4299, "step": 8298 }, { "epoch": 0.5053740523094723, "grad_norm": 1.0542708307860371, "learning_rate": 4.914879953519839e-06, "loss": 0.3676, "step": 8299 }, { "epoch": 0.5054349480863503, "grad_norm": 1.0326239939818618, "learning_rate": 4.914859307407003e-06, "loss": 0.4243, "step": 8300 }, { "epoch": 0.5054958438632281, "grad_norm": 1.0241476505786227, "learning_rate": 4.914838658833958e-06, "loss": 0.4155, "step": 8301 }, { "epoch": 0.505556739640106, "grad_norm": 1.1091070889105576, "learning_rate": 4.914818007800727e-06, "loss": 0.4402, "step": 8302 }, { "epoch": 0.5056176354169838, "grad_norm": 0.9666538695842777, "learning_rate": 4.9147973543073276e-06, "loss": 0.5333, "step": 8303 }, { "epoch": 0.5056785311938617, "grad_norm": 1.0150865762257362, "learning_rate": 4.914776698353784e-06, "loss": 0.4591, "step": 8304 }, { "epoch": 0.5057394269707396, "grad_norm": 1.0326748755812742, "learning_rate": 4.914756039940117e-06, "loss": 0.5059, "step": 8305 }, { "epoch": 0.5058003227476174, "grad_norm": 1.0457924315364449, "learning_rate": 4.914735379066346e-06, "loss": 0.4461, "step": 8306 }, { "epoch": 0.5058612185244953, "grad_norm": 1.0216206494729632, "learning_rate": 4.914714715732492e-06, "loss": 0.4418, "step": 8307 }, { "epoch": 0.5059221143013732, "grad_norm": 0.9871352476041194, "learning_rate": 4.914694049938577e-06, "loss": 0.4254, "step": 8308 }, { "epoch": 0.5059830100782511, "grad_norm": 1.0214446154939365, "learning_rate": 4.9146733816846225e-06, "loss": 0.3926, "step": 8309 }, { "epoch": 0.5060439058551289, "grad_norm": 1.0616279918197338, "learning_rate": 4.914652710970649e-06, "loss": 0.477, "step": 8310 }, { "epoch": 0.5061048016320068, "grad_norm": 1.0134004684090823, "learning_rate": 4.914632037796678e-06, "loss": 0.4148, "step": 8311 }, { "epoch": 0.5061656974088847, "grad_norm": 1.1925115885691253, "learning_rate": 4.914611362162729e-06, "loss": 0.3875, "step": 8312 }, { "epoch": 0.5062265931857626, "grad_norm": 1.0099236502268316, "learning_rate": 4.914590684068825e-06, "loss": 0.488, "step": 8313 }, { "epoch": 0.5062874889626404, "grad_norm": 1.0039073484711418, "learning_rate": 4.914570003514986e-06, "loss": 0.3933, "step": 8314 }, { "epoch": 0.5063483847395183, "grad_norm": 0.9437550476752262, "learning_rate": 4.914549320501233e-06, "loss": 0.4859, "step": 8315 }, { "epoch": 0.5064092805163962, "grad_norm": 1.0111999075686624, "learning_rate": 4.914528635027587e-06, "loss": 0.4504, "step": 8316 }, { "epoch": 0.5064701762932741, "grad_norm": 0.9470213066969483, "learning_rate": 4.91450794709407e-06, "loss": 0.4816, "step": 8317 }, { "epoch": 0.5065310720701519, "grad_norm": 1.1097473516461551, "learning_rate": 4.914487256700702e-06, "loss": 0.4353, "step": 8318 }, { "epoch": 0.5065919678470298, "grad_norm": 0.9954929331598709, "learning_rate": 4.914466563847506e-06, "loss": 0.4617, "step": 8319 }, { "epoch": 0.5066528636239077, "grad_norm": 1.069707185137774, "learning_rate": 4.9144458685345e-06, "loss": 0.4106, "step": 8320 }, { "epoch": 0.5067137594007856, "grad_norm": 1.0464476337862136, "learning_rate": 4.9144251707617075e-06, "loss": 0.4339, "step": 8321 }, { "epoch": 0.5067746551776634, "grad_norm": 0.9919252943358514, "learning_rate": 4.9144044705291485e-06, "loss": 0.4404, "step": 8322 }, { "epoch": 0.5068355509545412, "grad_norm": 0.9466043425594095, "learning_rate": 4.914383767836845e-06, "loss": 0.458, "step": 8323 }, { "epoch": 0.5068964467314192, "grad_norm": 1.1011213939156848, "learning_rate": 4.914363062684817e-06, "loss": 0.3261, "step": 8324 }, { "epoch": 0.506957342508297, "grad_norm": 0.9979217699075146, "learning_rate": 4.9143423550730855e-06, "loss": 0.4541, "step": 8325 }, { "epoch": 0.5070182382851749, "grad_norm": 0.9834147994434927, "learning_rate": 4.914321645001673e-06, "loss": 0.4963, "step": 8326 }, { "epoch": 0.5070791340620528, "grad_norm": 1.0492195484212699, "learning_rate": 4.9143009324706e-06, "loss": 0.4847, "step": 8327 }, { "epoch": 0.5071400298389307, "grad_norm": 1.016417550733733, "learning_rate": 4.914280217479887e-06, "loss": 0.473, "step": 8328 }, { "epoch": 0.5072009256158085, "grad_norm": 1.0182377299280172, "learning_rate": 4.914259500029555e-06, "loss": 0.4736, "step": 8329 }, { "epoch": 0.5072618213926864, "grad_norm": 1.0037140057036749, "learning_rate": 4.914238780119626e-06, "loss": 0.4364, "step": 8330 }, { "epoch": 0.5073227171695643, "grad_norm": 0.9721494990536464, "learning_rate": 4.914218057750122e-06, "loss": 0.4239, "step": 8331 }, { "epoch": 0.5073836129464422, "grad_norm": 1.0656576420109438, "learning_rate": 4.9141973329210615e-06, "loss": 0.3891, "step": 8332 }, { "epoch": 0.50744450872332, "grad_norm": 1.0628729584771017, "learning_rate": 4.914176605632468e-06, "loss": 0.44, "step": 8333 }, { "epoch": 0.5075054045001979, "grad_norm": 0.9471711293977645, "learning_rate": 4.9141558758843604e-06, "loss": 0.4685, "step": 8334 }, { "epoch": 0.5075663002770758, "grad_norm": 1.0310575844058707, "learning_rate": 4.914135143676762e-06, "loss": 0.3987, "step": 8335 }, { "epoch": 0.5076271960539537, "grad_norm": 0.990817508974282, "learning_rate": 4.914114409009692e-06, "loss": 0.4617, "step": 8336 }, { "epoch": 0.5076880918308315, "grad_norm": 1.0363793901871479, "learning_rate": 4.914093671883172e-06, "loss": 0.4644, "step": 8337 }, { "epoch": 0.5077489876077094, "grad_norm": 0.9707434914999458, "learning_rate": 4.9140729322972244e-06, "loss": 0.4968, "step": 8338 }, { "epoch": 0.5078098833845873, "grad_norm": 1.076350549996223, "learning_rate": 4.91405219025187e-06, "loss": 0.4074, "step": 8339 }, { "epoch": 0.5078707791614652, "grad_norm": 0.9911276080171267, "learning_rate": 4.914031445747128e-06, "loss": 0.446, "step": 8340 }, { "epoch": 0.507931674938343, "grad_norm": 1.0058385411552266, "learning_rate": 4.914010698783023e-06, "loss": 0.4371, "step": 8341 }, { "epoch": 0.5079925707152209, "grad_norm": 0.9361126697013923, "learning_rate": 4.913989949359572e-06, "loss": 0.4533, "step": 8342 }, { "epoch": 0.5080534664920988, "grad_norm": 1.0708746418660227, "learning_rate": 4.9139691974768e-06, "loss": 0.3593, "step": 8343 }, { "epoch": 0.5081143622689767, "grad_norm": 1.0377831110645175, "learning_rate": 4.913948443134725e-06, "loss": 0.3768, "step": 8344 }, { "epoch": 0.5081752580458545, "grad_norm": 1.0426603165772104, "learning_rate": 4.91392768633337e-06, "loss": 0.4409, "step": 8345 }, { "epoch": 0.5082361538227324, "grad_norm": 0.8941212896477583, "learning_rate": 4.913906927072756e-06, "loss": 0.44, "step": 8346 }, { "epoch": 0.5082970495996103, "grad_norm": 1.054908517111049, "learning_rate": 4.913886165352903e-06, "loss": 0.4071, "step": 8347 }, { "epoch": 0.5083579453764882, "grad_norm": 1.0530048247330266, "learning_rate": 4.913865401173833e-06, "loss": 0.4707, "step": 8348 }, { "epoch": 0.508418841153366, "grad_norm": 1.031584491721372, "learning_rate": 4.913844634535568e-06, "loss": 0.4645, "step": 8349 }, { "epoch": 0.5084797369302438, "grad_norm": 0.9768995285006518, "learning_rate": 4.913823865438128e-06, "loss": 0.4481, "step": 8350 }, { "epoch": 0.5085406327071218, "grad_norm": 1.1039179135621746, "learning_rate": 4.913803093881534e-06, "loss": 0.4048, "step": 8351 }, { "epoch": 0.5086015284839996, "grad_norm": 1.0602222486081818, "learning_rate": 4.913782319865808e-06, "loss": 0.5185, "step": 8352 }, { "epoch": 0.5086624242608775, "grad_norm": 0.9885854949091166, "learning_rate": 4.91376154339097e-06, "loss": 0.4504, "step": 8353 }, { "epoch": 0.5087233200377553, "grad_norm": 0.9835070458261894, "learning_rate": 4.913740764457043e-06, "loss": 0.4768, "step": 8354 }, { "epoch": 0.5087842158146333, "grad_norm": 1.0742952891830189, "learning_rate": 4.913719983064046e-06, "loss": 0.4074, "step": 8355 }, { "epoch": 0.5088451115915111, "grad_norm": 0.9931307436862242, "learning_rate": 4.913699199212002e-06, "loss": 0.5284, "step": 8356 }, { "epoch": 0.508906007368389, "grad_norm": 1.023639466741578, "learning_rate": 4.9136784129009315e-06, "loss": 0.4539, "step": 8357 }, { "epoch": 0.5089669031452668, "grad_norm": 0.9259048715667947, "learning_rate": 4.913657624130855e-06, "loss": 0.5447, "step": 8358 }, { "epoch": 0.5090277989221448, "grad_norm": 1.0914558874153848, "learning_rate": 4.913636832901795e-06, "loss": 0.3547, "step": 8359 }, { "epoch": 0.5090886946990226, "grad_norm": 1.0411354883064328, "learning_rate": 4.913616039213772e-06, "loss": 0.4501, "step": 8360 }, { "epoch": 0.5091495904759005, "grad_norm": 0.9553274346369984, "learning_rate": 4.913595243066807e-06, "loss": 0.453, "step": 8361 }, { "epoch": 0.5092104862527783, "grad_norm": 0.9990871746243446, "learning_rate": 4.9135744444609205e-06, "loss": 0.403, "step": 8362 }, { "epoch": 0.5092713820296563, "grad_norm": 0.9348599314767481, "learning_rate": 4.913553643396135e-06, "loss": 0.4265, "step": 8363 }, { "epoch": 0.5093322778065341, "grad_norm": 1.118863835059077, "learning_rate": 4.913532839872472e-06, "loss": 0.5769, "step": 8364 }, { "epoch": 0.509393173583412, "grad_norm": 0.9113127215893202, "learning_rate": 4.913512033889951e-06, "loss": 0.442, "step": 8365 }, { "epoch": 0.5094540693602899, "grad_norm": 1.0411392670370716, "learning_rate": 4.913491225448595e-06, "loss": 0.4473, "step": 8366 }, { "epoch": 0.5095149651371678, "grad_norm": 1.003543302476234, "learning_rate": 4.9134704145484234e-06, "loss": 0.5599, "step": 8367 }, { "epoch": 0.5095758609140456, "grad_norm": 0.9626467661165781, "learning_rate": 4.9134496011894585e-06, "loss": 0.4497, "step": 8368 }, { "epoch": 0.5096367566909235, "grad_norm": 1.1384088697145076, "learning_rate": 4.913428785371722e-06, "loss": 0.3724, "step": 8369 }, { "epoch": 0.5096976524678014, "grad_norm": 1.1154512122480995, "learning_rate": 4.913407967095234e-06, "loss": 0.3949, "step": 8370 }, { "epoch": 0.5097585482446793, "grad_norm": 0.9459712748263016, "learning_rate": 4.913387146360016e-06, "loss": 0.4895, "step": 8371 }, { "epoch": 0.5098194440215571, "grad_norm": 0.9659752285093669, "learning_rate": 4.91336632316609e-06, "loss": 0.4805, "step": 8372 }, { "epoch": 0.509880339798435, "grad_norm": 1.0042262700015703, "learning_rate": 4.913345497513475e-06, "loss": 0.4386, "step": 8373 }, { "epoch": 0.5099412355753129, "grad_norm": 1.1119054899249983, "learning_rate": 4.9133246694021954e-06, "loss": 0.3548, "step": 8374 }, { "epoch": 0.5100021313521907, "grad_norm": 0.9397885622201619, "learning_rate": 4.91330383883227e-06, "loss": 0.4651, "step": 8375 }, { "epoch": 0.5100630271290686, "grad_norm": 1.1339627803565115, "learning_rate": 4.913283005803721e-06, "loss": 0.4226, "step": 8376 }, { "epoch": 0.5101239229059464, "grad_norm": 1.0077708356833879, "learning_rate": 4.91326217031657e-06, "loss": 0.481, "step": 8377 }, { "epoch": 0.5101848186828244, "grad_norm": 1.0492771484330186, "learning_rate": 4.913241332370836e-06, "loss": 0.4331, "step": 8378 }, { "epoch": 0.5102457144597022, "grad_norm": 1.0231970200712788, "learning_rate": 4.913220491966544e-06, "loss": 0.4382, "step": 8379 }, { "epoch": 0.5103066102365801, "grad_norm": 0.9619565495699908, "learning_rate": 4.913199649103712e-06, "loss": 0.5045, "step": 8380 }, { "epoch": 0.5103675060134579, "grad_norm": 0.9548818090952357, "learning_rate": 4.913178803782362e-06, "loss": 0.3526, "step": 8381 }, { "epoch": 0.5104284017903359, "grad_norm": 0.9533941085553209, "learning_rate": 4.913157956002517e-06, "loss": 0.4558, "step": 8382 }, { "epoch": 0.5104892975672137, "grad_norm": 1.0517089460743336, "learning_rate": 4.9131371057641955e-06, "loss": 0.3951, "step": 8383 }, { "epoch": 0.5105501933440916, "grad_norm": 0.993623613425874, "learning_rate": 4.913116253067421e-06, "loss": 0.4002, "step": 8384 }, { "epoch": 0.5106110891209694, "grad_norm": 1.0307291291300542, "learning_rate": 4.913095397912213e-06, "loss": 0.4368, "step": 8385 }, { "epoch": 0.5106719848978474, "grad_norm": 1.0230254227818123, "learning_rate": 4.913074540298594e-06, "loss": 0.4332, "step": 8386 }, { "epoch": 0.5107328806747252, "grad_norm": 1.1603527616479106, "learning_rate": 4.913053680226585e-06, "loss": 0.4682, "step": 8387 }, { "epoch": 0.5107937764516031, "grad_norm": 0.9548680056877453, "learning_rate": 4.913032817696207e-06, "loss": 0.4846, "step": 8388 }, { "epoch": 0.5108546722284809, "grad_norm": 0.9806049848799697, "learning_rate": 4.913011952707481e-06, "loss": 0.4819, "step": 8389 }, { "epoch": 0.5109155680053589, "grad_norm": 0.9304744287937748, "learning_rate": 4.912991085260429e-06, "loss": 0.5185, "step": 8390 }, { "epoch": 0.5109764637822367, "grad_norm": 0.9836515582706606, "learning_rate": 4.912970215355071e-06, "loss": 0.4716, "step": 8391 }, { "epoch": 0.5110373595591146, "grad_norm": 0.9730393771590212, "learning_rate": 4.912949342991431e-06, "loss": 0.4543, "step": 8392 }, { "epoch": 0.5110982553359924, "grad_norm": 1.0485816515233146, "learning_rate": 4.912928468169526e-06, "loss": 0.3801, "step": 8393 }, { "epoch": 0.5111591511128704, "grad_norm": 0.9932666431982463, "learning_rate": 4.912907590889382e-06, "loss": 0.4222, "step": 8394 }, { "epoch": 0.5112200468897482, "grad_norm": 1.0881757474487819, "learning_rate": 4.912886711151016e-06, "loss": 0.4045, "step": 8395 }, { "epoch": 0.511280942666626, "grad_norm": 1.012564998766804, "learning_rate": 4.912865828954452e-06, "loss": 0.4815, "step": 8396 }, { "epoch": 0.5113418384435039, "grad_norm": 1.0463083401154663, "learning_rate": 4.912844944299711e-06, "loss": 0.4141, "step": 8397 }, { "epoch": 0.5114027342203818, "grad_norm": 0.9974795804351037, "learning_rate": 4.912824057186812e-06, "loss": 0.4308, "step": 8398 }, { "epoch": 0.5114636299972597, "grad_norm": 1.0366122887017641, "learning_rate": 4.912803167615779e-06, "loss": 0.4145, "step": 8399 }, { "epoch": 0.5115245257741375, "grad_norm": 1.0067252352360911, "learning_rate": 4.912782275586633e-06, "loss": 0.5697, "step": 8400 }, { "epoch": 0.5115854215510154, "grad_norm": 1.001718032990911, "learning_rate": 4.9127613810993944e-06, "loss": 0.4922, "step": 8401 }, { "epoch": 0.5116463173278933, "grad_norm": 0.9918498049919918, "learning_rate": 4.912740484154084e-06, "loss": 0.4553, "step": 8402 }, { "epoch": 0.5117072131047712, "grad_norm": 0.9721091927316241, "learning_rate": 4.912719584750724e-06, "loss": 0.443, "step": 8403 }, { "epoch": 0.511768108881649, "grad_norm": 1.0150561008980563, "learning_rate": 4.912698682889335e-06, "loss": 0.4335, "step": 8404 }, { "epoch": 0.5118290046585269, "grad_norm": 0.9946162151035344, "learning_rate": 4.91267777856994e-06, "loss": 0.4887, "step": 8405 }, { "epoch": 0.5118899004354048, "grad_norm": 1.0057445635716233, "learning_rate": 4.912656871792558e-06, "loss": 0.4076, "step": 8406 }, { "epoch": 0.5119507962122827, "grad_norm": 0.9649266008262128, "learning_rate": 4.912635962557212e-06, "loss": 0.5564, "step": 8407 }, { "epoch": 0.5120116919891605, "grad_norm": 0.9770937225148982, "learning_rate": 4.912615050863922e-06, "loss": 0.4535, "step": 8408 }, { "epoch": 0.5120725877660385, "grad_norm": 0.922849345646547, "learning_rate": 4.9125941367127106e-06, "loss": 0.4535, "step": 8409 }, { "epoch": 0.5121334835429163, "grad_norm": 1.1852770004521096, "learning_rate": 4.912573220103597e-06, "loss": 0.5331, "step": 8410 }, { "epoch": 0.5121943793197942, "grad_norm": 0.965356296269453, "learning_rate": 4.912552301036605e-06, "loss": 0.4621, "step": 8411 }, { "epoch": 0.512255275096672, "grad_norm": 1.1056658776634585, "learning_rate": 4.912531379511756e-06, "loss": 0.4024, "step": 8412 }, { "epoch": 0.51231617087355, "grad_norm": 1.0273795444948508, "learning_rate": 4.912510455529068e-06, "loss": 0.4026, "step": 8413 }, { "epoch": 0.5123770666504278, "grad_norm": 1.0438057926091828, "learning_rate": 4.912489529088566e-06, "loss": 0.4359, "step": 8414 }, { "epoch": 0.5124379624273057, "grad_norm": 0.965303225296784, "learning_rate": 4.9124686001902694e-06, "loss": 0.4808, "step": 8415 }, { "epoch": 0.5124988582041835, "grad_norm": 1.0430818622105011, "learning_rate": 4.912447668834201e-06, "loss": 0.4414, "step": 8416 }, { "epoch": 0.5125597539810615, "grad_norm": 1.0526091323931046, "learning_rate": 4.9124267350203795e-06, "loss": 0.4271, "step": 8417 }, { "epoch": 0.5126206497579393, "grad_norm": 1.0582039957313756, "learning_rate": 4.912405798748828e-06, "loss": 0.418, "step": 8418 }, { "epoch": 0.5126815455348172, "grad_norm": 0.9850947531620889, "learning_rate": 4.912384860019568e-06, "loss": 0.4668, "step": 8419 }, { "epoch": 0.512742441311695, "grad_norm": 0.8712377516091484, "learning_rate": 4.91236391883262e-06, "loss": 0.4664, "step": 8420 }, { "epoch": 0.512803337088573, "grad_norm": 0.9218514852230555, "learning_rate": 4.912342975188007e-06, "loss": 0.5046, "step": 8421 }, { "epoch": 0.5128642328654508, "grad_norm": 1.0380545241707875, "learning_rate": 4.912322029085747e-06, "loss": 0.4404, "step": 8422 }, { "epoch": 0.5129251286423286, "grad_norm": 1.1155271477503226, "learning_rate": 4.912301080525865e-06, "loss": 0.4352, "step": 8423 }, { "epoch": 0.5129860244192065, "grad_norm": 1.019989869185658, "learning_rate": 4.912280129508381e-06, "loss": 0.3883, "step": 8424 }, { "epoch": 0.5130469201960844, "grad_norm": 1.0251040700238876, "learning_rate": 4.912259176033316e-06, "loss": 0.5462, "step": 8425 }, { "epoch": 0.5131078159729623, "grad_norm": 1.1090634572957057, "learning_rate": 4.912238220100691e-06, "loss": 0.4301, "step": 8426 }, { "epoch": 0.5131687117498401, "grad_norm": 1.013746033034442, "learning_rate": 4.912217261710528e-06, "loss": 0.406, "step": 8427 }, { "epoch": 0.513229607526718, "grad_norm": 1.026696732846761, "learning_rate": 4.912196300862849e-06, "loss": 0.4284, "step": 8428 }, { "epoch": 0.5132905033035959, "grad_norm": 0.9908049767640289, "learning_rate": 4.912175337557673e-06, "loss": 0.4708, "step": 8429 }, { "epoch": 0.5133513990804738, "grad_norm": 1.0850701671306282, "learning_rate": 4.912154371795024e-06, "loss": 0.3703, "step": 8430 }, { "epoch": 0.5134122948573516, "grad_norm": 1.0007829141117706, "learning_rate": 4.912133403574922e-06, "loss": 0.3822, "step": 8431 }, { "epoch": 0.5134731906342295, "grad_norm": 1.0075273220340242, "learning_rate": 4.912112432897389e-06, "loss": 0.457, "step": 8432 }, { "epoch": 0.5135340864111074, "grad_norm": 0.9759069119517849, "learning_rate": 4.912091459762446e-06, "loss": 0.4186, "step": 8433 }, { "epoch": 0.5135949821879853, "grad_norm": 1.0122306169432858, "learning_rate": 4.912070484170114e-06, "loss": 0.425, "step": 8434 }, { "epoch": 0.5136558779648631, "grad_norm": 0.9344659956812211, "learning_rate": 4.912049506120415e-06, "loss": 0.4332, "step": 8435 }, { "epoch": 0.513716773741741, "grad_norm": 0.9494548942595549, "learning_rate": 4.912028525613369e-06, "loss": 0.4528, "step": 8436 }, { "epoch": 0.5137776695186189, "grad_norm": 1.0393703021044038, "learning_rate": 4.912007542649e-06, "loss": 0.4567, "step": 8437 }, { "epoch": 0.5138385652954968, "grad_norm": 1.0870179957080606, "learning_rate": 4.9119865572273275e-06, "loss": 0.4037, "step": 8438 }, { "epoch": 0.5138994610723746, "grad_norm": 1.0523643051678813, "learning_rate": 4.9119655693483725e-06, "loss": 0.4203, "step": 8439 }, { "epoch": 0.5139603568492525, "grad_norm": 1.0402705545857474, "learning_rate": 4.911944579012158e-06, "loss": 0.4475, "step": 8440 }, { "epoch": 0.5140212526261304, "grad_norm": 1.014740853577183, "learning_rate": 4.911923586218704e-06, "loss": 0.4585, "step": 8441 }, { "epoch": 0.5140821484030083, "grad_norm": 1.0142274170129753, "learning_rate": 4.911902590968033e-06, "loss": 0.4326, "step": 8442 }, { "epoch": 0.5141430441798861, "grad_norm": 0.9283971007770191, "learning_rate": 4.911881593260165e-06, "loss": 0.494, "step": 8443 }, { "epoch": 0.514203939956764, "grad_norm": 0.9580052683633056, "learning_rate": 4.911860593095123e-06, "loss": 0.4897, "step": 8444 }, { "epoch": 0.5142648357336419, "grad_norm": 1.0041426053917983, "learning_rate": 4.911839590472926e-06, "loss": 0.488, "step": 8445 }, { "epoch": 0.5143257315105197, "grad_norm": 1.14140945692166, "learning_rate": 4.9118185853935985e-06, "loss": 0.4407, "step": 8446 }, { "epoch": 0.5143866272873976, "grad_norm": 1.0722712362269513, "learning_rate": 4.91179757785716e-06, "loss": 0.4409, "step": 8447 }, { "epoch": 0.5144475230642755, "grad_norm": 1.0033388964352685, "learning_rate": 4.911776567863632e-06, "loss": 0.4158, "step": 8448 }, { "epoch": 0.5145084188411534, "grad_norm": 1.100653924078908, "learning_rate": 4.911755555413037e-06, "loss": 0.38, "step": 8449 }, { "epoch": 0.5145693146180312, "grad_norm": 1.0434774063998744, "learning_rate": 4.911734540505394e-06, "loss": 0.435, "step": 8450 }, { "epoch": 0.5146302103949091, "grad_norm": 0.9025153318873368, "learning_rate": 4.911713523140728e-06, "loss": 0.4436, "step": 8451 }, { "epoch": 0.514691106171787, "grad_norm": 1.0328874321409025, "learning_rate": 4.9116925033190565e-06, "loss": 0.4074, "step": 8452 }, { "epoch": 0.5147520019486649, "grad_norm": 0.9798001808923298, "learning_rate": 4.9116714810404044e-06, "loss": 0.4232, "step": 8453 }, { "epoch": 0.5148128977255427, "grad_norm": 1.0295017164748859, "learning_rate": 4.911650456304791e-06, "loss": 0.4506, "step": 8454 }, { "epoch": 0.5148737935024206, "grad_norm": 1.089833808808818, "learning_rate": 4.911629429112237e-06, "loss": 0.5084, "step": 8455 }, { "epoch": 0.5149346892792985, "grad_norm": 1.0292077814581837, "learning_rate": 4.9116083994627665e-06, "loss": 0.3676, "step": 8456 }, { "epoch": 0.5149955850561764, "grad_norm": 1.0883013092546447, "learning_rate": 4.911587367356399e-06, "loss": 0.4276, "step": 8457 }, { "epoch": 0.5150564808330542, "grad_norm": 0.9608396695936109, "learning_rate": 4.9115663327931565e-06, "loss": 0.469, "step": 8458 }, { "epoch": 0.5151173766099321, "grad_norm": 0.9862974055385412, "learning_rate": 4.911545295773061e-06, "loss": 0.392, "step": 8459 }, { "epoch": 0.51517827238681, "grad_norm": 1.003856311726001, "learning_rate": 4.911524256296132e-06, "loss": 0.4544, "step": 8460 }, { "epoch": 0.5152391681636879, "grad_norm": 0.9887337333733689, "learning_rate": 4.911503214362393e-06, "loss": 0.4856, "step": 8461 }, { "epoch": 0.5153000639405657, "grad_norm": 0.993151341056759, "learning_rate": 4.911482169971865e-06, "loss": 0.433, "step": 8462 }, { "epoch": 0.5153609597174436, "grad_norm": 1.0735498477122658, "learning_rate": 4.911461123124569e-06, "loss": 0.4482, "step": 8463 }, { "epoch": 0.5154218554943215, "grad_norm": 1.0243907074086478, "learning_rate": 4.911440073820526e-06, "loss": 0.4487, "step": 8464 }, { "epoch": 0.5154827512711994, "grad_norm": 1.1450193493240166, "learning_rate": 4.911419022059758e-06, "loss": 0.4319, "step": 8465 }, { "epoch": 0.5155436470480772, "grad_norm": 0.9824072731517923, "learning_rate": 4.911397967842287e-06, "loss": 0.423, "step": 8466 }, { "epoch": 0.515604542824955, "grad_norm": 1.0064065558311388, "learning_rate": 4.911376911168133e-06, "loss": 0.4717, "step": 8467 }, { "epoch": 0.515665438601833, "grad_norm": 1.1108949455415347, "learning_rate": 4.911355852037319e-06, "loss": 0.4048, "step": 8468 }, { "epoch": 0.5157263343787108, "grad_norm": 1.0029442633115988, "learning_rate": 4.911334790449866e-06, "loss": 0.5004, "step": 8469 }, { "epoch": 0.5157872301555887, "grad_norm": 1.021941689995531, "learning_rate": 4.911313726405795e-06, "loss": 0.4012, "step": 8470 }, { "epoch": 0.5158481259324665, "grad_norm": 1.043655050543881, "learning_rate": 4.911292659905127e-06, "loss": 0.3725, "step": 8471 }, { "epoch": 0.5159090217093445, "grad_norm": 1.040148672271331, "learning_rate": 4.911271590947885e-06, "loss": 0.4213, "step": 8472 }, { "epoch": 0.5159699174862223, "grad_norm": 0.9453114935801371, "learning_rate": 4.9112505195340895e-06, "loss": 0.5085, "step": 8473 }, { "epoch": 0.5160308132631002, "grad_norm": 1.0849582312803816, "learning_rate": 4.911229445663762e-06, "loss": 0.3661, "step": 8474 }, { "epoch": 0.516091709039978, "grad_norm": 1.0524934644369004, "learning_rate": 4.911208369336924e-06, "loss": 0.4337, "step": 8475 }, { "epoch": 0.516152604816856, "grad_norm": 0.9953907029088835, "learning_rate": 4.911187290553597e-06, "loss": 0.4508, "step": 8476 }, { "epoch": 0.5162135005937338, "grad_norm": 1.0380506194040227, "learning_rate": 4.911166209313804e-06, "loss": 0.3895, "step": 8477 }, { "epoch": 0.5162743963706117, "grad_norm": 1.072896733811343, "learning_rate": 4.911145125617563e-06, "loss": 0.3811, "step": 8478 }, { "epoch": 0.5163352921474895, "grad_norm": 1.0167724072012883, "learning_rate": 4.911124039464898e-06, "loss": 0.4178, "step": 8479 }, { "epoch": 0.5163961879243675, "grad_norm": 1.049714212422888, "learning_rate": 4.91110295085583e-06, "loss": 0.4929, "step": 8480 }, { "epoch": 0.5164570837012453, "grad_norm": 1.0276311726272507, "learning_rate": 4.911081859790381e-06, "loss": 0.4346, "step": 8481 }, { "epoch": 0.5165179794781232, "grad_norm": 1.020600771227954, "learning_rate": 4.911060766268571e-06, "loss": 0.4397, "step": 8482 }, { "epoch": 0.516578875255001, "grad_norm": 1.0035353816150139, "learning_rate": 4.911039670290423e-06, "loss": 0.4429, "step": 8483 }, { "epoch": 0.516639771031879, "grad_norm": 1.040148096626415, "learning_rate": 4.911018571855958e-06, "loss": 0.399, "step": 8484 }, { "epoch": 0.5167006668087568, "grad_norm": 1.0186620529587431, "learning_rate": 4.910997470965197e-06, "loss": 0.4025, "step": 8485 }, { "epoch": 0.5167615625856347, "grad_norm": 1.0059681471808914, "learning_rate": 4.910976367618162e-06, "loss": 0.4532, "step": 8486 }, { "epoch": 0.5168224583625125, "grad_norm": 1.0164417333815376, "learning_rate": 4.910955261814875e-06, "loss": 0.4455, "step": 8487 }, { "epoch": 0.5168833541393905, "grad_norm": 1.06850118562162, "learning_rate": 4.910934153555356e-06, "loss": 0.4667, "step": 8488 }, { "epoch": 0.5169442499162683, "grad_norm": 1.0306563772689188, "learning_rate": 4.910913042839628e-06, "loss": 0.4155, "step": 8489 }, { "epoch": 0.5170051456931462, "grad_norm": 1.1072363042126, "learning_rate": 4.9108919296677115e-06, "loss": 0.4303, "step": 8490 }, { "epoch": 0.5170660414700241, "grad_norm": 1.0723334918693797, "learning_rate": 4.9108708140396285e-06, "loss": 0.4003, "step": 8491 }, { "epoch": 0.517126937246902, "grad_norm": 0.957646305421407, "learning_rate": 4.910849695955401e-06, "loss": 0.5028, "step": 8492 }, { "epoch": 0.5171878330237798, "grad_norm": 1.0389641880288205, "learning_rate": 4.91082857541505e-06, "loss": 0.4442, "step": 8493 }, { "epoch": 0.5172487288006576, "grad_norm": 1.03852566222708, "learning_rate": 4.910807452418595e-06, "loss": 0.4367, "step": 8494 }, { "epoch": 0.5173096245775356, "grad_norm": 0.9281115298259606, "learning_rate": 4.910786326966062e-06, "loss": 0.5177, "step": 8495 }, { "epoch": 0.5173705203544134, "grad_norm": 1.00634406987426, "learning_rate": 4.910765199057469e-06, "loss": 0.4025, "step": 8496 }, { "epoch": 0.5174314161312913, "grad_norm": 0.9773881246680165, "learning_rate": 4.910744068692839e-06, "loss": 0.3981, "step": 8497 }, { "epoch": 0.5174923119081691, "grad_norm": 1.0267502468167742, "learning_rate": 4.910722935872192e-06, "loss": 0.3874, "step": 8498 }, { "epoch": 0.5175532076850471, "grad_norm": 0.9627900544955451, "learning_rate": 4.9107018005955514e-06, "loss": 0.5265, "step": 8499 }, { "epoch": 0.5176141034619249, "grad_norm": 0.9645622052251106, "learning_rate": 4.9106806628629375e-06, "loss": 0.451, "step": 8500 }, { "epoch": 0.5176749992388028, "grad_norm": 1.046499386533939, "learning_rate": 4.910659522674373e-06, "loss": 0.4072, "step": 8501 }, { "epoch": 0.5177358950156806, "grad_norm": 0.9850173359834665, "learning_rate": 4.910638380029878e-06, "loss": 0.4257, "step": 8502 }, { "epoch": 0.5177967907925586, "grad_norm": 1.070602392580532, "learning_rate": 4.910617234929474e-06, "loss": 0.4689, "step": 8503 }, { "epoch": 0.5178576865694364, "grad_norm": 0.9330635034122995, "learning_rate": 4.910596087373185e-06, "loss": 0.4812, "step": 8504 }, { "epoch": 0.5179185823463143, "grad_norm": 1.0158654378875354, "learning_rate": 4.91057493736103e-06, "loss": 0.4629, "step": 8505 }, { "epoch": 0.5179794781231921, "grad_norm": 0.9900830825083944, "learning_rate": 4.910553784893032e-06, "loss": 0.4572, "step": 8506 }, { "epoch": 0.5180403739000701, "grad_norm": 0.9927107773054996, "learning_rate": 4.910532629969211e-06, "loss": 0.4356, "step": 8507 }, { "epoch": 0.5181012696769479, "grad_norm": 1.0238318644473723, "learning_rate": 4.91051147258959e-06, "loss": 0.4644, "step": 8508 }, { "epoch": 0.5181621654538258, "grad_norm": 0.9753912766782754, "learning_rate": 4.910490312754189e-06, "loss": 0.4878, "step": 8509 }, { "epoch": 0.5182230612307036, "grad_norm": 1.0618222639512411, "learning_rate": 4.910469150463032e-06, "loss": 0.4762, "step": 8510 }, { "epoch": 0.5182839570075816, "grad_norm": 1.0263624553715798, "learning_rate": 4.910447985716139e-06, "loss": 0.4753, "step": 8511 }, { "epoch": 0.5183448527844594, "grad_norm": 1.0705815823546188, "learning_rate": 4.910426818513531e-06, "loss": 0.4226, "step": 8512 }, { "epoch": 0.5184057485613373, "grad_norm": 1.099261409009667, "learning_rate": 4.91040564885523e-06, "loss": 0.4044, "step": 8513 }, { "epoch": 0.5184666443382151, "grad_norm": 0.9403892280000513, "learning_rate": 4.910384476741259e-06, "loss": 0.4856, "step": 8514 }, { "epoch": 0.518527540115093, "grad_norm": 0.9820781948079325, "learning_rate": 4.910363302171638e-06, "loss": 0.4578, "step": 8515 }, { "epoch": 0.5185884358919709, "grad_norm": 1.0930137170104077, "learning_rate": 4.910342125146388e-06, "loss": 0.4633, "step": 8516 }, { "epoch": 0.5186493316688487, "grad_norm": 1.0100846074381227, "learning_rate": 4.910320945665533e-06, "loss": 0.4062, "step": 8517 }, { "epoch": 0.5187102274457266, "grad_norm": 1.007245208114069, "learning_rate": 4.9102997637290916e-06, "loss": 0.4417, "step": 8518 }, { "epoch": 0.5187711232226045, "grad_norm": 0.935317689531168, "learning_rate": 4.910278579337088e-06, "loss": 0.4902, "step": 8519 }, { "epoch": 0.5188320189994824, "grad_norm": 1.0604969761816085, "learning_rate": 4.9102573924895425e-06, "loss": 0.4258, "step": 8520 }, { "epoch": 0.5188929147763602, "grad_norm": 0.9180161583405663, "learning_rate": 4.910236203186477e-06, "loss": 0.5055, "step": 8521 }, { "epoch": 0.5189538105532381, "grad_norm": 1.050296640191336, "learning_rate": 4.910215011427913e-06, "loss": 0.4404, "step": 8522 }, { "epoch": 0.519014706330116, "grad_norm": 1.097510830135985, "learning_rate": 4.9101938172138715e-06, "loss": 0.4222, "step": 8523 }, { "epoch": 0.5190756021069939, "grad_norm": 0.9958235819415049, "learning_rate": 4.910172620544376e-06, "loss": 0.4116, "step": 8524 }, { "epoch": 0.5191364978838717, "grad_norm": 1.079547298661236, "learning_rate": 4.9101514214194455e-06, "loss": 0.4751, "step": 8525 }, { "epoch": 0.5191973936607496, "grad_norm": 0.9537767069196704, "learning_rate": 4.9101302198391024e-06, "loss": 0.4585, "step": 8526 }, { "epoch": 0.5192582894376275, "grad_norm": 1.0669123182442621, "learning_rate": 4.910109015803369e-06, "loss": 0.4885, "step": 8527 }, { "epoch": 0.5193191852145054, "grad_norm": 1.1297966733723799, "learning_rate": 4.910087809312268e-06, "loss": 0.3974, "step": 8528 }, { "epoch": 0.5193800809913832, "grad_norm": 1.0047312859758235, "learning_rate": 4.910066600365818e-06, "loss": 0.3938, "step": 8529 }, { "epoch": 0.5194409767682612, "grad_norm": 0.968724197029759, "learning_rate": 4.910045388964043e-06, "loss": 0.4406, "step": 8530 }, { "epoch": 0.519501872545139, "grad_norm": 1.0580682690607364, "learning_rate": 4.910024175106965e-06, "loss": 0.3966, "step": 8531 }, { "epoch": 0.5195627683220169, "grad_norm": 0.989322365004037, "learning_rate": 4.910002958794603e-06, "loss": 0.4308, "step": 8532 }, { "epoch": 0.5196236640988947, "grad_norm": 1.0194055259978139, "learning_rate": 4.90998174002698e-06, "loss": 0.4357, "step": 8533 }, { "epoch": 0.5196845598757727, "grad_norm": 1.122550011907693, "learning_rate": 4.909960518804119e-06, "loss": 0.4983, "step": 8534 }, { "epoch": 0.5197454556526505, "grad_norm": 1.0079324526169813, "learning_rate": 4.909939295126039e-06, "loss": 0.4441, "step": 8535 }, { "epoch": 0.5198063514295284, "grad_norm": 1.0724240324963896, "learning_rate": 4.9099180689927625e-06, "loss": 0.3923, "step": 8536 }, { "epoch": 0.5198672472064062, "grad_norm": 1.0532737058240282, "learning_rate": 4.909896840404313e-06, "loss": 0.4271, "step": 8537 }, { "epoch": 0.5199281429832842, "grad_norm": 1.0264900140508462, "learning_rate": 4.90987560936071e-06, "loss": 0.3938, "step": 8538 }, { "epoch": 0.519989038760162, "grad_norm": 1.0839328629072278, "learning_rate": 4.909854375861977e-06, "loss": 0.4141, "step": 8539 }, { "epoch": 0.5200499345370398, "grad_norm": 1.0822952042198026, "learning_rate": 4.909833139908132e-06, "loss": 0.451, "step": 8540 }, { "epoch": 0.5201108303139177, "grad_norm": 0.9469833507626185, "learning_rate": 4.909811901499201e-06, "loss": 0.4468, "step": 8541 }, { "epoch": 0.5201717260907956, "grad_norm": 1.0526331913040057, "learning_rate": 4.909790660635204e-06, "loss": 0.428, "step": 8542 }, { "epoch": 0.5202326218676735, "grad_norm": 0.9861260713235551, "learning_rate": 4.909769417316161e-06, "loss": 0.4162, "step": 8543 }, { "epoch": 0.5202935176445513, "grad_norm": 1.1059967802886375, "learning_rate": 4.909748171542096e-06, "loss": 0.4234, "step": 8544 }, { "epoch": 0.5203544134214292, "grad_norm": 1.0069147589839431, "learning_rate": 4.90972692331303e-06, "loss": 0.448, "step": 8545 }, { "epoch": 0.5204153091983071, "grad_norm": 0.9854213962080972, "learning_rate": 4.909705672628983e-06, "loss": 0.4921, "step": 8546 }, { "epoch": 0.520476204975185, "grad_norm": 0.9877311700513816, "learning_rate": 4.909684419489978e-06, "loss": 0.4276, "step": 8547 }, { "epoch": 0.5205371007520628, "grad_norm": 0.9703319290295793, "learning_rate": 4.909663163896038e-06, "loss": 0.4354, "step": 8548 }, { "epoch": 0.5205979965289407, "grad_norm": 0.9622661070066005, "learning_rate": 4.909641905847182e-06, "loss": 0.5042, "step": 8549 }, { "epoch": 0.5206588923058186, "grad_norm": 1.086421671938841, "learning_rate": 4.9096206453434335e-06, "loss": 0.3643, "step": 8550 }, { "epoch": 0.5207197880826965, "grad_norm": 0.9771798159318373, "learning_rate": 4.909599382384814e-06, "loss": 0.4996, "step": 8551 }, { "epoch": 0.5207806838595743, "grad_norm": 1.0178334778373153, "learning_rate": 4.909578116971344e-06, "loss": 0.506, "step": 8552 }, { "epoch": 0.5208415796364522, "grad_norm": 0.9287424056197428, "learning_rate": 4.909556849103047e-06, "loss": 0.5818, "step": 8553 }, { "epoch": 0.5209024754133301, "grad_norm": 0.9507438361434665, "learning_rate": 4.909535578779942e-06, "loss": 0.4574, "step": 8554 }, { "epoch": 0.520963371190208, "grad_norm": 1.0132260420863415, "learning_rate": 4.909514306002053e-06, "loss": 0.4902, "step": 8555 }, { "epoch": 0.5210242669670858, "grad_norm": 1.0050808834346157, "learning_rate": 4.909493030769401e-06, "loss": 0.4869, "step": 8556 }, { "epoch": 0.5210851627439637, "grad_norm": 1.080482054607036, "learning_rate": 4.909471753082008e-06, "loss": 0.5146, "step": 8557 }, { "epoch": 0.5211460585208416, "grad_norm": 0.9739238257131994, "learning_rate": 4.909450472939894e-06, "loss": 0.4508, "step": 8558 }, { "epoch": 0.5212069542977195, "grad_norm": 1.0047970096155356, "learning_rate": 4.909429190343083e-06, "loss": 0.4347, "step": 8559 }, { "epoch": 0.5212678500745973, "grad_norm": 1.0890094693190449, "learning_rate": 4.9094079052915955e-06, "loss": 0.3998, "step": 8560 }, { "epoch": 0.5213287458514752, "grad_norm": 0.9999003008812141, "learning_rate": 4.909386617785453e-06, "loss": 0.3905, "step": 8561 }, { "epoch": 0.5213896416283531, "grad_norm": 1.1082567262808092, "learning_rate": 4.909365327824678e-06, "loss": 0.4972, "step": 8562 }, { "epoch": 0.521450537405231, "grad_norm": 1.0398443094202605, "learning_rate": 4.909344035409292e-06, "loss": 0.4764, "step": 8563 }, { "epoch": 0.5215114331821088, "grad_norm": 1.179039381991679, "learning_rate": 4.909322740539315e-06, "loss": 0.3886, "step": 8564 }, { "epoch": 0.5215723289589866, "grad_norm": 1.0709518909569247, "learning_rate": 4.909301443214771e-06, "loss": 0.4443, "step": 8565 }, { "epoch": 0.5216332247358646, "grad_norm": 0.993361651035601, "learning_rate": 4.909280143435681e-06, "loss": 0.4507, "step": 8566 }, { "epoch": 0.5216941205127424, "grad_norm": 0.9933309332281406, "learning_rate": 4.9092588412020655e-06, "loss": 0.4852, "step": 8567 }, { "epoch": 0.5217550162896203, "grad_norm": 1.004341810579752, "learning_rate": 4.9092375365139476e-06, "loss": 0.5049, "step": 8568 }, { "epoch": 0.5218159120664981, "grad_norm": 1.005925084296951, "learning_rate": 4.909216229371349e-06, "loss": 0.4767, "step": 8569 }, { "epoch": 0.5218768078433761, "grad_norm": 1.0613242700721062, "learning_rate": 4.909194919774291e-06, "loss": 0.394, "step": 8570 }, { "epoch": 0.5219377036202539, "grad_norm": 1.0007937054978953, "learning_rate": 4.909173607722794e-06, "loss": 0.4703, "step": 8571 }, { "epoch": 0.5219985993971318, "grad_norm": 1.048748555508546, "learning_rate": 4.909152293216884e-06, "loss": 0.4244, "step": 8572 }, { "epoch": 0.5220594951740097, "grad_norm": 1.0751504288771165, "learning_rate": 4.909130976256577e-06, "loss": 0.4441, "step": 8573 }, { "epoch": 0.5221203909508876, "grad_norm": 0.9251333183612322, "learning_rate": 4.909109656841899e-06, "loss": 0.5035, "step": 8574 }, { "epoch": 0.5221812867277654, "grad_norm": 0.9447279046450302, "learning_rate": 4.909088334972869e-06, "loss": 0.478, "step": 8575 }, { "epoch": 0.5222421825046433, "grad_norm": 0.9924336584396978, "learning_rate": 4.90906701064951e-06, "loss": 0.3629, "step": 8576 }, { "epoch": 0.5223030782815212, "grad_norm": 0.97495461832459, "learning_rate": 4.909045683871844e-06, "loss": 0.4278, "step": 8577 }, { "epoch": 0.5223639740583991, "grad_norm": 0.8923684705253473, "learning_rate": 4.909024354639893e-06, "loss": 0.518, "step": 8578 }, { "epoch": 0.5224248698352769, "grad_norm": 1.059870722574201, "learning_rate": 4.909003022953677e-06, "loss": 0.3904, "step": 8579 }, { "epoch": 0.5224857656121548, "grad_norm": 1.045844596820486, "learning_rate": 4.908981688813219e-06, "loss": 0.4465, "step": 8580 }, { "epoch": 0.5225466613890327, "grad_norm": 1.1093706086675974, "learning_rate": 4.9089603522185405e-06, "loss": 0.4037, "step": 8581 }, { "epoch": 0.5226075571659106, "grad_norm": 1.0319238638501393, "learning_rate": 4.908939013169664e-06, "loss": 0.4608, "step": 8582 }, { "epoch": 0.5226684529427884, "grad_norm": 1.0371798651497728, "learning_rate": 4.90891767166661e-06, "loss": 0.429, "step": 8583 }, { "epoch": 0.5227293487196663, "grad_norm": 1.0611791849182528, "learning_rate": 4.908896327709401e-06, "loss": 0.4169, "step": 8584 }, { "epoch": 0.5227902444965442, "grad_norm": 0.9476058978281375, "learning_rate": 4.908874981298058e-06, "loss": 0.5096, "step": 8585 }, { "epoch": 0.522851140273422, "grad_norm": 0.985369683840825, "learning_rate": 4.908853632432603e-06, "loss": 0.4688, "step": 8586 }, { "epoch": 0.5229120360502999, "grad_norm": 1.0587492043522326, "learning_rate": 4.908832281113059e-06, "loss": 0.486, "step": 8587 }, { "epoch": 0.5229729318271777, "grad_norm": 1.010244296352972, "learning_rate": 4.908810927339447e-06, "loss": 0.4404, "step": 8588 }, { "epoch": 0.5230338276040557, "grad_norm": 1.0783497228729821, "learning_rate": 4.908789571111787e-06, "loss": 0.446, "step": 8589 }, { "epoch": 0.5230947233809335, "grad_norm": 1.097091672016871, "learning_rate": 4.908768212430103e-06, "loss": 0.383, "step": 8590 }, { "epoch": 0.5231556191578114, "grad_norm": 1.1096635780654283, "learning_rate": 4.908746851294416e-06, "loss": 0.4059, "step": 8591 }, { "epoch": 0.5232165149346892, "grad_norm": 0.9805638749163436, "learning_rate": 4.908725487704748e-06, "loss": 0.4295, "step": 8592 }, { "epoch": 0.5232774107115672, "grad_norm": 1.1123935367999478, "learning_rate": 4.90870412166112e-06, "loss": 0.3729, "step": 8593 }, { "epoch": 0.523338306488445, "grad_norm": 0.9615833634245132, "learning_rate": 4.908682753163555e-06, "loss": 0.4746, "step": 8594 }, { "epoch": 0.5233992022653229, "grad_norm": 1.0269781556033306, "learning_rate": 4.908661382212074e-06, "loss": 0.3893, "step": 8595 }, { "epoch": 0.5234600980422007, "grad_norm": 1.047228432623373, "learning_rate": 4.908640008806699e-06, "loss": 0.4775, "step": 8596 }, { "epoch": 0.5235209938190787, "grad_norm": 1.080967681990089, "learning_rate": 4.908618632947451e-06, "loss": 0.346, "step": 8597 }, { "epoch": 0.5235818895959565, "grad_norm": 1.0039487792280293, "learning_rate": 4.908597254634353e-06, "loss": 0.4418, "step": 8598 }, { "epoch": 0.5236427853728344, "grad_norm": 0.9629250064886133, "learning_rate": 4.908575873867426e-06, "loss": 0.4018, "step": 8599 }, { "epoch": 0.5237036811497122, "grad_norm": 1.064982413648252, "learning_rate": 4.908554490646692e-06, "loss": 0.3664, "step": 8600 }, { "epoch": 0.5237645769265902, "grad_norm": 1.0304836259168377, "learning_rate": 4.908533104972172e-06, "loss": 0.4555, "step": 8601 }, { "epoch": 0.523825472703468, "grad_norm": 1.045217394511248, "learning_rate": 4.90851171684389e-06, "loss": 0.4528, "step": 8602 }, { "epoch": 0.5238863684803459, "grad_norm": 1.0437525396799523, "learning_rate": 4.908490326261866e-06, "loss": 0.4242, "step": 8603 }, { "epoch": 0.5239472642572237, "grad_norm": 1.082089262959652, "learning_rate": 4.90846893322612e-06, "loss": 0.4021, "step": 8604 }, { "epoch": 0.5240081600341017, "grad_norm": 1.0386080126578288, "learning_rate": 4.908447537736678e-06, "loss": 0.3951, "step": 8605 }, { "epoch": 0.5240690558109795, "grad_norm": 0.9950677891595794, "learning_rate": 4.908426139793559e-06, "loss": 0.4483, "step": 8606 }, { "epoch": 0.5241299515878574, "grad_norm": 1.0829898421081936, "learning_rate": 4.9084047393967865e-06, "loss": 0.451, "step": 8607 }, { "epoch": 0.5241908473647352, "grad_norm": 0.988941641451332, "learning_rate": 4.90838333654638e-06, "loss": 0.4305, "step": 8608 }, { "epoch": 0.5242517431416132, "grad_norm": 1.064481378458262, "learning_rate": 4.9083619312423645e-06, "loss": 0.4715, "step": 8609 }, { "epoch": 0.524312638918491, "grad_norm": 0.9805571934911115, "learning_rate": 4.9083405234847585e-06, "loss": 0.4729, "step": 8610 }, { "epoch": 0.5243735346953688, "grad_norm": 1.024173288075929, "learning_rate": 4.908319113273585e-06, "loss": 0.4432, "step": 8611 }, { "epoch": 0.5244344304722468, "grad_norm": 0.9052510977516766, "learning_rate": 4.908297700608867e-06, "loss": 0.5192, "step": 8612 }, { "epoch": 0.5244953262491246, "grad_norm": 1.0756558643506708, "learning_rate": 4.9082762854906255e-06, "loss": 0.4701, "step": 8613 }, { "epoch": 0.5245562220260025, "grad_norm": 0.9864461816242682, "learning_rate": 4.908254867918882e-06, "loss": 0.4782, "step": 8614 }, { "epoch": 0.5246171178028803, "grad_norm": 0.9987403082282066, "learning_rate": 4.908233447893657e-06, "loss": 0.5045, "step": 8615 }, { "epoch": 0.5246780135797583, "grad_norm": 1.0410319903660332, "learning_rate": 4.908212025414975e-06, "loss": 0.4284, "step": 8616 }, { "epoch": 0.5247389093566361, "grad_norm": 1.0096077316768, "learning_rate": 4.908190600482857e-06, "loss": 0.4224, "step": 8617 }, { "epoch": 0.524799805133514, "grad_norm": 1.1294957992021777, "learning_rate": 4.908169173097324e-06, "loss": 0.3957, "step": 8618 }, { "epoch": 0.5248607009103918, "grad_norm": 1.0582737626826615, "learning_rate": 4.908147743258398e-06, "loss": 0.4428, "step": 8619 }, { "epoch": 0.5249215966872698, "grad_norm": 1.0419813358002803, "learning_rate": 4.908126310966102e-06, "loss": 0.3955, "step": 8620 }, { "epoch": 0.5249824924641476, "grad_norm": 1.061008664793576, "learning_rate": 4.908104876220456e-06, "loss": 0.4239, "step": 8621 }, { "epoch": 0.5250433882410255, "grad_norm": 0.9915056826088376, "learning_rate": 4.9080834390214835e-06, "loss": 0.4605, "step": 8622 }, { "epoch": 0.5251042840179033, "grad_norm": 1.0614033005008872, "learning_rate": 4.908061999369206e-06, "loss": 0.3828, "step": 8623 }, { "epoch": 0.5251651797947813, "grad_norm": 0.9922521838049723, "learning_rate": 4.908040557263644e-06, "loss": 0.4383, "step": 8624 }, { "epoch": 0.5252260755716591, "grad_norm": 0.9827094969531334, "learning_rate": 4.9080191127048205e-06, "loss": 0.4459, "step": 8625 }, { "epoch": 0.525286971348537, "grad_norm": 0.9894034142603675, "learning_rate": 4.9079976656927575e-06, "loss": 0.4884, "step": 8626 }, { "epoch": 0.5253478671254148, "grad_norm": 1.18666371885818, "learning_rate": 4.907976216227477e-06, "loss": 0.3497, "step": 8627 }, { "epoch": 0.5254087629022928, "grad_norm": 0.9250696971399086, "learning_rate": 4.907954764308999e-06, "loss": 0.4594, "step": 8628 }, { "epoch": 0.5254696586791706, "grad_norm": 0.9209968220627324, "learning_rate": 4.907933309937348e-06, "loss": 0.469, "step": 8629 }, { "epoch": 0.5255305544560485, "grad_norm": 0.9444083746979887, "learning_rate": 4.907911853112545e-06, "loss": 0.4651, "step": 8630 }, { "epoch": 0.5255914502329263, "grad_norm": 0.9541938566226762, "learning_rate": 4.9078903938346115e-06, "loss": 0.4717, "step": 8631 }, { "epoch": 0.5256523460098043, "grad_norm": 1.0342753290718625, "learning_rate": 4.907868932103568e-06, "loss": 0.465, "step": 8632 }, { "epoch": 0.5257132417866821, "grad_norm": 1.0195001078362789, "learning_rate": 4.907847467919438e-06, "loss": 0.4472, "step": 8633 }, { "epoch": 0.52577413756356, "grad_norm": 0.976932492785576, "learning_rate": 4.907826001282244e-06, "loss": 0.4091, "step": 8634 }, { "epoch": 0.5258350333404378, "grad_norm": 0.9154422567781239, "learning_rate": 4.907804532192006e-06, "loss": 0.5042, "step": 8635 }, { "epoch": 0.5258959291173158, "grad_norm": 1.0324163022464197, "learning_rate": 4.907783060648747e-06, "loss": 0.4251, "step": 8636 }, { "epoch": 0.5259568248941936, "grad_norm": 1.0559374009139482, "learning_rate": 4.907761586652489e-06, "loss": 0.4124, "step": 8637 }, { "epoch": 0.5260177206710714, "grad_norm": 1.0785883294176724, "learning_rate": 4.907740110203253e-06, "loss": 0.4596, "step": 8638 }, { "epoch": 0.5260786164479493, "grad_norm": 1.0236998784463347, "learning_rate": 4.907718631301062e-06, "loss": 0.4092, "step": 8639 }, { "epoch": 0.5261395122248272, "grad_norm": 1.0793349342128926, "learning_rate": 4.907697149945937e-06, "loss": 0.4352, "step": 8640 }, { "epoch": 0.5262004080017051, "grad_norm": 0.922268870072163, "learning_rate": 4.9076756661379e-06, "loss": 0.4975, "step": 8641 }, { "epoch": 0.5262613037785829, "grad_norm": 1.0395961911322877, "learning_rate": 4.907654179876974e-06, "loss": 0.4634, "step": 8642 }, { "epoch": 0.5263221995554608, "grad_norm": 1.0577431494325895, "learning_rate": 4.907632691163179e-06, "loss": 0.4454, "step": 8643 }, { "epoch": 0.5263830953323387, "grad_norm": 0.9388528118179404, "learning_rate": 4.907611199996538e-06, "loss": 0.4632, "step": 8644 }, { "epoch": 0.5264439911092166, "grad_norm": 1.0812358641424438, "learning_rate": 4.907589706377074e-06, "loss": 0.3976, "step": 8645 }, { "epoch": 0.5265048868860944, "grad_norm": 1.0725698691855294, "learning_rate": 4.907568210304806e-06, "loss": 0.4471, "step": 8646 }, { "epoch": 0.5265657826629723, "grad_norm": 0.9764757181582497, "learning_rate": 4.907546711779758e-06, "loss": 0.4081, "step": 8647 }, { "epoch": 0.5266266784398502, "grad_norm": 1.0537232263096385, "learning_rate": 4.907525210801952e-06, "loss": 0.3588, "step": 8648 }, { "epoch": 0.5266875742167281, "grad_norm": 1.0510049548377782, "learning_rate": 4.9075037073714096e-06, "loss": 0.4151, "step": 8649 }, { "epoch": 0.5267484699936059, "grad_norm": 0.9510812354584224, "learning_rate": 4.907482201488151e-06, "loss": 0.4554, "step": 8650 }, { "epoch": 0.5268093657704838, "grad_norm": 0.962666980302078, "learning_rate": 4.9074606931522004e-06, "loss": 0.4401, "step": 8651 }, { "epoch": 0.5268702615473617, "grad_norm": 1.0535242954957635, "learning_rate": 4.907439182363579e-06, "loss": 0.4162, "step": 8652 }, { "epoch": 0.5269311573242396, "grad_norm": 1.1004633883943122, "learning_rate": 4.907417669122309e-06, "loss": 0.3947, "step": 8653 }, { "epoch": 0.5269920531011174, "grad_norm": 0.9639376108339724, "learning_rate": 4.907396153428412e-06, "loss": 0.4197, "step": 8654 }, { "epoch": 0.5270529488779954, "grad_norm": 0.9733913423270477, "learning_rate": 4.907374635281909e-06, "loss": 0.4359, "step": 8655 }, { "epoch": 0.5271138446548732, "grad_norm": 1.101162133005141, "learning_rate": 4.9073531146828235e-06, "loss": 0.4278, "step": 8656 }, { "epoch": 0.527174740431751, "grad_norm": 1.004328726405885, "learning_rate": 4.907331591631176e-06, "loss": 0.4493, "step": 8657 }, { "epoch": 0.5272356362086289, "grad_norm": 0.9980087873444597, "learning_rate": 4.907310066126989e-06, "loss": 0.4709, "step": 8658 }, { "epoch": 0.5272965319855069, "grad_norm": 0.9784452192093017, "learning_rate": 4.907288538170286e-06, "loss": 0.3895, "step": 8659 }, { "epoch": 0.5273574277623847, "grad_norm": 1.009269919863873, "learning_rate": 4.907267007761086e-06, "loss": 0.4385, "step": 8660 }, { "epoch": 0.5274183235392625, "grad_norm": 1.0384660329229294, "learning_rate": 4.907245474899413e-06, "loss": 0.3901, "step": 8661 }, { "epoch": 0.5274792193161404, "grad_norm": 1.0858985767544092, "learning_rate": 4.907223939585289e-06, "loss": 0.3965, "step": 8662 }, { "epoch": 0.5275401150930183, "grad_norm": 1.042810872035256, "learning_rate": 4.907202401818734e-06, "loss": 0.4186, "step": 8663 }, { "epoch": 0.5276010108698962, "grad_norm": 0.9519915252487742, "learning_rate": 4.9071808615997715e-06, "loss": 0.4575, "step": 8664 }, { "epoch": 0.527661906646774, "grad_norm": 0.9673020232753384, "learning_rate": 4.907159318928424e-06, "loss": 0.3967, "step": 8665 }, { "epoch": 0.5277228024236519, "grad_norm": 0.9670526816881253, "learning_rate": 4.907137773804712e-06, "loss": 0.4232, "step": 8666 }, { "epoch": 0.5277836982005298, "grad_norm": 0.961258478847096, "learning_rate": 4.9071162262286584e-06, "loss": 0.471, "step": 8667 }, { "epoch": 0.5278445939774077, "grad_norm": 1.04275145064952, "learning_rate": 4.907094676200285e-06, "loss": 0.3943, "step": 8668 }, { "epoch": 0.5279054897542855, "grad_norm": 0.9880556117057019, "learning_rate": 4.907073123719614e-06, "loss": 0.4309, "step": 8669 }, { "epoch": 0.5279663855311634, "grad_norm": 0.9590079987284565, "learning_rate": 4.9070515687866646e-06, "loss": 0.5206, "step": 8670 }, { "epoch": 0.5280272813080413, "grad_norm": 1.0962493200920396, "learning_rate": 4.9070300114014634e-06, "loss": 0.4246, "step": 8671 }, { "epoch": 0.5280881770849192, "grad_norm": 1.035139352568009, "learning_rate": 4.907008451564029e-06, "loss": 0.357, "step": 8672 }, { "epoch": 0.528149072861797, "grad_norm": 1.0818560676680617, "learning_rate": 4.906986889274385e-06, "loss": 0.396, "step": 8673 }, { "epoch": 0.5282099686386749, "grad_norm": 0.9854932695207087, "learning_rate": 4.906965324532553e-06, "loss": 0.4505, "step": 8674 }, { "epoch": 0.5282708644155528, "grad_norm": 1.021685762370257, "learning_rate": 4.906943757338555e-06, "loss": 0.4353, "step": 8675 }, { "epoch": 0.5283317601924307, "grad_norm": 0.9097502531819667, "learning_rate": 4.906922187692411e-06, "loss": 0.4536, "step": 8676 }, { "epoch": 0.5283926559693085, "grad_norm": 0.9501273324481635, "learning_rate": 4.906900615594146e-06, "loss": 0.4618, "step": 8677 }, { "epoch": 0.5284535517461864, "grad_norm": 1.0546268526284397, "learning_rate": 4.906879041043781e-06, "loss": 0.4546, "step": 8678 }, { "epoch": 0.5285144475230643, "grad_norm": 1.0925514727639773, "learning_rate": 4.906857464041337e-06, "loss": 0.4819, "step": 8679 }, { "epoch": 0.5285753432999422, "grad_norm": 1.0286801792611513, "learning_rate": 4.906835884586837e-06, "loss": 0.4331, "step": 8680 }, { "epoch": 0.52863623907682, "grad_norm": 0.979042280010626, "learning_rate": 4.906814302680303e-06, "loss": 0.415, "step": 8681 }, { "epoch": 0.5286971348536978, "grad_norm": 0.9870111520550247, "learning_rate": 4.906792718321756e-06, "loss": 0.444, "step": 8682 }, { "epoch": 0.5287580306305758, "grad_norm": 1.0454747971733986, "learning_rate": 4.906771131511219e-06, "loss": 0.4014, "step": 8683 }, { "epoch": 0.5288189264074536, "grad_norm": 1.025445005511119, "learning_rate": 4.906749542248713e-06, "loss": 0.4246, "step": 8684 }, { "epoch": 0.5288798221843315, "grad_norm": 0.9596554621402509, "learning_rate": 4.906727950534261e-06, "loss": 0.5256, "step": 8685 }, { "epoch": 0.5289407179612093, "grad_norm": 1.0386465160592442, "learning_rate": 4.906706356367884e-06, "loss": 0.4177, "step": 8686 }, { "epoch": 0.5290016137380873, "grad_norm": 0.9244013563008245, "learning_rate": 4.906684759749606e-06, "loss": 0.4547, "step": 8687 }, { "epoch": 0.5290625095149651, "grad_norm": 1.0570770541438366, "learning_rate": 4.906663160679446e-06, "loss": 0.5328, "step": 8688 }, { "epoch": 0.529123405291843, "grad_norm": 1.0140280698483142, "learning_rate": 4.906641559157429e-06, "loss": 0.421, "step": 8689 }, { "epoch": 0.5291843010687208, "grad_norm": 0.9950802818935448, "learning_rate": 4.906619955183574e-06, "loss": 0.4379, "step": 8690 }, { "epoch": 0.5292451968455988, "grad_norm": 1.047885927624752, "learning_rate": 4.906598348757906e-06, "loss": 0.4687, "step": 8691 }, { "epoch": 0.5293060926224766, "grad_norm": 1.0579585264406997, "learning_rate": 4.906576739880445e-06, "loss": 0.4608, "step": 8692 }, { "epoch": 0.5293669883993545, "grad_norm": 0.9932486569126717, "learning_rate": 4.906555128551215e-06, "loss": 0.4961, "step": 8693 }, { "epoch": 0.5294278841762324, "grad_norm": 1.0230074479461995, "learning_rate": 4.906533514770236e-06, "loss": 0.4788, "step": 8694 }, { "epoch": 0.5294887799531103, "grad_norm": 1.020165081619229, "learning_rate": 4.906511898537529e-06, "loss": 0.4626, "step": 8695 }, { "epoch": 0.5295496757299881, "grad_norm": 1.0014202507404533, "learning_rate": 4.906490279853119e-06, "loss": 0.4372, "step": 8696 }, { "epoch": 0.529610571506866, "grad_norm": 1.0617510343561023, "learning_rate": 4.906468658717028e-06, "loss": 0.4356, "step": 8697 }, { "epoch": 0.5296714672837439, "grad_norm": 0.9776535730457042, "learning_rate": 4.906447035129275e-06, "loss": 0.4864, "step": 8698 }, { "epoch": 0.5297323630606218, "grad_norm": 1.1170049803494546, "learning_rate": 4.906425409089884e-06, "loss": 0.5044, "step": 8699 }, { "epoch": 0.5297932588374996, "grad_norm": 1.0436514092736897, "learning_rate": 4.906403780598878e-06, "loss": 0.4224, "step": 8700 }, { "epoch": 0.5298541546143775, "grad_norm": 1.08524318144234, "learning_rate": 4.906382149656276e-06, "loss": 0.4214, "step": 8701 }, { "epoch": 0.5299150503912554, "grad_norm": 0.9301643832932852, "learning_rate": 4.906360516262103e-06, "loss": 0.4342, "step": 8702 }, { "epoch": 0.5299759461681333, "grad_norm": 1.0966291478449885, "learning_rate": 4.90633888041638e-06, "loss": 0.3889, "step": 8703 }, { "epoch": 0.5300368419450111, "grad_norm": 1.0332902328840952, "learning_rate": 4.906317242119129e-06, "loss": 0.4197, "step": 8704 }, { "epoch": 0.530097737721889, "grad_norm": 1.1099748102830016, "learning_rate": 4.9062956013703715e-06, "loss": 0.3713, "step": 8705 }, { "epoch": 0.5301586334987669, "grad_norm": 0.9063707111686679, "learning_rate": 4.9062739581701305e-06, "loss": 0.4561, "step": 8706 }, { "epoch": 0.5302195292756448, "grad_norm": 0.934222281269506, "learning_rate": 4.906252312518427e-06, "loss": 0.4393, "step": 8707 }, { "epoch": 0.5302804250525226, "grad_norm": 0.990864890633535, "learning_rate": 4.906230664415285e-06, "loss": 0.4182, "step": 8708 }, { "epoch": 0.5303413208294004, "grad_norm": 1.0316404143381441, "learning_rate": 4.906209013860724e-06, "loss": 0.4466, "step": 8709 }, { "epoch": 0.5304022166062784, "grad_norm": 1.0717021261374389, "learning_rate": 4.906187360854767e-06, "loss": 0.4136, "step": 8710 }, { "epoch": 0.5304631123831562, "grad_norm": 1.0020392772445623, "learning_rate": 4.906165705397437e-06, "loss": 0.486, "step": 8711 }, { "epoch": 0.5305240081600341, "grad_norm": 1.138828811596824, "learning_rate": 4.9061440474887555e-06, "loss": 0.3913, "step": 8712 }, { "epoch": 0.5305849039369119, "grad_norm": 1.1272036761586537, "learning_rate": 4.906122387128744e-06, "loss": 0.4069, "step": 8713 }, { "epoch": 0.5306457997137899, "grad_norm": 1.0389452917672306, "learning_rate": 4.9061007243174264e-06, "loss": 0.405, "step": 8714 }, { "epoch": 0.5307066954906677, "grad_norm": 1.0088414259670804, "learning_rate": 4.906079059054822e-06, "loss": 0.4436, "step": 8715 }, { "epoch": 0.5307675912675456, "grad_norm": 1.0937289043370835, "learning_rate": 4.906057391340955e-06, "loss": 0.4258, "step": 8716 }, { "epoch": 0.5308284870444234, "grad_norm": 1.0246028030814502, "learning_rate": 4.906035721175846e-06, "loss": 0.4382, "step": 8717 }, { "epoch": 0.5308893828213014, "grad_norm": 0.9856671649701163, "learning_rate": 4.906014048559519e-06, "loss": 0.4357, "step": 8718 }, { "epoch": 0.5309502785981792, "grad_norm": 1.001662247948854, "learning_rate": 4.9059923734919935e-06, "loss": 0.449, "step": 8719 }, { "epoch": 0.5310111743750571, "grad_norm": 1.0025093013116513, "learning_rate": 4.905970695973294e-06, "loss": 0.4812, "step": 8720 }, { "epoch": 0.5310720701519349, "grad_norm": 1.1128729768398418, "learning_rate": 4.905949016003441e-06, "loss": 0.4085, "step": 8721 }, { "epoch": 0.5311329659288129, "grad_norm": 1.0063646148986256, "learning_rate": 4.905927333582458e-06, "loss": 0.3862, "step": 8722 }, { "epoch": 0.5311938617056907, "grad_norm": 0.9867164060888354, "learning_rate": 4.905905648710365e-06, "loss": 0.439, "step": 8723 }, { "epoch": 0.5312547574825686, "grad_norm": 1.0585233170796657, "learning_rate": 4.905883961387186e-06, "loss": 0.3687, "step": 8724 }, { "epoch": 0.5313156532594464, "grad_norm": 0.9461483421154513, "learning_rate": 4.905862271612943e-06, "loss": 0.4346, "step": 8725 }, { "epoch": 0.5313765490363244, "grad_norm": 1.0787975125573193, "learning_rate": 4.905840579387657e-06, "loss": 0.3967, "step": 8726 }, { "epoch": 0.5314374448132022, "grad_norm": 1.0822194085012988, "learning_rate": 4.90581888471135e-06, "loss": 0.4037, "step": 8727 }, { "epoch": 0.53149834059008, "grad_norm": 1.0163598933692621, "learning_rate": 4.905797187584046e-06, "loss": 0.4351, "step": 8728 }, { "epoch": 0.5315592363669579, "grad_norm": 0.9609090344664282, "learning_rate": 4.9057754880057655e-06, "loss": 0.4939, "step": 8729 }, { "epoch": 0.5316201321438359, "grad_norm": 1.022449714076184, "learning_rate": 4.90575378597653e-06, "loss": 0.4614, "step": 8730 }, { "epoch": 0.5316810279207137, "grad_norm": 1.0304374344565999, "learning_rate": 4.905732081496363e-06, "loss": 0.4688, "step": 8731 }, { "epoch": 0.5317419236975915, "grad_norm": 0.9994016320705547, "learning_rate": 4.905710374565287e-06, "loss": 0.5489, "step": 8732 }, { "epoch": 0.5318028194744694, "grad_norm": 1.0032565723342126, "learning_rate": 4.905688665183323e-06, "loss": 0.4501, "step": 8733 }, { "epoch": 0.5318637152513473, "grad_norm": 1.0259536587782734, "learning_rate": 4.905666953350492e-06, "loss": 0.5034, "step": 8734 }, { "epoch": 0.5319246110282252, "grad_norm": 1.0051392407148565, "learning_rate": 4.9056452390668194e-06, "loss": 0.4433, "step": 8735 }, { "epoch": 0.531985506805103, "grad_norm": 0.9800169250858429, "learning_rate": 4.9056235223323246e-06, "loss": 0.4702, "step": 8736 }, { "epoch": 0.532046402581981, "grad_norm": 1.1171674267207985, "learning_rate": 4.9056018031470305e-06, "loss": 0.4799, "step": 8737 }, { "epoch": 0.5321072983588588, "grad_norm": 1.008190221194554, "learning_rate": 4.905580081510959e-06, "loss": 0.3909, "step": 8738 }, { "epoch": 0.5321681941357367, "grad_norm": 0.9594979894706758, "learning_rate": 4.905558357424134e-06, "loss": 0.4713, "step": 8739 }, { "epoch": 0.5322290899126145, "grad_norm": 1.062968908943712, "learning_rate": 4.905536630886575e-06, "loss": 0.4063, "step": 8740 }, { "epoch": 0.5322899856894925, "grad_norm": 0.9742996303057286, "learning_rate": 4.905514901898305e-06, "loss": 0.3916, "step": 8741 }, { "epoch": 0.5323508814663703, "grad_norm": 0.9569778152376321, "learning_rate": 4.905493170459347e-06, "loss": 0.4504, "step": 8742 }, { "epoch": 0.5324117772432482, "grad_norm": 1.0304368923521612, "learning_rate": 4.905471436569722e-06, "loss": 0.3889, "step": 8743 }, { "epoch": 0.532472673020126, "grad_norm": 0.9852150162347811, "learning_rate": 4.9054497002294535e-06, "loss": 0.44, "step": 8744 }, { "epoch": 0.532533568797004, "grad_norm": 1.075806785366784, "learning_rate": 4.905427961438562e-06, "loss": 0.4095, "step": 8745 }, { "epoch": 0.5325944645738818, "grad_norm": 1.0699621230704972, "learning_rate": 4.905406220197071e-06, "loss": 0.4605, "step": 8746 }, { "epoch": 0.5326553603507597, "grad_norm": 1.0053910759695674, "learning_rate": 4.905384476505002e-06, "loss": 0.4038, "step": 8747 }, { "epoch": 0.5327162561276375, "grad_norm": 1.037684752189671, "learning_rate": 4.905362730362377e-06, "loss": 0.42, "step": 8748 }, { "epoch": 0.5327771519045155, "grad_norm": 1.0274708574926694, "learning_rate": 4.90534098176922e-06, "loss": 0.3908, "step": 8749 }, { "epoch": 0.5328380476813933, "grad_norm": 1.0570935108673545, "learning_rate": 4.905319230725551e-06, "loss": 0.3938, "step": 8750 }, { "epoch": 0.5328989434582712, "grad_norm": 0.9857516658549654, "learning_rate": 4.905297477231391e-06, "loss": 0.4669, "step": 8751 }, { "epoch": 0.532959839235149, "grad_norm": 1.0385053647868687, "learning_rate": 4.905275721286766e-06, "loss": 0.4627, "step": 8752 }, { "epoch": 0.533020735012027, "grad_norm": 1.0973752387943303, "learning_rate": 4.905253962891695e-06, "loss": 0.4351, "step": 8753 }, { "epoch": 0.5330816307889048, "grad_norm": 1.0689368652540898, "learning_rate": 4.905232202046202e-06, "loss": 0.4567, "step": 8754 }, { "epoch": 0.5331425265657826, "grad_norm": 1.0740372421697455, "learning_rate": 4.905210438750308e-06, "loss": 0.4151, "step": 8755 }, { "epoch": 0.5332034223426605, "grad_norm": 0.9468507087579875, "learning_rate": 4.905188673004035e-06, "loss": 0.5107, "step": 8756 }, { "epoch": 0.5332643181195384, "grad_norm": 1.1229700425012341, "learning_rate": 4.9051669048074065e-06, "loss": 0.4258, "step": 8757 }, { "epoch": 0.5333252138964163, "grad_norm": 0.9766980053264821, "learning_rate": 4.905145134160444e-06, "loss": 0.4721, "step": 8758 }, { "epoch": 0.5333861096732941, "grad_norm": 0.9612861356490919, "learning_rate": 4.90512336106317e-06, "loss": 0.4308, "step": 8759 }, { "epoch": 0.533447005450172, "grad_norm": 1.0043371440407733, "learning_rate": 4.905101585515605e-06, "loss": 0.4136, "step": 8760 }, { "epoch": 0.5335079012270499, "grad_norm": 1.0765601628355332, "learning_rate": 4.905079807517774e-06, "loss": 0.404, "step": 8761 }, { "epoch": 0.5335687970039278, "grad_norm": 1.0043085460550436, "learning_rate": 4.9050580270696966e-06, "loss": 0.4481, "step": 8762 }, { "epoch": 0.5336296927808056, "grad_norm": 0.9660890720582136, "learning_rate": 4.905036244171397e-06, "loss": 0.4451, "step": 8763 }, { "epoch": 0.5336905885576835, "grad_norm": 1.0462376542213785, "learning_rate": 4.905014458822896e-06, "loss": 0.3843, "step": 8764 }, { "epoch": 0.5337514843345614, "grad_norm": 1.038883608764162, "learning_rate": 4.9049926710242165e-06, "loss": 0.421, "step": 8765 }, { "epoch": 0.5338123801114393, "grad_norm": 1.0616582394308498, "learning_rate": 4.90497088077538e-06, "loss": 0.382, "step": 8766 }, { "epoch": 0.5338732758883171, "grad_norm": 1.0261922058606552, "learning_rate": 4.90494908807641e-06, "loss": 0.4359, "step": 8767 }, { "epoch": 0.533934171665195, "grad_norm": 0.9650478593224517, "learning_rate": 4.904927292927326e-06, "loss": 0.4992, "step": 8768 }, { "epoch": 0.5339950674420729, "grad_norm": 0.9994832657457573, "learning_rate": 4.904905495328154e-06, "loss": 0.547, "step": 8769 }, { "epoch": 0.5340559632189508, "grad_norm": 1.0375986443698353, "learning_rate": 4.904883695278914e-06, "loss": 0.457, "step": 8770 }, { "epoch": 0.5341168589958286, "grad_norm": 1.0944006234817953, "learning_rate": 4.904861892779627e-06, "loss": 0.4498, "step": 8771 }, { "epoch": 0.5341777547727065, "grad_norm": 1.0957930382182608, "learning_rate": 4.904840087830319e-06, "loss": 0.3732, "step": 8772 }, { "epoch": 0.5342386505495844, "grad_norm": 1.0134468530843659, "learning_rate": 4.904818280431009e-06, "loss": 0.4822, "step": 8773 }, { "epoch": 0.5342995463264623, "grad_norm": 1.050837921066168, "learning_rate": 4.9047964705817195e-06, "loss": 0.545, "step": 8774 }, { "epoch": 0.5343604421033401, "grad_norm": 1.024303889551598, "learning_rate": 4.904774658282474e-06, "loss": 0.4579, "step": 8775 }, { "epoch": 0.5344213378802181, "grad_norm": 0.9932702955947269, "learning_rate": 4.904752843533294e-06, "loss": 0.4356, "step": 8776 }, { "epoch": 0.5344822336570959, "grad_norm": 1.0348159877891001, "learning_rate": 4.904731026334201e-06, "loss": 0.4531, "step": 8777 }, { "epoch": 0.5345431294339738, "grad_norm": 0.9339069260548718, "learning_rate": 4.904709206685219e-06, "loss": 0.4879, "step": 8778 }, { "epoch": 0.5346040252108516, "grad_norm": 1.0048092773644812, "learning_rate": 4.9046873845863685e-06, "loss": 0.439, "step": 8779 }, { "epoch": 0.5346649209877296, "grad_norm": 0.9711277014533346, "learning_rate": 4.904665560037673e-06, "loss": 0.4312, "step": 8780 }, { "epoch": 0.5347258167646074, "grad_norm": 1.0585575291747384, "learning_rate": 4.904643733039154e-06, "loss": 0.4212, "step": 8781 }, { "epoch": 0.5347867125414852, "grad_norm": 0.9395755760188568, "learning_rate": 4.904621903590833e-06, "loss": 0.4441, "step": 8782 }, { "epoch": 0.5348476083183631, "grad_norm": 1.0915142408969474, "learning_rate": 4.904600071692735e-06, "loss": 0.4436, "step": 8783 }, { "epoch": 0.534908504095241, "grad_norm": 0.9459315658044363, "learning_rate": 4.904578237344881e-06, "loss": 0.434, "step": 8784 }, { "epoch": 0.5349693998721189, "grad_norm": 0.9996070589557743, "learning_rate": 4.90455640054729e-06, "loss": 0.4831, "step": 8785 }, { "epoch": 0.5350302956489967, "grad_norm": 1.1350240538844638, "learning_rate": 4.904534561299988e-06, "loss": 0.4755, "step": 8786 }, { "epoch": 0.5350911914258746, "grad_norm": 0.9992085807911638, "learning_rate": 4.904512719602997e-06, "loss": 0.5021, "step": 8787 }, { "epoch": 0.5351520872027525, "grad_norm": 0.9918694646848969, "learning_rate": 4.904490875456338e-06, "loss": 0.3957, "step": 8788 }, { "epoch": 0.5352129829796304, "grad_norm": 0.9786070677779807, "learning_rate": 4.904469028860034e-06, "loss": 0.4052, "step": 8789 }, { "epoch": 0.5352738787565082, "grad_norm": 0.9949264699607133, "learning_rate": 4.904447179814106e-06, "loss": 0.437, "step": 8790 }, { "epoch": 0.5353347745333861, "grad_norm": 1.0154700729262642, "learning_rate": 4.904425328318578e-06, "loss": 0.4612, "step": 8791 }, { "epoch": 0.535395670310264, "grad_norm": 0.9917205282077092, "learning_rate": 4.904403474373472e-06, "loss": 0.4586, "step": 8792 }, { "epoch": 0.5354565660871419, "grad_norm": 0.940997628609883, "learning_rate": 4.904381617978808e-06, "loss": 0.5066, "step": 8793 }, { "epoch": 0.5355174618640197, "grad_norm": 1.0450016569423546, "learning_rate": 4.9043597591346116e-06, "loss": 0.5113, "step": 8794 }, { "epoch": 0.5355783576408976, "grad_norm": 1.00657127059955, "learning_rate": 4.9043378978409025e-06, "loss": 0.485, "step": 8795 }, { "epoch": 0.5356392534177755, "grad_norm": 1.1164223008766654, "learning_rate": 4.9043160340977045e-06, "loss": 0.3727, "step": 8796 }, { "epoch": 0.5357001491946534, "grad_norm": 1.0324556960034639, "learning_rate": 4.904294167905039e-06, "loss": 0.442, "step": 8797 }, { "epoch": 0.5357610449715312, "grad_norm": 0.9589159940925782, "learning_rate": 4.9042722992629285e-06, "loss": 0.4768, "step": 8798 }, { "epoch": 0.535821940748409, "grad_norm": 0.9313720046792108, "learning_rate": 4.904250428171395e-06, "loss": 0.4927, "step": 8799 }, { "epoch": 0.535882836525287, "grad_norm": 1.061595623721985, "learning_rate": 4.904228554630462e-06, "loss": 0.4173, "step": 8800 }, { "epoch": 0.5359437323021649, "grad_norm": 1.0583501394066324, "learning_rate": 4.904206678640151e-06, "loss": 0.3789, "step": 8801 }, { "epoch": 0.5360046280790427, "grad_norm": 0.9240166682586353, "learning_rate": 4.904184800200483e-06, "loss": 0.3884, "step": 8802 }, { "epoch": 0.5360655238559205, "grad_norm": 1.0256614625649487, "learning_rate": 4.904162919311482e-06, "loss": 0.4185, "step": 8803 }, { "epoch": 0.5361264196327985, "grad_norm": 0.9813338540486756, "learning_rate": 4.9041410359731715e-06, "loss": 0.3975, "step": 8804 }, { "epoch": 0.5361873154096763, "grad_norm": 0.97938612375245, "learning_rate": 4.90411915018557e-06, "loss": 0.4064, "step": 8805 }, { "epoch": 0.5362482111865542, "grad_norm": 0.9873492805913616, "learning_rate": 4.904097261948703e-06, "loss": 0.4818, "step": 8806 }, { "epoch": 0.536309106963432, "grad_norm": 1.0202545104547442, "learning_rate": 4.904075371262591e-06, "loss": 0.4631, "step": 8807 }, { "epoch": 0.53637000274031, "grad_norm": 0.9484341088293203, "learning_rate": 4.904053478127258e-06, "loss": 0.4629, "step": 8808 }, { "epoch": 0.5364308985171878, "grad_norm": 1.0709646173071097, "learning_rate": 4.904031582542724e-06, "loss": 0.4296, "step": 8809 }, { "epoch": 0.5364917942940657, "grad_norm": 1.0498595027359283, "learning_rate": 4.904009684509013e-06, "loss": 0.4568, "step": 8810 }, { "epoch": 0.5365526900709435, "grad_norm": 0.9726768306486795, "learning_rate": 4.903987784026148e-06, "loss": 0.4426, "step": 8811 }, { "epoch": 0.5366135858478215, "grad_norm": 1.112422756449798, "learning_rate": 4.90396588109415e-06, "loss": 0.3904, "step": 8812 }, { "epoch": 0.5366744816246993, "grad_norm": 1.0689192291580103, "learning_rate": 4.9039439757130405e-06, "loss": 0.3537, "step": 8813 }, { "epoch": 0.5367353774015772, "grad_norm": 1.0726688944028444, "learning_rate": 4.903922067882842e-06, "loss": 0.4273, "step": 8814 }, { "epoch": 0.536796273178455, "grad_norm": 0.9602869252724383, "learning_rate": 4.90390015760358e-06, "loss": 0.4137, "step": 8815 }, { "epoch": 0.536857168955333, "grad_norm": 1.0388449374825388, "learning_rate": 4.903878244875273e-06, "loss": 0.466, "step": 8816 }, { "epoch": 0.5369180647322108, "grad_norm": 1.0524195127841307, "learning_rate": 4.903856329697945e-06, "loss": 0.4108, "step": 8817 }, { "epoch": 0.5369789605090887, "grad_norm": 1.0253290852891346, "learning_rate": 4.903834412071619e-06, "loss": 0.4459, "step": 8818 }, { "epoch": 0.5370398562859666, "grad_norm": 1.0117352633420251, "learning_rate": 4.903812491996316e-06, "loss": 0.4045, "step": 8819 }, { "epoch": 0.5371007520628445, "grad_norm": 1.0039684651192462, "learning_rate": 4.903790569472059e-06, "loss": 0.4704, "step": 8820 }, { "epoch": 0.5371616478397223, "grad_norm": 0.9657809964932154, "learning_rate": 4.903768644498869e-06, "loss": 0.4982, "step": 8821 }, { "epoch": 0.5372225436166002, "grad_norm": 1.054597286073078, "learning_rate": 4.903746717076771e-06, "loss": 0.4178, "step": 8822 }, { "epoch": 0.5372834393934781, "grad_norm": 1.051380464729637, "learning_rate": 4.9037247872057845e-06, "loss": 0.4488, "step": 8823 }, { "epoch": 0.537344335170356, "grad_norm": 1.1308981081890037, "learning_rate": 4.9037028548859335e-06, "loss": 0.3828, "step": 8824 }, { "epoch": 0.5374052309472338, "grad_norm": 1.0021234573756461, "learning_rate": 4.903680920117241e-06, "loss": 0.5184, "step": 8825 }, { "epoch": 0.5374661267241116, "grad_norm": 0.9811528227479334, "learning_rate": 4.9036589828997275e-06, "loss": 0.3856, "step": 8826 }, { "epoch": 0.5375270225009896, "grad_norm": 1.02332120538785, "learning_rate": 4.903637043233417e-06, "loss": 0.4265, "step": 8827 }, { "epoch": 0.5375879182778674, "grad_norm": 1.0230925146966947, "learning_rate": 4.90361510111833e-06, "loss": 0.4474, "step": 8828 }, { "epoch": 0.5376488140547453, "grad_norm": 1.0179551345996987, "learning_rate": 4.90359315655449e-06, "loss": 0.3741, "step": 8829 }, { "epoch": 0.5377097098316231, "grad_norm": 0.9541475659903479, "learning_rate": 4.90357120954192e-06, "loss": 0.4648, "step": 8830 }, { "epoch": 0.5377706056085011, "grad_norm": 1.0376019325349968, "learning_rate": 4.90354926008064e-06, "loss": 0.4628, "step": 8831 }, { "epoch": 0.5378315013853789, "grad_norm": 0.9855375105446861, "learning_rate": 4.9035273081706755e-06, "loss": 0.4687, "step": 8832 }, { "epoch": 0.5378923971622568, "grad_norm": 1.0309103844940484, "learning_rate": 4.903505353812048e-06, "loss": 0.4463, "step": 8833 }, { "epoch": 0.5379532929391346, "grad_norm": 1.0648159767662762, "learning_rate": 4.903483397004778e-06, "loss": 0.4459, "step": 8834 }, { "epoch": 0.5380141887160126, "grad_norm": 1.010336319411342, "learning_rate": 4.9034614377488884e-06, "loss": 0.4192, "step": 8835 }, { "epoch": 0.5380750844928904, "grad_norm": 1.0298395880804645, "learning_rate": 4.903439476044404e-06, "loss": 0.4824, "step": 8836 }, { "epoch": 0.5381359802697683, "grad_norm": 1.0776871308226332, "learning_rate": 4.903417511891344e-06, "loss": 0.4441, "step": 8837 }, { "epoch": 0.5381968760466461, "grad_norm": 1.0526490307239313, "learning_rate": 4.903395545289733e-06, "loss": 0.5071, "step": 8838 }, { "epoch": 0.5382577718235241, "grad_norm": 1.0287330478348344, "learning_rate": 4.903373576239593e-06, "loss": 0.4513, "step": 8839 }, { "epoch": 0.5383186676004019, "grad_norm": 1.0845234709421214, "learning_rate": 4.903351604740945e-06, "loss": 0.418, "step": 8840 }, { "epoch": 0.5383795633772798, "grad_norm": 1.0030205951362872, "learning_rate": 4.9033296307938124e-06, "loss": 0.4657, "step": 8841 }, { "epoch": 0.5384404591541576, "grad_norm": 0.9381692854624355, "learning_rate": 4.903307654398218e-06, "loss": 0.4327, "step": 8842 }, { "epoch": 0.5385013549310356, "grad_norm": 1.0272255985141678, "learning_rate": 4.903285675554184e-06, "loss": 0.4453, "step": 8843 }, { "epoch": 0.5385622507079134, "grad_norm": 1.0401714014152261, "learning_rate": 4.903263694261731e-06, "loss": 0.4577, "step": 8844 }, { "epoch": 0.5386231464847913, "grad_norm": 0.941303466182871, "learning_rate": 4.903241710520885e-06, "loss": 0.4712, "step": 8845 }, { "epoch": 0.5386840422616691, "grad_norm": 0.9946522202917325, "learning_rate": 4.903219724331665e-06, "loss": 0.3975, "step": 8846 }, { "epoch": 0.5387449380385471, "grad_norm": 1.0717968396725852, "learning_rate": 4.903197735694095e-06, "loss": 0.3976, "step": 8847 }, { "epoch": 0.5388058338154249, "grad_norm": 1.01367962792509, "learning_rate": 4.903175744608198e-06, "loss": 0.427, "step": 8848 }, { "epoch": 0.5388667295923028, "grad_norm": 1.0522411469946624, "learning_rate": 4.903153751073995e-06, "loss": 0.4879, "step": 8849 }, { "epoch": 0.5389276253691806, "grad_norm": 0.993170247402638, "learning_rate": 4.903131755091508e-06, "loss": 0.4838, "step": 8850 }, { "epoch": 0.5389885211460586, "grad_norm": 1.0752090358587159, "learning_rate": 4.903109756660761e-06, "loss": 0.4024, "step": 8851 }, { "epoch": 0.5390494169229364, "grad_norm": 1.0440214799456065, "learning_rate": 4.903087755781776e-06, "loss": 0.4498, "step": 8852 }, { "epoch": 0.5391103126998142, "grad_norm": 0.9877865406440135, "learning_rate": 4.903065752454575e-06, "loss": 0.4302, "step": 8853 }, { "epoch": 0.5391712084766921, "grad_norm": 1.08453171099103, "learning_rate": 4.903043746679179e-06, "loss": 0.4092, "step": 8854 }, { "epoch": 0.53923210425357, "grad_norm": 1.0389533420644828, "learning_rate": 4.903021738455614e-06, "loss": 0.4086, "step": 8855 }, { "epoch": 0.5392930000304479, "grad_norm": 0.9970936852939618, "learning_rate": 4.9029997277839e-06, "loss": 0.405, "step": 8856 }, { "epoch": 0.5393538958073257, "grad_norm": 1.0905517999513383, "learning_rate": 4.90297771466406e-06, "loss": 0.3442, "step": 8857 }, { "epoch": 0.5394147915842037, "grad_norm": 1.0732683306534652, "learning_rate": 4.902955699096116e-06, "loss": 0.4268, "step": 8858 }, { "epoch": 0.5394756873610815, "grad_norm": 1.101338700524692, "learning_rate": 4.90293368108009e-06, "loss": 0.5061, "step": 8859 }, { "epoch": 0.5395365831379594, "grad_norm": 0.9329099532433348, "learning_rate": 4.902911660616006e-06, "loss": 0.4347, "step": 8860 }, { "epoch": 0.5395974789148372, "grad_norm": 0.9764863023422466, "learning_rate": 4.902889637703885e-06, "loss": 0.4851, "step": 8861 }, { "epoch": 0.5396583746917152, "grad_norm": 1.0872564542646372, "learning_rate": 4.9028676123437505e-06, "loss": 0.401, "step": 8862 }, { "epoch": 0.539719270468593, "grad_norm": 0.9920214231312706, "learning_rate": 4.902845584535624e-06, "loss": 0.4953, "step": 8863 }, { "epoch": 0.5397801662454709, "grad_norm": 0.9979111421377957, "learning_rate": 4.902823554279529e-06, "loss": 0.5019, "step": 8864 }, { "epoch": 0.5398410620223487, "grad_norm": 0.9955528469603008, "learning_rate": 4.902801521575487e-06, "loss": 0.4342, "step": 8865 }, { "epoch": 0.5399019577992267, "grad_norm": 1.0207735327251055, "learning_rate": 4.90277948642352e-06, "loss": 0.4674, "step": 8866 }, { "epoch": 0.5399628535761045, "grad_norm": 0.9769556514279572, "learning_rate": 4.902757448823652e-06, "loss": 0.4096, "step": 8867 }, { "epoch": 0.5400237493529824, "grad_norm": 0.9576339332072332, "learning_rate": 4.902735408775905e-06, "loss": 0.4136, "step": 8868 }, { "epoch": 0.5400846451298602, "grad_norm": 1.0482401245099875, "learning_rate": 4.9027133662803e-06, "loss": 0.4078, "step": 8869 }, { "epoch": 0.5401455409067382, "grad_norm": 1.0287046016943167, "learning_rate": 4.902691321336862e-06, "loss": 0.4329, "step": 8870 }, { "epoch": 0.540206436683616, "grad_norm": 1.0232259925380958, "learning_rate": 4.902669273945611e-06, "loss": 0.4127, "step": 8871 }, { "epoch": 0.5402673324604939, "grad_norm": 1.0589645610055267, "learning_rate": 4.902647224106571e-06, "loss": 0.4196, "step": 8872 }, { "epoch": 0.5403282282373717, "grad_norm": 1.000839325247304, "learning_rate": 4.902625171819764e-06, "loss": 0.4775, "step": 8873 }, { "epoch": 0.5403891240142497, "grad_norm": 0.9861767899914893, "learning_rate": 4.902603117085212e-06, "loss": 0.4219, "step": 8874 }, { "epoch": 0.5404500197911275, "grad_norm": 1.004271742363558, "learning_rate": 4.902581059902937e-06, "loss": 0.409, "step": 8875 }, { "epoch": 0.5405109155680053, "grad_norm": 1.0201300534311897, "learning_rate": 4.902559000272964e-06, "loss": 0.4537, "step": 8876 }, { "epoch": 0.5405718113448832, "grad_norm": 1.03991207759013, "learning_rate": 4.902536938195314e-06, "loss": 0.4532, "step": 8877 }, { "epoch": 0.5406327071217611, "grad_norm": 1.0373048939428122, "learning_rate": 4.902514873670008e-06, "loss": 0.3876, "step": 8878 }, { "epoch": 0.540693602898639, "grad_norm": 0.9461691829399309, "learning_rate": 4.9024928066970704e-06, "loss": 0.4831, "step": 8879 }, { "epoch": 0.5407544986755168, "grad_norm": 0.9879195117996892, "learning_rate": 4.902470737276523e-06, "loss": 0.4583, "step": 8880 }, { "epoch": 0.5408153944523947, "grad_norm": 0.9584364151564985, "learning_rate": 4.902448665408389e-06, "loss": 0.5165, "step": 8881 }, { "epoch": 0.5408762902292726, "grad_norm": 1.0660366294467463, "learning_rate": 4.902426591092689e-06, "loss": 0.4173, "step": 8882 }, { "epoch": 0.5409371860061505, "grad_norm": 1.0487804032230656, "learning_rate": 4.9024045143294475e-06, "loss": 0.4333, "step": 8883 }, { "epoch": 0.5409980817830283, "grad_norm": 0.8945133941845145, "learning_rate": 4.902382435118687e-06, "loss": 0.4115, "step": 8884 }, { "epoch": 0.5410589775599062, "grad_norm": 1.0199408564256711, "learning_rate": 4.902360353460428e-06, "loss": 0.4579, "step": 8885 }, { "epoch": 0.5411198733367841, "grad_norm": 0.9270151034042129, "learning_rate": 4.902338269354694e-06, "loss": 0.4737, "step": 8886 }, { "epoch": 0.541180769113662, "grad_norm": 1.0186201846749932, "learning_rate": 4.902316182801508e-06, "loss": 0.3954, "step": 8887 }, { "epoch": 0.5412416648905398, "grad_norm": 0.9923973448887533, "learning_rate": 4.9022940938008935e-06, "loss": 0.4376, "step": 8888 }, { "epoch": 0.5413025606674177, "grad_norm": 1.0087482090687485, "learning_rate": 4.90227200235287e-06, "loss": 0.4249, "step": 8889 }, { "epoch": 0.5413634564442956, "grad_norm": 1.0020860144059982, "learning_rate": 4.902249908457463e-06, "loss": 0.4758, "step": 8890 }, { "epoch": 0.5414243522211735, "grad_norm": 1.1032359925162003, "learning_rate": 4.9022278121146924e-06, "loss": 0.3657, "step": 8891 }, { "epoch": 0.5414852479980513, "grad_norm": 1.223045184540932, "learning_rate": 4.902205713324584e-06, "loss": 0.3625, "step": 8892 }, { "epoch": 0.5415461437749292, "grad_norm": 0.9931090614886199, "learning_rate": 4.902183612087157e-06, "loss": 0.4498, "step": 8893 }, { "epoch": 0.5416070395518071, "grad_norm": 1.0551892826971234, "learning_rate": 4.9021615084024355e-06, "loss": 0.4462, "step": 8894 }, { "epoch": 0.541667935328685, "grad_norm": 1.00153207834239, "learning_rate": 4.902139402270442e-06, "loss": 0.4483, "step": 8895 }, { "epoch": 0.5417288311055628, "grad_norm": 0.974574119721614, "learning_rate": 4.902117293691198e-06, "loss": 0.489, "step": 8896 }, { "epoch": 0.5417897268824406, "grad_norm": 1.0134628352495076, "learning_rate": 4.9020951826647275e-06, "loss": 0.416, "step": 8897 }, { "epoch": 0.5418506226593186, "grad_norm": 0.9703892933961767, "learning_rate": 4.902073069191052e-06, "loss": 0.4201, "step": 8898 }, { "epoch": 0.5419115184361964, "grad_norm": 1.0315246484356395, "learning_rate": 4.902050953270195e-06, "loss": 0.4275, "step": 8899 }, { "epoch": 0.5419724142130743, "grad_norm": 0.933090801152509, "learning_rate": 4.902028834902178e-06, "loss": 0.5033, "step": 8900 }, { "epoch": 0.5420333099899522, "grad_norm": 0.9815196691669624, "learning_rate": 4.902006714087024e-06, "loss": 0.4741, "step": 8901 }, { "epoch": 0.5420942057668301, "grad_norm": 0.9835866223602149, "learning_rate": 4.901984590824756e-06, "loss": 0.3935, "step": 8902 }, { "epoch": 0.5421551015437079, "grad_norm": 1.0143513292452317, "learning_rate": 4.901962465115395e-06, "loss": 0.4801, "step": 8903 }, { "epoch": 0.5422159973205858, "grad_norm": 1.0880112836982585, "learning_rate": 4.901940336958966e-06, "loss": 0.4202, "step": 8904 }, { "epoch": 0.5422768930974637, "grad_norm": 1.0032374371832526, "learning_rate": 4.901918206355489e-06, "loss": 0.4711, "step": 8905 }, { "epoch": 0.5423377888743416, "grad_norm": 0.9662535298987077, "learning_rate": 4.901896073304988e-06, "loss": 0.4311, "step": 8906 }, { "epoch": 0.5423986846512194, "grad_norm": 0.9866414038154725, "learning_rate": 4.901873937807485e-06, "loss": 0.4335, "step": 8907 }, { "epoch": 0.5424595804280973, "grad_norm": 0.9519312169526684, "learning_rate": 4.9018517998630035e-06, "loss": 0.4947, "step": 8908 }, { "epoch": 0.5425204762049752, "grad_norm": 1.0582036325647166, "learning_rate": 4.901829659471565e-06, "loss": 0.5192, "step": 8909 }, { "epoch": 0.5425813719818531, "grad_norm": 1.0353608485774621, "learning_rate": 4.901807516633192e-06, "loss": 0.4598, "step": 8910 }, { "epoch": 0.5426422677587309, "grad_norm": 1.1302940914216706, "learning_rate": 4.9017853713479076e-06, "loss": 0.4071, "step": 8911 }, { "epoch": 0.5427031635356088, "grad_norm": 0.9520792995531941, "learning_rate": 4.901763223615734e-06, "loss": 0.5464, "step": 8912 }, { "epoch": 0.5427640593124867, "grad_norm": 1.1003806436729262, "learning_rate": 4.901741073436694e-06, "loss": 0.3981, "step": 8913 }, { "epoch": 0.5428249550893646, "grad_norm": 0.9674469046018422, "learning_rate": 4.9017189208108105e-06, "loss": 0.3694, "step": 8914 }, { "epoch": 0.5428858508662424, "grad_norm": 1.0130848829064534, "learning_rate": 4.901696765738105e-06, "loss": 0.4869, "step": 8915 }, { "epoch": 0.5429467466431203, "grad_norm": 1.0129330487926758, "learning_rate": 4.901674608218602e-06, "loss": 0.4297, "step": 8916 }, { "epoch": 0.5430076424199982, "grad_norm": 0.9545024730750731, "learning_rate": 4.901652448252322e-06, "loss": 0.4736, "step": 8917 }, { "epoch": 0.5430685381968761, "grad_norm": 1.0419807280359532, "learning_rate": 4.901630285839288e-06, "loss": 0.3747, "step": 8918 }, { "epoch": 0.5431294339737539, "grad_norm": 1.0130568115230265, "learning_rate": 4.901608120979524e-06, "loss": 0.4449, "step": 8919 }, { "epoch": 0.5431903297506318, "grad_norm": 1.0496652867983303, "learning_rate": 4.9015859536730515e-06, "loss": 0.4215, "step": 8920 }, { "epoch": 0.5432512255275097, "grad_norm": 1.0987429990855002, "learning_rate": 4.901563783919892e-06, "loss": 0.4314, "step": 8921 }, { "epoch": 0.5433121213043876, "grad_norm": 1.0700775860605538, "learning_rate": 4.901541611720071e-06, "loss": 0.4167, "step": 8922 }, { "epoch": 0.5433730170812654, "grad_norm": 1.1207390656198697, "learning_rate": 4.901519437073608e-06, "loss": 0.3534, "step": 8923 }, { "epoch": 0.5434339128581432, "grad_norm": 1.0801499741414353, "learning_rate": 4.901497259980528e-06, "loss": 0.4931, "step": 8924 }, { "epoch": 0.5434948086350212, "grad_norm": 0.9655098758474467, "learning_rate": 4.901475080440851e-06, "loss": 0.4839, "step": 8925 }, { "epoch": 0.543555704411899, "grad_norm": 0.9674769171518295, "learning_rate": 4.901452898454602e-06, "loss": 0.5099, "step": 8926 }, { "epoch": 0.5436166001887769, "grad_norm": 0.9871705993449964, "learning_rate": 4.901430714021803e-06, "loss": 0.4276, "step": 8927 }, { "epoch": 0.5436774959656547, "grad_norm": 1.0395913959397551, "learning_rate": 4.901408527142476e-06, "loss": 0.4233, "step": 8928 }, { "epoch": 0.5437383917425327, "grad_norm": 0.9989833225393461, "learning_rate": 4.901386337816644e-06, "loss": 0.4151, "step": 8929 }, { "epoch": 0.5437992875194105, "grad_norm": 1.0798515366340824, "learning_rate": 4.901364146044329e-06, "loss": 0.4985, "step": 8930 }, { "epoch": 0.5438601832962884, "grad_norm": 1.0220621480356735, "learning_rate": 4.901341951825554e-06, "loss": 0.391, "step": 8931 }, { "epoch": 0.5439210790731662, "grad_norm": 0.9769031592429293, "learning_rate": 4.901319755160343e-06, "loss": 0.4927, "step": 8932 }, { "epoch": 0.5439819748500442, "grad_norm": 0.9215514897142753, "learning_rate": 4.901297556048716e-06, "loss": 0.4532, "step": 8933 }, { "epoch": 0.544042870626922, "grad_norm": 1.0126451967653152, "learning_rate": 4.901275354490698e-06, "loss": 0.402, "step": 8934 }, { "epoch": 0.5441037664037999, "grad_norm": 1.2072645014427823, "learning_rate": 4.90125315048631e-06, "loss": 0.4212, "step": 8935 }, { "epoch": 0.5441646621806777, "grad_norm": 0.9289309152891018, "learning_rate": 4.901230944035576e-06, "loss": 0.4807, "step": 8936 }, { "epoch": 0.5442255579575557, "grad_norm": 1.0477291976383798, "learning_rate": 4.901208735138518e-06, "loss": 0.3785, "step": 8937 }, { "epoch": 0.5442864537344335, "grad_norm": 0.9460827972316779, "learning_rate": 4.9011865237951565e-06, "loss": 0.446, "step": 8938 }, { "epoch": 0.5443473495113114, "grad_norm": 1.0010018614789926, "learning_rate": 4.901164310005518e-06, "loss": 0.4002, "step": 8939 }, { "epoch": 0.5444082452881893, "grad_norm": 1.0854554939288723, "learning_rate": 4.901142093769622e-06, "loss": 0.4148, "step": 8940 }, { "epoch": 0.5444691410650672, "grad_norm": 1.0125329292099854, "learning_rate": 4.901119875087493e-06, "loss": 0.5017, "step": 8941 }, { "epoch": 0.544530036841945, "grad_norm": 0.9907717178176005, "learning_rate": 4.901097653959152e-06, "loss": 0.4239, "step": 8942 }, { "epoch": 0.5445909326188229, "grad_norm": 1.0550177388907076, "learning_rate": 4.9010754303846245e-06, "loss": 0.4536, "step": 8943 }, { "epoch": 0.5446518283957008, "grad_norm": 1.0989897776438597, "learning_rate": 4.90105320436393e-06, "loss": 0.4475, "step": 8944 }, { "epoch": 0.5447127241725787, "grad_norm": 1.031343423731824, "learning_rate": 4.901030975897093e-06, "loss": 0.3719, "step": 8945 }, { "epoch": 0.5447736199494565, "grad_norm": 1.0259965881699198, "learning_rate": 4.901008744984135e-06, "loss": 0.4273, "step": 8946 }, { "epoch": 0.5448345157263343, "grad_norm": 1.0158436113804388, "learning_rate": 4.90098651162508e-06, "loss": 0.4706, "step": 8947 }, { "epoch": 0.5448954115032123, "grad_norm": 0.9748302548073634, "learning_rate": 4.9009642758199485e-06, "loss": 0.4128, "step": 8948 }, { "epoch": 0.5449563072800901, "grad_norm": 1.046436780999672, "learning_rate": 4.9009420375687656e-06, "loss": 0.4323, "step": 8949 }, { "epoch": 0.545017203056968, "grad_norm": 1.05348852353459, "learning_rate": 4.900919796871553e-06, "loss": 0.4845, "step": 8950 }, { "epoch": 0.5450780988338458, "grad_norm": 1.0373818515434676, "learning_rate": 4.900897553728333e-06, "loss": 0.4859, "step": 8951 }, { "epoch": 0.5451389946107238, "grad_norm": 1.0507610609646176, "learning_rate": 4.900875308139128e-06, "loss": 0.3823, "step": 8952 }, { "epoch": 0.5451998903876016, "grad_norm": 1.0074919735075514, "learning_rate": 4.900853060103962e-06, "loss": 0.5041, "step": 8953 }, { "epoch": 0.5452607861644795, "grad_norm": 1.0489952912199811, "learning_rate": 4.9008308096228555e-06, "loss": 0.4137, "step": 8954 }, { "epoch": 0.5453216819413573, "grad_norm": 1.0402824999097957, "learning_rate": 4.900808556695833e-06, "loss": 0.3919, "step": 8955 }, { "epoch": 0.5453825777182353, "grad_norm": 1.0177215247221825, "learning_rate": 4.900786301322918e-06, "loss": 0.3996, "step": 8956 }, { "epoch": 0.5454434734951131, "grad_norm": 1.0780882326022836, "learning_rate": 4.90076404350413e-06, "loss": 0.4074, "step": 8957 }, { "epoch": 0.545504369271991, "grad_norm": 1.0180188191686608, "learning_rate": 4.900741783239494e-06, "loss": 0.4072, "step": 8958 }, { "epoch": 0.5455652650488688, "grad_norm": 1.000036144862882, "learning_rate": 4.900719520529032e-06, "loss": 0.471, "step": 8959 }, { "epoch": 0.5456261608257468, "grad_norm": 1.0500171970538323, "learning_rate": 4.9006972553727684e-06, "loss": 0.407, "step": 8960 }, { "epoch": 0.5456870566026246, "grad_norm": 1.0683432339332328, "learning_rate": 4.900674987770723e-06, "loss": 0.434, "step": 8961 }, { "epoch": 0.5457479523795025, "grad_norm": 0.9957522796663678, "learning_rate": 4.9006527177229204e-06, "loss": 0.4344, "step": 8962 }, { "epoch": 0.5458088481563803, "grad_norm": 0.9512621127794519, "learning_rate": 4.900630445229382e-06, "loss": 0.4539, "step": 8963 }, { "epoch": 0.5458697439332583, "grad_norm": 1.0432996265654644, "learning_rate": 4.900608170290132e-06, "loss": 0.4024, "step": 8964 }, { "epoch": 0.5459306397101361, "grad_norm": 0.9971106076725595, "learning_rate": 4.900585892905192e-06, "loss": 0.427, "step": 8965 }, { "epoch": 0.545991535487014, "grad_norm": 0.9706567485259235, "learning_rate": 4.900563613074585e-06, "loss": 0.4271, "step": 8966 }, { "epoch": 0.5460524312638918, "grad_norm": 1.0456911665990392, "learning_rate": 4.900541330798333e-06, "loss": 0.4634, "step": 8967 }, { "epoch": 0.5461133270407698, "grad_norm": 0.9689891479810051, "learning_rate": 4.900519046076461e-06, "loss": 0.5268, "step": 8968 }, { "epoch": 0.5461742228176476, "grad_norm": 0.9650428813368853, "learning_rate": 4.9004967589089886e-06, "loss": 0.4521, "step": 8969 }, { "epoch": 0.5462351185945254, "grad_norm": 0.9758876073231528, "learning_rate": 4.90047446929594e-06, "loss": 0.4453, "step": 8970 }, { "epoch": 0.5462960143714033, "grad_norm": 1.0439859981459325, "learning_rate": 4.900452177237339e-06, "loss": 0.4329, "step": 8971 }, { "epoch": 0.5463569101482812, "grad_norm": 1.1382351317207993, "learning_rate": 4.9004298827332064e-06, "loss": 0.4283, "step": 8972 }, { "epoch": 0.5464178059251591, "grad_norm": 0.993985752905156, "learning_rate": 4.900407585783566e-06, "loss": 0.4091, "step": 8973 }, { "epoch": 0.5464787017020369, "grad_norm": 0.9950931102396884, "learning_rate": 4.900385286388441e-06, "loss": 0.428, "step": 8974 }, { "epoch": 0.5465395974789148, "grad_norm": 1.009999190152401, "learning_rate": 4.9003629845478525e-06, "loss": 0.405, "step": 8975 }, { "epoch": 0.5466004932557927, "grad_norm": 1.0108485085229586, "learning_rate": 4.900340680261824e-06, "loss": 0.4212, "step": 8976 }, { "epoch": 0.5466613890326706, "grad_norm": 0.9800219577848399, "learning_rate": 4.900318373530379e-06, "loss": 0.4273, "step": 8977 }, { "epoch": 0.5467222848095484, "grad_norm": 1.0249096879639978, "learning_rate": 4.900296064353539e-06, "loss": 0.433, "step": 8978 }, { "epoch": 0.5467831805864263, "grad_norm": 1.0848659277891461, "learning_rate": 4.900273752731327e-06, "loss": 0.4603, "step": 8979 }, { "epoch": 0.5468440763633042, "grad_norm": 1.0700905438406876, "learning_rate": 4.900251438663767e-06, "loss": 0.3497, "step": 8980 }, { "epoch": 0.5469049721401821, "grad_norm": 1.0320707832694578, "learning_rate": 4.90022912215088e-06, "loss": 0.436, "step": 8981 }, { "epoch": 0.5469658679170599, "grad_norm": 1.096144588450918, "learning_rate": 4.900206803192689e-06, "loss": 0.4445, "step": 8982 }, { "epoch": 0.5470267636939379, "grad_norm": 1.0097227256611396, "learning_rate": 4.900184481789219e-06, "loss": 0.467, "step": 8983 }, { "epoch": 0.5470876594708157, "grad_norm": 1.0356760729985095, "learning_rate": 4.900162157940489e-06, "loss": 0.4172, "step": 8984 }, { "epoch": 0.5471485552476936, "grad_norm": 1.0396456112281323, "learning_rate": 4.900139831646525e-06, "loss": 0.4203, "step": 8985 }, { "epoch": 0.5472094510245714, "grad_norm": 1.0453438896667957, "learning_rate": 4.900117502907348e-06, "loss": 0.4634, "step": 8986 }, { "epoch": 0.5472703468014494, "grad_norm": 0.9499232145696499, "learning_rate": 4.90009517172298e-06, "loss": 0.4947, "step": 8987 }, { "epoch": 0.5473312425783272, "grad_norm": 1.0110734136733643, "learning_rate": 4.900072838093447e-06, "loss": 0.437, "step": 8988 }, { "epoch": 0.5473921383552051, "grad_norm": 0.9120110357484021, "learning_rate": 4.900050502018769e-06, "loss": 0.4573, "step": 8989 }, { "epoch": 0.5474530341320829, "grad_norm": 0.9723480077582616, "learning_rate": 4.900028163498969e-06, "loss": 0.4547, "step": 8990 }, { "epoch": 0.5475139299089609, "grad_norm": 0.9885974602250519, "learning_rate": 4.90000582253407e-06, "loss": 0.3958, "step": 8991 }, { "epoch": 0.5475748256858387, "grad_norm": 0.9668665319718945, "learning_rate": 4.899983479124095e-06, "loss": 0.5045, "step": 8992 }, { "epoch": 0.5476357214627166, "grad_norm": 0.9788642398979872, "learning_rate": 4.899961133269068e-06, "loss": 0.4257, "step": 8993 }, { "epoch": 0.5476966172395944, "grad_norm": 1.155974549260999, "learning_rate": 4.8999387849690095e-06, "loss": 0.4009, "step": 8994 }, { "epoch": 0.5477575130164724, "grad_norm": 1.0189496762466024, "learning_rate": 4.899916434223943e-06, "loss": 0.4013, "step": 8995 }, { "epoch": 0.5478184087933502, "grad_norm": 1.0134530804758481, "learning_rate": 4.899894081033892e-06, "loss": 0.4532, "step": 8996 }, { "epoch": 0.547879304570228, "grad_norm": 1.0218258676344636, "learning_rate": 4.899871725398879e-06, "loss": 0.4367, "step": 8997 }, { "epoch": 0.5479402003471059, "grad_norm": 0.9575944766331325, "learning_rate": 4.899849367318927e-06, "loss": 0.5155, "step": 8998 }, { "epoch": 0.5480010961239838, "grad_norm": 1.0418626626016998, "learning_rate": 4.899827006794057e-06, "loss": 0.4093, "step": 8999 }, { "epoch": 0.5480619919008617, "grad_norm": 1.0353685940215032, "learning_rate": 4.899804643824293e-06, "loss": 0.4201, "step": 9000 }, { "epoch": 0.5481228876777395, "grad_norm": 0.99502125088026, "learning_rate": 4.899782278409659e-06, "loss": 0.5091, "step": 9001 }, { "epoch": 0.5481837834546174, "grad_norm": 1.0679402486755503, "learning_rate": 4.899759910550176e-06, "loss": 0.4507, "step": 9002 }, { "epoch": 0.5482446792314953, "grad_norm": 1.0670375855880365, "learning_rate": 4.899737540245868e-06, "loss": 0.3838, "step": 9003 }, { "epoch": 0.5483055750083732, "grad_norm": 0.9447484355153937, "learning_rate": 4.899715167496757e-06, "loss": 0.4516, "step": 9004 }, { "epoch": 0.548366470785251, "grad_norm": 1.0730951363504209, "learning_rate": 4.899692792302867e-06, "loss": 0.4607, "step": 9005 }, { "epoch": 0.5484273665621289, "grad_norm": 1.0345965314598469, "learning_rate": 4.899670414664219e-06, "loss": 0.4783, "step": 9006 }, { "epoch": 0.5484882623390068, "grad_norm": 0.9827269555835229, "learning_rate": 4.899648034580837e-06, "loss": 0.4739, "step": 9007 }, { "epoch": 0.5485491581158847, "grad_norm": 0.9538864876473504, "learning_rate": 4.899625652052743e-06, "loss": 0.3935, "step": 9008 }, { "epoch": 0.5486100538927625, "grad_norm": 1.050128833131509, "learning_rate": 4.8996032670799605e-06, "loss": 0.4407, "step": 9009 }, { "epoch": 0.5486709496696404, "grad_norm": 1.0131784400231922, "learning_rate": 4.899580879662512e-06, "loss": 0.397, "step": 9010 }, { "epoch": 0.5487318454465183, "grad_norm": 0.9909706222306999, "learning_rate": 4.899558489800421e-06, "loss": 0.5054, "step": 9011 }, { "epoch": 0.5487927412233962, "grad_norm": 0.9140809968330661, "learning_rate": 4.899536097493709e-06, "loss": 0.4286, "step": 9012 }, { "epoch": 0.548853637000274, "grad_norm": 1.084130762904841, "learning_rate": 4.899513702742399e-06, "loss": 0.4919, "step": 9013 }, { "epoch": 0.5489145327771519, "grad_norm": 0.9326545667771342, "learning_rate": 4.899491305546515e-06, "loss": 0.5249, "step": 9014 }, { "epoch": 0.5489754285540298, "grad_norm": 1.0389566471687737, "learning_rate": 4.899468905906079e-06, "loss": 0.4165, "step": 9015 }, { "epoch": 0.5490363243309077, "grad_norm": 1.1014505810158368, "learning_rate": 4.8994465038211144e-06, "loss": 0.3985, "step": 9016 }, { "epoch": 0.5490972201077855, "grad_norm": 1.0119326984902728, "learning_rate": 4.899424099291644e-06, "loss": 0.4933, "step": 9017 }, { "epoch": 0.5491581158846633, "grad_norm": 0.9692475355650202, "learning_rate": 4.89940169231769e-06, "loss": 0.4761, "step": 9018 }, { "epoch": 0.5492190116615413, "grad_norm": 1.109415162497102, "learning_rate": 4.899379282899275e-06, "loss": 0.4382, "step": 9019 }, { "epoch": 0.5492799074384191, "grad_norm": 1.0117053642867677, "learning_rate": 4.8993568710364216e-06, "loss": 0.4123, "step": 9020 }, { "epoch": 0.549340803215297, "grad_norm": 0.9917956541749631, "learning_rate": 4.899334456729154e-06, "loss": 0.4891, "step": 9021 }, { "epoch": 0.549401698992175, "grad_norm": 0.9759650254516902, "learning_rate": 4.899312039977495e-06, "loss": 0.5072, "step": 9022 }, { "epoch": 0.5494625947690528, "grad_norm": 1.0504498619144949, "learning_rate": 4.899289620781466e-06, "loss": 0.4204, "step": 9023 }, { "epoch": 0.5495234905459306, "grad_norm": 0.8918778348505546, "learning_rate": 4.89926719914109e-06, "loss": 0.474, "step": 9024 }, { "epoch": 0.5495843863228085, "grad_norm": 0.99785371567302, "learning_rate": 4.899244775056391e-06, "loss": 0.3917, "step": 9025 }, { "epoch": 0.5496452820996864, "grad_norm": 0.9743917064889352, "learning_rate": 4.899222348527391e-06, "loss": 0.455, "step": 9026 }, { "epoch": 0.5497061778765643, "grad_norm": 1.0181743202525608, "learning_rate": 4.899199919554114e-06, "loss": 0.4944, "step": 9027 }, { "epoch": 0.5497670736534421, "grad_norm": 1.0141522760264423, "learning_rate": 4.8991774881365825e-06, "loss": 0.4915, "step": 9028 }, { "epoch": 0.54982796943032, "grad_norm": 1.0357996724552931, "learning_rate": 4.8991550542748176e-06, "loss": 0.3881, "step": 9029 }, { "epoch": 0.5498888652071979, "grad_norm": 0.994273870956782, "learning_rate": 4.899132617968843e-06, "loss": 0.3866, "step": 9030 }, { "epoch": 0.5499497609840758, "grad_norm": 1.0673178087347641, "learning_rate": 4.899110179218684e-06, "loss": 0.3773, "step": 9031 }, { "epoch": 0.5500106567609536, "grad_norm": 0.9425243397855395, "learning_rate": 4.899087738024359e-06, "loss": 0.4852, "step": 9032 }, { "epoch": 0.5500715525378315, "grad_norm": 0.9477242713652129, "learning_rate": 4.899065294385895e-06, "loss": 0.5299, "step": 9033 }, { "epoch": 0.5501324483147094, "grad_norm": 1.112768941167182, "learning_rate": 4.899042848303313e-06, "loss": 0.4059, "step": 9034 }, { "epoch": 0.5501933440915873, "grad_norm": 1.0870374326359908, "learning_rate": 4.899020399776635e-06, "loss": 0.4547, "step": 9035 }, { "epoch": 0.5502542398684651, "grad_norm": 0.9567941476240601, "learning_rate": 4.8989979488058856e-06, "loss": 0.5155, "step": 9036 }, { "epoch": 0.550315135645343, "grad_norm": 1.0357715101363119, "learning_rate": 4.8989754953910876e-06, "loss": 0.3671, "step": 9037 }, { "epoch": 0.5503760314222209, "grad_norm": 1.033819141314235, "learning_rate": 4.898953039532262e-06, "loss": 0.4203, "step": 9038 }, { "epoch": 0.5504369271990988, "grad_norm": 1.0433679263236952, "learning_rate": 4.898930581229434e-06, "loss": 0.4066, "step": 9039 }, { "epoch": 0.5504978229759766, "grad_norm": 1.1639050921105845, "learning_rate": 4.898908120482625e-06, "loss": 0.3736, "step": 9040 }, { "epoch": 0.5505587187528544, "grad_norm": 1.0442212759434482, "learning_rate": 4.898885657291858e-06, "loss": 0.3919, "step": 9041 }, { "epoch": 0.5506196145297324, "grad_norm": 0.969614975381669, "learning_rate": 4.898863191657156e-06, "loss": 0.4588, "step": 9042 }, { "epoch": 0.5506805103066102, "grad_norm": 1.0593695005003196, "learning_rate": 4.898840723578543e-06, "loss": 0.4518, "step": 9043 }, { "epoch": 0.5507414060834881, "grad_norm": 1.0449949596737582, "learning_rate": 4.89881825305604e-06, "loss": 0.3953, "step": 9044 }, { "epoch": 0.5508023018603659, "grad_norm": 1.0006828722896524, "learning_rate": 4.898795780089671e-06, "loss": 0.4203, "step": 9045 }, { "epoch": 0.5508631976372439, "grad_norm": 1.0066323717361543, "learning_rate": 4.8987733046794595e-06, "loss": 0.4351, "step": 9046 }, { "epoch": 0.5509240934141217, "grad_norm": 1.0021024498154272, "learning_rate": 4.8987508268254265e-06, "loss": 0.4665, "step": 9047 }, { "epoch": 0.5509849891909996, "grad_norm": 1.1546715888156356, "learning_rate": 4.898728346527597e-06, "loss": 0.4336, "step": 9048 }, { "epoch": 0.5510458849678774, "grad_norm": 1.0143066779639083, "learning_rate": 4.8987058637859915e-06, "loss": 0.5775, "step": 9049 }, { "epoch": 0.5511067807447554, "grad_norm": 0.9682533968305279, "learning_rate": 4.898683378600636e-06, "loss": 0.4818, "step": 9050 }, { "epoch": 0.5511676765216332, "grad_norm": 0.9631780310887736, "learning_rate": 4.8986608909715515e-06, "loss": 0.4378, "step": 9051 }, { "epoch": 0.5512285722985111, "grad_norm": 1.0405426247982914, "learning_rate": 4.89863840089876e-06, "loss": 0.4296, "step": 9052 }, { "epoch": 0.5512894680753889, "grad_norm": 1.0898582868241078, "learning_rate": 4.898615908382287e-06, "loss": 0.3864, "step": 9053 }, { "epoch": 0.5513503638522669, "grad_norm": 0.9608962304237125, "learning_rate": 4.8985934134221525e-06, "loss": 0.4717, "step": 9054 }, { "epoch": 0.5514112596291447, "grad_norm": 0.9992939956511765, "learning_rate": 4.898570916018382e-06, "loss": 0.4217, "step": 9055 }, { "epoch": 0.5514721554060226, "grad_norm": 1.0317359487815474, "learning_rate": 4.898548416170997e-06, "loss": 0.4638, "step": 9056 }, { "epoch": 0.5515330511829004, "grad_norm": 1.087033222803651, "learning_rate": 4.898525913880021e-06, "loss": 0.4612, "step": 9057 }, { "epoch": 0.5515939469597784, "grad_norm": 0.969149442863387, "learning_rate": 4.898503409145477e-06, "loss": 0.4629, "step": 9058 }, { "epoch": 0.5516548427366562, "grad_norm": 0.9542682458931984, "learning_rate": 4.898480901967386e-06, "loss": 0.4477, "step": 9059 }, { "epoch": 0.5517157385135341, "grad_norm": 1.003130536873456, "learning_rate": 4.898458392345774e-06, "loss": 0.4259, "step": 9060 }, { "epoch": 0.5517766342904119, "grad_norm": 1.1243036034558258, "learning_rate": 4.898435880280662e-06, "loss": 0.3631, "step": 9061 }, { "epoch": 0.5518375300672899, "grad_norm": 1.0617791843582305, "learning_rate": 4.898413365772073e-06, "loss": 0.4481, "step": 9062 }, { "epoch": 0.5518984258441677, "grad_norm": 0.9717063126675078, "learning_rate": 4.898390848820031e-06, "loss": 0.4372, "step": 9063 }, { "epoch": 0.5519593216210456, "grad_norm": 1.048003001054004, "learning_rate": 4.898368329424558e-06, "loss": 0.4214, "step": 9064 }, { "epoch": 0.5520202173979235, "grad_norm": 1.0342977561149602, "learning_rate": 4.898345807585677e-06, "loss": 0.4785, "step": 9065 }, { "epoch": 0.5520811131748014, "grad_norm": 1.0393049108171206, "learning_rate": 4.8983232833034115e-06, "loss": 0.4397, "step": 9066 }, { "epoch": 0.5521420089516792, "grad_norm": 1.0065747307682549, "learning_rate": 4.898300756577783e-06, "loss": 0.3931, "step": 9067 }, { "epoch": 0.552202904728557, "grad_norm": 0.958701198438337, "learning_rate": 4.898278227408817e-06, "loss": 0.5001, "step": 9068 }, { "epoch": 0.552263800505435, "grad_norm": 0.9228973271852196, "learning_rate": 4.898255695796535e-06, "loss": 0.5337, "step": 9069 }, { "epoch": 0.5523246962823128, "grad_norm": 0.9792370560025941, "learning_rate": 4.8982331617409585e-06, "loss": 0.4672, "step": 9070 }, { "epoch": 0.5523855920591907, "grad_norm": 1.0655396788771094, "learning_rate": 4.898210625242113e-06, "loss": 0.4639, "step": 9071 }, { "epoch": 0.5524464878360685, "grad_norm": 0.9107581671678769, "learning_rate": 4.89818808630002e-06, "loss": 0.5012, "step": 9072 }, { "epoch": 0.5525073836129465, "grad_norm": 1.0235627222660169, "learning_rate": 4.898165544914704e-06, "loss": 0.3979, "step": 9073 }, { "epoch": 0.5525682793898243, "grad_norm": 1.0977185566509728, "learning_rate": 4.898143001086185e-06, "loss": 0.4257, "step": 9074 }, { "epoch": 0.5526291751667022, "grad_norm": 0.9618695469496367, "learning_rate": 4.898120454814489e-06, "loss": 0.5063, "step": 9075 }, { "epoch": 0.55269007094358, "grad_norm": 0.9907696571011866, "learning_rate": 4.898097906099637e-06, "loss": 0.4301, "step": 9076 }, { "epoch": 0.552750966720458, "grad_norm": 1.0394681676757282, "learning_rate": 4.898075354941654e-06, "loss": 0.3928, "step": 9077 }, { "epoch": 0.5528118624973358, "grad_norm": 0.9905757824761319, "learning_rate": 4.8980528013405606e-06, "loss": 0.4462, "step": 9078 }, { "epoch": 0.5528727582742137, "grad_norm": 0.94204621102323, "learning_rate": 4.89803024529638e-06, "loss": 0.4491, "step": 9079 }, { "epoch": 0.5529336540510915, "grad_norm": 1.0477722424529374, "learning_rate": 4.898007686809137e-06, "loss": 0.4354, "step": 9080 }, { "epoch": 0.5529945498279695, "grad_norm": 0.9886693058678864, "learning_rate": 4.897985125878855e-06, "loss": 0.4553, "step": 9081 }, { "epoch": 0.5530554456048473, "grad_norm": 1.024584156313258, "learning_rate": 4.897962562505554e-06, "loss": 0.4497, "step": 9082 }, { "epoch": 0.5531163413817252, "grad_norm": 0.9744827104112511, "learning_rate": 4.897939996689259e-06, "loss": 0.4667, "step": 9083 }, { "epoch": 0.553177237158603, "grad_norm": 1.028281168072449, "learning_rate": 4.8979174284299915e-06, "loss": 0.4408, "step": 9084 }, { "epoch": 0.553238132935481, "grad_norm": 1.0060014013311611, "learning_rate": 4.897894857727777e-06, "loss": 0.4614, "step": 9085 }, { "epoch": 0.5532990287123588, "grad_norm": 1.017050888086095, "learning_rate": 4.897872284582637e-06, "loss": 0.4487, "step": 9086 }, { "epoch": 0.5533599244892367, "grad_norm": 1.0954089303950032, "learning_rate": 4.897849708994594e-06, "loss": 0.394, "step": 9087 }, { "epoch": 0.5534208202661145, "grad_norm": 1.0356366446526937, "learning_rate": 4.897827130963672e-06, "loss": 0.4483, "step": 9088 }, { "epoch": 0.5534817160429925, "grad_norm": 1.029998481638611, "learning_rate": 4.897804550489893e-06, "loss": 0.4225, "step": 9089 }, { "epoch": 0.5535426118198703, "grad_norm": 0.9863508922844936, "learning_rate": 4.897781967573281e-06, "loss": 0.4302, "step": 9090 }, { "epoch": 0.5536035075967481, "grad_norm": 1.0342699786132956, "learning_rate": 4.897759382213858e-06, "loss": 0.4511, "step": 9091 }, { "epoch": 0.553664403373626, "grad_norm": 1.0192552128847645, "learning_rate": 4.897736794411649e-06, "loss": 0.395, "step": 9092 }, { "epoch": 0.553725299150504, "grad_norm": 0.9518102678163921, "learning_rate": 4.897714204166674e-06, "loss": 0.4959, "step": 9093 }, { "epoch": 0.5537861949273818, "grad_norm": 0.9778466771234922, "learning_rate": 4.897691611478959e-06, "loss": 0.4273, "step": 9094 }, { "epoch": 0.5538470907042596, "grad_norm": 0.9788513878871554, "learning_rate": 4.897669016348524e-06, "loss": 0.3909, "step": 9095 }, { "epoch": 0.5539079864811375, "grad_norm": 1.023694145627476, "learning_rate": 4.897646418775395e-06, "loss": 0.4499, "step": 9096 }, { "epoch": 0.5539688822580154, "grad_norm": 0.9547651028499715, "learning_rate": 4.897623818759594e-06, "loss": 0.5295, "step": 9097 }, { "epoch": 0.5540297780348933, "grad_norm": 1.037161628414527, "learning_rate": 4.897601216301142e-06, "loss": 0.5507, "step": 9098 }, { "epoch": 0.5540906738117711, "grad_norm": 1.1587498540482322, "learning_rate": 4.8975786114000655e-06, "loss": 0.4126, "step": 9099 }, { "epoch": 0.554151569588649, "grad_norm": 1.008987423295109, "learning_rate": 4.897556004056385e-06, "loss": 0.4504, "step": 9100 }, { "epoch": 0.5542124653655269, "grad_norm": 1.0862140165965926, "learning_rate": 4.897533394270124e-06, "loss": 0.4327, "step": 9101 }, { "epoch": 0.5542733611424048, "grad_norm": 1.0517522160168178, "learning_rate": 4.897510782041306e-06, "loss": 0.4499, "step": 9102 }, { "epoch": 0.5543342569192826, "grad_norm": 1.056100051533361, "learning_rate": 4.8974881673699536e-06, "loss": 0.4179, "step": 9103 }, { "epoch": 0.5543951526961606, "grad_norm": 0.9961931730113159, "learning_rate": 4.897465550256091e-06, "loss": 0.4345, "step": 9104 }, { "epoch": 0.5544560484730384, "grad_norm": 1.0684009808666894, "learning_rate": 4.897442930699739e-06, "loss": 0.5535, "step": 9105 }, { "epoch": 0.5545169442499163, "grad_norm": 0.9857924191982209, "learning_rate": 4.8974203087009235e-06, "loss": 0.4789, "step": 9106 }, { "epoch": 0.5545778400267941, "grad_norm": 0.9806724658838202, "learning_rate": 4.897397684259665e-06, "loss": 0.4345, "step": 9107 }, { "epoch": 0.5546387358036721, "grad_norm": 1.0793959838506535, "learning_rate": 4.897375057375988e-06, "loss": 0.3608, "step": 9108 }, { "epoch": 0.5546996315805499, "grad_norm": 1.1671503832786316, "learning_rate": 4.897352428049915e-06, "loss": 0.5035, "step": 9109 }, { "epoch": 0.5547605273574278, "grad_norm": 0.9818374637415751, "learning_rate": 4.897329796281469e-06, "loss": 0.4612, "step": 9110 }, { "epoch": 0.5548214231343056, "grad_norm": 1.0327339744339257, "learning_rate": 4.897307162070674e-06, "loss": 0.428, "step": 9111 }, { "epoch": 0.5548823189111836, "grad_norm": 0.9838852294622201, "learning_rate": 4.897284525417552e-06, "loss": 0.4161, "step": 9112 }, { "epoch": 0.5549432146880614, "grad_norm": 1.075592768719986, "learning_rate": 4.8972618863221255e-06, "loss": 0.4077, "step": 9113 }, { "epoch": 0.5550041104649392, "grad_norm": 1.0722522231859704, "learning_rate": 4.897239244784419e-06, "loss": 0.3867, "step": 9114 }, { "epoch": 0.5550650062418171, "grad_norm": 1.0238413345004247, "learning_rate": 4.8972166008044555e-06, "loss": 0.4317, "step": 9115 }, { "epoch": 0.555125902018695, "grad_norm": 1.06634334858935, "learning_rate": 4.897193954382257e-06, "loss": 0.4114, "step": 9116 }, { "epoch": 0.5551867977955729, "grad_norm": 1.0120067851696828, "learning_rate": 4.897171305517847e-06, "loss": 0.441, "step": 9117 }, { "epoch": 0.5552476935724507, "grad_norm": 0.9709422525993272, "learning_rate": 4.897148654211249e-06, "loss": 0.4362, "step": 9118 }, { "epoch": 0.5553085893493286, "grad_norm": 1.0201405059699884, "learning_rate": 4.8971260004624855e-06, "loss": 0.4395, "step": 9119 }, { "epoch": 0.5553694851262065, "grad_norm": 0.9312664279329116, "learning_rate": 4.89710334427158e-06, "loss": 0.4333, "step": 9120 }, { "epoch": 0.5554303809030844, "grad_norm": 1.0042737022996053, "learning_rate": 4.897080685638556e-06, "loss": 0.4381, "step": 9121 }, { "epoch": 0.5554912766799622, "grad_norm": 1.098877779017519, "learning_rate": 4.897058024563436e-06, "loss": 0.4456, "step": 9122 }, { "epoch": 0.5555521724568401, "grad_norm": 0.9983138652656423, "learning_rate": 4.897035361046243e-06, "loss": 0.4775, "step": 9123 }, { "epoch": 0.555613068233718, "grad_norm": 0.9592377529407315, "learning_rate": 4.897012695086999e-06, "loss": 0.4991, "step": 9124 }, { "epoch": 0.5556739640105959, "grad_norm": 0.9976240364300897, "learning_rate": 4.89699002668573e-06, "loss": 0.4165, "step": 9125 }, { "epoch": 0.5557348597874737, "grad_norm": 1.0553247381500803, "learning_rate": 4.8969673558424566e-06, "loss": 0.4202, "step": 9126 }, { "epoch": 0.5557957555643516, "grad_norm": 1.1159863153430303, "learning_rate": 4.896944682557202e-06, "loss": 0.403, "step": 9127 }, { "epoch": 0.5558566513412295, "grad_norm": 1.065866929763409, "learning_rate": 4.896922006829991e-06, "loss": 0.4281, "step": 9128 }, { "epoch": 0.5559175471181074, "grad_norm": 0.9637661432504475, "learning_rate": 4.8968993286608455e-06, "loss": 0.4434, "step": 9129 }, { "epoch": 0.5559784428949852, "grad_norm": 1.012520423351713, "learning_rate": 4.896876648049789e-06, "loss": 0.4464, "step": 9130 }, { "epoch": 0.5560393386718631, "grad_norm": 1.0471762686065909, "learning_rate": 4.896853964996844e-06, "loss": 0.4256, "step": 9131 }, { "epoch": 0.556100234448741, "grad_norm": 1.1705552679407196, "learning_rate": 4.896831279502034e-06, "loss": 0.391, "step": 9132 }, { "epoch": 0.5561611302256189, "grad_norm": 1.0472258727574546, "learning_rate": 4.8968085915653816e-06, "loss": 0.4156, "step": 9133 }, { "epoch": 0.5562220260024967, "grad_norm": 0.9950670637244436, "learning_rate": 4.896785901186912e-06, "loss": 0.4317, "step": 9134 }, { "epoch": 0.5562829217793746, "grad_norm": 1.0795237661186172, "learning_rate": 4.896763208366646e-06, "loss": 0.3875, "step": 9135 }, { "epoch": 0.5563438175562525, "grad_norm": 0.9716613136166841, "learning_rate": 4.896740513104607e-06, "loss": 0.4583, "step": 9136 }, { "epoch": 0.5564047133331304, "grad_norm": 0.9730242008135007, "learning_rate": 4.8967178154008185e-06, "loss": 0.3965, "step": 9137 }, { "epoch": 0.5564656091100082, "grad_norm": 0.9920646238217078, "learning_rate": 4.8966951152553044e-06, "loss": 0.4593, "step": 9138 }, { "epoch": 0.556526504886886, "grad_norm": 0.9732694504856815, "learning_rate": 4.8966724126680874e-06, "loss": 0.4783, "step": 9139 }, { "epoch": 0.556587400663764, "grad_norm": 1.0124395195474356, "learning_rate": 4.8966497076391895e-06, "loss": 0.4825, "step": 9140 }, { "epoch": 0.5566482964406418, "grad_norm": 1.0070556173019225, "learning_rate": 4.896627000168635e-06, "loss": 0.4418, "step": 9141 }, { "epoch": 0.5567091922175197, "grad_norm": 0.992757271514245, "learning_rate": 4.896604290256447e-06, "loss": 0.4259, "step": 9142 }, { "epoch": 0.5567700879943975, "grad_norm": 0.9942829716015631, "learning_rate": 4.8965815779026485e-06, "loss": 0.4857, "step": 9143 }, { "epoch": 0.5568309837712755, "grad_norm": 0.9794536901789086, "learning_rate": 4.896558863107262e-06, "loss": 0.4469, "step": 9144 }, { "epoch": 0.5568918795481533, "grad_norm": 1.051433235354601, "learning_rate": 4.896536145870311e-06, "loss": 0.4053, "step": 9145 }, { "epoch": 0.5569527753250312, "grad_norm": 0.9530838897845336, "learning_rate": 4.896513426191819e-06, "loss": 0.4386, "step": 9146 }, { "epoch": 0.5570136711019091, "grad_norm": 0.9242208214677272, "learning_rate": 4.896490704071809e-06, "loss": 0.4917, "step": 9147 }, { "epoch": 0.557074566878787, "grad_norm": 0.9492972824810553, "learning_rate": 4.896467979510304e-06, "loss": 0.5002, "step": 9148 }, { "epoch": 0.5571354626556648, "grad_norm": 0.9635836143555062, "learning_rate": 4.896445252507328e-06, "loss": 0.4772, "step": 9149 }, { "epoch": 0.5571963584325427, "grad_norm": 1.0131144433491344, "learning_rate": 4.896422523062902e-06, "loss": 0.4407, "step": 9150 }, { "epoch": 0.5572572542094206, "grad_norm": 0.9868867286633011, "learning_rate": 4.896399791177052e-06, "loss": 0.439, "step": 9151 }, { "epoch": 0.5573181499862985, "grad_norm": 1.1099553117984544, "learning_rate": 4.896377056849799e-06, "loss": 0.4652, "step": 9152 }, { "epoch": 0.5573790457631763, "grad_norm": 1.0052265190438716, "learning_rate": 4.896354320081167e-06, "loss": 0.4717, "step": 9153 }, { "epoch": 0.5574399415400542, "grad_norm": 1.1416129307742378, "learning_rate": 4.896331580871179e-06, "loss": 0.4196, "step": 9154 }, { "epoch": 0.5575008373169321, "grad_norm": 0.9716221116245307, "learning_rate": 4.896308839219859e-06, "loss": 0.4425, "step": 9155 }, { "epoch": 0.55756173309381, "grad_norm": 1.0143139968496608, "learning_rate": 4.896286095127228e-06, "loss": 0.4543, "step": 9156 }, { "epoch": 0.5576226288706878, "grad_norm": 0.9985135471418014, "learning_rate": 4.896263348593311e-06, "loss": 0.5116, "step": 9157 }, { "epoch": 0.5576835246475657, "grad_norm": 1.0091168824695556, "learning_rate": 4.896240599618131e-06, "loss": 0.4276, "step": 9158 }, { "epoch": 0.5577444204244436, "grad_norm": 0.9811524731235305, "learning_rate": 4.896217848201711e-06, "loss": 0.4145, "step": 9159 }, { "epoch": 0.5578053162013215, "grad_norm": 0.9593341226247409, "learning_rate": 4.896195094344074e-06, "loss": 0.4405, "step": 9160 }, { "epoch": 0.5578662119781993, "grad_norm": 1.0138640933617602, "learning_rate": 4.8961723380452435e-06, "loss": 0.4038, "step": 9161 }, { "epoch": 0.5579271077550771, "grad_norm": 0.9802039667980471, "learning_rate": 4.896149579305242e-06, "loss": 0.4138, "step": 9162 }, { "epoch": 0.5579880035319551, "grad_norm": 0.9589738469505491, "learning_rate": 4.896126818124092e-06, "loss": 0.5118, "step": 9163 }, { "epoch": 0.558048899308833, "grad_norm": 1.0048077348230833, "learning_rate": 4.896104054501819e-06, "loss": 0.4572, "step": 9164 }, { "epoch": 0.5581097950857108, "grad_norm": 0.9601289570224437, "learning_rate": 4.896081288438446e-06, "loss": 0.4567, "step": 9165 }, { "epoch": 0.5581706908625886, "grad_norm": 1.1222340505055666, "learning_rate": 4.896058519933994e-06, "loss": 0.4029, "step": 9166 }, { "epoch": 0.5582315866394666, "grad_norm": 0.9643287131319781, "learning_rate": 4.8960357489884865e-06, "loss": 0.4417, "step": 9167 }, { "epoch": 0.5582924824163444, "grad_norm": 1.0916014028117393, "learning_rate": 4.8960129756019494e-06, "loss": 0.4071, "step": 9168 }, { "epoch": 0.5583533781932223, "grad_norm": 1.067687224398344, "learning_rate": 4.895990199774403e-06, "loss": 0.4195, "step": 9169 }, { "epoch": 0.5584142739701001, "grad_norm": 1.0855736699365204, "learning_rate": 4.895967421505872e-06, "loss": 0.4606, "step": 9170 }, { "epoch": 0.5584751697469781, "grad_norm": 0.9823735618839102, "learning_rate": 4.89594464079638e-06, "loss": 0.3859, "step": 9171 }, { "epoch": 0.5585360655238559, "grad_norm": 1.0661210126984195, "learning_rate": 4.895921857645948e-06, "loss": 0.4672, "step": 9172 }, { "epoch": 0.5585969613007338, "grad_norm": 1.050253575631273, "learning_rate": 4.8958990720546015e-06, "loss": 0.3548, "step": 9173 }, { "epoch": 0.5586578570776116, "grad_norm": 1.0127676775946528, "learning_rate": 4.8958762840223625e-06, "loss": 0.473, "step": 9174 }, { "epoch": 0.5587187528544896, "grad_norm": 1.1687527436372769, "learning_rate": 4.895853493549254e-06, "loss": 0.412, "step": 9175 }, { "epoch": 0.5587796486313674, "grad_norm": 1.029097288443375, "learning_rate": 4.895830700635301e-06, "loss": 0.4688, "step": 9176 }, { "epoch": 0.5588405444082453, "grad_norm": 1.0017371240363449, "learning_rate": 4.895807905280525e-06, "loss": 0.4704, "step": 9177 }, { "epoch": 0.5589014401851231, "grad_norm": 1.0778908552147404, "learning_rate": 4.895785107484949e-06, "loss": 0.3744, "step": 9178 }, { "epoch": 0.5589623359620011, "grad_norm": 1.0668123868971715, "learning_rate": 4.895762307248598e-06, "loss": 0.4529, "step": 9179 }, { "epoch": 0.5590232317388789, "grad_norm": 0.9870375250245115, "learning_rate": 4.895739504571494e-06, "loss": 0.4172, "step": 9180 }, { "epoch": 0.5590841275157568, "grad_norm": 1.0057472657255924, "learning_rate": 4.89571669945366e-06, "loss": 0.461, "step": 9181 }, { "epoch": 0.5591450232926346, "grad_norm": 1.015190890122496, "learning_rate": 4.895693891895119e-06, "loss": 0.4156, "step": 9182 }, { "epoch": 0.5592059190695126, "grad_norm": 0.9822233702678532, "learning_rate": 4.895671081895896e-06, "loss": 0.4789, "step": 9183 }, { "epoch": 0.5592668148463904, "grad_norm": 0.9450037985469742, "learning_rate": 4.895648269456013e-06, "loss": 0.5032, "step": 9184 }, { "epoch": 0.5593277106232682, "grad_norm": 1.0205665907682937, "learning_rate": 4.895625454575492e-06, "loss": 0.4609, "step": 9185 }, { "epoch": 0.5593886064001462, "grad_norm": 0.9828916129619789, "learning_rate": 4.895602637254359e-06, "loss": 0.5232, "step": 9186 }, { "epoch": 0.559449502177024, "grad_norm": 0.9514985794361981, "learning_rate": 4.895579817492636e-06, "loss": 0.4403, "step": 9187 }, { "epoch": 0.5595103979539019, "grad_norm": 0.9903127259899945, "learning_rate": 4.895556995290345e-06, "loss": 0.4317, "step": 9188 }, { "epoch": 0.5595712937307797, "grad_norm": 0.950645751541365, "learning_rate": 4.895534170647511e-06, "loss": 0.4806, "step": 9189 }, { "epoch": 0.5596321895076577, "grad_norm": 0.8843228042359194, "learning_rate": 4.895511343564156e-06, "loss": 0.4401, "step": 9190 }, { "epoch": 0.5596930852845355, "grad_norm": 1.0398363161573863, "learning_rate": 4.895488514040305e-06, "loss": 0.449, "step": 9191 }, { "epoch": 0.5597539810614134, "grad_norm": 1.0962752419881194, "learning_rate": 4.8954656820759795e-06, "loss": 0.4283, "step": 9192 }, { "epoch": 0.5598148768382912, "grad_norm": 1.0961820343805753, "learning_rate": 4.895442847671203e-06, "loss": 0.4239, "step": 9193 }, { "epoch": 0.5598757726151692, "grad_norm": 1.1308180053298689, "learning_rate": 4.8954200108259996e-06, "loss": 0.3879, "step": 9194 }, { "epoch": 0.559936668392047, "grad_norm": 0.9708266779813461, "learning_rate": 4.895397171540392e-06, "loss": 0.5452, "step": 9195 }, { "epoch": 0.5599975641689249, "grad_norm": 0.9835247055760461, "learning_rate": 4.895374329814404e-06, "loss": 0.498, "step": 9196 }, { "epoch": 0.5600584599458027, "grad_norm": 0.9402026275830635, "learning_rate": 4.895351485648058e-06, "loss": 0.409, "step": 9197 }, { "epoch": 0.5601193557226807, "grad_norm": 1.1863097502275068, "learning_rate": 4.895328639041377e-06, "loss": 0.4126, "step": 9198 }, { "epoch": 0.5601802514995585, "grad_norm": 0.9701656463124728, "learning_rate": 4.8953057899943854e-06, "loss": 0.4369, "step": 9199 }, { "epoch": 0.5602411472764364, "grad_norm": 1.0622244143693285, "learning_rate": 4.895282938507107e-06, "loss": 0.4261, "step": 9200 }, { "epoch": 0.5603020430533142, "grad_norm": 1.0993281041331062, "learning_rate": 4.895260084579564e-06, "loss": 0.4245, "step": 9201 }, { "epoch": 0.5603629388301922, "grad_norm": 1.18520920670481, "learning_rate": 4.895237228211779e-06, "loss": 0.4685, "step": 9202 }, { "epoch": 0.56042383460707, "grad_norm": 1.0236174809892358, "learning_rate": 4.895214369403776e-06, "loss": 0.4718, "step": 9203 }, { "epoch": 0.5604847303839479, "grad_norm": 1.0225241884871008, "learning_rate": 4.895191508155579e-06, "loss": 0.4492, "step": 9204 }, { "epoch": 0.5605456261608257, "grad_norm": 1.0357347467041795, "learning_rate": 4.89516864446721e-06, "loss": 0.4142, "step": 9205 }, { "epoch": 0.5606065219377037, "grad_norm": 0.9919224950111134, "learning_rate": 4.8951457783386935e-06, "loss": 0.4645, "step": 9206 }, { "epoch": 0.5606674177145815, "grad_norm": 0.9554684412947334, "learning_rate": 4.895122909770053e-06, "loss": 0.438, "step": 9207 }, { "epoch": 0.5607283134914594, "grad_norm": 1.0187733047093734, "learning_rate": 4.895100038761309e-06, "loss": 0.3962, "step": 9208 }, { "epoch": 0.5607892092683372, "grad_norm": 0.9787894354582112, "learning_rate": 4.895077165312488e-06, "loss": 0.4363, "step": 9209 }, { "epoch": 0.5608501050452152, "grad_norm": 1.0293381367104961, "learning_rate": 4.895054289423613e-06, "loss": 0.4801, "step": 9210 }, { "epoch": 0.560911000822093, "grad_norm": 0.9436225256888625, "learning_rate": 4.895031411094706e-06, "loss": 0.4357, "step": 9211 }, { "epoch": 0.5609718965989708, "grad_norm": 1.0656958229743962, "learning_rate": 4.895008530325791e-06, "loss": 0.4014, "step": 9212 }, { "epoch": 0.5610327923758487, "grad_norm": 0.930950565132015, "learning_rate": 4.89498564711689e-06, "loss": 0.4206, "step": 9213 }, { "epoch": 0.5610936881527266, "grad_norm": 1.0342713778857962, "learning_rate": 4.8949627614680285e-06, "loss": 0.4191, "step": 9214 }, { "epoch": 0.5611545839296045, "grad_norm": 0.9807338333310092, "learning_rate": 4.894939873379229e-06, "loss": 0.4663, "step": 9215 }, { "epoch": 0.5612154797064823, "grad_norm": 1.0656435899855634, "learning_rate": 4.894916982850513e-06, "loss": 0.4152, "step": 9216 }, { "epoch": 0.5612763754833602, "grad_norm": 0.9962990175875913, "learning_rate": 4.8948940898819065e-06, "loss": 0.4778, "step": 9217 }, { "epoch": 0.5613372712602381, "grad_norm": 0.9947069622049759, "learning_rate": 4.894871194473432e-06, "loss": 0.5099, "step": 9218 }, { "epoch": 0.561398167037116, "grad_norm": 0.9342283487639395, "learning_rate": 4.894848296625112e-06, "loss": 0.4266, "step": 9219 }, { "epoch": 0.5614590628139938, "grad_norm": 1.0115770149892098, "learning_rate": 4.89482539633697e-06, "loss": 0.495, "step": 9220 }, { "epoch": 0.5615199585908717, "grad_norm": 0.9209054638408092, "learning_rate": 4.89480249360903e-06, "loss": 0.5158, "step": 9221 }, { "epoch": 0.5615808543677496, "grad_norm": 0.9681319608101672, "learning_rate": 4.894779588441315e-06, "loss": 0.4839, "step": 9222 }, { "epoch": 0.5616417501446275, "grad_norm": 1.017031712109911, "learning_rate": 4.8947566808338486e-06, "loss": 0.4217, "step": 9223 }, { "epoch": 0.5617026459215053, "grad_norm": 1.0125889161920931, "learning_rate": 4.894733770786654e-06, "loss": 0.3732, "step": 9224 }, { "epoch": 0.5617635416983832, "grad_norm": 1.0030631545066522, "learning_rate": 4.894710858299754e-06, "loss": 0.4864, "step": 9225 }, { "epoch": 0.5618244374752611, "grad_norm": 1.0088736393600553, "learning_rate": 4.894687943373172e-06, "loss": 0.482, "step": 9226 }, { "epoch": 0.561885333252139, "grad_norm": 0.9844573280355334, "learning_rate": 4.894665026006932e-06, "loss": 0.4703, "step": 9227 }, { "epoch": 0.5619462290290168, "grad_norm": 1.0114829866484136, "learning_rate": 4.894642106201057e-06, "loss": 0.4009, "step": 9228 }, { "epoch": 0.5620071248058948, "grad_norm": 1.052082929759196, "learning_rate": 4.894619183955571e-06, "loss": 0.4124, "step": 9229 }, { "epoch": 0.5620680205827726, "grad_norm": 1.0998684313130778, "learning_rate": 4.894596259270496e-06, "loss": 0.4703, "step": 9230 }, { "epoch": 0.5621289163596505, "grad_norm": 1.0697578672731571, "learning_rate": 4.894573332145857e-06, "loss": 0.4139, "step": 9231 }, { "epoch": 0.5621898121365283, "grad_norm": 1.0886014318532153, "learning_rate": 4.894550402581676e-06, "loss": 0.3856, "step": 9232 }, { "epoch": 0.5622507079134063, "grad_norm": 1.0717645525041453, "learning_rate": 4.8945274705779765e-06, "loss": 0.3854, "step": 9233 }, { "epoch": 0.5623116036902841, "grad_norm": 1.0279936894551784, "learning_rate": 4.894504536134783e-06, "loss": 0.4115, "step": 9234 }, { "epoch": 0.562372499467162, "grad_norm": 1.0316514711544158, "learning_rate": 4.894481599252118e-06, "loss": 0.4223, "step": 9235 }, { "epoch": 0.5624333952440398, "grad_norm": 0.9669462050950944, "learning_rate": 4.894458659930004e-06, "loss": 0.4479, "step": 9236 }, { "epoch": 0.5624942910209177, "grad_norm": 1.1113719001641447, "learning_rate": 4.894435718168466e-06, "loss": 0.4075, "step": 9237 }, { "epoch": 0.5625551867977956, "grad_norm": 1.0219541807527854, "learning_rate": 4.8944127739675265e-06, "loss": 0.4532, "step": 9238 }, { "epoch": 0.5626160825746734, "grad_norm": 0.9498780633813266, "learning_rate": 4.8943898273272086e-06, "loss": 0.5314, "step": 9239 }, { "epoch": 0.5626769783515513, "grad_norm": 0.9916928891600824, "learning_rate": 4.8943668782475365e-06, "loss": 0.4872, "step": 9240 }, { "epoch": 0.5627378741284292, "grad_norm": 0.9514294108586197, "learning_rate": 4.894343926728533e-06, "loss": 0.4698, "step": 9241 }, { "epoch": 0.5627987699053071, "grad_norm": 1.1181298876129586, "learning_rate": 4.8943209727702225e-06, "loss": 0.4507, "step": 9242 }, { "epoch": 0.5628596656821849, "grad_norm": 0.967793655564799, "learning_rate": 4.894298016372627e-06, "loss": 0.5547, "step": 9243 }, { "epoch": 0.5629205614590628, "grad_norm": 1.0609366561851938, "learning_rate": 4.89427505753577e-06, "loss": 0.4581, "step": 9244 }, { "epoch": 0.5629814572359407, "grad_norm": 0.9882979443354972, "learning_rate": 4.894252096259676e-06, "loss": 0.4617, "step": 9245 }, { "epoch": 0.5630423530128186, "grad_norm": 1.0496860765395124, "learning_rate": 4.894229132544368e-06, "loss": 0.4751, "step": 9246 }, { "epoch": 0.5631032487896964, "grad_norm": 1.000913071194918, "learning_rate": 4.894206166389869e-06, "loss": 0.504, "step": 9247 }, { "epoch": 0.5631641445665743, "grad_norm": 0.9845006821376394, "learning_rate": 4.894183197796202e-06, "loss": 0.4779, "step": 9248 }, { "epoch": 0.5632250403434522, "grad_norm": 0.9440603191964834, "learning_rate": 4.894160226763391e-06, "loss": 0.4304, "step": 9249 }, { "epoch": 0.5632859361203301, "grad_norm": 0.9191864708142403, "learning_rate": 4.89413725329146e-06, "loss": 0.4759, "step": 9250 }, { "epoch": 0.5633468318972079, "grad_norm": 1.0540545956342742, "learning_rate": 4.894114277380431e-06, "loss": 0.4457, "step": 9251 }, { "epoch": 0.5634077276740858, "grad_norm": 1.1009948507775242, "learning_rate": 4.894091299030328e-06, "loss": 0.4325, "step": 9252 }, { "epoch": 0.5634686234509637, "grad_norm": 0.9912366301685033, "learning_rate": 4.894068318241176e-06, "loss": 0.4604, "step": 9253 }, { "epoch": 0.5635295192278416, "grad_norm": 1.0869753452039386, "learning_rate": 4.894045335012994e-06, "loss": 0.5428, "step": 9254 }, { "epoch": 0.5635904150047194, "grad_norm": 0.9163259771270561, "learning_rate": 4.894022349345811e-06, "loss": 0.4389, "step": 9255 }, { "epoch": 0.5636513107815972, "grad_norm": 1.000040898464272, "learning_rate": 4.893999361239647e-06, "loss": 0.4315, "step": 9256 }, { "epoch": 0.5637122065584752, "grad_norm": 0.9923868884488661, "learning_rate": 4.893976370694526e-06, "loss": 0.431, "step": 9257 }, { "epoch": 0.563773102335353, "grad_norm": 0.9102537989018168, "learning_rate": 4.893953377710472e-06, "loss": 0.4319, "step": 9258 }, { "epoch": 0.5638339981122309, "grad_norm": 0.9406426855516754, "learning_rate": 4.8939303822875086e-06, "loss": 0.5025, "step": 9259 }, { "epoch": 0.5638948938891087, "grad_norm": 1.0512705233242046, "learning_rate": 4.893907384425658e-06, "loss": 0.3899, "step": 9260 }, { "epoch": 0.5639557896659867, "grad_norm": 1.070630819014053, "learning_rate": 4.893884384124945e-06, "loss": 0.4334, "step": 9261 }, { "epoch": 0.5640166854428645, "grad_norm": 0.9875820416341962, "learning_rate": 4.893861381385392e-06, "loss": 0.4852, "step": 9262 }, { "epoch": 0.5640775812197424, "grad_norm": 0.9448607156760805, "learning_rate": 4.8938383762070216e-06, "loss": 0.4648, "step": 9263 }, { "epoch": 0.5641384769966202, "grad_norm": 1.0064683306169757, "learning_rate": 4.8938153685898605e-06, "loss": 0.415, "step": 9264 }, { "epoch": 0.5641993727734982, "grad_norm": 1.0264211133994332, "learning_rate": 4.893792358533929e-06, "loss": 0.4249, "step": 9265 }, { "epoch": 0.564260268550376, "grad_norm": 1.0181152766562214, "learning_rate": 4.893769346039251e-06, "loss": 0.3988, "step": 9266 }, { "epoch": 0.5643211643272539, "grad_norm": 0.9964228120028205, "learning_rate": 4.893746331105851e-06, "loss": 0.5014, "step": 9267 }, { "epoch": 0.5643820601041318, "grad_norm": 1.0329048200335953, "learning_rate": 4.893723313733753e-06, "loss": 0.4242, "step": 9268 }, { "epoch": 0.5644429558810097, "grad_norm": 1.0781313999289748, "learning_rate": 4.893700293922978e-06, "loss": 0.3801, "step": 9269 }, { "epoch": 0.5645038516578875, "grad_norm": 1.098398182774067, "learning_rate": 4.893677271673552e-06, "loss": 0.4444, "step": 9270 }, { "epoch": 0.5645647474347654, "grad_norm": 1.0224626049828103, "learning_rate": 4.893654246985496e-06, "loss": 0.4468, "step": 9271 }, { "epoch": 0.5646256432116433, "grad_norm": 1.0088038860803554, "learning_rate": 4.893631219858836e-06, "loss": 0.4858, "step": 9272 }, { "epoch": 0.5646865389885212, "grad_norm": 0.9452083254050637, "learning_rate": 4.893608190293595e-06, "loss": 0.4605, "step": 9273 }, { "epoch": 0.564747434765399, "grad_norm": 1.1143116151210435, "learning_rate": 4.893585158289794e-06, "loss": 0.4818, "step": 9274 }, { "epoch": 0.5648083305422769, "grad_norm": 1.0936321283554393, "learning_rate": 4.89356212384746e-06, "loss": 0.4603, "step": 9275 }, { "epoch": 0.5648692263191548, "grad_norm": 0.9803366953254639, "learning_rate": 4.893539086966613e-06, "loss": 0.444, "step": 9276 }, { "epoch": 0.5649301220960327, "grad_norm": 1.039015474229476, "learning_rate": 4.893516047647279e-06, "loss": 0.398, "step": 9277 }, { "epoch": 0.5649910178729105, "grad_norm": 0.9375510736647629, "learning_rate": 4.89349300588948e-06, "loss": 0.5208, "step": 9278 }, { "epoch": 0.5650519136497884, "grad_norm": 0.9937385852583618, "learning_rate": 4.893469961693241e-06, "loss": 0.412, "step": 9279 }, { "epoch": 0.5651128094266663, "grad_norm": 1.0287556041173167, "learning_rate": 4.893446915058584e-06, "loss": 0.4346, "step": 9280 }, { "epoch": 0.5651737052035442, "grad_norm": 1.0172706539222098, "learning_rate": 4.893423865985532e-06, "loss": 0.4588, "step": 9281 }, { "epoch": 0.565234600980422, "grad_norm": 1.1697594581026671, "learning_rate": 4.893400814474112e-06, "loss": 0.4046, "step": 9282 }, { "epoch": 0.5652954967572998, "grad_norm": 1.0421036350722908, "learning_rate": 4.893377760524343e-06, "loss": 0.421, "step": 9283 }, { "epoch": 0.5653563925341778, "grad_norm": 0.9357465242971182, "learning_rate": 4.893354704136251e-06, "loss": 0.4213, "step": 9284 }, { "epoch": 0.5654172883110556, "grad_norm": 0.979915391961865, "learning_rate": 4.89333164530986e-06, "loss": 0.459, "step": 9285 }, { "epoch": 0.5654781840879335, "grad_norm": 1.0899135487733107, "learning_rate": 4.893308584045191e-06, "loss": 0.4142, "step": 9286 }, { "epoch": 0.5655390798648113, "grad_norm": 1.002771675473478, "learning_rate": 4.893285520342269e-06, "loss": 0.4067, "step": 9287 }, { "epoch": 0.5655999756416893, "grad_norm": 1.0180164413520179, "learning_rate": 4.893262454201118e-06, "loss": 0.4056, "step": 9288 }, { "epoch": 0.5656608714185671, "grad_norm": 0.9989754739999211, "learning_rate": 4.893239385621762e-06, "loss": 0.4696, "step": 9289 }, { "epoch": 0.565721767195445, "grad_norm": 1.0716116889036513, "learning_rate": 4.893216314604222e-06, "loss": 0.384, "step": 9290 }, { "epoch": 0.5657826629723228, "grad_norm": 0.9698667439719804, "learning_rate": 4.893193241148524e-06, "loss": 0.4502, "step": 9291 }, { "epoch": 0.5658435587492008, "grad_norm": 1.0069437505457348, "learning_rate": 4.893170165254689e-06, "loss": 0.5045, "step": 9292 }, { "epoch": 0.5659044545260786, "grad_norm": 0.9965539113359894, "learning_rate": 4.893147086922744e-06, "loss": 0.4113, "step": 9293 }, { "epoch": 0.5659653503029565, "grad_norm": 0.9234872763686947, "learning_rate": 4.893124006152709e-06, "loss": 0.4944, "step": 9294 }, { "epoch": 0.5660262460798343, "grad_norm": 1.0198269785723681, "learning_rate": 4.893100922944609e-06, "loss": 0.443, "step": 9295 }, { "epoch": 0.5660871418567123, "grad_norm": 0.946106712061539, "learning_rate": 4.893077837298468e-06, "loss": 0.4254, "step": 9296 }, { "epoch": 0.5661480376335901, "grad_norm": 1.0307156946485607, "learning_rate": 4.893054749214309e-06, "loss": 0.3698, "step": 9297 }, { "epoch": 0.566208933410468, "grad_norm": 0.949434150052729, "learning_rate": 4.893031658692155e-06, "loss": 0.4088, "step": 9298 }, { "epoch": 0.5662698291873458, "grad_norm": 1.095014796261569, "learning_rate": 4.893008565732031e-06, "loss": 0.4759, "step": 9299 }, { "epoch": 0.5663307249642238, "grad_norm": 1.0537214698858768, "learning_rate": 4.892985470333959e-06, "loss": 0.4216, "step": 9300 }, { "epoch": 0.5663916207411016, "grad_norm": 1.000979947964106, "learning_rate": 4.892962372497965e-06, "loss": 0.4421, "step": 9301 }, { "epoch": 0.5664525165179795, "grad_norm": 1.0209239583492644, "learning_rate": 4.8929392722240685e-06, "loss": 0.403, "step": 9302 }, { "epoch": 0.5665134122948573, "grad_norm": 0.9328117304237262, "learning_rate": 4.892916169512296e-06, "loss": 0.4443, "step": 9303 }, { "epoch": 0.5665743080717353, "grad_norm": 1.0674417387785506, "learning_rate": 4.89289306436267e-06, "loss": 0.4276, "step": 9304 }, { "epoch": 0.5666352038486131, "grad_norm": 1.050475270178406, "learning_rate": 4.892869956775215e-06, "loss": 0.379, "step": 9305 }, { "epoch": 0.566696099625491, "grad_norm": 1.0011887520448552, "learning_rate": 4.892846846749953e-06, "loss": 0.3922, "step": 9306 }, { "epoch": 0.5667569954023688, "grad_norm": 0.9635743263583344, "learning_rate": 4.892823734286909e-06, "loss": 0.5407, "step": 9307 }, { "epoch": 0.5668178911792467, "grad_norm": 1.0025158343756477, "learning_rate": 4.892800619386105e-06, "loss": 0.4725, "step": 9308 }, { "epoch": 0.5668787869561246, "grad_norm": 0.9371038275850317, "learning_rate": 4.892777502047567e-06, "loss": 0.39, "step": 9309 }, { "epoch": 0.5669396827330024, "grad_norm": 1.161988213199571, "learning_rate": 4.8927543822713155e-06, "loss": 0.4594, "step": 9310 }, { "epoch": 0.5670005785098804, "grad_norm": 0.995419032109646, "learning_rate": 4.892731260057376e-06, "loss": 0.4825, "step": 9311 }, { "epoch": 0.5670614742867582, "grad_norm": 1.027356923893648, "learning_rate": 4.892708135405771e-06, "loss": 0.4423, "step": 9312 }, { "epoch": 0.5671223700636361, "grad_norm": 1.088936779366635, "learning_rate": 4.8926850083165265e-06, "loss": 0.4186, "step": 9313 }, { "epoch": 0.5671832658405139, "grad_norm": 0.9495010892779436, "learning_rate": 4.892661878789663e-06, "loss": 0.4208, "step": 9314 }, { "epoch": 0.5672441616173919, "grad_norm": 0.9624709938820786, "learning_rate": 4.892638746825206e-06, "loss": 0.4823, "step": 9315 }, { "epoch": 0.5673050573942697, "grad_norm": 0.9881182354076107, "learning_rate": 4.8926156124231775e-06, "loss": 0.4377, "step": 9316 }, { "epoch": 0.5673659531711476, "grad_norm": 1.0137694864429754, "learning_rate": 4.892592475583603e-06, "loss": 0.3835, "step": 9317 }, { "epoch": 0.5674268489480254, "grad_norm": 0.9076523005065854, "learning_rate": 4.892569336306504e-06, "loss": 0.4321, "step": 9318 }, { "epoch": 0.5674877447249034, "grad_norm": 1.0160675465684528, "learning_rate": 4.892546194591906e-06, "loss": 0.4056, "step": 9319 }, { "epoch": 0.5675486405017812, "grad_norm": 1.0299267715099873, "learning_rate": 4.892523050439832e-06, "loss": 0.5276, "step": 9320 }, { "epoch": 0.5676095362786591, "grad_norm": 1.1022294750528137, "learning_rate": 4.892499903850304e-06, "loss": 0.4135, "step": 9321 }, { "epoch": 0.5676704320555369, "grad_norm": 1.0056878788768921, "learning_rate": 4.892476754823347e-06, "loss": 0.3772, "step": 9322 }, { "epoch": 0.5677313278324149, "grad_norm": 0.9461576960051912, "learning_rate": 4.892453603358984e-06, "loss": 0.4283, "step": 9323 }, { "epoch": 0.5677922236092927, "grad_norm": 1.026965268070405, "learning_rate": 4.892430449457241e-06, "loss": 0.4433, "step": 9324 }, { "epoch": 0.5678531193861706, "grad_norm": 0.9927933339482645, "learning_rate": 4.892407293118137e-06, "loss": 0.4301, "step": 9325 }, { "epoch": 0.5679140151630484, "grad_norm": 0.9482284125028106, "learning_rate": 4.8923841343417e-06, "loss": 0.4396, "step": 9326 }, { "epoch": 0.5679749109399264, "grad_norm": 0.99862928704095, "learning_rate": 4.892360973127952e-06, "loss": 0.4783, "step": 9327 }, { "epoch": 0.5680358067168042, "grad_norm": 0.9220984233509822, "learning_rate": 4.892337809476916e-06, "loss": 0.4342, "step": 9328 }, { "epoch": 0.568096702493682, "grad_norm": 1.0796461464868563, "learning_rate": 4.892314643388615e-06, "loss": 0.3529, "step": 9329 }, { "epoch": 0.5681575982705599, "grad_norm": 1.056231427204922, "learning_rate": 4.892291474863075e-06, "loss": 0.4579, "step": 9330 }, { "epoch": 0.5682184940474378, "grad_norm": 1.0069251053652046, "learning_rate": 4.892268303900317e-06, "loss": 0.466, "step": 9331 }, { "epoch": 0.5682793898243157, "grad_norm": 1.0577959813253968, "learning_rate": 4.892245130500366e-06, "loss": 0.4321, "step": 9332 }, { "epoch": 0.5683402856011935, "grad_norm": 1.0858142254009773, "learning_rate": 4.892221954663246e-06, "loss": 0.3468, "step": 9333 }, { "epoch": 0.5684011813780714, "grad_norm": 0.9993206505096119, "learning_rate": 4.89219877638898e-06, "loss": 0.4546, "step": 9334 }, { "epoch": 0.5684620771549493, "grad_norm": 0.995075924423158, "learning_rate": 4.892175595677591e-06, "loss": 0.4351, "step": 9335 }, { "epoch": 0.5685229729318272, "grad_norm": 1.1160441827216658, "learning_rate": 4.8921524125291035e-06, "loss": 0.4229, "step": 9336 }, { "epoch": 0.568583868708705, "grad_norm": 1.0419350254324737, "learning_rate": 4.8921292269435406e-06, "loss": 0.3712, "step": 9337 }, { "epoch": 0.5686447644855829, "grad_norm": 0.9349486240771577, "learning_rate": 4.892106038920927e-06, "loss": 0.431, "step": 9338 }, { "epoch": 0.5687056602624608, "grad_norm": 1.0513498425223342, "learning_rate": 4.892082848461285e-06, "loss": 0.4563, "step": 9339 }, { "epoch": 0.5687665560393387, "grad_norm": 1.0438248237173668, "learning_rate": 4.892059655564638e-06, "loss": 0.4363, "step": 9340 }, { "epoch": 0.5688274518162165, "grad_norm": 0.9580903407759845, "learning_rate": 4.892036460231011e-06, "loss": 0.5285, "step": 9341 }, { "epoch": 0.5688883475930944, "grad_norm": 1.019131703794979, "learning_rate": 4.892013262460428e-06, "loss": 0.5092, "step": 9342 }, { "epoch": 0.5689492433699723, "grad_norm": 0.979360238639532, "learning_rate": 4.89199006225291e-06, "loss": 0.4223, "step": 9343 }, { "epoch": 0.5690101391468502, "grad_norm": 0.938651213673721, "learning_rate": 4.891966859608483e-06, "loss": 0.4531, "step": 9344 }, { "epoch": 0.569071034923728, "grad_norm": 1.0210493622098138, "learning_rate": 4.8919436545271695e-06, "loss": 0.4286, "step": 9345 }, { "epoch": 0.5691319307006059, "grad_norm": 0.9972716101113916, "learning_rate": 4.8919204470089945e-06, "loss": 0.4126, "step": 9346 }, { "epoch": 0.5691928264774838, "grad_norm": 0.9895442692773325, "learning_rate": 4.8918972370539795e-06, "loss": 0.4093, "step": 9347 }, { "epoch": 0.5692537222543617, "grad_norm": 0.9790525658573141, "learning_rate": 4.89187402466215e-06, "loss": 0.539, "step": 9348 }, { "epoch": 0.5693146180312395, "grad_norm": 1.0389361625603417, "learning_rate": 4.891850809833529e-06, "loss": 0.4462, "step": 9349 }, { "epoch": 0.5693755138081175, "grad_norm": 1.1346266983198365, "learning_rate": 4.891827592568139e-06, "loss": 0.492, "step": 9350 }, { "epoch": 0.5694364095849953, "grad_norm": 0.8964758603756409, "learning_rate": 4.891804372866006e-06, "loss": 0.5095, "step": 9351 }, { "epoch": 0.5694973053618732, "grad_norm": 1.0097963462453667, "learning_rate": 4.891781150727152e-06, "loss": 0.5049, "step": 9352 }, { "epoch": 0.569558201138751, "grad_norm": 1.074657447721578, "learning_rate": 4.8917579261516015e-06, "loss": 0.4115, "step": 9353 }, { "epoch": 0.569619096915629, "grad_norm": 0.9674433399725826, "learning_rate": 4.8917346991393775e-06, "loss": 0.47, "step": 9354 }, { "epoch": 0.5696799926925068, "grad_norm": 1.081549768918777, "learning_rate": 4.891711469690505e-06, "loss": 0.4303, "step": 9355 }, { "epoch": 0.5697408884693846, "grad_norm": 1.0300639616936433, "learning_rate": 4.8916882378050045e-06, "loss": 0.4795, "step": 9356 }, { "epoch": 0.5698017842462625, "grad_norm": 1.0461974092055903, "learning_rate": 4.891665003482903e-06, "loss": 0.4676, "step": 9357 }, { "epoch": 0.5698626800231404, "grad_norm": 0.9117033151989818, "learning_rate": 4.8916417667242225e-06, "loss": 0.4511, "step": 9358 }, { "epoch": 0.5699235758000183, "grad_norm": 1.0400546399701118, "learning_rate": 4.891618527528987e-06, "loss": 0.4423, "step": 9359 }, { "epoch": 0.5699844715768961, "grad_norm": 1.035567168174677, "learning_rate": 4.89159528589722e-06, "loss": 0.4544, "step": 9360 }, { "epoch": 0.570045367353774, "grad_norm": 0.9491706458038978, "learning_rate": 4.891572041828947e-06, "loss": 0.4839, "step": 9361 }, { "epoch": 0.5701062631306519, "grad_norm": 0.9409398322658107, "learning_rate": 4.891548795324189e-06, "loss": 0.5077, "step": 9362 }, { "epoch": 0.5701671589075298, "grad_norm": 0.9190691268308558, "learning_rate": 4.8915255463829705e-06, "loss": 0.4952, "step": 9363 }, { "epoch": 0.5702280546844076, "grad_norm": 1.0295286657597031, "learning_rate": 4.891502295005316e-06, "loss": 0.5254, "step": 9364 }, { "epoch": 0.5702889504612855, "grad_norm": 1.0266326492758422, "learning_rate": 4.891479041191248e-06, "loss": 0.4706, "step": 9365 }, { "epoch": 0.5703498462381634, "grad_norm": 0.9840561492451313, "learning_rate": 4.891455784940792e-06, "loss": 0.4035, "step": 9366 }, { "epoch": 0.5704107420150413, "grad_norm": 1.0304100859162493, "learning_rate": 4.89143252625397e-06, "loss": 0.3962, "step": 9367 }, { "epoch": 0.5704716377919191, "grad_norm": 0.9387973579336008, "learning_rate": 4.891409265130806e-06, "loss": 0.5336, "step": 9368 }, { "epoch": 0.570532533568797, "grad_norm": 1.072806035574443, "learning_rate": 4.891386001571324e-06, "loss": 0.4208, "step": 9369 }, { "epoch": 0.5705934293456749, "grad_norm": 0.9879807185450689, "learning_rate": 4.891362735575547e-06, "loss": 0.4602, "step": 9370 }, { "epoch": 0.5706543251225528, "grad_norm": 1.0295454185870905, "learning_rate": 4.891339467143501e-06, "loss": 0.3805, "step": 9371 }, { "epoch": 0.5707152208994306, "grad_norm": 1.0100978492403467, "learning_rate": 4.891316196275208e-06, "loss": 0.4196, "step": 9372 }, { "epoch": 0.5707761166763085, "grad_norm": 1.00116284345243, "learning_rate": 4.891292922970691e-06, "loss": 0.3986, "step": 9373 }, { "epoch": 0.5708370124531864, "grad_norm": 1.1213232355307516, "learning_rate": 4.891269647229974e-06, "loss": 0.3944, "step": 9374 }, { "epoch": 0.5708979082300643, "grad_norm": 1.007970880255596, "learning_rate": 4.891246369053082e-06, "loss": 0.4237, "step": 9375 }, { "epoch": 0.5709588040069421, "grad_norm": 1.0138152620008793, "learning_rate": 4.891223088440038e-06, "loss": 0.3982, "step": 9376 }, { "epoch": 0.57101969978382, "grad_norm": 0.9254760511372696, "learning_rate": 4.891199805390865e-06, "loss": 0.4721, "step": 9377 }, { "epoch": 0.5710805955606979, "grad_norm": 1.0186114564020974, "learning_rate": 4.891176519905587e-06, "loss": 0.535, "step": 9378 }, { "epoch": 0.5711414913375757, "grad_norm": 0.9780655347876226, "learning_rate": 4.8911532319842296e-06, "loss": 0.4462, "step": 9379 }, { "epoch": 0.5712023871144536, "grad_norm": 0.8784390460681749, "learning_rate": 4.891129941626814e-06, "loss": 0.4209, "step": 9380 }, { "epoch": 0.5712632828913314, "grad_norm": 1.0944610010173474, "learning_rate": 4.891106648833365e-06, "loss": 0.4497, "step": 9381 }, { "epoch": 0.5713241786682094, "grad_norm": 1.0517239827012022, "learning_rate": 4.891083353603906e-06, "loss": 0.4361, "step": 9382 }, { "epoch": 0.5713850744450872, "grad_norm": 0.9732294502226145, "learning_rate": 4.891060055938462e-06, "loss": 0.4378, "step": 9383 }, { "epoch": 0.5714459702219651, "grad_norm": 1.0256177988412967, "learning_rate": 4.891036755837055e-06, "loss": 0.384, "step": 9384 }, { "epoch": 0.5715068659988429, "grad_norm": 1.0859048103592106, "learning_rate": 4.89101345329971e-06, "loss": 0.3922, "step": 9385 }, { "epoch": 0.5715677617757209, "grad_norm": 1.0507793496705617, "learning_rate": 4.890990148326449e-06, "loss": 0.4078, "step": 9386 }, { "epoch": 0.5716286575525987, "grad_norm": 0.9804091475128045, "learning_rate": 4.890966840917298e-06, "loss": 0.45, "step": 9387 }, { "epoch": 0.5716895533294766, "grad_norm": 1.06034360153772, "learning_rate": 4.8909435310722795e-06, "loss": 0.4502, "step": 9388 }, { "epoch": 0.5717504491063544, "grad_norm": 1.0032823468275116, "learning_rate": 4.890920218791417e-06, "loss": 0.4319, "step": 9389 }, { "epoch": 0.5718113448832324, "grad_norm": 1.063998500535485, "learning_rate": 4.890896904074735e-06, "loss": 0.4135, "step": 9390 }, { "epoch": 0.5718722406601102, "grad_norm": 0.9945714399187732, "learning_rate": 4.890873586922257e-06, "loss": 0.4266, "step": 9391 }, { "epoch": 0.5719331364369881, "grad_norm": 1.0007749908417345, "learning_rate": 4.8908502673340064e-06, "loss": 0.4285, "step": 9392 }, { "epoch": 0.571994032213866, "grad_norm": 1.041974637640333, "learning_rate": 4.890826945310008e-06, "loss": 0.469, "step": 9393 }, { "epoch": 0.5720549279907439, "grad_norm": 0.9751626826193333, "learning_rate": 4.890803620850284e-06, "loss": 0.4102, "step": 9394 }, { "epoch": 0.5721158237676217, "grad_norm": 0.9829842839254317, "learning_rate": 4.890780293954859e-06, "loss": 0.4274, "step": 9395 }, { "epoch": 0.5721767195444996, "grad_norm": 1.0388923425220666, "learning_rate": 4.890756964623758e-06, "loss": 0.4584, "step": 9396 }, { "epoch": 0.5722376153213775, "grad_norm": 0.9036341463345481, "learning_rate": 4.890733632857001e-06, "loss": 0.4971, "step": 9397 }, { "epoch": 0.5722985110982554, "grad_norm": 1.0581726435200098, "learning_rate": 4.890710298654616e-06, "loss": 0.4288, "step": 9398 }, { "epoch": 0.5723594068751332, "grad_norm": 0.9276120966611262, "learning_rate": 4.890686962016625e-06, "loss": 0.4108, "step": 9399 }, { "epoch": 0.572420302652011, "grad_norm": 1.036245113803797, "learning_rate": 4.890663622943052e-06, "loss": 0.4072, "step": 9400 }, { "epoch": 0.572481198428889, "grad_norm": 0.9962643495755602, "learning_rate": 4.890640281433921e-06, "loss": 0.4308, "step": 9401 }, { "epoch": 0.5725420942057668, "grad_norm": 1.088250977377567, "learning_rate": 4.890616937489254e-06, "loss": 0.4474, "step": 9402 }, { "epoch": 0.5726029899826447, "grad_norm": 1.0175181258214732, "learning_rate": 4.890593591109077e-06, "loss": 0.3936, "step": 9403 }, { "epoch": 0.5726638857595225, "grad_norm": 1.062529999584938, "learning_rate": 4.890570242293413e-06, "loss": 0.4619, "step": 9404 }, { "epoch": 0.5727247815364005, "grad_norm": 1.01667239588101, "learning_rate": 4.890546891042285e-06, "loss": 0.4063, "step": 9405 }, { "epoch": 0.5727856773132783, "grad_norm": 0.9828486477963094, "learning_rate": 4.8905235373557184e-06, "loss": 0.5123, "step": 9406 }, { "epoch": 0.5728465730901562, "grad_norm": 0.9958103390003955, "learning_rate": 4.890500181233735e-06, "loss": 0.4378, "step": 9407 }, { "epoch": 0.572907468867034, "grad_norm": 1.0215709589800954, "learning_rate": 4.890476822676361e-06, "loss": 0.4505, "step": 9408 }, { "epoch": 0.572968364643912, "grad_norm": 1.0249922626862609, "learning_rate": 4.890453461683619e-06, "loss": 0.4104, "step": 9409 }, { "epoch": 0.5730292604207898, "grad_norm": 0.974359171049537, "learning_rate": 4.8904300982555316e-06, "loss": 0.4453, "step": 9410 }, { "epoch": 0.5730901561976677, "grad_norm": 1.0153651105108288, "learning_rate": 4.890406732392125e-06, "loss": 0.4571, "step": 9411 }, { "epoch": 0.5731510519745455, "grad_norm": 0.9387794617229666, "learning_rate": 4.89038336409342e-06, "loss": 0.4101, "step": 9412 }, { "epoch": 0.5732119477514235, "grad_norm": 0.9941092864847351, "learning_rate": 4.890359993359443e-06, "loss": 0.4498, "step": 9413 }, { "epoch": 0.5732728435283013, "grad_norm": 0.9617038285386772, "learning_rate": 4.890336620190217e-06, "loss": 0.4838, "step": 9414 }, { "epoch": 0.5733337393051792, "grad_norm": 0.9727242958458981, "learning_rate": 4.890313244585766e-06, "loss": 0.4005, "step": 9415 }, { "epoch": 0.573394635082057, "grad_norm": 1.0427655919653431, "learning_rate": 4.8902898665461125e-06, "loss": 0.4195, "step": 9416 }, { "epoch": 0.573455530858935, "grad_norm": 1.0375513711347377, "learning_rate": 4.890266486071283e-06, "loss": 0.5029, "step": 9417 }, { "epoch": 0.5735164266358128, "grad_norm": 0.9682331251637882, "learning_rate": 4.890243103161298e-06, "loss": 0.4103, "step": 9418 }, { "epoch": 0.5735773224126907, "grad_norm": 0.9666975351452883, "learning_rate": 4.8902197178161845e-06, "loss": 0.4114, "step": 9419 }, { "epoch": 0.5736382181895685, "grad_norm": 1.109028610983371, "learning_rate": 4.890196330035964e-06, "loss": 0.4558, "step": 9420 }, { "epoch": 0.5736991139664465, "grad_norm": 0.9718241511622159, "learning_rate": 4.890172939820662e-06, "loss": 0.4787, "step": 9421 }, { "epoch": 0.5737600097433243, "grad_norm": 1.061501390195266, "learning_rate": 4.8901495471703005e-06, "loss": 0.3511, "step": 9422 }, { "epoch": 0.5738209055202022, "grad_norm": 0.9577262139298655, "learning_rate": 4.890126152084905e-06, "loss": 0.467, "step": 9423 }, { "epoch": 0.57388180129708, "grad_norm": 1.040346023414469, "learning_rate": 4.890102754564499e-06, "loss": 0.4605, "step": 9424 }, { "epoch": 0.573942697073958, "grad_norm": 1.0456802386771595, "learning_rate": 4.890079354609105e-06, "loss": 0.3965, "step": 9425 }, { "epoch": 0.5740035928508358, "grad_norm": 1.0007008208854393, "learning_rate": 4.890055952218748e-06, "loss": 0.3868, "step": 9426 }, { "epoch": 0.5740644886277136, "grad_norm": 1.0147868314791175, "learning_rate": 4.890032547393452e-06, "loss": 0.4052, "step": 9427 }, { "epoch": 0.5741253844045915, "grad_norm": 0.9610362534680947, "learning_rate": 4.89000914013324e-06, "loss": 0.4266, "step": 9428 }, { "epoch": 0.5741862801814694, "grad_norm": 0.963121683006829, "learning_rate": 4.889985730438137e-06, "loss": 0.4398, "step": 9429 }, { "epoch": 0.5742471759583473, "grad_norm": 1.0453673853655177, "learning_rate": 4.889962318308167e-06, "loss": 0.4383, "step": 9430 }, { "epoch": 0.5743080717352251, "grad_norm": 0.9877453081841948, "learning_rate": 4.889938903743352e-06, "loss": 0.4682, "step": 9431 }, { "epoch": 0.5743689675121031, "grad_norm": 0.9848722756537913, "learning_rate": 4.889915486743717e-06, "loss": 0.5206, "step": 9432 }, { "epoch": 0.5744298632889809, "grad_norm": 0.9961937331662283, "learning_rate": 4.889892067309286e-06, "loss": 0.5019, "step": 9433 }, { "epoch": 0.5744907590658588, "grad_norm": 0.9791726314271921, "learning_rate": 4.889868645440082e-06, "loss": 0.4251, "step": 9434 }, { "epoch": 0.5745516548427366, "grad_norm": 1.0590573915239012, "learning_rate": 4.889845221136131e-06, "loss": 0.3725, "step": 9435 }, { "epoch": 0.5746125506196146, "grad_norm": 0.9457971492843854, "learning_rate": 4.889821794397454e-06, "loss": 0.4957, "step": 9436 }, { "epoch": 0.5746734463964924, "grad_norm": 1.046761747446296, "learning_rate": 4.889798365224077e-06, "loss": 0.4267, "step": 9437 }, { "epoch": 0.5747343421733703, "grad_norm": 1.1022440323606468, "learning_rate": 4.889774933616023e-06, "loss": 0.3755, "step": 9438 }, { "epoch": 0.5747952379502481, "grad_norm": 0.969851307196148, "learning_rate": 4.889751499573316e-06, "loss": 0.4024, "step": 9439 }, { "epoch": 0.5748561337271261, "grad_norm": 1.0705654469196284, "learning_rate": 4.889728063095979e-06, "loss": 0.4183, "step": 9440 }, { "epoch": 0.5749170295040039, "grad_norm": 0.9983234581543022, "learning_rate": 4.889704624184037e-06, "loss": 0.4054, "step": 9441 }, { "epoch": 0.5749779252808818, "grad_norm": 1.0046149545956458, "learning_rate": 4.8896811828375145e-06, "loss": 0.4584, "step": 9442 }, { "epoch": 0.5750388210577596, "grad_norm": 0.9553908000739708, "learning_rate": 4.889657739056434e-06, "loss": 0.4423, "step": 9443 }, { "epoch": 0.5750997168346376, "grad_norm": 0.9464377952207956, "learning_rate": 4.88963429284082e-06, "loss": 0.4232, "step": 9444 }, { "epoch": 0.5751606126115154, "grad_norm": 1.0000560121004485, "learning_rate": 4.889610844190696e-06, "loss": 0.4552, "step": 9445 }, { "epoch": 0.5752215083883933, "grad_norm": 0.9316433835092195, "learning_rate": 4.889587393106086e-06, "loss": 0.436, "step": 9446 }, { "epoch": 0.5752824041652711, "grad_norm": 0.9523645178214907, "learning_rate": 4.8895639395870145e-06, "loss": 0.4416, "step": 9447 }, { "epoch": 0.5753432999421491, "grad_norm": 1.0120548848684785, "learning_rate": 4.889540483633504e-06, "loss": 0.4356, "step": 9448 }, { "epoch": 0.5754041957190269, "grad_norm": 0.9617531494139145, "learning_rate": 4.889517025245581e-06, "loss": 0.4788, "step": 9449 }, { "epoch": 0.5754650914959047, "grad_norm": 0.9336247930856026, "learning_rate": 4.889493564423267e-06, "loss": 0.4876, "step": 9450 }, { "epoch": 0.5755259872727826, "grad_norm": 0.9926712782253093, "learning_rate": 4.889470101166586e-06, "loss": 0.4757, "step": 9451 }, { "epoch": 0.5755868830496605, "grad_norm": 0.982883776448768, "learning_rate": 4.889446635475563e-06, "loss": 0.4856, "step": 9452 }, { "epoch": 0.5756477788265384, "grad_norm": 0.9633164197418969, "learning_rate": 4.889423167350221e-06, "loss": 0.4568, "step": 9453 }, { "epoch": 0.5757086746034162, "grad_norm": 1.042033558264612, "learning_rate": 4.889399696790586e-06, "loss": 0.4175, "step": 9454 }, { "epoch": 0.5757695703802941, "grad_norm": 1.064727900995122, "learning_rate": 4.8893762237966785e-06, "loss": 0.4366, "step": 9455 }, { "epoch": 0.575830466157172, "grad_norm": 1.101871938790798, "learning_rate": 4.889352748368524e-06, "loss": 0.4363, "step": 9456 }, { "epoch": 0.5758913619340499, "grad_norm": 1.0089922789288257, "learning_rate": 4.8893292705061475e-06, "loss": 0.4534, "step": 9457 }, { "epoch": 0.5759522577109277, "grad_norm": 0.8833144954555872, "learning_rate": 4.889305790209573e-06, "loss": 0.4537, "step": 9458 }, { "epoch": 0.5760131534878056, "grad_norm": 1.0034581108480702, "learning_rate": 4.889282307478822e-06, "loss": 0.4319, "step": 9459 }, { "epoch": 0.5760740492646835, "grad_norm": 0.9950050001768067, "learning_rate": 4.8892588223139196e-06, "loss": 0.4125, "step": 9460 }, { "epoch": 0.5761349450415614, "grad_norm": 0.8885288628939704, "learning_rate": 4.889235334714891e-06, "loss": 0.4657, "step": 9461 }, { "epoch": 0.5761958408184392, "grad_norm": 0.9723149258675022, "learning_rate": 4.889211844681758e-06, "loss": 0.4456, "step": 9462 }, { "epoch": 0.5762567365953171, "grad_norm": 0.9854556948843798, "learning_rate": 4.8891883522145465e-06, "loss": 0.446, "step": 9463 }, { "epoch": 0.576317632372195, "grad_norm": 0.9936267408885218, "learning_rate": 4.88916485731328e-06, "loss": 0.4907, "step": 9464 }, { "epoch": 0.5763785281490729, "grad_norm": 0.9972876534685613, "learning_rate": 4.889141359977981e-06, "loss": 0.3812, "step": 9465 }, { "epoch": 0.5764394239259507, "grad_norm": 1.02677190201874, "learning_rate": 4.889117860208675e-06, "loss": 0.42, "step": 9466 }, { "epoch": 0.5765003197028286, "grad_norm": 1.0324181362975626, "learning_rate": 4.889094358005385e-06, "loss": 0.4748, "step": 9467 }, { "epoch": 0.5765612154797065, "grad_norm": 1.0607436910306827, "learning_rate": 4.889070853368136e-06, "loss": 0.5091, "step": 9468 }, { "epoch": 0.5766221112565844, "grad_norm": 0.9931317746305519, "learning_rate": 4.889047346296951e-06, "loss": 0.4342, "step": 9469 }, { "epoch": 0.5766830070334622, "grad_norm": 1.037751904395643, "learning_rate": 4.8890238367918544e-06, "loss": 0.3941, "step": 9470 }, { "epoch": 0.57674390281034, "grad_norm": 0.9982655460030023, "learning_rate": 4.889000324852869e-06, "loss": 0.4624, "step": 9471 }, { "epoch": 0.576804798587218, "grad_norm": 1.0241172467201318, "learning_rate": 4.888976810480021e-06, "loss": 0.4279, "step": 9472 }, { "epoch": 0.5768656943640958, "grad_norm": 0.9874885288221167, "learning_rate": 4.888953293673332e-06, "loss": 0.3917, "step": 9473 }, { "epoch": 0.5769265901409737, "grad_norm": 0.9644727481963272, "learning_rate": 4.888929774432828e-06, "loss": 0.4289, "step": 9474 }, { "epoch": 0.5769874859178517, "grad_norm": 1.0834286509653337, "learning_rate": 4.888906252758531e-06, "loss": 0.4696, "step": 9475 }, { "epoch": 0.5770483816947295, "grad_norm": 0.9553082723188735, "learning_rate": 4.888882728650467e-06, "loss": 0.3981, "step": 9476 }, { "epoch": 0.5771092774716073, "grad_norm": 1.0228748443589197, "learning_rate": 4.888859202108658e-06, "loss": 0.4506, "step": 9477 }, { "epoch": 0.5771701732484852, "grad_norm": 0.9300157458403906, "learning_rate": 4.88883567313313e-06, "loss": 0.504, "step": 9478 }, { "epoch": 0.5772310690253631, "grad_norm": 1.1160620842695181, "learning_rate": 4.888812141723904e-06, "loss": 0.4458, "step": 9479 }, { "epoch": 0.577291964802241, "grad_norm": 1.0143260337135989, "learning_rate": 4.888788607881008e-06, "loss": 0.4829, "step": 9480 }, { "epoch": 0.5773528605791188, "grad_norm": 1.0129376003685493, "learning_rate": 4.888765071604464e-06, "loss": 0.4496, "step": 9481 }, { "epoch": 0.5774137563559967, "grad_norm": 1.0512172305647327, "learning_rate": 4.888741532894294e-06, "loss": 0.4025, "step": 9482 }, { "epoch": 0.5774746521328746, "grad_norm": 1.056897816132524, "learning_rate": 4.888717991750525e-06, "loss": 0.3918, "step": 9483 }, { "epoch": 0.5775355479097525, "grad_norm": 0.917425333692416, "learning_rate": 4.888694448173179e-06, "loss": 0.4676, "step": 9484 }, { "epoch": 0.5775964436866303, "grad_norm": 1.0697161907723725, "learning_rate": 4.8886709021622805e-06, "loss": 0.4622, "step": 9485 }, { "epoch": 0.5776573394635082, "grad_norm": 1.0384099376080302, "learning_rate": 4.888647353717855e-06, "loss": 0.43, "step": 9486 }, { "epoch": 0.5777182352403861, "grad_norm": 1.043422185444454, "learning_rate": 4.8886238028399245e-06, "loss": 0.462, "step": 9487 }, { "epoch": 0.577779131017264, "grad_norm": 0.9971323874274284, "learning_rate": 4.8886002495285134e-06, "loss": 0.4358, "step": 9488 }, { "epoch": 0.5778400267941418, "grad_norm": 1.0520446056577373, "learning_rate": 4.888576693783646e-06, "loss": 0.4878, "step": 9489 }, { "epoch": 0.5779009225710197, "grad_norm": 0.9257988638759518, "learning_rate": 4.888553135605347e-06, "loss": 0.4403, "step": 9490 }, { "epoch": 0.5779618183478976, "grad_norm": 0.9149574988309309, "learning_rate": 4.88852957499364e-06, "loss": 0.5187, "step": 9491 }, { "epoch": 0.5780227141247755, "grad_norm": 1.001656157410161, "learning_rate": 4.888506011948549e-06, "loss": 0.3761, "step": 9492 }, { "epoch": 0.5780836099016533, "grad_norm": 0.9687731795536906, "learning_rate": 4.8884824464700964e-06, "loss": 0.4584, "step": 9493 }, { "epoch": 0.5781445056785312, "grad_norm": 0.9659815967917587, "learning_rate": 4.888458878558307e-06, "loss": 0.4634, "step": 9494 }, { "epoch": 0.5782054014554091, "grad_norm": 0.9805757204639726, "learning_rate": 4.888435308213208e-06, "loss": 0.4657, "step": 9495 }, { "epoch": 0.578266297232287, "grad_norm": 0.9428449075345413, "learning_rate": 4.888411735434818e-06, "loss": 0.4593, "step": 9496 }, { "epoch": 0.5783271930091648, "grad_norm": 1.0640704086901103, "learning_rate": 4.888388160223165e-06, "loss": 0.423, "step": 9497 }, { "epoch": 0.5783880887860426, "grad_norm": 1.0432179386128113, "learning_rate": 4.888364582578272e-06, "loss": 0.474, "step": 9498 }, { "epoch": 0.5784489845629206, "grad_norm": 1.0016205227680535, "learning_rate": 4.888341002500163e-06, "loss": 0.425, "step": 9499 }, { "epoch": 0.5785098803397984, "grad_norm": 1.0740850120105498, "learning_rate": 4.888317419988862e-06, "loss": 0.3995, "step": 9500 }, { "epoch": 0.5785707761166763, "grad_norm": 1.076620694708439, "learning_rate": 4.888293835044392e-06, "loss": 0.4826, "step": 9501 }, { "epoch": 0.5786316718935541, "grad_norm": 1.0185884666105, "learning_rate": 4.888270247666778e-06, "loss": 0.4439, "step": 9502 }, { "epoch": 0.5786925676704321, "grad_norm": 0.9794383150125585, "learning_rate": 4.888246657856044e-06, "loss": 0.458, "step": 9503 }, { "epoch": 0.5787534634473099, "grad_norm": 1.0801181394012376, "learning_rate": 4.8882230656122144e-06, "loss": 0.4787, "step": 9504 }, { "epoch": 0.5788143592241878, "grad_norm": 0.9407676170505936, "learning_rate": 4.888199470935312e-06, "loss": 0.4921, "step": 9505 }, { "epoch": 0.5788752550010656, "grad_norm": 1.0332461650870075, "learning_rate": 4.8881758738253626e-06, "loss": 0.4127, "step": 9506 }, { "epoch": 0.5789361507779436, "grad_norm": 1.0797638969464967, "learning_rate": 4.8881522742823884e-06, "loss": 0.5054, "step": 9507 }, { "epoch": 0.5789970465548214, "grad_norm": 1.0543612909849747, "learning_rate": 4.888128672306415e-06, "loss": 0.4374, "step": 9508 }, { "epoch": 0.5790579423316993, "grad_norm": 0.9819219838000429, "learning_rate": 4.888105067897465e-06, "loss": 0.48, "step": 9509 }, { "epoch": 0.5791188381085771, "grad_norm": 0.9791414292322748, "learning_rate": 4.888081461055564e-06, "loss": 0.4411, "step": 9510 }, { "epoch": 0.5791797338854551, "grad_norm": 0.9624895705847591, "learning_rate": 4.888057851780735e-06, "loss": 0.4757, "step": 9511 }, { "epoch": 0.5792406296623329, "grad_norm": 0.9709567227410253, "learning_rate": 4.888034240073002e-06, "loss": 0.463, "step": 9512 }, { "epoch": 0.5793015254392108, "grad_norm": 0.9774875693706339, "learning_rate": 4.88801062593239e-06, "loss": 0.4097, "step": 9513 }, { "epoch": 0.5793624212160887, "grad_norm": 0.9029218739915197, "learning_rate": 4.887987009358922e-06, "loss": 0.4583, "step": 9514 }, { "epoch": 0.5794233169929666, "grad_norm": 1.0769981790961625, "learning_rate": 4.887963390352622e-06, "loss": 0.4226, "step": 9515 }, { "epoch": 0.5794842127698444, "grad_norm": 0.9720077668050596, "learning_rate": 4.887939768913515e-06, "loss": 0.4411, "step": 9516 }, { "epoch": 0.5795451085467223, "grad_norm": 1.0081241590202354, "learning_rate": 4.887916145041625e-06, "loss": 0.4438, "step": 9517 }, { "epoch": 0.5796060043236002, "grad_norm": 0.9996729758498737, "learning_rate": 4.887892518736975e-06, "loss": 0.4461, "step": 9518 }, { "epoch": 0.579666900100478, "grad_norm": 0.9280900925589571, "learning_rate": 4.887868889999591e-06, "loss": 0.4288, "step": 9519 }, { "epoch": 0.5797277958773559, "grad_norm": 1.0060998435656472, "learning_rate": 4.887845258829495e-06, "loss": 0.4282, "step": 9520 }, { "epoch": 0.5797886916542337, "grad_norm": 1.002095564860163, "learning_rate": 4.887821625226711e-06, "loss": 0.4328, "step": 9521 }, { "epoch": 0.5798495874311117, "grad_norm": 1.0057760839827123, "learning_rate": 4.887797989191265e-06, "loss": 0.4354, "step": 9522 }, { "epoch": 0.5799104832079895, "grad_norm": 1.0440420637322412, "learning_rate": 4.88777435072318e-06, "loss": 0.5175, "step": 9523 }, { "epoch": 0.5799713789848674, "grad_norm": 1.0488222560248122, "learning_rate": 4.88775070982248e-06, "loss": 0.4601, "step": 9524 }, { "epoch": 0.5800322747617452, "grad_norm": 1.0794966975942555, "learning_rate": 4.887727066489189e-06, "loss": 0.4573, "step": 9525 }, { "epoch": 0.5800931705386232, "grad_norm": 0.9909137167984896, "learning_rate": 4.887703420723331e-06, "loss": 0.3785, "step": 9526 }, { "epoch": 0.580154066315501, "grad_norm": 0.9232397476116864, "learning_rate": 4.8876797725249314e-06, "loss": 0.4442, "step": 9527 }, { "epoch": 0.5802149620923789, "grad_norm": 1.0666333463349018, "learning_rate": 4.887656121894013e-06, "loss": 0.415, "step": 9528 }, { "epoch": 0.5802758578692567, "grad_norm": 1.0143764913081055, "learning_rate": 4.8876324688306e-06, "loss": 0.4712, "step": 9529 }, { "epoch": 0.5803367536461347, "grad_norm": 1.0567398244981496, "learning_rate": 4.887608813334716e-06, "loss": 0.3918, "step": 9530 }, { "epoch": 0.5803976494230125, "grad_norm": 0.983807810388449, "learning_rate": 4.887585155406387e-06, "loss": 0.454, "step": 9531 }, { "epoch": 0.5804585451998904, "grad_norm": 1.0624276329929798, "learning_rate": 4.887561495045635e-06, "loss": 0.4329, "step": 9532 }, { "epoch": 0.5805194409767682, "grad_norm": 0.9372234521681235, "learning_rate": 4.8875378322524855e-06, "loss": 0.4914, "step": 9533 }, { "epoch": 0.5805803367536462, "grad_norm": 1.0806131694848602, "learning_rate": 4.887514167026962e-06, "loss": 0.3945, "step": 9534 }, { "epoch": 0.580641232530524, "grad_norm": 0.9351622616054343, "learning_rate": 4.887490499369088e-06, "loss": 0.3948, "step": 9535 }, { "epoch": 0.5807021283074019, "grad_norm": 1.0340915101160828, "learning_rate": 4.88746682927889e-06, "loss": 0.4743, "step": 9536 }, { "epoch": 0.5807630240842797, "grad_norm": 1.127958064296269, "learning_rate": 4.887443156756389e-06, "loss": 0.3857, "step": 9537 }, { "epoch": 0.5808239198611577, "grad_norm": 0.9694755436024614, "learning_rate": 4.887419481801611e-06, "loss": 0.4697, "step": 9538 }, { "epoch": 0.5808848156380355, "grad_norm": 0.9290389066929521, "learning_rate": 4.887395804414579e-06, "loss": 0.491, "step": 9539 }, { "epoch": 0.5809457114149134, "grad_norm": 1.097065193468815, "learning_rate": 4.8873721245953186e-06, "loss": 0.4456, "step": 9540 }, { "epoch": 0.5810066071917912, "grad_norm": 1.0535759038538592, "learning_rate": 4.887348442343853e-06, "loss": 0.3827, "step": 9541 }, { "epoch": 0.5810675029686692, "grad_norm": 0.9755258730070128, "learning_rate": 4.887324757660206e-06, "loss": 0.4435, "step": 9542 }, { "epoch": 0.581128398745547, "grad_norm": 0.9730400639435711, "learning_rate": 4.887301070544402e-06, "loss": 0.434, "step": 9543 }, { "epoch": 0.5811892945224248, "grad_norm": 1.0037514054470107, "learning_rate": 4.887277380996466e-06, "loss": 0.4467, "step": 9544 }, { "epoch": 0.5812501902993027, "grad_norm": 0.9345942853172606, "learning_rate": 4.887253689016421e-06, "loss": 0.4335, "step": 9545 }, { "epoch": 0.5813110860761806, "grad_norm": 0.9927159848734683, "learning_rate": 4.8872299946042925e-06, "loss": 0.3988, "step": 9546 }, { "epoch": 0.5813719818530585, "grad_norm": 1.0194467236907447, "learning_rate": 4.8872062977601035e-06, "loss": 0.4659, "step": 9547 }, { "epoch": 0.5814328776299363, "grad_norm": 1.0287216969242896, "learning_rate": 4.887182598483877e-06, "loss": 0.4184, "step": 9548 }, { "epoch": 0.5814937734068142, "grad_norm": 0.9120354007475737, "learning_rate": 4.8871588967756395e-06, "loss": 0.5385, "step": 9549 }, { "epoch": 0.5815546691836921, "grad_norm": 1.0393554320328393, "learning_rate": 4.8871351926354136e-06, "loss": 0.3953, "step": 9550 }, { "epoch": 0.58161556496057, "grad_norm": 0.9982551893708046, "learning_rate": 4.887111486063225e-06, "loss": 0.4171, "step": 9551 }, { "epoch": 0.5816764607374478, "grad_norm": 1.073255309850338, "learning_rate": 4.887087777059096e-06, "loss": 0.4838, "step": 9552 }, { "epoch": 0.5817373565143257, "grad_norm": 0.9923198724109161, "learning_rate": 4.887064065623052e-06, "loss": 0.4039, "step": 9553 }, { "epoch": 0.5817982522912036, "grad_norm": 0.9776123942166841, "learning_rate": 4.887040351755117e-06, "loss": 0.4526, "step": 9554 }, { "epoch": 0.5818591480680815, "grad_norm": 1.0003459508299586, "learning_rate": 4.8870166354553135e-06, "loss": 0.4167, "step": 9555 }, { "epoch": 0.5819200438449593, "grad_norm": 1.1116296082811943, "learning_rate": 4.886992916723669e-06, "loss": 0.4353, "step": 9556 }, { "epoch": 0.5819809396218373, "grad_norm": 0.9747919158964375, "learning_rate": 4.886969195560205e-06, "loss": 0.4605, "step": 9557 }, { "epoch": 0.5820418353987151, "grad_norm": 1.1190247108470268, "learning_rate": 4.886945471964946e-06, "loss": 0.4234, "step": 9558 }, { "epoch": 0.582102731175593, "grad_norm": 0.9337703438506001, "learning_rate": 4.886921745937916e-06, "loss": 0.4744, "step": 9559 }, { "epoch": 0.5821636269524708, "grad_norm": 0.9108307081686399, "learning_rate": 4.886898017479142e-06, "loss": 0.5216, "step": 9560 }, { "epoch": 0.5822245227293488, "grad_norm": 1.0261059720426948, "learning_rate": 4.886874286588644e-06, "loss": 0.5385, "step": 9561 }, { "epoch": 0.5822854185062266, "grad_norm": 0.9593046134320602, "learning_rate": 4.886850553266448e-06, "loss": 0.4085, "step": 9562 }, { "epoch": 0.5823463142831045, "grad_norm": 0.9880067290546138, "learning_rate": 4.886826817512579e-06, "loss": 0.4885, "step": 9563 }, { "epoch": 0.5824072100599823, "grad_norm": 1.0444584566497375, "learning_rate": 4.886803079327061e-06, "loss": 0.4179, "step": 9564 }, { "epoch": 0.5824681058368603, "grad_norm": 0.9797967990288933, "learning_rate": 4.886779338709917e-06, "loss": 0.4405, "step": 9565 }, { "epoch": 0.5825290016137381, "grad_norm": 0.9537196417611215, "learning_rate": 4.886755595661171e-06, "loss": 0.4458, "step": 9566 }, { "epoch": 0.582589897390616, "grad_norm": 0.9883574325452839, "learning_rate": 4.8867318501808494e-06, "loss": 0.4199, "step": 9567 }, { "epoch": 0.5826507931674938, "grad_norm": 1.0021804789867692, "learning_rate": 4.8867081022689746e-06, "loss": 0.4024, "step": 9568 }, { "epoch": 0.5827116889443718, "grad_norm": 0.9851796709648275, "learning_rate": 4.8866843519255705e-06, "loss": 0.4782, "step": 9569 }, { "epoch": 0.5827725847212496, "grad_norm": 0.9170318570545717, "learning_rate": 4.886660599150663e-06, "loss": 0.458, "step": 9570 }, { "epoch": 0.5828334804981274, "grad_norm": 0.9994776360150854, "learning_rate": 4.886636843944275e-06, "loss": 0.466, "step": 9571 }, { "epoch": 0.5828943762750053, "grad_norm": 1.00214746466086, "learning_rate": 4.886613086306431e-06, "loss": 0.4274, "step": 9572 }, { "epoch": 0.5829552720518832, "grad_norm": 0.9572418614097293, "learning_rate": 4.886589326237154e-06, "loss": 0.4308, "step": 9573 }, { "epoch": 0.5830161678287611, "grad_norm": 0.9893843758324284, "learning_rate": 4.886565563736471e-06, "loss": 0.4253, "step": 9574 }, { "epoch": 0.5830770636056389, "grad_norm": 0.9133811373155678, "learning_rate": 4.886541798804404e-06, "loss": 0.4627, "step": 9575 }, { "epoch": 0.5831379593825168, "grad_norm": 1.0210001029552174, "learning_rate": 4.8865180314409785e-06, "loss": 0.4677, "step": 9576 }, { "epoch": 0.5831988551593947, "grad_norm": 0.9245696754272876, "learning_rate": 4.8864942616462175e-06, "loss": 0.5014, "step": 9577 }, { "epoch": 0.5832597509362726, "grad_norm": 0.9701413669447793, "learning_rate": 4.886470489420145e-06, "loss": 0.4455, "step": 9578 }, { "epoch": 0.5833206467131504, "grad_norm": 1.0088749702322481, "learning_rate": 4.886446714762787e-06, "loss": 0.459, "step": 9579 }, { "epoch": 0.5833815424900283, "grad_norm": 1.0397868789322866, "learning_rate": 4.886422937674167e-06, "loss": 0.4353, "step": 9580 }, { "epoch": 0.5834424382669062, "grad_norm": 1.0594519584135944, "learning_rate": 4.886399158154308e-06, "loss": 0.4594, "step": 9581 }, { "epoch": 0.5835033340437841, "grad_norm": 1.0689908354148103, "learning_rate": 4.886375376203235e-06, "loss": 0.4235, "step": 9582 }, { "epoch": 0.5835642298206619, "grad_norm": 0.993476873484271, "learning_rate": 4.886351591820974e-06, "loss": 0.4525, "step": 9583 }, { "epoch": 0.5836251255975398, "grad_norm": 1.046777871712002, "learning_rate": 4.8863278050075455e-06, "loss": 0.4315, "step": 9584 }, { "epoch": 0.5836860213744177, "grad_norm": 0.9589599888740887, "learning_rate": 4.886304015762977e-06, "loss": 0.4639, "step": 9585 }, { "epoch": 0.5837469171512956, "grad_norm": 0.9442356977439631, "learning_rate": 4.886280224087292e-06, "loss": 0.453, "step": 9586 }, { "epoch": 0.5838078129281734, "grad_norm": 1.0335732686731733, "learning_rate": 4.886256429980514e-06, "loss": 0.4687, "step": 9587 }, { "epoch": 0.5838687087050513, "grad_norm": 0.9702814197150169, "learning_rate": 4.886232633442667e-06, "loss": 0.503, "step": 9588 }, { "epoch": 0.5839296044819292, "grad_norm": 0.9872842390038018, "learning_rate": 4.8862088344737756e-06, "loss": 0.4497, "step": 9589 }, { "epoch": 0.583990500258807, "grad_norm": 0.9689165837624784, "learning_rate": 4.886185033073866e-06, "loss": 0.4211, "step": 9590 }, { "epoch": 0.5840513960356849, "grad_norm": 1.0070726685257505, "learning_rate": 4.8861612292429585e-06, "loss": 0.4356, "step": 9591 }, { "epoch": 0.5841122918125627, "grad_norm": 0.9511086733903529, "learning_rate": 4.886137422981081e-06, "loss": 0.3899, "step": 9592 }, { "epoch": 0.5841731875894407, "grad_norm": 1.0240660014905054, "learning_rate": 4.886113614288256e-06, "loss": 0.4864, "step": 9593 }, { "epoch": 0.5842340833663185, "grad_norm": 1.0211031794119292, "learning_rate": 4.8860898031645074e-06, "loss": 0.3849, "step": 9594 }, { "epoch": 0.5842949791431964, "grad_norm": 1.0102212425740509, "learning_rate": 4.88606598960986e-06, "loss": 0.4687, "step": 9595 }, { "epoch": 0.5843558749200743, "grad_norm": 0.9717321933447369, "learning_rate": 4.886042173624339e-06, "loss": 0.431, "step": 9596 }, { "epoch": 0.5844167706969522, "grad_norm": 1.079139926806279, "learning_rate": 4.886018355207968e-06, "loss": 0.4443, "step": 9597 }, { "epoch": 0.58447766647383, "grad_norm": 1.1126793711356269, "learning_rate": 4.885994534360771e-06, "loss": 0.4016, "step": 9598 }, { "epoch": 0.5845385622507079, "grad_norm": 0.9808864635692627, "learning_rate": 4.885970711082772e-06, "loss": 0.4903, "step": 9599 }, { "epoch": 0.5845994580275858, "grad_norm": 1.0050627380454258, "learning_rate": 4.885946885373995e-06, "loss": 0.4636, "step": 9600 }, { "epoch": 0.5846603538044637, "grad_norm": 0.9990014653736038, "learning_rate": 4.8859230572344665e-06, "loss": 0.416, "step": 9601 }, { "epoch": 0.5847212495813415, "grad_norm": 1.0300965525544938, "learning_rate": 4.885899226664208e-06, "loss": 0.4028, "step": 9602 }, { "epoch": 0.5847821453582194, "grad_norm": 0.9852467912177515, "learning_rate": 4.8858753936632455e-06, "loss": 0.4116, "step": 9603 }, { "epoch": 0.5848430411350973, "grad_norm": 1.0280910187920924, "learning_rate": 4.8858515582316024e-06, "loss": 0.5167, "step": 9604 }, { "epoch": 0.5849039369119752, "grad_norm": 1.0067687670759042, "learning_rate": 4.8858277203693035e-06, "loss": 0.4962, "step": 9605 }, { "epoch": 0.584964832688853, "grad_norm": 1.1273177570201933, "learning_rate": 4.885803880076373e-06, "loss": 0.4008, "step": 9606 }, { "epoch": 0.5850257284657309, "grad_norm": 0.9783042001717102, "learning_rate": 4.885780037352835e-06, "loss": 0.4141, "step": 9607 }, { "epoch": 0.5850866242426088, "grad_norm": 1.0138668284709125, "learning_rate": 4.885756192198714e-06, "loss": 0.4132, "step": 9608 }, { "epoch": 0.5851475200194867, "grad_norm": 1.0643321949045874, "learning_rate": 4.885732344614035e-06, "loss": 0.3962, "step": 9609 }, { "epoch": 0.5852084157963645, "grad_norm": 1.056570315281462, "learning_rate": 4.8857084945988194e-06, "loss": 0.3751, "step": 9610 }, { "epoch": 0.5852693115732424, "grad_norm": 1.0021464866958703, "learning_rate": 4.885684642153096e-06, "loss": 0.4753, "step": 9611 }, { "epoch": 0.5853302073501203, "grad_norm": 0.9952785684719744, "learning_rate": 4.885660787276885e-06, "loss": 0.4409, "step": 9612 }, { "epoch": 0.5853911031269982, "grad_norm": 0.9771974376178882, "learning_rate": 4.885636929970213e-06, "loss": 0.4357, "step": 9613 }, { "epoch": 0.585451998903876, "grad_norm": 0.9707089059971216, "learning_rate": 4.885613070233103e-06, "loss": 0.425, "step": 9614 }, { "epoch": 0.5855128946807538, "grad_norm": 1.0127092309146852, "learning_rate": 4.885589208065581e-06, "loss": 0.4221, "step": 9615 }, { "epoch": 0.5855737904576318, "grad_norm": 0.9711667141895214, "learning_rate": 4.885565343467669e-06, "loss": 0.3996, "step": 9616 }, { "epoch": 0.5856346862345096, "grad_norm": 1.0244948811501553, "learning_rate": 4.885541476439394e-06, "loss": 0.4233, "step": 9617 }, { "epoch": 0.5856955820113875, "grad_norm": 0.9841156736712181, "learning_rate": 4.885517606980778e-06, "loss": 0.4266, "step": 9618 }, { "epoch": 0.5857564777882653, "grad_norm": 1.007418557910247, "learning_rate": 4.885493735091847e-06, "loss": 0.3648, "step": 9619 }, { "epoch": 0.5858173735651433, "grad_norm": 0.9713623148568125, "learning_rate": 4.885469860772625e-06, "loss": 0.4405, "step": 9620 }, { "epoch": 0.5858782693420211, "grad_norm": 1.0445823649404093, "learning_rate": 4.885445984023135e-06, "loss": 0.4477, "step": 9621 }, { "epoch": 0.585939165118899, "grad_norm": 1.0749275684969615, "learning_rate": 4.885422104843402e-06, "loss": 0.3282, "step": 9622 }, { "epoch": 0.5860000608957768, "grad_norm": 1.0468984247309843, "learning_rate": 4.8853982232334515e-06, "loss": 0.4186, "step": 9623 }, { "epoch": 0.5860609566726548, "grad_norm": 1.0128901108310606, "learning_rate": 4.8853743391933065e-06, "loss": 0.4323, "step": 9624 }, { "epoch": 0.5861218524495326, "grad_norm": 0.9882272146382319, "learning_rate": 4.885350452722991e-06, "loss": 0.4305, "step": 9625 }, { "epoch": 0.5861827482264105, "grad_norm": 0.9675271212804571, "learning_rate": 4.885326563822531e-06, "loss": 0.4446, "step": 9626 }, { "epoch": 0.5862436440032883, "grad_norm": 1.1071311287057548, "learning_rate": 4.885302672491949e-06, "loss": 0.3814, "step": 9627 }, { "epoch": 0.5863045397801663, "grad_norm": 1.0496182655690944, "learning_rate": 4.885278778731272e-06, "loss": 0.4634, "step": 9628 }, { "epoch": 0.5863654355570441, "grad_norm": 1.0034680037553634, "learning_rate": 4.8852548825405214e-06, "loss": 0.4273, "step": 9629 }, { "epoch": 0.586426331333922, "grad_norm": 1.0672315212969967, "learning_rate": 4.8852309839197224e-06, "loss": 0.4836, "step": 9630 }, { "epoch": 0.5864872271107998, "grad_norm": 1.0009049933308662, "learning_rate": 4.8852070828688994e-06, "loss": 0.4987, "step": 9631 }, { "epoch": 0.5865481228876778, "grad_norm": 1.036278186885521, "learning_rate": 4.885183179388078e-06, "loss": 0.3797, "step": 9632 }, { "epoch": 0.5866090186645556, "grad_norm": 0.9782240540881616, "learning_rate": 4.88515927347728e-06, "loss": 0.4375, "step": 9633 }, { "epoch": 0.5866699144414335, "grad_norm": 0.982997798935774, "learning_rate": 4.885135365136533e-06, "loss": 0.4776, "step": 9634 }, { "epoch": 0.5867308102183113, "grad_norm": 1.0563291361188238, "learning_rate": 4.885111454365859e-06, "loss": 0.3741, "step": 9635 }, { "epoch": 0.5867917059951893, "grad_norm": 0.9653301663878798, "learning_rate": 4.885087541165283e-06, "loss": 0.4679, "step": 9636 }, { "epoch": 0.5868526017720671, "grad_norm": 1.0000764511716806, "learning_rate": 4.88506362553483e-06, "loss": 0.4029, "step": 9637 }, { "epoch": 0.586913497548945, "grad_norm": 1.0228157231428059, "learning_rate": 4.885039707474523e-06, "loss": 0.4199, "step": 9638 }, { "epoch": 0.5869743933258229, "grad_norm": 1.0200035340964764, "learning_rate": 4.885015786984387e-06, "loss": 0.4147, "step": 9639 }, { "epoch": 0.5870352891027008, "grad_norm": 0.9759766974238483, "learning_rate": 4.884991864064448e-06, "loss": 0.4959, "step": 9640 }, { "epoch": 0.5870961848795786, "grad_norm": 0.9339991283380894, "learning_rate": 4.8849679387147274e-06, "loss": 0.4758, "step": 9641 }, { "epoch": 0.5871570806564564, "grad_norm": 1.0058808570627156, "learning_rate": 4.884944010935251e-06, "loss": 0.453, "step": 9642 }, { "epoch": 0.5872179764333344, "grad_norm": 1.0836902339006176, "learning_rate": 4.884920080726044e-06, "loss": 0.3824, "step": 9643 }, { "epoch": 0.5872788722102122, "grad_norm": 1.0229798822410292, "learning_rate": 4.88489614808713e-06, "loss": 0.4774, "step": 9644 }, { "epoch": 0.5873397679870901, "grad_norm": 0.9814107567927879, "learning_rate": 4.8848722130185326e-06, "loss": 0.4004, "step": 9645 }, { "epoch": 0.5874006637639679, "grad_norm": 1.09989209829444, "learning_rate": 4.884848275520277e-06, "loss": 0.462, "step": 9646 }, { "epoch": 0.5874615595408459, "grad_norm": 0.9631198031152383, "learning_rate": 4.884824335592389e-06, "loss": 0.4381, "step": 9647 }, { "epoch": 0.5875224553177237, "grad_norm": 0.9540722750362942, "learning_rate": 4.88480039323489e-06, "loss": 0.3974, "step": 9648 }, { "epoch": 0.5875833510946016, "grad_norm": 1.066946163836782, "learning_rate": 4.884776448447806e-06, "loss": 0.4114, "step": 9649 }, { "epoch": 0.5876442468714794, "grad_norm": 1.0459779040755035, "learning_rate": 4.884752501231163e-06, "loss": 0.4109, "step": 9650 }, { "epoch": 0.5877051426483574, "grad_norm": 1.0397564155806067, "learning_rate": 4.884728551584982e-06, "loss": 0.447, "step": 9651 }, { "epoch": 0.5877660384252352, "grad_norm": 1.0737769351183941, "learning_rate": 4.88470459950929e-06, "loss": 0.3891, "step": 9652 }, { "epoch": 0.5878269342021131, "grad_norm": 1.0157245122395637, "learning_rate": 4.88468064500411e-06, "loss": 0.4533, "step": 9653 }, { "epoch": 0.5878878299789909, "grad_norm": 1.0378477071902692, "learning_rate": 4.8846566880694665e-06, "loss": 0.3639, "step": 9654 }, { "epoch": 0.5879487257558689, "grad_norm": 0.9707491852164921, "learning_rate": 4.884632728705386e-06, "loss": 0.3999, "step": 9655 }, { "epoch": 0.5880096215327467, "grad_norm": 1.012759715195501, "learning_rate": 4.88460876691189e-06, "loss": 0.4616, "step": 9656 }, { "epoch": 0.5880705173096246, "grad_norm": 0.9234896163860435, "learning_rate": 4.884584802689004e-06, "loss": 0.4709, "step": 9657 }, { "epoch": 0.5881314130865024, "grad_norm": 1.0034254713853277, "learning_rate": 4.884560836036753e-06, "loss": 0.4459, "step": 9658 }, { "epoch": 0.5881923088633804, "grad_norm": 1.1420029658487076, "learning_rate": 4.884536866955161e-06, "loss": 0.4169, "step": 9659 }, { "epoch": 0.5882532046402582, "grad_norm": 1.0491362803757969, "learning_rate": 4.884512895444253e-06, "loss": 0.4125, "step": 9660 }, { "epoch": 0.588314100417136, "grad_norm": 1.014159412775763, "learning_rate": 4.884488921504052e-06, "loss": 0.4946, "step": 9661 }, { "epoch": 0.5883749961940139, "grad_norm": 0.9279050775731195, "learning_rate": 4.884464945134584e-06, "loss": 0.4608, "step": 9662 }, { "epoch": 0.5884358919708919, "grad_norm": 0.9889455753958296, "learning_rate": 4.884440966335871e-06, "loss": 0.4902, "step": 9663 }, { "epoch": 0.5884967877477697, "grad_norm": 1.0744273060283034, "learning_rate": 4.884416985107941e-06, "loss": 0.3743, "step": 9664 }, { "epoch": 0.5885576835246475, "grad_norm": 1.0534841583396517, "learning_rate": 4.884393001450816e-06, "loss": 0.3996, "step": 9665 }, { "epoch": 0.5886185793015254, "grad_norm": 0.9822992619669229, "learning_rate": 4.884369015364521e-06, "loss": 0.4648, "step": 9666 }, { "epoch": 0.5886794750784033, "grad_norm": 0.9658268036907003, "learning_rate": 4.88434502684908e-06, "loss": 0.4888, "step": 9667 }, { "epoch": 0.5887403708552812, "grad_norm": 1.0863854613745771, "learning_rate": 4.884321035904518e-06, "loss": 0.3806, "step": 9668 }, { "epoch": 0.588801266632159, "grad_norm": 1.1211581958205767, "learning_rate": 4.88429704253086e-06, "loss": 0.4143, "step": 9669 }, { "epoch": 0.5888621624090369, "grad_norm": 0.9851467401303592, "learning_rate": 4.884273046728129e-06, "loss": 0.4988, "step": 9670 }, { "epoch": 0.5889230581859148, "grad_norm": 1.096790370969812, "learning_rate": 4.88424904849635e-06, "loss": 0.3903, "step": 9671 }, { "epoch": 0.5889839539627927, "grad_norm": 1.0635668521204418, "learning_rate": 4.884225047835548e-06, "loss": 0.4413, "step": 9672 }, { "epoch": 0.5890448497396705, "grad_norm": 1.0800605253035191, "learning_rate": 4.884201044745747e-06, "loss": 0.3936, "step": 9673 }, { "epoch": 0.5891057455165484, "grad_norm": 1.08322355710681, "learning_rate": 4.884177039226972e-06, "loss": 0.3971, "step": 9674 }, { "epoch": 0.5891666412934263, "grad_norm": 1.0295178231499635, "learning_rate": 4.884153031279247e-06, "loss": 0.4068, "step": 9675 }, { "epoch": 0.5892275370703042, "grad_norm": 1.046566446321984, "learning_rate": 4.884129020902596e-06, "loss": 0.4025, "step": 9676 }, { "epoch": 0.589288432847182, "grad_norm": 0.9809061187620245, "learning_rate": 4.884105008097044e-06, "loss": 0.4201, "step": 9677 }, { "epoch": 0.58934932862406, "grad_norm": 1.0348468615119817, "learning_rate": 4.884080992862615e-06, "loss": 0.4159, "step": 9678 }, { "epoch": 0.5894102244009378, "grad_norm": 1.0523757921392414, "learning_rate": 4.884056975199335e-06, "loss": 0.4131, "step": 9679 }, { "epoch": 0.5894711201778157, "grad_norm": 1.0731639927652727, "learning_rate": 4.884032955107226e-06, "loss": 0.45, "step": 9680 }, { "epoch": 0.5895320159546935, "grad_norm": 0.9913894445650062, "learning_rate": 4.8840089325863145e-06, "loss": 0.3971, "step": 9681 }, { "epoch": 0.5895929117315715, "grad_norm": 0.9974842973015482, "learning_rate": 4.883984907636624e-06, "loss": 0.4347, "step": 9682 }, { "epoch": 0.5896538075084493, "grad_norm": 0.9952319056240007, "learning_rate": 4.88396088025818e-06, "loss": 0.4618, "step": 9683 }, { "epoch": 0.5897147032853272, "grad_norm": 0.8742949382194373, "learning_rate": 4.883936850451005e-06, "loss": 0.4671, "step": 9684 }, { "epoch": 0.589775599062205, "grad_norm": 1.0312493755352374, "learning_rate": 4.883912818215125e-06, "loss": 0.381, "step": 9685 }, { "epoch": 0.589836494839083, "grad_norm": 1.008316053221914, "learning_rate": 4.883888783550564e-06, "loss": 0.4867, "step": 9686 }, { "epoch": 0.5898973906159608, "grad_norm": 1.0829066651228176, "learning_rate": 4.8838647464573476e-06, "loss": 0.4257, "step": 9687 }, { "epoch": 0.5899582863928386, "grad_norm": 0.9722973993896697, "learning_rate": 4.8838407069354986e-06, "loss": 0.415, "step": 9688 }, { "epoch": 0.5900191821697165, "grad_norm": 1.1239310070168629, "learning_rate": 4.883816664985043e-06, "loss": 0.4071, "step": 9689 }, { "epoch": 0.5900800779465945, "grad_norm": 1.0208340731347625, "learning_rate": 4.8837926206060034e-06, "loss": 0.4993, "step": 9690 }, { "epoch": 0.5901409737234723, "grad_norm": 0.9626090495413732, "learning_rate": 4.883768573798407e-06, "loss": 0.4292, "step": 9691 }, { "epoch": 0.5902018695003501, "grad_norm": 0.9892050811172851, "learning_rate": 4.883744524562276e-06, "loss": 0.4415, "step": 9692 }, { "epoch": 0.590262765277228, "grad_norm": 0.9576121414507093, "learning_rate": 4.883720472897635e-06, "loss": 0.4706, "step": 9693 }, { "epoch": 0.5903236610541059, "grad_norm": 1.032146476403816, "learning_rate": 4.88369641880451e-06, "loss": 0.427, "step": 9694 }, { "epoch": 0.5903845568309838, "grad_norm": 1.0988009811956738, "learning_rate": 4.883672362282924e-06, "loss": 0.4708, "step": 9695 }, { "epoch": 0.5904454526078616, "grad_norm": 0.9752591899260711, "learning_rate": 4.883648303332903e-06, "loss": 0.4446, "step": 9696 }, { "epoch": 0.5905063483847395, "grad_norm": 0.966733789677795, "learning_rate": 4.88362424195447e-06, "loss": 0.4279, "step": 9697 }, { "epoch": 0.5905672441616174, "grad_norm": 0.9460786654514504, "learning_rate": 4.88360017814765e-06, "loss": 0.4349, "step": 9698 }, { "epoch": 0.5906281399384953, "grad_norm": 1.0901058724035144, "learning_rate": 4.883576111912468e-06, "loss": 0.5122, "step": 9699 }, { "epoch": 0.5906890357153731, "grad_norm": 0.9351529067466691, "learning_rate": 4.883552043248948e-06, "loss": 0.4166, "step": 9700 }, { "epoch": 0.590749931492251, "grad_norm": 0.9396550795603561, "learning_rate": 4.883527972157115e-06, "loss": 0.4797, "step": 9701 }, { "epoch": 0.5908108272691289, "grad_norm": 1.0731845552418269, "learning_rate": 4.883503898636993e-06, "loss": 0.329, "step": 9702 }, { "epoch": 0.5908717230460068, "grad_norm": 0.9446268177867572, "learning_rate": 4.883479822688607e-06, "loss": 0.4488, "step": 9703 }, { "epoch": 0.5909326188228846, "grad_norm": 0.920372451301502, "learning_rate": 4.883455744311982e-06, "loss": 0.5012, "step": 9704 }, { "epoch": 0.5909935145997625, "grad_norm": 0.9628506063624375, "learning_rate": 4.883431663507141e-06, "loss": 0.4537, "step": 9705 }, { "epoch": 0.5910544103766404, "grad_norm": 1.0556709235263773, "learning_rate": 4.88340758027411e-06, "loss": 0.4578, "step": 9706 }, { "epoch": 0.5911153061535183, "grad_norm": 1.0206292955033143, "learning_rate": 4.8833834946129115e-06, "loss": 0.376, "step": 9707 }, { "epoch": 0.5911762019303961, "grad_norm": 0.9318685347850197, "learning_rate": 4.883359406523572e-06, "loss": 0.4203, "step": 9708 }, { "epoch": 0.591237097707274, "grad_norm": 1.0405734488763283, "learning_rate": 4.8833353160061165e-06, "loss": 0.3999, "step": 9709 }, { "epoch": 0.5912979934841519, "grad_norm": 1.0611346980063765, "learning_rate": 4.883311223060568e-06, "loss": 0.4031, "step": 9710 }, { "epoch": 0.5913588892610298, "grad_norm": 0.9714338550006842, "learning_rate": 4.883287127686951e-06, "loss": 0.5174, "step": 9711 }, { "epoch": 0.5914197850379076, "grad_norm": 1.0270869219022765, "learning_rate": 4.883263029885291e-06, "loss": 0.4455, "step": 9712 }, { "epoch": 0.5914806808147854, "grad_norm": 1.0071269984232882, "learning_rate": 4.883238929655613e-06, "loss": 0.436, "step": 9713 }, { "epoch": 0.5915415765916634, "grad_norm": 1.0261310980464589, "learning_rate": 4.88321482699794e-06, "loss": 0.4479, "step": 9714 }, { "epoch": 0.5916024723685412, "grad_norm": 0.9711954173966225, "learning_rate": 4.883190721912297e-06, "loss": 0.3757, "step": 9715 }, { "epoch": 0.5916633681454191, "grad_norm": 1.022982023227119, "learning_rate": 4.883166614398709e-06, "loss": 0.41, "step": 9716 }, { "epoch": 0.5917242639222969, "grad_norm": 1.047727458222814, "learning_rate": 4.8831425044572e-06, "loss": 0.3369, "step": 9717 }, { "epoch": 0.5917851596991749, "grad_norm": 1.1340657772809188, "learning_rate": 4.8831183920877955e-06, "loss": 0.3761, "step": 9718 }, { "epoch": 0.5918460554760527, "grad_norm": 1.057353698209581, "learning_rate": 4.88309427729052e-06, "loss": 0.4133, "step": 9719 }, { "epoch": 0.5919069512529306, "grad_norm": 1.0284021955633182, "learning_rate": 4.883070160065398e-06, "loss": 0.435, "step": 9720 }, { "epoch": 0.5919678470298085, "grad_norm": 0.9920368570037874, "learning_rate": 4.8830460404124515e-06, "loss": 0.452, "step": 9721 }, { "epoch": 0.5920287428066864, "grad_norm": 1.0899576920677871, "learning_rate": 4.883021918331709e-06, "loss": 0.3878, "step": 9722 }, { "epoch": 0.5920896385835642, "grad_norm": 1.1275466434742971, "learning_rate": 4.882997793823193e-06, "loss": 0.3944, "step": 9723 }, { "epoch": 0.5921505343604421, "grad_norm": 1.015015082973947, "learning_rate": 4.882973666886928e-06, "loss": 0.4151, "step": 9724 }, { "epoch": 0.59221143013732, "grad_norm": 0.9908116707871136, "learning_rate": 4.882949537522939e-06, "loss": 0.4269, "step": 9725 }, { "epoch": 0.5922723259141979, "grad_norm": 1.0316560330464488, "learning_rate": 4.88292540573125e-06, "loss": 0.4056, "step": 9726 }, { "epoch": 0.5923332216910757, "grad_norm": 0.9973277142374563, "learning_rate": 4.882901271511887e-06, "loss": 0.376, "step": 9727 }, { "epoch": 0.5923941174679536, "grad_norm": 0.9977034106824456, "learning_rate": 4.882877134864874e-06, "loss": 0.4643, "step": 9728 }, { "epoch": 0.5924550132448315, "grad_norm": 1.0213160513702757, "learning_rate": 4.8828529957902344e-06, "loss": 0.4136, "step": 9729 }, { "epoch": 0.5925159090217094, "grad_norm": 1.0076794485200815, "learning_rate": 4.8828288542879945e-06, "loss": 0.4662, "step": 9730 }, { "epoch": 0.5925768047985872, "grad_norm": 1.0306801977305657, "learning_rate": 4.8828047103581785e-06, "loss": 0.4613, "step": 9731 }, { "epoch": 0.592637700575465, "grad_norm": 0.9251238034747933, "learning_rate": 4.88278056400081e-06, "loss": 0.4743, "step": 9732 }, { "epoch": 0.592698596352343, "grad_norm": 1.0813571185355588, "learning_rate": 4.882756415215914e-06, "loss": 0.4053, "step": 9733 }, { "epoch": 0.5927594921292209, "grad_norm": 0.9569792410923399, "learning_rate": 4.882732264003515e-06, "loss": 0.4273, "step": 9734 }, { "epoch": 0.5928203879060987, "grad_norm": 1.00944099847808, "learning_rate": 4.882708110363639e-06, "loss": 0.5148, "step": 9735 }, { "epoch": 0.5928812836829765, "grad_norm": 1.1201791339048195, "learning_rate": 4.882683954296308e-06, "loss": 0.4333, "step": 9736 }, { "epoch": 0.5929421794598545, "grad_norm": 1.0092398897659467, "learning_rate": 4.88265979580155e-06, "loss": 0.4578, "step": 9737 }, { "epoch": 0.5930030752367323, "grad_norm": 0.9786116392468198, "learning_rate": 4.882635634879386e-06, "loss": 0.4742, "step": 9738 }, { "epoch": 0.5930639710136102, "grad_norm": 1.031263392336418, "learning_rate": 4.882611471529843e-06, "loss": 0.4147, "step": 9739 }, { "epoch": 0.593124866790488, "grad_norm": 1.0034001014290148, "learning_rate": 4.882587305752945e-06, "loss": 0.458, "step": 9740 }, { "epoch": 0.593185762567366, "grad_norm": 0.961522739532033, "learning_rate": 4.882563137548717e-06, "loss": 0.4195, "step": 9741 }, { "epoch": 0.5932466583442438, "grad_norm": 0.9782021170651812, "learning_rate": 4.882538966917183e-06, "loss": 0.456, "step": 9742 }, { "epoch": 0.5933075541211217, "grad_norm": 1.0934842354837233, "learning_rate": 4.882514793858367e-06, "loss": 0.3852, "step": 9743 }, { "epoch": 0.5933684498979995, "grad_norm": 1.0080983128675267, "learning_rate": 4.8824906183722954e-06, "loss": 0.4501, "step": 9744 }, { "epoch": 0.5934293456748775, "grad_norm": 1.081300190634941, "learning_rate": 4.8824664404589916e-06, "loss": 0.4005, "step": 9745 }, { "epoch": 0.5934902414517553, "grad_norm": 1.024059830994691, "learning_rate": 4.882442260118481e-06, "loss": 0.3764, "step": 9746 }, { "epoch": 0.5935511372286332, "grad_norm": 1.0261807247388144, "learning_rate": 4.882418077350787e-06, "loss": 0.4341, "step": 9747 }, { "epoch": 0.593612033005511, "grad_norm": 1.0219368502561605, "learning_rate": 4.882393892155936e-06, "loss": 0.4253, "step": 9748 }, { "epoch": 0.593672928782389, "grad_norm": 0.8911294903399883, "learning_rate": 4.88236970453395e-06, "loss": 0.4649, "step": 9749 }, { "epoch": 0.5937338245592668, "grad_norm": 1.0967143814556828, "learning_rate": 4.8823455144848564e-06, "loss": 0.3522, "step": 9750 }, { "epoch": 0.5937947203361447, "grad_norm": 1.0171770137002798, "learning_rate": 4.882321322008679e-06, "loss": 0.4102, "step": 9751 }, { "epoch": 0.5938556161130225, "grad_norm": 1.0187260009911858, "learning_rate": 4.882297127105441e-06, "loss": 0.3833, "step": 9752 }, { "epoch": 0.5939165118899005, "grad_norm": 1.0273685748141261, "learning_rate": 4.882272929775169e-06, "loss": 0.3807, "step": 9753 }, { "epoch": 0.5939774076667783, "grad_norm": 1.0062049223455047, "learning_rate": 4.882248730017887e-06, "loss": 0.4757, "step": 9754 }, { "epoch": 0.5940383034436562, "grad_norm": 1.0091535154758209, "learning_rate": 4.882224527833619e-06, "loss": 0.4235, "step": 9755 }, { "epoch": 0.594099199220534, "grad_norm": 1.021921513134861, "learning_rate": 4.88220032322239e-06, "loss": 0.4215, "step": 9756 }, { "epoch": 0.594160094997412, "grad_norm": 0.9827812420838841, "learning_rate": 4.882176116184226e-06, "loss": 0.3992, "step": 9757 }, { "epoch": 0.5942209907742898, "grad_norm": 0.9558739956617124, "learning_rate": 4.88215190671915e-06, "loss": 0.5002, "step": 9758 }, { "epoch": 0.5942818865511676, "grad_norm": 1.0858849275199804, "learning_rate": 4.882127694827187e-06, "loss": 0.4314, "step": 9759 }, { "epoch": 0.5943427823280456, "grad_norm": 0.964025947068204, "learning_rate": 4.882103480508361e-06, "loss": 0.4776, "step": 9760 }, { "epoch": 0.5944036781049234, "grad_norm": 1.0613844983191594, "learning_rate": 4.882079263762699e-06, "loss": 0.4071, "step": 9761 }, { "epoch": 0.5944645738818013, "grad_norm": 0.996934056850432, "learning_rate": 4.882055044590224e-06, "loss": 0.4388, "step": 9762 }, { "epoch": 0.5945254696586791, "grad_norm": 1.055844113251742, "learning_rate": 4.882030822990959e-06, "loss": 0.4472, "step": 9763 }, { "epoch": 0.5945863654355571, "grad_norm": 1.0086478318276335, "learning_rate": 4.882006598964933e-06, "loss": 0.4027, "step": 9764 }, { "epoch": 0.5946472612124349, "grad_norm": 0.9942230859181521, "learning_rate": 4.881982372512166e-06, "loss": 0.401, "step": 9765 }, { "epoch": 0.5947081569893128, "grad_norm": 0.955996591128363, "learning_rate": 4.8819581436326865e-06, "loss": 0.416, "step": 9766 }, { "epoch": 0.5947690527661906, "grad_norm": 0.9047310844349515, "learning_rate": 4.881933912326517e-06, "loss": 0.4389, "step": 9767 }, { "epoch": 0.5948299485430686, "grad_norm": 0.9998021377121236, "learning_rate": 4.881909678593682e-06, "loss": 0.445, "step": 9768 }, { "epoch": 0.5948908443199464, "grad_norm": 0.9872461514123612, "learning_rate": 4.881885442434209e-06, "loss": 0.4883, "step": 9769 }, { "epoch": 0.5949517400968243, "grad_norm": 1.128134128276327, "learning_rate": 4.881861203848119e-06, "loss": 0.3994, "step": 9770 }, { "epoch": 0.5950126358737021, "grad_norm": 1.0638039733724742, "learning_rate": 4.881836962835438e-06, "loss": 0.3514, "step": 9771 }, { "epoch": 0.5950735316505801, "grad_norm": 0.9666976837990677, "learning_rate": 4.881812719396192e-06, "loss": 0.4176, "step": 9772 }, { "epoch": 0.5951344274274579, "grad_norm": 1.0247860256265193, "learning_rate": 4.881788473530404e-06, "loss": 0.4741, "step": 9773 }, { "epoch": 0.5951953232043358, "grad_norm": 0.9567973051418809, "learning_rate": 4.881764225238101e-06, "loss": 0.4633, "step": 9774 }, { "epoch": 0.5952562189812136, "grad_norm": 1.0544415443275925, "learning_rate": 4.881739974519304e-06, "loss": 0.3756, "step": 9775 }, { "epoch": 0.5953171147580916, "grad_norm": 0.9528082405950534, "learning_rate": 4.88171572137404e-06, "loss": 0.4674, "step": 9776 }, { "epoch": 0.5953780105349694, "grad_norm": 1.0585204475473118, "learning_rate": 4.881691465802335e-06, "loss": 0.4067, "step": 9777 }, { "epoch": 0.5954389063118473, "grad_norm": 0.9462414355348874, "learning_rate": 4.8816672078042116e-06, "loss": 0.4378, "step": 9778 }, { "epoch": 0.5954998020887251, "grad_norm": 1.0576293720588952, "learning_rate": 4.881642947379695e-06, "loss": 0.5007, "step": 9779 }, { "epoch": 0.5955606978656031, "grad_norm": 1.0046113189192918, "learning_rate": 4.881618684528811e-06, "loss": 0.4672, "step": 9780 }, { "epoch": 0.5956215936424809, "grad_norm": 0.9928865034357476, "learning_rate": 4.881594419251582e-06, "loss": 0.4695, "step": 9781 }, { "epoch": 0.5956824894193588, "grad_norm": 0.9617574232232546, "learning_rate": 4.881570151548035e-06, "loss": 0.3909, "step": 9782 }, { "epoch": 0.5957433851962366, "grad_norm": 0.9673513979824221, "learning_rate": 4.881545881418193e-06, "loss": 0.4533, "step": 9783 }, { "epoch": 0.5958042809731146, "grad_norm": 1.0238662929068107, "learning_rate": 4.881521608862082e-06, "loss": 0.4082, "step": 9784 }, { "epoch": 0.5958651767499924, "grad_norm": 1.0233041267377314, "learning_rate": 4.881497333879727e-06, "loss": 0.4108, "step": 9785 }, { "epoch": 0.5959260725268702, "grad_norm": 0.980897592802786, "learning_rate": 4.881473056471151e-06, "loss": 0.4431, "step": 9786 }, { "epoch": 0.5959869683037481, "grad_norm": 1.0564190214702227, "learning_rate": 4.88144877663638e-06, "loss": 0.3986, "step": 9787 }, { "epoch": 0.596047864080626, "grad_norm": 1.0515868067497238, "learning_rate": 4.881424494375439e-06, "loss": 0.4963, "step": 9788 }, { "epoch": 0.5961087598575039, "grad_norm": 1.1133724505862508, "learning_rate": 4.881400209688352e-06, "loss": 0.433, "step": 9789 }, { "epoch": 0.5961696556343817, "grad_norm": 1.0092366499501382, "learning_rate": 4.881375922575145e-06, "loss": 0.4224, "step": 9790 }, { "epoch": 0.5962305514112596, "grad_norm": 1.0352807151632633, "learning_rate": 4.88135163303584e-06, "loss": 0.4016, "step": 9791 }, { "epoch": 0.5962914471881375, "grad_norm": 1.0419915316739743, "learning_rate": 4.881327341070464e-06, "loss": 0.3703, "step": 9792 }, { "epoch": 0.5963523429650154, "grad_norm": 0.977875370514956, "learning_rate": 4.881303046679041e-06, "loss": 0.3999, "step": 9793 }, { "epoch": 0.5964132387418932, "grad_norm": 0.958919773567644, "learning_rate": 4.881278749861597e-06, "loss": 0.4752, "step": 9794 }, { "epoch": 0.5964741345187711, "grad_norm": 0.9800087699948364, "learning_rate": 4.881254450618154e-06, "loss": 0.4624, "step": 9795 }, { "epoch": 0.596535030295649, "grad_norm": 0.9502233313475077, "learning_rate": 4.88123014894874e-06, "loss": 0.406, "step": 9796 }, { "epoch": 0.5965959260725269, "grad_norm": 0.9817887719741865, "learning_rate": 4.8812058448533785e-06, "loss": 0.4981, "step": 9797 }, { "epoch": 0.5966568218494047, "grad_norm": 0.9878741987021886, "learning_rate": 4.8811815383320925e-06, "loss": 0.4189, "step": 9798 }, { "epoch": 0.5967177176262826, "grad_norm": 1.1469256334901603, "learning_rate": 4.881157229384909e-06, "loss": 0.3885, "step": 9799 }, { "epoch": 0.5967786134031605, "grad_norm": 0.9365708185339667, "learning_rate": 4.8811329180118525e-06, "loss": 0.4159, "step": 9800 }, { "epoch": 0.5968395091800384, "grad_norm": 0.939655461719101, "learning_rate": 4.881108604212947e-06, "loss": 0.4526, "step": 9801 }, { "epoch": 0.5969004049569162, "grad_norm": 0.9563429281435447, "learning_rate": 4.881084287988217e-06, "loss": 0.5068, "step": 9802 }, { "epoch": 0.5969613007337942, "grad_norm": 1.030925675806955, "learning_rate": 4.881059969337688e-06, "loss": 0.4721, "step": 9803 }, { "epoch": 0.597022196510672, "grad_norm": 1.0402651631719706, "learning_rate": 4.881035648261384e-06, "loss": 0.4327, "step": 9804 }, { "epoch": 0.5970830922875499, "grad_norm": 1.041153617379042, "learning_rate": 4.8810113247593315e-06, "loss": 0.5226, "step": 9805 }, { "epoch": 0.5971439880644277, "grad_norm": 1.0836819736122332, "learning_rate": 4.880986998831554e-06, "loss": 0.4373, "step": 9806 }, { "epoch": 0.5972048838413057, "grad_norm": 1.027748996228458, "learning_rate": 4.880962670478076e-06, "loss": 0.3974, "step": 9807 }, { "epoch": 0.5972657796181835, "grad_norm": 0.9913504575667438, "learning_rate": 4.880938339698924e-06, "loss": 0.5515, "step": 9808 }, { "epoch": 0.5973266753950613, "grad_norm": 1.029567634513626, "learning_rate": 4.8809140064941196e-06, "loss": 0.3967, "step": 9809 }, { "epoch": 0.5973875711719392, "grad_norm": 0.9614103964866992, "learning_rate": 4.880889670863691e-06, "loss": 0.4526, "step": 9810 }, { "epoch": 0.5974484669488171, "grad_norm": 1.0180431755030492, "learning_rate": 4.8808653328076605e-06, "loss": 0.4132, "step": 9811 }, { "epoch": 0.597509362725695, "grad_norm": 0.9755488573305774, "learning_rate": 4.880840992326055e-06, "loss": 0.4639, "step": 9812 }, { "epoch": 0.5975702585025728, "grad_norm": 1.0388016866453664, "learning_rate": 4.880816649418897e-06, "loss": 0.4791, "step": 9813 }, { "epoch": 0.5976311542794507, "grad_norm": 1.0557172182610817, "learning_rate": 4.8807923040862135e-06, "loss": 0.3882, "step": 9814 }, { "epoch": 0.5976920500563286, "grad_norm": 0.9565379912267681, "learning_rate": 4.880767956328027e-06, "loss": 0.4538, "step": 9815 }, { "epoch": 0.5977529458332065, "grad_norm": 0.9636187304926181, "learning_rate": 4.880743606144365e-06, "loss": 0.4982, "step": 9816 }, { "epoch": 0.5978138416100843, "grad_norm": 1.0021747726989465, "learning_rate": 4.88071925353525e-06, "loss": 0.4082, "step": 9817 }, { "epoch": 0.5978747373869622, "grad_norm": 0.9291781253678632, "learning_rate": 4.880694898500709e-06, "loss": 0.4649, "step": 9818 }, { "epoch": 0.5979356331638401, "grad_norm": 1.0216259736153015, "learning_rate": 4.880670541040764e-06, "loss": 0.4533, "step": 9819 }, { "epoch": 0.597996528940718, "grad_norm": 0.9991481146189904, "learning_rate": 4.880646181155442e-06, "loss": 0.4484, "step": 9820 }, { "epoch": 0.5980574247175958, "grad_norm": 1.069505205352075, "learning_rate": 4.880621818844767e-06, "loss": 0.4057, "step": 9821 }, { "epoch": 0.5981183204944737, "grad_norm": 1.0641575142701016, "learning_rate": 4.880597454108764e-06, "loss": 0.464, "step": 9822 }, { "epoch": 0.5981792162713516, "grad_norm": 0.9708372652981204, "learning_rate": 4.8805730869474575e-06, "loss": 0.4522, "step": 9823 }, { "epoch": 0.5982401120482295, "grad_norm": 0.9990314804126853, "learning_rate": 4.880548717360874e-06, "loss": 0.4568, "step": 9824 }, { "epoch": 0.5983010078251073, "grad_norm": 1.0448339710898413, "learning_rate": 4.880524345349036e-06, "loss": 0.4338, "step": 9825 }, { "epoch": 0.5983619036019852, "grad_norm": 1.0233129166102464, "learning_rate": 4.880499970911969e-06, "loss": 0.3893, "step": 9826 }, { "epoch": 0.5984227993788631, "grad_norm": 1.0414047001080011, "learning_rate": 4.880475594049698e-06, "loss": 0.4495, "step": 9827 }, { "epoch": 0.598483695155741, "grad_norm": 1.0380219155404506, "learning_rate": 4.880451214762249e-06, "loss": 0.402, "step": 9828 }, { "epoch": 0.5985445909326188, "grad_norm": 1.0099993881890783, "learning_rate": 4.880426833049645e-06, "loss": 0.4546, "step": 9829 }, { "epoch": 0.5986054867094966, "grad_norm": 0.9933688215144763, "learning_rate": 4.880402448911912e-06, "loss": 0.4577, "step": 9830 }, { "epoch": 0.5986663824863746, "grad_norm": 1.004306768632997, "learning_rate": 4.8803780623490734e-06, "loss": 0.4226, "step": 9831 }, { "epoch": 0.5987272782632524, "grad_norm": 1.0140740729513753, "learning_rate": 4.880353673361157e-06, "loss": 0.4227, "step": 9832 }, { "epoch": 0.5987881740401303, "grad_norm": 1.0479556183008456, "learning_rate": 4.880329281948184e-06, "loss": 0.3861, "step": 9833 }, { "epoch": 0.5988490698170081, "grad_norm": 1.1123167226466035, "learning_rate": 4.8803048881101825e-06, "loss": 0.3916, "step": 9834 }, { "epoch": 0.5989099655938861, "grad_norm": 1.0037017077177102, "learning_rate": 4.8802804918471746e-06, "loss": 0.4431, "step": 9835 }, { "epoch": 0.5989708613707639, "grad_norm": 0.9583207631808922, "learning_rate": 4.880256093159187e-06, "loss": 0.4611, "step": 9836 }, { "epoch": 0.5990317571476418, "grad_norm": 1.067034406625459, "learning_rate": 4.880231692046244e-06, "loss": 0.4776, "step": 9837 }, { "epoch": 0.5990926529245196, "grad_norm": 0.9463256099204874, "learning_rate": 4.88020728850837e-06, "loss": 0.4106, "step": 9838 }, { "epoch": 0.5991535487013976, "grad_norm": 0.9504231826466979, "learning_rate": 4.880182882545591e-06, "loss": 0.4633, "step": 9839 }, { "epoch": 0.5992144444782754, "grad_norm": 0.9430634195649348, "learning_rate": 4.880158474157931e-06, "loss": 0.433, "step": 9840 }, { "epoch": 0.5992753402551533, "grad_norm": 1.0782419298681312, "learning_rate": 4.880134063345415e-06, "loss": 0.4963, "step": 9841 }, { "epoch": 0.5993362360320312, "grad_norm": 1.0378391864130183, "learning_rate": 4.880109650108067e-06, "loss": 0.451, "step": 9842 }, { "epoch": 0.5993971318089091, "grad_norm": 1.0770519748951415, "learning_rate": 4.880085234445913e-06, "loss": 0.3952, "step": 9843 }, { "epoch": 0.5994580275857869, "grad_norm": 0.9548532555685743, "learning_rate": 4.880060816358979e-06, "loss": 0.4286, "step": 9844 }, { "epoch": 0.5995189233626648, "grad_norm": 1.0840081956179692, "learning_rate": 4.880036395847288e-06, "loss": 0.408, "step": 9845 }, { "epoch": 0.5995798191395427, "grad_norm": 1.0604589188436184, "learning_rate": 4.880011972910865e-06, "loss": 0.4549, "step": 9846 }, { "epoch": 0.5996407149164206, "grad_norm": 1.023069775395982, "learning_rate": 4.879987547549735e-06, "loss": 0.4152, "step": 9847 }, { "epoch": 0.5997016106932984, "grad_norm": 1.01301607830543, "learning_rate": 4.879963119763924e-06, "loss": 0.4778, "step": 9848 }, { "epoch": 0.5997625064701763, "grad_norm": 0.9691680257195163, "learning_rate": 4.879938689553455e-06, "loss": 0.4154, "step": 9849 }, { "epoch": 0.5998234022470542, "grad_norm": 0.9605349690684272, "learning_rate": 4.879914256918355e-06, "loss": 0.4643, "step": 9850 }, { "epoch": 0.5998842980239321, "grad_norm": 0.9836913389743651, "learning_rate": 4.879889821858647e-06, "loss": 0.3856, "step": 9851 }, { "epoch": 0.5999451938008099, "grad_norm": 1.0497961953583186, "learning_rate": 4.8798653843743575e-06, "loss": 0.4406, "step": 9852 }, { "epoch": 0.6000060895776878, "grad_norm": 1.096003649247723, "learning_rate": 4.87984094446551e-06, "loss": 0.4017, "step": 9853 }, { "epoch": 0.6000669853545657, "grad_norm": 0.9305195537376325, "learning_rate": 4.8798165021321306e-06, "loss": 0.4763, "step": 9854 }, { "epoch": 0.6001278811314436, "grad_norm": 1.0077791948266153, "learning_rate": 4.879792057374243e-06, "loss": 0.4515, "step": 9855 }, { "epoch": 0.6001887769083214, "grad_norm": 0.9721837972591665, "learning_rate": 4.879767610191874e-06, "loss": 0.4456, "step": 9856 }, { "epoch": 0.6002496726851992, "grad_norm": 1.0942635006580617, "learning_rate": 4.8797431605850456e-06, "loss": 0.3347, "step": 9857 }, { "epoch": 0.6003105684620772, "grad_norm": 1.0412642081256265, "learning_rate": 4.879718708553785e-06, "loss": 0.4136, "step": 9858 }, { "epoch": 0.600371464238955, "grad_norm": 0.9324047472078808, "learning_rate": 4.879694254098117e-06, "loss": 0.4091, "step": 9859 }, { "epoch": 0.6004323600158329, "grad_norm": 1.0942349310836252, "learning_rate": 4.879669797218065e-06, "loss": 0.3847, "step": 9860 }, { "epoch": 0.6004932557927107, "grad_norm": 0.9831767996899281, "learning_rate": 4.879645337913656e-06, "loss": 0.4844, "step": 9861 }, { "epoch": 0.6005541515695887, "grad_norm": 0.9708666061225684, "learning_rate": 4.879620876184912e-06, "loss": 0.4614, "step": 9862 }, { "epoch": 0.6006150473464665, "grad_norm": 1.071455413839032, "learning_rate": 4.879596412031862e-06, "loss": 0.4381, "step": 9863 }, { "epoch": 0.6006759431233444, "grad_norm": 1.0634338228471185, "learning_rate": 4.879571945454526e-06, "loss": 0.4837, "step": 9864 }, { "epoch": 0.6007368389002222, "grad_norm": 1.034242832051259, "learning_rate": 4.879547476452933e-06, "loss": 0.4181, "step": 9865 }, { "epoch": 0.6007977346771002, "grad_norm": 1.0550014927414983, "learning_rate": 4.8795230050271075e-06, "loss": 0.4295, "step": 9866 }, { "epoch": 0.600858630453978, "grad_norm": 1.0288821838221631, "learning_rate": 4.8794985311770726e-06, "loss": 0.4044, "step": 9867 }, { "epoch": 0.6009195262308559, "grad_norm": 0.9799947995661565, "learning_rate": 4.879474054902854e-06, "loss": 0.4374, "step": 9868 }, { "epoch": 0.6009804220077337, "grad_norm": 0.9700030585892502, "learning_rate": 4.879449576204477e-06, "loss": 0.4324, "step": 9869 }, { "epoch": 0.6010413177846117, "grad_norm": 0.9502215147562701, "learning_rate": 4.879425095081965e-06, "loss": 0.4731, "step": 9870 }, { "epoch": 0.6011022135614895, "grad_norm": 1.0028341109639676, "learning_rate": 4.879400611535345e-06, "loss": 0.4471, "step": 9871 }, { "epoch": 0.6011631093383674, "grad_norm": 1.0176645027515456, "learning_rate": 4.879376125564642e-06, "loss": 0.4317, "step": 9872 }, { "epoch": 0.6012240051152452, "grad_norm": 1.020206824762612, "learning_rate": 4.879351637169879e-06, "loss": 0.4213, "step": 9873 }, { "epoch": 0.6012849008921232, "grad_norm": 1.0043254890192173, "learning_rate": 4.8793271463510825e-06, "loss": 0.4283, "step": 9874 }, { "epoch": 0.601345796669001, "grad_norm": 0.9747826952435772, "learning_rate": 4.8793026531082764e-06, "loss": 0.4836, "step": 9875 }, { "epoch": 0.6014066924458789, "grad_norm": 1.0229407548882612, "learning_rate": 4.8792781574414874e-06, "loss": 0.5035, "step": 9876 }, { "epoch": 0.6014675882227567, "grad_norm": 1.132232071806221, "learning_rate": 4.8792536593507374e-06, "loss": 0.381, "step": 9877 }, { "epoch": 0.6015284839996347, "grad_norm": 1.0226588847639697, "learning_rate": 4.879229158836055e-06, "loss": 0.4235, "step": 9878 }, { "epoch": 0.6015893797765125, "grad_norm": 0.981876945338781, "learning_rate": 4.879204655897463e-06, "loss": 0.451, "step": 9879 }, { "epoch": 0.6016502755533903, "grad_norm": 1.018489084216555, "learning_rate": 4.879180150534986e-06, "loss": 0.3749, "step": 9880 }, { "epoch": 0.6017111713302682, "grad_norm": 1.0420433237497555, "learning_rate": 4.8791556427486505e-06, "loss": 0.4167, "step": 9881 }, { "epoch": 0.6017720671071461, "grad_norm": 0.9918911372459923, "learning_rate": 4.87913113253848e-06, "loss": 0.4106, "step": 9882 }, { "epoch": 0.601832962884024, "grad_norm": 0.9422744452483528, "learning_rate": 4.8791066199045016e-06, "loss": 0.4572, "step": 9883 }, { "epoch": 0.6018938586609018, "grad_norm": 1.0343298378433774, "learning_rate": 4.879082104846737e-06, "loss": 0.3556, "step": 9884 }, { "epoch": 0.6019547544377798, "grad_norm": 1.0676502426066348, "learning_rate": 4.879057587365214e-06, "loss": 0.3644, "step": 9885 }, { "epoch": 0.6020156502146576, "grad_norm": 0.9149136181513977, "learning_rate": 4.879033067459956e-06, "loss": 0.5038, "step": 9886 }, { "epoch": 0.6020765459915355, "grad_norm": 1.0158592251955638, "learning_rate": 4.87900854513099e-06, "loss": 0.4011, "step": 9887 }, { "epoch": 0.6021374417684133, "grad_norm": 1.0435704872844849, "learning_rate": 4.878984020378338e-06, "loss": 0.4283, "step": 9888 }, { "epoch": 0.6021983375452913, "grad_norm": 1.0673256041146097, "learning_rate": 4.878959493202026e-06, "loss": 0.3733, "step": 9889 }, { "epoch": 0.6022592333221691, "grad_norm": 0.9951571856981565, "learning_rate": 4.878934963602082e-06, "loss": 0.4716, "step": 9890 }, { "epoch": 0.602320129099047, "grad_norm": 1.0097412258467693, "learning_rate": 4.878910431578527e-06, "loss": 0.4594, "step": 9891 }, { "epoch": 0.6023810248759248, "grad_norm": 0.9488863121071889, "learning_rate": 4.878885897131388e-06, "loss": 0.4502, "step": 9892 }, { "epoch": 0.6024419206528028, "grad_norm": 0.9307001002969415, "learning_rate": 4.878861360260688e-06, "loss": 0.5138, "step": 9893 }, { "epoch": 0.6025028164296806, "grad_norm": 1.0571478864201693, "learning_rate": 4.878836820966455e-06, "loss": 0.4831, "step": 9894 }, { "epoch": 0.6025637122065585, "grad_norm": 1.0404070534735195, "learning_rate": 4.8788122792487125e-06, "loss": 0.4249, "step": 9895 }, { "epoch": 0.6026246079834363, "grad_norm": 1.0201767354016187, "learning_rate": 4.878787735107485e-06, "loss": 0.4206, "step": 9896 }, { "epoch": 0.6026855037603143, "grad_norm": 1.0596430052044847, "learning_rate": 4.878763188542799e-06, "loss": 0.3673, "step": 9897 }, { "epoch": 0.6027463995371921, "grad_norm": 0.9941537123411395, "learning_rate": 4.8787386395546775e-06, "loss": 0.4288, "step": 9898 }, { "epoch": 0.60280729531407, "grad_norm": 0.978572159937003, "learning_rate": 4.878714088143146e-06, "loss": 0.4214, "step": 9899 }, { "epoch": 0.6028681910909478, "grad_norm": 0.9846366975793795, "learning_rate": 4.878689534308231e-06, "loss": 0.4662, "step": 9900 }, { "epoch": 0.6029290868678258, "grad_norm": 0.9490386021913603, "learning_rate": 4.8786649780499565e-06, "loss": 0.5227, "step": 9901 }, { "epoch": 0.6029899826447036, "grad_norm": 1.0283558982253838, "learning_rate": 4.878640419368347e-06, "loss": 0.4287, "step": 9902 }, { "epoch": 0.6030508784215814, "grad_norm": 1.0077145220006956, "learning_rate": 4.878615858263429e-06, "loss": 0.4195, "step": 9903 }, { "epoch": 0.6031117741984593, "grad_norm": 1.011581912955907, "learning_rate": 4.878591294735226e-06, "loss": 0.4132, "step": 9904 }, { "epoch": 0.6031726699753373, "grad_norm": 1.116288490451952, "learning_rate": 4.878566728783764e-06, "loss": 0.4283, "step": 9905 }, { "epoch": 0.6032335657522151, "grad_norm": 1.0589156367869947, "learning_rate": 4.878542160409067e-06, "loss": 0.4356, "step": 9906 }, { "epoch": 0.6032944615290929, "grad_norm": 0.9050795000833913, "learning_rate": 4.8785175896111615e-06, "loss": 0.4616, "step": 9907 }, { "epoch": 0.6033553573059708, "grad_norm": 1.1304641800447734, "learning_rate": 4.878493016390071e-06, "loss": 0.4249, "step": 9908 }, { "epoch": 0.6034162530828487, "grad_norm": 0.9975819757171518, "learning_rate": 4.878468440745822e-06, "loss": 0.4802, "step": 9909 }, { "epoch": 0.6034771488597266, "grad_norm": 1.0559966704601744, "learning_rate": 4.878443862678438e-06, "loss": 0.404, "step": 9910 }, { "epoch": 0.6035380446366044, "grad_norm": 0.999116558559276, "learning_rate": 4.878419282187946e-06, "loss": 0.4641, "step": 9911 }, { "epoch": 0.6035989404134823, "grad_norm": 1.0234395681634731, "learning_rate": 4.878394699274369e-06, "loss": 0.4085, "step": 9912 }, { "epoch": 0.6036598361903602, "grad_norm": 1.0624901541251919, "learning_rate": 4.8783701139377325e-06, "loss": 0.5284, "step": 9913 }, { "epoch": 0.6037207319672381, "grad_norm": 1.0938064610993294, "learning_rate": 4.878345526178063e-06, "loss": 0.4066, "step": 9914 }, { "epoch": 0.6037816277441159, "grad_norm": 1.0005131663555185, "learning_rate": 4.878320935995385e-06, "loss": 0.4189, "step": 9915 }, { "epoch": 0.6038425235209938, "grad_norm": 1.0194084699386263, "learning_rate": 4.878296343389721e-06, "loss": 0.4225, "step": 9916 }, { "epoch": 0.6039034192978717, "grad_norm": 1.034342999110174, "learning_rate": 4.878271748361099e-06, "loss": 0.3661, "step": 9917 }, { "epoch": 0.6039643150747496, "grad_norm": 0.9977054733055476, "learning_rate": 4.878247150909544e-06, "loss": 0.4968, "step": 9918 }, { "epoch": 0.6040252108516274, "grad_norm": 1.02195275003352, "learning_rate": 4.87822255103508e-06, "loss": 0.3751, "step": 9919 }, { "epoch": 0.6040861066285053, "grad_norm": 0.9905628768883296, "learning_rate": 4.878197948737732e-06, "loss": 0.3903, "step": 9920 }, { "epoch": 0.6041470024053832, "grad_norm": 1.110461206442431, "learning_rate": 4.878173344017525e-06, "loss": 0.4065, "step": 9921 }, { "epoch": 0.6042078981822611, "grad_norm": 1.0307457883505162, "learning_rate": 4.878148736874485e-06, "loss": 0.5041, "step": 9922 }, { "epoch": 0.6042687939591389, "grad_norm": 1.0587743470760775, "learning_rate": 4.878124127308637e-06, "loss": 0.4797, "step": 9923 }, { "epoch": 0.6043296897360169, "grad_norm": 1.0732128666499416, "learning_rate": 4.878099515320004e-06, "loss": 0.4841, "step": 9924 }, { "epoch": 0.6043905855128947, "grad_norm": 1.1502605397051695, "learning_rate": 4.878074900908614e-06, "loss": 0.4077, "step": 9925 }, { "epoch": 0.6044514812897726, "grad_norm": 1.0328232203253713, "learning_rate": 4.87805028407449e-06, "loss": 0.4516, "step": 9926 }, { "epoch": 0.6045123770666504, "grad_norm": 0.9724540826730335, "learning_rate": 4.878025664817658e-06, "loss": 0.4137, "step": 9927 }, { "epoch": 0.6045732728435284, "grad_norm": 1.0328594044184871, "learning_rate": 4.878001043138143e-06, "loss": 0.5014, "step": 9928 }, { "epoch": 0.6046341686204062, "grad_norm": 1.002254562999832, "learning_rate": 4.87797641903597e-06, "loss": 0.4557, "step": 9929 }, { "epoch": 0.604695064397284, "grad_norm": 1.115673329768141, "learning_rate": 4.877951792511164e-06, "loss": 0.433, "step": 9930 }, { "epoch": 0.6047559601741619, "grad_norm": 1.0038307293927342, "learning_rate": 4.87792716356375e-06, "loss": 0.4341, "step": 9931 }, { "epoch": 0.6048168559510398, "grad_norm": 1.017155122442959, "learning_rate": 4.877902532193754e-06, "loss": 0.477, "step": 9932 }, { "epoch": 0.6048777517279177, "grad_norm": 1.0406850711716804, "learning_rate": 4.8778778984012e-06, "loss": 0.4436, "step": 9933 }, { "epoch": 0.6049386475047955, "grad_norm": 1.0037549212515826, "learning_rate": 4.877853262186113e-06, "loss": 0.4457, "step": 9934 }, { "epoch": 0.6049995432816734, "grad_norm": 0.9117312882065775, "learning_rate": 4.877828623548519e-06, "loss": 0.5326, "step": 9935 }, { "epoch": 0.6050604390585513, "grad_norm": 1.0498564272566655, "learning_rate": 4.877803982488443e-06, "loss": 0.4456, "step": 9936 }, { "epoch": 0.6051213348354292, "grad_norm": 1.0768416162883125, "learning_rate": 4.877779339005909e-06, "loss": 0.4225, "step": 9937 }, { "epoch": 0.605182230612307, "grad_norm": 1.040279113583083, "learning_rate": 4.877754693100943e-06, "loss": 0.4607, "step": 9938 }, { "epoch": 0.6052431263891849, "grad_norm": 1.1227669345640523, "learning_rate": 4.87773004477357e-06, "loss": 0.3499, "step": 9939 }, { "epoch": 0.6053040221660628, "grad_norm": 1.0404127765082687, "learning_rate": 4.877705394023814e-06, "loss": 0.4422, "step": 9940 }, { "epoch": 0.6053649179429407, "grad_norm": 0.99368873298561, "learning_rate": 4.8776807408517026e-06, "loss": 0.4171, "step": 9941 }, { "epoch": 0.6054258137198185, "grad_norm": 1.1464573106875378, "learning_rate": 4.877656085257259e-06, "loss": 0.3906, "step": 9942 }, { "epoch": 0.6054867094966964, "grad_norm": 0.9341322085299187, "learning_rate": 4.877631427240509e-06, "loss": 0.4815, "step": 9943 }, { "epoch": 0.6055476052735743, "grad_norm": 1.0501389554629608, "learning_rate": 4.877606766801478e-06, "loss": 0.3746, "step": 9944 }, { "epoch": 0.6056085010504522, "grad_norm": 0.9724022128405656, "learning_rate": 4.877582103940189e-06, "loss": 0.3829, "step": 9945 }, { "epoch": 0.60566939682733, "grad_norm": 0.9521180017550209, "learning_rate": 4.8775574386566704e-06, "loss": 0.4526, "step": 9946 }, { "epoch": 0.6057302926042079, "grad_norm": 0.964839550382307, "learning_rate": 4.877532770950946e-06, "loss": 0.4029, "step": 9947 }, { "epoch": 0.6057911883810858, "grad_norm": 1.0334365117163657, "learning_rate": 4.877508100823039e-06, "loss": 0.4503, "step": 9948 }, { "epoch": 0.6058520841579637, "grad_norm": 0.9779227178436383, "learning_rate": 4.8774834282729775e-06, "loss": 0.4364, "step": 9949 }, { "epoch": 0.6059129799348415, "grad_norm": 1.0090851061210424, "learning_rate": 4.877458753300784e-06, "loss": 0.4217, "step": 9950 }, { "epoch": 0.6059738757117193, "grad_norm": 0.9609580864603836, "learning_rate": 4.877434075906486e-06, "loss": 0.5072, "step": 9951 }, { "epoch": 0.6060347714885973, "grad_norm": 1.1117573522054047, "learning_rate": 4.877409396090107e-06, "loss": 0.4369, "step": 9952 }, { "epoch": 0.6060956672654751, "grad_norm": 0.9530741355670725, "learning_rate": 4.877384713851674e-06, "loss": 0.4082, "step": 9953 }, { "epoch": 0.606156563042353, "grad_norm": 0.9087070382005531, "learning_rate": 4.877360029191209e-06, "loss": 0.4158, "step": 9954 }, { "epoch": 0.6062174588192308, "grad_norm": 1.0650291860280852, "learning_rate": 4.877335342108741e-06, "loss": 0.389, "step": 9955 }, { "epoch": 0.6062783545961088, "grad_norm": 0.9867702771872968, "learning_rate": 4.877310652604292e-06, "loss": 0.4277, "step": 9956 }, { "epoch": 0.6063392503729866, "grad_norm": 1.052579341688216, "learning_rate": 4.877285960677889e-06, "loss": 0.4838, "step": 9957 }, { "epoch": 0.6064001461498645, "grad_norm": 1.0537474002294454, "learning_rate": 4.877261266329556e-06, "loss": 0.5296, "step": 9958 }, { "epoch": 0.6064610419267423, "grad_norm": 1.108823351711405, "learning_rate": 4.877236569559318e-06, "loss": 0.4898, "step": 9959 }, { "epoch": 0.6065219377036203, "grad_norm": 0.9956379313357218, "learning_rate": 4.877211870367202e-06, "loss": 0.3792, "step": 9960 }, { "epoch": 0.6065828334804981, "grad_norm": 1.0537440221342018, "learning_rate": 4.877187168753231e-06, "loss": 0.4325, "step": 9961 }, { "epoch": 0.606643729257376, "grad_norm": 1.0889898783303684, "learning_rate": 4.877162464717432e-06, "loss": 0.477, "step": 9962 }, { "epoch": 0.6067046250342538, "grad_norm": 0.9874194498192986, "learning_rate": 4.8771377582598284e-06, "loss": 0.4421, "step": 9963 }, { "epoch": 0.6067655208111318, "grad_norm": 1.1093363624948709, "learning_rate": 4.877113049380446e-06, "loss": 0.3716, "step": 9964 }, { "epoch": 0.6068264165880096, "grad_norm": 0.9445859240526714, "learning_rate": 4.877088338079312e-06, "loss": 0.4583, "step": 9965 }, { "epoch": 0.6068873123648875, "grad_norm": 1.0619799162686079, "learning_rate": 4.877063624356448e-06, "loss": 0.4138, "step": 9966 }, { "epoch": 0.6069482081417654, "grad_norm": 0.984655852355331, "learning_rate": 4.877038908211882e-06, "loss": 0.4178, "step": 9967 }, { "epoch": 0.6070091039186433, "grad_norm": 1.041936494895896, "learning_rate": 4.877014189645639e-06, "loss": 0.4288, "step": 9968 }, { "epoch": 0.6070699996955211, "grad_norm": 1.0188135868117887, "learning_rate": 4.876989468657742e-06, "loss": 0.462, "step": 9969 }, { "epoch": 0.607130895472399, "grad_norm": 1.1041921256105993, "learning_rate": 4.8769647452482175e-06, "loss": 0.3749, "step": 9970 }, { "epoch": 0.6071917912492769, "grad_norm": 1.07216870451328, "learning_rate": 4.876940019417091e-06, "loss": 0.4151, "step": 9971 }, { "epoch": 0.6072526870261548, "grad_norm": 1.0419821295009157, "learning_rate": 4.876915291164388e-06, "loss": 0.4842, "step": 9972 }, { "epoch": 0.6073135828030326, "grad_norm": 1.0147577420018665, "learning_rate": 4.8768905604901326e-06, "loss": 0.4227, "step": 9973 }, { "epoch": 0.6073744785799104, "grad_norm": 0.9647188081354123, "learning_rate": 4.87686582739435e-06, "loss": 0.4467, "step": 9974 }, { "epoch": 0.6074353743567884, "grad_norm": 0.991285769955547, "learning_rate": 4.876841091877067e-06, "loss": 0.452, "step": 9975 }, { "epoch": 0.6074962701336662, "grad_norm": 1.001318224419885, "learning_rate": 4.8768163539383075e-06, "loss": 0.4685, "step": 9976 }, { "epoch": 0.6075571659105441, "grad_norm": 1.023810745815772, "learning_rate": 4.876791613578097e-06, "loss": 0.4029, "step": 9977 }, { "epoch": 0.6076180616874219, "grad_norm": 1.009034256706948, "learning_rate": 4.87676687079646e-06, "loss": 0.4174, "step": 9978 }, { "epoch": 0.6076789574642999, "grad_norm": 0.9579247339253645, "learning_rate": 4.8767421255934225e-06, "loss": 0.4231, "step": 9979 }, { "epoch": 0.6077398532411777, "grad_norm": 0.9872622859352331, "learning_rate": 4.876717377969009e-06, "loss": 0.4239, "step": 9980 }, { "epoch": 0.6078007490180556, "grad_norm": 1.0483182972482024, "learning_rate": 4.876692627923246e-06, "loss": 0.4328, "step": 9981 }, { "epoch": 0.6078616447949334, "grad_norm": 1.024677424110546, "learning_rate": 4.8766678754561584e-06, "loss": 0.3821, "step": 9982 }, { "epoch": 0.6079225405718114, "grad_norm": 1.0067440333422737, "learning_rate": 4.87664312056777e-06, "loss": 0.4827, "step": 9983 }, { "epoch": 0.6079834363486892, "grad_norm": 0.9643001946457717, "learning_rate": 4.876618363258108e-06, "loss": 0.4927, "step": 9984 }, { "epoch": 0.6080443321255671, "grad_norm": 1.0798266588326952, "learning_rate": 4.876593603527196e-06, "loss": 0.3689, "step": 9985 }, { "epoch": 0.6081052279024449, "grad_norm": 0.9876318093576433, "learning_rate": 4.876568841375059e-06, "loss": 0.4209, "step": 9986 }, { "epoch": 0.6081661236793229, "grad_norm": 0.9959468045978758, "learning_rate": 4.876544076801724e-06, "loss": 0.5039, "step": 9987 }, { "epoch": 0.6082270194562007, "grad_norm": 1.0075920800480112, "learning_rate": 4.876519309807216e-06, "loss": 0.4059, "step": 9988 }, { "epoch": 0.6082879152330786, "grad_norm": 1.0519466455320574, "learning_rate": 4.876494540391559e-06, "loss": 0.3946, "step": 9989 }, { "epoch": 0.6083488110099564, "grad_norm": 1.0643730475703035, "learning_rate": 4.876469768554778e-06, "loss": 0.4265, "step": 9990 }, { "epoch": 0.6084097067868344, "grad_norm": 0.9681130582564519, "learning_rate": 4.876444994296899e-06, "loss": 0.4491, "step": 9991 }, { "epoch": 0.6084706025637122, "grad_norm": 0.9684232495976419, "learning_rate": 4.876420217617949e-06, "loss": 0.4903, "step": 9992 }, { "epoch": 0.6085314983405901, "grad_norm": 1.0378996632904975, "learning_rate": 4.87639543851795e-06, "loss": 0.4519, "step": 9993 }, { "epoch": 0.6085923941174679, "grad_norm": 1.0272524595813448, "learning_rate": 4.876370656996929e-06, "loss": 0.4844, "step": 9994 }, { "epoch": 0.6086532898943459, "grad_norm": 1.0606580523047209, "learning_rate": 4.876345873054911e-06, "loss": 0.4852, "step": 9995 }, { "epoch": 0.6087141856712237, "grad_norm": 0.9153879153531528, "learning_rate": 4.876321086691921e-06, "loss": 0.456, "step": 9996 }, { "epoch": 0.6087750814481016, "grad_norm": 0.9685952517923018, "learning_rate": 4.876296297907985e-06, "loss": 0.4578, "step": 9997 }, { "epoch": 0.6088359772249794, "grad_norm": 1.0354785499121386, "learning_rate": 4.876271506703127e-06, "loss": 0.4665, "step": 9998 }, { "epoch": 0.6088968730018574, "grad_norm": 1.025802979190463, "learning_rate": 4.8762467130773734e-06, "loss": 0.4317, "step": 9999 }, { "epoch": 0.6089577687787352, "grad_norm": 0.996778409186113, "learning_rate": 4.87622191703075e-06, "loss": 0.3939, "step": 10000 }, { "epoch": 0.609018664555613, "grad_norm": 1.0595706094921649, "learning_rate": 4.876197118563279e-06, "loss": 0.4395, "step": 10001 }, { "epoch": 0.6090795603324909, "grad_norm": 1.0035431047908228, "learning_rate": 4.87617231767499e-06, "loss": 0.4295, "step": 10002 }, { "epoch": 0.6091404561093688, "grad_norm": 1.00204181389756, "learning_rate": 4.876147514365905e-06, "loss": 0.4758, "step": 10003 }, { "epoch": 0.6092013518862467, "grad_norm": 1.0035075445722785, "learning_rate": 4.87612270863605e-06, "loss": 0.3988, "step": 10004 }, { "epoch": 0.6092622476631245, "grad_norm": 0.9627635868945686, "learning_rate": 4.876097900485452e-06, "loss": 0.4338, "step": 10005 }, { "epoch": 0.6093231434400025, "grad_norm": 0.9369399507630256, "learning_rate": 4.876073089914133e-06, "loss": 0.4184, "step": 10006 }, { "epoch": 0.6093840392168803, "grad_norm": 1.0109302159230957, "learning_rate": 4.8760482769221205e-06, "loss": 0.4595, "step": 10007 }, { "epoch": 0.6094449349937582, "grad_norm": 1.0660449442259101, "learning_rate": 4.87602346150944e-06, "loss": 0.4349, "step": 10008 }, { "epoch": 0.609505830770636, "grad_norm": 0.9900742798187294, "learning_rate": 4.875998643676117e-06, "loss": 0.4168, "step": 10009 }, { "epoch": 0.609566726547514, "grad_norm": 1.1366397509620938, "learning_rate": 4.875973823422174e-06, "loss": 0.3712, "step": 10010 }, { "epoch": 0.6096276223243918, "grad_norm": 1.029892565611792, "learning_rate": 4.87594900074764e-06, "loss": 0.4192, "step": 10011 }, { "epoch": 0.6096885181012697, "grad_norm": 0.9709671623472146, "learning_rate": 4.8759241756525376e-06, "loss": 0.4498, "step": 10012 }, { "epoch": 0.6097494138781475, "grad_norm": 0.978026102417563, "learning_rate": 4.875899348136893e-06, "loss": 0.4201, "step": 10013 }, { "epoch": 0.6098103096550255, "grad_norm": 0.9314615303394074, "learning_rate": 4.875874518200732e-06, "loss": 0.4549, "step": 10014 }, { "epoch": 0.6098712054319033, "grad_norm": 1.1136188648887775, "learning_rate": 4.87584968584408e-06, "loss": 0.3929, "step": 10015 }, { "epoch": 0.6099321012087812, "grad_norm": 1.020421588789804, "learning_rate": 4.87582485106696e-06, "loss": 0.4027, "step": 10016 }, { "epoch": 0.609992996985659, "grad_norm": 0.9977077962103746, "learning_rate": 4.8758000138694005e-06, "loss": 0.4725, "step": 10017 }, { "epoch": 0.610053892762537, "grad_norm": 0.9631222526492104, "learning_rate": 4.875775174251425e-06, "loss": 0.4188, "step": 10018 }, { "epoch": 0.6101147885394148, "grad_norm": 0.9802633995619623, "learning_rate": 4.875750332213059e-06, "loss": 0.3978, "step": 10019 }, { "epoch": 0.6101756843162927, "grad_norm": 0.9890283779539129, "learning_rate": 4.875725487754328e-06, "loss": 0.3498, "step": 10020 }, { "epoch": 0.6102365800931705, "grad_norm": 1.0600155733968257, "learning_rate": 4.875700640875257e-06, "loss": 0.4017, "step": 10021 }, { "epoch": 0.6102974758700485, "grad_norm": 1.0802683982079877, "learning_rate": 4.875675791575872e-06, "loss": 0.4199, "step": 10022 }, { "epoch": 0.6103583716469263, "grad_norm": 1.0270983551134587, "learning_rate": 4.875650939856198e-06, "loss": 0.4667, "step": 10023 }, { "epoch": 0.6104192674238041, "grad_norm": 1.039611703972009, "learning_rate": 4.8756260857162596e-06, "loss": 0.3881, "step": 10024 }, { "epoch": 0.610480163200682, "grad_norm": 1.028038590583007, "learning_rate": 4.875601229156083e-06, "loss": 0.4249, "step": 10025 }, { "epoch": 0.61054105897756, "grad_norm": 0.9548039320266061, "learning_rate": 4.875576370175694e-06, "loss": 0.4809, "step": 10026 }, { "epoch": 0.6106019547544378, "grad_norm": 1.0002581375935269, "learning_rate": 4.875551508775116e-06, "loss": 0.4002, "step": 10027 }, { "epoch": 0.6106628505313156, "grad_norm": 1.0238403132325045, "learning_rate": 4.875526644954376e-06, "loss": 0.4288, "step": 10028 }, { "epoch": 0.6107237463081935, "grad_norm": 1.0776581319585352, "learning_rate": 4.8755017787134995e-06, "loss": 0.4265, "step": 10029 }, { "epoch": 0.6107846420850714, "grad_norm": 1.0670226453455811, "learning_rate": 4.87547691005251e-06, "loss": 0.4567, "step": 10030 }, { "epoch": 0.6108455378619493, "grad_norm": 1.0320613564647547, "learning_rate": 4.875452038971435e-06, "loss": 0.3807, "step": 10031 }, { "epoch": 0.6109064336388271, "grad_norm": 0.9875347235275463, "learning_rate": 4.875427165470298e-06, "loss": 0.4481, "step": 10032 }, { "epoch": 0.610967329415705, "grad_norm": 1.0448468527864547, "learning_rate": 4.875402289549126e-06, "loss": 0.3908, "step": 10033 }, { "epoch": 0.6110282251925829, "grad_norm": 1.0753979659547437, "learning_rate": 4.8753774112079435e-06, "loss": 0.3936, "step": 10034 }, { "epoch": 0.6110891209694608, "grad_norm": 0.9509959441475685, "learning_rate": 4.8753525304467755e-06, "loss": 0.4803, "step": 10035 }, { "epoch": 0.6111500167463386, "grad_norm": 0.8735506422493521, "learning_rate": 4.875327647265647e-06, "loss": 0.4551, "step": 10036 }, { "epoch": 0.6112109125232165, "grad_norm": 1.1154360210936556, "learning_rate": 4.875302761664585e-06, "loss": 0.3643, "step": 10037 }, { "epoch": 0.6112718083000944, "grad_norm": 0.9681198791102753, "learning_rate": 4.875277873643614e-06, "loss": 0.5004, "step": 10038 }, { "epoch": 0.6113327040769723, "grad_norm": 0.9961033750280378, "learning_rate": 4.875252983202759e-06, "loss": 0.3662, "step": 10039 }, { "epoch": 0.6113935998538501, "grad_norm": 1.0481751055576898, "learning_rate": 4.875228090342046e-06, "loss": 0.4463, "step": 10040 }, { "epoch": 0.611454495630728, "grad_norm": 1.0914321666511901, "learning_rate": 4.8752031950615e-06, "loss": 0.4054, "step": 10041 }, { "epoch": 0.6115153914076059, "grad_norm": 0.924880756758412, "learning_rate": 4.875178297361146e-06, "loss": 0.4176, "step": 10042 }, { "epoch": 0.6115762871844838, "grad_norm": 0.9578882479028762, "learning_rate": 4.8751533972410094e-06, "loss": 0.4199, "step": 10043 }, { "epoch": 0.6116371829613616, "grad_norm": 1.0938828556596811, "learning_rate": 4.875128494701117e-06, "loss": 0.4214, "step": 10044 }, { "epoch": 0.6116980787382394, "grad_norm": 1.039812640820652, "learning_rate": 4.875103589741491e-06, "loss": 0.4893, "step": 10045 }, { "epoch": 0.6117589745151174, "grad_norm": 0.9454298173310649, "learning_rate": 4.875078682362161e-06, "loss": 0.4761, "step": 10046 }, { "epoch": 0.6118198702919952, "grad_norm": 1.0734792520603356, "learning_rate": 4.875053772563149e-06, "loss": 0.4369, "step": 10047 }, { "epoch": 0.6118807660688731, "grad_norm": 0.9397893906939193, "learning_rate": 4.875028860344482e-06, "loss": 0.4132, "step": 10048 }, { "epoch": 0.611941661845751, "grad_norm": 1.0584948502992835, "learning_rate": 4.875003945706185e-06, "loss": 0.4549, "step": 10049 }, { "epoch": 0.6120025576226289, "grad_norm": 1.049462280629462, "learning_rate": 4.874979028648283e-06, "loss": 0.3985, "step": 10050 }, { "epoch": 0.6120634533995067, "grad_norm": 0.9811575494373137, "learning_rate": 4.874954109170803e-06, "loss": 0.3604, "step": 10051 }, { "epoch": 0.6121243491763846, "grad_norm": 0.992535628908721, "learning_rate": 4.8749291872737685e-06, "loss": 0.4438, "step": 10052 }, { "epoch": 0.6121852449532625, "grad_norm": 0.9699008616493864, "learning_rate": 4.874904262957205e-06, "loss": 0.4156, "step": 10053 }, { "epoch": 0.6122461407301404, "grad_norm": 1.0231140095268247, "learning_rate": 4.874879336221138e-06, "loss": 0.4888, "step": 10054 }, { "epoch": 0.6123070365070182, "grad_norm": 1.0103414427922108, "learning_rate": 4.874854407065594e-06, "loss": 0.4339, "step": 10055 }, { "epoch": 0.6123679322838961, "grad_norm": 1.0691774149078372, "learning_rate": 4.874829475490598e-06, "loss": 0.3805, "step": 10056 }, { "epoch": 0.612428828060774, "grad_norm": 1.0616583371320654, "learning_rate": 4.874804541496175e-06, "loss": 0.4091, "step": 10057 }, { "epoch": 0.6124897238376519, "grad_norm": 0.9593692988650551, "learning_rate": 4.87477960508235e-06, "loss": 0.458, "step": 10058 }, { "epoch": 0.6125506196145297, "grad_norm": 1.0621029012630139, "learning_rate": 4.87475466624915e-06, "loss": 0.4659, "step": 10059 }, { "epoch": 0.6126115153914076, "grad_norm": 1.0740759078186448, "learning_rate": 4.874729724996598e-06, "loss": 0.4414, "step": 10060 }, { "epoch": 0.6126724111682855, "grad_norm": 0.9364817114361618, "learning_rate": 4.874704781324721e-06, "loss": 0.4692, "step": 10061 }, { "epoch": 0.6127333069451634, "grad_norm": 0.9755960022745901, "learning_rate": 4.874679835233545e-06, "loss": 0.4405, "step": 10062 }, { "epoch": 0.6127942027220412, "grad_norm": 1.0242916144923782, "learning_rate": 4.8746548867230935e-06, "loss": 0.4179, "step": 10063 }, { "epoch": 0.6128550984989191, "grad_norm": 1.086806736099031, "learning_rate": 4.8746299357933935e-06, "loss": 0.3942, "step": 10064 }, { "epoch": 0.612915994275797, "grad_norm": 1.0324298670404959, "learning_rate": 4.87460498244447e-06, "loss": 0.4165, "step": 10065 }, { "epoch": 0.6129768900526749, "grad_norm": 1.0332740623588699, "learning_rate": 4.874580026676347e-06, "loss": 0.4115, "step": 10066 }, { "epoch": 0.6130377858295527, "grad_norm": 0.9867399171470187, "learning_rate": 4.874555068489053e-06, "loss": 0.4462, "step": 10067 }, { "epoch": 0.6130986816064306, "grad_norm": 0.9614339553658057, "learning_rate": 4.8745301078826114e-06, "loss": 0.4789, "step": 10068 }, { "epoch": 0.6131595773833085, "grad_norm": 0.997664545126744, "learning_rate": 4.874505144857047e-06, "loss": 0.4299, "step": 10069 }, { "epoch": 0.6132204731601864, "grad_norm": 0.9212100901442909, "learning_rate": 4.874480179412386e-06, "loss": 0.4581, "step": 10070 }, { "epoch": 0.6132813689370642, "grad_norm": 1.0492504627394215, "learning_rate": 4.874455211548655e-06, "loss": 0.3845, "step": 10071 }, { "epoch": 0.613342264713942, "grad_norm": 1.0904013692289443, "learning_rate": 4.874430241265879e-06, "loss": 0.3955, "step": 10072 }, { "epoch": 0.61340316049082, "grad_norm": 1.0744177487900513, "learning_rate": 4.874405268564081e-06, "loss": 0.3883, "step": 10073 }, { "epoch": 0.6134640562676978, "grad_norm": 1.0290481161326175, "learning_rate": 4.8743802934432895e-06, "loss": 0.5109, "step": 10074 }, { "epoch": 0.6135249520445757, "grad_norm": 0.9914545674595681, "learning_rate": 4.8743553159035284e-06, "loss": 0.4411, "step": 10075 }, { "epoch": 0.6135858478214535, "grad_norm": 1.1120497215527043, "learning_rate": 4.874330335944823e-06, "loss": 0.3788, "step": 10076 }, { "epoch": 0.6136467435983315, "grad_norm": 1.009505059990995, "learning_rate": 4.8743053535672e-06, "loss": 0.4904, "step": 10077 }, { "epoch": 0.6137076393752093, "grad_norm": 1.0229842927153605, "learning_rate": 4.874280368770683e-06, "loss": 0.4693, "step": 10078 }, { "epoch": 0.6137685351520872, "grad_norm": 1.04115085736904, "learning_rate": 4.8742553815552994e-06, "loss": 0.4316, "step": 10079 }, { "epoch": 0.613829430928965, "grad_norm": 0.984709925759965, "learning_rate": 4.8742303919210735e-06, "loss": 0.4664, "step": 10080 }, { "epoch": 0.613890326705843, "grad_norm": 1.0862216864356777, "learning_rate": 4.874205399868031e-06, "loss": 0.4109, "step": 10081 }, { "epoch": 0.6139512224827208, "grad_norm": 0.9923500436036335, "learning_rate": 4.874180405396198e-06, "loss": 0.4474, "step": 10082 }, { "epoch": 0.6140121182595987, "grad_norm": 0.9338152511680745, "learning_rate": 4.874155408505599e-06, "loss": 0.4671, "step": 10083 }, { "epoch": 0.6140730140364765, "grad_norm": 1.0068447946368406, "learning_rate": 4.874130409196259e-06, "loss": 0.4827, "step": 10084 }, { "epoch": 0.6141339098133545, "grad_norm": 1.0382673927565558, "learning_rate": 4.874105407468205e-06, "loss": 0.4482, "step": 10085 }, { "epoch": 0.6141948055902323, "grad_norm": 0.9497582300162176, "learning_rate": 4.874080403321462e-06, "loss": 0.4375, "step": 10086 }, { "epoch": 0.6142557013671102, "grad_norm": 0.8794633423939784, "learning_rate": 4.874055396756056e-06, "loss": 0.4551, "step": 10087 }, { "epoch": 0.6143165971439881, "grad_norm": 1.0739349221179213, "learning_rate": 4.87403038777201e-06, "loss": 0.3878, "step": 10088 }, { "epoch": 0.614377492920866, "grad_norm": 0.944228177764833, "learning_rate": 4.8740053763693515e-06, "loss": 0.3803, "step": 10089 }, { "epoch": 0.6144383886977438, "grad_norm": 0.9638620818117034, "learning_rate": 4.8739803625481065e-06, "loss": 0.4482, "step": 10090 }, { "epoch": 0.6144992844746217, "grad_norm": 0.9421651877479948, "learning_rate": 4.8739553463082995e-06, "loss": 0.4539, "step": 10091 }, { "epoch": 0.6145601802514996, "grad_norm": 0.9842412990793195, "learning_rate": 4.873930327649956e-06, "loss": 0.398, "step": 10092 }, { "epoch": 0.6146210760283775, "grad_norm": 1.0344889580471128, "learning_rate": 4.873905306573101e-06, "loss": 0.4146, "step": 10093 }, { "epoch": 0.6146819718052553, "grad_norm": 1.1582824343436218, "learning_rate": 4.873880283077762e-06, "loss": 0.4172, "step": 10094 }, { "epoch": 0.6147428675821331, "grad_norm": 1.0421477562140686, "learning_rate": 4.873855257163962e-06, "loss": 0.3523, "step": 10095 }, { "epoch": 0.6148037633590111, "grad_norm": 1.101787152544934, "learning_rate": 4.873830228831728e-06, "loss": 0.4064, "step": 10096 }, { "epoch": 0.614864659135889, "grad_norm": 1.0296360825479558, "learning_rate": 4.873805198081085e-06, "loss": 0.3991, "step": 10097 }, { "epoch": 0.6149255549127668, "grad_norm": 1.0758662847130374, "learning_rate": 4.873780164912058e-06, "loss": 0.4053, "step": 10098 }, { "epoch": 0.6149864506896446, "grad_norm": 1.061490405683345, "learning_rate": 4.8737551293246745e-06, "loss": 0.4016, "step": 10099 }, { "epoch": 0.6150473464665226, "grad_norm": 1.1088110037402774, "learning_rate": 4.8737300913189575e-06, "loss": 0.5159, "step": 10100 }, { "epoch": 0.6151082422434004, "grad_norm": 0.9503151942531283, "learning_rate": 4.873705050894934e-06, "loss": 0.4701, "step": 10101 }, { "epoch": 0.6151691380202783, "grad_norm": 0.9799644535407999, "learning_rate": 4.8736800080526295e-06, "loss": 0.4613, "step": 10102 }, { "epoch": 0.6152300337971561, "grad_norm": 1.064244694968557, "learning_rate": 4.873654962792069e-06, "loss": 0.4018, "step": 10103 }, { "epoch": 0.6152909295740341, "grad_norm": 1.0601721422870511, "learning_rate": 4.873629915113278e-06, "loss": 0.3977, "step": 10104 }, { "epoch": 0.6153518253509119, "grad_norm": 0.9846736553266396, "learning_rate": 4.873604865016282e-06, "loss": 0.4077, "step": 10105 }, { "epoch": 0.6154127211277898, "grad_norm": 0.9940763799695538, "learning_rate": 4.873579812501107e-06, "loss": 0.4156, "step": 10106 }, { "epoch": 0.6154736169046676, "grad_norm": 0.9619182355032825, "learning_rate": 4.873554757567778e-06, "loss": 0.4309, "step": 10107 }, { "epoch": 0.6155345126815456, "grad_norm": 1.0047438251519343, "learning_rate": 4.873529700216321e-06, "loss": 0.4288, "step": 10108 }, { "epoch": 0.6155954084584234, "grad_norm": 1.029435538357581, "learning_rate": 4.8735046404467615e-06, "loss": 0.4074, "step": 10109 }, { "epoch": 0.6156563042353013, "grad_norm": 0.9295633348294393, "learning_rate": 4.873479578259125e-06, "loss": 0.4514, "step": 10110 }, { "epoch": 0.6157172000121791, "grad_norm": 1.0195950448858315, "learning_rate": 4.8734545136534364e-06, "loss": 0.4321, "step": 10111 }, { "epoch": 0.6157780957890571, "grad_norm": 1.0329493219437111, "learning_rate": 4.873429446629721e-06, "loss": 0.4486, "step": 10112 }, { "epoch": 0.6158389915659349, "grad_norm": 1.0588248887124778, "learning_rate": 4.873404377188006e-06, "loss": 0.4283, "step": 10113 }, { "epoch": 0.6158998873428128, "grad_norm": 1.027970346149615, "learning_rate": 4.8733793053283155e-06, "loss": 0.482, "step": 10114 }, { "epoch": 0.6159607831196906, "grad_norm": 1.0058183138861274, "learning_rate": 4.873354231050676e-06, "loss": 0.4351, "step": 10115 }, { "epoch": 0.6160216788965686, "grad_norm": 1.0287588321371484, "learning_rate": 4.873329154355112e-06, "loss": 0.3792, "step": 10116 }, { "epoch": 0.6160825746734464, "grad_norm": 0.9942092373143694, "learning_rate": 4.873304075241649e-06, "loss": 0.4596, "step": 10117 }, { "epoch": 0.6161434704503242, "grad_norm": 0.9955053416729597, "learning_rate": 4.873278993710315e-06, "loss": 0.4579, "step": 10118 }, { "epoch": 0.6162043662272021, "grad_norm": 0.9561670634464088, "learning_rate": 4.873253909761132e-06, "loss": 0.4187, "step": 10119 }, { "epoch": 0.61626526200408, "grad_norm": 0.9475457246298538, "learning_rate": 4.873228823394128e-06, "loss": 0.4475, "step": 10120 }, { "epoch": 0.6163261577809579, "grad_norm": 0.9777161154858452, "learning_rate": 4.873203734609328e-06, "loss": 0.3686, "step": 10121 }, { "epoch": 0.6163870535578357, "grad_norm": 1.003818073770187, "learning_rate": 4.873178643406757e-06, "loss": 0.448, "step": 10122 }, { "epoch": 0.6164479493347136, "grad_norm": 1.0536437382140798, "learning_rate": 4.8731535497864414e-06, "loss": 0.3979, "step": 10123 }, { "epoch": 0.6165088451115915, "grad_norm": 1.0041398219702156, "learning_rate": 4.873128453748406e-06, "loss": 0.4281, "step": 10124 }, { "epoch": 0.6165697408884694, "grad_norm": 1.1387350098131235, "learning_rate": 4.8731033552926765e-06, "loss": 0.3837, "step": 10125 }, { "epoch": 0.6166306366653472, "grad_norm": 1.01587528395367, "learning_rate": 4.873078254419279e-06, "loss": 0.3816, "step": 10126 }, { "epoch": 0.6166915324422251, "grad_norm": 1.0467830154380897, "learning_rate": 4.873053151128238e-06, "loss": 0.4491, "step": 10127 }, { "epoch": 0.616752428219103, "grad_norm": 0.9774566723314404, "learning_rate": 4.873028045419581e-06, "loss": 0.4175, "step": 10128 }, { "epoch": 0.6168133239959809, "grad_norm": 0.9330009159815903, "learning_rate": 4.8730029372933315e-06, "loss": 0.4943, "step": 10129 }, { "epoch": 0.6168742197728587, "grad_norm": 0.9526000671629751, "learning_rate": 4.872977826749515e-06, "loss": 0.4155, "step": 10130 }, { "epoch": 0.6169351155497367, "grad_norm": 0.9985920990217747, "learning_rate": 4.8729527137881596e-06, "loss": 0.4241, "step": 10131 }, { "epoch": 0.6169960113266145, "grad_norm": 0.9416841660943072, "learning_rate": 4.87292759840929e-06, "loss": 0.4659, "step": 10132 }, { "epoch": 0.6170569071034924, "grad_norm": 0.9830072387337656, "learning_rate": 4.872902480612929e-06, "loss": 0.3972, "step": 10133 }, { "epoch": 0.6171178028803702, "grad_norm": 0.948112311128051, "learning_rate": 4.872877360399105e-06, "loss": 0.4607, "step": 10134 }, { "epoch": 0.6171786986572482, "grad_norm": 0.9753997728334182, "learning_rate": 4.872852237767844e-06, "loss": 0.4862, "step": 10135 }, { "epoch": 0.617239594434126, "grad_norm": 0.9724469783385365, "learning_rate": 4.872827112719169e-06, "loss": 0.4673, "step": 10136 }, { "epoch": 0.6173004902110039, "grad_norm": 0.9993898395978047, "learning_rate": 4.872801985253107e-06, "loss": 0.4388, "step": 10137 }, { "epoch": 0.6173613859878817, "grad_norm": 1.052562968727812, "learning_rate": 4.872776855369685e-06, "loss": 0.4206, "step": 10138 }, { "epoch": 0.6174222817647597, "grad_norm": 0.9907245099210927, "learning_rate": 4.872751723068926e-06, "loss": 0.4192, "step": 10139 }, { "epoch": 0.6174831775416375, "grad_norm": 1.0412712132563169, "learning_rate": 4.872726588350858e-06, "loss": 0.4534, "step": 10140 }, { "epoch": 0.6175440733185154, "grad_norm": 1.1324079402341651, "learning_rate": 4.872701451215505e-06, "loss": 0.398, "step": 10141 }, { "epoch": 0.6176049690953932, "grad_norm": 1.0984948457551813, "learning_rate": 4.872676311662893e-06, "loss": 0.4469, "step": 10142 }, { "epoch": 0.6176658648722712, "grad_norm": 0.9535034469423387, "learning_rate": 4.872651169693048e-06, "loss": 0.5592, "step": 10143 }, { "epoch": 0.617726760649149, "grad_norm": 1.0112075085106746, "learning_rate": 4.8726260253059945e-06, "loss": 0.4471, "step": 10144 }, { "epoch": 0.6177876564260268, "grad_norm": 0.9957773759064715, "learning_rate": 4.87260087850176e-06, "loss": 0.4098, "step": 10145 }, { "epoch": 0.6178485522029047, "grad_norm": 1.1193761101411752, "learning_rate": 4.872575729280368e-06, "loss": 0.4167, "step": 10146 }, { "epoch": 0.6179094479797826, "grad_norm": 1.1094699653767133, "learning_rate": 4.8725505776418455e-06, "loss": 0.3988, "step": 10147 }, { "epoch": 0.6179703437566605, "grad_norm": 1.0493418904856562, "learning_rate": 4.872525423586219e-06, "loss": 0.4754, "step": 10148 }, { "epoch": 0.6180312395335383, "grad_norm": 1.0175700712064546, "learning_rate": 4.8725002671135115e-06, "loss": 0.4842, "step": 10149 }, { "epoch": 0.6180921353104162, "grad_norm": 1.0201002402610972, "learning_rate": 4.87247510822375e-06, "loss": 0.4417, "step": 10150 }, { "epoch": 0.6181530310872941, "grad_norm": 1.0614145487723157, "learning_rate": 4.8724499469169604e-06, "loss": 0.4043, "step": 10151 }, { "epoch": 0.618213926864172, "grad_norm": 1.0579796421329977, "learning_rate": 4.872424783193168e-06, "loss": 0.3653, "step": 10152 }, { "epoch": 0.6182748226410498, "grad_norm": 1.037405647140387, "learning_rate": 4.8723996170524e-06, "loss": 0.4532, "step": 10153 }, { "epoch": 0.6183357184179277, "grad_norm": 1.036525704747745, "learning_rate": 4.872374448494679e-06, "loss": 0.4008, "step": 10154 }, { "epoch": 0.6183966141948056, "grad_norm": 0.9761851675705131, "learning_rate": 4.872349277520033e-06, "loss": 0.4572, "step": 10155 }, { "epoch": 0.6184575099716835, "grad_norm": 1.1042169548941942, "learning_rate": 4.8723241041284865e-06, "loss": 0.4536, "step": 10156 }, { "epoch": 0.6185184057485613, "grad_norm": 0.9826540417354842, "learning_rate": 4.872298928320066e-06, "loss": 0.4335, "step": 10157 }, { "epoch": 0.6185793015254392, "grad_norm": 0.9836212724974163, "learning_rate": 4.8722737500947955e-06, "loss": 0.4535, "step": 10158 }, { "epoch": 0.6186401973023171, "grad_norm": 1.002854819084662, "learning_rate": 4.8722485694527036e-06, "loss": 0.4283, "step": 10159 }, { "epoch": 0.618701093079195, "grad_norm": 0.9758789462510689, "learning_rate": 4.872223386393813e-06, "loss": 0.4658, "step": 10160 }, { "epoch": 0.6187619888560728, "grad_norm": 1.0396513198355128, "learning_rate": 4.872198200918151e-06, "loss": 0.4733, "step": 10161 }, { "epoch": 0.6188228846329507, "grad_norm": 1.008726053040934, "learning_rate": 4.872173013025742e-06, "loss": 0.4356, "step": 10162 }, { "epoch": 0.6188837804098286, "grad_norm": 0.9817788412365361, "learning_rate": 4.872147822716613e-06, "loss": 0.385, "step": 10163 }, { "epoch": 0.6189446761867065, "grad_norm": 1.058989083270772, "learning_rate": 4.87212262999079e-06, "loss": 0.3996, "step": 10164 }, { "epoch": 0.6190055719635843, "grad_norm": 0.9519357692832644, "learning_rate": 4.872097434848296e-06, "loss": 0.4207, "step": 10165 }, { "epoch": 0.6190664677404621, "grad_norm": 1.1275994939525418, "learning_rate": 4.8720722372891596e-06, "loss": 0.3509, "step": 10166 }, { "epoch": 0.6191273635173401, "grad_norm": 0.9983710531618606, "learning_rate": 4.872047037313405e-06, "loss": 0.3858, "step": 10167 }, { "epoch": 0.619188259294218, "grad_norm": 0.99480025893478, "learning_rate": 4.872021834921059e-06, "loss": 0.449, "step": 10168 }, { "epoch": 0.6192491550710958, "grad_norm": 1.151367702590325, "learning_rate": 4.8719966301121454e-06, "loss": 0.4045, "step": 10169 }, { "epoch": 0.6193100508479737, "grad_norm": 0.9313437442836764, "learning_rate": 4.871971422886691e-06, "loss": 0.4461, "step": 10170 }, { "epoch": 0.6193709466248516, "grad_norm": 0.9447881511592159, "learning_rate": 4.871946213244721e-06, "loss": 0.4468, "step": 10171 }, { "epoch": 0.6194318424017294, "grad_norm": 1.073973391771461, "learning_rate": 4.871921001186263e-06, "loss": 0.4179, "step": 10172 }, { "epoch": 0.6194927381786073, "grad_norm": 0.9950331398771944, "learning_rate": 4.8718957867113404e-06, "loss": 0.4517, "step": 10173 }, { "epoch": 0.6195536339554852, "grad_norm": 1.0266428531164578, "learning_rate": 4.87187056981998e-06, "loss": 0.4667, "step": 10174 }, { "epoch": 0.6196145297323631, "grad_norm": 0.9751882751514573, "learning_rate": 4.871845350512207e-06, "loss": 0.3926, "step": 10175 }, { "epoch": 0.6196754255092409, "grad_norm": 1.151709148298338, "learning_rate": 4.871820128788047e-06, "loss": 0.4337, "step": 10176 }, { "epoch": 0.6197363212861188, "grad_norm": 1.0439446276229345, "learning_rate": 4.871794904647526e-06, "loss": 0.4332, "step": 10177 }, { "epoch": 0.6197972170629967, "grad_norm": 1.0336091349664442, "learning_rate": 4.87176967809067e-06, "loss": 0.3652, "step": 10178 }, { "epoch": 0.6198581128398746, "grad_norm": 1.0299520468431214, "learning_rate": 4.871744449117504e-06, "loss": 0.4288, "step": 10179 }, { "epoch": 0.6199190086167524, "grad_norm": 1.068287550377862, "learning_rate": 4.8717192177280545e-06, "loss": 0.4021, "step": 10180 }, { "epoch": 0.6199799043936303, "grad_norm": 1.0984712545475015, "learning_rate": 4.871693983922346e-06, "loss": 0.4151, "step": 10181 }, { "epoch": 0.6200408001705082, "grad_norm": 1.0249343077239341, "learning_rate": 4.871668747700405e-06, "loss": 0.4659, "step": 10182 }, { "epoch": 0.6201016959473861, "grad_norm": 1.0480632880346734, "learning_rate": 4.871643509062258e-06, "loss": 0.4222, "step": 10183 }, { "epoch": 0.6201625917242639, "grad_norm": 1.105342234253779, "learning_rate": 4.87161826800793e-06, "loss": 0.4258, "step": 10184 }, { "epoch": 0.6202234875011418, "grad_norm": 1.0458980069737756, "learning_rate": 4.871593024537446e-06, "loss": 0.4147, "step": 10185 }, { "epoch": 0.6202843832780197, "grad_norm": 1.0270929650022815, "learning_rate": 4.871567778650833e-06, "loss": 0.4134, "step": 10186 }, { "epoch": 0.6203452790548976, "grad_norm": 0.9877400695372828, "learning_rate": 4.871542530348115e-06, "loss": 0.4757, "step": 10187 }, { "epoch": 0.6204061748317754, "grad_norm": 1.00355057179203, "learning_rate": 4.87151727962932e-06, "loss": 0.386, "step": 10188 }, { "epoch": 0.6204670706086532, "grad_norm": 1.0494436550119168, "learning_rate": 4.871492026494471e-06, "loss": 0.3719, "step": 10189 }, { "epoch": 0.6205279663855312, "grad_norm": 0.979873423603282, "learning_rate": 4.871466770943597e-06, "loss": 0.3948, "step": 10190 }, { "epoch": 0.620588862162409, "grad_norm": 0.9683075108880842, "learning_rate": 4.871441512976721e-06, "loss": 0.393, "step": 10191 }, { "epoch": 0.6206497579392869, "grad_norm": 0.9217320149346065, "learning_rate": 4.871416252593869e-06, "loss": 0.4902, "step": 10192 }, { "epoch": 0.6207106537161647, "grad_norm": 1.0128653694934038, "learning_rate": 4.871390989795068e-06, "loss": 0.4292, "step": 10193 }, { "epoch": 0.6207715494930427, "grad_norm": 0.9799055398199474, "learning_rate": 4.871365724580344e-06, "loss": 0.4051, "step": 10194 }, { "epoch": 0.6208324452699205, "grad_norm": 1.1385211136001516, "learning_rate": 4.871340456949721e-06, "loss": 0.3952, "step": 10195 }, { "epoch": 0.6208933410467984, "grad_norm": 1.007520135784371, "learning_rate": 4.871315186903226e-06, "loss": 0.4285, "step": 10196 }, { "epoch": 0.6209542368236762, "grad_norm": 0.9138425440341805, "learning_rate": 4.871289914440884e-06, "loss": 0.4833, "step": 10197 }, { "epoch": 0.6210151326005542, "grad_norm": 0.9629418320518208, "learning_rate": 4.871264639562722e-06, "loss": 0.4415, "step": 10198 }, { "epoch": 0.621076028377432, "grad_norm": 1.0611352518464539, "learning_rate": 4.871239362268764e-06, "loss": 0.3795, "step": 10199 }, { "epoch": 0.6211369241543099, "grad_norm": 1.0873942479632914, "learning_rate": 4.871214082559037e-06, "loss": 0.4016, "step": 10200 }, { "epoch": 0.6211978199311877, "grad_norm": 0.9391409503612073, "learning_rate": 4.871188800433566e-06, "loss": 0.4873, "step": 10201 }, { "epoch": 0.6212587157080657, "grad_norm": 0.9164751299207241, "learning_rate": 4.871163515892378e-06, "loss": 0.439, "step": 10202 }, { "epoch": 0.6213196114849435, "grad_norm": 1.116679853319958, "learning_rate": 4.871138228935497e-06, "loss": 0.4239, "step": 10203 }, { "epoch": 0.6213805072618214, "grad_norm": 1.0495819152560872, "learning_rate": 4.871112939562949e-06, "loss": 0.4744, "step": 10204 }, { "epoch": 0.6214414030386992, "grad_norm": 0.9317719735277602, "learning_rate": 4.871087647774762e-06, "loss": 0.5191, "step": 10205 }, { "epoch": 0.6215022988155772, "grad_norm": 0.9879788377795928, "learning_rate": 4.87106235357096e-06, "loss": 0.3991, "step": 10206 }, { "epoch": 0.621563194592455, "grad_norm": 0.9315443922842102, "learning_rate": 4.871037056951569e-06, "loss": 0.4287, "step": 10207 }, { "epoch": 0.6216240903693329, "grad_norm": 1.062690701780089, "learning_rate": 4.871011757916614e-06, "loss": 0.42, "step": 10208 }, { "epoch": 0.6216849861462107, "grad_norm": 0.9520656158989858, "learning_rate": 4.870986456466121e-06, "loss": 0.4453, "step": 10209 }, { "epoch": 0.6217458819230887, "grad_norm": 1.0328140227420068, "learning_rate": 4.870961152600118e-06, "loss": 0.4748, "step": 10210 }, { "epoch": 0.6218067776999665, "grad_norm": 0.9195233331484847, "learning_rate": 4.8709358463186276e-06, "loss": 0.5052, "step": 10211 }, { "epoch": 0.6218676734768444, "grad_norm": 0.9730935464393338, "learning_rate": 4.870910537621678e-06, "loss": 0.404, "step": 10212 }, { "epoch": 0.6219285692537223, "grad_norm": 0.9257754802854459, "learning_rate": 4.870885226509294e-06, "loss": 0.5058, "step": 10213 }, { "epoch": 0.6219894650306002, "grad_norm": 0.9757669905970598, "learning_rate": 4.870859912981501e-06, "loss": 0.5286, "step": 10214 }, { "epoch": 0.622050360807478, "grad_norm": 0.9746952052177718, "learning_rate": 4.870834597038325e-06, "loss": 0.4992, "step": 10215 }, { "epoch": 0.6221112565843558, "grad_norm": 1.050179048518058, "learning_rate": 4.870809278679793e-06, "loss": 0.4061, "step": 10216 }, { "epoch": 0.6221721523612338, "grad_norm": 1.0479782582143147, "learning_rate": 4.870783957905929e-06, "loss": 0.4537, "step": 10217 }, { "epoch": 0.6222330481381116, "grad_norm": 0.9428512621689263, "learning_rate": 4.87075863471676e-06, "loss": 0.4475, "step": 10218 }, { "epoch": 0.6222939439149895, "grad_norm": 0.9549469471556176, "learning_rate": 4.870733309112311e-06, "loss": 0.4361, "step": 10219 }, { "epoch": 0.6223548396918673, "grad_norm": 0.9579530958562986, "learning_rate": 4.8707079810926085e-06, "loss": 0.4781, "step": 10220 }, { "epoch": 0.6224157354687453, "grad_norm": 0.9604557277638592, "learning_rate": 4.870682650657678e-06, "loss": 0.4402, "step": 10221 }, { "epoch": 0.6224766312456231, "grad_norm": 0.9893430320393144, "learning_rate": 4.870657317807544e-06, "loss": 0.4995, "step": 10222 }, { "epoch": 0.622537527022501, "grad_norm": 1.0076701405567001, "learning_rate": 4.8706319825422355e-06, "loss": 0.4437, "step": 10223 }, { "epoch": 0.6225984227993788, "grad_norm": 1.0207206653356906, "learning_rate": 4.870606644861776e-06, "loss": 0.3947, "step": 10224 }, { "epoch": 0.6226593185762568, "grad_norm": 1.027771891943847, "learning_rate": 4.870581304766191e-06, "loss": 0.3538, "step": 10225 }, { "epoch": 0.6227202143531346, "grad_norm": 0.9833114843657974, "learning_rate": 4.870555962255508e-06, "loss": 0.4001, "step": 10226 }, { "epoch": 0.6227811101300125, "grad_norm": 0.9434760067518807, "learning_rate": 4.8705306173297506e-06, "loss": 0.4453, "step": 10227 }, { "epoch": 0.6228420059068903, "grad_norm": 1.0214021321568811, "learning_rate": 4.870505269988946e-06, "loss": 0.4316, "step": 10228 }, { "epoch": 0.6229029016837683, "grad_norm": 1.0158314556289965, "learning_rate": 4.870479920233121e-06, "loss": 0.4656, "step": 10229 }, { "epoch": 0.6229637974606461, "grad_norm": 1.0314236567344308, "learning_rate": 4.870454568062301e-06, "loss": 0.3562, "step": 10230 }, { "epoch": 0.623024693237524, "grad_norm": 0.9743029154379005, "learning_rate": 4.870429213476509e-06, "loss": 0.4587, "step": 10231 }, { "epoch": 0.6230855890144018, "grad_norm": 0.9774068393706826, "learning_rate": 4.870403856475775e-06, "loss": 0.4373, "step": 10232 }, { "epoch": 0.6231464847912798, "grad_norm": 0.9550055783674218, "learning_rate": 4.870378497060121e-06, "loss": 0.4011, "step": 10233 }, { "epoch": 0.6232073805681576, "grad_norm": 0.9659876273849167, "learning_rate": 4.8703531352295755e-06, "loss": 0.5239, "step": 10234 }, { "epoch": 0.6232682763450355, "grad_norm": 1.0676658025678, "learning_rate": 4.870327770984164e-06, "loss": 0.4977, "step": 10235 }, { "epoch": 0.6233291721219133, "grad_norm": 0.9171623772728816, "learning_rate": 4.870302404323911e-06, "loss": 0.4482, "step": 10236 }, { "epoch": 0.6233900678987913, "grad_norm": 0.9696636547616743, "learning_rate": 4.8702770352488435e-06, "loss": 0.5066, "step": 10237 }, { "epoch": 0.6234509636756691, "grad_norm": 0.9934457574819626, "learning_rate": 4.8702516637589876e-06, "loss": 0.5183, "step": 10238 }, { "epoch": 0.623511859452547, "grad_norm": 1.0021215192152966, "learning_rate": 4.870226289854369e-06, "loss": 0.4934, "step": 10239 }, { "epoch": 0.6235727552294248, "grad_norm": 0.9223575131994084, "learning_rate": 4.870200913535011e-06, "loss": 0.4329, "step": 10240 }, { "epoch": 0.6236336510063027, "grad_norm": 0.9099486804507819, "learning_rate": 4.8701755348009424e-06, "loss": 0.4787, "step": 10241 }, { "epoch": 0.6236945467831806, "grad_norm": 0.9587630479725446, "learning_rate": 4.8701501536521894e-06, "loss": 0.4084, "step": 10242 }, { "epoch": 0.6237554425600584, "grad_norm": 0.9747359874343962, "learning_rate": 4.870124770088776e-06, "loss": 0.4918, "step": 10243 }, { "epoch": 0.6238163383369363, "grad_norm": 0.9779551046521395, "learning_rate": 4.870099384110729e-06, "loss": 0.4768, "step": 10244 }, { "epoch": 0.6238772341138142, "grad_norm": 0.9904580893853915, "learning_rate": 4.870073995718073e-06, "loss": 0.4601, "step": 10245 }, { "epoch": 0.6239381298906921, "grad_norm": 0.9639910568367576, "learning_rate": 4.870048604910836e-06, "loss": 0.4697, "step": 10246 }, { "epoch": 0.6239990256675699, "grad_norm": 1.0975303730449235, "learning_rate": 4.870023211689042e-06, "loss": 0.5234, "step": 10247 }, { "epoch": 0.6240599214444478, "grad_norm": 0.9660923247202844, "learning_rate": 4.869997816052718e-06, "loss": 0.436, "step": 10248 }, { "epoch": 0.6241208172213257, "grad_norm": 0.9287138759937723, "learning_rate": 4.86997241800189e-06, "loss": 0.4857, "step": 10249 }, { "epoch": 0.6241817129982036, "grad_norm": 0.9902220226574756, "learning_rate": 4.869947017536583e-06, "loss": 0.4295, "step": 10250 }, { "epoch": 0.6242426087750814, "grad_norm": 0.9490800038641688, "learning_rate": 4.869921614656824e-06, "loss": 0.4602, "step": 10251 }, { "epoch": 0.6243035045519594, "grad_norm": 0.9879425657167527, "learning_rate": 4.869896209362637e-06, "loss": 0.406, "step": 10252 }, { "epoch": 0.6243644003288372, "grad_norm": 1.0035636207884144, "learning_rate": 4.869870801654048e-06, "loss": 0.3914, "step": 10253 }, { "epoch": 0.6244252961057151, "grad_norm": 1.04346727408117, "learning_rate": 4.869845391531086e-06, "loss": 0.4322, "step": 10254 }, { "epoch": 0.6244861918825929, "grad_norm": 0.947991406251578, "learning_rate": 4.8698199789937736e-06, "loss": 0.4956, "step": 10255 }, { "epoch": 0.6245470876594709, "grad_norm": 0.9961977500162436, "learning_rate": 4.869794564042139e-06, "loss": 0.5447, "step": 10256 }, { "epoch": 0.6246079834363487, "grad_norm": 1.0437947154672151, "learning_rate": 4.869769146676206e-06, "loss": 0.4383, "step": 10257 }, { "epoch": 0.6246688792132266, "grad_norm": 0.9632471355296727, "learning_rate": 4.869743726896002e-06, "loss": 0.4195, "step": 10258 }, { "epoch": 0.6247297749901044, "grad_norm": 0.9913914300792533, "learning_rate": 4.869718304701552e-06, "loss": 0.4065, "step": 10259 }, { "epoch": 0.6247906707669824, "grad_norm": 1.0888486043934331, "learning_rate": 4.869692880092882e-06, "loss": 0.4212, "step": 10260 }, { "epoch": 0.6248515665438602, "grad_norm": 0.9593228423039306, "learning_rate": 4.869667453070018e-06, "loss": 0.4396, "step": 10261 }, { "epoch": 0.624912462320738, "grad_norm": 0.9419087699230605, "learning_rate": 4.869642023632987e-06, "loss": 0.4439, "step": 10262 }, { "epoch": 0.6249733580976159, "grad_norm": 1.0163031975885237, "learning_rate": 4.869616591781814e-06, "loss": 0.4586, "step": 10263 }, { "epoch": 0.6250342538744939, "grad_norm": 0.9843515307108532, "learning_rate": 4.869591157516525e-06, "loss": 0.4386, "step": 10264 }, { "epoch": 0.6250951496513717, "grad_norm": 0.9610888942925475, "learning_rate": 4.869565720837144e-06, "loss": 0.4866, "step": 10265 }, { "epoch": 0.6251560454282495, "grad_norm": 1.109179210995592, "learning_rate": 4.8695402817437e-06, "loss": 0.4608, "step": 10266 }, { "epoch": 0.6252169412051274, "grad_norm": 1.045382667123221, "learning_rate": 4.869514840236218e-06, "loss": 0.4292, "step": 10267 }, { "epoch": 0.6252778369820053, "grad_norm": 1.0915773591237599, "learning_rate": 4.8694893963147225e-06, "loss": 0.4518, "step": 10268 }, { "epoch": 0.6253387327588832, "grad_norm": 0.9804062527928352, "learning_rate": 4.869463949979241e-06, "loss": 0.4579, "step": 10269 }, { "epoch": 0.625399628535761, "grad_norm": 1.0000678037297366, "learning_rate": 4.869438501229799e-06, "loss": 0.4024, "step": 10270 }, { "epoch": 0.6254605243126389, "grad_norm": 1.0001661301373619, "learning_rate": 4.869413050066423e-06, "loss": 0.4782, "step": 10271 }, { "epoch": 0.6255214200895168, "grad_norm": 0.989490962893769, "learning_rate": 4.869387596489137e-06, "loss": 0.4077, "step": 10272 }, { "epoch": 0.6255823158663947, "grad_norm": 0.9883459892486186, "learning_rate": 4.869362140497969e-06, "loss": 0.3723, "step": 10273 }, { "epoch": 0.6256432116432725, "grad_norm": 0.9185953526426247, "learning_rate": 4.869336682092943e-06, "loss": 0.4539, "step": 10274 }, { "epoch": 0.6257041074201504, "grad_norm": 1.0940991371187578, "learning_rate": 4.869311221274087e-06, "loss": 0.4198, "step": 10275 }, { "epoch": 0.6257650031970283, "grad_norm": 1.070659659711464, "learning_rate": 4.869285758041426e-06, "loss": 0.4208, "step": 10276 }, { "epoch": 0.6258258989739062, "grad_norm": 1.0018811883136127, "learning_rate": 4.869260292394986e-06, "loss": 0.3889, "step": 10277 }, { "epoch": 0.625886794750784, "grad_norm": 1.0242161628361364, "learning_rate": 4.869234824334792e-06, "loss": 0.428, "step": 10278 }, { "epoch": 0.6259476905276619, "grad_norm": 0.9871521558900025, "learning_rate": 4.869209353860872e-06, "loss": 0.4536, "step": 10279 }, { "epoch": 0.6260085863045398, "grad_norm": 0.9864308230038217, "learning_rate": 4.86918388097325e-06, "loss": 0.4628, "step": 10280 }, { "epoch": 0.6260694820814177, "grad_norm": 0.9341239402486599, "learning_rate": 4.8691584056719535e-06, "loss": 0.4378, "step": 10281 }, { "epoch": 0.6261303778582955, "grad_norm": 1.0443684876976718, "learning_rate": 4.869132927957007e-06, "loss": 0.4137, "step": 10282 }, { "epoch": 0.6261912736351734, "grad_norm": 1.0371239589031196, "learning_rate": 4.8691074478284365e-06, "loss": 0.4683, "step": 10283 }, { "epoch": 0.6262521694120513, "grad_norm": 0.9333097659616004, "learning_rate": 4.869081965286269e-06, "loss": 0.4453, "step": 10284 }, { "epoch": 0.6263130651889292, "grad_norm": 0.9486207393755195, "learning_rate": 4.869056480330531e-06, "loss": 0.412, "step": 10285 }, { "epoch": 0.626373960965807, "grad_norm": 0.9527330179298661, "learning_rate": 4.869030992961247e-06, "loss": 0.3832, "step": 10286 }, { "epoch": 0.6264348567426848, "grad_norm": 1.0416300369126414, "learning_rate": 4.869005503178443e-06, "loss": 0.3885, "step": 10287 }, { "epoch": 0.6264957525195628, "grad_norm": 0.9812553787637386, "learning_rate": 4.868980010982146e-06, "loss": 0.447, "step": 10288 }, { "epoch": 0.6265566482964406, "grad_norm": 1.0124282736701922, "learning_rate": 4.868954516372381e-06, "loss": 0.4505, "step": 10289 }, { "epoch": 0.6266175440733185, "grad_norm": 1.0591310245181251, "learning_rate": 4.8689290193491745e-06, "loss": 0.4731, "step": 10290 }, { "epoch": 0.6266784398501963, "grad_norm": 1.0027442487594191, "learning_rate": 4.8689035199125525e-06, "loss": 0.4587, "step": 10291 }, { "epoch": 0.6267393356270743, "grad_norm": 1.0351641140019445, "learning_rate": 4.868878018062541e-06, "loss": 0.4738, "step": 10292 }, { "epoch": 0.6268002314039521, "grad_norm": 1.0343268976316977, "learning_rate": 4.868852513799166e-06, "loss": 0.4475, "step": 10293 }, { "epoch": 0.62686112718083, "grad_norm": 1.0051988451878833, "learning_rate": 4.868827007122452e-06, "loss": 0.4857, "step": 10294 }, { "epoch": 0.6269220229577079, "grad_norm": 1.0304327906116784, "learning_rate": 4.868801498032428e-06, "loss": 0.5078, "step": 10295 }, { "epoch": 0.6269829187345858, "grad_norm": 1.163886925992294, "learning_rate": 4.8687759865291176e-06, "loss": 0.3962, "step": 10296 }, { "epoch": 0.6270438145114636, "grad_norm": 0.8808721547796513, "learning_rate": 4.868750472612547e-06, "loss": 0.4407, "step": 10297 }, { "epoch": 0.6271047102883415, "grad_norm": 1.052021619623573, "learning_rate": 4.868724956282743e-06, "loss": 0.3919, "step": 10298 }, { "epoch": 0.6271656060652194, "grad_norm": 1.0316894575853717, "learning_rate": 4.868699437539731e-06, "loss": 0.4257, "step": 10299 }, { "epoch": 0.6272265018420973, "grad_norm": 0.9593653012075105, "learning_rate": 4.8686739163835385e-06, "loss": 0.5019, "step": 10300 }, { "epoch": 0.6272873976189751, "grad_norm": 0.9910091455442681, "learning_rate": 4.868648392814189e-06, "loss": 0.4444, "step": 10301 }, { "epoch": 0.627348293395853, "grad_norm": 0.9709702305199369, "learning_rate": 4.86862286683171e-06, "loss": 0.4687, "step": 10302 }, { "epoch": 0.6274091891727309, "grad_norm": 1.0914034192097404, "learning_rate": 4.868597338436128e-06, "loss": 0.432, "step": 10303 }, { "epoch": 0.6274700849496088, "grad_norm": 1.0136828315517252, "learning_rate": 4.868571807627467e-06, "loss": 0.4447, "step": 10304 }, { "epoch": 0.6275309807264866, "grad_norm": 0.9868803999179172, "learning_rate": 4.868546274405755e-06, "loss": 0.4572, "step": 10305 }, { "epoch": 0.6275918765033645, "grad_norm": 1.040788968285088, "learning_rate": 4.868520738771017e-06, "loss": 0.3381, "step": 10306 }, { "epoch": 0.6276527722802424, "grad_norm": 1.0243718655112801, "learning_rate": 4.86849520072328e-06, "loss": 0.4708, "step": 10307 }, { "epoch": 0.6277136680571203, "grad_norm": 1.0602954869555883, "learning_rate": 4.868469660262569e-06, "loss": 0.4518, "step": 10308 }, { "epoch": 0.6277745638339981, "grad_norm": 0.9259339382272751, "learning_rate": 4.86844411738891e-06, "loss": 0.5032, "step": 10309 }, { "epoch": 0.627835459610876, "grad_norm": 0.9263734091290801, "learning_rate": 4.868418572102329e-06, "loss": 0.5028, "step": 10310 }, { "epoch": 0.6278963553877539, "grad_norm": 1.0088398614260259, "learning_rate": 4.868393024402853e-06, "loss": 0.4745, "step": 10311 }, { "epoch": 0.6279572511646317, "grad_norm": 0.9922638489816875, "learning_rate": 4.868367474290508e-06, "loss": 0.3784, "step": 10312 }, { "epoch": 0.6280181469415096, "grad_norm": 0.8826703085072444, "learning_rate": 4.868341921765319e-06, "loss": 0.5368, "step": 10313 }, { "epoch": 0.6280790427183874, "grad_norm": 1.0399312517483812, "learning_rate": 4.868316366827312e-06, "loss": 0.3852, "step": 10314 }, { "epoch": 0.6281399384952654, "grad_norm": 1.0042102315927661, "learning_rate": 4.868290809476514e-06, "loss": 0.4609, "step": 10315 }, { "epoch": 0.6282008342721432, "grad_norm": 0.9390160119227783, "learning_rate": 4.86826524971295e-06, "loss": 0.4167, "step": 10316 }, { "epoch": 0.6282617300490211, "grad_norm": 0.9519302383968793, "learning_rate": 4.868239687536648e-06, "loss": 0.4481, "step": 10317 }, { "epoch": 0.6283226258258989, "grad_norm": 0.9317436325253816, "learning_rate": 4.868214122947631e-06, "loss": 0.4388, "step": 10318 }, { "epoch": 0.6283835216027769, "grad_norm": 0.9431254560786274, "learning_rate": 4.868188555945928e-06, "loss": 0.3953, "step": 10319 }, { "epoch": 0.6284444173796547, "grad_norm": 1.0300891413974795, "learning_rate": 4.868162986531563e-06, "loss": 0.3755, "step": 10320 }, { "epoch": 0.6285053131565326, "grad_norm": 1.0232167189662749, "learning_rate": 4.868137414704563e-06, "loss": 0.4746, "step": 10321 }, { "epoch": 0.6285662089334104, "grad_norm": 0.9992797529766038, "learning_rate": 4.868111840464954e-06, "loss": 0.3881, "step": 10322 }, { "epoch": 0.6286271047102884, "grad_norm": 1.1050026966756987, "learning_rate": 4.868086263812761e-06, "loss": 0.4576, "step": 10323 }, { "epoch": 0.6286880004871662, "grad_norm": 1.027962526392509, "learning_rate": 4.8680606847480115e-06, "loss": 0.429, "step": 10324 }, { "epoch": 0.6287488962640441, "grad_norm": 1.053925224447216, "learning_rate": 4.868035103270732e-06, "loss": 0.4113, "step": 10325 }, { "epoch": 0.6288097920409219, "grad_norm": 1.0349376382510238, "learning_rate": 4.8680095193809464e-06, "loss": 0.422, "step": 10326 }, { "epoch": 0.6288706878177999, "grad_norm": 1.0197236016247622, "learning_rate": 4.867983933078682e-06, "loss": 0.4642, "step": 10327 }, { "epoch": 0.6289315835946777, "grad_norm": 1.0528818836790839, "learning_rate": 4.867958344363965e-06, "loss": 0.4046, "step": 10328 }, { "epoch": 0.6289924793715556, "grad_norm": 1.0143250856587274, "learning_rate": 4.867932753236821e-06, "loss": 0.3971, "step": 10329 }, { "epoch": 0.6290533751484334, "grad_norm": 1.125569929349754, "learning_rate": 4.867907159697277e-06, "loss": 0.4315, "step": 10330 }, { "epoch": 0.6291142709253114, "grad_norm": 1.008485078782145, "learning_rate": 4.867881563745358e-06, "loss": 0.4312, "step": 10331 }, { "epoch": 0.6291751667021892, "grad_norm": 1.0181781524421272, "learning_rate": 4.86785596538109e-06, "loss": 0.4734, "step": 10332 }, { "epoch": 0.629236062479067, "grad_norm": 1.0861470976213885, "learning_rate": 4.8678303646045e-06, "loss": 0.3935, "step": 10333 }, { "epoch": 0.629296958255945, "grad_norm": 0.9667465532740931, "learning_rate": 4.8678047614156145e-06, "loss": 0.4126, "step": 10334 }, { "epoch": 0.6293578540328228, "grad_norm": 0.9346300730634888, "learning_rate": 4.867779155814458e-06, "loss": 0.5151, "step": 10335 }, { "epoch": 0.6294187498097007, "grad_norm": 0.9915396854195064, "learning_rate": 4.867753547801057e-06, "loss": 0.5089, "step": 10336 }, { "epoch": 0.6294796455865785, "grad_norm": 0.9698646356642212, "learning_rate": 4.867727937375438e-06, "loss": 0.4603, "step": 10337 }, { "epoch": 0.6295405413634565, "grad_norm": 0.9284116964876804, "learning_rate": 4.8677023245376274e-06, "loss": 0.5023, "step": 10338 }, { "epoch": 0.6296014371403343, "grad_norm": 0.9991763071209717, "learning_rate": 4.867676709287651e-06, "loss": 0.4599, "step": 10339 }, { "epoch": 0.6296623329172122, "grad_norm": 1.0823420020410905, "learning_rate": 4.867651091625534e-06, "loss": 0.4232, "step": 10340 }, { "epoch": 0.62972322869409, "grad_norm": 1.1081754149149037, "learning_rate": 4.867625471551304e-06, "loss": 0.4264, "step": 10341 }, { "epoch": 0.629784124470968, "grad_norm": 1.0117311587638225, "learning_rate": 4.8675998490649865e-06, "loss": 0.3885, "step": 10342 }, { "epoch": 0.6298450202478458, "grad_norm": 0.9576106353183063, "learning_rate": 4.867574224166607e-06, "loss": 0.4214, "step": 10343 }, { "epoch": 0.6299059160247237, "grad_norm": 1.00169930990106, "learning_rate": 4.867548596856193e-06, "loss": 0.3963, "step": 10344 }, { "epoch": 0.6299668118016015, "grad_norm": 1.0768588027076964, "learning_rate": 4.867522967133769e-06, "loss": 0.3859, "step": 10345 }, { "epoch": 0.6300277075784795, "grad_norm": 0.9792156813685233, "learning_rate": 4.867497334999362e-06, "loss": 0.4183, "step": 10346 }, { "epoch": 0.6300886033553573, "grad_norm": 0.9368754751024237, "learning_rate": 4.867471700452997e-06, "loss": 0.4047, "step": 10347 }, { "epoch": 0.6301494991322352, "grad_norm": 1.083599759370685, "learning_rate": 4.867446063494702e-06, "loss": 0.407, "step": 10348 }, { "epoch": 0.630210394909113, "grad_norm": 1.0031696896890923, "learning_rate": 4.867420424124502e-06, "loss": 0.4101, "step": 10349 }, { "epoch": 0.630271290685991, "grad_norm": 1.0906045300419116, "learning_rate": 4.8673947823424225e-06, "loss": 0.4176, "step": 10350 }, { "epoch": 0.6303321864628688, "grad_norm": 1.0857660602392667, "learning_rate": 4.867369138148492e-06, "loss": 0.4181, "step": 10351 }, { "epoch": 0.6303930822397467, "grad_norm": 1.06322870834352, "learning_rate": 4.867343491542734e-06, "loss": 0.4548, "step": 10352 }, { "epoch": 0.6304539780166245, "grad_norm": 1.1164840864987193, "learning_rate": 4.867317842525176e-06, "loss": 0.3688, "step": 10353 }, { "epoch": 0.6305148737935025, "grad_norm": 0.9935924316303018, "learning_rate": 4.867292191095844e-06, "loss": 0.4944, "step": 10354 }, { "epoch": 0.6305757695703803, "grad_norm": 1.0181999237590895, "learning_rate": 4.867266537254763e-06, "loss": 0.444, "step": 10355 }, { "epoch": 0.6306366653472582, "grad_norm": 0.9512887760142956, "learning_rate": 4.867240881001961e-06, "loss": 0.5163, "step": 10356 }, { "epoch": 0.630697561124136, "grad_norm": 0.9584702762179481, "learning_rate": 4.867215222337463e-06, "loss": 0.4537, "step": 10357 }, { "epoch": 0.630758456901014, "grad_norm": 0.9714419967360499, "learning_rate": 4.867189561261296e-06, "loss": 0.4191, "step": 10358 }, { "epoch": 0.6308193526778918, "grad_norm": 1.0075217881778118, "learning_rate": 4.867163897773484e-06, "loss": 0.4008, "step": 10359 }, { "epoch": 0.6308802484547696, "grad_norm": 0.9051657412018207, "learning_rate": 4.867138231874056e-06, "loss": 0.4979, "step": 10360 }, { "epoch": 0.6309411442316475, "grad_norm": 0.970971248413331, "learning_rate": 4.867112563563036e-06, "loss": 0.4032, "step": 10361 }, { "epoch": 0.6310020400085254, "grad_norm": 1.0641582623070573, "learning_rate": 4.8670868928404505e-06, "loss": 0.435, "step": 10362 }, { "epoch": 0.6310629357854033, "grad_norm": 0.9799487570146793, "learning_rate": 4.867061219706327e-06, "loss": 0.4473, "step": 10363 }, { "epoch": 0.6311238315622811, "grad_norm": 0.9572394943389859, "learning_rate": 4.867035544160691e-06, "loss": 0.4188, "step": 10364 }, { "epoch": 0.631184727339159, "grad_norm": 1.0073979709994954, "learning_rate": 4.867009866203567e-06, "loss": 0.3409, "step": 10365 }, { "epoch": 0.6312456231160369, "grad_norm": 0.9264839483902201, "learning_rate": 4.866984185834984e-06, "loss": 0.4864, "step": 10366 }, { "epoch": 0.6313065188929148, "grad_norm": 1.0610665501953758, "learning_rate": 4.866958503054966e-06, "loss": 0.3405, "step": 10367 }, { "epoch": 0.6313674146697926, "grad_norm": 1.0286861854513558, "learning_rate": 4.86693281786354e-06, "loss": 0.4438, "step": 10368 }, { "epoch": 0.6314283104466705, "grad_norm": 0.9848950229339616, "learning_rate": 4.866907130260732e-06, "loss": 0.4729, "step": 10369 }, { "epoch": 0.6314892062235484, "grad_norm": 1.1184190544570647, "learning_rate": 4.866881440246568e-06, "loss": 0.3455, "step": 10370 }, { "epoch": 0.6315501020004263, "grad_norm": 0.986603598896068, "learning_rate": 4.866855747821075e-06, "loss": 0.4172, "step": 10371 }, { "epoch": 0.6316109977773041, "grad_norm": 1.0490491784683118, "learning_rate": 4.8668300529842784e-06, "loss": 0.3916, "step": 10372 }, { "epoch": 0.631671893554182, "grad_norm": 1.061059763888569, "learning_rate": 4.866804355736204e-06, "loss": 0.463, "step": 10373 }, { "epoch": 0.6317327893310599, "grad_norm": 1.1465970666386112, "learning_rate": 4.8667786560768795e-06, "loss": 0.3692, "step": 10374 }, { "epoch": 0.6317936851079378, "grad_norm": 1.0469773336270207, "learning_rate": 4.86675295400633e-06, "loss": 0.355, "step": 10375 }, { "epoch": 0.6318545808848156, "grad_norm": 0.9872897562716081, "learning_rate": 4.866727249524581e-06, "loss": 0.4576, "step": 10376 }, { "epoch": 0.6319154766616936, "grad_norm": 0.9785481132390957, "learning_rate": 4.86670154263166e-06, "loss": 0.4282, "step": 10377 }, { "epoch": 0.6319763724385714, "grad_norm": 1.0171098370653024, "learning_rate": 4.866675833327592e-06, "loss": 0.4568, "step": 10378 }, { "epoch": 0.6320372682154493, "grad_norm": 1.0065766066074258, "learning_rate": 4.866650121612404e-06, "loss": 0.4242, "step": 10379 }, { "epoch": 0.6320981639923271, "grad_norm": 0.9487094926100337, "learning_rate": 4.866624407486123e-06, "loss": 0.4323, "step": 10380 }, { "epoch": 0.6321590597692051, "grad_norm": 0.8814952523501804, "learning_rate": 4.866598690948774e-06, "loss": 0.5025, "step": 10381 }, { "epoch": 0.6322199555460829, "grad_norm": 1.0585946882182165, "learning_rate": 4.866572972000383e-06, "loss": 0.4142, "step": 10382 }, { "epoch": 0.6322808513229607, "grad_norm": 0.9901589004998231, "learning_rate": 4.866547250640976e-06, "loss": 0.4976, "step": 10383 }, { "epoch": 0.6323417470998386, "grad_norm": 1.026224585354919, "learning_rate": 4.866521526870582e-06, "loss": 0.4021, "step": 10384 }, { "epoch": 0.6324026428767165, "grad_norm": 1.0284015762306626, "learning_rate": 4.866495800689223e-06, "loss": 0.3709, "step": 10385 }, { "epoch": 0.6324635386535944, "grad_norm": 0.9404620390291032, "learning_rate": 4.866470072096928e-06, "loss": 0.4592, "step": 10386 }, { "epoch": 0.6325244344304722, "grad_norm": 1.0290091053872912, "learning_rate": 4.866444341093722e-06, "loss": 0.4204, "step": 10387 }, { "epoch": 0.6325853302073501, "grad_norm": 1.0228651313565837, "learning_rate": 4.866418607679633e-06, "loss": 0.3761, "step": 10388 }, { "epoch": 0.632646225984228, "grad_norm": 0.9895507001675763, "learning_rate": 4.866392871854685e-06, "loss": 0.4451, "step": 10389 }, { "epoch": 0.6327071217611059, "grad_norm": 1.0480704379991768, "learning_rate": 4.866367133618905e-06, "loss": 0.4194, "step": 10390 }, { "epoch": 0.6327680175379837, "grad_norm": 1.069524341083823, "learning_rate": 4.86634139297232e-06, "loss": 0.4399, "step": 10391 }, { "epoch": 0.6328289133148616, "grad_norm": 1.0004671145429824, "learning_rate": 4.866315649914955e-06, "loss": 0.4248, "step": 10392 }, { "epoch": 0.6328898090917395, "grad_norm": 0.9511201512994187, "learning_rate": 4.866289904446837e-06, "loss": 0.4937, "step": 10393 }, { "epoch": 0.6329507048686174, "grad_norm": 0.9678206646715661, "learning_rate": 4.866264156567992e-06, "loss": 0.4592, "step": 10394 }, { "epoch": 0.6330116006454952, "grad_norm": 1.0112319382814867, "learning_rate": 4.866238406278446e-06, "loss": 0.3971, "step": 10395 }, { "epoch": 0.6330724964223731, "grad_norm": 0.988487743498981, "learning_rate": 4.866212653578226e-06, "loss": 0.4293, "step": 10396 }, { "epoch": 0.633133392199251, "grad_norm": 0.990583525526602, "learning_rate": 4.866186898467358e-06, "loss": 0.4452, "step": 10397 }, { "epoch": 0.6331942879761289, "grad_norm": 1.04108510065041, "learning_rate": 4.8661611409458675e-06, "loss": 0.4532, "step": 10398 }, { "epoch": 0.6332551837530067, "grad_norm": 1.0487171351137403, "learning_rate": 4.8661353810137814e-06, "loss": 0.4786, "step": 10399 }, { "epoch": 0.6333160795298846, "grad_norm": 1.056617641764981, "learning_rate": 4.866109618671125e-06, "loss": 0.4614, "step": 10400 }, { "epoch": 0.6333769753067625, "grad_norm": 0.9258110298698121, "learning_rate": 4.866083853917927e-06, "loss": 0.4297, "step": 10401 }, { "epoch": 0.6334378710836404, "grad_norm": 1.001745905748793, "learning_rate": 4.8660580867542105e-06, "loss": 0.4345, "step": 10402 }, { "epoch": 0.6334987668605182, "grad_norm": 0.9449175046521987, "learning_rate": 4.866032317180004e-06, "loss": 0.44, "step": 10403 }, { "epoch": 0.633559662637396, "grad_norm": 0.9177540021195816, "learning_rate": 4.866006545195332e-06, "loss": 0.4955, "step": 10404 }, { "epoch": 0.633620558414274, "grad_norm": 1.0347955811938176, "learning_rate": 4.8659807708002225e-06, "loss": 0.4572, "step": 10405 }, { "epoch": 0.6336814541911518, "grad_norm": 0.9250418558781744, "learning_rate": 4.865954993994701e-06, "loss": 0.4125, "step": 10406 }, { "epoch": 0.6337423499680297, "grad_norm": 0.9298633895851703, "learning_rate": 4.865929214778794e-06, "loss": 0.4216, "step": 10407 }, { "epoch": 0.6338032457449075, "grad_norm": 1.0559698114080889, "learning_rate": 4.865903433152526e-06, "loss": 0.4234, "step": 10408 }, { "epoch": 0.6338641415217855, "grad_norm": 1.0093914077421455, "learning_rate": 4.865877649115927e-06, "loss": 0.4226, "step": 10409 }, { "epoch": 0.6339250372986633, "grad_norm": 1.018304970590069, "learning_rate": 4.865851862669019e-06, "loss": 0.3963, "step": 10410 }, { "epoch": 0.6339859330755412, "grad_norm": 0.9662757945931353, "learning_rate": 4.865826073811831e-06, "loss": 0.5033, "step": 10411 }, { "epoch": 0.634046828852419, "grad_norm": 0.9915797070060142, "learning_rate": 4.86580028254439e-06, "loss": 0.4553, "step": 10412 }, { "epoch": 0.634107724629297, "grad_norm": 1.000482018890975, "learning_rate": 4.865774488866719e-06, "loss": 0.4384, "step": 10413 }, { "epoch": 0.6341686204061748, "grad_norm": 1.062829526118386, "learning_rate": 4.865748692778847e-06, "loss": 0.4212, "step": 10414 }, { "epoch": 0.6342295161830527, "grad_norm": 1.0395414696728766, "learning_rate": 4.865722894280799e-06, "loss": 0.425, "step": 10415 }, { "epoch": 0.6342904119599306, "grad_norm": 0.9746239192455095, "learning_rate": 4.865697093372602e-06, "loss": 0.4403, "step": 10416 }, { "epoch": 0.6343513077368085, "grad_norm": 0.9888622954599039, "learning_rate": 4.865671290054282e-06, "loss": 0.3983, "step": 10417 }, { "epoch": 0.6344122035136863, "grad_norm": 0.990825233105617, "learning_rate": 4.865645484325865e-06, "loss": 0.3962, "step": 10418 }, { "epoch": 0.6344730992905642, "grad_norm": 1.0632040549572872, "learning_rate": 4.8656196761873775e-06, "loss": 0.4639, "step": 10419 }, { "epoch": 0.6345339950674421, "grad_norm": 0.9723892408940062, "learning_rate": 4.865593865638846e-06, "loss": 0.4775, "step": 10420 }, { "epoch": 0.63459489084432, "grad_norm": 1.0794100565685762, "learning_rate": 4.865568052680297e-06, "loss": 0.4715, "step": 10421 }, { "epoch": 0.6346557866211978, "grad_norm": 0.948410454375544, "learning_rate": 4.8655422373117565e-06, "loss": 0.4661, "step": 10422 }, { "epoch": 0.6347166823980757, "grad_norm": 1.078001186343749, "learning_rate": 4.86551641953325e-06, "loss": 0.4972, "step": 10423 }, { "epoch": 0.6347775781749536, "grad_norm": 1.0325806571228857, "learning_rate": 4.865490599344806e-06, "loss": 0.4478, "step": 10424 }, { "epoch": 0.6348384739518315, "grad_norm": 0.9707627448046251, "learning_rate": 4.865464776746447e-06, "loss": 0.4995, "step": 10425 }, { "epoch": 0.6348993697287093, "grad_norm": 1.0618554926364447, "learning_rate": 4.865438951738203e-06, "loss": 0.373, "step": 10426 }, { "epoch": 0.6349602655055872, "grad_norm": 0.9709627936456289, "learning_rate": 4.8654131243200995e-06, "loss": 0.421, "step": 10427 }, { "epoch": 0.6350211612824651, "grad_norm": 0.977268671419569, "learning_rate": 4.865387294492162e-06, "loss": 0.4009, "step": 10428 }, { "epoch": 0.635082057059343, "grad_norm": 0.9416859720000199, "learning_rate": 4.865361462254417e-06, "loss": 0.4373, "step": 10429 }, { "epoch": 0.6351429528362208, "grad_norm": 1.0044485209194258, "learning_rate": 4.86533562760689e-06, "loss": 0.3666, "step": 10430 }, { "epoch": 0.6352038486130986, "grad_norm": 1.1020967439348468, "learning_rate": 4.865309790549609e-06, "loss": 0.485, "step": 10431 }, { "epoch": 0.6352647443899766, "grad_norm": 1.0211863662260419, "learning_rate": 4.8652839510826e-06, "loss": 0.4517, "step": 10432 }, { "epoch": 0.6353256401668544, "grad_norm": 0.954149315279896, "learning_rate": 4.8652581092058885e-06, "loss": 0.4973, "step": 10433 }, { "epoch": 0.6353865359437323, "grad_norm": 1.0958206034141407, "learning_rate": 4.8652322649195014e-06, "loss": 0.4645, "step": 10434 }, { "epoch": 0.6354474317206101, "grad_norm": 1.02882679985465, "learning_rate": 4.865206418223464e-06, "loss": 0.3873, "step": 10435 }, { "epoch": 0.6355083274974881, "grad_norm": 1.2358700040954418, "learning_rate": 4.865180569117804e-06, "loss": 0.4279, "step": 10436 }, { "epoch": 0.6355692232743659, "grad_norm": 1.1213259082953315, "learning_rate": 4.8651547176025475e-06, "loss": 0.4436, "step": 10437 }, { "epoch": 0.6356301190512438, "grad_norm": 1.0519235979219017, "learning_rate": 4.865128863677721e-06, "loss": 0.4003, "step": 10438 }, { "epoch": 0.6356910148281216, "grad_norm": 1.0050243065666382, "learning_rate": 4.865103007343349e-06, "loss": 0.4481, "step": 10439 }, { "epoch": 0.6357519106049996, "grad_norm": 1.009382582486942, "learning_rate": 4.86507714859946e-06, "loss": 0.4641, "step": 10440 }, { "epoch": 0.6358128063818774, "grad_norm": 0.996463376409283, "learning_rate": 4.86505128744608e-06, "loss": 0.422, "step": 10441 }, { "epoch": 0.6358737021587553, "grad_norm": 0.9745785872369009, "learning_rate": 4.865025423883234e-06, "loss": 0.4199, "step": 10442 }, { "epoch": 0.6359345979356331, "grad_norm": 1.0147453141278968, "learning_rate": 4.86499955791095e-06, "loss": 0.4168, "step": 10443 }, { "epoch": 0.6359954937125111, "grad_norm": 0.9325475071077306, "learning_rate": 4.864973689529253e-06, "loss": 0.4289, "step": 10444 }, { "epoch": 0.6360563894893889, "grad_norm": 1.0018704710709703, "learning_rate": 4.864947818738171e-06, "loss": 0.4676, "step": 10445 }, { "epoch": 0.6361172852662668, "grad_norm": 1.0734788629625402, "learning_rate": 4.864921945537728e-06, "loss": 0.4132, "step": 10446 }, { "epoch": 0.6361781810431446, "grad_norm": 0.9208616807170211, "learning_rate": 4.864896069927952e-06, "loss": 0.4535, "step": 10447 }, { "epoch": 0.6362390768200226, "grad_norm": 0.980701249489754, "learning_rate": 4.86487019190887e-06, "loss": 0.4209, "step": 10448 }, { "epoch": 0.6362999725969004, "grad_norm": 1.0403130677307426, "learning_rate": 4.8648443114805064e-06, "loss": 0.3992, "step": 10449 }, { "epoch": 0.6363608683737783, "grad_norm": 0.9781101816548462, "learning_rate": 4.8648184286428895e-06, "loss": 0.441, "step": 10450 }, { "epoch": 0.6364217641506561, "grad_norm": 1.0494607214084335, "learning_rate": 4.864792543396044e-06, "loss": 0.3969, "step": 10451 }, { "epoch": 0.6364826599275341, "grad_norm": 1.0530152240878847, "learning_rate": 4.864766655739998e-06, "loss": 0.4273, "step": 10452 }, { "epoch": 0.6365435557044119, "grad_norm": 1.0453758724079434, "learning_rate": 4.864740765674776e-06, "loss": 0.4045, "step": 10453 }, { "epoch": 0.6366044514812897, "grad_norm": 1.0419257755097646, "learning_rate": 4.864714873200405e-06, "loss": 0.3839, "step": 10454 }, { "epoch": 0.6366653472581676, "grad_norm": 1.0373802824504132, "learning_rate": 4.864688978316913e-06, "loss": 0.399, "step": 10455 }, { "epoch": 0.6367262430350455, "grad_norm": 0.9097512226659265, "learning_rate": 4.864663081024323e-06, "loss": 0.4811, "step": 10456 }, { "epoch": 0.6367871388119234, "grad_norm": 1.0025578587184372, "learning_rate": 4.864637181322665e-06, "loss": 0.3863, "step": 10457 }, { "epoch": 0.6368480345888012, "grad_norm": 1.0362047497149913, "learning_rate": 4.864611279211964e-06, "loss": 0.4545, "step": 10458 }, { "epoch": 0.6369089303656792, "grad_norm": 1.0449944079854985, "learning_rate": 4.864585374692244e-06, "loss": 0.4733, "step": 10459 }, { "epoch": 0.636969826142557, "grad_norm": 1.0611132116099582, "learning_rate": 4.864559467763536e-06, "loss": 0.4156, "step": 10460 }, { "epoch": 0.6370307219194349, "grad_norm": 1.093915622937792, "learning_rate": 4.864533558425863e-06, "loss": 0.4426, "step": 10461 }, { "epoch": 0.6370916176963127, "grad_norm": 1.038342138092537, "learning_rate": 4.864507646679253e-06, "loss": 0.45, "step": 10462 }, { "epoch": 0.6371525134731907, "grad_norm": 0.9423435212600103, "learning_rate": 4.864481732523731e-06, "loss": 0.4923, "step": 10463 }, { "epoch": 0.6372134092500685, "grad_norm": 0.9531322048791934, "learning_rate": 4.864455815959324e-06, "loss": 0.4424, "step": 10464 }, { "epoch": 0.6372743050269464, "grad_norm": 1.0481643282078918, "learning_rate": 4.864429896986059e-06, "loss": 0.4, "step": 10465 }, { "epoch": 0.6373352008038242, "grad_norm": 0.9356627998225571, "learning_rate": 4.864403975603962e-06, "loss": 0.497, "step": 10466 }, { "epoch": 0.6373960965807022, "grad_norm": 1.00232441651331, "learning_rate": 4.864378051813059e-06, "loss": 0.447, "step": 10467 }, { "epoch": 0.63745699235758, "grad_norm": 1.025285718208498, "learning_rate": 4.8643521256133775e-06, "loss": 0.3876, "step": 10468 }, { "epoch": 0.6375178881344579, "grad_norm": 1.0295592385620431, "learning_rate": 4.864326197004943e-06, "loss": 0.4607, "step": 10469 }, { "epoch": 0.6375787839113357, "grad_norm": 0.9319293605517959, "learning_rate": 4.864300265987782e-06, "loss": 0.4286, "step": 10470 }, { "epoch": 0.6376396796882137, "grad_norm": 1.0364564194487667, "learning_rate": 4.8642743325619215e-06, "loss": 0.3772, "step": 10471 }, { "epoch": 0.6377005754650915, "grad_norm": 1.0282074682914495, "learning_rate": 4.864248396727386e-06, "loss": 0.417, "step": 10472 }, { "epoch": 0.6377614712419694, "grad_norm": 1.0351136540690913, "learning_rate": 4.864222458484205e-06, "loss": 0.4826, "step": 10473 }, { "epoch": 0.6378223670188472, "grad_norm": 0.9774776931342042, "learning_rate": 4.864196517832402e-06, "loss": 0.37, "step": 10474 }, { "epoch": 0.6378832627957252, "grad_norm": 0.9833472212798267, "learning_rate": 4.8641705747720055e-06, "loss": 0.5041, "step": 10475 }, { "epoch": 0.637944158572603, "grad_norm": 1.0325380460680846, "learning_rate": 4.864144629303041e-06, "loss": 0.4451, "step": 10476 }, { "epoch": 0.6380050543494808, "grad_norm": 1.0752380402093091, "learning_rate": 4.864118681425536e-06, "loss": 0.4375, "step": 10477 }, { "epoch": 0.6380659501263587, "grad_norm": 0.9680681917646777, "learning_rate": 4.8640927311395145e-06, "loss": 0.3944, "step": 10478 }, { "epoch": 0.6381268459032367, "grad_norm": 0.9378440530928487, "learning_rate": 4.864066778445006e-06, "loss": 0.4269, "step": 10479 }, { "epoch": 0.6381877416801145, "grad_norm": 1.0771138770301083, "learning_rate": 4.864040823342034e-06, "loss": 0.4458, "step": 10480 }, { "epoch": 0.6382486374569923, "grad_norm": 1.0400747296656099, "learning_rate": 4.864014865830627e-06, "loss": 0.4278, "step": 10481 }, { "epoch": 0.6383095332338702, "grad_norm": 0.9582303797171784, "learning_rate": 4.8639889059108105e-06, "loss": 0.4361, "step": 10482 }, { "epoch": 0.6383704290107481, "grad_norm": 1.0429244704130767, "learning_rate": 4.8639629435826116e-06, "loss": 0.4249, "step": 10483 }, { "epoch": 0.638431324787626, "grad_norm": 0.9930595814204902, "learning_rate": 4.8639369788460565e-06, "loss": 0.4425, "step": 10484 }, { "epoch": 0.6384922205645038, "grad_norm": 0.9931961802576459, "learning_rate": 4.863911011701171e-06, "loss": 0.4233, "step": 10485 }, { "epoch": 0.6385531163413817, "grad_norm": 1.0402408916153487, "learning_rate": 4.863885042147983e-06, "loss": 0.4617, "step": 10486 }, { "epoch": 0.6386140121182596, "grad_norm": 0.9935281246492349, "learning_rate": 4.863859070186518e-06, "loss": 0.401, "step": 10487 }, { "epoch": 0.6386749078951375, "grad_norm": 0.9774275913208501, "learning_rate": 4.863833095816802e-06, "loss": 0.3951, "step": 10488 }, { "epoch": 0.6387358036720153, "grad_norm": 0.9893172283388673, "learning_rate": 4.863807119038862e-06, "loss": 0.4417, "step": 10489 }, { "epoch": 0.6387966994488932, "grad_norm": 1.0180228517266803, "learning_rate": 4.863781139852724e-06, "loss": 0.402, "step": 10490 }, { "epoch": 0.6388575952257711, "grad_norm": 1.084012621163214, "learning_rate": 4.8637551582584154e-06, "loss": 0.3852, "step": 10491 }, { "epoch": 0.638918491002649, "grad_norm": 0.9960360333901718, "learning_rate": 4.863729174255963e-06, "loss": 0.4929, "step": 10492 }, { "epoch": 0.6389793867795268, "grad_norm": 1.0095364898145294, "learning_rate": 4.863703187845391e-06, "loss": 0.3759, "step": 10493 }, { "epoch": 0.6390402825564047, "grad_norm": 0.9966193620919752, "learning_rate": 4.863677199026729e-06, "loss": 0.3775, "step": 10494 }, { "epoch": 0.6391011783332826, "grad_norm": 1.0198583190930866, "learning_rate": 4.8636512078e-06, "loss": 0.4168, "step": 10495 }, { "epoch": 0.6391620741101605, "grad_norm": 0.9998282452334478, "learning_rate": 4.863625214165234e-06, "loss": 0.386, "step": 10496 }, { "epoch": 0.6392229698870383, "grad_norm": 1.0624025870431606, "learning_rate": 4.8635992181224545e-06, "loss": 0.3783, "step": 10497 }, { "epoch": 0.6392838656639163, "grad_norm": 1.0536573002126386, "learning_rate": 4.863573219671689e-06, "loss": 0.4219, "step": 10498 }, { "epoch": 0.6393447614407941, "grad_norm": 0.9416078386197718, "learning_rate": 4.863547218812965e-06, "loss": 0.4472, "step": 10499 }, { "epoch": 0.639405657217672, "grad_norm": 1.0000183025663993, "learning_rate": 4.863521215546309e-06, "loss": 0.4034, "step": 10500 }, { "epoch": 0.6394665529945498, "grad_norm": 1.0848268092568731, "learning_rate": 4.863495209871745e-06, "loss": 0.4118, "step": 10501 }, { "epoch": 0.6395274487714278, "grad_norm": 1.0138404438747586, "learning_rate": 4.863469201789302e-06, "loss": 0.5071, "step": 10502 }, { "epoch": 0.6395883445483056, "grad_norm": 1.0293094173411066, "learning_rate": 4.863443191299006e-06, "loss": 0.4814, "step": 10503 }, { "epoch": 0.6396492403251834, "grad_norm": 1.0366165849860016, "learning_rate": 4.863417178400883e-06, "loss": 0.4799, "step": 10504 }, { "epoch": 0.6397101361020613, "grad_norm": 1.0230464621168636, "learning_rate": 4.86339116309496e-06, "loss": 0.4466, "step": 10505 }, { "epoch": 0.6397710318789392, "grad_norm": 0.964540870168137, "learning_rate": 4.863365145381263e-06, "loss": 0.4135, "step": 10506 }, { "epoch": 0.6398319276558171, "grad_norm": 0.9675728658946712, "learning_rate": 4.863339125259818e-06, "loss": 0.4156, "step": 10507 }, { "epoch": 0.6398928234326949, "grad_norm": 0.9299309157546336, "learning_rate": 4.863313102730653e-06, "loss": 0.4894, "step": 10508 }, { "epoch": 0.6399537192095728, "grad_norm": 0.9449918049675945, "learning_rate": 4.8632870777937945e-06, "loss": 0.4776, "step": 10509 }, { "epoch": 0.6400146149864507, "grad_norm": 1.080519615626287, "learning_rate": 4.863261050449268e-06, "loss": 0.4317, "step": 10510 }, { "epoch": 0.6400755107633286, "grad_norm": 1.0173063181073534, "learning_rate": 4.8632350206970995e-06, "loss": 0.4448, "step": 10511 }, { "epoch": 0.6401364065402064, "grad_norm": 0.9347319193742483, "learning_rate": 4.863208988537316e-06, "loss": 0.441, "step": 10512 }, { "epoch": 0.6401973023170843, "grad_norm": 1.07520474689411, "learning_rate": 4.863182953969945e-06, "loss": 0.4152, "step": 10513 }, { "epoch": 0.6402581980939622, "grad_norm": 0.9632309080077461, "learning_rate": 4.8631569169950124e-06, "loss": 0.4567, "step": 10514 }, { "epoch": 0.6403190938708401, "grad_norm": 1.0273541534928041, "learning_rate": 4.863130877612544e-06, "loss": 0.4422, "step": 10515 }, { "epoch": 0.6403799896477179, "grad_norm": 0.9001765894865732, "learning_rate": 4.863104835822567e-06, "loss": 0.4511, "step": 10516 }, { "epoch": 0.6404408854245958, "grad_norm": 0.9559153310458257, "learning_rate": 4.8630787916251085e-06, "loss": 0.4225, "step": 10517 }, { "epoch": 0.6405017812014737, "grad_norm": 1.0293190573869537, "learning_rate": 4.863052745020195e-06, "loss": 0.4136, "step": 10518 }, { "epoch": 0.6405626769783516, "grad_norm": 0.9468335072922939, "learning_rate": 4.863026696007851e-06, "loss": 0.4801, "step": 10519 }, { "epoch": 0.6406235727552294, "grad_norm": 0.9721746571992215, "learning_rate": 4.863000644588105e-06, "loss": 0.4961, "step": 10520 }, { "epoch": 0.6406844685321073, "grad_norm": 0.9839959662314072, "learning_rate": 4.862974590760984e-06, "loss": 0.4511, "step": 10521 }, { "epoch": 0.6407453643089852, "grad_norm": 1.0195019973545816, "learning_rate": 4.862948534526513e-06, "loss": 0.3779, "step": 10522 }, { "epoch": 0.640806260085863, "grad_norm": 1.1107046478545188, "learning_rate": 4.862922475884719e-06, "loss": 0.3876, "step": 10523 }, { "epoch": 0.6408671558627409, "grad_norm": 0.9718796780824402, "learning_rate": 4.862896414835628e-06, "loss": 0.4283, "step": 10524 }, { "epoch": 0.6409280516396187, "grad_norm": 0.9756063249034618, "learning_rate": 4.8628703513792685e-06, "loss": 0.4505, "step": 10525 }, { "epoch": 0.6409889474164967, "grad_norm": 0.9749293460524524, "learning_rate": 4.862844285515665e-06, "loss": 0.4733, "step": 10526 }, { "epoch": 0.6410498431933745, "grad_norm": 0.9472756398333103, "learning_rate": 4.862818217244846e-06, "loss": 0.426, "step": 10527 }, { "epoch": 0.6411107389702524, "grad_norm": 1.0595198469002445, "learning_rate": 4.862792146566836e-06, "loss": 0.387, "step": 10528 }, { "epoch": 0.6411716347471302, "grad_norm": 1.0631497906865348, "learning_rate": 4.862766073481663e-06, "loss": 0.442, "step": 10529 }, { "epoch": 0.6412325305240082, "grad_norm": 1.016567990152607, "learning_rate": 4.862739997989353e-06, "loss": 0.4097, "step": 10530 }, { "epoch": 0.641293426300886, "grad_norm": 1.0195766228933933, "learning_rate": 4.862713920089932e-06, "loss": 0.3875, "step": 10531 }, { "epoch": 0.6413543220777639, "grad_norm": 1.076873593565555, "learning_rate": 4.862687839783427e-06, "loss": 0.4434, "step": 10532 }, { "epoch": 0.6414152178546417, "grad_norm": 1.0485515952167586, "learning_rate": 4.862661757069866e-06, "loss": 0.3783, "step": 10533 }, { "epoch": 0.6414761136315197, "grad_norm": 1.1655003998110576, "learning_rate": 4.862635671949273e-06, "loss": 0.4249, "step": 10534 }, { "epoch": 0.6415370094083975, "grad_norm": 0.9780702284755981, "learning_rate": 4.862609584421677e-06, "loss": 0.3895, "step": 10535 }, { "epoch": 0.6415979051852754, "grad_norm": 1.054723238612435, "learning_rate": 4.862583494487103e-06, "loss": 0.3965, "step": 10536 }, { "epoch": 0.6416588009621532, "grad_norm": 0.993881126802651, "learning_rate": 4.862557402145578e-06, "loss": 0.4997, "step": 10537 }, { "epoch": 0.6417196967390312, "grad_norm": 1.0487776067884285, "learning_rate": 4.862531307397129e-06, "loss": 0.4818, "step": 10538 }, { "epoch": 0.641780592515909, "grad_norm": 1.011829212984355, "learning_rate": 4.862505210241781e-06, "loss": 0.5037, "step": 10539 }, { "epoch": 0.6418414882927869, "grad_norm": 1.0539047208803565, "learning_rate": 4.862479110679563e-06, "loss": 0.4392, "step": 10540 }, { "epoch": 0.6419023840696648, "grad_norm": 1.0254773507187236, "learning_rate": 4.862453008710501e-06, "loss": 0.4228, "step": 10541 }, { "epoch": 0.6419632798465427, "grad_norm": 1.053257645153928, "learning_rate": 4.862426904334619e-06, "loss": 0.3985, "step": 10542 }, { "epoch": 0.6420241756234205, "grad_norm": 1.0125597134818534, "learning_rate": 4.8624007975519475e-06, "loss": 0.4591, "step": 10543 }, { "epoch": 0.6420850714002984, "grad_norm": 1.0515699249895656, "learning_rate": 4.86237468836251e-06, "loss": 0.4268, "step": 10544 }, { "epoch": 0.6421459671771763, "grad_norm": 1.002866397820736, "learning_rate": 4.8623485767663345e-06, "loss": 0.4594, "step": 10545 }, { "epoch": 0.6422068629540542, "grad_norm": 1.0310487822238719, "learning_rate": 4.862322462763448e-06, "loss": 0.3848, "step": 10546 }, { "epoch": 0.642267758730932, "grad_norm": 0.9940128141854734, "learning_rate": 4.862296346353876e-06, "loss": 0.3906, "step": 10547 }, { "epoch": 0.6423286545078098, "grad_norm": 1.1003965326099998, "learning_rate": 4.862270227537645e-06, "loss": 0.421, "step": 10548 }, { "epoch": 0.6423895502846878, "grad_norm": 0.9839989421941795, "learning_rate": 4.862244106314783e-06, "loss": 0.5188, "step": 10549 }, { "epoch": 0.6424504460615656, "grad_norm": 1.0900412835169846, "learning_rate": 4.862217982685316e-06, "loss": 0.3584, "step": 10550 }, { "epoch": 0.6425113418384435, "grad_norm": 0.9427219780320818, "learning_rate": 4.86219185664927e-06, "loss": 0.4898, "step": 10551 }, { "epoch": 0.6425722376153213, "grad_norm": 1.0361452660179906, "learning_rate": 4.862165728206673e-06, "loss": 0.4173, "step": 10552 }, { "epoch": 0.6426331333921993, "grad_norm": 1.03327684514905, "learning_rate": 4.8621395973575495e-06, "loss": 0.3804, "step": 10553 }, { "epoch": 0.6426940291690771, "grad_norm": 1.0224569933066328, "learning_rate": 4.862113464101927e-06, "loss": 0.4337, "step": 10554 }, { "epoch": 0.642754924945955, "grad_norm": 1.0179264588936878, "learning_rate": 4.862087328439833e-06, "loss": 0.433, "step": 10555 }, { "epoch": 0.6428158207228328, "grad_norm": 1.0102334429052768, "learning_rate": 4.862061190371293e-06, "loss": 0.434, "step": 10556 }, { "epoch": 0.6428767164997108, "grad_norm": 0.9679839623852085, "learning_rate": 4.862035049896335e-06, "loss": 0.4484, "step": 10557 }, { "epoch": 0.6429376122765886, "grad_norm": 1.0469364912224843, "learning_rate": 4.8620089070149844e-06, "loss": 0.3979, "step": 10558 }, { "epoch": 0.6429985080534665, "grad_norm": 0.9830834280367098, "learning_rate": 4.861982761727268e-06, "loss": 0.4508, "step": 10559 }, { "epoch": 0.6430594038303443, "grad_norm": 0.9589099549627871, "learning_rate": 4.861956614033214e-06, "loss": 0.4162, "step": 10560 }, { "epoch": 0.6431202996072223, "grad_norm": 1.0125484010309092, "learning_rate": 4.861930463932847e-06, "loss": 0.4184, "step": 10561 }, { "epoch": 0.6431811953841001, "grad_norm": 1.0156335309349453, "learning_rate": 4.861904311426193e-06, "loss": 0.4435, "step": 10562 }, { "epoch": 0.643242091160978, "grad_norm": 1.0022993086357432, "learning_rate": 4.861878156513282e-06, "loss": 0.4584, "step": 10563 }, { "epoch": 0.6433029869378558, "grad_norm": 1.0021368020826897, "learning_rate": 4.8618519991941374e-06, "loss": 0.456, "step": 10564 }, { "epoch": 0.6433638827147338, "grad_norm": 0.9524699463431355, "learning_rate": 4.861825839468787e-06, "loss": 0.4502, "step": 10565 }, { "epoch": 0.6434247784916116, "grad_norm": 0.9022657401882825, "learning_rate": 4.861799677337257e-06, "loss": 0.5595, "step": 10566 }, { "epoch": 0.6434856742684895, "grad_norm": 0.9842984151533907, "learning_rate": 4.8617735127995765e-06, "loss": 0.4279, "step": 10567 }, { "epoch": 0.6435465700453673, "grad_norm": 0.9458596201978712, "learning_rate": 4.86174734585577e-06, "loss": 0.4302, "step": 10568 }, { "epoch": 0.6436074658222453, "grad_norm": 0.9805318328049424, "learning_rate": 4.8617211765058635e-06, "loss": 0.4374, "step": 10569 }, { "epoch": 0.6436683615991231, "grad_norm": 1.094754626526192, "learning_rate": 4.861695004749885e-06, "loss": 0.4321, "step": 10570 }, { "epoch": 0.643729257376001, "grad_norm": 1.0340138489607757, "learning_rate": 4.86166883058786e-06, "loss": 0.4632, "step": 10571 }, { "epoch": 0.6437901531528788, "grad_norm": 1.0292772666070702, "learning_rate": 4.861642654019816e-06, "loss": 0.4439, "step": 10572 }, { "epoch": 0.6438510489297568, "grad_norm": 1.1031224644172177, "learning_rate": 4.86161647504578e-06, "loss": 0.4146, "step": 10573 }, { "epoch": 0.6439119447066346, "grad_norm": 1.00012964646356, "learning_rate": 4.8615902936657785e-06, "loss": 0.3792, "step": 10574 }, { "epoch": 0.6439728404835124, "grad_norm": 1.1054000764351017, "learning_rate": 4.861564109879837e-06, "loss": 0.4186, "step": 10575 }, { "epoch": 0.6440337362603903, "grad_norm": 1.09096944334143, "learning_rate": 4.861537923687984e-06, "loss": 0.5083, "step": 10576 }, { "epoch": 0.6440946320372682, "grad_norm": 0.9168812022425431, "learning_rate": 4.861511735090245e-06, "loss": 0.4831, "step": 10577 }, { "epoch": 0.6441555278141461, "grad_norm": 1.0157093412725704, "learning_rate": 4.861485544086647e-06, "loss": 0.4152, "step": 10578 }, { "epoch": 0.6442164235910239, "grad_norm": 1.0132668594178684, "learning_rate": 4.8614593506772165e-06, "loss": 0.4363, "step": 10579 }, { "epoch": 0.6442773193679019, "grad_norm": 0.983651190301926, "learning_rate": 4.86143315486198e-06, "loss": 0.4656, "step": 10580 }, { "epoch": 0.6443382151447797, "grad_norm": 0.985335827961475, "learning_rate": 4.861406956640965e-06, "loss": 0.4824, "step": 10581 }, { "epoch": 0.6443991109216576, "grad_norm": 1.011704032408697, "learning_rate": 4.861380756014197e-06, "loss": 0.3731, "step": 10582 }, { "epoch": 0.6444600066985354, "grad_norm": 0.9382838173175052, "learning_rate": 4.8613545529817045e-06, "loss": 0.445, "step": 10583 }, { "epoch": 0.6445209024754134, "grad_norm": 0.948635455780823, "learning_rate": 4.861328347543512e-06, "loss": 0.4058, "step": 10584 }, { "epoch": 0.6445817982522912, "grad_norm": 1.0442759721282102, "learning_rate": 4.861302139699647e-06, "loss": 0.4053, "step": 10585 }, { "epoch": 0.6446426940291691, "grad_norm": 1.069627153244621, "learning_rate": 4.861275929450138e-06, "loss": 0.3977, "step": 10586 }, { "epoch": 0.6447035898060469, "grad_norm": 1.0541556615210181, "learning_rate": 4.861249716795009e-06, "loss": 0.3778, "step": 10587 }, { "epoch": 0.6447644855829249, "grad_norm": 1.0298664547800662, "learning_rate": 4.861223501734288e-06, "loss": 0.43, "step": 10588 }, { "epoch": 0.6448253813598027, "grad_norm": 1.1343787331172799, "learning_rate": 4.861197284268002e-06, "loss": 0.4129, "step": 10589 }, { "epoch": 0.6448862771366806, "grad_norm": 0.9240051554861916, "learning_rate": 4.861171064396177e-06, "loss": 0.4824, "step": 10590 }, { "epoch": 0.6449471729135584, "grad_norm": 1.0885333994513031, "learning_rate": 4.86114484211884e-06, "loss": 0.4941, "step": 10591 }, { "epoch": 0.6450080686904364, "grad_norm": 0.9481604520687096, "learning_rate": 4.861118617436018e-06, "loss": 0.5242, "step": 10592 }, { "epoch": 0.6450689644673142, "grad_norm": 0.9833201036494563, "learning_rate": 4.861092390347737e-06, "loss": 0.4506, "step": 10593 }, { "epoch": 0.645129860244192, "grad_norm": 1.0476752482734253, "learning_rate": 4.861066160854025e-06, "loss": 0.4164, "step": 10594 }, { "epoch": 0.6451907560210699, "grad_norm": 0.9584868947363762, "learning_rate": 4.861039928954907e-06, "loss": 0.4459, "step": 10595 }, { "epoch": 0.6452516517979479, "grad_norm": 1.032973447414365, "learning_rate": 4.861013694650412e-06, "loss": 0.4211, "step": 10596 }, { "epoch": 0.6453125475748257, "grad_norm": 0.9700104599556907, "learning_rate": 4.8609874579405634e-06, "loss": 0.4311, "step": 10597 }, { "epoch": 0.6453734433517035, "grad_norm": 1.0606984811701334, "learning_rate": 4.860961218825391e-06, "loss": 0.4563, "step": 10598 }, { "epoch": 0.6454343391285814, "grad_norm": 0.9342832339736323, "learning_rate": 4.86093497730492e-06, "loss": 0.4702, "step": 10599 }, { "epoch": 0.6454952349054593, "grad_norm": 0.9417992402968991, "learning_rate": 4.860908733379177e-06, "loss": 0.4319, "step": 10600 }, { "epoch": 0.6455561306823372, "grad_norm": 0.9412000634458526, "learning_rate": 4.860882487048191e-06, "loss": 0.4173, "step": 10601 }, { "epoch": 0.645617026459215, "grad_norm": 0.9239432284732534, "learning_rate": 4.860856238311985e-06, "loss": 0.4942, "step": 10602 }, { "epoch": 0.6456779222360929, "grad_norm": 1.0499882206099673, "learning_rate": 4.860829987170589e-06, "loss": 0.3366, "step": 10603 }, { "epoch": 0.6457388180129708, "grad_norm": 1.0504061599912495, "learning_rate": 4.860803733624029e-06, "loss": 0.4044, "step": 10604 }, { "epoch": 0.6457997137898487, "grad_norm": 1.0373497829498273, "learning_rate": 4.8607774776723295e-06, "loss": 0.4718, "step": 10605 }, { "epoch": 0.6458606095667265, "grad_norm": 1.0146417692090641, "learning_rate": 4.86075121931552e-06, "loss": 0.4093, "step": 10606 }, { "epoch": 0.6459215053436044, "grad_norm": 0.9417356538974917, "learning_rate": 4.860724958553627e-06, "loss": 0.4057, "step": 10607 }, { "epoch": 0.6459824011204823, "grad_norm": 1.0297456428110283, "learning_rate": 4.8606986953866755e-06, "loss": 0.4159, "step": 10608 }, { "epoch": 0.6460432968973602, "grad_norm": 1.0710066576426787, "learning_rate": 4.860672429814693e-06, "loss": 0.4144, "step": 10609 }, { "epoch": 0.646104192674238, "grad_norm": 0.9392884773793859, "learning_rate": 4.860646161837707e-06, "loss": 0.4437, "step": 10610 }, { "epoch": 0.6461650884511159, "grad_norm": 1.1254855306299756, "learning_rate": 4.8606198914557435e-06, "loss": 0.4043, "step": 10611 }, { "epoch": 0.6462259842279938, "grad_norm": 1.0471533422939518, "learning_rate": 4.86059361866883e-06, "loss": 0.4201, "step": 10612 }, { "epoch": 0.6462868800048717, "grad_norm": 1.0977293069534626, "learning_rate": 4.860567343476992e-06, "loss": 0.3661, "step": 10613 }, { "epoch": 0.6463477757817495, "grad_norm": 1.0256161482272752, "learning_rate": 4.8605410658802575e-06, "loss": 0.4616, "step": 10614 }, { "epoch": 0.6464086715586274, "grad_norm": 1.1036500214891647, "learning_rate": 4.860514785878653e-06, "loss": 0.3644, "step": 10615 }, { "epoch": 0.6464695673355053, "grad_norm": 0.9719366773505089, "learning_rate": 4.8604885034722046e-06, "loss": 0.4511, "step": 10616 }, { "epoch": 0.6465304631123832, "grad_norm": 0.9674253036732576, "learning_rate": 4.860462218660941e-06, "loss": 0.4622, "step": 10617 }, { "epoch": 0.646591358889261, "grad_norm": 0.9786652879088036, "learning_rate": 4.8604359314448855e-06, "loss": 0.3732, "step": 10618 }, { "epoch": 0.6466522546661388, "grad_norm": 0.9959242395705412, "learning_rate": 4.860409641824069e-06, "loss": 0.3993, "step": 10619 }, { "epoch": 0.6467131504430168, "grad_norm": 0.9439834018518124, "learning_rate": 4.860383349798515e-06, "loss": 0.3786, "step": 10620 }, { "epoch": 0.6467740462198946, "grad_norm": 0.8792514081759222, "learning_rate": 4.860357055368252e-06, "loss": 0.4326, "step": 10621 }, { "epoch": 0.6468349419967725, "grad_norm": 1.0144294024559652, "learning_rate": 4.860330758533306e-06, "loss": 0.4605, "step": 10622 }, { "epoch": 0.6468958377736505, "grad_norm": 1.0219267376617747, "learning_rate": 4.860304459293704e-06, "loss": 0.4349, "step": 10623 }, { "epoch": 0.6469567335505283, "grad_norm": 0.8827630427690252, "learning_rate": 4.860278157649473e-06, "loss": 0.5489, "step": 10624 }, { "epoch": 0.6470176293274061, "grad_norm": 0.9830358237525374, "learning_rate": 4.860251853600639e-06, "loss": 0.4527, "step": 10625 }, { "epoch": 0.647078525104284, "grad_norm": 1.0440736721862096, "learning_rate": 4.8602255471472305e-06, "loss": 0.4427, "step": 10626 }, { "epoch": 0.6471394208811619, "grad_norm": 1.0720179850777591, "learning_rate": 4.860199238289273e-06, "loss": 0.3746, "step": 10627 }, { "epoch": 0.6472003166580398, "grad_norm": 1.0320554621199058, "learning_rate": 4.8601729270267936e-06, "loss": 0.4238, "step": 10628 }, { "epoch": 0.6472612124349176, "grad_norm": 0.9817958213716119, "learning_rate": 4.8601466133598194e-06, "loss": 0.398, "step": 10629 }, { "epoch": 0.6473221082117955, "grad_norm": 0.9496541960316972, "learning_rate": 4.860120297288376e-06, "loss": 0.3772, "step": 10630 }, { "epoch": 0.6473830039886734, "grad_norm": 1.1144729605555717, "learning_rate": 4.860093978812492e-06, "loss": 0.4077, "step": 10631 }, { "epoch": 0.6474438997655513, "grad_norm": 0.9836043163003929, "learning_rate": 4.860067657932194e-06, "loss": 0.4036, "step": 10632 }, { "epoch": 0.6475047955424291, "grad_norm": 0.9466139843802468, "learning_rate": 4.860041334647506e-06, "loss": 0.3983, "step": 10633 }, { "epoch": 0.647565691319307, "grad_norm": 0.9942693031407045, "learning_rate": 4.860015008958459e-06, "loss": 0.4973, "step": 10634 }, { "epoch": 0.6476265870961849, "grad_norm": 1.035845185231962, "learning_rate": 4.859988680865076e-06, "loss": 0.4663, "step": 10635 }, { "epoch": 0.6476874828730628, "grad_norm": 1.026442761719514, "learning_rate": 4.859962350367386e-06, "loss": 0.4219, "step": 10636 }, { "epoch": 0.6477483786499406, "grad_norm": 1.0217587126982839, "learning_rate": 4.859936017465416e-06, "loss": 0.4196, "step": 10637 }, { "epoch": 0.6478092744268185, "grad_norm": 1.0626767788090579, "learning_rate": 4.859909682159193e-06, "loss": 0.4248, "step": 10638 }, { "epoch": 0.6478701702036964, "grad_norm": 1.0291506906471348, "learning_rate": 4.859883344448742e-06, "loss": 0.3923, "step": 10639 }, { "epoch": 0.6479310659805743, "grad_norm": 0.9813056066887623, "learning_rate": 4.859857004334091e-06, "loss": 0.4733, "step": 10640 }, { "epoch": 0.6479919617574521, "grad_norm": 0.9859994866983671, "learning_rate": 4.8598306618152664e-06, "loss": 0.4026, "step": 10641 }, { "epoch": 0.64805285753433, "grad_norm": 1.0423887493906887, "learning_rate": 4.859804316892297e-06, "loss": 0.4649, "step": 10642 }, { "epoch": 0.6481137533112079, "grad_norm": 0.8914889209378303, "learning_rate": 4.859777969565206e-06, "loss": 0.4697, "step": 10643 }, { "epoch": 0.6481746490880858, "grad_norm": 1.0102158761430937, "learning_rate": 4.859751619834023e-06, "loss": 0.4172, "step": 10644 }, { "epoch": 0.6482355448649636, "grad_norm": 0.9964017094674331, "learning_rate": 4.859725267698775e-06, "loss": 0.4132, "step": 10645 }, { "epoch": 0.6482964406418414, "grad_norm": 1.0233815464113951, "learning_rate": 4.859698913159487e-06, "loss": 0.3736, "step": 10646 }, { "epoch": 0.6483573364187194, "grad_norm": 1.0297604727954635, "learning_rate": 4.859672556216188e-06, "loss": 0.4578, "step": 10647 }, { "epoch": 0.6484182321955972, "grad_norm": 1.006084685849315, "learning_rate": 4.859646196868902e-06, "loss": 0.445, "step": 10648 }, { "epoch": 0.6484791279724751, "grad_norm": 1.0363119028222503, "learning_rate": 4.859619835117658e-06, "loss": 0.4583, "step": 10649 }, { "epoch": 0.6485400237493529, "grad_norm": 1.0433746718677357, "learning_rate": 4.859593470962483e-06, "loss": 0.4591, "step": 10650 }, { "epoch": 0.6486009195262309, "grad_norm": 1.0310143734703534, "learning_rate": 4.859567104403403e-06, "loss": 0.4025, "step": 10651 }, { "epoch": 0.6486618153031087, "grad_norm": 0.9870467361887348, "learning_rate": 4.859540735440444e-06, "loss": 0.4891, "step": 10652 }, { "epoch": 0.6487227110799866, "grad_norm": 0.9472903416572835, "learning_rate": 4.859514364073635e-06, "loss": 0.4823, "step": 10653 }, { "epoch": 0.6487836068568644, "grad_norm": 0.9912509371406741, "learning_rate": 4.859487990303002e-06, "loss": 0.3794, "step": 10654 }, { "epoch": 0.6488445026337424, "grad_norm": 1.1319620000005475, "learning_rate": 4.859461614128571e-06, "loss": 0.3412, "step": 10655 }, { "epoch": 0.6489053984106202, "grad_norm": 1.0292678105772615, "learning_rate": 4.85943523555037e-06, "loss": 0.3801, "step": 10656 }, { "epoch": 0.6489662941874981, "grad_norm": 0.9103173378716486, "learning_rate": 4.859408854568426e-06, "loss": 0.4441, "step": 10657 }, { "epoch": 0.6490271899643759, "grad_norm": 1.0429425181199843, "learning_rate": 4.859382471182764e-06, "loss": 0.4502, "step": 10658 }, { "epoch": 0.6490880857412539, "grad_norm": 1.037609507653976, "learning_rate": 4.859356085393413e-06, "loss": 0.4197, "step": 10659 }, { "epoch": 0.6491489815181317, "grad_norm": 0.9862251231824257, "learning_rate": 4.8593296972003985e-06, "loss": 0.4162, "step": 10660 }, { "epoch": 0.6492098772950096, "grad_norm": 1.0305954588686652, "learning_rate": 4.859303306603749e-06, "loss": 0.4595, "step": 10661 }, { "epoch": 0.6492707730718875, "grad_norm": 0.9581883732483011, "learning_rate": 4.8592769136034904e-06, "loss": 0.4509, "step": 10662 }, { "epoch": 0.6493316688487654, "grad_norm": 1.0087284648763222, "learning_rate": 4.859250518199649e-06, "loss": 0.4368, "step": 10663 }, { "epoch": 0.6493925646256432, "grad_norm": 1.0229859744820975, "learning_rate": 4.859224120392251e-06, "loss": 0.3712, "step": 10664 }, { "epoch": 0.649453460402521, "grad_norm": 1.022801817983442, "learning_rate": 4.859197720181325e-06, "loss": 0.5019, "step": 10665 }, { "epoch": 0.649514356179399, "grad_norm": 1.072557980758065, "learning_rate": 4.859171317566899e-06, "loss": 0.3893, "step": 10666 }, { "epoch": 0.6495752519562769, "grad_norm": 1.1194125750510233, "learning_rate": 4.859144912548998e-06, "loss": 0.4082, "step": 10667 }, { "epoch": 0.6496361477331547, "grad_norm": 0.9396890913180205, "learning_rate": 4.859118505127648e-06, "loss": 0.4262, "step": 10668 }, { "epoch": 0.6496970435100325, "grad_norm": 1.1152007588487818, "learning_rate": 4.859092095302877e-06, "loss": 0.4085, "step": 10669 }, { "epoch": 0.6497579392869105, "grad_norm": 0.9713256623739546, "learning_rate": 4.859065683074713e-06, "loss": 0.4329, "step": 10670 }, { "epoch": 0.6498188350637883, "grad_norm": 0.9369298052104601, "learning_rate": 4.859039268443182e-06, "loss": 0.471, "step": 10671 }, { "epoch": 0.6498797308406662, "grad_norm": 1.0420272651560964, "learning_rate": 4.85901285140831e-06, "loss": 0.4724, "step": 10672 }, { "epoch": 0.649940626617544, "grad_norm": 0.9939708411469895, "learning_rate": 4.8589864319701255e-06, "loss": 0.508, "step": 10673 }, { "epoch": 0.650001522394422, "grad_norm": 1.009237313606144, "learning_rate": 4.858960010128654e-06, "loss": 0.5236, "step": 10674 }, { "epoch": 0.6500624181712998, "grad_norm": 1.0774387535092405, "learning_rate": 4.858933585883924e-06, "loss": 0.4163, "step": 10675 }, { "epoch": 0.6501233139481777, "grad_norm": 1.033062953629723, "learning_rate": 4.85890715923596e-06, "loss": 0.4205, "step": 10676 }, { "epoch": 0.6501842097250555, "grad_norm": 1.0481376839371173, "learning_rate": 4.858880730184792e-06, "loss": 0.3485, "step": 10677 }, { "epoch": 0.6502451055019335, "grad_norm": 0.99566230215343, "learning_rate": 4.858854298730444e-06, "loss": 0.4334, "step": 10678 }, { "epoch": 0.6503060012788113, "grad_norm": 0.9479956664933922, "learning_rate": 4.858827864872946e-06, "loss": 0.4592, "step": 10679 }, { "epoch": 0.6503668970556892, "grad_norm": 1.052936621145186, "learning_rate": 4.858801428612322e-06, "loss": 0.3908, "step": 10680 }, { "epoch": 0.650427792832567, "grad_norm": 0.9818836025839305, "learning_rate": 4.8587749899486004e-06, "loss": 0.4614, "step": 10681 }, { "epoch": 0.650488688609445, "grad_norm": 0.9259698224919435, "learning_rate": 4.858748548881808e-06, "loss": 0.4241, "step": 10682 }, { "epoch": 0.6505495843863228, "grad_norm": 0.9550681254024396, "learning_rate": 4.858722105411971e-06, "loss": 0.4654, "step": 10683 }, { "epoch": 0.6506104801632007, "grad_norm": 0.9610651328243288, "learning_rate": 4.858695659539118e-06, "loss": 0.4856, "step": 10684 }, { "epoch": 0.6506713759400785, "grad_norm": 0.9544138172629343, "learning_rate": 4.858669211263274e-06, "loss": 0.4354, "step": 10685 }, { "epoch": 0.6507322717169565, "grad_norm": 0.9337836669963537, "learning_rate": 4.858642760584467e-06, "loss": 0.4267, "step": 10686 }, { "epoch": 0.6507931674938343, "grad_norm": 0.9917273241413078, "learning_rate": 4.8586163075027246e-06, "loss": 0.3587, "step": 10687 }, { "epoch": 0.6508540632707122, "grad_norm": 0.9648653216176402, "learning_rate": 4.858589852018072e-06, "loss": 0.4789, "step": 10688 }, { "epoch": 0.65091495904759, "grad_norm": 1.0430624098842887, "learning_rate": 4.8585633941305375e-06, "loss": 0.4045, "step": 10689 }, { "epoch": 0.650975854824468, "grad_norm": 0.9935817332677301, "learning_rate": 4.858536933840148e-06, "loss": 0.4556, "step": 10690 }, { "epoch": 0.6510367506013458, "grad_norm": 0.9671771362561605, "learning_rate": 4.858510471146929e-06, "loss": 0.4065, "step": 10691 }, { "epoch": 0.6510976463782236, "grad_norm": 1.0234464417737643, "learning_rate": 4.858484006050909e-06, "loss": 0.4505, "step": 10692 }, { "epoch": 0.6511585421551015, "grad_norm": 0.9266049072245872, "learning_rate": 4.858457538552115e-06, "loss": 0.4866, "step": 10693 }, { "epoch": 0.6512194379319795, "grad_norm": 0.9033237944253941, "learning_rate": 4.858431068650573e-06, "loss": 0.401, "step": 10694 }, { "epoch": 0.6512803337088573, "grad_norm": 0.9612529398443316, "learning_rate": 4.858404596346311e-06, "loss": 0.4832, "step": 10695 }, { "epoch": 0.6513412294857351, "grad_norm": 0.9395114675822827, "learning_rate": 4.858378121639355e-06, "loss": 0.4837, "step": 10696 }, { "epoch": 0.651402125262613, "grad_norm": 1.137799721237349, "learning_rate": 4.858351644529733e-06, "loss": 0.3865, "step": 10697 }, { "epoch": 0.6514630210394909, "grad_norm": 1.0946503487400454, "learning_rate": 4.85832516501747e-06, "loss": 0.4932, "step": 10698 }, { "epoch": 0.6515239168163688, "grad_norm": 1.028777868261304, "learning_rate": 4.858298683102596e-06, "loss": 0.4222, "step": 10699 }, { "epoch": 0.6515848125932466, "grad_norm": 0.9893352631743837, "learning_rate": 4.858272198785136e-06, "loss": 0.4188, "step": 10700 }, { "epoch": 0.6516457083701245, "grad_norm": 0.9843026608753719, "learning_rate": 4.858245712065116e-06, "loss": 0.3372, "step": 10701 }, { "epoch": 0.6517066041470024, "grad_norm": 0.9749933012872901, "learning_rate": 4.8582192229425655e-06, "loss": 0.4634, "step": 10702 }, { "epoch": 0.6517674999238803, "grad_norm": 1.0341485483031116, "learning_rate": 4.8581927314175105e-06, "loss": 0.4336, "step": 10703 }, { "epoch": 0.6518283957007581, "grad_norm": 1.0557905615772547, "learning_rate": 4.858166237489977e-06, "loss": 0.5053, "step": 10704 }, { "epoch": 0.6518892914776361, "grad_norm": 1.0570168134738445, "learning_rate": 4.858139741159993e-06, "loss": 0.4306, "step": 10705 }, { "epoch": 0.6519501872545139, "grad_norm": 0.9676884578191283, "learning_rate": 4.858113242427585e-06, "loss": 0.4685, "step": 10706 }, { "epoch": 0.6520110830313918, "grad_norm": 1.0189915429827314, "learning_rate": 4.8580867412927805e-06, "loss": 0.4115, "step": 10707 }, { "epoch": 0.6520719788082696, "grad_norm": 1.0625261069320295, "learning_rate": 4.8580602377556065e-06, "loss": 0.4111, "step": 10708 }, { "epoch": 0.6521328745851476, "grad_norm": 0.9495024814257299, "learning_rate": 4.8580337318160895e-06, "loss": 0.3733, "step": 10709 }, { "epoch": 0.6521937703620254, "grad_norm": 1.0731315772072556, "learning_rate": 4.858007223474257e-06, "loss": 0.4358, "step": 10710 }, { "epoch": 0.6522546661389033, "grad_norm": 0.961276446081222, "learning_rate": 4.857980712730136e-06, "loss": 0.4756, "step": 10711 }, { "epoch": 0.6523155619157811, "grad_norm": 1.0194406293484586, "learning_rate": 4.8579541995837525e-06, "loss": 0.451, "step": 10712 }, { "epoch": 0.6523764576926591, "grad_norm": 0.9981251257453106, "learning_rate": 4.857927684035135e-06, "loss": 0.3572, "step": 10713 }, { "epoch": 0.6524373534695369, "grad_norm": 0.9403347365441006, "learning_rate": 4.857901166084309e-06, "loss": 0.541, "step": 10714 }, { "epoch": 0.6524982492464148, "grad_norm": 0.9532194752543874, "learning_rate": 4.857874645731303e-06, "loss": 0.4245, "step": 10715 }, { "epoch": 0.6525591450232926, "grad_norm": 1.0406020934547693, "learning_rate": 4.857848122976143e-06, "loss": 0.3767, "step": 10716 }, { "epoch": 0.6526200408001706, "grad_norm": 1.0956538225673307, "learning_rate": 4.857821597818856e-06, "loss": 0.3542, "step": 10717 }, { "epoch": 0.6526809365770484, "grad_norm": 0.9711629524436183, "learning_rate": 4.85779507025947e-06, "loss": 0.4003, "step": 10718 }, { "epoch": 0.6527418323539262, "grad_norm": 0.9759212594827058, "learning_rate": 4.857768540298011e-06, "loss": 0.4046, "step": 10719 }, { "epoch": 0.6528027281308041, "grad_norm": 0.9699785116532157, "learning_rate": 4.8577420079345074e-06, "loss": 0.4647, "step": 10720 }, { "epoch": 0.652863623907682, "grad_norm": 0.9401942569199842, "learning_rate": 4.857715473168984e-06, "loss": 0.4303, "step": 10721 }, { "epoch": 0.6529245196845599, "grad_norm": 1.0453623475150757, "learning_rate": 4.85768893600147e-06, "loss": 0.4922, "step": 10722 }, { "epoch": 0.6529854154614377, "grad_norm": 1.0467463166256505, "learning_rate": 4.8576623964319915e-06, "loss": 0.3586, "step": 10723 }, { "epoch": 0.6530463112383156, "grad_norm": 1.0204763284827776, "learning_rate": 4.857635854460575e-06, "loss": 0.4184, "step": 10724 }, { "epoch": 0.6531072070151935, "grad_norm": 1.0422439618482238, "learning_rate": 4.857609310087248e-06, "loss": 0.3433, "step": 10725 }, { "epoch": 0.6531681027920714, "grad_norm": 1.050696278330967, "learning_rate": 4.857582763312038e-06, "loss": 0.4424, "step": 10726 }, { "epoch": 0.6532289985689492, "grad_norm": 0.9467275845788353, "learning_rate": 4.857556214134972e-06, "loss": 0.4143, "step": 10727 }, { "epoch": 0.6532898943458271, "grad_norm": 1.0329357814537132, "learning_rate": 4.857529662556076e-06, "loss": 0.4169, "step": 10728 }, { "epoch": 0.653350790122705, "grad_norm": 0.9958468668249771, "learning_rate": 4.857503108575378e-06, "loss": 0.4913, "step": 10729 }, { "epoch": 0.6534116858995829, "grad_norm": 1.0763908683601642, "learning_rate": 4.857476552192905e-06, "loss": 0.392, "step": 10730 }, { "epoch": 0.6534725816764607, "grad_norm": 1.0744690955034248, "learning_rate": 4.857449993408684e-06, "loss": 0.393, "step": 10731 }, { "epoch": 0.6535334774533386, "grad_norm": 0.9655141987199217, "learning_rate": 4.857423432222742e-06, "loss": 0.415, "step": 10732 }, { "epoch": 0.6535943732302165, "grad_norm": 1.0662907234364187, "learning_rate": 4.857396868635106e-06, "loss": 0.4476, "step": 10733 }, { "epoch": 0.6536552690070944, "grad_norm": 1.046673475330789, "learning_rate": 4.857370302645803e-06, "loss": 0.3891, "step": 10734 }, { "epoch": 0.6537161647839722, "grad_norm": 1.147244245667816, "learning_rate": 4.85734373425486e-06, "loss": 0.4246, "step": 10735 }, { "epoch": 0.65377706056085, "grad_norm": 0.9921685316868103, "learning_rate": 4.857317163462304e-06, "loss": 0.4296, "step": 10736 }, { "epoch": 0.653837956337728, "grad_norm": 1.0847830075560723, "learning_rate": 4.857290590268163e-06, "loss": 0.4484, "step": 10737 }, { "epoch": 0.6538988521146059, "grad_norm": 1.007160418401543, "learning_rate": 4.857264014672464e-06, "loss": 0.4495, "step": 10738 }, { "epoch": 0.6539597478914837, "grad_norm": 0.9926879175114219, "learning_rate": 4.8572374366752315e-06, "loss": 0.3769, "step": 10739 }, { "epoch": 0.6540206436683615, "grad_norm": 1.1509461261026492, "learning_rate": 4.8572108562764955e-06, "loss": 0.4507, "step": 10740 }, { "epoch": 0.6540815394452395, "grad_norm": 0.9391565512492822, "learning_rate": 4.857184273476282e-06, "loss": 0.5096, "step": 10741 }, { "epoch": 0.6541424352221173, "grad_norm": 0.9853403113013137, "learning_rate": 4.857157688274619e-06, "loss": 0.4034, "step": 10742 }, { "epoch": 0.6542033309989952, "grad_norm": 0.9269359583419198, "learning_rate": 4.857131100671531e-06, "loss": 0.4691, "step": 10743 }, { "epoch": 0.6542642267758731, "grad_norm": 0.9679062074285353, "learning_rate": 4.857104510667048e-06, "loss": 0.4629, "step": 10744 }, { "epoch": 0.654325122552751, "grad_norm": 1.0100083284931827, "learning_rate": 4.857077918261196e-06, "loss": 0.4129, "step": 10745 }, { "epoch": 0.6543860183296288, "grad_norm": 1.04930277961787, "learning_rate": 4.857051323454001e-06, "loss": 0.4248, "step": 10746 }, { "epoch": 0.6544469141065067, "grad_norm": 0.9645797982230644, "learning_rate": 4.8570247262454925e-06, "loss": 0.4391, "step": 10747 }, { "epoch": 0.6545078098833846, "grad_norm": 0.9830814495841744, "learning_rate": 4.856998126635696e-06, "loss": 0.3637, "step": 10748 }, { "epoch": 0.6545687056602625, "grad_norm": 0.978827601388164, "learning_rate": 4.8569715246246375e-06, "loss": 0.4218, "step": 10749 }, { "epoch": 0.6546296014371403, "grad_norm": 0.9754306382072941, "learning_rate": 4.8569449202123464e-06, "loss": 0.4487, "step": 10750 }, { "epoch": 0.6546904972140182, "grad_norm": 0.9829180596031601, "learning_rate": 4.856918313398849e-06, "loss": 0.4825, "step": 10751 }, { "epoch": 0.6547513929908961, "grad_norm": 1.1170795418622772, "learning_rate": 4.8568917041841725e-06, "loss": 0.3375, "step": 10752 }, { "epoch": 0.654812288767774, "grad_norm": 0.9417487792178735, "learning_rate": 4.856865092568342e-06, "loss": 0.4899, "step": 10753 }, { "epoch": 0.6548731845446518, "grad_norm": 0.948848379626938, "learning_rate": 4.856838478551389e-06, "loss": 0.4664, "step": 10754 }, { "epoch": 0.6549340803215297, "grad_norm": 0.9563320434867972, "learning_rate": 4.856811862133336e-06, "loss": 0.4471, "step": 10755 }, { "epoch": 0.6549949760984076, "grad_norm": 0.9584456954032616, "learning_rate": 4.8567852433142126e-06, "loss": 0.4431, "step": 10756 }, { "epoch": 0.6550558718752855, "grad_norm": 1.0256552824964695, "learning_rate": 4.856758622094045e-06, "loss": 0.3622, "step": 10757 }, { "epoch": 0.6551167676521633, "grad_norm": 0.9907082994774545, "learning_rate": 4.856731998472861e-06, "loss": 0.4052, "step": 10758 }, { "epoch": 0.6551776634290412, "grad_norm": 0.9790284733732244, "learning_rate": 4.856705372450688e-06, "loss": 0.4207, "step": 10759 }, { "epoch": 0.6552385592059191, "grad_norm": 0.9243484942820699, "learning_rate": 4.856678744027552e-06, "loss": 0.4518, "step": 10760 }, { "epoch": 0.655299454982797, "grad_norm": 1.0013797022704551, "learning_rate": 4.856652113203481e-06, "loss": 0.453, "step": 10761 }, { "epoch": 0.6553603507596748, "grad_norm": 1.0029105110741636, "learning_rate": 4.856625479978501e-06, "loss": 0.3817, "step": 10762 }, { "epoch": 0.6554212465365526, "grad_norm": 1.0694943399547647, "learning_rate": 4.856598844352641e-06, "loss": 0.3378, "step": 10763 }, { "epoch": 0.6554821423134306, "grad_norm": 1.1061939405459822, "learning_rate": 4.856572206325926e-06, "loss": 0.399, "step": 10764 }, { "epoch": 0.6555430380903084, "grad_norm": 1.0926247203147652, "learning_rate": 4.856545565898385e-06, "loss": 0.3561, "step": 10765 }, { "epoch": 0.6556039338671863, "grad_norm": 1.051960238452815, "learning_rate": 4.856518923070045e-06, "loss": 0.3406, "step": 10766 }, { "epoch": 0.6556648296440641, "grad_norm": 1.0159974486765333, "learning_rate": 4.856492277840931e-06, "loss": 0.4179, "step": 10767 }, { "epoch": 0.6557257254209421, "grad_norm": 0.9765655227085539, "learning_rate": 4.856465630211072e-06, "loss": 0.4734, "step": 10768 }, { "epoch": 0.6557866211978199, "grad_norm": 1.0537941682078442, "learning_rate": 4.856438980180495e-06, "loss": 0.3736, "step": 10769 }, { "epoch": 0.6558475169746978, "grad_norm": 0.9573059429241525, "learning_rate": 4.856412327749228e-06, "loss": 0.4014, "step": 10770 }, { "epoch": 0.6559084127515756, "grad_norm": 0.9622716159230342, "learning_rate": 4.856385672917296e-06, "loss": 0.44, "step": 10771 }, { "epoch": 0.6559693085284536, "grad_norm": 1.0650682665359055, "learning_rate": 4.856359015684728e-06, "loss": 0.385, "step": 10772 }, { "epoch": 0.6560302043053314, "grad_norm": 1.0338981254780606, "learning_rate": 4.856332356051549e-06, "loss": 0.4039, "step": 10773 }, { "epoch": 0.6560911000822093, "grad_norm": 1.072052738929831, "learning_rate": 4.856305694017789e-06, "loss": 0.4089, "step": 10774 }, { "epoch": 0.6561519958590871, "grad_norm": 1.0204894041609656, "learning_rate": 4.856279029583474e-06, "loss": 0.4043, "step": 10775 }, { "epoch": 0.6562128916359651, "grad_norm": 1.0351774791788668, "learning_rate": 4.85625236274863e-06, "loss": 0.4665, "step": 10776 }, { "epoch": 0.6562737874128429, "grad_norm": 1.071851814807069, "learning_rate": 4.856225693513285e-06, "loss": 0.424, "step": 10777 }, { "epoch": 0.6563346831897208, "grad_norm": 0.9640595529797289, "learning_rate": 4.856199021877467e-06, "loss": 0.546, "step": 10778 }, { "epoch": 0.6563955789665986, "grad_norm": 0.9800550096117806, "learning_rate": 4.856172347841202e-06, "loss": 0.4301, "step": 10779 }, { "epoch": 0.6564564747434766, "grad_norm": 1.0200755949153983, "learning_rate": 4.856145671404518e-06, "loss": 0.5025, "step": 10780 }, { "epoch": 0.6565173705203544, "grad_norm": 1.1131010288238652, "learning_rate": 4.85611899256744e-06, "loss": 0.3556, "step": 10781 }, { "epoch": 0.6565782662972323, "grad_norm": 0.9712977650329988, "learning_rate": 4.856092311329999e-06, "loss": 0.4345, "step": 10782 }, { "epoch": 0.6566391620741101, "grad_norm": 1.0729957182008134, "learning_rate": 4.856065627692219e-06, "loss": 0.3951, "step": 10783 }, { "epoch": 0.6567000578509881, "grad_norm": 1.0702283179709389, "learning_rate": 4.856038941654129e-06, "loss": 0.3927, "step": 10784 }, { "epoch": 0.6567609536278659, "grad_norm": 0.9580746678624003, "learning_rate": 4.856012253215755e-06, "loss": 0.4257, "step": 10785 }, { "epoch": 0.6568218494047438, "grad_norm": 0.9651476344287314, "learning_rate": 4.855985562377126e-06, "loss": 0.3858, "step": 10786 }, { "epoch": 0.6568827451816217, "grad_norm": 1.1853069845632158, "learning_rate": 4.855958869138267e-06, "loss": 0.3742, "step": 10787 }, { "epoch": 0.6569436409584996, "grad_norm": 1.0489965898374678, "learning_rate": 4.855932173499205e-06, "loss": 0.4321, "step": 10788 }, { "epoch": 0.6570045367353774, "grad_norm": 0.9598825803075277, "learning_rate": 4.855905475459969e-06, "loss": 0.45, "step": 10789 }, { "epoch": 0.6570654325122552, "grad_norm": 0.9509724149428022, "learning_rate": 4.8558787750205864e-06, "loss": 0.4441, "step": 10790 }, { "epoch": 0.6571263282891332, "grad_norm": 1.0656564053292, "learning_rate": 4.855852072181082e-06, "loss": 0.3805, "step": 10791 }, { "epoch": 0.657187224066011, "grad_norm": 0.996813649998882, "learning_rate": 4.855825366941486e-06, "loss": 0.4364, "step": 10792 }, { "epoch": 0.6572481198428889, "grad_norm": 0.9843171079276983, "learning_rate": 4.855798659301824e-06, "loss": 0.4038, "step": 10793 }, { "epoch": 0.6573090156197667, "grad_norm": 1.0590338405172262, "learning_rate": 4.855771949262122e-06, "loss": 0.3943, "step": 10794 }, { "epoch": 0.6573699113966447, "grad_norm": 1.125462923697314, "learning_rate": 4.855745236822409e-06, "loss": 0.3872, "step": 10795 }, { "epoch": 0.6574308071735225, "grad_norm": 1.0648361646952091, "learning_rate": 4.855718521982712e-06, "loss": 0.4325, "step": 10796 }, { "epoch": 0.6574917029504004, "grad_norm": 0.9565555989029692, "learning_rate": 4.855691804743058e-06, "loss": 0.4447, "step": 10797 }, { "epoch": 0.6575525987272782, "grad_norm": 0.9808608336968754, "learning_rate": 4.855665085103474e-06, "loss": 0.4441, "step": 10798 }, { "epoch": 0.6576134945041562, "grad_norm": 1.0209004757063764, "learning_rate": 4.8556383630639875e-06, "loss": 0.4017, "step": 10799 }, { "epoch": 0.657674390281034, "grad_norm": 0.9218579143758534, "learning_rate": 4.855611638624626e-06, "loss": 0.4519, "step": 10800 }, { "epoch": 0.6577352860579119, "grad_norm": 1.0202803998141703, "learning_rate": 4.855584911785415e-06, "loss": 0.3975, "step": 10801 }, { "epoch": 0.6577961818347897, "grad_norm": 1.0948467084260454, "learning_rate": 4.855558182546384e-06, "loss": 0.5135, "step": 10802 }, { "epoch": 0.6578570776116677, "grad_norm": 0.9459887011897772, "learning_rate": 4.855531450907559e-06, "loss": 0.4664, "step": 10803 }, { "epoch": 0.6579179733885455, "grad_norm": 0.9419682695900831, "learning_rate": 4.855504716868969e-06, "loss": 0.4776, "step": 10804 }, { "epoch": 0.6579788691654234, "grad_norm": 0.9758994529363422, "learning_rate": 4.855477980430638e-06, "loss": 0.4369, "step": 10805 }, { "epoch": 0.6580397649423012, "grad_norm": 1.0154377947582789, "learning_rate": 4.855451241592595e-06, "loss": 0.4039, "step": 10806 }, { "epoch": 0.6581006607191792, "grad_norm": 1.0569041541619129, "learning_rate": 4.855424500354868e-06, "loss": 0.3733, "step": 10807 }, { "epoch": 0.658161556496057, "grad_norm": 1.0025228561619548, "learning_rate": 4.8553977567174835e-06, "loss": 0.4611, "step": 10808 }, { "epoch": 0.6582224522729349, "grad_norm": 0.9779107944851193, "learning_rate": 4.855371010680469e-06, "loss": 0.4262, "step": 10809 }, { "epoch": 0.6582833480498127, "grad_norm": 0.9446621038395492, "learning_rate": 4.85534426224385e-06, "loss": 0.4206, "step": 10810 }, { "epoch": 0.6583442438266907, "grad_norm": 0.9430154911736195, "learning_rate": 4.855317511407657e-06, "loss": 0.4324, "step": 10811 }, { "epoch": 0.6584051396035685, "grad_norm": 0.9779738220226666, "learning_rate": 4.855290758171914e-06, "loss": 0.4125, "step": 10812 }, { "epoch": 0.6584660353804463, "grad_norm": 0.9607162059751695, "learning_rate": 4.855264002536651e-06, "loss": 0.4807, "step": 10813 }, { "epoch": 0.6585269311573242, "grad_norm": 0.9972801012218467, "learning_rate": 4.855237244501894e-06, "loss": 0.459, "step": 10814 }, { "epoch": 0.6585878269342021, "grad_norm": 1.0256271483864585, "learning_rate": 4.8552104840676695e-06, "loss": 0.3966, "step": 10815 }, { "epoch": 0.65864872271108, "grad_norm": 1.022833038141481, "learning_rate": 4.855183721234006e-06, "loss": 0.3378, "step": 10816 }, { "epoch": 0.6587096184879578, "grad_norm": 1.0016843807279425, "learning_rate": 4.855156956000929e-06, "loss": 0.4557, "step": 10817 }, { "epoch": 0.6587705142648357, "grad_norm": 0.9571910597949741, "learning_rate": 4.855130188368469e-06, "loss": 0.4454, "step": 10818 }, { "epoch": 0.6588314100417136, "grad_norm": 1.0995592298417767, "learning_rate": 4.85510341833665e-06, "loss": 0.3528, "step": 10819 }, { "epoch": 0.6588923058185915, "grad_norm": 1.0503220062084164, "learning_rate": 4.855076645905501e-06, "loss": 0.4054, "step": 10820 }, { "epoch": 0.6589532015954693, "grad_norm": 1.0228470607695097, "learning_rate": 4.855049871075049e-06, "loss": 0.415, "step": 10821 }, { "epoch": 0.6590140973723472, "grad_norm": 0.9892227328979069, "learning_rate": 4.855023093845321e-06, "loss": 0.4371, "step": 10822 }, { "epoch": 0.6590749931492251, "grad_norm": 1.0743902170065447, "learning_rate": 4.854996314216345e-06, "loss": 0.3942, "step": 10823 }, { "epoch": 0.659135888926103, "grad_norm": 0.9653726504000136, "learning_rate": 4.8549695321881465e-06, "loss": 0.4199, "step": 10824 }, { "epoch": 0.6591967847029808, "grad_norm": 1.0532695381362671, "learning_rate": 4.854942747760755e-06, "loss": 0.4013, "step": 10825 }, { "epoch": 0.6592576804798588, "grad_norm": 1.0027866446685398, "learning_rate": 4.854915960934197e-06, "loss": 0.4408, "step": 10826 }, { "epoch": 0.6593185762567366, "grad_norm": 1.01040603190071, "learning_rate": 4.854889171708499e-06, "loss": 0.4209, "step": 10827 }, { "epoch": 0.6593794720336145, "grad_norm": 1.0010781429089783, "learning_rate": 4.854862380083689e-06, "loss": 0.4112, "step": 10828 }, { "epoch": 0.6594403678104923, "grad_norm": 0.998675058843228, "learning_rate": 4.854835586059794e-06, "loss": 0.4351, "step": 10829 }, { "epoch": 0.6595012635873703, "grad_norm": 1.011450894409111, "learning_rate": 4.854808789636841e-06, "loss": 0.3968, "step": 10830 }, { "epoch": 0.6595621593642481, "grad_norm": 1.0196342394126334, "learning_rate": 4.854781990814858e-06, "loss": 0.4055, "step": 10831 }, { "epoch": 0.659623055141126, "grad_norm": 0.9723168386992315, "learning_rate": 4.854755189593873e-06, "loss": 0.5126, "step": 10832 }, { "epoch": 0.6596839509180038, "grad_norm": 1.015619856613477, "learning_rate": 4.854728385973912e-06, "loss": 0.4468, "step": 10833 }, { "epoch": 0.6597448466948818, "grad_norm": 1.0313117032991197, "learning_rate": 4.854701579955003e-06, "loss": 0.4057, "step": 10834 }, { "epoch": 0.6598057424717596, "grad_norm": 1.0683823536022974, "learning_rate": 4.854674771537172e-06, "loss": 0.4111, "step": 10835 }, { "epoch": 0.6598666382486374, "grad_norm": 1.031793404667392, "learning_rate": 4.854647960720448e-06, "loss": 0.3708, "step": 10836 }, { "epoch": 0.6599275340255153, "grad_norm": 0.9126686532607116, "learning_rate": 4.854621147504857e-06, "loss": 0.4549, "step": 10837 }, { "epoch": 0.6599884298023933, "grad_norm": 0.9949450711158766, "learning_rate": 4.854594331890427e-06, "loss": 0.4183, "step": 10838 }, { "epoch": 0.6600493255792711, "grad_norm": 1.0060331475400075, "learning_rate": 4.854567513877185e-06, "loss": 0.3552, "step": 10839 }, { "epoch": 0.6601102213561489, "grad_norm": 1.1225469407054418, "learning_rate": 4.854540693465159e-06, "loss": 0.4175, "step": 10840 }, { "epoch": 0.6601711171330268, "grad_norm": 1.0289088618741509, "learning_rate": 4.854513870654377e-06, "loss": 0.3989, "step": 10841 }, { "epoch": 0.6602320129099047, "grad_norm": 0.9430542271577445, "learning_rate": 4.854487045444864e-06, "loss": 0.4825, "step": 10842 }, { "epoch": 0.6602929086867826, "grad_norm": 1.0018488583915521, "learning_rate": 4.854460217836648e-06, "loss": 0.4951, "step": 10843 }, { "epoch": 0.6603538044636604, "grad_norm": 1.0240148695009852, "learning_rate": 4.854433387829758e-06, "loss": 0.4042, "step": 10844 }, { "epoch": 0.6604147002405383, "grad_norm": 1.0233765370192025, "learning_rate": 4.85440655542422e-06, "loss": 0.4158, "step": 10845 }, { "epoch": 0.6604755960174162, "grad_norm": 1.1001802769640023, "learning_rate": 4.854379720620062e-06, "loss": 0.432, "step": 10846 }, { "epoch": 0.6605364917942941, "grad_norm": 0.982984358122493, "learning_rate": 4.85435288341731e-06, "loss": 0.4137, "step": 10847 }, { "epoch": 0.6605973875711719, "grad_norm": 0.9928911370022958, "learning_rate": 4.8543260438159925e-06, "loss": 0.4187, "step": 10848 }, { "epoch": 0.6606582833480498, "grad_norm": 1.0563511301511919, "learning_rate": 4.854299201816137e-06, "loss": 0.3508, "step": 10849 }, { "epoch": 0.6607191791249277, "grad_norm": 0.9477503629557527, "learning_rate": 4.85427235741777e-06, "loss": 0.4473, "step": 10850 }, { "epoch": 0.6607800749018056, "grad_norm": 0.9599696341997122, "learning_rate": 4.854245510620919e-06, "loss": 0.4091, "step": 10851 }, { "epoch": 0.6608409706786834, "grad_norm": 0.9838231738279957, "learning_rate": 4.854218661425612e-06, "loss": 0.4394, "step": 10852 }, { "epoch": 0.6609018664555613, "grad_norm": 1.085831090676276, "learning_rate": 4.854191809831876e-06, "loss": 0.3928, "step": 10853 }, { "epoch": 0.6609627622324392, "grad_norm": 0.9619572524556399, "learning_rate": 4.854164955839738e-06, "loss": 0.485, "step": 10854 }, { "epoch": 0.6610236580093171, "grad_norm": 0.9414999978185774, "learning_rate": 4.854138099449226e-06, "loss": 0.4434, "step": 10855 }, { "epoch": 0.6610845537861949, "grad_norm": 1.00313205181737, "learning_rate": 4.854111240660367e-06, "loss": 0.4181, "step": 10856 }, { "epoch": 0.6611454495630728, "grad_norm": 0.9594359161041118, "learning_rate": 4.854084379473188e-06, "loss": 0.4059, "step": 10857 }, { "epoch": 0.6612063453399507, "grad_norm": 0.9606120209868656, "learning_rate": 4.854057515887718e-06, "loss": 0.4113, "step": 10858 }, { "epoch": 0.6612672411168286, "grad_norm": 1.040199813717488, "learning_rate": 4.854030649903982e-06, "loss": 0.4133, "step": 10859 }, { "epoch": 0.6613281368937064, "grad_norm": 0.9839927159084053, "learning_rate": 4.854003781522008e-06, "loss": 0.4172, "step": 10860 }, { "epoch": 0.6613890326705842, "grad_norm": 0.9905012268521387, "learning_rate": 4.8539769107418254e-06, "loss": 0.409, "step": 10861 }, { "epoch": 0.6614499284474622, "grad_norm": 1.0094622299287923, "learning_rate": 4.853950037563459e-06, "loss": 0.4039, "step": 10862 }, { "epoch": 0.66151082422434, "grad_norm": 0.9324575502217681, "learning_rate": 4.853923161986937e-06, "loss": 0.4453, "step": 10863 }, { "epoch": 0.6615717200012179, "grad_norm": 0.9582996123417824, "learning_rate": 4.853896284012289e-06, "loss": 0.4848, "step": 10864 }, { "epoch": 0.6616326157780957, "grad_norm": 1.0531461679095455, "learning_rate": 4.853869403639538e-06, "loss": 0.3833, "step": 10865 }, { "epoch": 0.6616935115549737, "grad_norm": 1.0269765638891026, "learning_rate": 4.853842520868715e-06, "loss": 0.4333, "step": 10866 }, { "epoch": 0.6617544073318515, "grad_norm": 1.0167994106099834, "learning_rate": 4.853815635699846e-06, "loss": 0.3793, "step": 10867 }, { "epoch": 0.6618153031087294, "grad_norm": 0.9768474222463055, "learning_rate": 4.853788748132959e-06, "loss": 0.3989, "step": 10868 }, { "epoch": 0.6618761988856073, "grad_norm": 1.0245015223022833, "learning_rate": 4.8537618581680805e-06, "loss": 0.4022, "step": 10869 }, { "epoch": 0.6619370946624852, "grad_norm": 1.0230132985087168, "learning_rate": 4.853734965805238e-06, "loss": 0.4503, "step": 10870 }, { "epoch": 0.661997990439363, "grad_norm": 0.9649894843790323, "learning_rate": 4.85370807104446e-06, "loss": 0.3898, "step": 10871 }, { "epoch": 0.6620588862162409, "grad_norm": 1.0332590869451515, "learning_rate": 4.853681173885773e-06, "loss": 0.3893, "step": 10872 }, { "epoch": 0.6621197819931188, "grad_norm": 0.944717320027584, "learning_rate": 4.853654274329203e-06, "loss": 0.497, "step": 10873 }, { "epoch": 0.6621806777699967, "grad_norm": 1.0307152275513787, "learning_rate": 4.853627372374781e-06, "loss": 0.4199, "step": 10874 }, { "epoch": 0.6622415735468745, "grad_norm": 1.040253766535061, "learning_rate": 4.853600468022532e-06, "loss": 0.417, "step": 10875 }, { "epoch": 0.6623024693237524, "grad_norm": 1.0396760455720562, "learning_rate": 4.853573561272483e-06, "loss": 0.3669, "step": 10876 }, { "epoch": 0.6623633651006303, "grad_norm": 0.9818632165809128, "learning_rate": 4.853546652124663e-06, "loss": 0.403, "step": 10877 }, { "epoch": 0.6624242608775082, "grad_norm": 1.0127830985217798, "learning_rate": 4.853519740579098e-06, "loss": 0.3824, "step": 10878 }, { "epoch": 0.662485156654386, "grad_norm": 1.0157384049928673, "learning_rate": 4.853492826635816e-06, "loss": 0.4911, "step": 10879 }, { "epoch": 0.6625460524312639, "grad_norm": 1.0616934353734908, "learning_rate": 4.853465910294845e-06, "loss": 0.4785, "step": 10880 }, { "epoch": 0.6626069482081418, "grad_norm": 1.0178553146392049, "learning_rate": 4.853438991556212e-06, "loss": 0.4057, "step": 10881 }, { "epoch": 0.6626678439850197, "grad_norm": 1.0612842946713013, "learning_rate": 4.853412070419944e-06, "loss": 0.3625, "step": 10882 }, { "epoch": 0.6627287397618975, "grad_norm": 0.9998781184756224, "learning_rate": 4.8533851468860686e-06, "loss": 0.3747, "step": 10883 }, { "epoch": 0.6627896355387753, "grad_norm": 0.9540266637506134, "learning_rate": 4.8533582209546125e-06, "loss": 0.4868, "step": 10884 }, { "epoch": 0.6628505313156533, "grad_norm": 0.9517293619638534, "learning_rate": 4.853331292625605e-06, "loss": 0.4338, "step": 10885 }, { "epoch": 0.6629114270925311, "grad_norm": 1.0299812487976, "learning_rate": 4.853304361899072e-06, "loss": 0.4268, "step": 10886 }, { "epoch": 0.662972322869409, "grad_norm": 1.0228174071762608, "learning_rate": 4.8532774287750415e-06, "loss": 0.4326, "step": 10887 }, { "epoch": 0.6630332186462868, "grad_norm": 1.0172093203294825, "learning_rate": 4.853250493253542e-06, "loss": 0.3807, "step": 10888 }, { "epoch": 0.6630941144231648, "grad_norm": 1.0231744581008393, "learning_rate": 4.853223555334599e-06, "loss": 0.3956, "step": 10889 }, { "epoch": 0.6631550102000426, "grad_norm": 0.973215085165014, "learning_rate": 4.8531966150182405e-06, "loss": 0.5366, "step": 10890 }, { "epoch": 0.6632159059769205, "grad_norm": 0.9638885532071572, "learning_rate": 4.8531696723044944e-06, "loss": 0.425, "step": 10891 }, { "epoch": 0.6632768017537983, "grad_norm": 0.9422792124475408, "learning_rate": 4.853142727193388e-06, "loss": 0.4153, "step": 10892 }, { "epoch": 0.6633376975306763, "grad_norm": 0.9757795412452663, "learning_rate": 4.853115779684948e-06, "loss": 0.4318, "step": 10893 }, { "epoch": 0.6633985933075541, "grad_norm": 0.9056458284232386, "learning_rate": 4.853088829779204e-06, "loss": 0.5079, "step": 10894 }, { "epoch": 0.663459489084432, "grad_norm": 0.9370676969589701, "learning_rate": 4.853061877476181e-06, "loss": 0.4604, "step": 10895 }, { "epoch": 0.6635203848613098, "grad_norm": 1.036326703767138, "learning_rate": 4.853034922775908e-06, "loss": 0.3522, "step": 10896 }, { "epoch": 0.6635812806381878, "grad_norm": 0.9718583070792979, "learning_rate": 4.8530079656784115e-06, "loss": 0.4135, "step": 10897 }, { "epoch": 0.6636421764150656, "grad_norm": 1.0225612061259486, "learning_rate": 4.852981006183719e-06, "loss": 0.4707, "step": 10898 }, { "epoch": 0.6637030721919435, "grad_norm": 1.0237765423759144, "learning_rate": 4.8529540442918584e-06, "loss": 0.4326, "step": 10899 }, { "epoch": 0.6637639679688213, "grad_norm": 1.0498573902125752, "learning_rate": 4.8529270800028584e-06, "loss": 0.451, "step": 10900 }, { "epoch": 0.6638248637456993, "grad_norm": 1.0193540352588217, "learning_rate": 4.852900113316744e-06, "loss": 0.409, "step": 10901 }, { "epoch": 0.6638857595225771, "grad_norm": 1.032179311943782, "learning_rate": 4.8528731442335445e-06, "loss": 0.3751, "step": 10902 }, { "epoch": 0.663946655299455, "grad_norm": 1.050378272422504, "learning_rate": 4.852846172753287e-06, "loss": 0.3709, "step": 10903 }, { "epoch": 0.6640075510763328, "grad_norm": 0.9236642645886319, "learning_rate": 4.852819198875999e-06, "loss": 0.4645, "step": 10904 }, { "epoch": 0.6640684468532108, "grad_norm": 0.9688001458454807, "learning_rate": 4.852792222601706e-06, "loss": 0.4517, "step": 10905 }, { "epoch": 0.6641293426300886, "grad_norm": 0.9295537950605691, "learning_rate": 4.852765243930438e-06, "loss": 0.4518, "step": 10906 }, { "epoch": 0.6641902384069664, "grad_norm": 0.9646217230357522, "learning_rate": 4.852738262862223e-06, "loss": 0.3758, "step": 10907 }, { "epoch": 0.6642511341838444, "grad_norm": 1.0190966463579725, "learning_rate": 4.852711279397086e-06, "loss": 0.4101, "step": 10908 }, { "epoch": 0.6643120299607223, "grad_norm": 0.9427329548273702, "learning_rate": 4.852684293535056e-06, "loss": 0.4732, "step": 10909 }, { "epoch": 0.6643729257376001, "grad_norm": 1.0534966076492536, "learning_rate": 4.85265730527616e-06, "loss": 0.3814, "step": 10910 }, { "epoch": 0.6644338215144779, "grad_norm": 1.0438908750842169, "learning_rate": 4.852630314620426e-06, "loss": 0.4517, "step": 10911 }, { "epoch": 0.6644947172913559, "grad_norm": 0.9170702049388648, "learning_rate": 4.85260332156788e-06, "loss": 0.4241, "step": 10912 }, { "epoch": 0.6645556130682337, "grad_norm": 1.0922586010142585, "learning_rate": 4.852576326118551e-06, "loss": 0.3522, "step": 10913 }, { "epoch": 0.6646165088451116, "grad_norm": 0.9485925712235919, "learning_rate": 4.852549328272467e-06, "loss": 0.4687, "step": 10914 }, { "epoch": 0.6646774046219894, "grad_norm": 1.0119460776553728, "learning_rate": 4.852522328029654e-06, "loss": 0.4085, "step": 10915 }, { "epoch": 0.6647383003988674, "grad_norm": 1.0354685286977583, "learning_rate": 4.85249532539014e-06, "loss": 0.3575, "step": 10916 }, { "epoch": 0.6647991961757452, "grad_norm": 1.186882478175263, "learning_rate": 4.852468320353954e-06, "loss": 0.4266, "step": 10917 }, { "epoch": 0.6648600919526231, "grad_norm": 0.9491023127818863, "learning_rate": 4.8524413129211215e-06, "loss": 0.4509, "step": 10918 }, { "epoch": 0.6649209877295009, "grad_norm": 1.051952172581502, "learning_rate": 4.8524143030916695e-06, "loss": 0.4673, "step": 10919 }, { "epoch": 0.6649818835063789, "grad_norm": 1.0268053475001107, "learning_rate": 4.852387290865628e-06, "loss": 0.3939, "step": 10920 }, { "epoch": 0.6650427792832567, "grad_norm": 1.0024289129522694, "learning_rate": 4.852360276243023e-06, "loss": 0.4125, "step": 10921 }, { "epoch": 0.6651036750601346, "grad_norm": 0.9730569222008284, "learning_rate": 4.852333259223882e-06, "loss": 0.4208, "step": 10922 }, { "epoch": 0.6651645708370124, "grad_norm": 1.0539722679162622, "learning_rate": 4.852306239808233e-06, "loss": 0.3827, "step": 10923 }, { "epoch": 0.6652254666138904, "grad_norm": 1.242524141745074, "learning_rate": 4.8522792179961035e-06, "loss": 0.3817, "step": 10924 }, { "epoch": 0.6652863623907682, "grad_norm": 0.9307748737408275, "learning_rate": 4.85225219378752e-06, "loss": 0.4458, "step": 10925 }, { "epoch": 0.6653472581676461, "grad_norm": 0.9605486084454907, "learning_rate": 4.852225167182511e-06, "loss": 0.422, "step": 10926 }, { "epoch": 0.6654081539445239, "grad_norm": 0.9940655969459177, "learning_rate": 4.852198138181105e-06, "loss": 0.5045, "step": 10927 }, { "epoch": 0.6654690497214019, "grad_norm": 0.9765896749760535, "learning_rate": 4.8521711067833275e-06, "loss": 0.4202, "step": 10928 }, { "epoch": 0.6655299454982797, "grad_norm": 0.9439601374605454, "learning_rate": 4.852144072989208e-06, "loss": 0.4047, "step": 10929 }, { "epoch": 0.6655908412751576, "grad_norm": 1.0078550397237753, "learning_rate": 4.852117036798772e-06, "loss": 0.4342, "step": 10930 }, { "epoch": 0.6656517370520354, "grad_norm": 0.9867723388945756, "learning_rate": 4.852089998212049e-06, "loss": 0.4411, "step": 10931 }, { "epoch": 0.6657126328289134, "grad_norm": 1.0631918780024967, "learning_rate": 4.852062957229065e-06, "loss": 0.3904, "step": 10932 }, { "epoch": 0.6657735286057912, "grad_norm": 1.0366536193781506, "learning_rate": 4.852035913849847e-06, "loss": 0.4109, "step": 10933 }, { "epoch": 0.665834424382669, "grad_norm": 0.9541742496952883, "learning_rate": 4.852008868074425e-06, "loss": 0.404, "step": 10934 }, { "epoch": 0.6658953201595469, "grad_norm": 1.0947693304406823, "learning_rate": 4.851981819902825e-06, "loss": 0.362, "step": 10935 }, { "epoch": 0.6659562159364248, "grad_norm": 0.9835329512986996, "learning_rate": 4.851954769335075e-06, "loss": 0.439, "step": 10936 }, { "epoch": 0.6660171117133027, "grad_norm": 0.9677577398852311, "learning_rate": 4.851927716371202e-06, "loss": 0.3689, "step": 10937 }, { "epoch": 0.6660780074901805, "grad_norm": 0.997654117552148, "learning_rate": 4.851900661011235e-06, "loss": 0.4486, "step": 10938 }, { "epoch": 0.6661389032670584, "grad_norm": 1.0058419485605494, "learning_rate": 4.851873603255199e-06, "loss": 0.3514, "step": 10939 }, { "epoch": 0.6661997990439363, "grad_norm": 1.0655833982672573, "learning_rate": 4.851846543103124e-06, "loss": 0.4271, "step": 10940 }, { "epoch": 0.6662606948208142, "grad_norm": 1.0638302380451383, "learning_rate": 4.851819480555036e-06, "loss": 0.4242, "step": 10941 }, { "epoch": 0.666321590597692, "grad_norm": 1.008323628272093, "learning_rate": 4.851792415610964e-06, "loss": 0.4467, "step": 10942 }, { "epoch": 0.6663824863745699, "grad_norm": 1.0920253097760089, "learning_rate": 4.8517653482709344e-06, "loss": 0.4325, "step": 10943 }, { "epoch": 0.6664433821514478, "grad_norm": 1.0265132649551747, "learning_rate": 4.851738278534976e-06, "loss": 0.3892, "step": 10944 }, { "epoch": 0.6665042779283257, "grad_norm": 0.9706098920142473, "learning_rate": 4.851711206403115e-06, "loss": 0.4242, "step": 10945 }, { "epoch": 0.6665651737052035, "grad_norm": 0.9999648311075655, "learning_rate": 4.851684131875378e-06, "loss": 0.468, "step": 10946 }, { "epoch": 0.6666260694820814, "grad_norm": 0.9543740121172526, "learning_rate": 4.8516570549517964e-06, "loss": 0.4717, "step": 10947 }, { "epoch": 0.6666869652589593, "grad_norm": 0.9623932364405504, "learning_rate": 4.851629975632393e-06, "loss": 0.4958, "step": 10948 }, { "epoch": 0.6667478610358372, "grad_norm": 0.9944568410010683, "learning_rate": 4.8516028939172e-06, "loss": 0.3915, "step": 10949 }, { "epoch": 0.666808756812715, "grad_norm": 1.0531670552288612, "learning_rate": 4.8515758098062425e-06, "loss": 0.4441, "step": 10950 }, { "epoch": 0.666869652589593, "grad_norm": 0.9374210531466822, "learning_rate": 4.851548723299548e-06, "loss": 0.4008, "step": 10951 }, { "epoch": 0.6669305483664708, "grad_norm": 1.0171582096174043, "learning_rate": 4.851521634397145e-06, "loss": 0.4145, "step": 10952 }, { "epoch": 0.6669914441433487, "grad_norm": 1.137726548631766, "learning_rate": 4.85149454309906e-06, "loss": 0.4179, "step": 10953 }, { "epoch": 0.6670523399202265, "grad_norm": 1.0717028781808087, "learning_rate": 4.851467449405321e-06, "loss": 0.4137, "step": 10954 }, { "epoch": 0.6671132356971045, "grad_norm": 1.0237320162248034, "learning_rate": 4.851440353315957e-06, "loss": 0.3859, "step": 10955 }, { "epoch": 0.6671741314739823, "grad_norm": 0.9510467498668178, "learning_rate": 4.851413254830994e-06, "loss": 0.4273, "step": 10956 }, { "epoch": 0.6672350272508601, "grad_norm": 1.0012707886940226, "learning_rate": 4.851386153950461e-06, "loss": 0.3847, "step": 10957 }, { "epoch": 0.667295923027738, "grad_norm": 1.0227496499018105, "learning_rate": 4.851359050674382e-06, "loss": 0.3639, "step": 10958 }, { "epoch": 0.667356818804616, "grad_norm": 0.9959812910431032, "learning_rate": 4.85133194500279e-06, "loss": 0.4248, "step": 10959 }, { "epoch": 0.6674177145814938, "grad_norm": 0.9259170627481387, "learning_rate": 4.851304836935709e-06, "loss": 0.4806, "step": 10960 }, { "epoch": 0.6674786103583716, "grad_norm": 1.0748945059954411, "learning_rate": 4.8512777264731665e-06, "loss": 0.3906, "step": 10961 }, { "epoch": 0.6675395061352495, "grad_norm": 1.0197708827351524, "learning_rate": 4.851250613615192e-06, "loss": 0.4918, "step": 10962 }, { "epoch": 0.6676004019121274, "grad_norm": 1.030589773659939, "learning_rate": 4.851223498361812e-06, "loss": 0.427, "step": 10963 }, { "epoch": 0.6676612976890053, "grad_norm": 0.9692609249464885, "learning_rate": 4.851196380713055e-06, "loss": 0.442, "step": 10964 }, { "epoch": 0.6677221934658831, "grad_norm": 1.0420586698976888, "learning_rate": 4.851169260668948e-06, "loss": 0.4274, "step": 10965 }, { "epoch": 0.667783089242761, "grad_norm": 0.9879932649577083, "learning_rate": 4.851142138229517e-06, "loss": 0.4404, "step": 10966 }, { "epoch": 0.6678439850196389, "grad_norm": 0.9795325802123572, "learning_rate": 4.851115013394793e-06, "loss": 0.4302, "step": 10967 }, { "epoch": 0.6679048807965168, "grad_norm": 1.0406697789433454, "learning_rate": 4.851087886164802e-06, "loss": 0.412, "step": 10968 }, { "epoch": 0.6679657765733946, "grad_norm": 1.0183234417604294, "learning_rate": 4.8510607565395705e-06, "loss": 0.4353, "step": 10969 }, { "epoch": 0.6680266723502725, "grad_norm": 0.9569717217839143, "learning_rate": 4.851033624519128e-06, "loss": 0.4412, "step": 10970 }, { "epoch": 0.6680875681271504, "grad_norm": 1.0280002093134524, "learning_rate": 4.8510064901035e-06, "loss": 0.4498, "step": 10971 }, { "epoch": 0.6681484639040283, "grad_norm": 1.0306306169723762, "learning_rate": 4.850979353292717e-06, "loss": 0.4016, "step": 10972 }, { "epoch": 0.6682093596809061, "grad_norm": 1.036425435635285, "learning_rate": 4.850952214086805e-06, "loss": 0.411, "step": 10973 }, { "epoch": 0.668270255457784, "grad_norm": 1.0009645537431864, "learning_rate": 4.850925072485791e-06, "loss": 0.366, "step": 10974 }, { "epoch": 0.6683311512346619, "grad_norm": 1.0305837942660647, "learning_rate": 4.850897928489704e-06, "loss": 0.4244, "step": 10975 }, { "epoch": 0.6683920470115398, "grad_norm": 1.015149378561846, "learning_rate": 4.85087078209857e-06, "loss": 0.4183, "step": 10976 }, { "epoch": 0.6684529427884176, "grad_norm": 1.0209767617554988, "learning_rate": 4.850843633312418e-06, "loss": 0.438, "step": 10977 }, { "epoch": 0.6685138385652954, "grad_norm": 0.9704049823719034, "learning_rate": 4.850816482131277e-06, "loss": 0.5421, "step": 10978 }, { "epoch": 0.6685747343421734, "grad_norm": 1.04083457530483, "learning_rate": 4.850789328555172e-06, "loss": 0.434, "step": 10979 }, { "epoch": 0.6686356301190512, "grad_norm": 1.0165274694912019, "learning_rate": 4.8507621725841315e-06, "loss": 0.5068, "step": 10980 }, { "epoch": 0.6686965258959291, "grad_norm": 1.052370837579109, "learning_rate": 4.850735014218184e-06, "loss": 0.3767, "step": 10981 }, { "epoch": 0.6687574216728069, "grad_norm": 1.1785140134153098, "learning_rate": 4.850707853457356e-06, "loss": 0.4402, "step": 10982 }, { "epoch": 0.6688183174496849, "grad_norm": 1.0238153596857873, "learning_rate": 4.850680690301676e-06, "loss": 0.4644, "step": 10983 }, { "epoch": 0.6688792132265627, "grad_norm": 1.0091040359102175, "learning_rate": 4.85065352475117e-06, "loss": 0.4131, "step": 10984 }, { "epoch": 0.6689401090034406, "grad_norm": 1.020237959503554, "learning_rate": 4.850626356805869e-06, "loss": 0.4644, "step": 10985 }, { "epoch": 0.6690010047803184, "grad_norm": 0.9168453829018834, "learning_rate": 4.850599186465798e-06, "loss": 0.4402, "step": 10986 }, { "epoch": 0.6690619005571964, "grad_norm": 0.9988071096921363, "learning_rate": 4.850572013730985e-06, "loss": 0.4802, "step": 10987 }, { "epoch": 0.6691227963340742, "grad_norm": 1.1480246250390511, "learning_rate": 4.850544838601458e-06, "loss": 0.4077, "step": 10988 }, { "epoch": 0.6691836921109521, "grad_norm": 1.047557088723968, "learning_rate": 4.850517661077246e-06, "loss": 0.4681, "step": 10989 }, { "epoch": 0.66924458788783, "grad_norm": 0.9436739949572789, "learning_rate": 4.850490481158374e-06, "loss": 0.4778, "step": 10990 }, { "epoch": 0.6693054836647079, "grad_norm": 1.0443355717699387, "learning_rate": 4.850463298844872e-06, "loss": 0.3949, "step": 10991 }, { "epoch": 0.6693663794415857, "grad_norm": 1.0280256450285195, "learning_rate": 4.8504361141367676e-06, "loss": 0.4363, "step": 10992 }, { "epoch": 0.6694272752184636, "grad_norm": 0.9715675499030834, "learning_rate": 4.850408927034086e-06, "loss": 0.3727, "step": 10993 }, { "epoch": 0.6694881709953415, "grad_norm": 1.0506465119531778, "learning_rate": 4.8503817375368574e-06, "loss": 0.4431, "step": 10994 }, { "epoch": 0.6695490667722194, "grad_norm": 1.043807222102904, "learning_rate": 4.8503545456451095e-06, "loss": 0.3944, "step": 10995 }, { "epoch": 0.6696099625490972, "grad_norm": 0.9331654500821939, "learning_rate": 4.850327351358869e-06, "loss": 0.5255, "step": 10996 }, { "epoch": 0.6696708583259751, "grad_norm": 0.9804529174212946, "learning_rate": 4.850300154678164e-06, "loss": 0.3732, "step": 10997 }, { "epoch": 0.669731754102853, "grad_norm": 0.9724646343062724, "learning_rate": 4.850272955603021e-06, "loss": 0.3947, "step": 10998 }, { "epoch": 0.6697926498797309, "grad_norm": 0.9766786036223919, "learning_rate": 4.850245754133469e-06, "loss": 0.4026, "step": 10999 }, { "epoch": 0.6698535456566087, "grad_norm": 1.0129337998731796, "learning_rate": 4.850218550269536e-06, "loss": 0.4812, "step": 11000 }, { "epoch": 0.6699144414334866, "grad_norm": 1.002523986784341, "learning_rate": 4.850191344011249e-06, "loss": 0.4388, "step": 11001 }, { "epoch": 0.6699753372103645, "grad_norm": 1.065831999239812, "learning_rate": 4.850164135358636e-06, "loss": 0.4046, "step": 11002 }, { "epoch": 0.6700362329872424, "grad_norm": 0.9867182527266276, "learning_rate": 4.850136924311724e-06, "loss": 0.4374, "step": 11003 }, { "epoch": 0.6700971287641202, "grad_norm": 1.0295490877572095, "learning_rate": 4.850109710870543e-06, "loss": 0.3951, "step": 11004 }, { "epoch": 0.670158024540998, "grad_norm": 1.0079562917941012, "learning_rate": 4.850082495035118e-06, "loss": 0.4412, "step": 11005 }, { "epoch": 0.670218920317876, "grad_norm": 1.0109812255744688, "learning_rate": 4.850055276805478e-06, "loss": 0.4609, "step": 11006 }, { "epoch": 0.6702798160947538, "grad_norm": 0.9825009139870144, "learning_rate": 4.850028056181651e-06, "loss": 0.4731, "step": 11007 }, { "epoch": 0.6703407118716317, "grad_norm": 1.0146682728435632, "learning_rate": 4.850000833163663e-06, "loss": 0.327, "step": 11008 }, { "epoch": 0.6704016076485095, "grad_norm": 0.9777628004064945, "learning_rate": 4.849973607751543e-06, "loss": 0.3658, "step": 11009 }, { "epoch": 0.6704625034253875, "grad_norm": 1.0863574423200937, "learning_rate": 4.84994637994532e-06, "loss": 0.4764, "step": 11010 }, { "epoch": 0.6705233992022653, "grad_norm": 1.0418806465046708, "learning_rate": 4.8499191497450195e-06, "loss": 0.3992, "step": 11011 }, { "epoch": 0.6705842949791432, "grad_norm": 0.9646707418597216, "learning_rate": 4.849891917150671e-06, "loss": 0.4929, "step": 11012 }, { "epoch": 0.670645190756021, "grad_norm": 0.9341890342279993, "learning_rate": 4.849864682162301e-06, "loss": 0.4568, "step": 11013 }, { "epoch": 0.670706086532899, "grad_norm": 1.0234021571315992, "learning_rate": 4.849837444779938e-06, "loss": 0.4158, "step": 11014 }, { "epoch": 0.6707669823097768, "grad_norm": 0.9968830537504527, "learning_rate": 4.849810205003609e-06, "loss": 0.4497, "step": 11015 }, { "epoch": 0.6708278780866547, "grad_norm": 0.9743198236906094, "learning_rate": 4.849782962833344e-06, "loss": 0.4056, "step": 11016 }, { "epoch": 0.6708887738635325, "grad_norm": 0.9551392188890868, "learning_rate": 4.849755718269167e-06, "loss": 0.49, "step": 11017 }, { "epoch": 0.6709496696404105, "grad_norm": 1.009235700735248, "learning_rate": 4.849728471311108e-06, "loss": 0.4674, "step": 11018 }, { "epoch": 0.6710105654172883, "grad_norm": 1.086455086953784, "learning_rate": 4.849701221959194e-06, "loss": 0.3847, "step": 11019 }, { "epoch": 0.6710714611941662, "grad_norm": 0.9102766291121078, "learning_rate": 4.8496739702134546e-06, "loss": 0.5001, "step": 11020 }, { "epoch": 0.671132356971044, "grad_norm": 1.0481247700252898, "learning_rate": 4.849646716073916e-06, "loss": 0.4402, "step": 11021 }, { "epoch": 0.671193252747922, "grad_norm": 1.0210478517404513, "learning_rate": 4.849619459540606e-06, "loss": 0.4338, "step": 11022 }, { "epoch": 0.6712541485247998, "grad_norm": 0.9791742257599879, "learning_rate": 4.8495922006135515e-06, "loss": 0.4164, "step": 11023 }, { "epoch": 0.6713150443016777, "grad_norm": 1.0881237367425283, "learning_rate": 4.849564939292782e-06, "loss": 0.3367, "step": 11024 }, { "epoch": 0.6713759400785555, "grad_norm": 0.9931840154566002, "learning_rate": 4.849537675578326e-06, "loss": 0.4222, "step": 11025 }, { "epoch": 0.6714368358554335, "grad_norm": 1.0556362916724997, "learning_rate": 4.849510409470207e-06, "loss": 0.4417, "step": 11026 }, { "epoch": 0.6714977316323113, "grad_norm": 0.9254307557904018, "learning_rate": 4.849483140968457e-06, "loss": 0.415, "step": 11027 }, { "epoch": 0.6715586274091891, "grad_norm": 0.983868618570989, "learning_rate": 4.849455870073103e-06, "loss": 0.4021, "step": 11028 }, { "epoch": 0.671619523186067, "grad_norm": 0.9291175752405493, "learning_rate": 4.849428596784173e-06, "loss": 0.4006, "step": 11029 }, { "epoch": 0.671680418962945, "grad_norm": 0.994028921491516, "learning_rate": 4.849401321101692e-06, "loss": 0.4206, "step": 11030 }, { "epoch": 0.6717413147398228, "grad_norm": 1.057449612909826, "learning_rate": 4.849374043025691e-06, "loss": 0.4339, "step": 11031 }, { "epoch": 0.6718022105167006, "grad_norm": 0.9690503499568918, "learning_rate": 4.849346762556196e-06, "loss": 0.4211, "step": 11032 }, { "epoch": 0.6718631062935786, "grad_norm": 0.9939976988844786, "learning_rate": 4.849319479693235e-06, "loss": 0.4453, "step": 11033 }, { "epoch": 0.6719240020704564, "grad_norm": 1.002269462288247, "learning_rate": 4.8492921944368375e-06, "loss": 0.3666, "step": 11034 }, { "epoch": 0.6719848978473343, "grad_norm": 1.04398563514531, "learning_rate": 4.849264906787029e-06, "loss": 0.3939, "step": 11035 }, { "epoch": 0.6720457936242121, "grad_norm": 0.9986164828072213, "learning_rate": 4.849237616743838e-06, "loss": 0.3652, "step": 11036 }, { "epoch": 0.6721066894010901, "grad_norm": 0.933507319220859, "learning_rate": 4.849210324307293e-06, "loss": 0.4779, "step": 11037 }, { "epoch": 0.6721675851779679, "grad_norm": 1.0280931698885212, "learning_rate": 4.849183029477421e-06, "loss": 0.3724, "step": 11038 }, { "epoch": 0.6722284809548458, "grad_norm": 1.0095338732830854, "learning_rate": 4.849155732254251e-06, "loss": 0.3753, "step": 11039 }, { "epoch": 0.6722893767317236, "grad_norm": 1.1347114823635263, "learning_rate": 4.8491284326378096e-06, "loss": 0.4553, "step": 11040 }, { "epoch": 0.6723502725086016, "grad_norm": 1.0633277232418699, "learning_rate": 4.8491011306281244e-06, "loss": 0.3869, "step": 11041 }, { "epoch": 0.6724111682854794, "grad_norm": 1.0424508846829528, "learning_rate": 4.849073826225225e-06, "loss": 0.3987, "step": 11042 }, { "epoch": 0.6724720640623573, "grad_norm": 0.9931704981268175, "learning_rate": 4.849046519429137e-06, "loss": 0.4443, "step": 11043 }, { "epoch": 0.6725329598392351, "grad_norm": 0.9654313159849062, "learning_rate": 4.8490192102398896e-06, "loss": 0.472, "step": 11044 }, { "epoch": 0.6725938556161131, "grad_norm": 1.0500651648992991, "learning_rate": 4.84899189865751e-06, "loss": 0.3854, "step": 11045 }, { "epoch": 0.6726547513929909, "grad_norm": 1.0513220800966414, "learning_rate": 4.848964584682027e-06, "loss": 0.3564, "step": 11046 }, { "epoch": 0.6727156471698688, "grad_norm": 1.0615214250958254, "learning_rate": 4.848937268313467e-06, "loss": 0.3707, "step": 11047 }, { "epoch": 0.6727765429467466, "grad_norm": 0.9355517455552463, "learning_rate": 4.848909949551859e-06, "loss": 0.4687, "step": 11048 }, { "epoch": 0.6728374387236246, "grad_norm": 1.001244378120841, "learning_rate": 4.84888262839723e-06, "loss": 0.3808, "step": 11049 }, { "epoch": 0.6728983345005024, "grad_norm": 1.0421128810551652, "learning_rate": 4.848855304849608e-06, "loss": 0.375, "step": 11050 }, { "epoch": 0.6729592302773802, "grad_norm": 0.952989093846905, "learning_rate": 4.848827978909021e-06, "loss": 0.4751, "step": 11051 }, { "epoch": 0.6730201260542581, "grad_norm": 0.9607962012235028, "learning_rate": 4.848800650575498e-06, "loss": 0.4713, "step": 11052 }, { "epoch": 0.673081021831136, "grad_norm": 1.0777805747795686, "learning_rate": 4.848773319849065e-06, "loss": 0.4373, "step": 11053 }, { "epoch": 0.6731419176080139, "grad_norm": 0.9190848021105836, "learning_rate": 4.848745986729751e-06, "loss": 0.4763, "step": 11054 }, { "epoch": 0.6732028133848917, "grad_norm": 0.9381170375017944, "learning_rate": 4.848718651217582e-06, "loss": 0.4081, "step": 11055 }, { "epoch": 0.6732637091617696, "grad_norm": 0.9970841588050136, "learning_rate": 4.848691313312589e-06, "loss": 0.3946, "step": 11056 }, { "epoch": 0.6733246049386475, "grad_norm": 1.0329881476568976, "learning_rate": 4.848663973014797e-06, "loss": 0.4003, "step": 11057 }, { "epoch": 0.6733855007155254, "grad_norm": 0.9996666960366568, "learning_rate": 4.848636630324236e-06, "loss": 0.4234, "step": 11058 }, { "epoch": 0.6734463964924032, "grad_norm": 1.0486557032226116, "learning_rate": 4.848609285240932e-06, "loss": 0.4583, "step": 11059 }, { "epoch": 0.6735072922692811, "grad_norm": 1.0016169356804683, "learning_rate": 4.848581937764914e-06, "loss": 0.466, "step": 11060 }, { "epoch": 0.673568188046159, "grad_norm": 1.051695050638161, "learning_rate": 4.848554587896209e-06, "loss": 0.3998, "step": 11061 }, { "epoch": 0.6736290838230369, "grad_norm": 1.0209556682263567, "learning_rate": 4.848527235634846e-06, "loss": 0.4387, "step": 11062 }, { "epoch": 0.6736899795999147, "grad_norm": 0.9945242767973426, "learning_rate": 4.848499880980853e-06, "loss": 0.4215, "step": 11063 }, { "epoch": 0.6737508753767926, "grad_norm": 1.0035401848764889, "learning_rate": 4.848472523934256e-06, "loss": 0.4764, "step": 11064 }, { "epoch": 0.6738117711536705, "grad_norm": 1.036368904807259, "learning_rate": 4.848445164495084e-06, "loss": 0.4468, "step": 11065 }, { "epoch": 0.6738726669305484, "grad_norm": 0.9424424370912071, "learning_rate": 4.848417802663366e-06, "loss": 0.4434, "step": 11066 }, { "epoch": 0.6739335627074262, "grad_norm": 0.9893956347286609, "learning_rate": 4.8483904384391276e-06, "loss": 0.4172, "step": 11067 }, { "epoch": 0.6739944584843041, "grad_norm": 1.0431070671074523, "learning_rate": 4.8483630718223974e-06, "loss": 0.4747, "step": 11068 }, { "epoch": 0.674055354261182, "grad_norm": 1.0066370765701975, "learning_rate": 4.848335702813205e-06, "loss": 0.4041, "step": 11069 }, { "epoch": 0.6741162500380599, "grad_norm": 0.9770245859674555, "learning_rate": 4.848308331411576e-06, "loss": 0.458, "step": 11070 }, { "epoch": 0.6741771458149377, "grad_norm": 1.008226099238263, "learning_rate": 4.8482809576175405e-06, "loss": 0.3604, "step": 11071 }, { "epoch": 0.6742380415918157, "grad_norm": 0.954000950880963, "learning_rate": 4.848253581431124e-06, "loss": 0.5101, "step": 11072 }, { "epoch": 0.6742989373686935, "grad_norm": 1.0551629980393427, "learning_rate": 4.848226202852356e-06, "loss": 0.5367, "step": 11073 }, { "epoch": 0.6743598331455714, "grad_norm": 0.9364871432238147, "learning_rate": 4.848198821881264e-06, "loss": 0.4848, "step": 11074 }, { "epoch": 0.6744207289224492, "grad_norm": 1.1019848787871274, "learning_rate": 4.848171438517877e-06, "loss": 0.447, "step": 11075 }, { "epoch": 0.6744816246993272, "grad_norm": 0.9658020881781938, "learning_rate": 4.848144052762221e-06, "loss": 0.436, "step": 11076 }, { "epoch": 0.674542520476205, "grad_norm": 0.9464544843500194, "learning_rate": 4.848116664614324e-06, "loss": 0.4624, "step": 11077 }, { "epoch": 0.6746034162530828, "grad_norm": 0.9830562147622031, "learning_rate": 4.8480892740742145e-06, "loss": 0.4377, "step": 11078 }, { "epoch": 0.6746643120299607, "grad_norm": 0.934948814646265, "learning_rate": 4.848061881141921e-06, "loss": 0.5022, "step": 11079 }, { "epoch": 0.6747252078068386, "grad_norm": 1.0479006423804473, "learning_rate": 4.848034485817471e-06, "loss": 0.4225, "step": 11080 }, { "epoch": 0.6747861035837165, "grad_norm": 1.0438474899371928, "learning_rate": 4.848007088100892e-06, "loss": 0.377, "step": 11081 }, { "epoch": 0.6748469993605943, "grad_norm": 0.9246003755595069, "learning_rate": 4.847979687992212e-06, "loss": 0.4459, "step": 11082 }, { "epoch": 0.6749078951374722, "grad_norm": 0.9463017277176468, "learning_rate": 4.84795228549146e-06, "loss": 0.4438, "step": 11083 }, { "epoch": 0.6749687909143501, "grad_norm": 1.0795723111551443, "learning_rate": 4.847924880598662e-06, "loss": 0.4303, "step": 11084 }, { "epoch": 0.675029686691228, "grad_norm": 0.9780673382319126, "learning_rate": 4.847897473313847e-06, "loss": 0.3871, "step": 11085 }, { "epoch": 0.6750905824681058, "grad_norm": 1.0962200242230788, "learning_rate": 4.8478700636370435e-06, "loss": 0.4026, "step": 11086 }, { "epoch": 0.6751514782449837, "grad_norm": 1.0833928997621967, "learning_rate": 4.847842651568278e-06, "loss": 0.4142, "step": 11087 }, { "epoch": 0.6752123740218616, "grad_norm": 0.926046760790489, "learning_rate": 4.84781523710758e-06, "loss": 0.4661, "step": 11088 }, { "epoch": 0.6752732697987395, "grad_norm": 0.9860629389591611, "learning_rate": 4.847787820254976e-06, "loss": 0.4081, "step": 11089 }, { "epoch": 0.6753341655756173, "grad_norm": 1.0106140354002344, "learning_rate": 4.847760401010495e-06, "loss": 0.3965, "step": 11090 }, { "epoch": 0.6753950613524952, "grad_norm": 0.9760027853677836, "learning_rate": 4.847732979374165e-06, "loss": 0.4733, "step": 11091 }, { "epoch": 0.6754559571293731, "grad_norm": 0.9815180684571929, "learning_rate": 4.847705555346013e-06, "loss": 0.4398, "step": 11092 }, { "epoch": 0.675516852906251, "grad_norm": 0.988699202758586, "learning_rate": 4.847678128926067e-06, "loss": 0.3667, "step": 11093 }, { "epoch": 0.6755777486831288, "grad_norm": 1.060188559196745, "learning_rate": 4.847650700114355e-06, "loss": 0.3953, "step": 11094 }, { "epoch": 0.6756386444600067, "grad_norm": 1.0354818467631446, "learning_rate": 4.847623268910906e-06, "loss": 0.4182, "step": 11095 }, { "epoch": 0.6756995402368846, "grad_norm": 0.9707223055953975, "learning_rate": 4.847595835315747e-06, "loss": 0.4583, "step": 11096 }, { "epoch": 0.6757604360137625, "grad_norm": 0.9425341327915804, "learning_rate": 4.8475683993289065e-06, "loss": 0.4699, "step": 11097 }, { "epoch": 0.6758213317906403, "grad_norm": 0.988243289197217, "learning_rate": 4.847540960950412e-06, "loss": 0.4534, "step": 11098 }, { "epoch": 0.6758822275675181, "grad_norm": 0.9528666235786516, "learning_rate": 4.8475135201802915e-06, "loss": 0.4906, "step": 11099 }, { "epoch": 0.6759431233443961, "grad_norm": 1.0411895378733695, "learning_rate": 4.847486077018573e-06, "loss": 0.4077, "step": 11100 }, { "epoch": 0.676004019121274, "grad_norm": 0.957753289934023, "learning_rate": 4.847458631465285e-06, "loss": 0.4565, "step": 11101 }, { "epoch": 0.6760649148981518, "grad_norm": 0.8902918200899529, "learning_rate": 4.847431183520454e-06, "loss": 0.4632, "step": 11102 }, { "epoch": 0.6761258106750296, "grad_norm": 0.9471954268624351, "learning_rate": 4.84740373318411e-06, "loss": 0.4373, "step": 11103 }, { "epoch": 0.6761867064519076, "grad_norm": 0.9665624841816434, "learning_rate": 4.847376280456279e-06, "loss": 0.4253, "step": 11104 }, { "epoch": 0.6762476022287854, "grad_norm": 1.0404658092708357, "learning_rate": 4.847348825336989e-06, "loss": 0.3605, "step": 11105 }, { "epoch": 0.6763084980056633, "grad_norm": 0.9781461412325797, "learning_rate": 4.847321367826271e-06, "loss": 0.4391, "step": 11106 }, { "epoch": 0.6763693937825411, "grad_norm": 1.0524852009434085, "learning_rate": 4.8472939079241496e-06, "loss": 0.4383, "step": 11107 }, { "epoch": 0.6764302895594191, "grad_norm": 1.0256936030222337, "learning_rate": 4.847266445630654e-06, "loss": 0.4375, "step": 11108 }, { "epoch": 0.6764911853362969, "grad_norm": 0.990346062282462, "learning_rate": 4.847238980945812e-06, "loss": 0.464, "step": 11109 }, { "epoch": 0.6765520811131748, "grad_norm": 1.0635451459793475, "learning_rate": 4.847211513869652e-06, "loss": 0.3744, "step": 11110 }, { "epoch": 0.6766129768900526, "grad_norm": 1.041623817307115, "learning_rate": 4.847184044402202e-06, "loss": 0.4826, "step": 11111 }, { "epoch": 0.6766738726669306, "grad_norm": 1.0267300373809238, "learning_rate": 4.847156572543489e-06, "loss": 0.4678, "step": 11112 }, { "epoch": 0.6767347684438084, "grad_norm": 1.0037743038873563, "learning_rate": 4.847129098293542e-06, "loss": 0.4797, "step": 11113 }, { "epoch": 0.6767956642206863, "grad_norm": 0.9871416522647254, "learning_rate": 4.8471016216523894e-06, "loss": 0.4172, "step": 11114 }, { "epoch": 0.6768565599975642, "grad_norm": 0.9913200123797524, "learning_rate": 4.847074142620057e-06, "loss": 0.4101, "step": 11115 }, { "epoch": 0.6769174557744421, "grad_norm": 1.1320525907300296, "learning_rate": 4.847046661196575e-06, "loss": 0.4233, "step": 11116 }, { "epoch": 0.6769783515513199, "grad_norm": 0.9957202135279261, "learning_rate": 4.847019177381971e-06, "loss": 0.4671, "step": 11117 }, { "epoch": 0.6770392473281978, "grad_norm": 1.1341819874707992, "learning_rate": 4.846991691176272e-06, "loss": 0.4971, "step": 11118 }, { "epoch": 0.6771001431050757, "grad_norm": 0.9418739783193932, "learning_rate": 4.846964202579507e-06, "loss": 0.4659, "step": 11119 }, { "epoch": 0.6771610388819536, "grad_norm": 1.0808784653439325, "learning_rate": 4.846936711591704e-06, "loss": 0.3435, "step": 11120 }, { "epoch": 0.6772219346588314, "grad_norm": 0.897539636139626, "learning_rate": 4.84690921821289e-06, "loss": 0.4632, "step": 11121 }, { "epoch": 0.6772828304357092, "grad_norm": 1.0219239840177803, "learning_rate": 4.8468817224430945e-06, "loss": 0.4535, "step": 11122 }, { "epoch": 0.6773437262125872, "grad_norm": 1.011731048083088, "learning_rate": 4.846854224282344e-06, "loss": 0.443, "step": 11123 }, { "epoch": 0.677404621989465, "grad_norm": 1.0998338425114227, "learning_rate": 4.846826723730668e-06, "loss": 0.4133, "step": 11124 }, { "epoch": 0.6774655177663429, "grad_norm": 1.0591216557392324, "learning_rate": 4.846799220788092e-06, "loss": 0.4077, "step": 11125 }, { "epoch": 0.6775264135432207, "grad_norm": 1.053723352050714, "learning_rate": 4.846771715454648e-06, "loss": 0.359, "step": 11126 }, { "epoch": 0.6775873093200987, "grad_norm": 0.9676087862253189, "learning_rate": 4.846744207730361e-06, "loss": 0.4608, "step": 11127 }, { "epoch": 0.6776482050969765, "grad_norm": 0.9931438177941839, "learning_rate": 4.846716697615259e-06, "loss": 0.3801, "step": 11128 }, { "epoch": 0.6777091008738544, "grad_norm": 1.017543346463844, "learning_rate": 4.846689185109371e-06, "loss": 0.434, "step": 11129 }, { "epoch": 0.6777699966507322, "grad_norm": 1.0176375655800487, "learning_rate": 4.846661670212725e-06, "loss": 0.4445, "step": 11130 }, { "epoch": 0.6778308924276102, "grad_norm": 0.9382093962805554, "learning_rate": 4.846634152925349e-06, "loss": 0.4779, "step": 11131 }, { "epoch": 0.677891788204488, "grad_norm": 0.9290558087889601, "learning_rate": 4.846606633247271e-06, "loss": 0.4348, "step": 11132 }, { "epoch": 0.6779526839813659, "grad_norm": 1.1085710457658675, "learning_rate": 4.846579111178519e-06, "loss": 0.3731, "step": 11133 }, { "epoch": 0.6780135797582437, "grad_norm": 1.0300956818120826, "learning_rate": 4.846551586719121e-06, "loss": 0.4566, "step": 11134 }, { "epoch": 0.6780744755351217, "grad_norm": 1.0311922283378039, "learning_rate": 4.846524059869104e-06, "loss": 0.4061, "step": 11135 }, { "epoch": 0.6781353713119995, "grad_norm": 0.9557337487719128, "learning_rate": 4.8464965306284985e-06, "loss": 0.4412, "step": 11136 }, { "epoch": 0.6781962670888774, "grad_norm": 1.0499453501349596, "learning_rate": 4.846468998997331e-06, "loss": 0.366, "step": 11137 }, { "epoch": 0.6782571628657552, "grad_norm": 0.975057548164393, "learning_rate": 4.846441464975629e-06, "loss": 0.5317, "step": 11138 }, { "epoch": 0.6783180586426332, "grad_norm": 1.2705392577685275, "learning_rate": 4.846413928563421e-06, "loss": 0.4445, "step": 11139 }, { "epoch": 0.678378954419511, "grad_norm": 1.004350398321868, "learning_rate": 4.846386389760737e-06, "loss": 0.4231, "step": 11140 }, { "epoch": 0.6784398501963889, "grad_norm": 1.1312956550051845, "learning_rate": 4.8463588485676015e-06, "loss": 0.4222, "step": 11141 }, { "epoch": 0.6785007459732667, "grad_norm": 0.9950605167268929, "learning_rate": 4.846331304984045e-06, "loss": 0.4322, "step": 11142 }, { "epoch": 0.6785616417501447, "grad_norm": 0.960732872054951, "learning_rate": 4.846303759010094e-06, "loss": 0.4428, "step": 11143 }, { "epoch": 0.6786225375270225, "grad_norm": 1.0146748797392138, "learning_rate": 4.846276210645779e-06, "loss": 0.457, "step": 11144 }, { "epoch": 0.6786834333039004, "grad_norm": 0.9723270679074975, "learning_rate": 4.846248659891126e-06, "loss": 0.4171, "step": 11145 }, { "epoch": 0.6787443290807782, "grad_norm": 0.9400445911519925, "learning_rate": 4.846221106746164e-06, "loss": 0.414, "step": 11146 }, { "epoch": 0.6788052248576562, "grad_norm": 1.0350943154385943, "learning_rate": 4.84619355121092e-06, "loss": 0.4273, "step": 11147 }, { "epoch": 0.678866120634534, "grad_norm": 0.9150563858129426, "learning_rate": 4.846165993285423e-06, "loss": 0.4648, "step": 11148 }, { "epoch": 0.6789270164114118, "grad_norm": 0.9916965693247667, "learning_rate": 4.846138432969702e-06, "loss": 0.4373, "step": 11149 }, { "epoch": 0.6789879121882897, "grad_norm": 1.0058421370177222, "learning_rate": 4.846110870263782e-06, "loss": 0.492, "step": 11150 }, { "epoch": 0.6790488079651676, "grad_norm": 1.0284930380450996, "learning_rate": 4.846083305167694e-06, "loss": 0.3883, "step": 11151 }, { "epoch": 0.6791097037420455, "grad_norm": 0.9690462598100611, "learning_rate": 4.846055737681465e-06, "loss": 0.4886, "step": 11152 }, { "epoch": 0.6791705995189233, "grad_norm": 1.0037636671790746, "learning_rate": 4.8460281678051235e-06, "loss": 0.3983, "step": 11153 }, { "epoch": 0.6792314952958013, "grad_norm": 1.007200284318885, "learning_rate": 4.8460005955386964e-06, "loss": 0.4079, "step": 11154 }, { "epoch": 0.6792923910726791, "grad_norm": 1.0290659902925707, "learning_rate": 4.845973020882214e-06, "loss": 0.432, "step": 11155 }, { "epoch": 0.679353286849557, "grad_norm": 0.9479707433724124, "learning_rate": 4.8459454438357024e-06, "loss": 0.4623, "step": 11156 }, { "epoch": 0.6794141826264348, "grad_norm": 0.9549281840954996, "learning_rate": 4.84591786439919e-06, "loss": 0.5325, "step": 11157 }, { "epoch": 0.6794750784033128, "grad_norm": 0.9385432140526372, "learning_rate": 4.845890282572706e-06, "loss": 0.4687, "step": 11158 }, { "epoch": 0.6795359741801906, "grad_norm": 1.0181695562684667, "learning_rate": 4.845862698356277e-06, "loss": 0.4177, "step": 11159 }, { "epoch": 0.6795968699570685, "grad_norm": 1.1156105485611822, "learning_rate": 4.845835111749933e-06, "loss": 0.4098, "step": 11160 }, { "epoch": 0.6796577657339463, "grad_norm": 0.9692022761144092, "learning_rate": 4.8458075227536995e-06, "loss": 0.4115, "step": 11161 }, { "epoch": 0.6797186615108243, "grad_norm": 1.0688773372288656, "learning_rate": 4.845779931367607e-06, "loss": 0.401, "step": 11162 }, { "epoch": 0.6797795572877021, "grad_norm": 0.9385719543209087, "learning_rate": 4.845752337591682e-06, "loss": 0.4805, "step": 11163 }, { "epoch": 0.67984045306458, "grad_norm": 1.150507514820652, "learning_rate": 4.845724741425954e-06, "loss": 0.3831, "step": 11164 }, { "epoch": 0.6799013488414578, "grad_norm": 1.0339319295692415, "learning_rate": 4.84569714287045e-06, "loss": 0.4153, "step": 11165 }, { "epoch": 0.6799622446183358, "grad_norm": 1.029999006895575, "learning_rate": 4.845669541925198e-06, "loss": 0.4231, "step": 11166 }, { "epoch": 0.6800231403952136, "grad_norm": 1.0879459178503468, "learning_rate": 4.845641938590228e-06, "loss": 0.4064, "step": 11167 }, { "epoch": 0.6800840361720915, "grad_norm": 0.9708916042839965, "learning_rate": 4.845614332865566e-06, "loss": 0.4401, "step": 11168 }, { "epoch": 0.6801449319489693, "grad_norm": 0.9440924255380537, "learning_rate": 4.845586724751241e-06, "loss": 0.426, "step": 11169 }, { "epoch": 0.6802058277258473, "grad_norm": 1.0787774986984227, "learning_rate": 4.84555911424728e-06, "loss": 0.3752, "step": 11170 }, { "epoch": 0.6802667235027251, "grad_norm": 1.0063941050619316, "learning_rate": 4.845531501353714e-06, "loss": 0.4148, "step": 11171 }, { "epoch": 0.680327619279603, "grad_norm": 1.018146567067235, "learning_rate": 4.845503886070568e-06, "loss": 0.4191, "step": 11172 }, { "epoch": 0.6803885150564808, "grad_norm": 1.054932953175995, "learning_rate": 4.845476268397872e-06, "loss": 0.4525, "step": 11173 }, { "epoch": 0.6804494108333587, "grad_norm": 0.9929617864020082, "learning_rate": 4.8454486483356526e-06, "loss": 0.4617, "step": 11174 }, { "epoch": 0.6805103066102366, "grad_norm": 1.0142235326869604, "learning_rate": 4.84542102588394e-06, "loss": 0.4088, "step": 11175 }, { "epoch": 0.6805712023871144, "grad_norm": 1.018384752519241, "learning_rate": 4.84539340104276e-06, "loss": 0.4018, "step": 11176 }, { "epoch": 0.6806320981639923, "grad_norm": 0.9330932700806039, "learning_rate": 4.845365773812143e-06, "loss": 0.4517, "step": 11177 }, { "epoch": 0.6806929939408702, "grad_norm": 1.098962689559958, "learning_rate": 4.845338144192116e-06, "loss": 0.4357, "step": 11178 }, { "epoch": 0.6807538897177481, "grad_norm": 1.140970759987273, "learning_rate": 4.845310512182707e-06, "loss": 0.3979, "step": 11179 }, { "epoch": 0.6808147854946259, "grad_norm": 1.0701940078310788, "learning_rate": 4.845282877783945e-06, "loss": 0.4286, "step": 11180 }, { "epoch": 0.6808756812715038, "grad_norm": 1.0129667103894955, "learning_rate": 4.845255240995856e-06, "loss": 0.4325, "step": 11181 }, { "epoch": 0.6809365770483817, "grad_norm": 1.0161220592514184, "learning_rate": 4.845227601818472e-06, "loss": 0.4312, "step": 11182 }, { "epoch": 0.6809974728252596, "grad_norm": 0.9286197409768241, "learning_rate": 4.8451999602518175e-06, "loss": 0.4096, "step": 11183 }, { "epoch": 0.6810583686021374, "grad_norm": 1.0064229392890214, "learning_rate": 4.845172316295922e-06, "loss": 0.4148, "step": 11184 }, { "epoch": 0.6811192643790153, "grad_norm": 1.0813968318452267, "learning_rate": 4.8451446699508145e-06, "loss": 0.393, "step": 11185 }, { "epoch": 0.6811801601558932, "grad_norm": 0.9123667537223084, "learning_rate": 4.845117021216521e-06, "loss": 0.4608, "step": 11186 }, { "epoch": 0.6812410559327711, "grad_norm": 1.1851900880047013, "learning_rate": 4.845089370093073e-06, "loss": 0.5489, "step": 11187 }, { "epoch": 0.6813019517096489, "grad_norm": 0.9354402103397539, "learning_rate": 4.845061716580495e-06, "loss": 0.4317, "step": 11188 }, { "epoch": 0.6813628474865268, "grad_norm": 0.881504801822446, "learning_rate": 4.845034060678817e-06, "loss": 0.43, "step": 11189 }, { "epoch": 0.6814237432634047, "grad_norm": 1.0990378561794805, "learning_rate": 4.845006402388067e-06, "loss": 0.4057, "step": 11190 }, { "epoch": 0.6814846390402826, "grad_norm": 1.036142632607258, "learning_rate": 4.844978741708274e-06, "loss": 0.3712, "step": 11191 }, { "epoch": 0.6815455348171604, "grad_norm": 0.9670343928321802, "learning_rate": 4.844951078639465e-06, "loss": 0.4672, "step": 11192 }, { "epoch": 0.6816064305940382, "grad_norm": 1.0059186704092156, "learning_rate": 4.844923413181668e-06, "loss": 0.3869, "step": 11193 }, { "epoch": 0.6816673263709162, "grad_norm": 1.069398140590521, "learning_rate": 4.8448957453349135e-06, "loss": 0.4573, "step": 11194 }, { "epoch": 0.681728222147794, "grad_norm": 0.9830114165903263, "learning_rate": 4.844868075099227e-06, "loss": 0.4523, "step": 11195 }, { "epoch": 0.6817891179246719, "grad_norm": 1.044673592561948, "learning_rate": 4.844840402474637e-06, "loss": 0.3958, "step": 11196 }, { "epoch": 0.6818500137015499, "grad_norm": 1.0126290863258713, "learning_rate": 4.844812727461174e-06, "loss": 0.4525, "step": 11197 }, { "epoch": 0.6819109094784277, "grad_norm": 1.039060264053878, "learning_rate": 4.844785050058862e-06, "loss": 0.4461, "step": 11198 }, { "epoch": 0.6819718052553055, "grad_norm": 0.9882938836934559, "learning_rate": 4.8447573702677335e-06, "loss": 0.5071, "step": 11199 }, { "epoch": 0.6820327010321834, "grad_norm": 0.952277102920067, "learning_rate": 4.844729688087815e-06, "loss": 0.5249, "step": 11200 }, { "epoch": 0.6820935968090613, "grad_norm": 0.9753202211406046, "learning_rate": 4.844702003519134e-06, "loss": 0.4007, "step": 11201 }, { "epoch": 0.6821544925859392, "grad_norm": 1.0211282860211328, "learning_rate": 4.84467431656172e-06, "loss": 0.4265, "step": 11202 }, { "epoch": 0.682215388362817, "grad_norm": 0.9595245078671515, "learning_rate": 4.8446466272156e-06, "loss": 0.4776, "step": 11203 }, { "epoch": 0.6822762841396949, "grad_norm": 1.0410038359171714, "learning_rate": 4.844618935480803e-06, "loss": 0.3834, "step": 11204 }, { "epoch": 0.6823371799165728, "grad_norm": 1.0204892429755141, "learning_rate": 4.844591241357357e-06, "loss": 0.4388, "step": 11205 }, { "epoch": 0.6823980756934507, "grad_norm": 1.038304577386356, "learning_rate": 4.84456354484529e-06, "loss": 0.4767, "step": 11206 }, { "epoch": 0.6824589714703285, "grad_norm": 1.074157933528745, "learning_rate": 4.844535845944631e-06, "loss": 0.4016, "step": 11207 }, { "epoch": 0.6825198672472064, "grad_norm": 0.9948603751448493, "learning_rate": 4.844508144655407e-06, "loss": 0.4389, "step": 11208 }, { "epoch": 0.6825807630240843, "grad_norm": 0.8929747309636394, "learning_rate": 4.844480440977647e-06, "loss": 0.4426, "step": 11209 }, { "epoch": 0.6826416588009622, "grad_norm": 0.9751930849496584, "learning_rate": 4.844452734911379e-06, "loss": 0.4079, "step": 11210 }, { "epoch": 0.68270255457784, "grad_norm": 0.9386377698819374, "learning_rate": 4.844425026456631e-06, "loss": 0.4948, "step": 11211 }, { "epoch": 0.6827634503547179, "grad_norm": 1.0171549573266911, "learning_rate": 4.8443973156134325e-06, "loss": 0.3883, "step": 11212 }, { "epoch": 0.6828243461315958, "grad_norm": 1.0791576091440063, "learning_rate": 4.84436960238181e-06, "loss": 0.4441, "step": 11213 }, { "epoch": 0.6828852419084737, "grad_norm": 0.9818667774564157, "learning_rate": 4.844341886761792e-06, "loss": 0.4113, "step": 11214 }, { "epoch": 0.6829461376853515, "grad_norm": 1.0135370058988455, "learning_rate": 4.844314168753408e-06, "loss": 0.398, "step": 11215 }, { "epoch": 0.6830070334622294, "grad_norm": 1.0415898241292145, "learning_rate": 4.844286448356685e-06, "loss": 0.3822, "step": 11216 }, { "epoch": 0.6830679292391073, "grad_norm": 0.9846388364245625, "learning_rate": 4.844258725571652e-06, "loss": 0.4383, "step": 11217 }, { "epoch": 0.6831288250159852, "grad_norm": 0.9622492795542101, "learning_rate": 4.8442310003983375e-06, "loss": 0.4453, "step": 11218 }, { "epoch": 0.683189720792863, "grad_norm": 1.0047119943008171, "learning_rate": 4.844203272836769e-06, "loss": 0.4746, "step": 11219 }, { "epoch": 0.6832506165697408, "grad_norm": 1.0255105606795734, "learning_rate": 4.8441755428869745e-06, "loss": 0.4721, "step": 11220 }, { "epoch": 0.6833115123466188, "grad_norm": 1.0512832431751105, "learning_rate": 4.844147810548983e-06, "loss": 0.3971, "step": 11221 }, { "epoch": 0.6833724081234966, "grad_norm": 0.9928947185443288, "learning_rate": 4.844120075822822e-06, "loss": 0.3966, "step": 11222 }, { "epoch": 0.6834333039003745, "grad_norm": 1.024833880039256, "learning_rate": 4.84409233870852e-06, "loss": 0.4582, "step": 11223 }, { "epoch": 0.6834941996772523, "grad_norm": 0.9832639667088942, "learning_rate": 4.844064599206107e-06, "loss": 0.4349, "step": 11224 }, { "epoch": 0.6835550954541303, "grad_norm": 0.9645340322561644, "learning_rate": 4.844036857315609e-06, "loss": 0.4011, "step": 11225 }, { "epoch": 0.6836159912310081, "grad_norm": 0.981428713061004, "learning_rate": 4.8440091130370535e-06, "loss": 0.3886, "step": 11226 }, { "epoch": 0.683676887007886, "grad_norm": 0.9976808827971598, "learning_rate": 4.843981366370472e-06, "loss": 0.4541, "step": 11227 }, { "epoch": 0.6837377827847638, "grad_norm": 1.0736484182545374, "learning_rate": 4.843953617315889e-06, "loss": 0.3744, "step": 11228 }, { "epoch": 0.6837986785616418, "grad_norm": 0.926185990014509, "learning_rate": 4.843925865873337e-06, "loss": 0.4378, "step": 11229 }, { "epoch": 0.6838595743385196, "grad_norm": 0.9982186911168145, "learning_rate": 4.8438981120428415e-06, "loss": 0.3703, "step": 11230 }, { "epoch": 0.6839204701153975, "grad_norm": 1.0052295673088774, "learning_rate": 4.84387035582443e-06, "loss": 0.3824, "step": 11231 }, { "epoch": 0.6839813658922753, "grad_norm": 0.9572075434041996, "learning_rate": 4.843842597218134e-06, "loss": 0.4062, "step": 11232 }, { "epoch": 0.6840422616691533, "grad_norm": 1.0098180183831877, "learning_rate": 4.843814836223979e-06, "loss": 0.4061, "step": 11233 }, { "epoch": 0.6841031574460311, "grad_norm": 0.9460291632391699, "learning_rate": 4.843787072841995e-06, "loss": 0.5189, "step": 11234 }, { "epoch": 0.684164053222909, "grad_norm": 0.985827786791382, "learning_rate": 4.843759307072208e-06, "loss": 0.4643, "step": 11235 }, { "epoch": 0.6842249489997869, "grad_norm": 1.0461205574647343, "learning_rate": 4.843731538914649e-06, "loss": 0.4247, "step": 11236 }, { "epoch": 0.6842858447766648, "grad_norm": 0.9441757014928721, "learning_rate": 4.843703768369344e-06, "loss": 0.4244, "step": 11237 }, { "epoch": 0.6843467405535426, "grad_norm": 1.0612689190940585, "learning_rate": 4.843675995436323e-06, "loss": 0.3645, "step": 11238 }, { "epoch": 0.6844076363304205, "grad_norm": 0.9750205298094589, "learning_rate": 4.843648220115613e-06, "loss": 0.3915, "step": 11239 }, { "epoch": 0.6844685321072984, "grad_norm": 1.039665497678461, "learning_rate": 4.843620442407243e-06, "loss": 0.3757, "step": 11240 }, { "epoch": 0.6845294278841763, "grad_norm": 0.8864663690381395, "learning_rate": 4.843592662311243e-06, "loss": 0.4716, "step": 11241 }, { "epoch": 0.6845903236610541, "grad_norm": 1.0198458591988109, "learning_rate": 4.843564879827637e-06, "loss": 0.4231, "step": 11242 }, { "epoch": 0.684651219437932, "grad_norm": 0.9817675290207709, "learning_rate": 4.843537094956457e-06, "loss": 0.3803, "step": 11243 }, { "epoch": 0.6847121152148099, "grad_norm": 1.0329122273257827, "learning_rate": 4.84350930769773e-06, "loss": 0.4704, "step": 11244 }, { "epoch": 0.6847730109916877, "grad_norm": 1.0137607919492677, "learning_rate": 4.843481518051485e-06, "loss": 0.4131, "step": 11245 }, { "epoch": 0.6848339067685656, "grad_norm": 0.9525504890283382, "learning_rate": 4.843453726017749e-06, "loss": 0.4243, "step": 11246 }, { "epoch": 0.6848948025454434, "grad_norm": 0.9876747185579196, "learning_rate": 4.843425931596551e-06, "loss": 0.4544, "step": 11247 }, { "epoch": 0.6849556983223214, "grad_norm": 1.0713725610229106, "learning_rate": 4.84339813478792e-06, "loss": 0.3893, "step": 11248 }, { "epoch": 0.6850165940991992, "grad_norm": 0.9662468483184822, "learning_rate": 4.8433703355918835e-06, "loss": 0.4149, "step": 11249 }, { "epoch": 0.6850774898760771, "grad_norm": 1.005996281961638, "learning_rate": 4.84334253400847e-06, "loss": 0.4544, "step": 11250 }, { "epoch": 0.6851383856529549, "grad_norm": 0.9705706177845823, "learning_rate": 4.843314730037708e-06, "loss": 0.4048, "step": 11251 }, { "epoch": 0.6851992814298329, "grad_norm": 1.0193555995797134, "learning_rate": 4.843286923679625e-06, "loss": 0.4249, "step": 11252 }, { "epoch": 0.6852601772067107, "grad_norm": 1.0334784569731588, "learning_rate": 4.84325911493425e-06, "loss": 0.4279, "step": 11253 }, { "epoch": 0.6853210729835886, "grad_norm": 1.0374098479439555, "learning_rate": 4.843231303801612e-06, "loss": 0.4091, "step": 11254 }, { "epoch": 0.6853819687604664, "grad_norm": 0.9238504095077457, "learning_rate": 4.843203490281738e-06, "loss": 0.3966, "step": 11255 }, { "epoch": 0.6854428645373444, "grad_norm": 0.9430530020942196, "learning_rate": 4.843175674374658e-06, "loss": 0.4649, "step": 11256 }, { "epoch": 0.6855037603142222, "grad_norm": 1.0801191008340738, "learning_rate": 4.843147856080399e-06, "loss": 0.4139, "step": 11257 }, { "epoch": 0.6855646560911001, "grad_norm": 0.9975839185748404, "learning_rate": 4.843120035398989e-06, "loss": 0.4893, "step": 11258 }, { "epoch": 0.6856255518679779, "grad_norm": 0.9428970495872931, "learning_rate": 4.843092212330458e-06, "loss": 0.4533, "step": 11259 }, { "epoch": 0.6856864476448559, "grad_norm": 0.8605558369046944, "learning_rate": 4.843064386874833e-06, "loss": 0.499, "step": 11260 }, { "epoch": 0.6857473434217337, "grad_norm": 1.1321842605961614, "learning_rate": 4.843036559032142e-06, "loss": 0.4009, "step": 11261 }, { "epoch": 0.6858082391986116, "grad_norm": 1.0544654866638243, "learning_rate": 4.843008728802415e-06, "loss": 0.3655, "step": 11262 }, { "epoch": 0.6858691349754894, "grad_norm": 1.0662882174223647, "learning_rate": 4.842980896185679e-06, "loss": 0.4346, "step": 11263 }, { "epoch": 0.6859300307523674, "grad_norm": 1.1204143942781162, "learning_rate": 4.842953061181963e-06, "loss": 0.4011, "step": 11264 }, { "epoch": 0.6859909265292452, "grad_norm": 1.0200140889963956, "learning_rate": 4.842925223791295e-06, "loss": 0.4647, "step": 11265 }, { "epoch": 0.686051822306123, "grad_norm": 0.9493264007786301, "learning_rate": 4.842897384013703e-06, "loss": 0.4535, "step": 11266 }, { "epoch": 0.6861127180830009, "grad_norm": 0.9418050492194633, "learning_rate": 4.842869541849217e-06, "loss": 0.4724, "step": 11267 }, { "epoch": 0.6861736138598789, "grad_norm": 0.9561393518978135, "learning_rate": 4.842841697297863e-06, "loss": 0.4265, "step": 11268 }, { "epoch": 0.6862345096367567, "grad_norm": 1.0098906743059768, "learning_rate": 4.842813850359671e-06, "loss": 0.4527, "step": 11269 }, { "epoch": 0.6862954054136345, "grad_norm": 0.9674613452003347, "learning_rate": 4.842786001034669e-06, "loss": 0.4287, "step": 11270 }, { "epoch": 0.6863563011905124, "grad_norm": 1.0414185152389743, "learning_rate": 4.842758149322886e-06, "loss": 0.4083, "step": 11271 }, { "epoch": 0.6864171969673903, "grad_norm": 0.9623247434690707, "learning_rate": 4.842730295224348e-06, "loss": 0.3521, "step": 11272 }, { "epoch": 0.6864780927442682, "grad_norm": 1.0436833956221936, "learning_rate": 4.842702438739086e-06, "loss": 0.4729, "step": 11273 }, { "epoch": 0.686538988521146, "grad_norm": 0.9281000714625977, "learning_rate": 4.842674579867128e-06, "loss": 0.4345, "step": 11274 }, { "epoch": 0.6865998842980239, "grad_norm": 1.0838043599362799, "learning_rate": 4.842646718608502e-06, "loss": 0.3557, "step": 11275 }, { "epoch": 0.6866607800749018, "grad_norm": 1.0990434870905257, "learning_rate": 4.842618854963236e-06, "loss": 0.3664, "step": 11276 }, { "epoch": 0.6867216758517797, "grad_norm": 1.0608007547966467, "learning_rate": 4.8425909889313576e-06, "loss": 0.4984, "step": 11277 }, { "epoch": 0.6867825716286575, "grad_norm": 0.9535063844589946, "learning_rate": 4.842563120512897e-06, "loss": 0.3574, "step": 11278 }, { "epoch": 0.6868434674055355, "grad_norm": 0.961817483588357, "learning_rate": 4.842535249707882e-06, "loss": 0.4371, "step": 11279 }, { "epoch": 0.6869043631824133, "grad_norm": 1.032068037654684, "learning_rate": 4.8425073765163406e-06, "loss": 0.4894, "step": 11280 }, { "epoch": 0.6869652589592912, "grad_norm": 0.9407896627068993, "learning_rate": 4.842479500938301e-06, "loss": 0.4372, "step": 11281 }, { "epoch": 0.687026154736169, "grad_norm": 1.0344977859933948, "learning_rate": 4.842451622973793e-06, "loss": 0.4004, "step": 11282 }, { "epoch": 0.687087050513047, "grad_norm": 1.0558638037903785, "learning_rate": 4.842423742622842e-06, "loss": 0.3768, "step": 11283 }, { "epoch": 0.6871479462899248, "grad_norm": 1.1445253995792053, "learning_rate": 4.8423958598854805e-06, "loss": 0.4693, "step": 11284 }, { "epoch": 0.6872088420668027, "grad_norm": 0.9985815456301655, "learning_rate": 4.842367974761734e-06, "loss": 0.4245, "step": 11285 }, { "epoch": 0.6872697378436805, "grad_norm": 0.9367015980705219, "learning_rate": 4.842340087251631e-06, "loss": 0.3916, "step": 11286 }, { "epoch": 0.6873306336205585, "grad_norm": 0.9322574606641667, "learning_rate": 4.842312197355202e-06, "loss": 0.4306, "step": 11287 }, { "epoch": 0.6873915293974363, "grad_norm": 1.002912258507457, "learning_rate": 4.842284305072473e-06, "loss": 0.5383, "step": 11288 }, { "epoch": 0.6874524251743142, "grad_norm": 0.9516063133959023, "learning_rate": 4.842256410403474e-06, "loss": 0.5401, "step": 11289 }, { "epoch": 0.687513320951192, "grad_norm": 0.9891542747808003, "learning_rate": 4.842228513348233e-06, "loss": 0.3561, "step": 11290 }, { "epoch": 0.68757421672807, "grad_norm": 1.03352242427367, "learning_rate": 4.842200613906777e-06, "loss": 0.48, "step": 11291 }, { "epoch": 0.6876351125049478, "grad_norm": 1.0062412974062995, "learning_rate": 4.8421727120791375e-06, "loss": 0.4675, "step": 11292 }, { "epoch": 0.6876960082818256, "grad_norm": 0.9853099483359881, "learning_rate": 4.84214480786534e-06, "loss": 0.3527, "step": 11293 }, { "epoch": 0.6877569040587035, "grad_norm": 0.9789435676100101, "learning_rate": 4.842116901265414e-06, "loss": 0.4387, "step": 11294 }, { "epoch": 0.6878177998355814, "grad_norm": 0.9716340815773928, "learning_rate": 4.842088992279388e-06, "loss": 0.445, "step": 11295 }, { "epoch": 0.6878786956124593, "grad_norm": 1.0102016312954316, "learning_rate": 4.842061080907291e-06, "loss": 0.4366, "step": 11296 }, { "epoch": 0.6879395913893371, "grad_norm": 1.0225156743759556, "learning_rate": 4.842033167149151e-06, "loss": 0.3986, "step": 11297 }, { "epoch": 0.688000487166215, "grad_norm": 1.0465328061901993, "learning_rate": 4.842005251004996e-06, "loss": 0.4601, "step": 11298 }, { "epoch": 0.6880613829430929, "grad_norm": 0.9973217046860448, "learning_rate": 4.841977332474854e-06, "loss": 0.4452, "step": 11299 }, { "epoch": 0.6881222787199708, "grad_norm": 1.0187092042127226, "learning_rate": 4.841949411558755e-06, "loss": 0.4132, "step": 11300 }, { "epoch": 0.6881831744968486, "grad_norm": 1.1043841660731006, "learning_rate": 4.8419214882567266e-06, "loss": 0.3771, "step": 11301 }, { "epoch": 0.6882440702737265, "grad_norm": 1.014670893093701, "learning_rate": 4.841893562568797e-06, "loss": 0.3588, "step": 11302 }, { "epoch": 0.6883049660506044, "grad_norm": 0.9498147776207586, "learning_rate": 4.841865634494996e-06, "loss": 0.4903, "step": 11303 }, { "epoch": 0.6883658618274823, "grad_norm": 1.074984951287645, "learning_rate": 4.84183770403535e-06, "loss": 0.3969, "step": 11304 }, { "epoch": 0.6884267576043601, "grad_norm": 0.9850134054172418, "learning_rate": 4.841809771189888e-06, "loss": 0.4144, "step": 11305 }, { "epoch": 0.688487653381238, "grad_norm": 0.9896562835408252, "learning_rate": 4.841781835958639e-06, "loss": 0.4377, "step": 11306 }, { "epoch": 0.6885485491581159, "grad_norm": 1.034392716992224, "learning_rate": 4.841753898341632e-06, "loss": 0.3945, "step": 11307 }, { "epoch": 0.6886094449349938, "grad_norm": 0.9530158878886967, "learning_rate": 4.841725958338895e-06, "loss": 0.4325, "step": 11308 }, { "epoch": 0.6886703407118716, "grad_norm": 1.0304004355358947, "learning_rate": 4.8416980159504555e-06, "loss": 0.427, "step": 11309 }, { "epoch": 0.6887312364887495, "grad_norm": 0.958545451750549, "learning_rate": 4.841670071176343e-06, "loss": 0.4881, "step": 11310 }, { "epoch": 0.6887921322656274, "grad_norm": 1.1087024016311717, "learning_rate": 4.841642124016586e-06, "loss": 0.3937, "step": 11311 }, { "epoch": 0.6888530280425053, "grad_norm": 1.0652572389717716, "learning_rate": 4.841614174471212e-06, "loss": 0.3963, "step": 11312 }, { "epoch": 0.6889139238193831, "grad_norm": 1.0934253319213703, "learning_rate": 4.841586222540251e-06, "loss": 0.4927, "step": 11313 }, { "epoch": 0.688974819596261, "grad_norm": 0.944849597380424, "learning_rate": 4.84155826822373e-06, "loss": 0.4627, "step": 11314 }, { "epoch": 0.6890357153731389, "grad_norm": 1.108322134523907, "learning_rate": 4.841530311521679e-06, "loss": 0.3836, "step": 11315 }, { "epoch": 0.6890966111500167, "grad_norm": 0.9577221999891868, "learning_rate": 4.841502352434125e-06, "loss": 0.4969, "step": 11316 }, { "epoch": 0.6891575069268946, "grad_norm": 1.0578898570265776, "learning_rate": 4.841474390961097e-06, "loss": 0.4113, "step": 11317 }, { "epoch": 0.6892184027037725, "grad_norm": 0.997581703517481, "learning_rate": 4.841446427102624e-06, "loss": 0.4243, "step": 11318 }, { "epoch": 0.6892792984806504, "grad_norm": 0.9859796143611578, "learning_rate": 4.841418460858733e-06, "loss": 0.4558, "step": 11319 }, { "epoch": 0.6893401942575282, "grad_norm": 0.9372024043877244, "learning_rate": 4.841390492229454e-06, "loss": 0.472, "step": 11320 }, { "epoch": 0.6894010900344061, "grad_norm": 0.9678088542103643, "learning_rate": 4.841362521214816e-06, "loss": 0.3923, "step": 11321 }, { "epoch": 0.689461985811284, "grad_norm": 1.051735309133603, "learning_rate": 4.8413345478148455e-06, "loss": 0.4234, "step": 11322 }, { "epoch": 0.6895228815881619, "grad_norm": 1.0110765105944437, "learning_rate": 4.841306572029573e-06, "loss": 0.4555, "step": 11323 }, { "epoch": 0.6895837773650397, "grad_norm": 0.9038997783119908, "learning_rate": 4.841278593859025e-06, "loss": 0.4973, "step": 11324 }, { "epoch": 0.6896446731419176, "grad_norm": 0.9427757399110027, "learning_rate": 4.841250613303232e-06, "loss": 0.5462, "step": 11325 }, { "epoch": 0.6897055689187955, "grad_norm": 1.0054352749681015, "learning_rate": 4.8412226303622206e-06, "loss": 0.437, "step": 11326 }, { "epoch": 0.6897664646956734, "grad_norm": 0.9297029494696228, "learning_rate": 4.84119464503602e-06, "loss": 0.4611, "step": 11327 }, { "epoch": 0.6898273604725512, "grad_norm": 0.9881317155057507, "learning_rate": 4.841166657324661e-06, "loss": 0.4231, "step": 11328 }, { "epoch": 0.6898882562494291, "grad_norm": 1.0485649675237259, "learning_rate": 4.841138667228168e-06, "loss": 0.3834, "step": 11329 }, { "epoch": 0.689949152026307, "grad_norm": 1.0458900139245302, "learning_rate": 4.841110674746573e-06, "loss": 0.3816, "step": 11330 }, { "epoch": 0.6900100478031849, "grad_norm": 0.9877477748252222, "learning_rate": 4.841082679879902e-06, "loss": 0.4379, "step": 11331 }, { "epoch": 0.6900709435800627, "grad_norm": 0.9852565744597732, "learning_rate": 4.841054682628185e-06, "loss": 0.3685, "step": 11332 }, { "epoch": 0.6901318393569406, "grad_norm": 1.0842550198423666, "learning_rate": 4.84102668299145e-06, "loss": 0.3523, "step": 11333 }, { "epoch": 0.6901927351338185, "grad_norm": 1.0454659218731026, "learning_rate": 4.840998680969726e-06, "loss": 0.3909, "step": 11334 }, { "epoch": 0.6902536309106964, "grad_norm": 0.9680514844998669, "learning_rate": 4.840970676563041e-06, "loss": 0.4076, "step": 11335 }, { "epoch": 0.6903145266875742, "grad_norm": 1.0587870173296337, "learning_rate": 4.8409426697714244e-06, "loss": 0.4163, "step": 11336 }, { "epoch": 0.690375422464452, "grad_norm": 0.929973069124356, "learning_rate": 4.840914660594903e-06, "loss": 0.4329, "step": 11337 }, { "epoch": 0.69043631824133, "grad_norm": 1.0241341616539632, "learning_rate": 4.840886649033508e-06, "loss": 0.4166, "step": 11338 }, { "epoch": 0.6904972140182078, "grad_norm": 0.9734607294162269, "learning_rate": 4.840858635087265e-06, "loss": 0.4331, "step": 11339 }, { "epoch": 0.6905581097950857, "grad_norm": 0.9320558075766164, "learning_rate": 4.840830618756204e-06, "loss": 0.4473, "step": 11340 }, { "epoch": 0.6906190055719635, "grad_norm": 1.0313175763271245, "learning_rate": 4.840802600040354e-06, "loss": 0.3903, "step": 11341 }, { "epoch": 0.6906799013488415, "grad_norm": 0.9883327917516034, "learning_rate": 4.840774578939743e-06, "loss": 0.4057, "step": 11342 }, { "epoch": 0.6907407971257193, "grad_norm": 1.0052011614293905, "learning_rate": 4.840746555454399e-06, "loss": 0.4072, "step": 11343 }, { "epoch": 0.6908016929025972, "grad_norm": 1.0416028269088753, "learning_rate": 4.840718529584351e-06, "loss": 0.3604, "step": 11344 }, { "epoch": 0.690862588679475, "grad_norm": 1.106720988727926, "learning_rate": 4.840690501329628e-06, "loss": 0.3985, "step": 11345 }, { "epoch": 0.690923484456353, "grad_norm": 1.0379346740245485, "learning_rate": 4.840662470690259e-06, "loss": 0.4226, "step": 11346 }, { "epoch": 0.6909843802332308, "grad_norm": 0.9786893103981033, "learning_rate": 4.8406344376662695e-06, "loss": 0.4222, "step": 11347 }, { "epoch": 0.6910452760101087, "grad_norm": 1.0299936076983354, "learning_rate": 4.840606402257691e-06, "loss": 0.4206, "step": 11348 }, { "epoch": 0.6911061717869865, "grad_norm": 1.049512196551491, "learning_rate": 4.840578364464553e-06, "loss": 0.3879, "step": 11349 }, { "epoch": 0.6911670675638645, "grad_norm": 1.079732591225169, "learning_rate": 4.840550324286881e-06, "loss": 0.4382, "step": 11350 }, { "epoch": 0.6912279633407423, "grad_norm": 0.9844343094206075, "learning_rate": 4.840522281724706e-06, "loss": 0.4319, "step": 11351 }, { "epoch": 0.6912888591176202, "grad_norm": 1.056501198263205, "learning_rate": 4.840494236778054e-06, "loss": 0.365, "step": 11352 }, { "epoch": 0.691349754894498, "grad_norm": 0.9926846876643634, "learning_rate": 4.840466189446956e-06, "loss": 0.4539, "step": 11353 }, { "epoch": 0.691410650671376, "grad_norm": 1.0195358679089632, "learning_rate": 4.840438139731441e-06, "loss": 0.396, "step": 11354 }, { "epoch": 0.6914715464482538, "grad_norm": 0.9682313150456536, "learning_rate": 4.840410087631534e-06, "loss": 0.4297, "step": 11355 }, { "epoch": 0.6915324422251317, "grad_norm": 1.0611880901428044, "learning_rate": 4.8403820331472675e-06, "loss": 0.4021, "step": 11356 }, { "epoch": 0.6915933380020095, "grad_norm": 0.9389218989115373, "learning_rate": 4.840353976278668e-06, "loss": 0.4233, "step": 11357 }, { "epoch": 0.6916542337788875, "grad_norm": 0.9351485353529757, "learning_rate": 4.840325917025764e-06, "loss": 0.4142, "step": 11358 }, { "epoch": 0.6917151295557653, "grad_norm": 0.9839699459071233, "learning_rate": 4.840297855388585e-06, "loss": 0.4004, "step": 11359 }, { "epoch": 0.6917760253326432, "grad_norm": 0.9937192073033502, "learning_rate": 4.840269791367159e-06, "loss": 0.4434, "step": 11360 }, { "epoch": 0.6918369211095211, "grad_norm": 0.9701236730925777, "learning_rate": 4.840241724961515e-06, "loss": 0.4389, "step": 11361 }, { "epoch": 0.691897816886399, "grad_norm": 1.0725691938099917, "learning_rate": 4.840213656171682e-06, "loss": 0.378, "step": 11362 }, { "epoch": 0.6919587126632768, "grad_norm": 1.0769902311618988, "learning_rate": 4.840185584997687e-06, "loss": 0.4217, "step": 11363 }, { "epoch": 0.6920196084401546, "grad_norm": 1.032824693669767, "learning_rate": 4.84015751143956e-06, "loss": 0.4449, "step": 11364 }, { "epoch": 0.6920805042170326, "grad_norm": 0.9577944066307317, "learning_rate": 4.840129435497329e-06, "loss": 0.4268, "step": 11365 }, { "epoch": 0.6921413999939104, "grad_norm": 1.0488195297463971, "learning_rate": 4.840101357171023e-06, "loss": 0.3799, "step": 11366 }, { "epoch": 0.6922022957707883, "grad_norm": 0.8708481146449458, "learning_rate": 4.84007327646067e-06, "loss": 0.4709, "step": 11367 }, { "epoch": 0.6922631915476661, "grad_norm": 0.9954767377868325, "learning_rate": 4.8400451933662996e-06, "loss": 0.4379, "step": 11368 }, { "epoch": 0.6923240873245441, "grad_norm": 0.9960574660027427, "learning_rate": 4.84001710788794e-06, "loss": 0.4828, "step": 11369 }, { "epoch": 0.6923849831014219, "grad_norm": 1.0541861486595525, "learning_rate": 4.839989020025618e-06, "loss": 0.4155, "step": 11370 }, { "epoch": 0.6924458788782998, "grad_norm": 1.0601150028556532, "learning_rate": 4.839960929779366e-06, "loss": 0.3672, "step": 11371 }, { "epoch": 0.6925067746551776, "grad_norm": 0.9854474456468896, "learning_rate": 4.8399328371492095e-06, "loss": 0.4731, "step": 11372 }, { "epoch": 0.6925676704320556, "grad_norm": 1.0151418738122249, "learning_rate": 4.839904742135178e-06, "loss": 0.4147, "step": 11373 }, { "epoch": 0.6926285662089334, "grad_norm": 1.0136057330833823, "learning_rate": 4.839876644737299e-06, "loss": 0.4085, "step": 11374 }, { "epoch": 0.6926894619858113, "grad_norm": 0.9742038470179067, "learning_rate": 4.8398485449556045e-06, "loss": 0.4537, "step": 11375 }, { "epoch": 0.6927503577626891, "grad_norm": 1.0330853810288068, "learning_rate": 4.83982044279012e-06, "loss": 0.3733, "step": 11376 }, { "epoch": 0.6928112535395671, "grad_norm": 1.042839556992057, "learning_rate": 4.839792338240875e-06, "loss": 0.495, "step": 11377 }, { "epoch": 0.6928721493164449, "grad_norm": 1.0062166629696345, "learning_rate": 4.839764231307898e-06, "loss": 0.4354, "step": 11378 }, { "epoch": 0.6929330450933228, "grad_norm": 0.9987771812513735, "learning_rate": 4.839736121991218e-06, "loss": 0.4409, "step": 11379 }, { "epoch": 0.6929939408702006, "grad_norm": 1.053139526236559, "learning_rate": 4.839708010290863e-06, "loss": 0.4589, "step": 11380 }, { "epoch": 0.6930548366470786, "grad_norm": 0.9360736209203455, "learning_rate": 4.839679896206864e-06, "loss": 0.399, "step": 11381 }, { "epoch": 0.6931157324239564, "grad_norm": 0.973805821620556, "learning_rate": 4.8396517797392456e-06, "loss": 0.417, "step": 11382 }, { "epoch": 0.6931766282008343, "grad_norm": 1.0486834633240913, "learning_rate": 4.8396236608880396e-06, "loss": 0.4361, "step": 11383 }, { "epoch": 0.6932375239777121, "grad_norm": 1.0094092894466493, "learning_rate": 4.8395955396532735e-06, "loss": 0.3943, "step": 11384 }, { "epoch": 0.6932984197545901, "grad_norm": 0.9884880382678684, "learning_rate": 4.839567416034976e-06, "loss": 0.4197, "step": 11385 }, { "epoch": 0.6933593155314679, "grad_norm": 1.0041386993627106, "learning_rate": 4.839539290033176e-06, "loss": 0.4413, "step": 11386 }, { "epoch": 0.6934202113083457, "grad_norm": 1.0892161590429361, "learning_rate": 4.8395111616479016e-06, "loss": 0.4624, "step": 11387 }, { "epoch": 0.6934811070852236, "grad_norm": 0.9480407319216112, "learning_rate": 4.8394830308791826e-06, "loss": 0.422, "step": 11388 }, { "epoch": 0.6935420028621015, "grad_norm": 0.9623480822875962, "learning_rate": 4.8394548977270475e-06, "loss": 0.4866, "step": 11389 }, { "epoch": 0.6936028986389794, "grad_norm": 1.0481417975220757, "learning_rate": 4.839426762191523e-06, "loss": 0.4352, "step": 11390 }, { "epoch": 0.6936637944158572, "grad_norm": 1.1122118745774152, "learning_rate": 4.8393986242726394e-06, "loss": 0.4529, "step": 11391 }, { "epoch": 0.6937246901927351, "grad_norm": 0.9256962684609247, "learning_rate": 4.839370483970426e-06, "loss": 0.4666, "step": 11392 }, { "epoch": 0.693785585969613, "grad_norm": 0.9855019993785048, "learning_rate": 4.8393423412849094e-06, "loss": 0.4293, "step": 11393 }, { "epoch": 0.6938464817464909, "grad_norm": 1.019955343677854, "learning_rate": 4.83931419621612e-06, "loss": 0.431, "step": 11394 }, { "epoch": 0.6939073775233687, "grad_norm": 1.0650052258900848, "learning_rate": 4.839286048764086e-06, "loss": 0.4784, "step": 11395 }, { "epoch": 0.6939682733002466, "grad_norm": 0.9740329098786928, "learning_rate": 4.839257898928836e-06, "loss": 0.486, "step": 11396 }, { "epoch": 0.6940291690771245, "grad_norm": 0.9443226227925521, "learning_rate": 4.839229746710399e-06, "loss": 0.4681, "step": 11397 }, { "epoch": 0.6940900648540024, "grad_norm": 1.0183997903779232, "learning_rate": 4.839201592108802e-06, "loss": 0.4047, "step": 11398 }, { "epoch": 0.6941509606308802, "grad_norm": 0.9487110943027699, "learning_rate": 4.839173435124076e-06, "loss": 0.5005, "step": 11399 }, { "epoch": 0.6942118564077582, "grad_norm": 0.964650694393175, "learning_rate": 4.83914527575625e-06, "loss": 0.3708, "step": 11400 }, { "epoch": 0.694272752184636, "grad_norm": 1.0216570147411703, "learning_rate": 4.83911711400535e-06, "loss": 0.4746, "step": 11401 }, { "epoch": 0.6943336479615139, "grad_norm": 0.9723556698957829, "learning_rate": 4.839088949871406e-06, "loss": 0.4836, "step": 11402 }, { "epoch": 0.6943945437383917, "grad_norm": 1.0045298806316483, "learning_rate": 4.8390607833544466e-06, "loss": 0.467, "step": 11403 }, { "epoch": 0.6944554395152697, "grad_norm": 0.9519166157749933, "learning_rate": 4.839032614454502e-06, "loss": 0.4301, "step": 11404 }, { "epoch": 0.6945163352921475, "grad_norm": 1.0323794791812253, "learning_rate": 4.839004443171599e-06, "loss": 0.3966, "step": 11405 }, { "epoch": 0.6945772310690254, "grad_norm": 0.9288905086356615, "learning_rate": 4.838976269505766e-06, "loss": 0.3881, "step": 11406 }, { "epoch": 0.6946381268459032, "grad_norm": 1.0885762398348904, "learning_rate": 4.838948093457034e-06, "loss": 0.4232, "step": 11407 }, { "epoch": 0.6946990226227812, "grad_norm": 1.1016038548535343, "learning_rate": 4.8389199150254295e-06, "loss": 0.3953, "step": 11408 }, { "epoch": 0.694759918399659, "grad_norm": 0.9095310915985301, "learning_rate": 4.838891734210982e-06, "loss": 0.4261, "step": 11409 }, { "epoch": 0.6948208141765368, "grad_norm": 1.022869549341025, "learning_rate": 4.83886355101372e-06, "loss": 0.3925, "step": 11410 }, { "epoch": 0.6948817099534147, "grad_norm": 1.0243669806132112, "learning_rate": 4.838835365433674e-06, "loss": 0.422, "step": 11411 }, { "epoch": 0.6949426057302927, "grad_norm": 1.069145544635632, "learning_rate": 4.838807177470869e-06, "loss": 0.4408, "step": 11412 }, { "epoch": 0.6950035015071705, "grad_norm": 0.9920981961145152, "learning_rate": 4.8387789871253374e-06, "loss": 0.3985, "step": 11413 }, { "epoch": 0.6950643972840483, "grad_norm": 0.9183843751581654, "learning_rate": 4.838750794397105e-06, "loss": 0.4216, "step": 11414 }, { "epoch": 0.6951252930609262, "grad_norm": 1.0431412650104979, "learning_rate": 4.838722599286203e-06, "loss": 0.4548, "step": 11415 }, { "epoch": 0.6951861888378041, "grad_norm": 1.0040955173283366, "learning_rate": 4.838694401792659e-06, "loss": 0.4311, "step": 11416 }, { "epoch": 0.695247084614682, "grad_norm": 1.0344028234313296, "learning_rate": 4.838666201916502e-06, "loss": 0.3874, "step": 11417 }, { "epoch": 0.6953079803915598, "grad_norm": 1.0009040636839321, "learning_rate": 4.83863799965776e-06, "loss": 0.4257, "step": 11418 }, { "epoch": 0.6953688761684377, "grad_norm": 0.9438579542601448, "learning_rate": 4.838609795016463e-06, "loss": 0.4408, "step": 11419 }, { "epoch": 0.6954297719453156, "grad_norm": 1.002323910718985, "learning_rate": 4.838581587992637e-06, "loss": 0.4531, "step": 11420 }, { "epoch": 0.6954906677221935, "grad_norm": 1.0095397879634267, "learning_rate": 4.838553378586315e-06, "loss": 0.4522, "step": 11421 }, { "epoch": 0.6955515634990713, "grad_norm": 0.9779097850985147, "learning_rate": 4.838525166797522e-06, "loss": 0.4354, "step": 11422 }, { "epoch": 0.6956124592759492, "grad_norm": 1.0674557111128677, "learning_rate": 4.838496952626288e-06, "loss": 0.4232, "step": 11423 }, { "epoch": 0.6956733550528271, "grad_norm": 0.9527260559711861, "learning_rate": 4.838468736072643e-06, "loss": 0.4363, "step": 11424 }, { "epoch": 0.695734250829705, "grad_norm": 1.026364259996868, "learning_rate": 4.838440517136614e-06, "loss": 0.3805, "step": 11425 }, { "epoch": 0.6957951466065828, "grad_norm": 0.9020389270789557, "learning_rate": 4.838412295818231e-06, "loss": 0.4651, "step": 11426 }, { "epoch": 0.6958560423834607, "grad_norm": 1.0301919625766391, "learning_rate": 4.8383840721175215e-06, "loss": 0.3472, "step": 11427 }, { "epoch": 0.6959169381603386, "grad_norm": 0.9389428061191223, "learning_rate": 4.838355846034515e-06, "loss": 0.4163, "step": 11428 }, { "epoch": 0.6959778339372165, "grad_norm": 0.9520920091233587, "learning_rate": 4.8383276175692405e-06, "loss": 0.4045, "step": 11429 }, { "epoch": 0.6960387297140943, "grad_norm": 0.9904019878005922, "learning_rate": 4.838299386721726e-06, "loss": 0.4161, "step": 11430 }, { "epoch": 0.6960996254909722, "grad_norm": 0.9371068958497569, "learning_rate": 4.838271153492001e-06, "loss": 0.4002, "step": 11431 }, { "epoch": 0.6961605212678501, "grad_norm": 1.0762509862371568, "learning_rate": 4.838242917880095e-06, "loss": 0.385, "step": 11432 }, { "epoch": 0.696221417044728, "grad_norm": 1.0244724872955884, "learning_rate": 4.838214679886034e-06, "loss": 0.4434, "step": 11433 }, { "epoch": 0.6962823128216058, "grad_norm": 0.9642245897255973, "learning_rate": 4.83818643950985e-06, "loss": 0.4291, "step": 11434 }, { "epoch": 0.6963432085984836, "grad_norm": 1.0658638762244768, "learning_rate": 4.838158196751569e-06, "loss": 0.4698, "step": 11435 }, { "epoch": 0.6964041043753616, "grad_norm": 0.9586917812467968, "learning_rate": 4.838129951611221e-06, "loss": 0.3948, "step": 11436 }, { "epoch": 0.6964650001522394, "grad_norm": 0.9345607906692025, "learning_rate": 4.838101704088836e-06, "loss": 0.4648, "step": 11437 }, { "epoch": 0.6965258959291173, "grad_norm": 1.0270951041668255, "learning_rate": 4.8380734541844405e-06, "loss": 0.4072, "step": 11438 }, { "epoch": 0.6965867917059951, "grad_norm": 0.9052877654137222, "learning_rate": 4.838045201898065e-06, "loss": 0.3952, "step": 11439 }, { "epoch": 0.6966476874828731, "grad_norm": 1.0235585310704285, "learning_rate": 4.838016947229737e-06, "loss": 0.472, "step": 11440 }, { "epoch": 0.6967085832597509, "grad_norm": 0.9741091179006983, "learning_rate": 4.837988690179487e-06, "loss": 0.5209, "step": 11441 }, { "epoch": 0.6967694790366288, "grad_norm": 1.0100471246191258, "learning_rate": 4.837960430747341e-06, "loss": 0.3455, "step": 11442 }, { "epoch": 0.6968303748135067, "grad_norm": 0.9270123776022846, "learning_rate": 4.837932168933332e-06, "loss": 0.4229, "step": 11443 }, { "epoch": 0.6968912705903846, "grad_norm": 1.038714862979794, "learning_rate": 4.837903904737484e-06, "loss": 0.4834, "step": 11444 }, { "epoch": 0.6969521663672624, "grad_norm": 1.072848365788316, "learning_rate": 4.83787563815983e-06, "loss": 0.4398, "step": 11445 }, { "epoch": 0.6970130621441403, "grad_norm": 1.03116653560015, "learning_rate": 4.837847369200396e-06, "loss": 0.4462, "step": 11446 }, { "epoch": 0.6970739579210182, "grad_norm": 1.09527902901931, "learning_rate": 4.837819097859211e-06, "loss": 0.3823, "step": 11447 }, { "epoch": 0.6971348536978961, "grad_norm": 0.9875285156692952, "learning_rate": 4.837790824136306e-06, "loss": 0.3719, "step": 11448 }, { "epoch": 0.6971957494747739, "grad_norm": 1.0730101591209258, "learning_rate": 4.837762548031707e-06, "loss": 0.405, "step": 11449 }, { "epoch": 0.6972566452516518, "grad_norm": 1.1241888780613858, "learning_rate": 4.837734269545444e-06, "loss": 0.3777, "step": 11450 }, { "epoch": 0.6973175410285297, "grad_norm": 0.9885068155640494, "learning_rate": 4.837705988677547e-06, "loss": 0.4357, "step": 11451 }, { "epoch": 0.6973784368054076, "grad_norm": 0.9981771019634312, "learning_rate": 4.837677705428043e-06, "loss": 0.4764, "step": 11452 }, { "epoch": 0.6974393325822854, "grad_norm": 0.9353454430154761, "learning_rate": 4.837649419796962e-06, "loss": 0.4624, "step": 11453 }, { "epoch": 0.6975002283591633, "grad_norm": 0.9794756411903768, "learning_rate": 4.837621131784333e-06, "loss": 0.4853, "step": 11454 }, { "epoch": 0.6975611241360412, "grad_norm": 1.0139403418113513, "learning_rate": 4.837592841390183e-06, "loss": 0.4118, "step": 11455 }, { "epoch": 0.6976220199129191, "grad_norm": 0.9578798244457989, "learning_rate": 4.8375645486145425e-06, "loss": 0.4482, "step": 11456 }, { "epoch": 0.6976829156897969, "grad_norm": 1.0211739317921076, "learning_rate": 4.837536253457439e-06, "loss": 0.4186, "step": 11457 }, { "epoch": 0.6977438114666747, "grad_norm": 1.0022263550728083, "learning_rate": 4.837507955918903e-06, "loss": 0.4216, "step": 11458 }, { "epoch": 0.6978047072435527, "grad_norm": 1.0171329494740489, "learning_rate": 4.837479655998963e-06, "loss": 0.3982, "step": 11459 }, { "epoch": 0.6978656030204305, "grad_norm": 1.0326314104121936, "learning_rate": 4.837451353697647e-06, "loss": 0.4288, "step": 11460 }, { "epoch": 0.6979264987973084, "grad_norm": 1.0136101940052682, "learning_rate": 4.837423049014983e-06, "loss": 0.4315, "step": 11461 }, { "epoch": 0.6979873945741862, "grad_norm": 0.9525145715955237, "learning_rate": 4.837394741951003e-06, "loss": 0.5251, "step": 11462 }, { "epoch": 0.6980482903510642, "grad_norm": 1.0099499972431945, "learning_rate": 4.837366432505733e-06, "loss": 0.3773, "step": 11463 }, { "epoch": 0.698109186127942, "grad_norm": 0.9650364481010996, "learning_rate": 4.8373381206792015e-06, "loss": 0.4197, "step": 11464 }, { "epoch": 0.6981700819048199, "grad_norm": 1.0206920021943873, "learning_rate": 4.8373098064714395e-06, "loss": 0.3744, "step": 11465 }, { "epoch": 0.6982309776816977, "grad_norm": 1.0612863353522581, "learning_rate": 4.837281489882475e-06, "loss": 0.3719, "step": 11466 }, { "epoch": 0.6982918734585757, "grad_norm": 1.0220486120755252, "learning_rate": 4.837253170912336e-06, "loss": 0.4556, "step": 11467 }, { "epoch": 0.6983527692354535, "grad_norm": 0.9425623466822135, "learning_rate": 4.837224849561053e-06, "loss": 0.4419, "step": 11468 }, { "epoch": 0.6984136650123314, "grad_norm": 0.9348899325236141, "learning_rate": 4.837196525828653e-06, "loss": 0.384, "step": 11469 }, { "epoch": 0.6984745607892092, "grad_norm": 0.937626671244369, "learning_rate": 4.837168199715166e-06, "loss": 0.4493, "step": 11470 }, { "epoch": 0.6985354565660872, "grad_norm": 0.9593967207558884, "learning_rate": 4.837139871220622e-06, "loss": 0.4472, "step": 11471 }, { "epoch": 0.698596352342965, "grad_norm": 0.98934341834821, "learning_rate": 4.837111540345046e-06, "loss": 0.3762, "step": 11472 }, { "epoch": 0.6986572481198429, "grad_norm": 1.0023931652133746, "learning_rate": 4.837083207088471e-06, "loss": 0.4241, "step": 11473 }, { "epoch": 0.6987181438967207, "grad_norm": 0.9715594047667024, "learning_rate": 4.837054871450925e-06, "loss": 0.4472, "step": 11474 }, { "epoch": 0.6987790396735987, "grad_norm": 1.0313791862244066, "learning_rate": 4.837026533432434e-06, "loss": 0.4175, "step": 11475 }, { "epoch": 0.6988399354504765, "grad_norm": 1.0338079684732597, "learning_rate": 4.83699819303303e-06, "loss": 0.3919, "step": 11476 }, { "epoch": 0.6989008312273544, "grad_norm": 1.0138168212247602, "learning_rate": 4.83696985025274e-06, "loss": 0.4337, "step": 11477 }, { "epoch": 0.6989617270042322, "grad_norm": 1.030962466763444, "learning_rate": 4.836941505091596e-06, "loss": 0.3497, "step": 11478 }, { "epoch": 0.6990226227811102, "grad_norm": 1.0641747418770564, "learning_rate": 4.836913157549622e-06, "loss": 0.3638, "step": 11479 }, { "epoch": 0.699083518557988, "grad_norm": 0.9095917260504603, "learning_rate": 4.836884807626852e-06, "loss": 0.4232, "step": 11480 }, { "epoch": 0.6991444143348658, "grad_norm": 1.018079043890744, "learning_rate": 4.83685645532331e-06, "loss": 0.3931, "step": 11481 }, { "epoch": 0.6992053101117438, "grad_norm": 1.0873624963277422, "learning_rate": 4.836828100639029e-06, "loss": 0.4041, "step": 11482 }, { "epoch": 0.6992662058886217, "grad_norm": 1.0488000453589126, "learning_rate": 4.836799743574035e-06, "loss": 0.4151, "step": 11483 }, { "epoch": 0.6993271016654995, "grad_norm": 0.982943096510685, "learning_rate": 4.836771384128357e-06, "loss": 0.4622, "step": 11484 }, { "epoch": 0.6993879974423773, "grad_norm": 1.074005012184541, "learning_rate": 4.836743022302027e-06, "loss": 0.4014, "step": 11485 }, { "epoch": 0.6994488932192553, "grad_norm": 1.0444894003673866, "learning_rate": 4.836714658095071e-06, "loss": 0.5284, "step": 11486 }, { "epoch": 0.6995097889961331, "grad_norm": 0.9017833723653855, "learning_rate": 4.8366862915075186e-06, "loss": 0.3974, "step": 11487 }, { "epoch": 0.699570684773011, "grad_norm": 1.0467471315411339, "learning_rate": 4.8366579225393985e-06, "loss": 0.4077, "step": 11488 }, { "epoch": 0.6996315805498888, "grad_norm": 1.074745535425246, "learning_rate": 4.8366295511907405e-06, "loss": 0.3854, "step": 11489 }, { "epoch": 0.6996924763267668, "grad_norm": 0.9605347183396125, "learning_rate": 4.836601177461573e-06, "loss": 0.4329, "step": 11490 }, { "epoch": 0.6997533721036446, "grad_norm": 1.0405296195890514, "learning_rate": 4.836572801351924e-06, "loss": 0.4833, "step": 11491 }, { "epoch": 0.6998142678805225, "grad_norm": 1.0085136683523543, "learning_rate": 4.836544422861823e-06, "loss": 0.4376, "step": 11492 }, { "epoch": 0.6998751636574003, "grad_norm": 0.9748071312499873, "learning_rate": 4.8365160419913e-06, "loss": 0.4104, "step": 11493 }, { "epoch": 0.6999360594342783, "grad_norm": 0.948988121124257, "learning_rate": 4.836487658740383e-06, "loss": 0.4324, "step": 11494 }, { "epoch": 0.6999969552111561, "grad_norm": 1.0236301289229592, "learning_rate": 4.8364592731091005e-06, "loss": 0.3409, "step": 11495 }, { "epoch": 0.700057850988034, "grad_norm": 0.9904850704550154, "learning_rate": 4.836430885097482e-06, "loss": 0.3918, "step": 11496 }, { "epoch": 0.7001187467649118, "grad_norm": 1.0350110053190906, "learning_rate": 4.836402494705556e-06, "loss": 0.3849, "step": 11497 }, { "epoch": 0.7001796425417898, "grad_norm": 1.032768137078956, "learning_rate": 4.836374101933352e-06, "loss": 0.4092, "step": 11498 }, { "epoch": 0.7002405383186676, "grad_norm": 1.0261125111920155, "learning_rate": 4.836345706780899e-06, "loss": 0.4403, "step": 11499 }, { "epoch": 0.7003014340955455, "grad_norm": 0.9264258306040256, "learning_rate": 4.836317309248225e-06, "loss": 0.4402, "step": 11500 }, { "epoch": 0.7003623298724233, "grad_norm": 0.8843967243030083, "learning_rate": 4.836288909335361e-06, "loss": 0.4168, "step": 11501 }, { "epoch": 0.7004232256493013, "grad_norm": 1.032521565171973, "learning_rate": 4.836260507042333e-06, "loss": 0.3648, "step": 11502 }, { "epoch": 0.7004841214261791, "grad_norm": 1.0214215634052441, "learning_rate": 4.83623210236917e-06, "loss": 0.4734, "step": 11503 }, { "epoch": 0.700545017203057, "grad_norm": 1.0161578959015172, "learning_rate": 4.836203695315905e-06, "loss": 0.4547, "step": 11504 }, { "epoch": 0.7006059129799348, "grad_norm": 1.0413279204467953, "learning_rate": 4.836175285882563e-06, "loss": 0.372, "step": 11505 }, { "epoch": 0.7006668087568128, "grad_norm": 1.0343707345762168, "learning_rate": 4.836146874069174e-06, "loss": 0.5022, "step": 11506 }, { "epoch": 0.7007277045336906, "grad_norm": 1.047416242099494, "learning_rate": 4.836118459875768e-06, "loss": 0.4203, "step": 11507 }, { "epoch": 0.7007886003105684, "grad_norm": 1.0458577399782358, "learning_rate": 4.836090043302372e-06, "loss": 0.4571, "step": 11508 }, { "epoch": 0.7008494960874463, "grad_norm": 1.0558340841861351, "learning_rate": 4.836061624349016e-06, "loss": 0.4368, "step": 11509 }, { "epoch": 0.7009103918643242, "grad_norm": 1.047100839116054, "learning_rate": 4.8360332030157305e-06, "loss": 0.4182, "step": 11510 }, { "epoch": 0.7009712876412021, "grad_norm": 0.9810176844462284, "learning_rate": 4.836004779302542e-06, "loss": 0.4822, "step": 11511 }, { "epoch": 0.7010321834180799, "grad_norm": 1.1120739271842945, "learning_rate": 4.835976353209481e-06, "loss": 0.3931, "step": 11512 }, { "epoch": 0.7010930791949578, "grad_norm": 0.9546661834239346, "learning_rate": 4.835947924736575e-06, "loss": 0.5164, "step": 11513 }, { "epoch": 0.7011539749718357, "grad_norm": 1.075355811369139, "learning_rate": 4.8359194938838535e-06, "loss": 0.3434, "step": 11514 }, { "epoch": 0.7012148707487136, "grad_norm": 1.0440390780956357, "learning_rate": 4.835891060651346e-06, "loss": 0.5223, "step": 11515 }, { "epoch": 0.7012757665255914, "grad_norm": 0.9083148587812518, "learning_rate": 4.835862625039081e-06, "loss": 0.4025, "step": 11516 }, { "epoch": 0.7013366623024693, "grad_norm": 1.0325581716048235, "learning_rate": 4.835834187047089e-06, "loss": 0.4811, "step": 11517 }, { "epoch": 0.7013975580793472, "grad_norm": 1.0930028983115883, "learning_rate": 4.835805746675397e-06, "loss": 0.3565, "step": 11518 }, { "epoch": 0.7014584538562251, "grad_norm": 0.9714177500440017, "learning_rate": 4.835777303924035e-06, "loss": 0.5051, "step": 11519 }, { "epoch": 0.7015193496331029, "grad_norm": 0.9663911163494339, "learning_rate": 4.835748858793031e-06, "loss": 0.4737, "step": 11520 }, { "epoch": 0.7015802454099808, "grad_norm": 0.9465199115724305, "learning_rate": 4.835720411282415e-06, "loss": 0.4704, "step": 11521 }, { "epoch": 0.7016411411868587, "grad_norm": 0.9345818239043169, "learning_rate": 4.835691961392216e-06, "loss": 0.4436, "step": 11522 }, { "epoch": 0.7017020369637366, "grad_norm": 1.108554938610742, "learning_rate": 4.835663509122462e-06, "loss": 0.4298, "step": 11523 }, { "epoch": 0.7017629327406144, "grad_norm": 1.0007535927198812, "learning_rate": 4.8356350544731825e-06, "loss": 0.4126, "step": 11524 }, { "epoch": 0.7018238285174924, "grad_norm": 1.1170741580234544, "learning_rate": 4.835606597444407e-06, "loss": 0.3962, "step": 11525 }, { "epoch": 0.7018847242943702, "grad_norm": 1.0866068724342588, "learning_rate": 4.835578138036164e-06, "loss": 0.3965, "step": 11526 }, { "epoch": 0.701945620071248, "grad_norm": 1.0253035642385462, "learning_rate": 4.835549676248482e-06, "loss": 0.4052, "step": 11527 }, { "epoch": 0.7020065158481259, "grad_norm": 0.9861272940065823, "learning_rate": 4.835521212081391e-06, "loss": 0.4202, "step": 11528 }, { "epoch": 0.7020674116250039, "grad_norm": 1.017280301322898, "learning_rate": 4.83549274553492e-06, "loss": 0.3517, "step": 11529 }, { "epoch": 0.7021283074018817, "grad_norm": 0.9948039969673824, "learning_rate": 4.8354642766090964e-06, "loss": 0.3903, "step": 11530 }, { "epoch": 0.7021892031787595, "grad_norm": 0.9415203712562841, "learning_rate": 4.8354358053039515e-06, "loss": 0.4857, "step": 11531 }, { "epoch": 0.7022500989556374, "grad_norm": 1.0519619557613291, "learning_rate": 4.835407331619513e-06, "loss": 0.394, "step": 11532 }, { "epoch": 0.7023109947325153, "grad_norm": 1.0135458440714837, "learning_rate": 4.835378855555809e-06, "loss": 0.4393, "step": 11533 }, { "epoch": 0.7023718905093932, "grad_norm": 0.9549331181075554, "learning_rate": 4.835350377112871e-06, "loss": 0.469, "step": 11534 }, { "epoch": 0.702432786286271, "grad_norm": 1.025388614974714, "learning_rate": 4.835321896290725e-06, "loss": 0.4197, "step": 11535 }, { "epoch": 0.7024936820631489, "grad_norm": 0.9963495519852628, "learning_rate": 4.8352934130894025e-06, "loss": 0.3605, "step": 11536 }, { "epoch": 0.7025545778400268, "grad_norm": 1.0057167103372973, "learning_rate": 4.835264927508932e-06, "loss": 0.4324, "step": 11537 }, { "epoch": 0.7026154736169047, "grad_norm": 1.0241068049449804, "learning_rate": 4.835236439549341e-06, "loss": 0.5003, "step": 11538 }, { "epoch": 0.7026763693937825, "grad_norm": 0.987345202406134, "learning_rate": 4.835207949210661e-06, "loss": 0.4239, "step": 11539 }, { "epoch": 0.7027372651706604, "grad_norm": 0.9895197779022825, "learning_rate": 4.835179456492919e-06, "loss": 0.4266, "step": 11540 }, { "epoch": 0.7027981609475383, "grad_norm": 1.006072185877477, "learning_rate": 4.835150961396145e-06, "loss": 0.5089, "step": 11541 }, { "epoch": 0.7028590567244162, "grad_norm": 0.9262385038404428, "learning_rate": 4.835122463920368e-06, "loss": 0.4468, "step": 11542 }, { "epoch": 0.702919952501294, "grad_norm": 0.9751814685940107, "learning_rate": 4.835093964065616e-06, "loss": 0.4916, "step": 11543 }, { "epoch": 0.7029808482781719, "grad_norm": 0.9889261187742306, "learning_rate": 4.835065461831919e-06, "loss": 0.3382, "step": 11544 }, { "epoch": 0.7030417440550498, "grad_norm": 0.9540894677500805, "learning_rate": 4.835036957219306e-06, "loss": 0.3892, "step": 11545 }, { "epoch": 0.7031026398319277, "grad_norm": 1.0223482010502378, "learning_rate": 4.835008450227806e-06, "loss": 0.4079, "step": 11546 }, { "epoch": 0.7031635356088055, "grad_norm": 1.1141193886701088, "learning_rate": 4.834979940857448e-06, "loss": 0.3852, "step": 11547 }, { "epoch": 0.7032244313856834, "grad_norm": 1.0057748433051552, "learning_rate": 4.83495142910826e-06, "loss": 0.459, "step": 11548 }, { "epoch": 0.7032853271625613, "grad_norm": 1.0585273252013785, "learning_rate": 4.834922914980274e-06, "loss": 0.4528, "step": 11549 }, { "epoch": 0.7033462229394392, "grad_norm": 1.0721968023376454, "learning_rate": 4.8348943984735154e-06, "loss": 0.4347, "step": 11550 }, { "epoch": 0.703407118716317, "grad_norm": 0.9773179886462723, "learning_rate": 4.834865879588016e-06, "loss": 0.4288, "step": 11551 }, { "epoch": 0.7034680144931948, "grad_norm": 1.0237950385049175, "learning_rate": 4.834837358323803e-06, "loss": 0.4454, "step": 11552 }, { "epoch": 0.7035289102700728, "grad_norm": 0.9398818817057194, "learning_rate": 4.834808834680906e-06, "loss": 0.506, "step": 11553 }, { "epoch": 0.7035898060469506, "grad_norm": 0.928762974990354, "learning_rate": 4.834780308659355e-06, "loss": 0.4244, "step": 11554 }, { "epoch": 0.7036507018238285, "grad_norm": 1.0090288259922147, "learning_rate": 4.834751780259178e-06, "loss": 0.4336, "step": 11555 }, { "epoch": 0.7037115976007063, "grad_norm": 1.0559205527693198, "learning_rate": 4.8347232494804045e-06, "loss": 0.461, "step": 11556 }, { "epoch": 0.7037724933775843, "grad_norm": 0.9783249454435727, "learning_rate": 4.834694716323064e-06, "loss": 0.4393, "step": 11557 }, { "epoch": 0.7038333891544621, "grad_norm": 1.0258455936831192, "learning_rate": 4.834666180787184e-06, "loss": 0.4347, "step": 11558 }, { "epoch": 0.70389428493134, "grad_norm": 1.0453803827977217, "learning_rate": 4.834637642872796e-06, "loss": 0.3996, "step": 11559 }, { "epoch": 0.7039551807082178, "grad_norm": 1.0417274287270377, "learning_rate": 4.834609102579927e-06, "loss": 0.5049, "step": 11560 }, { "epoch": 0.7040160764850958, "grad_norm": 0.9721103729140425, "learning_rate": 4.834580559908607e-06, "loss": 0.4027, "step": 11561 }, { "epoch": 0.7040769722619736, "grad_norm": 1.033959509560581, "learning_rate": 4.834552014858864e-06, "loss": 0.4753, "step": 11562 }, { "epoch": 0.7041378680388515, "grad_norm": 0.9945621230860887, "learning_rate": 4.834523467430729e-06, "loss": 0.3935, "step": 11563 }, { "epoch": 0.7041987638157294, "grad_norm": 0.9667322078412678, "learning_rate": 4.8344949176242295e-06, "loss": 0.4901, "step": 11564 }, { "epoch": 0.7042596595926073, "grad_norm": 0.9318235526843194, "learning_rate": 4.834466365439396e-06, "loss": 0.4218, "step": 11565 }, { "epoch": 0.7043205553694851, "grad_norm": 0.9261132904656905, "learning_rate": 4.834437810876255e-06, "loss": 0.4638, "step": 11566 }, { "epoch": 0.704381451146363, "grad_norm": 1.0666959003799072, "learning_rate": 4.834409253934838e-06, "loss": 0.3872, "step": 11567 }, { "epoch": 0.7044423469232409, "grad_norm": 1.047901042378349, "learning_rate": 4.834380694615174e-06, "loss": 0.3865, "step": 11568 }, { "epoch": 0.7045032427001188, "grad_norm": 0.9391899168956402, "learning_rate": 4.834352132917291e-06, "loss": 0.4562, "step": 11569 }, { "epoch": 0.7045641384769966, "grad_norm": 0.9019880072679285, "learning_rate": 4.8343235688412185e-06, "loss": 0.4994, "step": 11570 }, { "epoch": 0.7046250342538745, "grad_norm": 0.9956217887333346, "learning_rate": 4.834295002386986e-06, "loss": 0.3982, "step": 11571 }, { "epoch": 0.7046859300307524, "grad_norm": 1.0685688134348816, "learning_rate": 4.834266433554622e-06, "loss": 0.401, "step": 11572 }, { "epoch": 0.7047468258076303, "grad_norm": 1.034314801648896, "learning_rate": 4.834237862344157e-06, "loss": 0.4102, "step": 11573 }, { "epoch": 0.7048077215845081, "grad_norm": 0.962129886643552, "learning_rate": 4.834209288755617e-06, "loss": 0.4392, "step": 11574 }, { "epoch": 0.704868617361386, "grad_norm": 1.0122334402403776, "learning_rate": 4.834180712789035e-06, "loss": 0.3693, "step": 11575 }, { "epoch": 0.7049295131382639, "grad_norm": 1.0095000874357678, "learning_rate": 4.834152134444436e-06, "loss": 0.3833, "step": 11576 }, { "epoch": 0.7049904089151418, "grad_norm": 1.0198751492969913, "learning_rate": 4.8341235537218536e-06, "loss": 0.3933, "step": 11577 }, { "epoch": 0.7050513046920196, "grad_norm": 1.0203538912669734, "learning_rate": 4.834094970621313e-06, "loss": 0.4044, "step": 11578 }, { "epoch": 0.7051122004688974, "grad_norm": 1.0402256147703026, "learning_rate": 4.834066385142846e-06, "loss": 0.4121, "step": 11579 }, { "epoch": 0.7051730962457754, "grad_norm": 0.9501913451895768, "learning_rate": 4.8340377972864805e-06, "loss": 0.4621, "step": 11580 }, { "epoch": 0.7052339920226532, "grad_norm": 1.045826179185689, "learning_rate": 4.834009207052246e-06, "loss": 0.4632, "step": 11581 }, { "epoch": 0.7052948877995311, "grad_norm": 1.1768973610584024, "learning_rate": 4.833980614440172e-06, "loss": 0.4113, "step": 11582 }, { "epoch": 0.7053557835764089, "grad_norm": 0.9615457022268701, "learning_rate": 4.833952019450286e-06, "loss": 0.4297, "step": 11583 }, { "epoch": 0.7054166793532869, "grad_norm": 1.0441611857361048, "learning_rate": 4.833923422082618e-06, "loss": 0.4117, "step": 11584 }, { "epoch": 0.7054775751301647, "grad_norm": 1.0946093583871606, "learning_rate": 4.8338948223371975e-06, "loss": 0.3995, "step": 11585 }, { "epoch": 0.7055384709070426, "grad_norm": 1.0076751309731942, "learning_rate": 4.833866220214054e-06, "loss": 0.4084, "step": 11586 }, { "epoch": 0.7055993666839204, "grad_norm": 1.0106513522430476, "learning_rate": 4.833837615713215e-06, "loss": 0.3841, "step": 11587 }, { "epoch": 0.7056602624607984, "grad_norm": 1.0327056686609102, "learning_rate": 4.833809008834712e-06, "loss": 0.3999, "step": 11588 }, { "epoch": 0.7057211582376762, "grad_norm": 1.025644333639754, "learning_rate": 4.8337803995785726e-06, "loss": 0.4648, "step": 11589 }, { "epoch": 0.7057820540145541, "grad_norm": 0.9363517862324277, "learning_rate": 4.833751787944826e-06, "loss": 0.4997, "step": 11590 }, { "epoch": 0.7058429497914319, "grad_norm": 0.9281627738545095, "learning_rate": 4.833723173933502e-06, "loss": 0.4037, "step": 11591 }, { "epoch": 0.7059038455683099, "grad_norm": 1.0068821711513685, "learning_rate": 4.833694557544629e-06, "loss": 0.4542, "step": 11592 }, { "epoch": 0.7059647413451877, "grad_norm": 1.074060530587589, "learning_rate": 4.833665938778237e-06, "loss": 0.3469, "step": 11593 }, { "epoch": 0.7060256371220656, "grad_norm": 1.048625352696612, "learning_rate": 4.833637317634354e-06, "loss": 0.4635, "step": 11594 }, { "epoch": 0.7060865328989434, "grad_norm": 1.126155941945451, "learning_rate": 4.83360869411301e-06, "loss": 0.4047, "step": 11595 }, { "epoch": 0.7061474286758214, "grad_norm": 0.9039758023325897, "learning_rate": 4.833580068214234e-06, "loss": 0.5026, "step": 11596 }, { "epoch": 0.7062083244526992, "grad_norm": 1.024133421238663, "learning_rate": 4.8335514399380554e-06, "loss": 0.4086, "step": 11597 }, { "epoch": 0.706269220229577, "grad_norm": 1.0197080081474164, "learning_rate": 4.8335228092845036e-06, "loss": 0.4125, "step": 11598 }, { "epoch": 0.7063301160064549, "grad_norm": 1.0403813810395186, "learning_rate": 4.8334941762536055e-06, "loss": 0.4294, "step": 11599 }, { "epoch": 0.7063910117833329, "grad_norm": 0.9438435549210288, "learning_rate": 4.833465540845393e-06, "loss": 0.3933, "step": 11600 }, { "epoch": 0.7064519075602107, "grad_norm": 0.979060154044363, "learning_rate": 4.833436903059895e-06, "loss": 0.4673, "step": 11601 }, { "epoch": 0.7065128033370885, "grad_norm": 1.0317526108652888, "learning_rate": 4.83340826289714e-06, "loss": 0.3866, "step": 11602 }, { "epoch": 0.7065736991139664, "grad_norm": 0.9465495672240413, "learning_rate": 4.833379620357157e-06, "loss": 0.4922, "step": 11603 }, { "epoch": 0.7066345948908443, "grad_norm": 0.9603809539428895, "learning_rate": 4.833350975439974e-06, "loss": 0.4441, "step": 11604 }, { "epoch": 0.7066954906677222, "grad_norm": 0.9529554389626578, "learning_rate": 4.833322328145623e-06, "loss": 0.4011, "step": 11605 }, { "epoch": 0.7067563864446, "grad_norm": 1.0945577078028956, "learning_rate": 4.833293678474131e-06, "loss": 0.4196, "step": 11606 }, { "epoch": 0.706817282221478, "grad_norm": 1.0803894070111963, "learning_rate": 4.833265026425529e-06, "loss": 0.4496, "step": 11607 }, { "epoch": 0.7068781779983558, "grad_norm": 0.9456626733113144, "learning_rate": 4.833236371999844e-06, "loss": 0.4435, "step": 11608 }, { "epoch": 0.7069390737752337, "grad_norm": 0.8991330700409316, "learning_rate": 4.833207715197106e-06, "loss": 0.4344, "step": 11609 }, { "epoch": 0.7069999695521115, "grad_norm": 0.9572348252505024, "learning_rate": 4.8331790560173455e-06, "loss": 0.4622, "step": 11610 }, { "epoch": 0.7070608653289895, "grad_norm": 0.9979612523835043, "learning_rate": 4.83315039446059e-06, "loss": 0.3805, "step": 11611 }, { "epoch": 0.7071217611058673, "grad_norm": 1.0665840471397094, "learning_rate": 4.83312173052687e-06, "loss": 0.4736, "step": 11612 }, { "epoch": 0.7071826568827452, "grad_norm": 0.9714073597522496, "learning_rate": 4.833093064216213e-06, "loss": 0.4061, "step": 11613 }, { "epoch": 0.707243552659623, "grad_norm": 1.0349760694206358, "learning_rate": 4.83306439552865e-06, "loss": 0.4319, "step": 11614 }, { "epoch": 0.707304448436501, "grad_norm": 1.1806173759334897, "learning_rate": 4.83303572446421e-06, "loss": 0.4483, "step": 11615 }, { "epoch": 0.7073653442133788, "grad_norm": 1.0385797247354969, "learning_rate": 4.833007051022921e-06, "loss": 0.5579, "step": 11616 }, { "epoch": 0.7074262399902567, "grad_norm": 1.0451859991806551, "learning_rate": 4.8329783752048136e-06, "loss": 0.4302, "step": 11617 }, { "epoch": 0.7074871357671345, "grad_norm": 0.9549136397089851, "learning_rate": 4.8329496970099155e-06, "loss": 0.4181, "step": 11618 }, { "epoch": 0.7075480315440125, "grad_norm": 1.1217326914813628, "learning_rate": 4.832921016438257e-06, "loss": 0.3816, "step": 11619 }, { "epoch": 0.7076089273208903, "grad_norm": 1.0360887236375638, "learning_rate": 4.832892333489868e-06, "loss": 0.3943, "step": 11620 }, { "epoch": 0.7076698230977682, "grad_norm": 1.107392004222655, "learning_rate": 4.832863648164775e-06, "loss": 0.4424, "step": 11621 }, { "epoch": 0.707730718874646, "grad_norm": 1.030131379888995, "learning_rate": 4.8328349604630105e-06, "loss": 0.4016, "step": 11622 }, { "epoch": 0.707791614651524, "grad_norm": 1.0057736212644066, "learning_rate": 4.832806270384602e-06, "loss": 0.3667, "step": 11623 }, { "epoch": 0.7078525104284018, "grad_norm": 0.9649343790221249, "learning_rate": 4.832777577929578e-06, "loss": 0.3773, "step": 11624 }, { "epoch": 0.7079134062052796, "grad_norm": 0.94743736480503, "learning_rate": 4.832748883097969e-06, "loss": 0.4117, "step": 11625 }, { "epoch": 0.7079743019821575, "grad_norm": 1.088074797613643, "learning_rate": 4.832720185889806e-06, "loss": 0.3769, "step": 11626 }, { "epoch": 0.7080351977590355, "grad_norm": 0.9300256063164335, "learning_rate": 4.832691486305114e-06, "loss": 0.4827, "step": 11627 }, { "epoch": 0.7080960935359133, "grad_norm": 1.000906755277509, "learning_rate": 4.832662784343925e-06, "loss": 0.4563, "step": 11628 }, { "epoch": 0.7081569893127911, "grad_norm": 0.9537776055893235, "learning_rate": 4.832634080006267e-06, "loss": 0.397, "step": 11629 }, { "epoch": 0.708217885089669, "grad_norm": 1.0018291348667572, "learning_rate": 4.8326053732921705e-06, "loss": 0.4257, "step": 11630 }, { "epoch": 0.7082787808665469, "grad_norm": 1.0069904970946277, "learning_rate": 4.832576664201664e-06, "loss": 0.4706, "step": 11631 }, { "epoch": 0.7083396766434248, "grad_norm": 1.0563548422466478, "learning_rate": 4.8325479527347776e-06, "loss": 0.4455, "step": 11632 }, { "epoch": 0.7084005724203026, "grad_norm": 1.0127761170869656, "learning_rate": 4.832519238891539e-06, "loss": 0.4474, "step": 11633 }, { "epoch": 0.7084614681971805, "grad_norm": 0.973026238654497, "learning_rate": 4.832490522671978e-06, "loss": 0.464, "step": 11634 }, { "epoch": 0.7085223639740584, "grad_norm": 1.0054566868052837, "learning_rate": 4.832461804076125e-06, "loss": 0.4334, "step": 11635 }, { "epoch": 0.7085832597509363, "grad_norm": 1.1021451757797747, "learning_rate": 4.832433083104009e-06, "loss": 0.4309, "step": 11636 }, { "epoch": 0.7086441555278141, "grad_norm": 1.0239717295174438, "learning_rate": 4.8324043597556575e-06, "loss": 0.4192, "step": 11637 }, { "epoch": 0.708705051304692, "grad_norm": 0.9282863303737752, "learning_rate": 4.832375634031101e-06, "loss": 0.4945, "step": 11638 }, { "epoch": 0.7087659470815699, "grad_norm": 1.0107945875917363, "learning_rate": 4.83234690593037e-06, "loss": 0.4309, "step": 11639 }, { "epoch": 0.7088268428584478, "grad_norm": 1.0864187539895134, "learning_rate": 4.832318175453491e-06, "loss": 0.4504, "step": 11640 }, { "epoch": 0.7088877386353256, "grad_norm": 1.0323023294986042, "learning_rate": 4.8322894426004954e-06, "loss": 0.5347, "step": 11641 }, { "epoch": 0.7089486344122035, "grad_norm": 0.9775427769153656, "learning_rate": 4.832260707371411e-06, "loss": 0.4392, "step": 11642 }, { "epoch": 0.7090095301890814, "grad_norm": 1.0944188236738777, "learning_rate": 4.832231969766268e-06, "loss": 0.3796, "step": 11643 }, { "epoch": 0.7090704259659593, "grad_norm": 1.0410045851016207, "learning_rate": 4.832203229785096e-06, "loss": 0.4101, "step": 11644 }, { "epoch": 0.7091313217428371, "grad_norm": 0.9222617587014451, "learning_rate": 4.832174487427924e-06, "loss": 0.467, "step": 11645 }, { "epoch": 0.7091922175197151, "grad_norm": 1.0112131089435477, "learning_rate": 4.832145742694781e-06, "loss": 0.4212, "step": 11646 }, { "epoch": 0.7092531132965929, "grad_norm": 1.07802834863394, "learning_rate": 4.832116995585696e-06, "loss": 0.3989, "step": 11647 }, { "epoch": 0.7093140090734708, "grad_norm": 0.9690299250289776, "learning_rate": 4.832088246100699e-06, "loss": 0.3885, "step": 11648 }, { "epoch": 0.7093749048503486, "grad_norm": 1.0239044019784, "learning_rate": 4.832059494239819e-06, "loss": 0.4542, "step": 11649 }, { "epoch": 0.7094358006272266, "grad_norm": 0.9794458823423287, "learning_rate": 4.8320307400030855e-06, "loss": 0.467, "step": 11650 }, { "epoch": 0.7094966964041044, "grad_norm": 0.8910905690836756, "learning_rate": 4.832001983390528e-06, "loss": 0.4296, "step": 11651 }, { "epoch": 0.7095575921809822, "grad_norm": 1.0596061516826256, "learning_rate": 4.8319732244021746e-06, "loss": 0.4423, "step": 11652 }, { "epoch": 0.7096184879578601, "grad_norm": 0.9897085058211019, "learning_rate": 4.831944463038055e-06, "loss": 0.4138, "step": 11653 }, { "epoch": 0.709679383734738, "grad_norm": 0.9908430575054042, "learning_rate": 4.831915699298199e-06, "loss": 0.4487, "step": 11654 }, { "epoch": 0.7097402795116159, "grad_norm": 0.9337273081221814, "learning_rate": 4.831886933182637e-06, "loss": 0.5129, "step": 11655 }, { "epoch": 0.7098011752884937, "grad_norm": 0.9676320394830529, "learning_rate": 4.8318581646913954e-06, "loss": 0.4503, "step": 11656 }, { "epoch": 0.7098620710653716, "grad_norm": 1.0278941982200958, "learning_rate": 4.831829393824506e-06, "loss": 0.4398, "step": 11657 }, { "epoch": 0.7099229668422495, "grad_norm": 0.9354288251007071, "learning_rate": 4.831800620581997e-06, "loss": 0.5074, "step": 11658 }, { "epoch": 0.7099838626191274, "grad_norm": 0.96636553552685, "learning_rate": 4.831771844963898e-06, "loss": 0.4367, "step": 11659 }, { "epoch": 0.7100447583960052, "grad_norm": 1.0731436155255993, "learning_rate": 4.831743066970239e-06, "loss": 0.3347, "step": 11660 }, { "epoch": 0.7101056541728831, "grad_norm": 1.0325482227013598, "learning_rate": 4.831714286601048e-06, "loss": 0.3876, "step": 11661 }, { "epoch": 0.710166549949761, "grad_norm": 0.9465462306387549, "learning_rate": 4.831685503856356e-06, "loss": 0.4283, "step": 11662 }, { "epoch": 0.7102274457266389, "grad_norm": 0.9830960475971922, "learning_rate": 4.83165671873619e-06, "loss": 0.4866, "step": 11663 }, { "epoch": 0.7102883415035167, "grad_norm": 1.0724713958836214, "learning_rate": 4.831627931240581e-06, "loss": 0.4398, "step": 11664 }, { "epoch": 0.7103492372803946, "grad_norm": 1.0052330248484767, "learning_rate": 4.8315991413695586e-06, "loss": 0.3791, "step": 11665 }, { "epoch": 0.7104101330572725, "grad_norm": 0.9398558130752938, "learning_rate": 4.8315703491231505e-06, "loss": 0.5526, "step": 11666 }, { "epoch": 0.7104710288341504, "grad_norm": 1.0569467049273693, "learning_rate": 4.831541554501388e-06, "loss": 0.3812, "step": 11667 }, { "epoch": 0.7105319246110282, "grad_norm": 0.9527090526215563, "learning_rate": 4.831512757504298e-06, "loss": 0.477, "step": 11668 }, { "epoch": 0.710592820387906, "grad_norm": 0.9387864395319958, "learning_rate": 4.8314839581319125e-06, "loss": 0.4383, "step": 11669 }, { "epoch": 0.710653716164784, "grad_norm": 0.942313781350403, "learning_rate": 4.831455156384259e-06, "loss": 0.4493, "step": 11670 }, { "epoch": 0.7107146119416619, "grad_norm": 1.0505553513462955, "learning_rate": 4.831426352261367e-06, "loss": 0.4096, "step": 11671 }, { "epoch": 0.7107755077185397, "grad_norm": 0.9475832387329387, "learning_rate": 4.831397545763268e-06, "loss": 0.429, "step": 11672 }, { "epoch": 0.7108364034954175, "grad_norm": 0.9783774235356271, "learning_rate": 4.831368736889988e-06, "loss": 0.3846, "step": 11673 }, { "epoch": 0.7108972992722955, "grad_norm": 1.0389285368630732, "learning_rate": 4.8313399256415585e-06, "loss": 0.384, "step": 11674 }, { "epoch": 0.7109581950491733, "grad_norm": 1.0247542560183267, "learning_rate": 4.831311112018009e-06, "loss": 0.3959, "step": 11675 }, { "epoch": 0.7110190908260512, "grad_norm": 1.0385698497611364, "learning_rate": 4.831282296019367e-06, "loss": 0.4498, "step": 11676 }, { "epoch": 0.711079986602929, "grad_norm": 0.9698620699384151, "learning_rate": 4.831253477645664e-06, "loss": 0.3759, "step": 11677 }, { "epoch": 0.711140882379807, "grad_norm": 0.9414164312705617, "learning_rate": 4.831224656896928e-06, "loss": 0.4638, "step": 11678 }, { "epoch": 0.7112017781566848, "grad_norm": 0.9443419845228004, "learning_rate": 4.831195833773189e-06, "loss": 0.4414, "step": 11679 }, { "epoch": 0.7112626739335627, "grad_norm": 1.1217164524900252, "learning_rate": 4.831167008274477e-06, "loss": 0.3961, "step": 11680 }, { "epoch": 0.7113235697104405, "grad_norm": 1.1025583228349143, "learning_rate": 4.83113818040082e-06, "loss": 0.4009, "step": 11681 }, { "epoch": 0.7113844654873185, "grad_norm": 0.9367160719202969, "learning_rate": 4.831109350152246e-06, "loss": 0.4982, "step": 11682 }, { "epoch": 0.7114453612641963, "grad_norm": 1.1206267628948015, "learning_rate": 4.831080517528788e-06, "loss": 0.3353, "step": 11683 }, { "epoch": 0.7115062570410742, "grad_norm": 0.9442577432739455, "learning_rate": 4.831051682530474e-06, "loss": 0.499, "step": 11684 }, { "epoch": 0.711567152817952, "grad_norm": 0.9881320254512179, "learning_rate": 4.831022845157333e-06, "loss": 0.3913, "step": 11685 }, { "epoch": 0.71162804859483, "grad_norm": 0.9883487916171778, "learning_rate": 4.830994005409393e-06, "loss": 0.4421, "step": 11686 }, { "epoch": 0.7116889443717078, "grad_norm": 1.0337960216962854, "learning_rate": 4.830965163286686e-06, "loss": 0.4525, "step": 11687 }, { "epoch": 0.7117498401485857, "grad_norm": 1.1244564746699635, "learning_rate": 4.8309363187892395e-06, "loss": 0.4074, "step": 11688 }, { "epoch": 0.7118107359254636, "grad_norm": 1.0898021378059362, "learning_rate": 4.830907471917084e-06, "loss": 0.4497, "step": 11689 }, { "epoch": 0.7118716317023415, "grad_norm": 1.0491316498123704, "learning_rate": 4.830878622670248e-06, "loss": 0.3688, "step": 11690 }, { "epoch": 0.7119325274792193, "grad_norm": 0.9840429006354926, "learning_rate": 4.830849771048761e-06, "loss": 0.46, "step": 11691 }, { "epoch": 0.7119934232560972, "grad_norm": 1.041639585875487, "learning_rate": 4.830820917052654e-06, "loss": 0.513, "step": 11692 }, { "epoch": 0.7120543190329751, "grad_norm": 0.9858801450209914, "learning_rate": 4.830792060681954e-06, "loss": 0.4389, "step": 11693 }, { "epoch": 0.712115214809853, "grad_norm": 0.9835866965594479, "learning_rate": 4.8307632019366924e-06, "loss": 0.3508, "step": 11694 }, { "epoch": 0.7121761105867308, "grad_norm": 0.9407301973161473, "learning_rate": 4.830734340816897e-06, "loss": 0.4117, "step": 11695 }, { "epoch": 0.7122370063636086, "grad_norm": 0.916663985793612, "learning_rate": 4.830705477322598e-06, "loss": 0.4612, "step": 11696 }, { "epoch": 0.7122979021404866, "grad_norm": 0.9966045366679273, "learning_rate": 4.830676611453824e-06, "loss": 0.3891, "step": 11697 }, { "epoch": 0.7123587979173645, "grad_norm": 1.0039907985092857, "learning_rate": 4.830647743210607e-06, "loss": 0.4641, "step": 11698 }, { "epoch": 0.7124196936942423, "grad_norm": 0.9990704161616812, "learning_rate": 4.830618872592973e-06, "loss": 0.4076, "step": 11699 }, { "epoch": 0.7124805894711201, "grad_norm": 0.9723059550324888, "learning_rate": 4.830589999600953e-06, "loss": 0.4551, "step": 11700 }, { "epoch": 0.7125414852479981, "grad_norm": 1.0594435021628468, "learning_rate": 4.830561124234577e-06, "loss": 0.3991, "step": 11701 }, { "epoch": 0.7126023810248759, "grad_norm": 0.9950241610786015, "learning_rate": 4.830532246493874e-06, "loss": 0.4544, "step": 11702 }, { "epoch": 0.7126632768017538, "grad_norm": 1.0830620924359553, "learning_rate": 4.830503366378872e-06, "loss": 0.4797, "step": 11703 }, { "epoch": 0.7127241725786316, "grad_norm": 0.9939241888664512, "learning_rate": 4.830474483889603e-06, "loss": 0.4196, "step": 11704 }, { "epoch": 0.7127850683555096, "grad_norm": 0.9842793010368324, "learning_rate": 4.830445599026095e-06, "loss": 0.4012, "step": 11705 }, { "epoch": 0.7128459641323874, "grad_norm": 1.019120454437109, "learning_rate": 4.830416711788376e-06, "loss": 0.4739, "step": 11706 }, { "epoch": 0.7129068599092653, "grad_norm": 0.9353653860045301, "learning_rate": 4.830387822176478e-06, "loss": 0.4882, "step": 11707 }, { "epoch": 0.7129677556861431, "grad_norm": 0.9526812610520835, "learning_rate": 4.830358930190429e-06, "loss": 0.4577, "step": 11708 }, { "epoch": 0.7130286514630211, "grad_norm": 0.9465169081893752, "learning_rate": 4.830330035830259e-06, "loss": 0.497, "step": 11709 }, { "epoch": 0.7130895472398989, "grad_norm": 0.9732175381871365, "learning_rate": 4.830301139095997e-06, "loss": 0.4045, "step": 11710 }, { "epoch": 0.7131504430167768, "grad_norm": 0.9744428701047677, "learning_rate": 4.8302722399876725e-06, "loss": 0.3848, "step": 11711 }, { "epoch": 0.7132113387936546, "grad_norm": 1.0284269646156845, "learning_rate": 4.830243338505316e-06, "loss": 0.4663, "step": 11712 }, { "epoch": 0.7132722345705326, "grad_norm": 0.9896821197234854, "learning_rate": 4.8302144346489556e-06, "loss": 0.4913, "step": 11713 }, { "epoch": 0.7133331303474104, "grad_norm": 0.930627416918415, "learning_rate": 4.8301855284186206e-06, "loss": 0.4048, "step": 11714 }, { "epoch": 0.7133940261242883, "grad_norm": 0.9693421497627284, "learning_rate": 4.830156619814341e-06, "loss": 0.4572, "step": 11715 }, { "epoch": 0.7134549219011661, "grad_norm": 0.9927473917436499, "learning_rate": 4.830127708836147e-06, "loss": 0.3829, "step": 11716 }, { "epoch": 0.7135158176780441, "grad_norm": 1.1334838402406675, "learning_rate": 4.830098795484067e-06, "loss": 0.3614, "step": 11717 }, { "epoch": 0.7135767134549219, "grad_norm": 1.0305240446730524, "learning_rate": 4.830069879758131e-06, "loss": 0.4425, "step": 11718 }, { "epoch": 0.7136376092317998, "grad_norm": 1.0277970169676076, "learning_rate": 4.8300409616583675e-06, "loss": 0.4148, "step": 11719 }, { "epoch": 0.7136985050086776, "grad_norm": 0.950095572765355, "learning_rate": 4.830012041184808e-06, "loss": 0.4786, "step": 11720 }, { "epoch": 0.7137594007855556, "grad_norm": 1.0495868073349228, "learning_rate": 4.829983118337479e-06, "loss": 0.4211, "step": 11721 }, { "epoch": 0.7138202965624334, "grad_norm": 1.060132610991573, "learning_rate": 4.8299541931164125e-06, "loss": 0.4117, "step": 11722 }, { "epoch": 0.7138811923393112, "grad_norm": 0.9783561284863928, "learning_rate": 4.829925265521637e-06, "loss": 0.4629, "step": 11723 }, { "epoch": 0.7139420881161891, "grad_norm": 1.0690543673740276, "learning_rate": 4.829896335553183e-06, "loss": 0.3931, "step": 11724 }, { "epoch": 0.714002983893067, "grad_norm": 1.1472488785964106, "learning_rate": 4.829867403211078e-06, "loss": 0.3954, "step": 11725 }, { "epoch": 0.7140638796699449, "grad_norm": 1.0958146832648181, "learning_rate": 4.8298384684953534e-06, "loss": 0.4284, "step": 11726 }, { "epoch": 0.7141247754468227, "grad_norm": 0.9259942758446917, "learning_rate": 4.829809531406037e-06, "loss": 0.4665, "step": 11727 }, { "epoch": 0.7141856712237007, "grad_norm": 0.9641763685239386, "learning_rate": 4.82978059194316e-06, "loss": 0.4057, "step": 11728 }, { "epoch": 0.7142465670005785, "grad_norm": 0.9768519009718721, "learning_rate": 4.8297516501067495e-06, "loss": 0.4774, "step": 11729 }, { "epoch": 0.7143074627774564, "grad_norm": 0.998764332929245, "learning_rate": 4.829722705896838e-06, "loss": 0.4117, "step": 11730 }, { "epoch": 0.7143683585543342, "grad_norm": 1.0532673859810127, "learning_rate": 4.829693759313452e-06, "loss": 0.5365, "step": 11731 }, { "epoch": 0.7144292543312122, "grad_norm": 1.0360018966903741, "learning_rate": 4.8296648103566235e-06, "loss": 0.3922, "step": 11732 }, { "epoch": 0.71449015010809, "grad_norm": 0.8963947244573238, "learning_rate": 4.82963585902638e-06, "loss": 0.5312, "step": 11733 }, { "epoch": 0.7145510458849679, "grad_norm": 1.0258578027960725, "learning_rate": 4.829606905322753e-06, "loss": 0.4262, "step": 11734 }, { "epoch": 0.7146119416618457, "grad_norm": 1.0676994405179683, "learning_rate": 4.829577949245771e-06, "loss": 0.3794, "step": 11735 }, { "epoch": 0.7146728374387237, "grad_norm": 0.9603674888842298, "learning_rate": 4.8295489907954615e-06, "loss": 0.4599, "step": 11736 }, { "epoch": 0.7147337332156015, "grad_norm": 1.020917586219788, "learning_rate": 4.829520029971858e-06, "loss": 0.4417, "step": 11737 }, { "epoch": 0.7147946289924794, "grad_norm": 1.0297231020354902, "learning_rate": 4.829491066774987e-06, "loss": 0.4878, "step": 11738 }, { "epoch": 0.7148555247693572, "grad_norm": 0.9820015952413239, "learning_rate": 4.829462101204879e-06, "loss": 0.3889, "step": 11739 }, { "epoch": 0.7149164205462352, "grad_norm": 0.9553652232447319, "learning_rate": 4.8294331332615635e-06, "loss": 0.4274, "step": 11740 }, { "epoch": 0.714977316323113, "grad_norm": 1.0079160902171571, "learning_rate": 4.829404162945071e-06, "loss": 0.445, "step": 11741 }, { "epoch": 0.7150382120999909, "grad_norm": 0.9598702667906507, "learning_rate": 4.829375190255428e-06, "loss": 0.4037, "step": 11742 }, { "epoch": 0.7150991078768687, "grad_norm": 0.9143868338199806, "learning_rate": 4.829346215192667e-06, "loss": 0.5236, "step": 11743 }, { "epoch": 0.7151600036537467, "grad_norm": 0.9733407795177734, "learning_rate": 4.8293172377568166e-06, "loss": 0.4431, "step": 11744 }, { "epoch": 0.7152208994306245, "grad_norm": 0.9972160563110352, "learning_rate": 4.829288257947906e-06, "loss": 0.4348, "step": 11745 }, { "epoch": 0.7152817952075023, "grad_norm": 0.9587069826947798, "learning_rate": 4.829259275765965e-06, "loss": 0.5559, "step": 11746 }, { "epoch": 0.7153426909843802, "grad_norm": 0.9473968378386369, "learning_rate": 4.829230291211023e-06, "loss": 0.4137, "step": 11747 }, { "epoch": 0.7154035867612581, "grad_norm": 0.9860750340767941, "learning_rate": 4.82920130428311e-06, "loss": 0.3765, "step": 11748 }, { "epoch": 0.715464482538136, "grad_norm": 1.0037142497623661, "learning_rate": 4.8291723149822546e-06, "loss": 0.3768, "step": 11749 }, { "epoch": 0.7155253783150138, "grad_norm": 1.0081409492163733, "learning_rate": 4.8291433233084875e-06, "loss": 0.3881, "step": 11750 }, { "epoch": 0.7155862740918917, "grad_norm": 0.9887092442324034, "learning_rate": 4.829114329261837e-06, "loss": 0.4867, "step": 11751 }, { "epoch": 0.7156471698687696, "grad_norm": 1.02746419174354, "learning_rate": 4.829085332842333e-06, "loss": 0.3645, "step": 11752 }, { "epoch": 0.7157080656456475, "grad_norm": 0.9259031038368843, "learning_rate": 4.8290563340500066e-06, "loss": 0.4879, "step": 11753 }, { "epoch": 0.7157689614225253, "grad_norm": 0.9821681583728322, "learning_rate": 4.829027332884885e-06, "loss": 0.4961, "step": 11754 }, { "epoch": 0.7158298571994032, "grad_norm": 0.9715976117593521, "learning_rate": 4.828998329346999e-06, "loss": 0.4788, "step": 11755 }, { "epoch": 0.7158907529762811, "grad_norm": 1.1102758003988338, "learning_rate": 4.8289693234363775e-06, "loss": 0.3816, "step": 11756 }, { "epoch": 0.715951648753159, "grad_norm": 0.9956478201908447, "learning_rate": 4.8289403151530515e-06, "loss": 0.4338, "step": 11757 }, { "epoch": 0.7160125445300368, "grad_norm": 1.034753398310687, "learning_rate": 4.828911304497048e-06, "loss": 0.4077, "step": 11758 }, { "epoch": 0.7160734403069147, "grad_norm": 1.0765610158365777, "learning_rate": 4.8288822914684e-06, "loss": 0.4608, "step": 11759 }, { "epoch": 0.7161343360837926, "grad_norm": 0.9023519620506719, "learning_rate": 4.828853276067133e-06, "loss": 0.5212, "step": 11760 }, { "epoch": 0.7161952318606705, "grad_norm": 1.051486515161748, "learning_rate": 4.82882425829328e-06, "loss": 0.4369, "step": 11761 }, { "epoch": 0.7162561276375483, "grad_norm": 0.9729547416105265, "learning_rate": 4.82879523814687e-06, "loss": 0.4746, "step": 11762 }, { "epoch": 0.7163170234144262, "grad_norm": 0.9797967220647058, "learning_rate": 4.82876621562793e-06, "loss": 0.4473, "step": 11763 }, { "epoch": 0.7163779191913041, "grad_norm": 1.1236264803399807, "learning_rate": 4.828737190736493e-06, "loss": 0.4235, "step": 11764 }, { "epoch": 0.716438814968182, "grad_norm": 1.005219963346824, "learning_rate": 4.828708163472586e-06, "loss": 0.3922, "step": 11765 }, { "epoch": 0.7164997107450598, "grad_norm": 1.0736163549043922, "learning_rate": 4.82867913383624e-06, "loss": 0.406, "step": 11766 }, { "epoch": 0.7165606065219376, "grad_norm": 1.0276643154289558, "learning_rate": 4.828650101827484e-06, "loss": 0.3751, "step": 11767 }, { "epoch": 0.7166215022988156, "grad_norm": 0.9677439378424713, "learning_rate": 4.8286210674463475e-06, "loss": 0.403, "step": 11768 }, { "epoch": 0.7166823980756934, "grad_norm": 1.0471078602924335, "learning_rate": 4.82859203069286e-06, "loss": 0.4429, "step": 11769 }, { "epoch": 0.7167432938525713, "grad_norm": 0.9980610237891964, "learning_rate": 4.828562991567052e-06, "loss": 0.4482, "step": 11770 }, { "epoch": 0.7168041896294493, "grad_norm": 1.0003062331060173, "learning_rate": 4.828533950068952e-06, "loss": 0.427, "step": 11771 }, { "epoch": 0.7168650854063271, "grad_norm": 1.0068978603627419, "learning_rate": 4.82850490619859e-06, "loss": 0.4473, "step": 11772 }, { "epoch": 0.7169259811832049, "grad_norm": 0.9928299045554683, "learning_rate": 4.828475859955996e-06, "loss": 0.4782, "step": 11773 }, { "epoch": 0.7169868769600828, "grad_norm": 0.9014837018230694, "learning_rate": 4.828446811341199e-06, "loss": 0.3956, "step": 11774 }, { "epoch": 0.7170477727369607, "grad_norm": 1.1104095971015011, "learning_rate": 4.8284177603542295e-06, "loss": 0.4192, "step": 11775 }, { "epoch": 0.7171086685138386, "grad_norm": 0.9526365921680005, "learning_rate": 4.828388706995115e-06, "loss": 0.3956, "step": 11776 }, { "epoch": 0.7171695642907164, "grad_norm": 0.9634802685535866, "learning_rate": 4.828359651263888e-06, "loss": 0.4405, "step": 11777 }, { "epoch": 0.7172304600675943, "grad_norm": 0.915223023068366, "learning_rate": 4.828330593160575e-06, "loss": 0.5035, "step": 11778 }, { "epoch": 0.7172913558444722, "grad_norm": 1.0162303629209009, "learning_rate": 4.828301532685208e-06, "loss": 0.4231, "step": 11779 }, { "epoch": 0.7173522516213501, "grad_norm": 0.9453872680852735, "learning_rate": 4.8282724698378155e-06, "loss": 0.4372, "step": 11780 }, { "epoch": 0.7174131473982279, "grad_norm": 1.0222210059189947, "learning_rate": 4.828243404618428e-06, "loss": 0.428, "step": 11781 }, { "epoch": 0.7174740431751058, "grad_norm": 0.9420441301567557, "learning_rate": 4.828214337027074e-06, "loss": 0.4121, "step": 11782 }, { "epoch": 0.7175349389519837, "grad_norm": 0.9778107784245841, "learning_rate": 4.828185267063784e-06, "loss": 0.4416, "step": 11783 }, { "epoch": 0.7175958347288616, "grad_norm": 1.0200915438318312, "learning_rate": 4.828156194728587e-06, "loss": 0.4659, "step": 11784 }, { "epoch": 0.7176567305057394, "grad_norm": 1.0483481838381041, "learning_rate": 4.828127120021513e-06, "loss": 0.3789, "step": 11785 }, { "epoch": 0.7177176262826173, "grad_norm": 1.089808503230547, "learning_rate": 4.828098042942592e-06, "loss": 0.397, "step": 11786 }, { "epoch": 0.7177785220594952, "grad_norm": 0.9987970176753054, "learning_rate": 4.828068963491852e-06, "loss": 0.419, "step": 11787 }, { "epoch": 0.7178394178363731, "grad_norm": 0.9782190278751962, "learning_rate": 4.828039881669325e-06, "loss": 0.4062, "step": 11788 }, { "epoch": 0.7179003136132509, "grad_norm": 1.0343790783580715, "learning_rate": 4.828010797475038e-06, "loss": 0.3973, "step": 11789 }, { "epoch": 0.7179612093901288, "grad_norm": 1.047176493710062, "learning_rate": 4.827981710909023e-06, "loss": 0.4447, "step": 11790 }, { "epoch": 0.7180221051670067, "grad_norm": 1.0679042191676564, "learning_rate": 4.827952621971309e-06, "loss": 0.4592, "step": 11791 }, { "epoch": 0.7180830009438846, "grad_norm": 1.0040756035842613, "learning_rate": 4.8279235306619245e-06, "loss": 0.4308, "step": 11792 }, { "epoch": 0.7181438967207624, "grad_norm": 1.0286913905986093, "learning_rate": 4.8278944369809e-06, "loss": 0.3967, "step": 11793 }, { "epoch": 0.7182047924976402, "grad_norm": 1.0823636820854983, "learning_rate": 4.827865340928265e-06, "loss": 0.3401, "step": 11794 }, { "epoch": 0.7182656882745182, "grad_norm": 1.0156799435993655, "learning_rate": 4.8278362425040495e-06, "loss": 0.4367, "step": 11795 }, { "epoch": 0.718326584051396, "grad_norm": 0.9475407313971973, "learning_rate": 4.827807141708282e-06, "loss": 0.4235, "step": 11796 }, { "epoch": 0.7183874798282739, "grad_norm": 1.0529652456590535, "learning_rate": 4.827778038540993e-06, "loss": 0.3197, "step": 11797 }, { "epoch": 0.7184483756051517, "grad_norm": 1.0359141670185719, "learning_rate": 4.827748933002213e-06, "loss": 0.3974, "step": 11798 }, { "epoch": 0.7185092713820297, "grad_norm": 0.9444886464282132, "learning_rate": 4.827719825091971e-06, "loss": 0.4616, "step": 11799 }, { "epoch": 0.7185701671589075, "grad_norm": 1.0722858309169112, "learning_rate": 4.827690714810296e-06, "loss": 0.4406, "step": 11800 }, { "epoch": 0.7186310629357854, "grad_norm": 1.0369155356205877, "learning_rate": 4.827661602157218e-06, "loss": 0.449, "step": 11801 }, { "epoch": 0.7186919587126632, "grad_norm": 0.9705328155517297, "learning_rate": 4.8276324871327664e-06, "loss": 0.4431, "step": 11802 }, { "epoch": 0.7187528544895412, "grad_norm": 0.9470376441510959, "learning_rate": 4.827603369736972e-06, "loss": 0.4856, "step": 11803 }, { "epoch": 0.718813750266419, "grad_norm": 1.0259982806068388, "learning_rate": 4.827574249969863e-06, "loss": 0.383, "step": 11804 }, { "epoch": 0.7188746460432969, "grad_norm": 1.1476563493895104, "learning_rate": 4.82754512783147e-06, "loss": 0.3762, "step": 11805 }, { "epoch": 0.7189355418201747, "grad_norm": 1.0273197794602482, "learning_rate": 4.827516003321823e-06, "loss": 0.4168, "step": 11806 }, { "epoch": 0.7189964375970527, "grad_norm": 0.9587689887503626, "learning_rate": 4.82748687644095e-06, "loss": 0.3868, "step": 11807 }, { "epoch": 0.7190573333739305, "grad_norm": 0.9541120730042973, "learning_rate": 4.8274577471888825e-06, "loss": 0.4366, "step": 11808 }, { "epoch": 0.7191182291508084, "grad_norm": 1.0802514658248699, "learning_rate": 4.827428615565649e-06, "loss": 0.3977, "step": 11809 }, { "epoch": 0.7191791249276863, "grad_norm": 1.0292504926402102, "learning_rate": 4.8273994815712795e-06, "loss": 0.4022, "step": 11810 }, { "epoch": 0.7192400207045642, "grad_norm": 1.1188849811761565, "learning_rate": 4.827370345205804e-06, "loss": 0.3432, "step": 11811 }, { "epoch": 0.719300916481442, "grad_norm": 1.0382884428764458, "learning_rate": 4.827341206469253e-06, "loss": 0.409, "step": 11812 }, { "epoch": 0.7193618122583199, "grad_norm": 0.9443184375907074, "learning_rate": 4.827312065361654e-06, "loss": 0.5082, "step": 11813 }, { "epoch": 0.7194227080351978, "grad_norm": 1.0149599367312607, "learning_rate": 4.827282921883038e-06, "loss": 0.3806, "step": 11814 }, { "epoch": 0.7194836038120757, "grad_norm": 1.0468153501833901, "learning_rate": 4.8272537760334345e-06, "loss": 0.4358, "step": 11815 }, { "epoch": 0.7195444995889535, "grad_norm": 0.9726841643101438, "learning_rate": 4.827224627812873e-06, "loss": 0.3828, "step": 11816 }, { "epoch": 0.7196053953658313, "grad_norm": 0.9380914196830467, "learning_rate": 4.8271954772213845e-06, "loss": 0.4018, "step": 11817 }, { "epoch": 0.7196662911427093, "grad_norm": 0.9777844004070382, "learning_rate": 4.827166324258997e-06, "loss": 0.4602, "step": 11818 }, { "epoch": 0.7197271869195871, "grad_norm": 0.9795705023998904, "learning_rate": 4.827137168925741e-06, "loss": 0.4213, "step": 11819 }, { "epoch": 0.719788082696465, "grad_norm": 1.0325320485632017, "learning_rate": 4.827108011221645e-06, "loss": 0.4326, "step": 11820 }, { "epoch": 0.7198489784733428, "grad_norm": 1.0183587547037154, "learning_rate": 4.827078851146741e-06, "loss": 0.4924, "step": 11821 }, { "epoch": 0.7199098742502208, "grad_norm": 1.009695329061626, "learning_rate": 4.827049688701057e-06, "loss": 0.4716, "step": 11822 }, { "epoch": 0.7199707700270986, "grad_norm": 1.0378815938786188, "learning_rate": 4.827020523884623e-06, "loss": 0.4772, "step": 11823 }, { "epoch": 0.7200316658039765, "grad_norm": 0.9805963434811339, "learning_rate": 4.826991356697469e-06, "loss": 0.4817, "step": 11824 }, { "epoch": 0.7200925615808543, "grad_norm": 1.0380179118156336, "learning_rate": 4.826962187139625e-06, "loss": 0.3346, "step": 11825 }, { "epoch": 0.7201534573577323, "grad_norm": 0.9126604776266244, "learning_rate": 4.82693301521112e-06, "loss": 0.4566, "step": 11826 }, { "epoch": 0.7202143531346101, "grad_norm": 1.010577144430645, "learning_rate": 4.826903840911984e-06, "loss": 0.4508, "step": 11827 }, { "epoch": 0.720275248911488, "grad_norm": 0.994951963359578, "learning_rate": 4.826874664242246e-06, "loss": 0.3656, "step": 11828 }, { "epoch": 0.7203361446883658, "grad_norm": 1.0199330809497111, "learning_rate": 4.8268454852019376e-06, "loss": 0.4554, "step": 11829 }, { "epoch": 0.7203970404652438, "grad_norm": 0.925808131715066, "learning_rate": 4.826816303791087e-06, "loss": 0.4225, "step": 11830 }, { "epoch": 0.7204579362421216, "grad_norm": 0.9455795294956787, "learning_rate": 4.826787120009725e-06, "loss": 0.4869, "step": 11831 }, { "epoch": 0.7205188320189995, "grad_norm": 1.0722972792976844, "learning_rate": 4.82675793385788e-06, "loss": 0.4238, "step": 11832 }, { "epoch": 0.7205797277958773, "grad_norm": 1.051796553540098, "learning_rate": 4.826728745335581e-06, "loss": 0.3602, "step": 11833 }, { "epoch": 0.7206406235727553, "grad_norm": 1.1265752906334203, "learning_rate": 4.826699554442861e-06, "loss": 0.5233, "step": 11834 }, { "epoch": 0.7207015193496331, "grad_norm": 0.9494942823021382, "learning_rate": 4.8266703611797474e-06, "loss": 0.4359, "step": 11835 }, { "epoch": 0.720762415126511, "grad_norm": 1.009930760817381, "learning_rate": 4.82664116554627e-06, "loss": 0.4707, "step": 11836 }, { "epoch": 0.7208233109033888, "grad_norm": 1.0921767155833768, "learning_rate": 4.82661196754246e-06, "loss": 0.3843, "step": 11837 }, { "epoch": 0.7208842066802668, "grad_norm": 1.0106056147346578, "learning_rate": 4.826582767168345e-06, "loss": 0.4353, "step": 11838 }, { "epoch": 0.7209451024571446, "grad_norm": 1.103776872381024, "learning_rate": 4.826553564423956e-06, "loss": 0.3829, "step": 11839 }, { "epoch": 0.7210059982340224, "grad_norm": 1.0545782645965567, "learning_rate": 4.826524359309323e-06, "loss": 0.5038, "step": 11840 }, { "epoch": 0.7210668940109003, "grad_norm": 0.9156878937987778, "learning_rate": 4.826495151824475e-06, "loss": 0.3874, "step": 11841 }, { "epoch": 0.7211277897877783, "grad_norm": 0.9256594597169081, "learning_rate": 4.826465941969442e-06, "loss": 0.4783, "step": 11842 }, { "epoch": 0.7211886855646561, "grad_norm": 1.0731106568940747, "learning_rate": 4.826436729744255e-06, "loss": 0.3745, "step": 11843 }, { "epoch": 0.7212495813415339, "grad_norm": 1.0897770595422245, "learning_rate": 4.826407515148942e-06, "loss": 0.3952, "step": 11844 }, { "epoch": 0.7213104771184118, "grad_norm": 1.019281830059066, "learning_rate": 4.8263782981835325e-06, "loss": 0.4536, "step": 11845 }, { "epoch": 0.7213713728952897, "grad_norm": 0.9842570026791093, "learning_rate": 4.826349078848058e-06, "loss": 0.4601, "step": 11846 }, { "epoch": 0.7214322686721676, "grad_norm": 1.008126906298017, "learning_rate": 4.826319857142547e-06, "loss": 0.4429, "step": 11847 }, { "epoch": 0.7214931644490454, "grad_norm": 0.9818334416230893, "learning_rate": 4.82629063306703e-06, "loss": 0.4007, "step": 11848 }, { "epoch": 0.7215540602259233, "grad_norm": 0.9301085934379173, "learning_rate": 4.8262614066215365e-06, "loss": 0.471, "step": 11849 }, { "epoch": 0.7216149560028012, "grad_norm": 0.91349594098339, "learning_rate": 4.8262321778060965e-06, "loss": 0.4245, "step": 11850 }, { "epoch": 0.7216758517796791, "grad_norm": 0.9412203111455544, "learning_rate": 4.8262029466207396e-06, "loss": 0.4276, "step": 11851 }, { "epoch": 0.7217367475565569, "grad_norm": 0.9603506981765906, "learning_rate": 4.8261737130654954e-06, "loss": 0.4705, "step": 11852 }, { "epoch": 0.7217976433334349, "grad_norm": 0.9981577274424864, "learning_rate": 4.826144477140393e-06, "loss": 0.424, "step": 11853 }, { "epoch": 0.7218585391103127, "grad_norm": 1.0204844246522182, "learning_rate": 4.826115238845463e-06, "loss": 0.3385, "step": 11854 }, { "epoch": 0.7219194348871906, "grad_norm": 0.9775623358669062, "learning_rate": 4.826085998180736e-06, "loss": 0.4112, "step": 11855 }, { "epoch": 0.7219803306640684, "grad_norm": 1.0614103856252375, "learning_rate": 4.8260567551462404e-06, "loss": 0.454, "step": 11856 }, { "epoch": 0.7220412264409464, "grad_norm": 1.00395074077946, "learning_rate": 4.8260275097420066e-06, "loss": 0.4309, "step": 11857 }, { "epoch": 0.7221021222178242, "grad_norm": 1.0986175154507325, "learning_rate": 4.825998261968064e-06, "loss": 0.4926, "step": 11858 }, { "epoch": 0.7221630179947021, "grad_norm": 0.9899064407347183, "learning_rate": 4.825969011824444e-06, "loss": 0.4051, "step": 11859 }, { "epoch": 0.7222239137715799, "grad_norm": 0.9951349248435766, "learning_rate": 4.825939759311174e-06, "loss": 0.4271, "step": 11860 }, { "epoch": 0.7222848095484579, "grad_norm": 0.9606713202151921, "learning_rate": 4.825910504428285e-06, "loss": 0.5056, "step": 11861 }, { "epoch": 0.7223457053253357, "grad_norm": 1.0622777036227589, "learning_rate": 4.825881247175807e-06, "loss": 0.3193, "step": 11862 }, { "epoch": 0.7224066011022136, "grad_norm": 0.9177672403022674, "learning_rate": 4.825851987553769e-06, "loss": 0.4381, "step": 11863 }, { "epoch": 0.7224674968790914, "grad_norm": 1.1991537386910225, "learning_rate": 4.825822725562203e-06, "loss": 0.3448, "step": 11864 }, { "epoch": 0.7225283926559694, "grad_norm": 0.9947887728377144, "learning_rate": 4.825793461201136e-06, "loss": 0.463, "step": 11865 }, { "epoch": 0.7225892884328472, "grad_norm": 0.9885236848812046, "learning_rate": 4.825764194470599e-06, "loss": 0.4433, "step": 11866 }, { "epoch": 0.722650184209725, "grad_norm": 0.9392812632686315, "learning_rate": 4.825734925370621e-06, "loss": 0.4648, "step": 11867 }, { "epoch": 0.7227110799866029, "grad_norm": 1.1333399975671017, "learning_rate": 4.825705653901234e-06, "loss": 0.3658, "step": 11868 }, { "epoch": 0.7227719757634808, "grad_norm": 1.0405899225181496, "learning_rate": 4.825676380062465e-06, "loss": 0.3977, "step": 11869 }, { "epoch": 0.7228328715403587, "grad_norm": 0.9614422504836262, "learning_rate": 4.8256471038543465e-06, "loss": 0.4976, "step": 11870 }, { "epoch": 0.7228937673172365, "grad_norm": 1.00370068945994, "learning_rate": 4.825617825276907e-06, "loss": 0.3464, "step": 11871 }, { "epoch": 0.7229546630941144, "grad_norm": 1.1612430667318658, "learning_rate": 4.825588544330175e-06, "loss": 0.4223, "step": 11872 }, { "epoch": 0.7230155588709923, "grad_norm": 1.0067501755317505, "learning_rate": 4.825559261014183e-06, "loss": 0.4152, "step": 11873 }, { "epoch": 0.7230764546478702, "grad_norm": 0.9703471407517762, "learning_rate": 4.8255299753289594e-06, "loss": 0.4342, "step": 11874 }, { "epoch": 0.723137350424748, "grad_norm": 0.998158739434957, "learning_rate": 4.825500687274535e-06, "loss": 0.44, "step": 11875 }, { "epoch": 0.7231982462016259, "grad_norm": 1.0760577043580213, "learning_rate": 4.825471396850938e-06, "loss": 0.4215, "step": 11876 }, { "epoch": 0.7232591419785038, "grad_norm": 1.1023016701483817, "learning_rate": 4.825442104058199e-06, "loss": 0.3955, "step": 11877 }, { "epoch": 0.7233200377553817, "grad_norm": 1.083910742321289, "learning_rate": 4.825412808896348e-06, "loss": 0.481, "step": 11878 }, { "epoch": 0.7233809335322595, "grad_norm": 1.0545826567462377, "learning_rate": 4.825383511365415e-06, "loss": 0.3828, "step": 11879 }, { "epoch": 0.7234418293091374, "grad_norm": 1.1247152232109814, "learning_rate": 4.82535421146543e-06, "loss": 0.3415, "step": 11880 }, { "epoch": 0.7235027250860153, "grad_norm": 1.0305143936319492, "learning_rate": 4.825324909196422e-06, "loss": 0.46, "step": 11881 }, { "epoch": 0.7235636208628932, "grad_norm": 1.0858280026564187, "learning_rate": 4.825295604558421e-06, "loss": 0.3747, "step": 11882 }, { "epoch": 0.723624516639771, "grad_norm": 1.052305178267875, "learning_rate": 4.825266297551457e-06, "loss": 0.3753, "step": 11883 }, { "epoch": 0.7236854124166489, "grad_norm": 0.9866489346669776, "learning_rate": 4.82523698817556e-06, "loss": 0.4081, "step": 11884 }, { "epoch": 0.7237463081935268, "grad_norm": 1.1088641403291803, "learning_rate": 4.825207676430761e-06, "loss": 0.3247, "step": 11885 }, { "epoch": 0.7238072039704047, "grad_norm": 0.9453208918184867, "learning_rate": 4.825178362317088e-06, "loss": 0.4289, "step": 11886 }, { "epoch": 0.7238680997472825, "grad_norm": 1.0198068242955614, "learning_rate": 4.825149045834572e-06, "loss": 0.3737, "step": 11887 }, { "epoch": 0.7239289955241603, "grad_norm": 0.944446975549897, "learning_rate": 4.825119726983241e-06, "loss": 0.4653, "step": 11888 }, { "epoch": 0.7239898913010383, "grad_norm": 1.0236773401555772, "learning_rate": 4.825090405763129e-06, "loss": 0.3918, "step": 11889 }, { "epoch": 0.7240507870779161, "grad_norm": 1.150364636860069, "learning_rate": 4.825061082174262e-06, "loss": 0.4935, "step": 11890 }, { "epoch": 0.724111682854794, "grad_norm": 0.9238322468652062, "learning_rate": 4.82503175621667e-06, "loss": 0.3851, "step": 11891 }, { "epoch": 0.724172578631672, "grad_norm": 0.9601477109249512, "learning_rate": 4.825002427890385e-06, "loss": 0.4304, "step": 11892 }, { "epoch": 0.7242334744085498, "grad_norm": 0.9312143563367804, "learning_rate": 4.8249730971954364e-06, "loss": 0.4653, "step": 11893 }, { "epoch": 0.7242943701854276, "grad_norm": 0.9819347858212228, "learning_rate": 4.824943764131853e-06, "loss": 0.3819, "step": 11894 }, { "epoch": 0.7243552659623055, "grad_norm": 1.0027703128743302, "learning_rate": 4.824914428699665e-06, "loss": 0.4487, "step": 11895 }, { "epoch": 0.7244161617391834, "grad_norm": 1.0349207442489745, "learning_rate": 4.824885090898903e-06, "loss": 0.427, "step": 11896 }, { "epoch": 0.7244770575160613, "grad_norm": 0.9433122798659715, "learning_rate": 4.824855750729596e-06, "loss": 0.4429, "step": 11897 }, { "epoch": 0.7245379532929391, "grad_norm": 1.0665253522783584, "learning_rate": 4.824826408191774e-06, "loss": 0.4292, "step": 11898 }, { "epoch": 0.724598849069817, "grad_norm": 0.9493631256839333, "learning_rate": 4.824797063285468e-06, "loss": 0.4325, "step": 11899 }, { "epoch": 0.7246597448466949, "grad_norm": 0.9834817920108143, "learning_rate": 4.824767716010707e-06, "loss": 0.4304, "step": 11900 }, { "epoch": 0.7247206406235728, "grad_norm": 1.0421497789114849, "learning_rate": 4.82473836636752e-06, "loss": 0.3499, "step": 11901 }, { "epoch": 0.7247815364004506, "grad_norm": 1.0526421956427343, "learning_rate": 4.824709014355939e-06, "loss": 0.4162, "step": 11902 }, { "epoch": 0.7248424321773285, "grad_norm": 1.073062565843298, "learning_rate": 4.824679659975992e-06, "loss": 0.4296, "step": 11903 }, { "epoch": 0.7249033279542064, "grad_norm": 1.0988277211205375, "learning_rate": 4.82465030322771e-06, "loss": 0.3618, "step": 11904 }, { "epoch": 0.7249642237310843, "grad_norm": 0.9998073310613875, "learning_rate": 4.824620944111122e-06, "loss": 0.4281, "step": 11905 }, { "epoch": 0.7250251195079621, "grad_norm": 0.9751645034477902, "learning_rate": 4.82459158262626e-06, "loss": 0.4099, "step": 11906 }, { "epoch": 0.72508601528484, "grad_norm": 0.9445381429660272, "learning_rate": 4.824562218773152e-06, "loss": 0.4979, "step": 11907 }, { "epoch": 0.7251469110617179, "grad_norm": 1.0350889214146917, "learning_rate": 4.824532852551828e-06, "loss": 0.4028, "step": 11908 }, { "epoch": 0.7252078068385958, "grad_norm": 1.074229397149009, "learning_rate": 4.824503483962318e-06, "loss": 0.435, "step": 11909 }, { "epoch": 0.7252687026154736, "grad_norm": 0.9848645925568652, "learning_rate": 4.8244741130046525e-06, "loss": 0.4803, "step": 11910 }, { "epoch": 0.7253295983923514, "grad_norm": 1.0026836519630018, "learning_rate": 4.824444739678861e-06, "loss": 0.4012, "step": 11911 }, { "epoch": 0.7253904941692294, "grad_norm": 1.0119818455119052, "learning_rate": 4.824415363984973e-06, "loss": 0.341, "step": 11912 }, { "epoch": 0.7254513899461073, "grad_norm": 0.9184923704292113, "learning_rate": 4.824385985923019e-06, "loss": 0.4691, "step": 11913 }, { "epoch": 0.7255122857229851, "grad_norm": 1.0429043425514954, "learning_rate": 4.82435660549303e-06, "loss": 0.4112, "step": 11914 }, { "epoch": 0.7255731814998629, "grad_norm": 0.9808493953779334, "learning_rate": 4.824327222695034e-06, "loss": 0.5003, "step": 11915 }, { "epoch": 0.7256340772767409, "grad_norm": 0.9917307945685514, "learning_rate": 4.824297837529062e-06, "loss": 0.4416, "step": 11916 }, { "epoch": 0.7256949730536187, "grad_norm": 0.9640565695389408, "learning_rate": 4.824268449995143e-06, "loss": 0.4247, "step": 11917 }, { "epoch": 0.7257558688304966, "grad_norm": 1.0384473200197182, "learning_rate": 4.8242390600933084e-06, "loss": 0.3706, "step": 11918 }, { "epoch": 0.7258167646073744, "grad_norm": 0.9954518932106681, "learning_rate": 4.824209667823587e-06, "loss": 0.4291, "step": 11919 }, { "epoch": 0.7258776603842524, "grad_norm": 1.0630226321895193, "learning_rate": 4.8241802731860095e-06, "loss": 0.3892, "step": 11920 }, { "epoch": 0.7259385561611302, "grad_norm": 0.9541653833376803, "learning_rate": 4.824150876180606e-06, "loss": 0.4342, "step": 11921 }, { "epoch": 0.7259994519380081, "grad_norm": 1.0696871065082691, "learning_rate": 4.824121476807405e-06, "loss": 0.3995, "step": 11922 }, { "epoch": 0.7260603477148859, "grad_norm": 0.9914628533216577, "learning_rate": 4.8240920750664365e-06, "loss": 0.4182, "step": 11923 }, { "epoch": 0.7261212434917639, "grad_norm": 1.0436624199098192, "learning_rate": 4.824062670957733e-06, "loss": 0.4271, "step": 11924 }, { "epoch": 0.7261821392686417, "grad_norm": 0.989060135922268, "learning_rate": 4.824033264481322e-06, "loss": 0.3857, "step": 11925 }, { "epoch": 0.7262430350455196, "grad_norm": 0.9461311226298055, "learning_rate": 4.824003855637234e-06, "loss": 0.4068, "step": 11926 }, { "epoch": 0.7263039308223974, "grad_norm": 0.9503453691206647, "learning_rate": 4.8239744444255e-06, "loss": 0.4833, "step": 11927 }, { "epoch": 0.7263648265992754, "grad_norm": 0.9737602270361837, "learning_rate": 4.823945030846149e-06, "loss": 0.4411, "step": 11928 }, { "epoch": 0.7264257223761532, "grad_norm": 1.005842968738042, "learning_rate": 4.823915614899211e-06, "loss": 0.4171, "step": 11929 }, { "epoch": 0.7264866181530311, "grad_norm": 1.0742941212471138, "learning_rate": 4.823886196584715e-06, "loss": 0.4636, "step": 11930 }, { "epoch": 0.7265475139299089, "grad_norm": 1.0724880048379293, "learning_rate": 4.823856775902693e-06, "loss": 0.3745, "step": 11931 }, { "epoch": 0.7266084097067869, "grad_norm": 1.0071401487712863, "learning_rate": 4.823827352853174e-06, "loss": 0.3601, "step": 11932 }, { "epoch": 0.7266693054836647, "grad_norm": 1.0142497766530318, "learning_rate": 4.823797927436188e-06, "loss": 0.4138, "step": 11933 }, { "epoch": 0.7267302012605426, "grad_norm": 0.9789725353905481, "learning_rate": 4.823768499651764e-06, "loss": 0.4565, "step": 11934 }, { "epoch": 0.7267910970374205, "grad_norm": 0.9187324759479093, "learning_rate": 4.823739069499934e-06, "loss": 0.449, "step": 11935 }, { "epoch": 0.7268519928142984, "grad_norm": 1.0510660911719867, "learning_rate": 4.823709636980728e-06, "loss": 0.3942, "step": 11936 }, { "epoch": 0.7269128885911762, "grad_norm": 1.0552611919986514, "learning_rate": 4.823680202094173e-06, "loss": 0.3879, "step": 11937 }, { "epoch": 0.726973784368054, "grad_norm": 1.0060130717633562, "learning_rate": 4.823650764840302e-06, "loss": 0.482, "step": 11938 }, { "epoch": 0.727034680144932, "grad_norm": 0.9012602974187273, "learning_rate": 4.8236213252191435e-06, "loss": 0.4847, "step": 11939 }, { "epoch": 0.7270955759218098, "grad_norm": 0.9646788637139624, "learning_rate": 4.8235918832307286e-06, "loss": 0.4187, "step": 11940 }, { "epoch": 0.7271564716986877, "grad_norm": 1.0375176486445323, "learning_rate": 4.823562438875086e-06, "loss": 0.3851, "step": 11941 }, { "epoch": 0.7272173674755655, "grad_norm": 0.9909226255420537, "learning_rate": 4.8235329921522464e-06, "loss": 0.4022, "step": 11942 }, { "epoch": 0.7272782632524435, "grad_norm": 1.026020678743728, "learning_rate": 4.823503543062239e-06, "loss": 0.4515, "step": 11943 }, { "epoch": 0.7273391590293213, "grad_norm": 0.9732449011727738, "learning_rate": 4.8234740916050956e-06, "loss": 0.4032, "step": 11944 }, { "epoch": 0.7274000548061992, "grad_norm": 0.968616563294295, "learning_rate": 4.823444637780844e-06, "loss": 0.4382, "step": 11945 }, { "epoch": 0.727460950583077, "grad_norm": 0.9676827188091601, "learning_rate": 4.8234151815895165e-06, "loss": 0.4432, "step": 11946 }, { "epoch": 0.727521846359955, "grad_norm": 0.9854025062083545, "learning_rate": 4.8233857230311406e-06, "loss": 0.4339, "step": 11947 }, { "epoch": 0.7275827421368328, "grad_norm": 0.9931304460397146, "learning_rate": 4.823356262105749e-06, "loss": 0.4582, "step": 11948 }, { "epoch": 0.7276436379137107, "grad_norm": 1.1222843836320797, "learning_rate": 4.823326798813369e-06, "loss": 0.3852, "step": 11949 }, { "epoch": 0.7277045336905885, "grad_norm": 0.9175910678512722, "learning_rate": 4.823297333154033e-06, "loss": 0.4205, "step": 11950 }, { "epoch": 0.7277654294674665, "grad_norm": 0.9282645972121089, "learning_rate": 4.82326786512777e-06, "loss": 0.4658, "step": 11951 }, { "epoch": 0.7278263252443443, "grad_norm": 0.8936287127351322, "learning_rate": 4.82323839473461e-06, "loss": 0.4615, "step": 11952 }, { "epoch": 0.7278872210212222, "grad_norm": 1.1130583047972558, "learning_rate": 4.823208921974583e-06, "loss": 0.3525, "step": 11953 }, { "epoch": 0.7279481167981, "grad_norm": 1.0309079589521295, "learning_rate": 4.823179446847717e-06, "loss": 0.4135, "step": 11954 }, { "epoch": 0.728009012574978, "grad_norm": 1.0981963803122798, "learning_rate": 4.823149969354047e-06, "loss": 0.3988, "step": 11955 }, { "epoch": 0.7280699083518558, "grad_norm": 0.9487122389244117, "learning_rate": 4.823120489493599e-06, "loss": 0.4435, "step": 11956 }, { "epoch": 0.7281308041287337, "grad_norm": 1.0026302640073124, "learning_rate": 4.823091007266404e-06, "loss": 0.396, "step": 11957 }, { "epoch": 0.7281916999056115, "grad_norm": 0.9312921206765016, "learning_rate": 4.823061522672492e-06, "loss": 0.4049, "step": 11958 }, { "epoch": 0.7282525956824895, "grad_norm": 0.935054709576301, "learning_rate": 4.823032035711893e-06, "loss": 0.4792, "step": 11959 }, { "epoch": 0.7283134914593673, "grad_norm": 1.0072983951117631, "learning_rate": 4.8230025463846385e-06, "loss": 0.4074, "step": 11960 }, { "epoch": 0.7283743872362451, "grad_norm": 0.982896362434727, "learning_rate": 4.822973054690756e-06, "loss": 0.4607, "step": 11961 }, { "epoch": 0.728435283013123, "grad_norm": 1.0160681795746687, "learning_rate": 4.822943560630278e-06, "loss": 0.4187, "step": 11962 }, { "epoch": 0.728496178790001, "grad_norm": 1.097525498223729, "learning_rate": 4.822914064203232e-06, "loss": 0.3708, "step": 11963 }, { "epoch": 0.7285570745668788, "grad_norm": 1.0129821605220293, "learning_rate": 4.82288456540965e-06, "loss": 0.4265, "step": 11964 }, { "epoch": 0.7286179703437566, "grad_norm": 0.9974082068158995, "learning_rate": 4.822855064249562e-06, "loss": 0.4328, "step": 11965 }, { "epoch": 0.7286788661206345, "grad_norm": 0.9801915534291052, "learning_rate": 4.822825560722998e-06, "loss": 0.5339, "step": 11966 }, { "epoch": 0.7287397618975124, "grad_norm": 1.0148540650170996, "learning_rate": 4.8227960548299865e-06, "loss": 0.4492, "step": 11967 }, { "epoch": 0.7288006576743903, "grad_norm": 1.0348933178571533, "learning_rate": 4.822766546570559e-06, "loss": 0.4676, "step": 11968 }, { "epoch": 0.7288615534512681, "grad_norm": 1.0194861591938913, "learning_rate": 4.822737035944746e-06, "loss": 0.4145, "step": 11969 }, { "epoch": 0.728922449228146, "grad_norm": 0.9815789066319779, "learning_rate": 4.822707522952575e-06, "loss": 0.4293, "step": 11970 }, { "epoch": 0.7289833450050239, "grad_norm": 1.0766636690766507, "learning_rate": 4.822678007594079e-06, "loss": 0.4151, "step": 11971 }, { "epoch": 0.7290442407819018, "grad_norm": 1.0432004902493195, "learning_rate": 4.822648489869287e-06, "loss": 0.4003, "step": 11972 }, { "epoch": 0.7291051365587796, "grad_norm": 1.0115945079854143, "learning_rate": 4.822618969778229e-06, "loss": 0.413, "step": 11973 }, { "epoch": 0.7291660323356576, "grad_norm": 0.9884360539863319, "learning_rate": 4.8225894473209354e-06, "loss": 0.4457, "step": 11974 }, { "epoch": 0.7292269281125354, "grad_norm": 0.9553745184990143, "learning_rate": 4.822559922497435e-06, "loss": 0.4285, "step": 11975 }, { "epoch": 0.7292878238894133, "grad_norm": 0.9633817981255595, "learning_rate": 4.8225303953077595e-06, "loss": 0.5685, "step": 11976 }, { "epoch": 0.7293487196662911, "grad_norm": 0.9630985516604308, "learning_rate": 4.822500865751938e-06, "loss": 0.4158, "step": 11977 }, { "epoch": 0.7294096154431691, "grad_norm": 1.0153858678929804, "learning_rate": 4.822471333830001e-06, "loss": 0.4111, "step": 11978 }, { "epoch": 0.7294705112200469, "grad_norm": 1.1248482151679597, "learning_rate": 4.822441799541979e-06, "loss": 0.4529, "step": 11979 }, { "epoch": 0.7295314069969248, "grad_norm": 0.9485384284495083, "learning_rate": 4.822412262887901e-06, "loss": 0.4515, "step": 11980 }, { "epoch": 0.7295923027738026, "grad_norm": 0.9683339849001861, "learning_rate": 4.822382723867798e-06, "loss": 0.4199, "step": 11981 }, { "epoch": 0.7296531985506806, "grad_norm": 0.9821148542644204, "learning_rate": 4.8223531824816996e-06, "loss": 0.4669, "step": 11982 }, { "epoch": 0.7297140943275584, "grad_norm": 0.9264973361187309, "learning_rate": 4.822323638729636e-06, "loss": 0.4506, "step": 11983 }, { "epoch": 0.7297749901044362, "grad_norm": 1.019484374981726, "learning_rate": 4.822294092611637e-06, "loss": 0.3865, "step": 11984 }, { "epoch": 0.7298358858813141, "grad_norm": 1.0246700758504652, "learning_rate": 4.822264544127734e-06, "loss": 0.3627, "step": 11985 }, { "epoch": 0.729896781658192, "grad_norm": 1.093574696209523, "learning_rate": 4.822234993277956e-06, "loss": 0.3975, "step": 11986 }, { "epoch": 0.7299576774350699, "grad_norm": 1.0116855852215985, "learning_rate": 4.822205440062333e-06, "loss": 0.3921, "step": 11987 }, { "epoch": 0.7300185732119477, "grad_norm": 0.9572246479298739, "learning_rate": 4.822175884480895e-06, "loss": 0.4622, "step": 11988 }, { "epoch": 0.7300794689888256, "grad_norm": 1.0923868516473958, "learning_rate": 4.822146326533673e-06, "loss": 0.4473, "step": 11989 }, { "epoch": 0.7301403647657035, "grad_norm": 1.064489113310927, "learning_rate": 4.822116766220696e-06, "loss": 0.3827, "step": 11990 }, { "epoch": 0.7302012605425814, "grad_norm": 0.9500843171760008, "learning_rate": 4.8220872035419954e-06, "loss": 0.402, "step": 11991 }, { "epoch": 0.7302621563194592, "grad_norm": 0.9701662034465359, "learning_rate": 4.8220576384976e-06, "loss": 0.4116, "step": 11992 }, { "epoch": 0.7303230520963371, "grad_norm": 0.9621032843047648, "learning_rate": 4.822028071087541e-06, "loss": 0.4078, "step": 11993 }, { "epoch": 0.730383947873215, "grad_norm": 0.9596326671099255, "learning_rate": 4.821998501311847e-06, "loss": 0.4291, "step": 11994 }, { "epoch": 0.7304448436500929, "grad_norm": 1.0958533781948059, "learning_rate": 4.821968929170551e-06, "loss": 0.4695, "step": 11995 }, { "epoch": 0.7305057394269707, "grad_norm": 0.9619648872608005, "learning_rate": 4.8219393546636806e-06, "loss": 0.5191, "step": 11996 }, { "epoch": 0.7305666352038486, "grad_norm": 1.016253130647302, "learning_rate": 4.8219097777912666e-06, "loss": 0.4461, "step": 11997 }, { "epoch": 0.7306275309807265, "grad_norm": 0.8793823172575211, "learning_rate": 4.821880198553339e-06, "loss": 0.4631, "step": 11998 }, { "epoch": 0.7306884267576044, "grad_norm": 1.0150451378206824, "learning_rate": 4.821850616949929e-06, "loss": 0.4233, "step": 11999 }, { "epoch": 0.7307493225344822, "grad_norm": 1.0055072758677053, "learning_rate": 4.821821032981064e-06, "loss": 0.4385, "step": 12000 }, { "epoch": 0.7308102183113601, "grad_norm": 1.0046486779332475, "learning_rate": 4.821791446646777e-06, "loss": 0.4203, "step": 12001 }, { "epoch": 0.730871114088238, "grad_norm": 0.9685275863948677, "learning_rate": 4.821761857947098e-06, "loss": 0.465, "step": 12002 }, { "epoch": 0.7309320098651159, "grad_norm": 0.9702474518947402, "learning_rate": 4.821732266882055e-06, "loss": 0.3983, "step": 12003 }, { "epoch": 0.7309929056419937, "grad_norm": 0.9409758255428782, "learning_rate": 4.82170267345168e-06, "loss": 0.4822, "step": 12004 }, { "epoch": 0.7310538014188716, "grad_norm": 1.0293229627173834, "learning_rate": 4.821673077656003e-06, "loss": 0.4199, "step": 12005 }, { "epoch": 0.7311146971957495, "grad_norm": 0.9722145664566345, "learning_rate": 4.821643479495053e-06, "loss": 0.3871, "step": 12006 }, { "epoch": 0.7311755929726274, "grad_norm": 1.055563317280209, "learning_rate": 4.821613878968862e-06, "loss": 0.4155, "step": 12007 }, { "epoch": 0.7312364887495052, "grad_norm": 1.0260875418937454, "learning_rate": 4.821584276077458e-06, "loss": 0.5142, "step": 12008 }, { "epoch": 0.731297384526383, "grad_norm": 0.9891806748597383, "learning_rate": 4.821554670820873e-06, "loss": 0.4184, "step": 12009 }, { "epoch": 0.731358280303261, "grad_norm": 1.1756900065770384, "learning_rate": 4.821525063199135e-06, "loss": 0.4231, "step": 12010 }, { "epoch": 0.7314191760801388, "grad_norm": 0.9936031273102771, "learning_rate": 4.821495453212277e-06, "loss": 0.4711, "step": 12011 }, { "epoch": 0.7314800718570167, "grad_norm": 1.0417195854484191, "learning_rate": 4.821465840860326e-06, "loss": 0.3736, "step": 12012 }, { "epoch": 0.7315409676338945, "grad_norm": 0.9937433000389105, "learning_rate": 4.8214362261433155e-06, "loss": 0.4189, "step": 12013 }, { "epoch": 0.7316018634107725, "grad_norm": 0.8798868585796044, "learning_rate": 4.821406609061273e-06, "loss": 0.521, "step": 12014 }, { "epoch": 0.7316627591876503, "grad_norm": 1.041360954791574, "learning_rate": 4.8213769896142306e-06, "loss": 0.4172, "step": 12015 }, { "epoch": 0.7317236549645282, "grad_norm": 1.0387193879671772, "learning_rate": 4.821347367802218e-06, "loss": 0.4071, "step": 12016 }, { "epoch": 0.7317845507414061, "grad_norm": 0.9622071243076626, "learning_rate": 4.821317743625263e-06, "loss": 0.413, "step": 12017 }, { "epoch": 0.731845446518284, "grad_norm": 1.0081732010315931, "learning_rate": 4.821288117083399e-06, "loss": 0.404, "step": 12018 }, { "epoch": 0.7319063422951618, "grad_norm": 1.1028126700277774, "learning_rate": 4.821258488176656e-06, "loss": 0.444, "step": 12019 }, { "epoch": 0.7319672380720397, "grad_norm": 1.0145653595661983, "learning_rate": 4.821228856905062e-06, "loss": 0.4628, "step": 12020 }, { "epoch": 0.7320281338489176, "grad_norm": 1.0638290079133759, "learning_rate": 4.8211992232686474e-06, "loss": 0.3611, "step": 12021 }, { "epoch": 0.7320890296257955, "grad_norm": 0.9343452660165699, "learning_rate": 4.821169587267444e-06, "loss": 0.4227, "step": 12022 }, { "epoch": 0.7321499254026733, "grad_norm": 0.9511693006606449, "learning_rate": 4.821139948901482e-06, "loss": 0.3873, "step": 12023 }, { "epoch": 0.7322108211795512, "grad_norm": 1.024545470857719, "learning_rate": 4.82111030817079e-06, "loss": 0.375, "step": 12024 }, { "epoch": 0.7322717169564291, "grad_norm": 0.928284304154627, "learning_rate": 4.8210806650753994e-06, "loss": 0.4775, "step": 12025 }, { "epoch": 0.732332612733307, "grad_norm": 1.0099875238593246, "learning_rate": 4.8210510196153396e-06, "loss": 0.4347, "step": 12026 }, { "epoch": 0.7323935085101848, "grad_norm": 1.055663032861314, "learning_rate": 4.8210213717906415e-06, "loss": 0.472, "step": 12027 }, { "epoch": 0.7324544042870627, "grad_norm": 0.9745184141597961, "learning_rate": 4.820991721601336e-06, "loss": 0.5176, "step": 12028 }, { "epoch": 0.7325153000639406, "grad_norm": 0.9900273598808249, "learning_rate": 4.820962069047451e-06, "loss": 0.4278, "step": 12029 }, { "epoch": 0.7325761958408185, "grad_norm": 1.0452341717788247, "learning_rate": 4.820932414129019e-06, "loss": 0.4477, "step": 12030 }, { "epoch": 0.7326370916176963, "grad_norm": 1.05545735286896, "learning_rate": 4.82090275684607e-06, "loss": 0.4586, "step": 12031 }, { "epoch": 0.7326979873945741, "grad_norm": 1.017540196050174, "learning_rate": 4.820873097198632e-06, "loss": 0.4081, "step": 12032 }, { "epoch": 0.7327588831714521, "grad_norm": 1.0389182699629265, "learning_rate": 4.8208434351867375e-06, "loss": 0.3658, "step": 12033 }, { "epoch": 0.73281977894833, "grad_norm": 1.0032081580163243, "learning_rate": 4.820813770810416e-06, "loss": 0.4226, "step": 12034 }, { "epoch": 0.7328806747252078, "grad_norm": 1.0630420721782685, "learning_rate": 4.820784104069698e-06, "loss": 0.4032, "step": 12035 }, { "epoch": 0.7329415705020856, "grad_norm": 0.9701133304425897, "learning_rate": 4.8207544349646115e-06, "loss": 0.432, "step": 12036 }, { "epoch": 0.7330024662789636, "grad_norm": 0.9223626941051282, "learning_rate": 4.82072476349519e-06, "loss": 0.4134, "step": 12037 }, { "epoch": 0.7330633620558414, "grad_norm": 1.012298315028513, "learning_rate": 4.820695089661463e-06, "loss": 0.4405, "step": 12038 }, { "epoch": 0.7331242578327193, "grad_norm": 0.9405701629749376, "learning_rate": 4.820665413463459e-06, "loss": 0.3597, "step": 12039 }, { "epoch": 0.7331851536095971, "grad_norm": 0.9710294821758905, "learning_rate": 4.82063573490121e-06, "loss": 0.4118, "step": 12040 }, { "epoch": 0.7332460493864751, "grad_norm": 1.0058944488052235, "learning_rate": 4.820606053974746e-06, "loss": 0.3618, "step": 12041 }, { "epoch": 0.7333069451633529, "grad_norm": 0.8788979374695549, "learning_rate": 4.820576370684096e-06, "loss": 0.4224, "step": 12042 }, { "epoch": 0.7333678409402308, "grad_norm": 0.9849742268748367, "learning_rate": 4.820546685029292e-06, "loss": 0.4101, "step": 12043 }, { "epoch": 0.7334287367171086, "grad_norm": 0.9619758760426178, "learning_rate": 4.820516997010361e-06, "loss": 0.4447, "step": 12044 }, { "epoch": 0.7334896324939866, "grad_norm": 0.9895230401142099, "learning_rate": 4.820487306627337e-06, "loss": 0.42, "step": 12045 }, { "epoch": 0.7335505282708644, "grad_norm": 1.127849502488636, "learning_rate": 4.8204576138802495e-06, "loss": 0.3576, "step": 12046 }, { "epoch": 0.7336114240477423, "grad_norm": 1.0082979323737642, "learning_rate": 4.820427918769127e-06, "loss": 0.5181, "step": 12047 }, { "epoch": 0.7336723198246201, "grad_norm": 1.0214554611036732, "learning_rate": 4.820398221294002e-06, "loss": 0.3946, "step": 12048 }, { "epoch": 0.7337332156014981, "grad_norm": 1.0590821457009187, "learning_rate": 4.820368521454902e-06, "loss": 0.3844, "step": 12049 }, { "epoch": 0.7337941113783759, "grad_norm": 0.9931890662134282, "learning_rate": 4.82033881925186e-06, "loss": 0.3925, "step": 12050 }, { "epoch": 0.7338550071552538, "grad_norm": 1.0539921346327037, "learning_rate": 4.820309114684903e-06, "loss": 0.3577, "step": 12051 }, { "epoch": 0.7339159029321316, "grad_norm": 1.0435711495137592, "learning_rate": 4.820279407754066e-06, "loss": 0.455, "step": 12052 }, { "epoch": 0.7339767987090096, "grad_norm": 0.9700581486250969, "learning_rate": 4.820249698459375e-06, "loss": 0.3972, "step": 12053 }, { "epoch": 0.7340376944858874, "grad_norm": 1.0181601830214946, "learning_rate": 4.820219986800862e-06, "loss": 0.4408, "step": 12054 }, { "epoch": 0.7340985902627652, "grad_norm": 1.1024162694368327, "learning_rate": 4.8201902727785574e-06, "loss": 0.4062, "step": 12055 }, { "epoch": 0.7341594860396432, "grad_norm": 1.0198798396228075, "learning_rate": 4.820160556392491e-06, "loss": 0.4712, "step": 12056 }, { "epoch": 0.734220381816521, "grad_norm": 1.0478318325700735, "learning_rate": 4.820130837642694e-06, "loss": 0.4831, "step": 12057 }, { "epoch": 0.7342812775933989, "grad_norm": 0.9755435420697164, "learning_rate": 4.820101116529195e-06, "loss": 0.4415, "step": 12058 }, { "epoch": 0.7343421733702767, "grad_norm": 0.8997453818903313, "learning_rate": 4.820071393052025e-06, "loss": 0.4881, "step": 12059 }, { "epoch": 0.7344030691471547, "grad_norm": 0.9017572964114545, "learning_rate": 4.820041667211215e-06, "loss": 0.518, "step": 12060 }, { "epoch": 0.7344639649240325, "grad_norm": 1.0356824531257376, "learning_rate": 4.8200119390067956e-06, "loss": 0.4097, "step": 12061 }, { "epoch": 0.7345248607009104, "grad_norm": 0.894928055528807, "learning_rate": 4.819982208438795e-06, "loss": 0.4694, "step": 12062 }, { "epoch": 0.7345857564777882, "grad_norm": 0.962048115957244, "learning_rate": 4.819952475507246e-06, "loss": 0.4216, "step": 12063 }, { "epoch": 0.7346466522546662, "grad_norm": 1.0273742599801803, "learning_rate": 4.819922740212176e-06, "loss": 0.425, "step": 12064 }, { "epoch": 0.734707548031544, "grad_norm": 1.029273132636173, "learning_rate": 4.819893002553618e-06, "loss": 0.3902, "step": 12065 }, { "epoch": 0.7347684438084219, "grad_norm": 0.95789191138158, "learning_rate": 4.819863262531601e-06, "loss": 0.4675, "step": 12066 }, { "epoch": 0.7348293395852997, "grad_norm": 1.1518271206443993, "learning_rate": 4.819833520146156e-06, "loss": 0.3998, "step": 12067 }, { "epoch": 0.7348902353621777, "grad_norm": 0.9925611556359, "learning_rate": 4.8198037753973125e-06, "loss": 0.4089, "step": 12068 }, { "epoch": 0.7349511311390555, "grad_norm": 0.955870330897976, "learning_rate": 4.819774028285101e-06, "loss": 0.4178, "step": 12069 }, { "epoch": 0.7350120269159334, "grad_norm": 1.033577865370774, "learning_rate": 4.819744278809552e-06, "loss": 0.4843, "step": 12070 }, { "epoch": 0.7350729226928112, "grad_norm": 1.0714516814857633, "learning_rate": 4.819714526970696e-06, "loss": 0.3792, "step": 12071 }, { "epoch": 0.7351338184696892, "grad_norm": 0.9543094997636596, "learning_rate": 4.819684772768562e-06, "loss": 0.4496, "step": 12072 }, { "epoch": 0.735194714246567, "grad_norm": 1.0067721686036444, "learning_rate": 4.819655016203183e-06, "loss": 0.4338, "step": 12073 }, { "epoch": 0.7352556100234449, "grad_norm": 1.0407644870415091, "learning_rate": 4.819625257274587e-06, "loss": 0.3863, "step": 12074 }, { "epoch": 0.7353165058003227, "grad_norm": 0.9551254252525678, "learning_rate": 4.819595495982805e-06, "loss": 0.4296, "step": 12075 }, { "epoch": 0.7353774015772007, "grad_norm": 0.9805092023551991, "learning_rate": 4.819565732327868e-06, "loss": 0.3473, "step": 12076 }, { "epoch": 0.7354382973540785, "grad_norm": 1.0768568629195594, "learning_rate": 4.819535966309804e-06, "loss": 0.4307, "step": 12077 }, { "epoch": 0.7354991931309564, "grad_norm": 1.00699246341728, "learning_rate": 4.819506197928646e-06, "loss": 0.432, "step": 12078 }, { "epoch": 0.7355600889078342, "grad_norm": 0.9344000081833338, "learning_rate": 4.819476427184424e-06, "loss": 0.4188, "step": 12079 }, { "epoch": 0.7356209846847122, "grad_norm": 0.9736248672457057, "learning_rate": 4.819446654077166e-06, "loss": 0.4127, "step": 12080 }, { "epoch": 0.73568188046159, "grad_norm": 0.9307329522045851, "learning_rate": 4.8194168786069055e-06, "loss": 0.4839, "step": 12081 }, { "epoch": 0.7357427762384678, "grad_norm": 0.9184725788740027, "learning_rate": 4.81938710077367e-06, "loss": 0.4674, "step": 12082 }, { "epoch": 0.7358036720153457, "grad_norm": 0.9549321617744012, "learning_rate": 4.819357320577492e-06, "loss": 0.3653, "step": 12083 }, { "epoch": 0.7358645677922236, "grad_norm": 0.963485331279195, "learning_rate": 4.819327538018401e-06, "loss": 0.4292, "step": 12084 }, { "epoch": 0.7359254635691015, "grad_norm": 1.0623736118164255, "learning_rate": 4.8192977530964275e-06, "loss": 0.4158, "step": 12085 }, { "epoch": 0.7359863593459793, "grad_norm": 1.1216143983440585, "learning_rate": 4.819267965811602e-06, "loss": 0.4829, "step": 12086 }, { "epoch": 0.7360472551228572, "grad_norm": 1.1291801890230215, "learning_rate": 4.8192381761639525e-06, "loss": 0.425, "step": 12087 }, { "epoch": 0.7361081508997351, "grad_norm": 0.9684599096809373, "learning_rate": 4.819208384153513e-06, "loss": 0.4324, "step": 12088 }, { "epoch": 0.736169046676613, "grad_norm": 0.970209004675152, "learning_rate": 4.819178589780313e-06, "loss": 0.3928, "step": 12089 }, { "epoch": 0.7362299424534908, "grad_norm": 1.0563450835737553, "learning_rate": 4.819148793044381e-06, "loss": 0.3901, "step": 12090 }, { "epoch": 0.7362908382303687, "grad_norm": 0.9493088411313318, "learning_rate": 4.819118993945747e-06, "loss": 0.4418, "step": 12091 }, { "epoch": 0.7363517340072466, "grad_norm": 0.9918855426958799, "learning_rate": 4.819089192484444e-06, "loss": 0.41, "step": 12092 }, { "epoch": 0.7364126297841245, "grad_norm": 0.9586563690526835, "learning_rate": 4.819059388660502e-06, "loss": 0.4044, "step": 12093 }, { "epoch": 0.7364735255610023, "grad_norm": 0.9463275447696194, "learning_rate": 4.8190295824739495e-06, "loss": 0.4335, "step": 12094 }, { "epoch": 0.7365344213378802, "grad_norm": 0.8918016330244029, "learning_rate": 4.818999773924818e-06, "loss": 0.4782, "step": 12095 }, { "epoch": 0.7365953171147581, "grad_norm": 1.1082928666950682, "learning_rate": 4.818969963013138e-06, "loss": 0.4374, "step": 12096 }, { "epoch": 0.736656212891636, "grad_norm": 1.0195664519343115, "learning_rate": 4.81894014973894e-06, "loss": 0.3588, "step": 12097 }, { "epoch": 0.7367171086685138, "grad_norm": 0.9162703363168513, "learning_rate": 4.818910334102254e-06, "loss": 0.406, "step": 12098 }, { "epoch": 0.7367780044453918, "grad_norm": 1.001149244644632, "learning_rate": 4.818880516103109e-06, "loss": 0.4032, "step": 12099 }, { "epoch": 0.7368389002222696, "grad_norm": 0.9630122343703958, "learning_rate": 4.8188506957415385e-06, "loss": 0.4746, "step": 12100 }, { "epoch": 0.7368997959991475, "grad_norm": 0.9522097225863442, "learning_rate": 4.81882087301757e-06, "loss": 0.4303, "step": 12101 }, { "epoch": 0.7369606917760253, "grad_norm": 1.0278483783178112, "learning_rate": 4.818791047931235e-06, "loss": 0.3686, "step": 12102 }, { "epoch": 0.7370215875529033, "grad_norm": 0.934131264522919, "learning_rate": 4.818761220482564e-06, "loss": 0.4733, "step": 12103 }, { "epoch": 0.7370824833297811, "grad_norm": 1.0439380725738088, "learning_rate": 4.8187313906715886e-06, "loss": 0.4506, "step": 12104 }, { "epoch": 0.737143379106659, "grad_norm": 1.0688529319037954, "learning_rate": 4.818701558498336e-06, "loss": 0.3993, "step": 12105 }, { "epoch": 0.7372042748835368, "grad_norm": 1.0353023581276455, "learning_rate": 4.818671723962839e-06, "loss": 0.3969, "step": 12106 }, { "epoch": 0.7372651706604147, "grad_norm": 1.0238344711699083, "learning_rate": 4.8186418870651274e-06, "loss": 0.3864, "step": 12107 }, { "epoch": 0.7373260664372926, "grad_norm": 0.9645236550094799, "learning_rate": 4.818612047805232e-06, "loss": 0.3569, "step": 12108 }, { "epoch": 0.7373869622141704, "grad_norm": 0.9359024582350095, "learning_rate": 4.8185822061831835e-06, "loss": 0.3802, "step": 12109 }, { "epoch": 0.7374478579910483, "grad_norm": 0.9767897574055296, "learning_rate": 4.81855236219901e-06, "loss": 0.4791, "step": 12110 }, { "epoch": 0.7375087537679262, "grad_norm": 1.059045960983005, "learning_rate": 4.818522515852745e-06, "loss": 0.4051, "step": 12111 }, { "epoch": 0.7375696495448041, "grad_norm": 1.0889544457404958, "learning_rate": 4.818492667144417e-06, "loss": 0.4981, "step": 12112 }, { "epoch": 0.7376305453216819, "grad_norm": 1.0291457642220447, "learning_rate": 4.818462816074056e-06, "loss": 0.4474, "step": 12113 }, { "epoch": 0.7376914410985598, "grad_norm": 0.8666679589924906, "learning_rate": 4.818432962641695e-06, "loss": 0.4676, "step": 12114 }, { "epoch": 0.7377523368754377, "grad_norm": 0.9633241697221968, "learning_rate": 4.818403106847361e-06, "loss": 0.4395, "step": 12115 }, { "epoch": 0.7378132326523156, "grad_norm": 0.9221012917141802, "learning_rate": 4.818373248691087e-06, "loss": 0.4323, "step": 12116 }, { "epoch": 0.7378741284291934, "grad_norm": 1.0078381135479304, "learning_rate": 4.818343388172903e-06, "loss": 0.3737, "step": 12117 }, { "epoch": 0.7379350242060713, "grad_norm": 0.9943967844427501, "learning_rate": 4.818313525292838e-06, "loss": 0.3881, "step": 12118 }, { "epoch": 0.7379959199829492, "grad_norm": 0.9922199193823389, "learning_rate": 4.818283660050924e-06, "loss": 0.4302, "step": 12119 }, { "epoch": 0.7380568157598271, "grad_norm": 1.0032036653461884, "learning_rate": 4.81825379244719e-06, "loss": 0.3862, "step": 12120 }, { "epoch": 0.7381177115367049, "grad_norm": 1.047793627043778, "learning_rate": 4.818223922481669e-06, "loss": 0.3712, "step": 12121 }, { "epoch": 0.7381786073135828, "grad_norm": 1.0309792110958207, "learning_rate": 4.818194050154388e-06, "loss": 0.3847, "step": 12122 }, { "epoch": 0.7382395030904607, "grad_norm": 1.0284781718673932, "learning_rate": 4.8181641754653794e-06, "loss": 0.4658, "step": 12123 }, { "epoch": 0.7383003988673386, "grad_norm": 1.0926820808624436, "learning_rate": 4.818134298414674e-06, "loss": 0.3858, "step": 12124 }, { "epoch": 0.7383612946442164, "grad_norm": 1.0136608419497641, "learning_rate": 4.8181044190023e-06, "loss": 0.3965, "step": 12125 }, { "epoch": 0.7384221904210942, "grad_norm": 1.0607452658379464, "learning_rate": 4.818074537228291e-06, "loss": 0.4326, "step": 12126 }, { "epoch": 0.7384830861979722, "grad_norm": 1.0284355632957003, "learning_rate": 4.818044653092675e-06, "loss": 0.4207, "step": 12127 }, { "epoch": 0.73854398197485, "grad_norm": 1.0327289458753486, "learning_rate": 4.8180147665954835e-06, "loss": 0.4022, "step": 12128 }, { "epoch": 0.7386048777517279, "grad_norm": 1.0120806807410294, "learning_rate": 4.817984877736747e-06, "loss": 0.414, "step": 12129 }, { "epoch": 0.7386657735286057, "grad_norm": 0.912615262270647, "learning_rate": 4.8179549865164955e-06, "loss": 0.4565, "step": 12130 }, { "epoch": 0.7387266693054837, "grad_norm": 1.0122344262509246, "learning_rate": 4.81792509293476e-06, "loss": 0.411, "step": 12131 }, { "epoch": 0.7387875650823615, "grad_norm": 0.9420065209964164, "learning_rate": 4.81789519699157e-06, "loss": 0.4352, "step": 12132 }, { "epoch": 0.7388484608592394, "grad_norm": 1.0289671296877398, "learning_rate": 4.8178652986869575e-06, "loss": 0.3866, "step": 12133 }, { "epoch": 0.7389093566361172, "grad_norm": 0.9733952405860914, "learning_rate": 4.817835398020951e-06, "loss": 0.4167, "step": 12134 }, { "epoch": 0.7389702524129952, "grad_norm": 1.0184684109793198, "learning_rate": 4.817805494993583e-06, "loss": 0.4982, "step": 12135 }, { "epoch": 0.739031148189873, "grad_norm": 1.0448301735656675, "learning_rate": 4.817775589604882e-06, "loss": 0.3988, "step": 12136 }, { "epoch": 0.7390920439667509, "grad_norm": 1.0702176170759765, "learning_rate": 4.8177456818548795e-06, "loss": 0.4526, "step": 12137 }, { "epoch": 0.7391529397436288, "grad_norm": 0.9813194358705501, "learning_rate": 4.817715771743606e-06, "loss": 0.3969, "step": 12138 }, { "epoch": 0.7392138355205067, "grad_norm": 1.0525816798167458, "learning_rate": 4.817685859271092e-06, "loss": 0.404, "step": 12139 }, { "epoch": 0.7392747312973845, "grad_norm": 1.0695144258048952, "learning_rate": 4.817655944437368e-06, "loss": 0.5061, "step": 12140 }, { "epoch": 0.7393356270742624, "grad_norm": 1.0174395427317418, "learning_rate": 4.817626027242465e-06, "loss": 0.4096, "step": 12141 }, { "epoch": 0.7393965228511403, "grad_norm": 0.9825339338688607, "learning_rate": 4.817596107686412e-06, "loss": 0.4196, "step": 12142 }, { "epoch": 0.7394574186280182, "grad_norm": 1.02708727804392, "learning_rate": 4.81756618576924e-06, "loss": 0.4082, "step": 12143 }, { "epoch": 0.739518314404896, "grad_norm": 1.1038531837234433, "learning_rate": 4.8175362614909794e-06, "loss": 0.3764, "step": 12144 }, { "epoch": 0.7395792101817739, "grad_norm": 0.9134845682466807, "learning_rate": 4.8175063348516615e-06, "loss": 0.4626, "step": 12145 }, { "epoch": 0.7396401059586518, "grad_norm": 1.0167452097339769, "learning_rate": 4.817476405851317e-06, "loss": 0.4125, "step": 12146 }, { "epoch": 0.7397010017355297, "grad_norm": 1.0152499648457323, "learning_rate": 4.817446474489975e-06, "loss": 0.5082, "step": 12147 }, { "epoch": 0.7397618975124075, "grad_norm": 0.9557162359913786, "learning_rate": 4.817416540767667e-06, "loss": 0.4095, "step": 12148 }, { "epoch": 0.7398227932892854, "grad_norm": 1.0057511195148734, "learning_rate": 4.817386604684423e-06, "loss": 0.4162, "step": 12149 }, { "epoch": 0.7398836890661633, "grad_norm": 1.0167664260324005, "learning_rate": 4.8173566662402736e-06, "loss": 0.4148, "step": 12150 }, { "epoch": 0.7399445848430412, "grad_norm": 0.9950880634527898, "learning_rate": 4.8173267254352495e-06, "loss": 0.4476, "step": 12151 }, { "epoch": 0.740005480619919, "grad_norm": 0.9693162208857585, "learning_rate": 4.817296782269382e-06, "loss": 0.3772, "step": 12152 }, { "epoch": 0.7400663763967968, "grad_norm": 0.9775125409971821, "learning_rate": 4.817266836742699e-06, "loss": 0.3877, "step": 12153 }, { "epoch": 0.7401272721736748, "grad_norm": 0.9661775965484798, "learning_rate": 4.817236888855234e-06, "loss": 0.4447, "step": 12154 }, { "epoch": 0.7401881679505526, "grad_norm": 1.0086717113498187, "learning_rate": 4.817206938607016e-06, "loss": 0.4113, "step": 12155 }, { "epoch": 0.7402490637274305, "grad_norm": 1.0212288662124582, "learning_rate": 4.817176985998075e-06, "loss": 0.4528, "step": 12156 }, { "epoch": 0.7403099595043083, "grad_norm": 0.9441250887727409, "learning_rate": 4.8171470310284426e-06, "loss": 0.431, "step": 12157 }, { "epoch": 0.7403708552811863, "grad_norm": 0.9736672650650974, "learning_rate": 4.8171170736981495e-06, "loss": 0.4302, "step": 12158 }, { "epoch": 0.7404317510580641, "grad_norm": 1.0493311490887487, "learning_rate": 4.8170871140072265e-06, "loss": 0.3579, "step": 12159 }, { "epoch": 0.740492646834942, "grad_norm": 1.0427100727312535, "learning_rate": 4.817057151955702e-06, "loss": 0.4335, "step": 12160 }, { "epoch": 0.7405535426118198, "grad_norm": 1.0090595293906004, "learning_rate": 4.817027187543608e-06, "loss": 0.4768, "step": 12161 }, { "epoch": 0.7406144383886978, "grad_norm": 1.1180068065685733, "learning_rate": 4.816997220770975e-06, "loss": 0.5142, "step": 12162 }, { "epoch": 0.7406753341655756, "grad_norm": 0.9367910179620716, "learning_rate": 4.816967251637833e-06, "loss": 0.416, "step": 12163 }, { "epoch": 0.7407362299424535, "grad_norm": 1.070390704410206, "learning_rate": 4.816937280144213e-06, "loss": 0.4217, "step": 12164 }, { "epoch": 0.7407971257193313, "grad_norm": 0.9436362425291729, "learning_rate": 4.8169073062901465e-06, "loss": 0.4677, "step": 12165 }, { "epoch": 0.7408580214962093, "grad_norm": 0.9003955453495184, "learning_rate": 4.816877330075662e-06, "loss": 0.4629, "step": 12166 }, { "epoch": 0.7409189172730871, "grad_norm": 1.0585629167268815, "learning_rate": 4.816847351500792e-06, "loss": 0.4022, "step": 12167 }, { "epoch": 0.740979813049965, "grad_norm": 1.0217614678117142, "learning_rate": 4.816817370565565e-06, "loss": 0.4144, "step": 12168 }, { "epoch": 0.7410407088268428, "grad_norm": 0.9641186966775398, "learning_rate": 4.816787387270013e-06, "loss": 0.3909, "step": 12169 }, { "epoch": 0.7411016046037208, "grad_norm": 0.9315945725994502, "learning_rate": 4.816757401614166e-06, "loss": 0.4686, "step": 12170 }, { "epoch": 0.7411625003805986, "grad_norm": 0.9612465807991062, "learning_rate": 4.8167274135980554e-06, "loss": 0.4744, "step": 12171 }, { "epoch": 0.7412233961574765, "grad_norm": 1.001179990086669, "learning_rate": 4.8166974232217105e-06, "loss": 0.4529, "step": 12172 }, { "epoch": 0.7412842919343543, "grad_norm": 0.9947830181154241, "learning_rate": 4.8166674304851625e-06, "loss": 0.42, "step": 12173 }, { "epoch": 0.7413451877112323, "grad_norm": 0.934105601686934, "learning_rate": 4.816637435388443e-06, "loss": 0.4558, "step": 12174 }, { "epoch": 0.7414060834881101, "grad_norm": 1.0641719156366751, "learning_rate": 4.81660743793158e-06, "loss": 0.5144, "step": 12175 }, { "epoch": 0.741466979264988, "grad_norm": 1.109440991708071, "learning_rate": 4.8165774381146055e-06, "loss": 0.3999, "step": 12176 }, { "epoch": 0.7415278750418658, "grad_norm": 1.0790882006160119, "learning_rate": 4.816547435937551e-06, "loss": 0.4335, "step": 12177 }, { "epoch": 0.7415887708187437, "grad_norm": 1.0425240823512385, "learning_rate": 4.816517431400446e-06, "loss": 0.3958, "step": 12178 }, { "epoch": 0.7416496665956216, "grad_norm": 1.0056853999237299, "learning_rate": 4.81648742450332e-06, "loss": 0.4368, "step": 12179 }, { "epoch": 0.7417105623724994, "grad_norm": 1.099892990376384, "learning_rate": 4.816457415246206e-06, "loss": 0.3262, "step": 12180 }, { "epoch": 0.7417714581493774, "grad_norm": 1.0464761920568422, "learning_rate": 4.816427403629133e-06, "loss": 0.4947, "step": 12181 }, { "epoch": 0.7418323539262552, "grad_norm": 0.9393412176110904, "learning_rate": 4.816397389652131e-06, "loss": 0.4253, "step": 12182 }, { "epoch": 0.7418932497031331, "grad_norm": 0.9424920009166102, "learning_rate": 4.816367373315233e-06, "loss": 0.4715, "step": 12183 }, { "epoch": 0.7419541454800109, "grad_norm": 1.0327445694226143, "learning_rate": 4.816337354618468e-06, "loss": 0.3939, "step": 12184 }, { "epoch": 0.7420150412568889, "grad_norm": 0.9451788018444015, "learning_rate": 4.816307333561866e-06, "loss": 0.4335, "step": 12185 }, { "epoch": 0.7420759370337667, "grad_norm": 1.045556139394314, "learning_rate": 4.816277310145458e-06, "loss": 0.3956, "step": 12186 }, { "epoch": 0.7421368328106446, "grad_norm": 1.007577020133167, "learning_rate": 4.816247284369276e-06, "loss": 0.4367, "step": 12187 }, { "epoch": 0.7421977285875224, "grad_norm": 0.9632267308530007, "learning_rate": 4.816217256233348e-06, "loss": 0.4962, "step": 12188 }, { "epoch": 0.7422586243644004, "grad_norm": 0.9964262336418948, "learning_rate": 4.816187225737706e-06, "loss": 0.4958, "step": 12189 }, { "epoch": 0.7423195201412782, "grad_norm": 1.012167974992191, "learning_rate": 4.816157192882382e-06, "loss": 0.3865, "step": 12190 }, { "epoch": 0.7423804159181561, "grad_norm": 1.0278391819567274, "learning_rate": 4.816127157667404e-06, "loss": 0.4494, "step": 12191 }, { "epoch": 0.7424413116950339, "grad_norm": 0.9925319424363196, "learning_rate": 4.816097120092804e-06, "loss": 0.4493, "step": 12192 }, { "epoch": 0.7425022074719119, "grad_norm": 1.042290741252736, "learning_rate": 4.816067080158613e-06, "loss": 0.4387, "step": 12193 }, { "epoch": 0.7425631032487897, "grad_norm": 0.9213351488471182, "learning_rate": 4.816037037864861e-06, "loss": 0.4324, "step": 12194 }, { "epoch": 0.7426239990256676, "grad_norm": 0.9903337213877166, "learning_rate": 4.816006993211578e-06, "loss": 0.3927, "step": 12195 }, { "epoch": 0.7426848948025454, "grad_norm": 1.0851421382963395, "learning_rate": 4.815976946198795e-06, "loss": 0.4129, "step": 12196 }, { "epoch": 0.7427457905794234, "grad_norm": 0.9715898484563145, "learning_rate": 4.815946896826544e-06, "loss": 0.4297, "step": 12197 }, { "epoch": 0.7428066863563012, "grad_norm": 0.9825788240802872, "learning_rate": 4.815916845094853e-06, "loss": 0.4453, "step": 12198 }, { "epoch": 0.742867582133179, "grad_norm": 1.036014505033384, "learning_rate": 4.815886791003756e-06, "loss": 0.3983, "step": 12199 }, { "epoch": 0.7429284779100569, "grad_norm": 1.0519263058157606, "learning_rate": 4.81585673455328e-06, "loss": 0.4517, "step": 12200 }, { "epoch": 0.7429893736869349, "grad_norm": 1.0187218544960244, "learning_rate": 4.815826675743458e-06, "loss": 0.4289, "step": 12201 }, { "epoch": 0.7430502694638127, "grad_norm": 1.0093136576770292, "learning_rate": 4.815796614574319e-06, "loss": 0.4313, "step": 12202 }, { "epoch": 0.7431111652406905, "grad_norm": 1.0164740962769823, "learning_rate": 4.8157665510458965e-06, "loss": 0.4603, "step": 12203 }, { "epoch": 0.7431720610175684, "grad_norm": 1.0631133963966357, "learning_rate": 4.815736485158218e-06, "loss": 0.3308, "step": 12204 }, { "epoch": 0.7432329567944463, "grad_norm": 1.174674845379732, "learning_rate": 4.815706416911316e-06, "loss": 0.4148, "step": 12205 }, { "epoch": 0.7432938525713242, "grad_norm": 0.9966385844350696, "learning_rate": 4.815676346305219e-06, "loss": 0.4065, "step": 12206 }, { "epoch": 0.743354748348202, "grad_norm": 1.0133345567573697, "learning_rate": 4.81564627333996e-06, "loss": 0.4196, "step": 12207 }, { "epoch": 0.7434156441250799, "grad_norm": 1.0109886915316053, "learning_rate": 4.815616198015568e-06, "loss": 0.4204, "step": 12208 }, { "epoch": 0.7434765399019578, "grad_norm": 1.002180839926736, "learning_rate": 4.815586120332076e-06, "loss": 0.393, "step": 12209 }, { "epoch": 0.7435374356788357, "grad_norm": 1.0846934170555027, "learning_rate": 4.8155560402895115e-06, "loss": 0.4398, "step": 12210 }, { "epoch": 0.7435983314557135, "grad_norm": 0.9612579188834157, "learning_rate": 4.8155259578879064e-06, "loss": 0.4202, "step": 12211 }, { "epoch": 0.7436592272325914, "grad_norm": 1.0669313189020362, "learning_rate": 4.815495873127293e-06, "loss": 0.4158, "step": 12212 }, { "epoch": 0.7437201230094693, "grad_norm": 0.9810470020463001, "learning_rate": 4.8154657860077e-06, "loss": 0.4318, "step": 12213 }, { "epoch": 0.7437810187863472, "grad_norm": 0.9759829613210826, "learning_rate": 4.815435696529158e-06, "loss": 0.4662, "step": 12214 }, { "epoch": 0.743841914563225, "grad_norm": 1.0183479850108965, "learning_rate": 4.815405604691698e-06, "loss": 0.4375, "step": 12215 }, { "epoch": 0.7439028103401029, "grad_norm": 1.0174736575431373, "learning_rate": 4.8153755104953525e-06, "loss": 0.4002, "step": 12216 }, { "epoch": 0.7439637061169808, "grad_norm": 1.014767168128371, "learning_rate": 4.8153454139401496e-06, "loss": 0.4052, "step": 12217 }, { "epoch": 0.7440246018938587, "grad_norm": 1.0803926863936018, "learning_rate": 4.815315315026121e-06, "loss": 0.397, "step": 12218 }, { "epoch": 0.7440854976707365, "grad_norm": 1.0205399036187741, "learning_rate": 4.815285213753298e-06, "loss": 0.3654, "step": 12219 }, { "epoch": 0.7441463934476145, "grad_norm": 0.9590637565562924, "learning_rate": 4.81525511012171e-06, "loss": 0.4071, "step": 12220 }, { "epoch": 0.7442072892244923, "grad_norm": 1.0811406122769704, "learning_rate": 4.815225004131387e-06, "loss": 0.3457, "step": 12221 }, { "epoch": 0.7442681850013702, "grad_norm": 1.059280277381278, "learning_rate": 4.815194895782363e-06, "loss": 0.4255, "step": 12222 }, { "epoch": 0.744329080778248, "grad_norm": 0.9997453422201993, "learning_rate": 4.815164785074665e-06, "loss": 0.4065, "step": 12223 }, { "epoch": 0.744389976555126, "grad_norm": 1.0212763505975353, "learning_rate": 4.815134672008326e-06, "loss": 0.5241, "step": 12224 }, { "epoch": 0.7444508723320038, "grad_norm": 1.089602119974823, "learning_rate": 4.815104556583375e-06, "loss": 0.3826, "step": 12225 }, { "epoch": 0.7445117681088816, "grad_norm": 0.984820718682522, "learning_rate": 4.815074438799845e-06, "loss": 0.5168, "step": 12226 }, { "epoch": 0.7445726638857595, "grad_norm": 1.0743847456126208, "learning_rate": 4.815044318657765e-06, "loss": 0.4135, "step": 12227 }, { "epoch": 0.7446335596626374, "grad_norm": 0.9910066059204116, "learning_rate": 4.815014196157165e-06, "loss": 0.4379, "step": 12228 }, { "epoch": 0.7446944554395153, "grad_norm": 0.975892808756109, "learning_rate": 4.814984071298078e-06, "loss": 0.4262, "step": 12229 }, { "epoch": 0.7447553512163931, "grad_norm": 1.0385843870669031, "learning_rate": 4.814953944080532e-06, "loss": 0.4337, "step": 12230 }, { "epoch": 0.744816246993271, "grad_norm": 1.0061499610246374, "learning_rate": 4.814923814504559e-06, "loss": 0.4642, "step": 12231 }, { "epoch": 0.7448771427701489, "grad_norm": 0.9902205745016802, "learning_rate": 4.814893682570191e-06, "loss": 0.3966, "step": 12232 }, { "epoch": 0.7449380385470268, "grad_norm": 0.987828851743519, "learning_rate": 4.814863548277457e-06, "loss": 0.3508, "step": 12233 }, { "epoch": 0.7449989343239046, "grad_norm": 0.9926326286666151, "learning_rate": 4.814833411626389e-06, "loss": 0.3684, "step": 12234 }, { "epoch": 0.7450598301007825, "grad_norm": 0.9158862809697386, "learning_rate": 4.814803272617015e-06, "loss": 0.4519, "step": 12235 }, { "epoch": 0.7451207258776604, "grad_norm": 1.0133282040201685, "learning_rate": 4.814773131249368e-06, "loss": 0.4588, "step": 12236 }, { "epoch": 0.7451816216545383, "grad_norm": 0.9762108841340611, "learning_rate": 4.814742987523479e-06, "loss": 0.4121, "step": 12237 }, { "epoch": 0.7452425174314161, "grad_norm": 0.952748103012947, "learning_rate": 4.814712841439378e-06, "loss": 0.453, "step": 12238 }, { "epoch": 0.745303413208294, "grad_norm": 0.936465770251524, "learning_rate": 4.814682692997095e-06, "loss": 0.4288, "step": 12239 }, { "epoch": 0.7453643089851719, "grad_norm": 1.0056510233272604, "learning_rate": 4.814652542196661e-06, "loss": 0.4164, "step": 12240 }, { "epoch": 0.7454252047620498, "grad_norm": 1.0686165195181443, "learning_rate": 4.814622389038109e-06, "loss": 0.4248, "step": 12241 }, { "epoch": 0.7454861005389276, "grad_norm": 0.8937740767248115, "learning_rate": 4.814592233521467e-06, "loss": 0.4734, "step": 12242 }, { "epoch": 0.7455469963158055, "grad_norm": 0.9998011763768885, "learning_rate": 4.814562075646766e-06, "loss": 0.4089, "step": 12243 }, { "epoch": 0.7456078920926834, "grad_norm": 0.9914133159007127, "learning_rate": 4.814531915414037e-06, "loss": 0.4144, "step": 12244 }, { "epoch": 0.7456687878695613, "grad_norm": 1.0589472636416417, "learning_rate": 4.814501752823312e-06, "loss": 0.3884, "step": 12245 }, { "epoch": 0.7457296836464391, "grad_norm": 0.9003501502851283, "learning_rate": 4.81447158787462e-06, "loss": 0.5168, "step": 12246 }, { "epoch": 0.745790579423317, "grad_norm": 1.0228227970624784, "learning_rate": 4.814441420567993e-06, "loss": 0.4027, "step": 12247 }, { "epoch": 0.7458514752001949, "grad_norm": 0.9475740542630768, "learning_rate": 4.8144112509034605e-06, "loss": 0.4412, "step": 12248 }, { "epoch": 0.7459123709770727, "grad_norm": 0.980825728280638, "learning_rate": 4.814381078881055e-06, "loss": 0.437, "step": 12249 }, { "epoch": 0.7459732667539506, "grad_norm": 0.929099087401875, "learning_rate": 4.8143509045008055e-06, "loss": 0.5301, "step": 12250 }, { "epoch": 0.7460341625308284, "grad_norm": 1.132399407359302, "learning_rate": 4.814320727762743e-06, "loss": 0.3551, "step": 12251 }, { "epoch": 0.7460950583077064, "grad_norm": 0.9642367887406145, "learning_rate": 4.814290548666899e-06, "loss": 0.4156, "step": 12252 }, { "epoch": 0.7461559540845842, "grad_norm": 0.9516691513268866, "learning_rate": 4.814260367213305e-06, "loss": 0.4633, "step": 12253 }, { "epoch": 0.7462168498614621, "grad_norm": 0.9131973073747602, "learning_rate": 4.814230183401989e-06, "loss": 0.4605, "step": 12254 }, { "epoch": 0.7462777456383399, "grad_norm": 0.9911363877097429, "learning_rate": 4.814199997232984e-06, "loss": 0.4059, "step": 12255 }, { "epoch": 0.7463386414152179, "grad_norm": 1.0283117405631912, "learning_rate": 4.814169808706321e-06, "loss": 0.4476, "step": 12256 }, { "epoch": 0.7463995371920957, "grad_norm": 0.9813424474588548, "learning_rate": 4.814139617822029e-06, "loss": 0.3984, "step": 12257 }, { "epoch": 0.7464604329689736, "grad_norm": 0.9247926497256053, "learning_rate": 4.814109424580139e-06, "loss": 0.4869, "step": 12258 }, { "epoch": 0.7465213287458514, "grad_norm": 1.0370918361663588, "learning_rate": 4.8140792289806836e-06, "loss": 0.4263, "step": 12259 }, { "epoch": 0.7465822245227294, "grad_norm": 1.1092828575725153, "learning_rate": 4.814049031023692e-06, "loss": 0.478, "step": 12260 }, { "epoch": 0.7466431202996072, "grad_norm": 0.9011618510151125, "learning_rate": 4.814018830709195e-06, "loss": 0.4839, "step": 12261 }, { "epoch": 0.7467040160764851, "grad_norm": 1.0316608110726813, "learning_rate": 4.813988628037224e-06, "loss": 0.4393, "step": 12262 }, { "epoch": 0.746764911853363, "grad_norm": 0.9860081212606925, "learning_rate": 4.813958423007809e-06, "loss": 0.4207, "step": 12263 }, { "epoch": 0.7468258076302409, "grad_norm": 1.0278281909475404, "learning_rate": 4.813928215620983e-06, "loss": 0.3445, "step": 12264 }, { "epoch": 0.7468867034071187, "grad_norm": 0.9935971342637204, "learning_rate": 4.813898005876774e-06, "loss": 0.5231, "step": 12265 }, { "epoch": 0.7469475991839966, "grad_norm": 1.0666041098548305, "learning_rate": 4.813867793775213e-06, "loss": 0.3438, "step": 12266 }, { "epoch": 0.7470084949608745, "grad_norm": 0.9836410079112765, "learning_rate": 4.8138375793163325e-06, "loss": 0.4143, "step": 12267 }, { "epoch": 0.7470693907377524, "grad_norm": 1.096534278348939, "learning_rate": 4.8138073625001626e-06, "loss": 0.4192, "step": 12268 }, { "epoch": 0.7471302865146302, "grad_norm": 1.0204730463667335, "learning_rate": 4.813777143326733e-06, "loss": 0.4436, "step": 12269 }, { "epoch": 0.747191182291508, "grad_norm": 0.9730378322472946, "learning_rate": 4.813746921796077e-06, "loss": 0.4254, "step": 12270 }, { "epoch": 0.747252078068386, "grad_norm": 0.9417024959462177, "learning_rate": 4.813716697908222e-06, "loss": 0.4897, "step": 12271 }, { "epoch": 0.7473129738452639, "grad_norm": 1.0541430853389668, "learning_rate": 4.813686471663201e-06, "loss": 0.4232, "step": 12272 }, { "epoch": 0.7473738696221417, "grad_norm": 0.9620401601988975, "learning_rate": 4.813656243061045e-06, "loss": 0.4611, "step": 12273 }, { "epoch": 0.7474347653990195, "grad_norm": 1.0379245200184148, "learning_rate": 4.813626012101783e-06, "loss": 0.4391, "step": 12274 }, { "epoch": 0.7474956611758975, "grad_norm": 1.0838534249535798, "learning_rate": 4.813595778785447e-06, "loss": 0.3883, "step": 12275 }, { "epoch": 0.7475565569527753, "grad_norm": 1.002137465487846, "learning_rate": 4.813565543112068e-06, "loss": 0.4123, "step": 12276 }, { "epoch": 0.7476174527296532, "grad_norm": 0.9758194409790849, "learning_rate": 4.813535305081677e-06, "loss": 0.4481, "step": 12277 }, { "epoch": 0.747678348506531, "grad_norm": 0.9614700932512557, "learning_rate": 4.813505064694305e-06, "loss": 0.4262, "step": 12278 }, { "epoch": 0.747739244283409, "grad_norm": 1.0116153424629948, "learning_rate": 4.81347482194998e-06, "loss": 0.4186, "step": 12279 }, { "epoch": 0.7478001400602868, "grad_norm": 1.0371148776455827, "learning_rate": 4.813444576848737e-06, "loss": 0.3447, "step": 12280 }, { "epoch": 0.7478610358371647, "grad_norm": 0.9793886897669134, "learning_rate": 4.8134143293906035e-06, "loss": 0.4886, "step": 12281 }, { "epoch": 0.7479219316140425, "grad_norm": 1.0774093635417374, "learning_rate": 4.813384079575612e-06, "loss": 0.4085, "step": 12282 }, { "epoch": 0.7479828273909205, "grad_norm": 1.014754360816434, "learning_rate": 4.813353827403793e-06, "loss": 0.4754, "step": 12283 }, { "epoch": 0.7480437231677983, "grad_norm": 1.0532766475858641, "learning_rate": 4.813323572875177e-06, "loss": 0.3863, "step": 12284 }, { "epoch": 0.7481046189446762, "grad_norm": 0.9919269812201309, "learning_rate": 4.813293315989796e-06, "loss": 0.3753, "step": 12285 }, { "epoch": 0.748165514721554, "grad_norm": 0.9352144324711303, "learning_rate": 4.813263056747678e-06, "loss": 0.4943, "step": 12286 }, { "epoch": 0.748226410498432, "grad_norm": 0.9213641621180542, "learning_rate": 4.813232795148856e-06, "loss": 0.4312, "step": 12287 }, { "epoch": 0.7482873062753098, "grad_norm": 0.9679397474840722, "learning_rate": 4.813202531193362e-06, "loss": 0.4388, "step": 12288 }, { "epoch": 0.7483482020521877, "grad_norm": 1.0335001286209866, "learning_rate": 4.813172264881224e-06, "loss": 0.3943, "step": 12289 }, { "epoch": 0.7484090978290655, "grad_norm": 1.0495274769224527, "learning_rate": 4.813141996212476e-06, "loss": 0.4407, "step": 12290 }, { "epoch": 0.7484699936059435, "grad_norm": 1.0108350339419736, "learning_rate": 4.813111725187145e-06, "loss": 0.3602, "step": 12291 }, { "epoch": 0.7485308893828213, "grad_norm": 0.9643979901560995, "learning_rate": 4.813081451805265e-06, "loss": 0.4641, "step": 12292 }, { "epoch": 0.7485917851596992, "grad_norm": 1.0060289310667294, "learning_rate": 4.813051176066865e-06, "loss": 0.382, "step": 12293 }, { "epoch": 0.748652680936577, "grad_norm": 0.9880285200518549, "learning_rate": 4.813020897971977e-06, "loss": 0.379, "step": 12294 }, { "epoch": 0.748713576713455, "grad_norm": 0.8950879926252695, "learning_rate": 4.812990617520632e-06, "loss": 0.5283, "step": 12295 }, { "epoch": 0.7487744724903328, "grad_norm": 0.9695172381072482, "learning_rate": 4.812960334712859e-06, "loss": 0.4346, "step": 12296 }, { "epoch": 0.7488353682672106, "grad_norm": 0.9973975300149688, "learning_rate": 4.812930049548691e-06, "loss": 0.4207, "step": 12297 }, { "epoch": 0.7488962640440885, "grad_norm": 0.9583911738670096, "learning_rate": 4.812899762028157e-06, "loss": 0.4307, "step": 12298 }, { "epoch": 0.7489571598209664, "grad_norm": 1.0019186592049503, "learning_rate": 4.81286947215129e-06, "loss": 0.4189, "step": 12299 }, { "epoch": 0.7490180555978443, "grad_norm": 1.1734439175513747, "learning_rate": 4.812839179918118e-06, "loss": 0.4192, "step": 12300 }, { "epoch": 0.7490789513747221, "grad_norm": 1.0254582852322824, "learning_rate": 4.812808885328675e-06, "loss": 0.3789, "step": 12301 }, { "epoch": 0.7491398471516001, "grad_norm": 1.0525653251184606, "learning_rate": 4.81277858838299e-06, "loss": 0.4243, "step": 12302 }, { "epoch": 0.7492007429284779, "grad_norm": 1.1007430531796032, "learning_rate": 4.812748289081095e-06, "loss": 0.4519, "step": 12303 }, { "epoch": 0.7492616387053558, "grad_norm": 0.9713752325142969, "learning_rate": 4.812717987423019e-06, "loss": 0.4324, "step": 12304 }, { "epoch": 0.7493225344822336, "grad_norm": 1.0108503752648759, "learning_rate": 4.812687683408794e-06, "loss": 0.4025, "step": 12305 }, { "epoch": 0.7493834302591116, "grad_norm": 0.9488452085725082, "learning_rate": 4.8126573770384514e-06, "loss": 0.4602, "step": 12306 }, { "epoch": 0.7494443260359894, "grad_norm": 1.1165617589576224, "learning_rate": 4.812627068312021e-06, "loss": 0.3731, "step": 12307 }, { "epoch": 0.7495052218128673, "grad_norm": 1.055521247840377, "learning_rate": 4.812596757229535e-06, "loss": 0.3727, "step": 12308 }, { "epoch": 0.7495661175897451, "grad_norm": 0.975890490555377, "learning_rate": 4.8125664437910236e-06, "loss": 0.4281, "step": 12309 }, { "epoch": 0.7496270133666231, "grad_norm": 1.0117786538856486, "learning_rate": 4.812536127996517e-06, "loss": 0.4074, "step": 12310 }, { "epoch": 0.7496879091435009, "grad_norm": 1.0128203270499294, "learning_rate": 4.8125058098460465e-06, "loss": 0.4082, "step": 12311 }, { "epoch": 0.7497488049203788, "grad_norm": 0.9722004697868167, "learning_rate": 4.812475489339644e-06, "loss": 0.4233, "step": 12312 }, { "epoch": 0.7498097006972566, "grad_norm": 0.9825128719316545, "learning_rate": 4.812445166477338e-06, "loss": 0.4556, "step": 12313 }, { "epoch": 0.7498705964741346, "grad_norm": 0.9905440822839195, "learning_rate": 4.812414841259162e-06, "loss": 0.4546, "step": 12314 }, { "epoch": 0.7499314922510124, "grad_norm": 1.1106274246281742, "learning_rate": 4.812384513685146e-06, "loss": 0.3569, "step": 12315 }, { "epoch": 0.7499923880278903, "grad_norm": 1.0819797409365086, "learning_rate": 4.812354183755321e-06, "loss": 0.3898, "step": 12316 }, { "epoch": 0.7500532838047681, "grad_norm": 0.9882773714576718, "learning_rate": 4.812323851469717e-06, "loss": 0.4251, "step": 12317 }, { "epoch": 0.7501141795816461, "grad_norm": 1.008322597379449, "learning_rate": 4.8122935168283655e-06, "loss": 0.375, "step": 12318 }, { "epoch": 0.7501750753585239, "grad_norm": 0.9736262993884812, "learning_rate": 4.812263179831298e-06, "loss": 0.4431, "step": 12319 }, { "epoch": 0.7502359711354017, "grad_norm": 0.9674666698440513, "learning_rate": 4.812232840478544e-06, "loss": 0.4071, "step": 12320 }, { "epoch": 0.7502968669122796, "grad_norm": 1.0523655580396456, "learning_rate": 4.812202498770136e-06, "loss": 0.3711, "step": 12321 }, { "epoch": 0.7503577626891575, "grad_norm": 0.9893269333343842, "learning_rate": 4.812172154706104e-06, "loss": 0.4121, "step": 12322 }, { "epoch": 0.7504186584660354, "grad_norm": 1.0559963658651106, "learning_rate": 4.8121418082864785e-06, "loss": 0.4313, "step": 12323 }, { "epoch": 0.7504795542429132, "grad_norm": 1.0079017931513619, "learning_rate": 4.812111459511291e-06, "loss": 0.4798, "step": 12324 }, { "epoch": 0.7505404500197911, "grad_norm": 1.008379518459967, "learning_rate": 4.812081108380573e-06, "loss": 0.4533, "step": 12325 }, { "epoch": 0.750601345796669, "grad_norm": 0.9476445091355604, "learning_rate": 4.812050754894355e-06, "loss": 0.3766, "step": 12326 }, { "epoch": 0.7506622415735469, "grad_norm": 0.97398451964911, "learning_rate": 4.8120203990526675e-06, "loss": 0.4201, "step": 12327 }, { "epoch": 0.7507231373504247, "grad_norm": 0.9915000405914315, "learning_rate": 4.811990040855542e-06, "loss": 0.4865, "step": 12328 }, { "epoch": 0.7507840331273026, "grad_norm": 0.9872488830916155, "learning_rate": 4.811959680303009e-06, "loss": 0.4238, "step": 12329 }, { "epoch": 0.7508449289041805, "grad_norm": 1.1087528334327257, "learning_rate": 4.8119293173950985e-06, "loss": 0.4181, "step": 12330 }, { "epoch": 0.7509058246810584, "grad_norm": 1.1045730614667975, "learning_rate": 4.811898952131844e-06, "loss": 0.4776, "step": 12331 }, { "epoch": 0.7509667204579362, "grad_norm": 0.9712475237486787, "learning_rate": 4.811868584513274e-06, "loss": 0.4022, "step": 12332 }, { "epoch": 0.7510276162348141, "grad_norm": 0.9648414724037931, "learning_rate": 4.81183821453942e-06, "loss": 0.4199, "step": 12333 }, { "epoch": 0.751088512011692, "grad_norm": 0.9705026206546331, "learning_rate": 4.8118078422103146e-06, "loss": 0.4709, "step": 12334 }, { "epoch": 0.7511494077885699, "grad_norm": 0.9149588499951282, "learning_rate": 4.811777467525986e-06, "loss": 0.5017, "step": 12335 }, { "epoch": 0.7512103035654477, "grad_norm": 1.0265765597572774, "learning_rate": 4.8117470904864675e-06, "loss": 0.424, "step": 12336 }, { "epoch": 0.7512711993423256, "grad_norm": 0.9793874969733943, "learning_rate": 4.811716711091789e-06, "loss": 0.4184, "step": 12337 }, { "epoch": 0.7513320951192035, "grad_norm": 0.93073238982874, "learning_rate": 4.811686329341981e-06, "loss": 0.4506, "step": 12338 }, { "epoch": 0.7513929908960814, "grad_norm": 1.0220831307647777, "learning_rate": 4.811655945237076e-06, "loss": 0.4127, "step": 12339 }, { "epoch": 0.7514538866729592, "grad_norm": 1.1175867862778524, "learning_rate": 4.811625558777103e-06, "loss": 0.445, "step": 12340 }, { "epoch": 0.751514782449837, "grad_norm": 1.0358051379922266, "learning_rate": 4.811595169962094e-06, "loss": 0.3993, "step": 12341 }, { "epoch": 0.751575678226715, "grad_norm": 0.9977881447989416, "learning_rate": 4.811564778792081e-06, "loss": 0.4288, "step": 12342 }, { "epoch": 0.7516365740035928, "grad_norm": 0.972948132920042, "learning_rate": 4.811534385267093e-06, "loss": 0.4144, "step": 12343 }, { "epoch": 0.7516974697804707, "grad_norm": 0.977304837372828, "learning_rate": 4.811503989387161e-06, "loss": 0.3951, "step": 12344 }, { "epoch": 0.7517583655573487, "grad_norm": 0.9263947534957987, "learning_rate": 4.8114735911523194e-06, "loss": 0.4432, "step": 12345 }, { "epoch": 0.7518192613342265, "grad_norm": 1.052108067613392, "learning_rate": 4.811443190562595e-06, "loss": 0.3971, "step": 12346 }, { "epoch": 0.7518801571111043, "grad_norm": 1.020184665545349, "learning_rate": 4.811412787618019e-06, "loss": 0.4099, "step": 12347 }, { "epoch": 0.7519410528879822, "grad_norm": 0.9945935176275129, "learning_rate": 4.811382382318626e-06, "loss": 0.4198, "step": 12348 }, { "epoch": 0.7520019486648601, "grad_norm": 1.033576862450199, "learning_rate": 4.811351974664443e-06, "loss": 0.4689, "step": 12349 }, { "epoch": 0.752062844441738, "grad_norm": 1.0568862895754938, "learning_rate": 4.811321564655503e-06, "loss": 0.4182, "step": 12350 }, { "epoch": 0.7521237402186158, "grad_norm": 1.0498217610793703, "learning_rate": 4.811291152291838e-06, "loss": 0.4168, "step": 12351 }, { "epoch": 0.7521846359954937, "grad_norm": 1.0484903319991397, "learning_rate": 4.811260737573476e-06, "loss": 0.491, "step": 12352 }, { "epoch": 0.7522455317723716, "grad_norm": 1.0617869080382007, "learning_rate": 4.8112303205004504e-06, "loss": 0.3775, "step": 12353 }, { "epoch": 0.7523064275492495, "grad_norm": 1.030532633564949, "learning_rate": 4.811199901072792e-06, "loss": 0.3964, "step": 12354 }, { "epoch": 0.7523673233261273, "grad_norm": 1.0021773551409783, "learning_rate": 4.8111694792905295e-06, "loss": 0.429, "step": 12355 }, { "epoch": 0.7524282191030052, "grad_norm": 0.893616106407899, "learning_rate": 4.811139055153697e-06, "loss": 0.4899, "step": 12356 }, { "epoch": 0.7524891148798831, "grad_norm": 0.9872002398409527, "learning_rate": 4.811108628662323e-06, "loss": 0.3667, "step": 12357 }, { "epoch": 0.752550010656761, "grad_norm": 0.9900281059047101, "learning_rate": 4.8110781998164404e-06, "loss": 0.4571, "step": 12358 }, { "epoch": 0.7526109064336388, "grad_norm": 0.9732979317561089, "learning_rate": 4.811047768616079e-06, "loss": 0.403, "step": 12359 }, { "epoch": 0.7526718022105167, "grad_norm": 1.001959075045483, "learning_rate": 4.811017335061271e-06, "loss": 0.4197, "step": 12360 }, { "epoch": 0.7527326979873946, "grad_norm": 0.984923937017285, "learning_rate": 4.810986899152046e-06, "loss": 0.3433, "step": 12361 }, { "epoch": 0.7527935937642725, "grad_norm": 1.0569738170053693, "learning_rate": 4.810956460888435e-06, "loss": 0.455, "step": 12362 }, { "epoch": 0.7528544895411503, "grad_norm": 0.9864381346690697, "learning_rate": 4.81092602027047e-06, "loss": 0.3991, "step": 12363 }, { "epoch": 0.7529153853180282, "grad_norm": 1.0633455081951562, "learning_rate": 4.810895577298182e-06, "loss": 0.4238, "step": 12364 }, { "epoch": 0.7529762810949061, "grad_norm": 1.03396952752175, "learning_rate": 4.810865131971602e-06, "loss": 0.3746, "step": 12365 }, { "epoch": 0.753037176871784, "grad_norm": 1.0141009874069113, "learning_rate": 4.810834684290759e-06, "loss": 0.4286, "step": 12366 }, { "epoch": 0.7530980726486618, "grad_norm": 0.941625234515785, "learning_rate": 4.810804234255687e-06, "loss": 0.4758, "step": 12367 }, { "epoch": 0.7531589684255396, "grad_norm": 1.037452207724749, "learning_rate": 4.810773781866415e-06, "loss": 0.397, "step": 12368 }, { "epoch": 0.7532198642024176, "grad_norm": 1.018397586975057, "learning_rate": 4.810743327122975e-06, "loss": 0.4204, "step": 12369 }, { "epoch": 0.7532807599792954, "grad_norm": 0.9378034611504955, "learning_rate": 4.810712870025398e-06, "loss": 0.4671, "step": 12370 }, { "epoch": 0.7533416557561733, "grad_norm": 1.0739268774281843, "learning_rate": 4.810682410573715e-06, "loss": 0.361, "step": 12371 }, { "epoch": 0.7534025515330511, "grad_norm": 0.9949907741936609, "learning_rate": 4.810651948767956e-06, "loss": 0.4002, "step": 12372 }, { "epoch": 0.7534634473099291, "grad_norm": 0.9982543422215596, "learning_rate": 4.810621484608153e-06, "loss": 0.3998, "step": 12373 }, { "epoch": 0.7535243430868069, "grad_norm": 0.9284246453333874, "learning_rate": 4.810591018094337e-06, "loss": 0.4415, "step": 12374 }, { "epoch": 0.7535852388636848, "grad_norm": 0.9852965101209674, "learning_rate": 4.81056054922654e-06, "loss": 0.4009, "step": 12375 }, { "epoch": 0.7536461346405626, "grad_norm": 0.9380978223011259, "learning_rate": 4.81053007800479e-06, "loss": 0.4506, "step": 12376 }, { "epoch": 0.7537070304174406, "grad_norm": 0.981059935437083, "learning_rate": 4.810499604429121e-06, "loss": 0.3863, "step": 12377 }, { "epoch": 0.7537679261943184, "grad_norm": 1.1655371675704194, "learning_rate": 4.810469128499563e-06, "loss": 0.3674, "step": 12378 }, { "epoch": 0.7538288219711963, "grad_norm": 1.028522873005049, "learning_rate": 4.8104386502161475e-06, "loss": 0.4415, "step": 12379 }, { "epoch": 0.7538897177480741, "grad_norm": 1.0532465855329123, "learning_rate": 4.810408169578905e-06, "loss": 0.4153, "step": 12380 }, { "epoch": 0.7539506135249521, "grad_norm": 1.014709008656167, "learning_rate": 4.810377686587866e-06, "loss": 0.4534, "step": 12381 }, { "epoch": 0.7540115093018299, "grad_norm": 1.1025260086615374, "learning_rate": 4.810347201243063e-06, "loss": 0.3875, "step": 12382 }, { "epoch": 0.7540724050787078, "grad_norm": 1.0493117414273294, "learning_rate": 4.810316713544526e-06, "loss": 0.3774, "step": 12383 }, { "epoch": 0.7541333008555857, "grad_norm": 0.9586791937438466, "learning_rate": 4.810286223492286e-06, "loss": 0.4465, "step": 12384 }, { "epoch": 0.7541941966324636, "grad_norm": 1.0482966165655714, "learning_rate": 4.8102557310863744e-06, "loss": 0.4374, "step": 12385 }, { "epoch": 0.7542550924093414, "grad_norm": 0.9736165763863719, "learning_rate": 4.810225236326822e-06, "loss": 0.406, "step": 12386 }, { "epoch": 0.7543159881862193, "grad_norm": 0.9678778099739466, "learning_rate": 4.810194739213661e-06, "loss": 0.4033, "step": 12387 }, { "epoch": 0.7543768839630972, "grad_norm": 1.0465439302259196, "learning_rate": 4.810164239746922e-06, "loss": 0.3886, "step": 12388 }, { "epoch": 0.7544377797399751, "grad_norm": 1.0128325341382574, "learning_rate": 4.810133737926635e-06, "loss": 0.3722, "step": 12389 }, { "epoch": 0.7544986755168529, "grad_norm": 0.9610734686413759, "learning_rate": 4.810103233752832e-06, "loss": 0.4467, "step": 12390 }, { "epoch": 0.7545595712937307, "grad_norm": 0.9543063465047177, "learning_rate": 4.8100727272255435e-06, "loss": 0.3999, "step": 12391 }, { "epoch": 0.7546204670706087, "grad_norm": 1.0244616762967285, "learning_rate": 4.810042218344802e-06, "loss": 0.3721, "step": 12392 }, { "epoch": 0.7546813628474865, "grad_norm": 1.0382764586077218, "learning_rate": 4.810011707110636e-06, "loss": 0.4016, "step": 12393 }, { "epoch": 0.7547422586243644, "grad_norm": 0.9768761016618149, "learning_rate": 4.809981193523079e-06, "loss": 0.3856, "step": 12394 }, { "epoch": 0.7548031544012422, "grad_norm": 0.9151653211657594, "learning_rate": 4.809950677582161e-06, "loss": 0.4534, "step": 12395 }, { "epoch": 0.7548640501781202, "grad_norm": 0.9173508294997843, "learning_rate": 4.809920159287913e-06, "loss": 0.4876, "step": 12396 }, { "epoch": 0.754924945954998, "grad_norm": 0.9606646898535601, "learning_rate": 4.809889638640367e-06, "loss": 0.4027, "step": 12397 }, { "epoch": 0.7549858417318759, "grad_norm": 0.9915439900351306, "learning_rate": 4.8098591156395526e-06, "loss": 0.4347, "step": 12398 }, { "epoch": 0.7550467375087537, "grad_norm": 1.0079857813052715, "learning_rate": 4.8098285902855025e-06, "loss": 0.4212, "step": 12399 }, { "epoch": 0.7551076332856317, "grad_norm": 1.1036615503831404, "learning_rate": 4.809798062578247e-06, "loss": 0.5055, "step": 12400 }, { "epoch": 0.7551685290625095, "grad_norm": 1.07655150956737, "learning_rate": 4.809767532517817e-06, "loss": 0.4122, "step": 12401 }, { "epoch": 0.7552294248393874, "grad_norm": 1.0094064370224898, "learning_rate": 4.809737000104244e-06, "loss": 0.4492, "step": 12402 }, { "epoch": 0.7552903206162652, "grad_norm": 0.987737826916768, "learning_rate": 4.809706465337559e-06, "loss": 0.466, "step": 12403 }, { "epoch": 0.7553512163931432, "grad_norm": 0.9380185134940173, "learning_rate": 4.809675928217793e-06, "loss": 0.4492, "step": 12404 }, { "epoch": 0.755412112170021, "grad_norm": 1.0034409568199456, "learning_rate": 4.809645388744977e-06, "loss": 0.5012, "step": 12405 }, { "epoch": 0.7554730079468989, "grad_norm": 1.0473521501446477, "learning_rate": 4.809614846919142e-06, "loss": 0.4403, "step": 12406 }, { "epoch": 0.7555339037237767, "grad_norm": 1.0726584562474966, "learning_rate": 4.80958430274032e-06, "loss": 0.3484, "step": 12407 }, { "epoch": 0.7555947995006547, "grad_norm": 0.9549203599400969, "learning_rate": 4.809553756208541e-06, "loss": 0.4546, "step": 12408 }, { "epoch": 0.7556556952775325, "grad_norm": 0.9720756285337271, "learning_rate": 4.809523207323837e-06, "loss": 0.4075, "step": 12409 }, { "epoch": 0.7557165910544104, "grad_norm": 1.0062419997835073, "learning_rate": 4.809492656086239e-06, "loss": 0.3827, "step": 12410 }, { "epoch": 0.7557774868312882, "grad_norm": 0.9054141776911234, "learning_rate": 4.809462102495778e-06, "loss": 0.4216, "step": 12411 }, { "epoch": 0.7558383826081662, "grad_norm": 0.9979617683893383, "learning_rate": 4.809431546552484e-06, "loss": 0.344, "step": 12412 }, { "epoch": 0.755899278385044, "grad_norm": 1.0434306839784375, "learning_rate": 4.809400988256391e-06, "loss": 0.4049, "step": 12413 }, { "epoch": 0.7559601741619218, "grad_norm": 0.9421426280251548, "learning_rate": 4.809370427607527e-06, "loss": 0.4015, "step": 12414 }, { "epoch": 0.7560210699387997, "grad_norm": 1.0558876369311696, "learning_rate": 4.809339864605924e-06, "loss": 0.4436, "step": 12415 }, { "epoch": 0.7560819657156777, "grad_norm": 0.9415055221287417, "learning_rate": 4.809309299251614e-06, "loss": 0.4456, "step": 12416 }, { "epoch": 0.7561428614925555, "grad_norm": 0.9853949796421223, "learning_rate": 4.8092787315446285e-06, "loss": 0.4884, "step": 12417 }, { "epoch": 0.7562037572694333, "grad_norm": 1.0171121944221688, "learning_rate": 4.809248161484998e-06, "loss": 0.3879, "step": 12418 }, { "epoch": 0.7562646530463112, "grad_norm": 1.0327613204097648, "learning_rate": 4.8092175890727515e-06, "loss": 0.4317, "step": 12419 }, { "epoch": 0.7563255488231891, "grad_norm": 1.056997949950676, "learning_rate": 4.809187014307924e-06, "loss": 0.4515, "step": 12420 }, { "epoch": 0.756386444600067, "grad_norm": 1.0867847869669407, "learning_rate": 4.809156437190543e-06, "loss": 0.3409, "step": 12421 }, { "epoch": 0.7564473403769448, "grad_norm": 0.9542082885043561, "learning_rate": 4.809125857720643e-06, "loss": 0.4375, "step": 12422 }, { "epoch": 0.7565082361538227, "grad_norm": 1.0639430123930549, "learning_rate": 4.809095275898253e-06, "loss": 0.3855, "step": 12423 }, { "epoch": 0.7565691319307006, "grad_norm": 0.9846381721049937, "learning_rate": 4.809064691723405e-06, "loss": 0.4319, "step": 12424 }, { "epoch": 0.7566300277075785, "grad_norm": 1.0332733893802126, "learning_rate": 4.80903410519613e-06, "loss": 0.4504, "step": 12425 }, { "epoch": 0.7566909234844563, "grad_norm": 1.1395892109282388, "learning_rate": 4.809003516316458e-06, "loss": 0.4286, "step": 12426 }, { "epoch": 0.7567518192613343, "grad_norm": 0.9746244079223141, "learning_rate": 4.808972925084423e-06, "loss": 0.4336, "step": 12427 }, { "epoch": 0.7568127150382121, "grad_norm": 1.0738085221173823, "learning_rate": 4.808942331500053e-06, "loss": 0.404, "step": 12428 }, { "epoch": 0.75687361081509, "grad_norm": 1.0078212766286887, "learning_rate": 4.808911735563381e-06, "loss": 0.4095, "step": 12429 }, { "epoch": 0.7569345065919678, "grad_norm": 0.978292945184026, "learning_rate": 4.808881137274437e-06, "loss": 0.4442, "step": 12430 }, { "epoch": 0.7569954023688458, "grad_norm": 1.031753008014521, "learning_rate": 4.808850536633254e-06, "loss": 0.3744, "step": 12431 }, { "epoch": 0.7570562981457236, "grad_norm": 1.0103909253409988, "learning_rate": 4.808819933639862e-06, "loss": 0.3972, "step": 12432 }, { "epoch": 0.7571171939226015, "grad_norm": 1.0434079017171214, "learning_rate": 4.808789328294291e-06, "loss": 0.4155, "step": 12433 }, { "epoch": 0.7571780896994793, "grad_norm": 1.0644222843695381, "learning_rate": 4.808758720596574e-06, "loss": 0.4138, "step": 12434 }, { "epoch": 0.7572389854763573, "grad_norm": 1.0507465186279414, "learning_rate": 4.808728110546743e-06, "loss": 0.4238, "step": 12435 }, { "epoch": 0.7572998812532351, "grad_norm": 1.0187775886036936, "learning_rate": 4.808697498144827e-06, "loss": 0.4259, "step": 12436 }, { "epoch": 0.757360777030113, "grad_norm": 1.1550285084886995, "learning_rate": 4.808666883390858e-06, "loss": 0.4163, "step": 12437 }, { "epoch": 0.7574216728069908, "grad_norm": 1.002057694880759, "learning_rate": 4.8086362662848666e-06, "loss": 0.4213, "step": 12438 }, { "epoch": 0.7574825685838688, "grad_norm": 0.9750664069789279, "learning_rate": 4.808605646826885e-06, "loss": 0.4189, "step": 12439 }, { "epoch": 0.7575434643607466, "grad_norm": 1.1544956165189932, "learning_rate": 4.8085750250169436e-06, "loss": 0.3495, "step": 12440 }, { "epoch": 0.7576043601376244, "grad_norm": 1.0013812526029884, "learning_rate": 4.808544400855074e-06, "loss": 0.4399, "step": 12441 }, { "epoch": 0.7576652559145023, "grad_norm": 1.0369536403457245, "learning_rate": 4.808513774341308e-06, "loss": 0.3675, "step": 12442 }, { "epoch": 0.7577261516913802, "grad_norm": 0.9897815885637692, "learning_rate": 4.808483145475675e-06, "loss": 0.4049, "step": 12443 }, { "epoch": 0.7577870474682581, "grad_norm": 1.0186293592163977, "learning_rate": 4.808452514258208e-06, "loss": 0.4025, "step": 12444 }, { "epoch": 0.7578479432451359, "grad_norm": 0.9726095202325272, "learning_rate": 4.808421880688939e-06, "loss": 0.4491, "step": 12445 }, { "epoch": 0.7579088390220138, "grad_norm": 1.0679540464847368, "learning_rate": 4.808391244767896e-06, "loss": 0.3401, "step": 12446 }, { "epoch": 0.7579697347988917, "grad_norm": 0.950873853816483, "learning_rate": 4.808360606495112e-06, "loss": 0.4436, "step": 12447 }, { "epoch": 0.7580306305757696, "grad_norm": 1.0256805836796974, "learning_rate": 4.808329965870619e-06, "loss": 0.3845, "step": 12448 }, { "epoch": 0.7580915263526474, "grad_norm": 1.0212846656414925, "learning_rate": 4.8082993228944465e-06, "loss": 0.3308, "step": 12449 }, { "epoch": 0.7581524221295253, "grad_norm": 1.0176175489453259, "learning_rate": 4.808268677566628e-06, "loss": 0.4649, "step": 12450 }, { "epoch": 0.7582133179064032, "grad_norm": 1.031974078067242, "learning_rate": 4.808238029887192e-06, "loss": 0.4415, "step": 12451 }, { "epoch": 0.7582742136832811, "grad_norm": 0.9296084994985522, "learning_rate": 4.808207379856172e-06, "loss": 0.4035, "step": 12452 }, { "epoch": 0.7583351094601589, "grad_norm": 0.952298001745192, "learning_rate": 4.808176727473598e-06, "loss": 0.4611, "step": 12453 }, { "epoch": 0.7583960052370368, "grad_norm": 1.0448952397949351, "learning_rate": 4.808146072739501e-06, "loss": 0.379, "step": 12454 }, { "epoch": 0.7584569010139147, "grad_norm": 0.9077611806458112, "learning_rate": 4.808115415653913e-06, "loss": 0.4589, "step": 12455 }, { "epoch": 0.7585177967907926, "grad_norm": 1.0752327039062297, "learning_rate": 4.808084756216866e-06, "loss": 0.4036, "step": 12456 }, { "epoch": 0.7585786925676704, "grad_norm": 0.9538466104683283, "learning_rate": 4.808054094428389e-06, "loss": 0.4155, "step": 12457 }, { "epoch": 0.7586395883445483, "grad_norm": 0.9727105889116691, "learning_rate": 4.8080234302885156e-06, "loss": 0.4299, "step": 12458 }, { "epoch": 0.7587004841214262, "grad_norm": 0.975073163452236, "learning_rate": 4.807992763797275e-06, "loss": 0.5055, "step": 12459 }, { "epoch": 0.7587613798983041, "grad_norm": 0.9632715269294374, "learning_rate": 4.8079620949547005e-06, "loss": 0.442, "step": 12460 }, { "epoch": 0.7588222756751819, "grad_norm": 0.9904228318005891, "learning_rate": 4.807931423760821e-06, "loss": 0.4501, "step": 12461 }, { "epoch": 0.7588831714520597, "grad_norm": 0.986932505510325, "learning_rate": 4.8079007502156695e-06, "loss": 0.3689, "step": 12462 }, { "epoch": 0.7589440672289377, "grad_norm": 0.9790320170718706, "learning_rate": 4.807870074319276e-06, "loss": 0.4643, "step": 12463 }, { "epoch": 0.7590049630058155, "grad_norm": 1.0019039672192838, "learning_rate": 4.807839396071673e-06, "loss": 0.3783, "step": 12464 }, { "epoch": 0.7590658587826934, "grad_norm": 1.0311013019089268, "learning_rate": 4.807808715472891e-06, "loss": 0.3935, "step": 12465 }, { "epoch": 0.7591267545595713, "grad_norm": 1.066117657206384, "learning_rate": 4.807778032522961e-06, "loss": 0.4024, "step": 12466 }, { "epoch": 0.7591876503364492, "grad_norm": 1.0631956745896582, "learning_rate": 4.807747347221916e-06, "loss": 0.3913, "step": 12467 }, { "epoch": 0.759248546113327, "grad_norm": 0.9744231628313069, "learning_rate": 4.807716659569786e-06, "loss": 0.4456, "step": 12468 }, { "epoch": 0.7593094418902049, "grad_norm": 0.9964454948781631, "learning_rate": 4.8076859695666015e-06, "loss": 0.4201, "step": 12469 }, { "epoch": 0.7593703376670828, "grad_norm": 1.0217604036600902, "learning_rate": 4.807655277212394e-06, "loss": 0.4646, "step": 12470 }, { "epoch": 0.7594312334439607, "grad_norm": 0.988540804467585, "learning_rate": 4.8076245825071965e-06, "loss": 0.5213, "step": 12471 }, { "epoch": 0.7594921292208385, "grad_norm": 0.9932404245243046, "learning_rate": 4.807593885451038e-06, "loss": 0.348, "step": 12472 }, { "epoch": 0.7595530249977164, "grad_norm": 1.0673801623267165, "learning_rate": 4.807563186043951e-06, "loss": 0.4421, "step": 12473 }, { "epoch": 0.7596139207745943, "grad_norm": 1.1291387352375988, "learning_rate": 4.8075324842859674e-06, "loss": 0.4362, "step": 12474 }, { "epoch": 0.7596748165514722, "grad_norm": 1.0329497160142884, "learning_rate": 4.807501780177117e-06, "loss": 0.3985, "step": 12475 }, { "epoch": 0.75973571232835, "grad_norm": 1.0291306979400154, "learning_rate": 4.8074710737174315e-06, "loss": 0.4251, "step": 12476 }, { "epoch": 0.7597966081052279, "grad_norm": 0.9721994179835373, "learning_rate": 4.807440364906944e-06, "loss": 0.4401, "step": 12477 }, { "epoch": 0.7598575038821058, "grad_norm": 1.0647018849560177, "learning_rate": 4.807409653745683e-06, "loss": 0.3856, "step": 12478 }, { "epoch": 0.7599183996589837, "grad_norm": 0.9224753561674639, "learning_rate": 4.8073789402336805e-06, "loss": 0.4555, "step": 12479 }, { "epoch": 0.7599792954358615, "grad_norm": 1.0315393310535683, "learning_rate": 4.807348224370969e-06, "loss": 0.3764, "step": 12480 }, { "epoch": 0.7600401912127394, "grad_norm": 1.0174156638175613, "learning_rate": 4.807317506157579e-06, "loss": 0.4212, "step": 12481 }, { "epoch": 0.7601010869896173, "grad_norm": 1.0383442640282994, "learning_rate": 4.807286785593542e-06, "loss": 0.4088, "step": 12482 }, { "epoch": 0.7601619827664952, "grad_norm": 0.9199307246791882, "learning_rate": 4.807256062678889e-06, "loss": 0.466, "step": 12483 }, { "epoch": 0.760222878543373, "grad_norm": 0.9583357898731003, "learning_rate": 4.807225337413651e-06, "loss": 0.4656, "step": 12484 }, { "epoch": 0.7602837743202508, "grad_norm": 0.9836466558978731, "learning_rate": 4.807194609797861e-06, "loss": 0.436, "step": 12485 }, { "epoch": 0.7603446700971288, "grad_norm": 0.9239764502816191, "learning_rate": 4.807163879831548e-06, "loss": 0.4073, "step": 12486 }, { "epoch": 0.7604055658740067, "grad_norm": 0.998785434746075, "learning_rate": 4.8071331475147455e-06, "loss": 0.381, "step": 12487 }, { "epoch": 0.7604664616508845, "grad_norm": 0.9617777312170951, "learning_rate": 4.807102412847483e-06, "loss": 0.4301, "step": 12488 }, { "epoch": 0.7605273574277623, "grad_norm": 0.9448727493603354, "learning_rate": 4.8070716758297915e-06, "loss": 0.4744, "step": 12489 }, { "epoch": 0.7605882532046403, "grad_norm": 1.1172301028525833, "learning_rate": 4.807040936461705e-06, "loss": 0.3627, "step": 12490 }, { "epoch": 0.7606491489815181, "grad_norm": 1.0036954281770427, "learning_rate": 4.807010194743253e-06, "loss": 0.377, "step": 12491 }, { "epoch": 0.760710044758396, "grad_norm": 0.9753131853087437, "learning_rate": 4.806979450674467e-06, "loss": 0.4685, "step": 12492 }, { "epoch": 0.7607709405352738, "grad_norm": 1.035638880791734, "learning_rate": 4.806948704255377e-06, "loss": 0.3879, "step": 12493 }, { "epoch": 0.7608318363121518, "grad_norm": 0.9917964992295697, "learning_rate": 4.806917955486017e-06, "loss": 0.4438, "step": 12494 }, { "epoch": 0.7608927320890296, "grad_norm": 1.0095410093128676, "learning_rate": 4.806887204366416e-06, "loss": 0.397, "step": 12495 }, { "epoch": 0.7609536278659075, "grad_norm": 1.0024203175148279, "learning_rate": 4.806856450896608e-06, "loss": 0.4715, "step": 12496 }, { "epoch": 0.7610145236427853, "grad_norm": 0.950035790754671, "learning_rate": 4.80682569507662e-06, "loss": 0.4135, "step": 12497 }, { "epoch": 0.7610754194196633, "grad_norm": 0.9351053649094054, "learning_rate": 4.8067949369064884e-06, "loss": 0.3942, "step": 12498 }, { "epoch": 0.7611363151965411, "grad_norm": 1.0382738049320923, "learning_rate": 4.80676417638624e-06, "loss": 0.4489, "step": 12499 }, { "epoch": 0.761197210973419, "grad_norm": 0.9598869068429333, "learning_rate": 4.806733413515909e-06, "loss": 0.3781, "step": 12500 }, { "epoch": 0.7612581067502968, "grad_norm": 1.0881799881492675, "learning_rate": 4.806702648295527e-06, "loss": 0.3647, "step": 12501 }, { "epoch": 0.7613190025271748, "grad_norm": 0.9736240598964426, "learning_rate": 4.8066718807251234e-06, "loss": 0.4529, "step": 12502 }, { "epoch": 0.7613798983040526, "grad_norm": 1.0246996244671531, "learning_rate": 4.8066411108047305e-06, "loss": 0.3671, "step": 12503 }, { "epoch": 0.7614407940809305, "grad_norm": 0.9237087715298364, "learning_rate": 4.806610338534379e-06, "loss": 0.4983, "step": 12504 }, { "epoch": 0.7615016898578083, "grad_norm": 0.9302584621988275, "learning_rate": 4.806579563914102e-06, "loss": 0.4089, "step": 12505 }, { "epoch": 0.7615625856346863, "grad_norm": 1.061618702504618, "learning_rate": 4.806548786943929e-06, "loss": 0.4121, "step": 12506 }, { "epoch": 0.7616234814115641, "grad_norm": 1.0236360654147305, "learning_rate": 4.806518007623892e-06, "loss": 0.4143, "step": 12507 }, { "epoch": 0.761684377188442, "grad_norm": 0.9683428414613496, "learning_rate": 4.806487225954023e-06, "loss": 0.4566, "step": 12508 }, { "epoch": 0.7617452729653199, "grad_norm": 1.0217545949980338, "learning_rate": 4.806456441934351e-06, "loss": 0.462, "step": 12509 }, { "epoch": 0.7618061687421978, "grad_norm": 0.9826687895110144, "learning_rate": 4.8064256555649115e-06, "loss": 0.4029, "step": 12510 }, { "epoch": 0.7618670645190756, "grad_norm": 0.9880433588099539, "learning_rate": 4.806394866845733e-06, "loss": 0.5184, "step": 12511 }, { "epoch": 0.7619279602959534, "grad_norm": 0.9591118756557824, "learning_rate": 4.806364075776847e-06, "loss": 0.4168, "step": 12512 }, { "epoch": 0.7619888560728314, "grad_norm": 1.0944445204437943, "learning_rate": 4.806333282358284e-06, "loss": 0.3199, "step": 12513 }, { "epoch": 0.7620497518497092, "grad_norm": 0.9568371004221425, "learning_rate": 4.806302486590078e-06, "loss": 0.4028, "step": 12514 }, { "epoch": 0.7621106476265871, "grad_norm": 1.0297378511816715, "learning_rate": 4.806271688472259e-06, "loss": 0.4713, "step": 12515 }, { "epoch": 0.7621715434034649, "grad_norm": 1.1341178367689082, "learning_rate": 4.806240888004858e-06, "loss": 0.3857, "step": 12516 }, { "epoch": 0.7622324391803429, "grad_norm": 0.9508147549880874, "learning_rate": 4.8062100851879065e-06, "loss": 0.4131, "step": 12517 }, { "epoch": 0.7622933349572207, "grad_norm": 0.9687241582086175, "learning_rate": 4.806179280021436e-06, "loss": 0.4907, "step": 12518 }, { "epoch": 0.7623542307340986, "grad_norm": 0.9431386366888977, "learning_rate": 4.806148472505479e-06, "loss": 0.4098, "step": 12519 }, { "epoch": 0.7624151265109764, "grad_norm": 0.9871075682341156, "learning_rate": 4.806117662640065e-06, "loss": 0.3746, "step": 12520 }, { "epoch": 0.7624760222878544, "grad_norm": 0.9527065265086795, "learning_rate": 4.806086850425226e-06, "loss": 0.4387, "step": 12521 }, { "epoch": 0.7625369180647322, "grad_norm": 1.0309216870945175, "learning_rate": 4.806056035860994e-06, "loss": 0.4515, "step": 12522 }, { "epoch": 0.7625978138416101, "grad_norm": 1.011335155409051, "learning_rate": 4.806025218947401e-06, "loss": 0.3969, "step": 12523 }, { "epoch": 0.7626587096184879, "grad_norm": 0.9495985284555469, "learning_rate": 4.805994399684476e-06, "loss": 0.424, "step": 12524 }, { "epoch": 0.7627196053953659, "grad_norm": 1.0452696968583866, "learning_rate": 4.805963578072253e-06, "loss": 0.3665, "step": 12525 }, { "epoch": 0.7627805011722437, "grad_norm": 0.9866351720325519, "learning_rate": 4.8059327541107614e-06, "loss": 0.4324, "step": 12526 }, { "epoch": 0.7628413969491216, "grad_norm": 1.0379946719183464, "learning_rate": 4.805901927800034e-06, "loss": 0.4112, "step": 12527 }, { "epoch": 0.7629022927259994, "grad_norm": 0.9847418633513918, "learning_rate": 4.805871099140101e-06, "loss": 0.4646, "step": 12528 }, { "epoch": 0.7629631885028774, "grad_norm": 0.9338497056402852, "learning_rate": 4.805840268130996e-06, "loss": 0.5008, "step": 12529 }, { "epoch": 0.7630240842797552, "grad_norm": 1.0955494311884408, "learning_rate": 4.805809434772747e-06, "loss": 0.4354, "step": 12530 }, { "epoch": 0.763084980056633, "grad_norm": 1.0707346527035655, "learning_rate": 4.805778599065388e-06, "loss": 0.4138, "step": 12531 }, { "epoch": 0.7631458758335109, "grad_norm": 0.9687917382345025, "learning_rate": 4.80574776100895e-06, "loss": 0.41, "step": 12532 }, { "epoch": 0.7632067716103889, "grad_norm": 1.0395322235396829, "learning_rate": 4.8057169206034635e-06, "loss": 0.4605, "step": 12533 }, { "epoch": 0.7632676673872667, "grad_norm": 0.9467360326985292, "learning_rate": 4.8056860778489614e-06, "loss": 0.473, "step": 12534 }, { "epoch": 0.7633285631641445, "grad_norm": 0.9871966171856148, "learning_rate": 4.805655232745474e-06, "loss": 0.4302, "step": 12535 }, { "epoch": 0.7633894589410224, "grad_norm": 1.0423043732348023, "learning_rate": 4.805624385293033e-06, "loss": 0.4247, "step": 12536 }, { "epoch": 0.7634503547179003, "grad_norm": 1.0052322278304489, "learning_rate": 4.80559353549167e-06, "loss": 0.3925, "step": 12537 }, { "epoch": 0.7635112504947782, "grad_norm": 0.9963160232730389, "learning_rate": 4.805562683341415e-06, "loss": 0.4469, "step": 12538 }, { "epoch": 0.763572146271656, "grad_norm": 0.9904816539460317, "learning_rate": 4.805531828842301e-06, "loss": 0.3958, "step": 12539 }, { "epoch": 0.7636330420485339, "grad_norm": 1.0170119073639117, "learning_rate": 4.80550097199436e-06, "loss": 0.3763, "step": 12540 }, { "epoch": 0.7636939378254118, "grad_norm": 0.8967643962219198, "learning_rate": 4.805470112797622e-06, "loss": 0.4504, "step": 12541 }, { "epoch": 0.7637548336022897, "grad_norm": 0.9173773950140155, "learning_rate": 4.80543925125212e-06, "loss": 0.3979, "step": 12542 }, { "epoch": 0.7638157293791675, "grad_norm": 0.9589642718184688, "learning_rate": 4.805408387357883e-06, "loss": 0.4447, "step": 12543 }, { "epoch": 0.7638766251560454, "grad_norm": 0.9487311800628426, "learning_rate": 4.805377521114945e-06, "loss": 0.4252, "step": 12544 }, { "epoch": 0.7639375209329233, "grad_norm": 0.9520501607754458, "learning_rate": 4.805346652523335e-06, "loss": 0.4513, "step": 12545 }, { "epoch": 0.7639984167098012, "grad_norm": 0.991016064875841, "learning_rate": 4.805315781583086e-06, "loss": 0.4209, "step": 12546 }, { "epoch": 0.764059312486679, "grad_norm": 0.9225509377755874, "learning_rate": 4.805284908294231e-06, "loss": 0.4252, "step": 12547 }, { "epoch": 0.764120208263557, "grad_norm": 0.9360118783032118, "learning_rate": 4.8052540326567975e-06, "loss": 0.4594, "step": 12548 }, { "epoch": 0.7641811040404348, "grad_norm": 1.119036934264317, "learning_rate": 4.8052231546708205e-06, "loss": 0.4043, "step": 12549 }, { "epoch": 0.7642419998173127, "grad_norm": 1.0765404178580096, "learning_rate": 4.8051922743363296e-06, "loss": 0.4201, "step": 12550 }, { "epoch": 0.7643028955941905, "grad_norm": 1.0011952528362265, "learning_rate": 4.805161391653357e-06, "loss": 0.3594, "step": 12551 }, { "epoch": 0.7643637913710685, "grad_norm": 0.973971146280345, "learning_rate": 4.805130506621933e-06, "loss": 0.4537, "step": 12552 }, { "epoch": 0.7644246871479463, "grad_norm": 1.0980115433236601, "learning_rate": 4.805099619242091e-06, "loss": 0.3658, "step": 12553 }, { "epoch": 0.7644855829248242, "grad_norm": 0.9859959607378946, "learning_rate": 4.805068729513861e-06, "loss": 0.4786, "step": 12554 }, { "epoch": 0.764546478701702, "grad_norm": 0.9292547591034541, "learning_rate": 4.8050378374372745e-06, "loss": 0.4575, "step": 12555 }, { "epoch": 0.76460737447858, "grad_norm": 0.9729623650638003, "learning_rate": 4.805006943012364e-06, "loss": 0.4333, "step": 12556 }, { "epoch": 0.7646682702554578, "grad_norm": 1.036512286673859, "learning_rate": 4.80497604623916e-06, "loss": 0.4165, "step": 12557 }, { "epoch": 0.7647291660323356, "grad_norm": 0.9639932347397182, "learning_rate": 4.804945147117694e-06, "loss": 0.3974, "step": 12558 }, { "epoch": 0.7647900618092135, "grad_norm": 0.900703038471696, "learning_rate": 4.804914245647999e-06, "loss": 0.434, "step": 12559 }, { "epoch": 0.7648509575860915, "grad_norm": 1.0387812565174752, "learning_rate": 4.804883341830104e-06, "loss": 0.4348, "step": 12560 }, { "epoch": 0.7649118533629693, "grad_norm": 1.0613855394926914, "learning_rate": 4.804852435664042e-06, "loss": 0.426, "step": 12561 }, { "epoch": 0.7649727491398471, "grad_norm": 0.9770436222498448, "learning_rate": 4.804821527149845e-06, "loss": 0.4817, "step": 12562 }, { "epoch": 0.765033644916725, "grad_norm": 0.9991489272507644, "learning_rate": 4.804790616287543e-06, "loss": 0.4281, "step": 12563 }, { "epoch": 0.7650945406936029, "grad_norm": 1.0477934186305602, "learning_rate": 4.804759703077169e-06, "loss": 0.4166, "step": 12564 }, { "epoch": 0.7651554364704808, "grad_norm": 0.9417096555920945, "learning_rate": 4.804728787518753e-06, "loss": 0.4568, "step": 12565 }, { "epoch": 0.7652163322473586, "grad_norm": 0.9834801999016792, "learning_rate": 4.804697869612328e-06, "loss": 0.4644, "step": 12566 }, { "epoch": 0.7652772280242365, "grad_norm": 0.966542586137245, "learning_rate": 4.804666949357924e-06, "loss": 0.4217, "step": 12567 }, { "epoch": 0.7653381238011144, "grad_norm": 1.0188582727751374, "learning_rate": 4.8046360267555735e-06, "loss": 0.454, "step": 12568 }, { "epoch": 0.7653990195779923, "grad_norm": 1.0116469969366324, "learning_rate": 4.804605101805308e-06, "loss": 0.4229, "step": 12569 }, { "epoch": 0.7654599153548701, "grad_norm": 0.9968985011077774, "learning_rate": 4.804574174507159e-06, "loss": 0.4275, "step": 12570 }, { "epoch": 0.765520811131748, "grad_norm": 1.0559324499114444, "learning_rate": 4.8045432448611564e-06, "loss": 0.3978, "step": 12571 }, { "epoch": 0.7655817069086259, "grad_norm": 1.077019692199011, "learning_rate": 4.804512312867335e-06, "loss": 0.4258, "step": 12572 }, { "epoch": 0.7656426026855038, "grad_norm": 0.9334792755296023, "learning_rate": 4.804481378525722e-06, "loss": 0.4967, "step": 12573 }, { "epoch": 0.7657034984623816, "grad_norm": 0.93832401522952, "learning_rate": 4.804450441836352e-06, "loss": 0.4894, "step": 12574 }, { "epoch": 0.7657643942392595, "grad_norm": 0.9425209622240545, "learning_rate": 4.804419502799257e-06, "loss": 0.4488, "step": 12575 }, { "epoch": 0.7658252900161374, "grad_norm": 1.0181288685766494, "learning_rate": 4.804388561414467e-06, "loss": 0.3639, "step": 12576 }, { "epoch": 0.7658861857930153, "grad_norm": 1.0107928784211384, "learning_rate": 4.804357617682013e-06, "loss": 0.4181, "step": 12577 }, { "epoch": 0.7659470815698931, "grad_norm": 0.98100241778763, "learning_rate": 4.804326671601928e-06, "loss": 0.4179, "step": 12578 }, { "epoch": 0.766007977346771, "grad_norm": 0.9992966996296273, "learning_rate": 4.804295723174243e-06, "loss": 0.3528, "step": 12579 }, { "epoch": 0.7660688731236489, "grad_norm": 0.9387834273803584, "learning_rate": 4.8042647723989885e-06, "loss": 0.4638, "step": 12580 }, { "epoch": 0.7661297689005268, "grad_norm": 1.0069666554960681, "learning_rate": 4.8042338192761984e-06, "loss": 0.4018, "step": 12581 }, { "epoch": 0.7661906646774046, "grad_norm": 1.0298628029007917, "learning_rate": 4.8042028638059015e-06, "loss": 0.4105, "step": 12582 }, { "epoch": 0.7662515604542824, "grad_norm": 0.9182515426073626, "learning_rate": 4.8041719059881306e-06, "loss": 0.4609, "step": 12583 }, { "epoch": 0.7663124562311604, "grad_norm": 1.0337453185773118, "learning_rate": 4.804140945822918e-06, "loss": 0.3764, "step": 12584 }, { "epoch": 0.7663733520080382, "grad_norm": 0.9833040168447316, "learning_rate": 4.804109983310294e-06, "loss": 0.4274, "step": 12585 }, { "epoch": 0.7664342477849161, "grad_norm": 1.0036000530600213, "learning_rate": 4.804079018450291e-06, "loss": 0.4565, "step": 12586 }, { "epoch": 0.7664951435617939, "grad_norm": 1.035152107496607, "learning_rate": 4.804048051242941e-06, "loss": 0.3899, "step": 12587 }, { "epoch": 0.7665560393386719, "grad_norm": 0.9184991395908584, "learning_rate": 4.804017081688273e-06, "loss": 0.4224, "step": 12588 }, { "epoch": 0.7666169351155497, "grad_norm": 0.9878407489088152, "learning_rate": 4.8039861097863214e-06, "loss": 0.4836, "step": 12589 }, { "epoch": 0.7666778308924276, "grad_norm": 1.1543786468625958, "learning_rate": 4.803955135537118e-06, "loss": 0.4345, "step": 12590 }, { "epoch": 0.7667387266693055, "grad_norm": 0.910254365907776, "learning_rate": 4.803924158940691e-06, "loss": 0.4255, "step": 12591 }, { "epoch": 0.7667996224461834, "grad_norm": 1.025952534686121, "learning_rate": 4.803893179997074e-06, "loss": 0.411, "step": 12592 }, { "epoch": 0.7668605182230612, "grad_norm": 1.046218155683437, "learning_rate": 4.803862198706299e-06, "loss": 0.3866, "step": 12593 }, { "epoch": 0.7669214139999391, "grad_norm": 1.0589780849596693, "learning_rate": 4.803831215068397e-06, "loss": 0.4382, "step": 12594 }, { "epoch": 0.766982309776817, "grad_norm": 1.052955662194168, "learning_rate": 4.803800229083399e-06, "loss": 0.3845, "step": 12595 }, { "epoch": 0.7670432055536949, "grad_norm": 1.0610082450067015, "learning_rate": 4.8037692407513384e-06, "loss": 0.3853, "step": 12596 }, { "epoch": 0.7671041013305727, "grad_norm": 0.9412921201285303, "learning_rate": 4.803738250072245e-06, "loss": 0.5013, "step": 12597 }, { "epoch": 0.7671649971074506, "grad_norm": 1.0371766943101792, "learning_rate": 4.803707257046151e-06, "loss": 0.3477, "step": 12598 }, { "epoch": 0.7672258928843285, "grad_norm": 1.0130634304400947, "learning_rate": 4.803676261673088e-06, "loss": 0.4912, "step": 12599 }, { "epoch": 0.7672867886612064, "grad_norm": 0.9466090054735065, "learning_rate": 4.803645263953088e-06, "loss": 0.4216, "step": 12600 }, { "epoch": 0.7673476844380842, "grad_norm": 0.9304172426923855, "learning_rate": 4.803614263886182e-06, "loss": 0.4325, "step": 12601 }, { "epoch": 0.767408580214962, "grad_norm": 0.975900885597734, "learning_rate": 4.803583261472401e-06, "loss": 0.4768, "step": 12602 }, { "epoch": 0.76746947599184, "grad_norm": 0.9997808089803476, "learning_rate": 4.8035522567117775e-06, "loss": 0.3681, "step": 12603 }, { "epoch": 0.7675303717687179, "grad_norm": 1.0314713804752604, "learning_rate": 4.803521249604343e-06, "loss": 0.3823, "step": 12604 }, { "epoch": 0.7675912675455957, "grad_norm": 1.098004635210432, "learning_rate": 4.803490240150129e-06, "loss": 0.4015, "step": 12605 }, { "epoch": 0.7676521633224735, "grad_norm": 0.9214800413270566, "learning_rate": 4.803459228349166e-06, "loss": 0.4467, "step": 12606 }, { "epoch": 0.7677130590993515, "grad_norm": 0.8947648411725629, "learning_rate": 4.8034282142014885e-06, "loss": 0.4599, "step": 12607 }, { "epoch": 0.7677739548762293, "grad_norm": 1.060833870135549, "learning_rate": 4.803397197707125e-06, "loss": 0.43, "step": 12608 }, { "epoch": 0.7678348506531072, "grad_norm": 1.0756907205075208, "learning_rate": 4.803366178866109e-06, "loss": 0.3864, "step": 12609 }, { "epoch": 0.767895746429985, "grad_norm": 0.9661705519587169, "learning_rate": 4.803335157678471e-06, "loss": 0.4203, "step": 12610 }, { "epoch": 0.767956642206863, "grad_norm": 0.9974005892350608, "learning_rate": 4.803304134144242e-06, "loss": 0.4048, "step": 12611 }, { "epoch": 0.7680175379837408, "grad_norm": 0.8969173336407608, "learning_rate": 4.8032731082634566e-06, "loss": 0.4476, "step": 12612 }, { "epoch": 0.7680784337606187, "grad_norm": 1.037017947021021, "learning_rate": 4.803242080036143e-06, "loss": 0.4093, "step": 12613 }, { "epoch": 0.7681393295374965, "grad_norm": 1.0041774972007484, "learning_rate": 4.803211049462335e-06, "loss": 0.4448, "step": 12614 }, { "epoch": 0.7682002253143745, "grad_norm": 1.0246668337704186, "learning_rate": 4.803180016542063e-06, "loss": 0.3628, "step": 12615 }, { "epoch": 0.7682611210912523, "grad_norm": 0.8933915937939234, "learning_rate": 4.80314898127536e-06, "loss": 0.4526, "step": 12616 }, { "epoch": 0.7683220168681302, "grad_norm": 0.9701471238385304, "learning_rate": 4.8031179436622555e-06, "loss": 0.4196, "step": 12617 }, { "epoch": 0.768382912645008, "grad_norm": 0.8955210731646142, "learning_rate": 4.8030869037027835e-06, "loss": 0.4664, "step": 12618 }, { "epoch": 0.768443808421886, "grad_norm": 0.9445072536912256, "learning_rate": 4.8030558613969735e-06, "loss": 0.4246, "step": 12619 }, { "epoch": 0.7685047041987638, "grad_norm": 0.9629234205940567, "learning_rate": 4.8030248167448586e-06, "loss": 0.4263, "step": 12620 }, { "epoch": 0.7685655999756417, "grad_norm": 0.9589643081253837, "learning_rate": 4.80299376974647e-06, "loss": 0.4639, "step": 12621 }, { "epoch": 0.7686264957525195, "grad_norm": 1.0361508637514558, "learning_rate": 4.802962720401838e-06, "loss": 0.4194, "step": 12622 }, { "epoch": 0.7686873915293975, "grad_norm": 0.9804078118268289, "learning_rate": 4.802931668710996e-06, "loss": 0.4291, "step": 12623 }, { "epoch": 0.7687482873062753, "grad_norm": 1.026629131849047, "learning_rate": 4.802900614673977e-06, "loss": 0.463, "step": 12624 }, { "epoch": 0.7688091830831532, "grad_norm": 0.9019759596083552, "learning_rate": 4.802869558290808e-06, "loss": 0.4889, "step": 12625 }, { "epoch": 0.768870078860031, "grad_norm": 1.1360917698850084, "learning_rate": 4.802838499561525e-06, "loss": 0.4012, "step": 12626 }, { "epoch": 0.768930974636909, "grad_norm": 0.8969421086165633, "learning_rate": 4.802807438486158e-06, "loss": 0.4386, "step": 12627 }, { "epoch": 0.7689918704137868, "grad_norm": 1.0022663244303727, "learning_rate": 4.802776375064737e-06, "loss": 0.4226, "step": 12628 }, { "epoch": 0.7690527661906646, "grad_norm": 0.996263081805426, "learning_rate": 4.802745309297297e-06, "loss": 0.3901, "step": 12629 }, { "epoch": 0.7691136619675426, "grad_norm": 1.0921359832234794, "learning_rate": 4.802714241183868e-06, "loss": 0.4024, "step": 12630 }, { "epoch": 0.7691745577444205, "grad_norm": 0.9743170151972802, "learning_rate": 4.802683170724481e-06, "loss": 0.362, "step": 12631 }, { "epoch": 0.7692354535212983, "grad_norm": 0.9736399285803987, "learning_rate": 4.802652097919168e-06, "loss": 0.4575, "step": 12632 }, { "epoch": 0.7692963492981761, "grad_norm": 1.0270985921851563, "learning_rate": 4.802621022767961e-06, "loss": 0.4175, "step": 12633 }, { "epoch": 0.7693572450750541, "grad_norm": 1.0390329738332285, "learning_rate": 4.802589945270893e-06, "loss": 0.3986, "step": 12634 }, { "epoch": 0.7694181408519319, "grad_norm": 1.0259955597455737, "learning_rate": 4.802558865427993e-06, "loss": 0.441, "step": 12635 }, { "epoch": 0.7694790366288098, "grad_norm": 1.017452192815951, "learning_rate": 4.8025277832392934e-06, "loss": 0.463, "step": 12636 }, { "epoch": 0.7695399324056876, "grad_norm": 1.1142150153999297, "learning_rate": 4.802496698704827e-06, "loss": 0.4357, "step": 12637 }, { "epoch": 0.7696008281825656, "grad_norm": 0.9723547272539911, "learning_rate": 4.802465611824625e-06, "loss": 0.4285, "step": 12638 }, { "epoch": 0.7696617239594434, "grad_norm": 1.0064220973657143, "learning_rate": 4.802434522598718e-06, "loss": 0.4207, "step": 12639 }, { "epoch": 0.7697226197363213, "grad_norm": 1.0559601496772824, "learning_rate": 4.802403431027139e-06, "loss": 0.4187, "step": 12640 }, { "epoch": 0.7697835155131991, "grad_norm": 1.056265174561772, "learning_rate": 4.802372337109921e-06, "loss": 0.4149, "step": 12641 }, { "epoch": 0.7698444112900771, "grad_norm": 1.005448972238836, "learning_rate": 4.802341240847092e-06, "loss": 0.423, "step": 12642 }, { "epoch": 0.7699053070669549, "grad_norm": 0.9545275868470788, "learning_rate": 4.802310142238686e-06, "loss": 0.4451, "step": 12643 }, { "epoch": 0.7699662028438328, "grad_norm": 1.0177299187401134, "learning_rate": 4.802279041284735e-06, "loss": 0.4121, "step": 12644 }, { "epoch": 0.7700270986207106, "grad_norm": 0.903537137284966, "learning_rate": 4.802247937985268e-06, "loss": 0.4034, "step": 12645 }, { "epoch": 0.7700879943975886, "grad_norm": 0.9411206459255883, "learning_rate": 4.8022168323403205e-06, "loss": 0.4311, "step": 12646 }, { "epoch": 0.7701488901744664, "grad_norm": 0.9054842252882854, "learning_rate": 4.802185724349922e-06, "loss": 0.438, "step": 12647 }, { "epoch": 0.7702097859513443, "grad_norm": 0.975703558573771, "learning_rate": 4.802154614014104e-06, "loss": 0.3952, "step": 12648 }, { "epoch": 0.7702706817282221, "grad_norm": 1.05512648158568, "learning_rate": 4.802123501332899e-06, "loss": 0.4049, "step": 12649 }, { "epoch": 0.7703315775051001, "grad_norm": 0.8906571133623628, "learning_rate": 4.802092386306339e-06, "loss": 0.4541, "step": 12650 }, { "epoch": 0.7703924732819779, "grad_norm": 1.0496540033915807, "learning_rate": 4.802061268934455e-06, "loss": 0.4066, "step": 12651 }, { "epoch": 0.7704533690588558, "grad_norm": 0.95360861992862, "learning_rate": 4.802030149217278e-06, "loss": 0.4539, "step": 12652 }, { "epoch": 0.7705142648357336, "grad_norm": 0.8976303272242726, "learning_rate": 4.801999027154841e-06, "loss": 0.4865, "step": 12653 }, { "epoch": 0.7705751606126116, "grad_norm": 1.0052899405641105, "learning_rate": 4.801967902747175e-06, "loss": 0.3697, "step": 12654 }, { "epoch": 0.7706360563894894, "grad_norm": 1.0038626460032876, "learning_rate": 4.801936775994313e-06, "loss": 0.5101, "step": 12655 }, { "epoch": 0.7706969521663672, "grad_norm": 0.9752146550479063, "learning_rate": 4.8019056468962854e-06, "loss": 0.3917, "step": 12656 }, { "epoch": 0.7707578479432451, "grad_norm": 1.110168221858251, "learning_rate": 4.801874515453123e-06, "loss": 0.4003, "step": 12657 }, { "epoch": 0.770818743720123, "grad_norm": 0.9286283944986045, "learning_rate": 4.80184338166486e-06, "loss": 0.409, "step": 12658 }, { "epoch": 0.7708796394970009, "grad_norm": 0.9731121447558871, "learning_rate": 4.8018122455315265e-06, "loss": 0.4395, "step": 12659 }, { "epoch": 0.7709405352738787, "grad_norm": 1.0613710170133643, "learning_rate": 4.8017811070531535e-06, "loss": 0.5259, "step": 12660 }, { "epoch": 0.7710014310507566, "grad_norm": 1.043499640428673, "learning_rate": 4.801749966229775e-06, "loss": 0.386, "step": 12661 }, { "epoch": 0.7710623268276345, "grad_norm": 0.9430514638525392, "learning_rate": 4.801718823061421e-06, "loss": 0.4623, "step": 12662 }, { "epoch": 0.7711232226045124, "grad_norm": 1.0817024583647998, "learning_rate": 4.8016876775481245e-06, "loss": 0.4097, "step": 12663 }, { "epoch": 0.7711841183813902, "grad_norm": 1.005005283282771, "learning_rate": 4.801656529689915e-06, "loss": 0.39, "step": 12664 }, { "epoch": 0.7712450141582681, "grad_norm": 1.015952710843648, "learning_rate": 4.801625379486827e-06, "loss": 0.4579, "step": 12665 }, { "epoch": 0.771305909935146, "grad_norm": 0.9901017912202364, "learning_rate": 4.80159422693889e-06, "loss": 0.3483, "step": 12666 }, { "epoch": 0.7713668057120239, "grad_norm": 1.105338027884472, "learning_rate": 4.801563072046137e-06, "loss": 0.3739, "step": 12667 }, { "epoch": 0.7714277014889017, "grad_norm": 0.9689809911952333, "learning_rate": 4.801531914808599e-06, "loss": 0.4203, "step": 12668 }, { "epoch": 0.7714885972657796, "grad_norm": 0.9774860202606791, "learning_rate": 4.801500755226309e-06, "loss": 0.4158, "step": 12669 }, { "epoch": 0.7715494930426575, "grad_norm": 1.0065519345066691, "learning_rate": 4.801469593299297e-06, "loss": 0.4246, "step": 12670 }, { "epoch": 0.7716103888195354, "grad_norm": 0.9529282075578572, "learning_rate": 4.801438429027596e-06, "loss": 0.4995, "step": 12671 }, { "epoch": 0.7716712845964132, "grad_norm": 1.1116592094723539, "learning_rate": 4.801407262411238e-06, "loss": 0.532, "step": 12672 }, { "epoch": 0.7717321803732912, "grad_norm": 1.0950331943483549, "learning_rate": 4.801376093450254e-06, "loss": 0.4204, "step": 12673 }, { "epoch": 0.771793076150169, "grad_norm": 1.0207110661403602, "learning_rate": 4.801344922144675e-06, "loss": 0.404, "step": 12674 }, { "epoch": 0.7718539719270469, "grad_norm": 1.1244751222529765, "learning_rate": 4.801313748494534e-06, "loss": 0.391, "step": 12675 }, { "epoch": 0.7719148677039247, "grad_norm": 0.9938689265908248, "learning_rate": 4.801282572499862e-06, "loss": 0.3982, "step": 12676 }, { "epoch": 0.7719757634808027, "grad_norm": 1.00231172645757, "learning_rate": 4.801251394160692e-06, "loss": 0.4486, "step": 12677 }, { "epoch": 0.7720366592576805, "grad_norm": 1.0431143498289785, "learning_rate": 4.801220213477054e-06, "loss": 0.4132, "step": 12678 }, { "epoch": 0.7720975550345583, "grad_norm": 0.9561574458878275, "learning_rate": 4.801189030448982e-06, "loss": 0.3965, "step": 12679 }, { "epoch": 0.7721584508114362, "grad_norm": 1.0399670414838171, "learning_rate": 4.801157845076506e-06, "loss": 0.4004, "step": 12680 }, { "epoch": 0.7722193465883141, "grad_norm": 1.192979816766944, "learning_rate": 4.801126657359658e-06, "loss": 0.4072, "step": 12681 }, { "epoch": 0.772280242365192, "grad_norm": 0.933655079013556, "learning_rate": 4.801095467298469e-06, "loss": 0.4494, "step": 12682 }, { "epoch": 0.7723411381420698, "grad_norm": 1.0592140985468064, "learning_rate": 4.801064274892973e-06, "loss": 0.4053, "step": 12683 }, { "epoch": 0.7724020339189477, "grad_norm": 0.9755088705756415, "learning_rate": 4.8010330801432e-06, "loss": 0.4832, "step": 12684 }, { "epoch": 0.7724629296958256, "grad_norm": 0.946876663609507, "learning_rate": 4.801001883049183e-06, "loss": 0.4214, "step": 12685 }, { "epoch": 0.7725238254727035, "grad_norm": 0.9514549711870877, "learning_rate": 4.800970683610953e-06, "loss": 0.4809, "step": 12686 }, { "epoch": 0.7725847212495813, "grad_norm": 0.9568317290402534, "learning_rate": 4.800939481828542e-06, "loss": 0.3791, "step": 12687 }, { "epoch": 0.7726456170264592, "grad_norm": 1.216802621569982, "learning_rate": 4.8009082777019814e-06, "loss": 0.4019, "step": 12688 }, { "epoch": 0.7727065128033371, "grad_norm": 1.008081835083653, "learning_rate": 4.800877071231302e-06, "loss": 0.3771, "step": 12689 }, { "epoch": 0.772767408580215, "grad_norm": 0.9856782933843785, "learning_rate": 4.800845862416539e-06, "loss": 0.3887, "step": 12690 }, { "epoch": 0.7728283043570928, "grad_norm": 1.0609963123640778, "learning_rate": 4.8008146512577205e-06, "loss": 0.4351, "step": 12691 }, { "epoch": 0.7728892001339707, "grad_norm": 1.0487416783100605, "learning_rate": 4.8007834377548815e-06, "loss": 0.4088, "step": 12692 }, { "epoch": 0.7729500959108486, "grad_norm": 1.0607608785569909, "learning_rate": 4.800752221908051e-06, "loss": 0.4483, "step": 12693 }, { "epoch": 0.7730109916877265, "grad_norm": 1.0600376715475583, "learning_rate": 4.800721003717261e-06, "loss": 0.3566, "step": 12694 }, { "epoch": 0.7730718874646043, "grad_norm": 1.047764020265285, "learning_rate": 4.800689783182546e-06, "loss": 0.3733, "step": 12695 }, { "epoch": 0.7731327832414822, "grad_norm": 0.9782622753430016, "learning_rate": 4.800658560303936e-06, "loss": 0.4028, "step": 12696 }, { "epoch": 0.7731936790183601, "grad_norm": 0.9348671416319975, "learning_rate": 4.8006273350814625e-06, "loss": 0.4703, "step": 12697 }, { "epoch": 0.773254574795238, "grad_norm": 1.0043435212727452, "learning_rate": 4.800596107515158e-06, "loss": 0.4538, "step": 12698 }, { "epoch": 0.7733154705721158, "grad_norm": 1.0599066982513412, "learning_rate": 4.800564877605053e-06, "loss": 0.39, "step": 12699 }, { "epoch": 0.7733763663489936, "grad_norm": 1.001094131208797, "learning_rate": 4.800533645351181e-06, "loss": 0.4132, "step": 12700 }, { "epoch": 0.7734372621258716, "grad_norm": 0.9742651528032131, "learning_rate": 4.800502410753573e-06, "loss": 0.5091, "step": 12701 }, { "epoch": 0.7734981579027495, "grad_norm": 0.9864469126868797, "learning_rate": 4.800471173812261e-06, "loss": 0.4042, "step": 12702 }, { "epoch": 0.7735590536796273, "grad_norm": 1.0692106250574938, "learning_rate": 4.800439934527276e-06, "loss": 0.3876, "step": 12703 }, { "epoch": 0.7736199494565051, "grad_norm": 0.9158790208301233, "learning_rate": 4.800408692898652e-06, "loss": 0.3992, "step": 12704 }, { "epoch": 0.7736808452333831, "grad_norm": 0.9200341453128864, "learning_rate": 4.80037744892642e-06, "loss": 0.4358, "step": 12705 }, { "epoch": 0.7737417410102609, "grad_norm": 0.9953126657645472, "learning_rate": 4.800346202610609e-06, "loss": 0.3529, "step": 12706 }, { "epoch": 0.7738026367871388, "grad_norm": 0.9663602090422002, "learning_rate": 4.800314953951255e-06, "loss": 0.4611, "step": 12707 }, { "epoch": 0.7738635325640166, "grad_norm": 1.0769749914849045, "learning_rate": 4.800283702948387e-06, "loss": 0.4783, "step": 12708 }, { "epoch": 0.7739244283408946, "grad_norm": 0.9912723938213266, "learning_rate": 4.800252449602038e-06, "loss": 0.4319, "step": 12709 }, { "epoch": 0.7739853241177724, "grad_norm": 1.0169686627501315, "learning_rate": 4.80022119391224e-06, "loss": 0.4139, "step": 12710 }, { "epoch": 0.7740462198946503, "grad_norm": 0.9500381119941365, "learning_rate": 4.800189935879024e-06, "loss": 0.4825, "step": 12711 }, { "epoch": 0.7741071156715282, "grad_norm": 1.0339343804387326, "learning_rate": 4.800158675502423e-06, "loss": 0.4295, "step": 12712 }, { "epoch": 0.7741680114484061, "grad_norm": 1.0588777059040317, "learning_rate": 4.800127412782467e-06, "loss": 0.3571, "step": 12713 }, { "epoch": 0.7742289072252839, "grad_norm": 0.9710747901885496, "learning_rate": 4.80009614771919e-06, "loss": 0.3901, "step": 12714 }, { "epoch": 0.7742898030021618, "grad_norm": 1.0579805636422748, "learning_rate": 4.800064880312623e-06, "loss": 0.4125, "step": 12715 }, { "epoch": 0.7743506987790397, "grad_norm": 1.0198325939051576, "learning_rate": 4.800033610562797e-06, "loss": 0.4346, "step": 12716 }, { "epoch": 0.7744115945559176, "grad_norm": 0.9698731924935259, "learning_rate": 4.8000023384697446e-06, "loss": 0.4015, "step": 12717 }, { "epoch": 0.7744724903327954, "grad_norm": 0.8714762472100042, "learning_rate": 4.799971064033498e-06, "loss": 0.4531, "step": 12718 }, { "epoch": 0.7745333861096733, "grad_norm": 0.9600109631834646, "learning_rate": 4.799939787254089e-06, "loss": 0.4546, "step": 12719 }, { "epoch": 0.7745942818865512, "grad_norm": 1.0339501300424396, "learning_rate": 4.799908508131548e-06, "loss": 0.4167, "step": 12720 }, { "epoch": 0.7746551776634291, "grad_norm": 1.0050795542550588, "learning_rate": 4.799877226665909e-06, "loss": 0.3932, "step": 12721 }, { "epoch": 0.7747160734403069, "grad_norm": 0.968929434055138, "learning_rate": 4.7998459428572035e-06, "loss": 0.4265, "step": 12722 }, { "epoch": 0.7747769692171848, "grad_norm": 0.9588346370168667, "learning_rate": 4.7998146567054615e-06, "loss": 0.4765, "step": 12723 }, { "epoch": 0.7748378649940627, "grad_norm": 1.0824923279680896, "learning_rate": 4.799783368210716e-06, "loss": 0.3861, "step": 12724 }, { "epoch": 0.7748987607709406, "grad_norm": 1.0957824857552747, "learning_rate": 4.799752077373e-06, "loss": 0.4418, "step": 12725 }, { "epoch": 0.7749596565478184, "grad_norm": 0.9335638627284314, "learning_rate": 4.799720784192343e-06, "loss": 0.3947, "step": 12726 }, { "epoch": 0.7750205523246962, "grad_norm": 0.9991282214427661, "learning_rate": 4.79968948866878e-06, "loss": 0.4247, "step": 12727 }, { "epoch": 0.7750814481015742, "grad_norm": 0.9249605507765726, "learning_rate": 4.799658190802341e-06, "loss": 0.4496, "step": 12728 }, { "epoch": 0.775142343878452, "grad_norm": 0.9638104167054681, "learning_rate": 4.799626890593057e-06, "loss": 0.4691, "step": 12729 }, { "epoch": 0.7752032396553299, "grad_norm": 0.9928037851739773, "learning_rate": 4.799595588040962e-06, "loss": 0.4166, "step": 12730 }, { "epoch": 0.7752641354322077, "grad_norm": 0.9966345844210762, "learning_rate": 4.799564283146085e-06, "loss": 0.362, "step": 12731 }, { "epoch": 0.7753250312090857, "grad_norm": 0.9648176475153312, "learning_rate": 4.799532975908462e-06, "loss": 0.4167, "step": 12732 }, { "epoch": 0.7753859269859635, "grad_norm": 0.9317295503574077, "learning_rate": 4.799501666328121e-06, "loss": 0.4766, "step": 12733 }, { "epoch": 0.7754468227628414, "grad_norm": 0.9526612907688714, "learning_rate": 4.799470354405096e-06, "loss": 0.417, "step": 12734 }, { "epoch": 0.7755077185397192, "grad_norm": 0.9886158399801629, "learning_rate": 4.7994390401394186e-06, "loss": 0.4666, "step": 12735 }, { "epoch": 0.7755686143165972, "grad_norm": 1.059541629244231, "learning_rate": 4.7994077235311205e-06, "loss": 0.4404, "step": 12736 }, { "epoch": 0.775629510093475, "grad_norm": 0.9461848031847704, "learning_rate": 4.799376404580234e-06, "loss": 0.45, "step": 12737 }, { "epoch": 0.7756904058703529, "grad_norm": 1.0903868988863243, "learning_rate": 4.799345083286789e-06, "loss": 0.3854, "step": 12738 }, { "epoch": 0.7757513016472307, "grad_norm": 0.9382180938468018, "learning_rate": 4.799313759650821e-06, "loss": 0.3938, "step": 12739 }, { "epoch": 0.7758121974241087, "grad_norm": 0.9597552904768974, "learning_rate": 4.799282433672359e-06, "loss": 0.5245, "step": 12740 }, { "epoch": 0.7758730932009865, "grad_norm": 0.9431983513094354, "learning_rate": 4.799251105351436e-06, "loss": 0.483, "step": 12741 }, { "epoch": 0.7759339889778644, "grad_norm": 0.9754442734252652, "learning_rate": 4.7992197746880835e-06, "loss": 0.3688, "step": 12742 }, { "epoch": 0.7759948847547422, "grad_norm": 0.9662570475037163, "learning_rate": 4.799188441682335e-06, "loss": 0.5038, "step": 12743 }, { "epoch": 0.7760557805316202, "grad_norm": 1.028960521601434, "learning_rate": 4.799157106334219e-06, "loss": 0.4061, "step": 12744 }, { "epoch": 0.776116676308498, "grad_norm": 0.969280692339814, "learning_rate": 4.799125768643771e-06, "loss": 0.4058, "step": 12745 }, { "epoch": 0.7761775720853759, "grad_norm": 1.0446533473919941, "learning_rate": 4.799094428611021e-06, "loss": 0.409, "step": 12746 }, { "epoch": 0.7762384678622537, "grad_norm": 1.0211637093271846, "learning_rate": 4.799063086236001e-06, "loss": 0.4727, "step": 12747 }, { "epoch": 0.7762993636391317, "grad_norm": 0.9794278315519896, "learning_rate": 4.799031741518744e-06, "loss": 0.4002, "step": 12748 }, { "epoch": 0.7763602594160095, "grad_norm": 0.9345440031732596, "learning_rate": 4.799000394459281e-06, "loss": 0.4123, "step": 12749 }, { "epoch": 0.7764211551928873, "grad_norm": 1.0647051361435813, "learning_rate": 4.798969045057644e-06, "loss": 0.4058, "step": 12750 }, { "epoch": 0.7764820509697652, "grad_norm": 0.9612875836639482, "learning_rate": 4.798937693313866e-06, "loss": 0.4261, "step": 12751 }, { "epoch": 0.7765429467466431, "grad_norm": 0.9994985354016155, "learning_rate": 4.798906339227977e-06, "loss": 0.465, "step": 12752 }, { "epoch": 0.776603842523521, "grad_norm": 0.9630755923777948, "learning_rate": 4.79887498280001e-06, "loss": 0.4289, "step": 12753 }, { "epoch": 0.7766647383003988, "grad_norm": 0.9874275738090238, "learning_rate": 4.798843624029998e-06, "loss": 0.3554, "step": 12754 }, { "epoch": 0.7767256340772768, "grad_norm": 0.9479337519353525, "learning_rate": 4.7988122629179714e-06, "loss": 0.4333, "step": 12755 }, { "epoch": 0.7767865298541546, "grad_norm": 1.0036983352690803, "learning_rate": 4.798780899463963e-06, "loss": 0.4322, "step": 12756 }, { "epoch": 0.7768474256310325, "grad_norm": 1.085301289730312, "learning_rate": 4.7987495336680035e-06, "loss": 0.4158, "step": 12757 }, { "epoch": 0.7769083214079103, "grad_norm": 0.8609500249366987, "learning_rate": 4.798718165530127e-06, "loss": 0.4332, "step": 12758 }, { "epoch": 0.7769692171847883, "grad_norm": 1.0794009857723101, "learning_rate": 4.798686795050363e-06, "loss": 0.4303, "step": 12759 }, { "epoch": 0.7770301129616661, "grad_norm": 1.0956604547440882, "learning_rate": 4.798655422228745e-06, "loss": 0.4489, "step": 12760 }, { "epoch": 0.777091008738544, "grad_norm": 1.0373063763376975, "learning_rate": 4.798624047065305e-06, "loss": 0.427, "step": 12761 }, { "epoch": 0.7771519045154218, "grad_norm": 1.1664659118185998, "learning_rate": 4.798592669560075e-06, "loss": 0.4064, "step": 12762 }, { "epoch": 0.7772128002922998, "grad_norm": 1.0083009697414163, "learning_rate": 4.7985612897130855e-06, "loss": 0.4524, "step": 12763 }, { "epoch": 0.7772736960691776, "grad_norm": 1.048681669670957, "learning_rate": 4.79852990752437e-06, "loss": 0.4156, "step": 12764 }, { "epoch": 0.7773345918460555, "grad_norm": 1.1683726738865992, "learning_rate": 4.79849852299396e-06, "loss": 0.3252, "step": 12765 }, { "epoch": 0.7773954876229333, "grad_norm": 0.9847686537520561, "learning_rate": 4.798467136121888e-06, "loss": 0.4469, "step": 12766 }, { "epoch": 0.7774563833998113, "grad_norm": 0.9852109065025174, "learning_rate": 4.798435746908185e-06, "loss": 0.4337, "step": 12767 }, { "epoch": 0.7775172791766891, "grad_norm": 0.9821287078627041, "learning_rate": 4.7984043553528836e-06, "loss": 0.4073, "step": 12768 }, { "epoch": 0.777578174953567, "grad_norm": 0.9930289502484219, "learning_rate": 4.798372961456016e-06, "loss": 0.3928, "step": 12769 }, { "epoch": 0.7776390707304448, "grad_norm": 1.0186779929275227, "learning_rate": 4.798341565217612e-06, "loss": 0.4145, "step": 12770 }, { "epoch": 0.7776999665073228, "grad_norm": 1.0198155463010654, "learning_rate": 4.7983101666377075e-06, "loss": 0.3857, "step": 12771 }, { "epoch": 0.7777608622842006, "grad_norm": 0.9593915319522637, "learning_rate": 4.798278765716332e-06, "loss": 0.5208, "step": 12772 }, { "epoch": 0.7778217580610784, "grad_norm": 0.9935869486571158, "learning_rate": 4.798247362453517e-06, "loss": 0.4415, "step": 12773 }, { "epoch": 0.7778826538379563, "grad_norm": 1.0490683500660074, "learning_rate": 4.798215956849296e-06, "loss": 0.3758, "step": 12774 }, { "epoch": 0.7779435496148343, "grad_norm": 1.0765214423945684, "learning_rate": 4.798184548903701e-06, "loss": 0.4063, "step": 12775 }, { "epoch": 0.7780044453917121, "grad_norm": 0.9264864798523126, "learning_rate": 4.798153138616762e-06, "loss": 0.4458, "step": 12776 }, { "epoch": 0.7780653411685899, "grad_norm": 0.9720729685871387, "learning_rate": 4.798121725988513e-06, "loss": 0.4837, "step": 12777 }, { "epoch": 0.7781262369454678, "grad_norm": 0.9973684504844111, "learning_rate": 4.7980903110189845e-06, "loss": 0.455, "step": 12778 }, { "epoch": 0.7781871327223457, "grad_norm": 1.055255083788754, "learning_rate": 4.798058893708211e-06, "loss": 0.4337, "step": 12779 }, { "epoch": 0.7782480284992236, "grad_norm": 0.907918163311152, "learning_rate": 4.798027474056222e-06, "loss": 0.445, "step": 12780 }, { "epoch": 0.7783089242761014, "grad_norm": 1.0520497499460064, "learning_rate": 4.797996052063051e-06, "loss": 0.4021, "step": 12781 }, { "epoch": 0.7783698200529793, "grad_norm": 1.1245632149016211, "learning_rate": 4.797964627728728e-06, "loss": 0.363, "step": 12782 }, { "epoch": 0.7784307158298572, "grad_norm": 0.9872457851482981, "learning_rate": 4.797933201053288e-06, "loss": 0.3846, "step": 12783 }, { "epoch": 0.7784916116067351, "grad_norm": 0.9993506050641364, "learning_rate": 4.797901772036761e-06, "loss": 0.3542, "step": 12784 }, { "epoch": 0.7785525073836129, "grad_norm": 1.008716569144231, "learning_rate": 4.797870340679178e-06, "loss": 0.422, "step": 12785 }, { "epoch": 0.7786134031604908, "grad_norm": 0.9973762083169658, "learning_rate": 4.797838906980574e-06, "loss": 0.3828, "step": 12786 }, { "epoch": 0.7786742989373687, "grad_norm": 0.9033273009290488, "learning_rate": 4.7978074709409785e-06, "loss": 0.5164, "step": 12787 }, { "epoch": 0.7787351947142466, "grad_norm": 0.9896868013077841, "learning_rate": 4.797776032560425e-06, "loss": 0.4402, "step": 12788 }, { "epoch": 0.7787960904911244, "grad_norm": 0.9584125410905074, "learning_rate": 4.797744591838946e-06, "loss": 0.4579, "step": 12789 }, { "epoch": 0.7788569862680023, "grad_norm": 1.063242733427789, "learning_rate": 4.797713148776571e-06, "loss": 0.3891, "step": 12790 }, { "epoch": 0.7789178820448802, "grad_norm": 1.0070165022964943, "learning_rate": 4.797681703373335e-06, "loss": 0.4084, "step": 12791 }, { "epoch": 0.7789787778217581, "grad_norm": 0.9041982865318591, "learning_rate": 4.797650255629268e-06, "loss": 0.4127, "step": 12792 }, { "epoch": 0.7790396735986359, "grad_norm": 0.9858132195813079, "learning_rate": 4.7976188055444024e-06, "loss": 0.4364, "step": 12793 }, { "epoch": 0.7791005693755139, "grad_norm": 1.019744812167505, "learning_rate": 4.797587353118771e-06, "loss": 0.3967, "step": 12794 }, { "epoch": 0.7791614651523917, "grad_norm": 0.973166362329539, "learning_rate": 4.797555898352405e-06, "loss": 0.3787, "step": 12795 }, { "epoch": 0.7792223609292696, "grad_norm": 1.072161459172859, "learning_rate": 4.7975244412453374e-06, "loss": 0.4229, "step": 12796 }, { "epoch": 0.7792832567061474, "grad_norm": 1.0416809406414087, "learning_rate": 4.797492981797599e-06, "loss": 0.3626, "step": 12797 }, { "epoch": 0.7793441524830254, "grad_norm": 1.0881862698079736, "learning_rate": 4.797461520009224e-06, "loss": 0.4397, "step": 12798 }, { "epoch": 0.7794050482599032, "grad_norm": 1.01074004655159, "learning_rate": 4.797430055880241e-06, "loss": 0.38, "step": 12799 }, { "epoch": 0.779465944036781, "grad_norm": 1.0631276364154467, "learning_rate": 4.797398589410685e-06, "loss": 0.4473, "step": 12800 }, { "epoch": 0.7795268398136589, "grad_norm": 1.0222430483251312, "learning_rate": 4.797367120600586e-06, "loss": 0.3778, "step": 12801 }, { "epoch": 0.7795877355905368, "grad_norm": 1.0424995974577056, "learning_rate": 4.797335649449979e-06, "loss": 0.4025, "step": 12802 }, { "epoch": 0.7796486313674147, "grad_norm": 1.1028181899910887, "learning_rate": 4.797304175958893e-06, "loss": 0.4502, "step": 12803 }, { "epoch": 0.7797095271442925, "grad_norm": 0.9713276274351405, "learning_rate": 4.797272700127361e-06, "loss": 0.4239, "step": 12804 }, { "epoch": 0.7797704229211704, "grad_norm": 0.9555573712698266, "learning_rate": 4.797241221955417e-06, "loss": 0.4437, "step": 12805 }, { "epoch": 0.7798313186980483, "grad_norm": 0.980005136418736, "learning_rate": 4.79720974144309e-06, "loss": 0.4002, "step": 12806 }, { "epoch": 0.7798922144749262, "grad_norm": 0.9995773089239456, "learning_rate": 4.797178258590413e-06, "loss": 0.3364, "step": 12807 }, { "epoch": 0.779953110251804, "grad_norm": 1.028087874416001, "learning_rate": 4.79714677339742e-06, "loss": 0.3762, "step": 12808 }, { "epoch": 0.7800140060286819, "grad_norm": 0.9496771339857385, "learning_rate": 4.79711528586414e-06, "loss": 0.3668, "step": 12809 }, { "epoch": 0.7800749018055598, "grad_norm": 1.0122754656369684, "learning_rate": 4.797083795990608e-06, "loss": 0.446, "step": 12810 }, { "epoch": 0.7801357975824377, "grad_norm": 1.0433416267624525, "learning_rate": 4.797052303776854e-06, "loss": 0.3535, "step": 12811 }, { "epoch": 0.7801966933593155, "grad_norm": 1.0012122440517095, "learning_rate": 4.797020809222912e-06, "loss": 0.403, "step": 12812 }, { "epoch": 0.7802575891361934, "grad_norm": 1.0635328392632353, "learning_rate": 4.796989312328812e-06, "loss": 0.4044, "step": 12813 }, { "epoch": 0.7803184849130713, "grad_norm": 0.9726084434261154, "learning_rate": 4.7969578130945875e-06, "loss": 0.4389, "step": 12814 }, { "epoch": 0.7803793806899492, "grad_norm": 1.0214884972892766, "learning_rate": 4.796926311520269e-06, "loss": 0.4118, "step": 12815 }, { "epoch": 0.780440276466827, "grad_norm": 1.0684364585779667, "learning_rate": 4.79689480760589e-06, "loss": 0.4472, "step": 12816 }, { "epoch": 0.7805011722437049, "grad_norm": 1.1455545517963175, "learning_rate": 4.796863301351484e-06, "loss": 0.3635, "step": 12817 }, { "epoch": 0.7805620680205828, "grad_norm": 1.0460279133999044, "learning_rate": 4.79683179275708e-06, "loss": 0.4347, "step": 12818 }, { "epoch": 0.7806229637974607, "grad_norm": 1.0505046188143252, "learning_rate": 4.796800281822712e-06, "loss": 0.4367, "step": 12819 }, { "epoch": 0.7806838595743385, "grad_norm": 0.9879575268733163, "learning_rate": 4.796768768548412e-06, "loss": 0.3751, "step": 12820 }, { "epoch": 0.7807447553512163, "grad_norm": 1.0306770607768605, "learning_rate": 4.7967372529342115e-06, "loss": 0.3553, "step": 12821 }, { "epoch": 0.7808056511280943, "grad_norm": 1.0758189151865516, "learning_rate": 4.7967057349801425e-06, "loss": 0.4956, "step": 12822 }, { "epoch": 0.7808665469049721, "grad_norm": 0.9210023477772008, "learning_rate": 4.796674214686237e-06, "loss": 0.436, "step": 12823 }, { "epoch": 0.78092744268185, "grad_norm": 0.9074874410601753, "learning_rate": 4.796642692052528e-06, "loss": 0.4196, "step": 12824 }, { "epoch": 0.7809883384587278, "grad_norm": 0.9832570039909039, "learning_rate": 4.796611167079048e-06, "loss": 0.4052, "step": 12825 }, { "epoch": 0.7810492342356058, "grad_norm": 1.084886114066157, "learning_rate": 4.796579639765827e-06, "loss": 0.3692, "step": 12826 }, { "epoch": 0.7811101300124836, "grad_norm": 1.0361687526743337, "learning_rate": 4.796548110112899e-06, "loss": 0.3949, "step": 12827 }, { "epoch": 0.7811710257893615, "grad_norm": 1.0876396279445812, "learning_rate": 4.796516578120296e-06, "loss": 0.4219, "step": 12828 }, { "epoch": 0.7812319215662393, "grad_norm": 0.9157589240359834, "learning_rate": 4.796485043788049e-06, "loss": 0.4603, "step": 12829 }, { "epoch": 0.7812928173431173, "grad_norm": 1.0318689401764523, "learning_rate": 4.7964535071161915e-06, "loss": 0.3985, "step": 12830 }, { "epoch": 0.7813537131199951, "grad_norm": 0.9908554252661861, "learning_rate": 4.796421968104754e-06, "loss": 0.3971, "step": 12831 }, { "epoch": 0.781414608896873, "grad_norm": 0.9628971134864286, "learning_rate": 4.79639042675377e-06, "loss": 0.4147, "step": 12832 }, { "epoch": 0.7814755046737508, "grad_norm": 1.0005666140897411, "learning_rate": 4.79635888306327e-06, "loss": 0.4557, "step": 12833 }, { "epoch": 0.7815364004506288, "grad_norm": 1.086180236485511, "learning_rate": 4.796327337033289e-06, "loss": 0.3957, "step": 12834 }, { "epoch": 0.7815972962275066, "grad_norm": 1.048861265631583, "learning_rate": 4.796295788663857e-06, "loss": 0.4929, "step": 12835 }, { "epoch": 0.7816581920043845, "grad_norm": 0.9776006533247312, "learning_rate": 4.796264237955006e-06, "loss": 0.4405, "step": 12836 }, { "epoch": 0.7817190877812624, "grad_norm": 0.945325966107918, "learning_rate": 4.796232684906769e-06, "loss": 0.3904, "step": 12837 }, { "epoch": 0.7817799835581403, "grad_norm": 1.0102361178619939, "learning_rate": 4.796201129519178e-06, "loss": 0.3396, "step": 12838 }, { "epoch": 0.7818408793350181, "grad_norm": 0.959833760323073, "learning_rate": 4.796169571792265e-06, "loss": 0.4062, "step": 12839 }, { "epoch": 0.781901775111896, "grad_norm": 0.9075921805983578, "learning_rate": 4.796138011726063e-06, "loss": 0.4916, "step": 12840 }, { "epoch": 0.7819626708887739, "grad_norm": 0.9737043527605134, "learning_rate": 4.7961064493206025e-06, "loss": 0.4078, "step": 12841 }, { "epoch": 0.7820235666656518, "grad_norm": 1.0653239689361933, "learning_rate": 4.796074884575917e-06, "loss": 0.3966, "step": 12842 }, { "epoch": 0.7820844624425296, "grad_norm": 0.9524832561256481, "learning_rate": 4.796043317492037e-06, "loss": 0.4088, "step": 12843 }, { "epoch": 0.7821453582194074, "grad_norm": 0.9328848706877827, "learning_rate": 4.796011748068997e-06, "loss": 0.4515, "step": 12844 }, { "epoch": 0.7822062539962854, "grad_norm": 1.0768418748519641, "learning_rate": 4.795980176306827e-06, "loss": 0.398, "step": 12845 }, { "epoch": 0.7822671497731633, "grad_norm": 0.982921226063071, "learning_rate": 4.7959486022055605e-06, "loss": 0.4462, "step": 12846 }, { "epoch": 0.7823280455500411, "grad_norm": 1.0103401753330628, "learning_rate": 4.79591702576523e-06, "loss": 0.4392, "step": 12847 }, { "epoch": 0.7823889413269189, "grad_norm": 0.9644529320466012, "learning_rate": 4.795885446985866e-06, "loss": 0.4359, "step": 12848 }, { "epoch": 0.7824498371037969, "grad_norm": 1.1109401577095772, "learning_rate": 4.795853865867502e-06, "loss": 0.3904, "step": 12849 }, { "epoch": 0.7825107328806747, "grad_norm": 1.082984609692948, "learning_rate": 4.79582228241017e-06, "loss": 0.4141, "step": 12850 }, { "epoch": 0.7825716286575526, "grad_norm": 1.0192818259210161, "learning_rate": 4.7957906966139015e-06, "loss": 0.4208, "step": 12851 }, { "epoch": 0.7826325244344304, "grad_norm": 0.978162708517692, "learning_rate": 4.79575910847873e-06, "loss": 0.4211, "step": 12852 }, { "epoch": 0.7826934202113084, "grad_norm": 0.9541693251481568, "learning_rate": 4.7957275180046855e-06, "loss": 0.4129, "step": 12853 }, { "epoch": 0.7827543159881862, "grad_norm": 0.9349369764600014, "learning_rate": 4.795695925191803e-06, "loss": 0.4449, "step": 12854 }, { "epoch": 0.7828152117650641, "grad_norm": 0.9571242131418672, "learning_rate": 4.795664330040113e-06, "loss": 0.4449, "step": 12855 }, { "epoch": 0.7828761075419419, "grad_norm": 0.9358842783420489, "learning_rate": 4.7956327325496465e-06, "loss": 0.4203, "step": 12856 }, { "epoch": 0.7829370033188199, "grad_norm": 0.980900931964412, "learning_rate": 4.7956011327204385e-06, "loss": 0.375, "step": 12857 }, { "epoch": 0.7829978990956977, "grad_norm": 0.8810238882887181, "learning_rate": 4.795569530552519e-06, "loss": 0.4706, "step": 12858 }, { "epoch": 0.7830587948725756, "grad_norm": 1.023147424325759, "learning_rate": 4.795537926045922e-06, "loss": 0.3743, "step": 12859 }, { "epoch": 0.7831196906494534, "grad_norm": 0.960254045000249, "learning_rate": 4.795506319200678e-06, "loss": 0.4149, "step": 12860 }, { "epoch": 0.7831805864263314, "grad_norm": 0.8908554427662069, "learning_rate": 4.7954747100168196e-06, "loss": 0.4121, "step": 12861 }, { "epoch": 0.7832414822032092, "grad_norm": 1.0403735590071252, "learning_rate": 4.79544309849438e-06, "loss": 0.4083, "step": 12862 }, { "epoch": 0.7833023779800871, "grad_norm": 0.9682667589307011, "learning_rate": 4.79541148463339e-06, "loss": 0.4196, "step": 12863 }, { "epoch": 0.7833632737569649, "grad_norm": 1.0762041864462106, "learning_rate": 4.795379868433883e-06, "loss": 0.4922, "step": 12864 }, { "epoch": 0.7834241695338429, "grad_norm": 1.0052463697169283, "learning_rate": 4.79534824989589e-06, "loss": 0.4284, "step": 12865 }, { "epoch": 0.7834850653107207, "grad_norm": 0.9671273385438368, "learning_rate": 4.795316629019445e-06, "loss": 0.4236, "step": 12866 }, { "epoch": 0.7835459610875986, "grad_norm": 1.0605437169202174, "learning_rate": 4.795285005804578e-06, "loss": 0.4144, "step": 12867 }, { "epoch": 0.7836068568644764, "grad_norm": 1.0172343713292156, "learning_rate": 4.7952533802513235e-06, "loss": 0.5292, "step": 12868 }, { "epoch": 0.7836677526413544, "grad_norm": 1.046418582600929, "learning_rate": 4.795221752359712e-06, "loss": 0.431, "step": 12869 }, { "epoch": 0.7837286484182322, "grad_norm": 1.0472635287720442, "learning_rate": 4.795190122129777e-06, "loss": 0.4572, "step": 12870 }, { "epoch": 0.78378954419511, "grad_norm": 0.9697705074797274, "learning_rate": 4.795158489561549e-06, "loss": 0.3888, "step": 12871 }, { "epoch": 0.7838504399719879, "grad_norm": 1.1708838348029407, "learning_rate": 4.795126854655062e-06, "loss": 0.3677, "step": 12872 }, { "epoch": 0.7839113357488658, "grad_norm": 0.9317513153445716, "learning_rate": 4.795095217410347e-06, "loss": 0.4595, "step": 12873 }, { "epoch": 0.7839722315257437, "grad_norm": 0.8934882485294031, "learning_rate": 4.795063577827437e-06, "loss": 0.4922, "step": 12874 }, { "epoch": 0.7840331273026215, "grad_norm": 0.9668294318761956, "learning_rate": 4.7950319359063635e-06, "loss": 0.4591, "step": 12875 }, { "epoch": 0.7840940230794995, "grad_norm": 0.9807857292638039, "learning_rate": 4.7950002916471596e-06, "loss": 0.4688, "step": 12876 }, { "epoch": 0.7841549188563773, "grad_norm": 1.0218684783875596, "learning_rate": 4.794968645049857e-06, "loss": 0.4243, "step": 12877 }, { "epoch": 0.7842158146332552, "grad_norm": 1.035569391817696, "learning_rate": 4.794936996114488e-06, "loss": 0.4697, "step": 12878 }, { "epoch": 0.784276710410133, "grad_norm": 1.0202629195924544, "learning_rate": 4.794905344841085e-06, "loss": 0.3718, "step": 12879 }, { "epoch": 0.784337606187011, "grad_norm": 0.9641352167077727, "learning_rate": 4.79487369122968e-06, "loss": 0.403, "step": 12880 }, { "epoch": 0.7843985019638888, "grad_norm": 1.0097900273810863, "learning_rate": 4.794842035280305e-06, "loss": 0.3686, "step": 12881 }, { "epoch": 0.7844593977407667, "grad_norm": 0.9526534981588575, "learning_rate": 4.7948103769929934e-06, "loss": 0.4754, "step": 12882 }, { "epoch": 0.7845202935176445, "grad_norm": 0.9187082128188052, "learning_rate": 4.794778716367776e-06, "loss": 0.4487, "step": 12883 }, { "epoch": 0.7845811892945225, "grad_norm": 1.0393354104164108, "learning_rate": 4.794747053404686e-06, "loss": 0.3953, "step": 12884 }, { "epoch": 0.7846420850714003, "grad_norm": 1.0615278812486624, "learning_rate": 4.794715388103756e-06, "loss": 0.3574, "step": 12885 }, { "epoch": 0.7847029808482782, "grad_norm": 1.0244687375575712, "learning_rate": 4.794683720465016e-06, "loss": 0.3851, "step": 12886 }, { "epoch": 0.784763876625156, "grad_norm": 1.02126760696053, "learning_rate": 4.794652050488502e-06, "loss": 0.3692, "step": 12887 }, { "epoch": 0.784824772402034, "grad_norm": 0.9975872930506405, "learning_rate": 4.794620378174244e-06, "loss": 0.3628, "step": 12888 }, { "epoch": 0.7848856681789118, "grad_norm": 1.0227525552844992, "learning_rate": 4.794588703522273e-06, "loss": 0.415, "step": 12889 }, { "epoch": 0.7849465639557897, "grad_norm": 1.0661786251946384, "learning_rate": 4.794557026532623e-06, "loss": 0.4329, "step": 12890 }, { "epoch": 0.7850074597326675, "grad_norm": 0.9999104409745524, "learning_rate": 4.794525347205328e-06, "loss": 0.3489, "step": 12891 }, { "epoch": 0.7850683555095455, "grad_norm": 0.9959467499594046, "learning_rate": 4.794493665540416e-06, "loss": 0.4317, "step": 12892 }, { "epoch": 0.7851292512864233, "grad_norm": 1.0712144039766136, "learning_rate": 4.794461981537922e-06, "loss": 0.4133, "step": 12893 }, { "epoch": 0.7851901470633011, "grad_norm": 1.024845974381754, "learning_rate": 4.7944302951978784e-06, "loss": 0.4104, "step": 12894 }, { "epoch": 0.785251042840179, "grad_norm": 1.1174941958684097, "learning_rate": 4.7943986065203175e-06, "loss": 0.4139, "step": 12895 }, { "epoch": 0.785311938617057, "grad_norm": 1.0433295013726978, "learning_rate": 4.794366915505269e-06, "loss": 0.3756, "step": 12896 }, { "epoch": 0.7853728343939348, "grad_norm": 1.0671084280466054, "learning_rate": 4.794335222152769e-06, "loss": 0.4224, "step": 12897 }, { "epoch": 0.7854337301708126, "grad_norm": 1.0146339589688236, "learning_rate": 4.794303526462848e-06, "loss": 0.4337, "step": 12898 }, { "epoch": 0.7854946259476905, "grad_norm": 1.0043904314311702, "learning_rate": 4.7942718284355374e-06, "loss": 0.4037, "step": 12899 }, { "epoch": 0.7855555217245684, "grad_norm": 0.9763805511500581, "learning_rate": 4.794240128070871e-06, "loss": 0.4666, "step": 12900 }, { "epoch": 0.7856164175014463, "grad_norm": 1.043597788929724, "learning_rate": 4.79420842536888e-06, "loss": 0.3377, "step": 12901 }, { "epoch": 0.7856773132783241, "grad_norm": 0.9469370781444413, "learning_rate": 4.7941767203295975e-06, "loss": 0.4803, "step": 12902 }, { "epoch": 0.785738209055202, "grad_norm": 0.9023152198746117, "learning_rate": 4.794145012953055e-06, "loss": 0.397, "step": 12903 }, { "epoch": 0.7857991048320799, "grad_norm": 1.0756775644350782, "learning_rate": 4.794113303239285e-06, "loss": 0.3717, "step": 12904 }, { "epoch": 0.7858600006089578, "grad_norm": 0.9856236979887598, "learning_rate": 4.794081591188321e-06, "loss": 0.4051, "step": 12905 }, { "epoch": 0.7859208963858356, "grad_norm": 1.0240854542560522, "learning_rate": 4.794049876800194e-06, "loss": 0.3792, "step": 12906 }, { "epoch": 0.7859817921627135, "grad_norm": 0.90482093139294, "learning_rate": 4.794018160074936e-06, "loss": 0.456, "step": 12907 }, { "epoch": 0.7860426879395914, "grad_norm": 1.0275846567735452, "learning_rate": 4.7939864410125806e-06, "loss": 0.3666, "step": 12908 }, { "epoch": 0.7861035837164693, "grad_norm": 0.9790047215100995, "learning_rate": 4.7939547196131595e-06, "loss": 0.486, "step": 12909 }, { "epoch": 0.7861644794933471, "grad_norm": 0.9349329648029272, "learning_rate": 4.793922995876705e-06, "loss": 0.4521, "step": 12910 }, { "epoch": 0.786225375270225, "grad_norm": 1.0595566833926624, "learning_rate": 4.793891269803249e-06, "loss": 0.4006, "step": 12911 }, { "epoch": 0.7862862710471029, "grad_norm": 1.0102711795974932, "learning_rate": 4.7938595413928256e-06, "loss": 0.3875, "step": 12912 }, { "epoch": 0.7863471668239808, "grad_norm": 0.9793263316914373, "learning_rate": 4.793827810645465e-06, "loss": 0.4518, "step": 12913 }, { "epoch": 0.7864080626008586, "grad_norm": 0.9977961732985887, "learning_rate": 4.793796077561201e-06, "loss": 0.4989, "step": 12914 }, { "epoch": 0.7864689583777364, "grad_norm": 1.0212263194847488, "learning_rate": 4.793764342140064e-06, "loss": 0.3948, "step": 12915 }, { "epoch": 0.7865298541546144, "grad_norm": 0.9484044299583645, "learning_rate": 4.793732604382088e-06, "loss": 0.3921, "step": 12916 }, { "epoch": 0.7865907499314923, "grad_norm": 1.079220016404707, "learning_rate": 4.793700864287304e-06, "loss": 0.3788, "step": 12917 }, { "epoch": 0.7866516457083701, "grad_norm": 1.0248970380977322, "learning_rate": 4.793669121855746e-06, "loss": 0.4033, "step": 12918 }, { "epoch": 0.786712541485248, "grad_norm": 1.003521515433648, "learning_rate": 4.7936373770874465e-06, "loss": 0.443, "step": 12919 }, { "epoch": 0.7867734372621259, "grad_norm": 1.0279651597677282, "learning_rate": 4.793605629982436e-06, "loss": 0.471, "step": 12920 }, { "epoch": 0.7868343330390037, "grad_norm": 1.1712135414067317, "learning_rate": 4.793573880540749e-06, "loss": 0.3938, "step": 12921 }, { "epoch": 0.7868952288158816, "grad_norm": 0.9909624343055037, "learning_rate": 4.7935421287624155e-06, "loss": 0.433, "step": 12922 }, { "epoch": 0.7869561245927595, "grad_norm": 1.0146791431709279, "learning_rate": 4.793510374647468e-06, "loss": 0.4126, "step": 12923 }, { "epoch": 0.7870170203696374, "grad_norm": 0.9128743907626945, "learning_rate": 4.793478618195942e-06, "loss": 0.4648, "step": 12924 }, { "epoch": 0.7870779161465152, "grad_norm": 0.9661176043331805, "learning_rate": 4.793446859407865e-06, "loss": 0.4041, "step": 12925 }, { "epoch": 0.7871388119233931, "grad_norm": 0.9569190082308467, "learning_rate": 4.793415098283274e-06, "loss": 0.495, "step": 12926 }, { "epoch": 0.787199707700271, "grad_norm": 0.9721736829401232, "learning_rate": 4.793383334822199e-06, "loss": 0.4069, "step": 12927 }, { "epoch": 0.7872606034771489, "grad_norm": 0.9842871127431253, "learning_rate": 4.793351569024673e-06, "loss": 0.4168, "step": 12928 }, { "epoch": 0.7873214992540267, "grad_norm": 1.0142372323654951, "learning_rate": 4.793319800890728e-06, "loss": 0.4164, "step": 12929 }, { "epoch": 0.7873823950309046, "grad_norm": 0.976551636973649, "learning_rate": 4.7932880304203955e-06, "loss": 0.4351, "step": 12930 }, { "epoch": 0.7874432908077825, "grad_norm": 0.9868929348986406, "learning_rate": 4.79325625761371e-06, "loss": 0.4771, "step": 12931 }, { "epoch": 0.7875041865846604, "grad_norm": 1.0053471979684898, "learning_rate": 4.793224482470702e-06, "loss": 0.3778, "step": 12932 }, { "epoch": 0.7875650823615382, "grad_norm": 1.1122634747682087, "learning_rate": 4.793192704991405e-06, "loss": 0.4097, "step": 12933 }, { "epoch": 0.7876259781384161, "grad_norm": 1.0642560835353265, "learning_rate": 4.793160925175851e-06, "loss": 0.3846, "step": 12934 }, { "epoch": 0.787686873915294, "grad_norm": 0.8879993061993314, "learning_rate": 4.793129143024072e-06, "loss": 0.4427, "step": 12935 }, { "epoch": 0.7877477696921719, "grad_norm": 1.0025889164828852, "learning_rate": 4.7930973585361005e-06, "loss": 0.4488, "step": 12936 }, { "epoch": 0.7878086654690497, "grad_norm": 1.0101154277323274, "learning_rate": 4.79306557171197e-06, "loss": 0.4804, "step": 12937 }, { "epoch": 0.7878695612459276, "grad_norm": 1.047500190821884, "learning_rate": 4.793033782551711e-06, "loss": 0.4399, "step": 12938 }, { "epoch": 0.7879304570228055, "grad_norm": 1.0269307899497024, "learning_rate": 4.793001991055357e-06, "loss": 0.4256, "step": 12939 }, { "epoch": 0.7879913527996834, "grad_norm": 0.9545591670488273, "learning_rate": 4.792970197222941e-06, "loss": 0.41, "step": 12940 }, { "epoch": 0.7880522485765612, "grad_norm": 1.0485973962026218, "learning_rate": 4.792938401054494e-06, "loss": 0.3504, "step": 12941 }, { "epoch": 0.788113144353439, "grad_norm": 1.0870141380010974, "learning_rate": 4.792906602550049e-06, "loss": 0.3618, "step": 12942 }, { "epoch": 0.788174040130317, "grad_norm": 1.001411248108663, "learning_rate": 4.792874801709639e-06, "loss": 0.4482, "step": 12943 }, { "epoch": 0.7882349359071948, "grad_norm": 1.0430088162832554, "learning_rate": 4.792842998533295e-06, "loss": 0.3903, "step": 12944 }, { "epoch": 0.7882958316840727, "grad_norm": 0.9707251520224953, "learning_rate": 4.792811193021051e-06, "loss": 0.4317, "step": 12945 }, { "epoch": 0.7883567274609505, "grad_norm": 0.9243960780233895, "learning_rate": 4.792779385172938e-06, "loss": 0.4178, "step": 12946 }, { "epoch": 0.7884176232378285, "grad_norm": 1.035810671681013, "learning_rate": 4.79274757498899e-06, "loss": 0.3801, "step": 12947 }, { "epoch": 0.7884785190147063, "grad_norm": 0.9718994568770585, "learning_rate": 4.792715762469237e-06, "loss": 0.3779, "step": 12948 }, { "epoch": 0.7885394147915842, "grad_norm": 1.0232930783610976, "learning_rate": 4.792683947613714e-06, "loss": 0.387, "step": 12949 }, { "epoch": 0.788600310568462, "grad_norm": 1.067041498837982, "learning_rate": 4.792652130422451e-06, "loss": 0.3583, "step": 12950 }, { "epoch": 0.78866120634534, "grad_norm": 0.9392653931822716, "learning_rate": 4.792620310895483e-06, "loss": 0.4559, "step": 12951 }, { "epoch": 0.7887221021222178, "grad_norm": 0.962813022245222, "learning_rate": 4.792588489032841e-06, "loss": 0.4206, "step": 12952 }, { "epoch": 0.7887829978990957, "grad_norm": 0.9957777331645866, "learning_rate": 4.792556664834557e-06, "loss": 0.4352, "step": 12953 }, { "epoch": 0.7888438936759735, "grad_norm": 0.9509757047143039, "learning_rate": 4.792524838300664e-06, "loss": 0.3776, "step": 12954 }, { "epoch": 0.7889047894528515, "grad_norm": 0.9305695049244579, "learning_rate": 4.792493009431194e-06, "loss": 0.3923, "step": 12955 }, { "epoch": 0.7889656852297293, "grad_norm": 1.0305036633407438, "learning_rate": 4.7924611782261805e-06, "loss": 0.448, "step": 12956 }, { "epoch": 0.7890265810066072, "grad_norm": 1.0298824831735653, "learning_rate": 4.792429344685655e-06, "loss": 0.3823, "step": 12957 }, { "epoch": 0.7890874767834851, "grad_norm": 1.0154344516927205, "learning_rate": 4.79239750880965e-06, "loss": 0.4657, "step": 12958 }, { "epoch": 0.789148372560363, "grad_norm": 0.9543571037672499, "learning_rate": 4.792365670598198e-06, "loss": 0.4628, "step": 12959 }, { "epoch": 0.7892092683372408, "grad_norm": 0.980910099512124, "learning_rate": 4.792333830051332e-06, "loss": 0.4105, "step": 12960 }, { "epoch": 0.7892701641141187, "grad_norm": 0.9364774147730054, "learning_rate": 4.792301987169083e-06, "loss": 0.4441, "step": 12961 }, { "epoch": 0.7893310598909966, "grad_norm": 1.1226827275893603, "learning_rate": 4.792270141951485e-06, "loss": 0.4485, "step": 12962 }, { "epoch": 0.7893919556678745, "grad_norm": 0.9601573779105891, "learning_rate": 4.79223829439857e-06, "loss": 0.4143, "step": 12963 }, { "epoch": 0.7894528514447523, "grad_norm": 1.019212698012444, "learning_rate": 4.79220644451037e-06, "loss": 0.4586, "step": 12964 }, { "epoch": 0.7895137472216301, "grad_norm": 1.006203557818235, "learning_rate": 4.792174592286918e-06, "loss": 0.3769, "step": 12965 }, { "epoch": 0.7895746429985081, "grad_norm": 0.9884614351414648, "learning_rate": 4.792142737728246e-06, "loss": 0.4649, "step": 12966 }, { "epoch": 0.789635538775386, "grad_norm": 0.9729654139248642, "learning_rate": 4.792110880834387e-06, "loss": 0.4393, "step": 12967 }, { "epoch": 0.7896964345522638, "grad_norm": 0.9967765611543823, "learning_rate": 4.792079021605372e-06, "loss": 0.3891, "step": 12968 }, { "epoch": 0.7897573303291416, "grad_norm": 1.0875606968053437, "learning_rate": 4.792047160041235e-06, "loss": 0.4308, "step": 12969 }, { "epoch": 0.7898182261060196, "grad_norm": 1.0595879822949106, "learning_rate": 4.792015296142009e-06, "loss": 0.3743, "step": 12970 }, { "epoch": 0.7898791218828974, "grad_norm": 0.9932196198479512, "learning_rate": 4.791983429907725e-06, "loss": 0.4202, "step": 12971 }, { "epoch": 0.7899400176597753, "grad_norm": 0.9845329085425347, "learning_rate": 4.791951561338415e-06, "loss": 0.4585, "step": 12972 }, { "epoch": 0.7900009134366531, "grad_norm": 1.0410751584425433, "learning_rate": 4.7919196904341136e-06, "loss": 0.4473, "step": 12973 }, { "epoch": 0.7900618092135311, "grad_norm": 0.9600949168363881, "learning_rate": 4.7918878171948514e-06, "loss": 0.3744, "step": 12974 }, { "epoch": 0.7901227049904089, "grad_norm": 1.1049195473597426, "learning_rate": 4.791855941620662e-06, "loss": 0.3794, "step": 12975 }, { "epoch": 0.7901836007672868, "grad_norm": 0.9663529991742598, "learning_rate": 4.7918240637115764e-06, "loss": 0.4141, "step": 12976 }, { "epoch": 0.7902444965441646, "grad_norm": 1.0141659910076672, "learning_rate": 4.79179218346763e-06, "loss": 0.3659, "step": 12977 }, { "epoch": 0.7903053923210426, "grad_norm": 0.9168498620228736, "learning_rate": 4.791760300888851e-06, "loss": 0.4252, "step": 12978 }, { "epoch": 0.7903662880979204, "grad_norm": 1.0009224533190306, "learning_rate": 4.791728415975276e-06, "loss": 0.4053, "step": 12979 }, { "epoch": 0.7904271838747983, "grad_norm": 1.0202099817613546, "learning_rate": 4.791696528726934e-06, "loss": 0.3653, "step": 12980 }, { "epoch": 0.7904880796516761, "grad_norm": 0.9507590863719507, "learning_rate": 4.791664639143861e-06, "loss": 0.4585, "step": 12981 }, { "epoch": 0.7905489754285541, "grad_norm": 0.9789463604589671, "learning_rate": 4.791632747226087e-06, "loss": 0.3785, "step": 12982 }, { "epoch": 0.7906098712054319, "grad_norm": 1.0679895507083395, "learning_rate": 4.791600852973646e-06, "loss": 0.4377, "step": 12983 }, { "epoch": 0.7906707669823098, "grad_norm": 1.0234547031990031, "learning_rate": 4.7915689563865685e-06, "loss": 0.3704, "step": 12984 }, { "epoch": 0.7907316627591876, "grad_norm": 1.050602079170671, "learning_rate": 4.791537057464889e-06, "loss": 0.3606, "step": 12985 }, { "epoch": 0.7907925585360656, "grad_norm": 0.9770070592912589, "learning_rate": 4.791505156208639e-06, "loss": 0.3879, "step": 12986 }, { "epoch": 0.7908534543129434, "grad_norm": 1.1364992292863207, "learning_rate": 4.791473252617851e-06, "loss": 0.4281, "step": 12987 }, { "epoch": 0.7909143500898212, "grad_norm": 0.9541622588555151, "learning_rate": 4.791441346692558e-06, "loss": 0.4289, "step": 12988 }, { "epoch": 0.7909752458666991, "grad_norm": 0.93012834413104, "learning_rate": 4.791409438432792e-06, "loss": 0.3873, "step": 12989 }, { "epoch": 0.791036141643577, "grad_norm": 1.0367374683961261, "learning_rate": 4.791377527838585e-06, "loss": 0.4178, "step": 12990 }, { "epoch": 0.7910970374204549, "grad_norm": 0.9982302857299669, "learning_rate": 4.7913456149099716e-06, "loss": 0.4107, "step": 12991 }, { "epoch": 0.7911579331973327, "grad_norm": 1.0081698547556646, "learning_rate": 4.7913136996469816e-06, "loss": 0.4402, "step": 12992 }, { "epoch": 0.7912188289742106, "grad_norm": 0.9684391885631602, "learning_rate": 4.791281782049649e-06, "loss": 0.4642, "step": 12993 }, { "epoch": 0.7912797247510885, "grad_norm": 0.96605732957712, "learning_rate": 4.791249862118007e-06, "loss": 0.4477, "step": 12994 }, { "epoch": 0.7913406205279664, "grad_norm": 0.9557348308050183, "learning_rate": 4.791217939852087e-06, "loss": 0.396, "step": 12995 }, { "epoch": 0.7914015163048442, "grad_norm": 0.9325004898798374, "learning_rate": 4.791186015251922e-06, "loss": 0.5007, "step": 12996 }, { "epoch": 0.7914624120817221, "grad_norm": 1.0137076872093322, "learning_rate": 4.791154088317544e-06, "loss": 0.4922, "step": 12997 }, { "epoch": 0.7915233078586, "grad_norm": 0.9829883808320226, "learning_rate": 4.791122159048985e-06, "loss": 0.4674, "step": 12998 }, { "epoch": 0.7915842036354779, "grad_norm": 0.9178965403757321, "learning_rate": 4.7910902274462805e-06, "loss": 0.3896, "step": 12999 }, { "epoch": 0.7916450994123557, "grad_norm": 1.0228572300230716, "learning_rate": 4.791058293509458e-06, "loss": 0.4255, "step": 13000 }, { "epoch": 0.7917059951892337, "grad_norm": 1.009407054575552, "learning_rate": 4.791026357238555e-06, "loss": 0.4877, "step": 13001 }, { "epoch": 0.7917668909661115, "grad_norm": 0.9814496071182438, "learning_rate": 4.790994418633602e-06, "loss": 0.3974, "step": 13002 }, { "epoch": 0.7918277867429894, "grad_norm": 0.8973295515826255, "learning_rate": 4.790962477694631e-06, "loss": 0.4529, "step": 13003 }, { "epoch": 0.7918886825198672, "grad_norm": 1.0285638351728583, "learning_rate": 4.790930534421675e-06, "loss": 0.4055, "step": 13004 }, { "epoch": 0.7919495782967452, "grad_norm": 0.8820718770651046, "learning_rate": 4.790898588814767e-06, "loss": 0.4414, "step": 13005 }, { "epoch": 0.792010474073623, "grad_norm": 1.0358980705997967, "learning_rate": 4.790866640873939e-06, "loss": 0.4491, "step": 13006 }, { "epoch": 0.7920713698505009, "grad_norm": 0.9969429834778242, "learning_rate": 4.7908346905992235e-06, "loss": 0.4377, "step": 13007 }, { "epoch": 0.7921322656273787, "grad_norm": 0.9254358145899755, "learning_rate": 4.790802737990653e-06, "loss": 0.4427, "step": 13008 }, { "epoch": 0.7921931614042567, "grad_norm": 0.9610759806533696, "learning_rate": 4.790770783048261e-06, "loss": 0.3757, "step": 13009 }, { "epoch": 0.7922540571811345, "grad_norm": 1.011351952240975, "learning_rate": 4.790738825772078e-06, "loss": 0.3938, "step": 13010 }, { "epoch": 0.7923149529580124, "grad_norm": 0.9329506590712717, "learning_rate": 4.79070686616214e-06, "loss": 0.3963, "step": 13011 }, { "epoch": 0.7923758487348902, "grad_norm": 1.0167837272495637, "learning_rate": 4.790674904218476e-06, "loss": 0.4443, "step": 13012 }, { "epoch": 0.7924367445117682, "grad_norm": 1.048388910997831, "learning_rate": 4.79064293994112e-06, "loss": 0.4519, "step": 13013 }, { "epoch": 0.792497640288646, "grad_norm": 1.0354324893134752, "learning_rate": 4.790610973330105e-06, "loss": 0.3992, "step": 13014 }, { "epoch": 0.7925585360655238, "grad_norm": 0.9509923127733703, "learning_rate": 4.7905790043854635e-06, "loss": 0.4302, "step": 13015 }, { "epoch": 0.7926194318424017, "grad_norm": 1.004326030787231, "learning_rate": 4.7905470331072276e-06, "loss": 0.409, "step": 13016 }, { "epoch": 0.7926803276192796, "grad_norm": 0.9121881396996515, "learning_rate": 4.790515059495429e-06, "loss": 0.4141, "step": 13017 }, { "epoch": 0.7927412233961575, "grad_norm": 0.9695677152812116, "learning_rate": 4.790483083550103e-06, "loss": 0.4755, "step": 13018 }, { "epoch": 0.7928021191730353, "grad_norm": 1.0345423776736118, "learning_rate": 4.7904511052712785e-06, "loss": 0.4256, "step": 13019 }, { "epoch": 0.7928630149499132, "grad_norm": 0.9949393864771411, "learning_rate": 4.790419124658992e-06, "loss": 0.4441, "step": 13020 }, { "epoch": 0.7929239107267911, "grad_norm": 0.9907856921565877, "learning_rate": 4.790387141713273e-06, "loss": 0.3298, "step": 13021 }, { "epoch": 0.792984806503669, "grad_norm": 1.036254576734454, "learning_rate": 4.7903551564341555e-06, "loss": 0.4327, "step": 13022 }, { "epoch": 0.7930457022805468, "grad_norm": 1.0290755745187108, "learning_rate": 4.790323168821671e-06, "loss": 0.415, "step": 13023 }, { "epoch": 0.7931065980574247, "grad_norm": 1.0271117682464672, "learning_rate": 4.790291178875854e-06, "loss": 0.4182, "step": 13024 }, { "epoch": 0.7931674938343026, "grad_norm": 1.0023448838707445, "learning_rate": 4.790259186596735e-06, "loss": 0.4337, "step": 13025 }, { "epoch": 0.7932283896111805, "grad_norm": 0.9688865274127839, "learning_rate": 4.7902271919843475e-06, "loss": 0.3481, "step": 13026 }, { "epoch": 0.7932892853880583, "grad_norm": 1.0520334712998705, "learning_rate": 4.7901951950387254e-06, "loss": 0.4009, "step": 13027 }, { "epoch": 0.7933501811649362, "grad_norm": 0.9725116093670895, "learning_rate": 4.790163195759899e-06, "loss": 0.3676, "step": 13028 }, { "epoch": 0.7934110769418141, "grad_norm": 1.0071774039855832, "learning_rate": 4.7901311941479025e-06, "loss": 0.4932, "step": 13029 }, { "epoch": 0.793471972718692, "grad_norm": 1.0825275247095951, "learning_rate": 4.790099190202769e-06, "loss": 0.4319, "step": 13030 }, { "epoch": 0.7935328684955698, "grad_norm": 1.0043533817135768, "learning_rate": 4.790067183924528e-06, "loss": 0.4328, "step": 13031 }, { "epoch": 0.7935937642724477, "grad_norm": 0.9989818785241754, "learning_rate": 4.790035175313214e-06, "loss": 0.5233, "step": 13032 }, { "epoch": 0.7936546600493256, "grad_norm": 0.997167443411966, "learning_rate": 4.790003164368862e-06, "loss": 0.3864, "step": 13033 }, { "epoch": 0.7937155558262035, "grad_norm": 1.0804682103670809, "learning_rate": 4.789971151091501e-06, "loss": 0.3764, "step": 13034 }, { "epoch": 0.7937764516030813, "grad_norm": 1.0715017546611214, "learning_rate": 4.7899391354811645e-06, "loss": 0.3872, "step": 13035 }, { "epoch": 0.7938373473799591, "grad_norm": 1.0285945431551002, "learning_rate": 4.789907117537886e-06, "loss": 0.4723, "step": 13036 }, { "epoch": 0.7938982431568371, "grad_norm": 1.0685093860561565, "learning_rate": 4.789875097261698e-06, "loss": 0.3748, "step": 13037 }, { "epoch": 0.793959138933715, "grad_norm": 0.9664246837128945, "learning_rate": 4.7898430746526334e-06, "loss": 0.4538, "step": 13038 }, { "epoch": 0.7940200347105928, "grad_norm": 1.0148388744532721, "learning_rate": 4.789811049710723e-06, "loss": 0.3897, "step": 13039 }, { "epoch": 0.7940809304874707, "grad_norm": 1.0682527107350723, "learning_rate": 4.7897790224360015e-06, "loss": 0.4373, "step": 13040 }, { "epoch": 0.7941418262643486, "grad_norm": 0.9731773136693981, "learning_rate": 4.789746992828501e-06, "loss": 0.4643, "step": 13041 }, { "epoch": 0.7942027220412264, "grad_norm": 0.9604591439389215, "learning_rate": 4.789714960888254e-06, "loss": 0.3885, "step": 13042 }, { "epoch": 0.7942636178181043, "grad_norm": 0.971043339411984, "learning_rate": 4.7896829266152925e-06, "loss": 0.4356, "step": 13043 }, { "epoch": 0.7943245135949822, "grad_norm": 1.0579292347069968, "learning_rate": 4.78965089000965e-06, "loss": 0.3872, "step": 13044 }, { "epoch": 0.7943854093718601, "grad_norm": 0.961775938108673, "learning_rate": 4.789618851071358e-06, "loss": 0.4246, "step": 13045 }, { "epoch": 0.7944463051487379, "grad_norm": 0.9299690165198281, "learning_rate": 4.78958680980045e-06, "loss": 0.4312, "step": 13046 }, { "epoch": 0.7945072009256158, "grad_norm": 1.0116203314883903, "learning_rate": 4.789554766196959e-06, "loss": 0.3681, "step": 13047 }, { "epoch": 0.7945680967024937, "grad_norm": 0.9907736008666257, "learning_rate": 4.789522720260917e-06, "loss": 0.4616, "step": 13048 }, { "epoch": 0.7946289924793716, "grad_norm": 0.9299679194617333, "learning_rate": 4.789490671992357e-06, "loss": 0.4851, "step": 13049 }, { "epoch": 0.7946898882562494, "grad_norm": 1.1052068546332339, "learning_rate": 4.789458621391312e-06, "loss": 0.3549, "step": 13050 }, { "epoch": 0.7947507840331273, "grad_norm": 0.9709547401214411, "learning_rate": 4.789426568457814e-06, "loss": 0.4637, "step": 13051 }, { "epoch": 0.7948116798100052, "grad_norm": 0.9688248764491729, "learning_rate": 4.7893945131918955e-06, "loss": 0.3901, "step": 13052 }, { "epoch": 0.7948725755868831, "grad_norm": 1.0603614333799358, "learning_rate": 4.789362455593589e-06, "loss": 0.4013, "step": 13053 }, { "epoch": 0.7949334713637609, "grad_norm": 1.0183677425435957, "learning_rate": 4.789330395662928e-06, "loss": 0.4052, "step": 13054 }, { "epoch": 0.7949943671406388, "grad_norm": 1.0373103004736366, "learning_rate": 4.789298333399944e-06, "loss": 0.4768, "step": 13055 }, { "epoch": 0.7950552629175167, "grad_norm": 1.0404691089932336, "learning_rate": 4.789266268804672e-06, "loss": 0.405, "step": 13056 }, { "epoch": 0.7951161586943946, "grad_norm": 1.0428080263540251, "learning_rate": 4.789234201877142e-06, "loss": 0.4269, "step": 13057 }, { "epoch": 0.7951770544712724, "grad_norm": 0.9135755333256684, "learning_rate": 4.789202132617389e-06, "loss": 0.455, "step": 13058 }, { "epoch": 0.7952379502481502, "grad_norm": 0.9463712228879706, "learning_rate": 4.7891700610254436e-06, "loss": 0.4437, "step": 13059 }, { "epoch": 0.7952988460250282, "grad_norm": 0.9868840898407625, "learning_rate": 4.7891379871013385e-06, "loss": 0.4667, "step": 13060 }, { "epoch": 0.795359741801906, "grad_norm": 1.0553519247348429, "learning_rate": 4.7891059108451086e-06, "loss": 0.3529, "step": 13061 }, { "epoch": 0.7954206375787839, "grad_norm": 0.9630657984397558, "learning_rate": 4.789073832256784e-06, "loss": 0.4402, "step": 13062 }, { "epoch": 0.7954815333556617, "grad_norm": 0.9743070887142377, "learning_rate": 4.7890417513364e-06, "loss": 0.419, "step": 13063 }, { "epoch": 0.7955424291325397, "grad_norm": 1.037846392345482, "learning_rate": 4.789009668083987e-06, "loss": 0.4177, "step": 13064 }, { "epoch": 0.7956033249094175, "grad_norm": 0.9467664387682367, "learning_rate": 4.788977582499579e-06, "loss": 0.3897, "step": 13065 }, { "epoch": 0.7956642206862954, "grad_norm": 0.9538212930844834, "learning_rate": 4.788945494583208e-06, "loss": 0.4367, "step": 13066 }, { "epoch": 0.7957251164631732, "grad_norm": 0.9878064305130954, "learning_rate": 4.788913404334906e-06, "loss": 0.4484, "step": 13067 }, { "epoch": 0.7957860122400512, "grad_norm": 0.9257777221996661, "learning_rate": 4.788881311754708e-06, "loss": 0.4446, "step": 13068 }, { "epoch": 0.795846908016929, "grad_norm": 1.0066298026361347, "learning_rate": 4.788849216842645e-06, "loss": 0.3536, "step": 13069 }, { "epoch": 0.7959078037938069, "grad_norm": 1.0226652830231007, "learning_rate": 4.7888171195987495e-06, "loss": 0.4053, "step": 13070 }, { "epoch": 0.7959686995706847, "grad_norm": 1.15987851236605, "learning_rate": 4.788785020023055e-06, "loss": 0.4601, "step": 13071 }, { "epoch": 0.7960295953475627, "grad_norm": 1.0421085600834548, "learning_rate": 4.788752918115593e-06, "loss": 0.4281, "step": 13072 }, { "epoch": 0.7960904911244405, "grad_norm": 1.0799787846164688, "learning_rate": 4.788720813876398e-06, "loss": 0.3772, "step": 13073 }, { "epoch": 0.7961513869013184, "grad_norm": 0.9713244906903589, "learning_rate": 4.788688707305502e-06, "loss": 0.4314, "step": 13074 }, { "epoch": 0.7962122826781962, "grad_norm": 1.031149270701065, "learning_rate": 4.788656598402938e-06, "loss": 0.3898, "step": 13075 }, { "epoch": 0.7962731784550742, "grad_norm": 1.0643551819181785, "learning_rate": 4.788624487168737e-06, "loss": 0.4196, "step": 13076 }, { "epoch": 0.796334074231952, "grad_norm": 1.0738579331743787, "learning_rate": 4.788592373602934e-06, "loss": 0.5126, "step": 13077 }, { "epoch": 0.7963949700088299, "grad_norm": 1.0640777484390231, "learning_rate": 4.78856025770556e-06, "loss": 0.459, "step": 13078 }, { "epoch": 0.7964558657857077, "grad_norm": 1.0519501991158213, "learning_rate": 4.788528139476648e-06, "loss": 0.4415, "step": 13079 }, { "epoch": 0.7965167615625857, "grad_norm": 0.9803701917145277, "learning_rate": 4.788496018916232e-06, "loss": 0.4699, "step": 13080 }, { "epoch": 0.7965776573394635, "grad_norm": 0.9649900167282086, "learning_rate": 4.788463896024343e-06, "loss": 0.4743, "step": 13081 }, { "epoch": 0.7966385531163414, "grad_norm": 0.9985399065563603, "learning_rate": 4.788431770801015e-06, "loss": 0.428, "step": 13082 }, { "epoch": 0.7966994488932193, "grad_norm": 1.0188167394479217, "learning_rate": 4.788399643246281e-06, "loss": 0.3607, "step": 13083 }, { "epoch": 0.7967603446700972, "grad_norm": 0.9634332959447907, "learning_rate": 4.788367513360173e-06, "loss": 0.4438, "step": 13084 }, { "epoch": 0.796821240446975, "grad_norm": 0.9554391045761035, "learning_rate": 4.788335381142722e-06, "loss": 0.4863, "step": 13085 }, { "epoch": 0.7968821362238528, "grad_norm": 0.9298080591578196, "learning_rate": 4.788303246593964e-06, "loss": 0.4665, "step": 13086 }, { "epoch": 0.7969430320007308, "grad_norm": 0.9586546112872101, "learning_rate": 4.78827110971393e-06, "loss": 0.4248, "step": 13087 }, { "epoch": 0.7970039277776086, "grad_norm": 0.9268489670341614, "learning_rate": 4.788238970502653e-06, "loss": 0.489, "step": 13088 }, { "epoch": 0.7970648235544865, "grad_norm": 1.0428930833748025, "learning_rate": 4.788206828960165e-06, "loss": 0.4115, "step": 13089 }, { "epoch": 0.7971257193313643, "grad_norm": 1.002735533007527, "learning_rate": 4.7881746850865e-06, "loss": 0.412, "step": 13090 }, { "epoch": 0.7971866151082423, "grad_norm": 1.0280004152563724, "learning_rate": 4.78814253888169e-06, "loss": 0.3263, "step": 13091 }, { "epoch": 0.7972475108851201, "grad_norm": 0.9606524861522716, "learning_rate": 4.788110390345768e-06, "loss": 0.4384, "step": 13092 }, { "epoch": 0.797308406661998, "grad_norm": 1.147689259449308, "learning_rate": 4.788078239478767e-06, "loss": 0.3664, "step": 13093 }, { "epoch": 0.7973693024388758, "grad_norm": 1.0411402759216755, "learning_rate": 4.78804608628072e-06, "loss": 0.5126, "step": 13094 }, { "epoch": 0.7974301982157538, "grad_norm": 1.013949702319973, "learning_rate": 4.788013930751658e-06, "loss": 0.3918, "step": 13095 }, { "epoch": 0.7974910939926316, "grad_norm": 1.070775196350441, "learning_rate": 4.787981772891616e-06, "loss": 0.4077, "step": 13096 }, { "epoch": 0.7975519897695095, "grad_norm": 0.9664539862067882, "learning_rate": 4.787949612700625e-06, "loss": 0.4663, "step": 13097 }, { "epoch": 0.7976128855463873, "grad_norm": 0.9852753468675022, "learning_rate": 4.787917450178719e-06, "loss": 0.5049, "step": 13098 }, { "epoch": 0.7976737813232653, "grad_norm": 1.035198255646841, "learning_rate": 4.787885285325929e-06, "loss": 0.4117, "step": 13099 }, { "epoch": 0.7977346771001431, "grad_norm": 0.9666567580274309, "learning_rate": 4.7878531181422904e-06, "loss": 0.4344, "step": 13100 }, { "epoch": 0.797795572877021, "grad_norm": 0.9860369430396799, "learning_rate": 4.787820948627835e-06, "loss": 0.3891, "step": 13101 }, { "epoch": 0.7978564686538988, "grad_norm": 1.0515559716151577, "learning_rate": 4.787788776782593e-06, "loss": 0.3332, "step": 13102 }, { "epoch": 0.7979173644307768, "grad_norm": 1.0057289988253428, "learning_rate": 4.787756602606602e-06, "loss": 0.3918, "step": 13103 }, { "epoch": 0.7979782602076546, "grad_norm": 0.9769353007783873, "learning_rate": 4.787724426099891e-06, "loss": 0.3846, "step": 13104 }, { "epoch": 0.7980391559845325, "grad_norm": 0.9847849835451379, "learning_rate": 4.787692247262494e-06, "loss": 0.4657, "step": 13105 }, { "epoch": 0.7981000517614103, "grad_norm": 1.0450465443999626, "learning_rate": 4.787660066094443e-06, "loss": 0.4297, "step": 13106 }, { "epoch": 0.7981609475382883, "grad_norm": 1.004363822832171, "learning_rate": 4.787627882595772e-06, "loss": 0.409, "step": 13107 }, { "epoch": 0.7982218433151661, "grad_norm": 1.0308675683040727, "learning_rate": 4.787595696766514e-06, "loss": 0.3849, "step": 13108 }, { "epoch": 0.798282739092044, "grad_norm": 1.0089673885086086, "learning_rate": 4.7875635086067e-06, "loss": 0.385, "step": 13109 }, { "epoch": 0.7983436348689218, "grad_norm": 1.0009221969591424, "learning_rate": 4.787531318116364e-06, "loss": 0.4481, "step": 13110 }, { "epoch": 0.7984045306457997, "grad_norm": 0.9542972859630674, "learning_rate": 4.7874991252955395e-06, "loss": 0.4509, "step": 13111 }, { "epoch": 0.7984654264226776, "grad_norm": 1.0407079061787465, "learning_rate": 4.787466930144257e-06, "loss": 0.4287, "step": 13112 }, { "epoch": 0.7985263221995554, "grad_norm": 1.0064328495814334, "learning_rate": 4.787434732662552e-06, "loss": 0.4187, "step": 13113 }, { "epoch": 0.7985872179764333, "grad_norm": 1.020611092232792, "learning_rate": 4.787402532850456e-06, "loss": 0.4285, "step": 13114 }, { "epoch": 0.7986481137533112, "grad_norm": 1.038301323327143, "learning_rate": 4.787370330708001e-06, "loss": 0.3663, "step": 13115 }, { "epoch": 0.7987090095301891, "grad_norm": 1.0586553675423438, "learning_rate": 4.787338126235221e-06, "loss": 0.3228, "step": 13116 }, { "epoch": 0.7987699053070669, "grad_norm": 1.055734465875612, "learning_rate": 4.787305919432149e-06, "loss": 0.4463, "step": 13117 }, { "epoch": 0.7988308010839448, "grad_norm": 0.9761613225274606, "learning_rate": 4.787273710298817e-06, "loss": 0.3966, "step": 13118 }, { "epoch": 0.7988916968608227, "grad_norm": 0.9749798978401041, "learning_rate": 4.7872414988352576e-06, "loss": 0.3874, "step": 13119 }, { "epoch": 0.7989525926377006, "grad_norm": 1.0177598536873331, "learning_rate": 4.787209285041505e-06, "loss": 0.4438, "step": 13120 }, { "epoch": 0.7990134884145784, "grad_norm": 0.9594037322562391, "learning_rate": 4.78717706891759e-06, "loss": 0.3971, "step": 13121 }, { "epoch": 0.7990743841914564, "grad_norm": 1.021872825230866, "learning_rate": 4.787144850463547e-06, "loss": 0.3902, "step": 13122 }, { "epoch": 0.7991352799683342, "grad_norm": 0.9764749693989682, "learning_rate": 4.787112629679409e-06, "loss": 0.3963, "step": 13123 }, { "epoch": 0.7991961757452121, "grad_norm": 1.0004199430597418, "learning_rate": 4.787080406565208e-06, "loss": 0.3943, "step": 13124 }, { "epoch": 0.7992570715220899, "grad_norm": 1.014950045816023, "learning_rate": 4.787048181120977e-06, "loss": 0.3585, "step": 13125 }, { "epoch": 0.7993179672989679, "grad_norm": 0.974569652884851, "learning_rate": 4.787015953346749e-06, "loss": 0.3991, "step": 13126 }, { "epoch": 0.7993788630758457, "grad_norm": 0.9762549881702526, "learning_rate": 4.786983723242555e-06, "loss": 0.5037, "step": 13127 }, { "epoch": 0.7994397588527236, "grad_norm": 0.9918746150894123, "learning_rate": 4.786951490808431e-06, "loss": 0.3896, "step": 13128 }, { "epoch": 0.7995006546296014, "grad_norm": 0.9807158937339595, "learning_rate": 4.7869192560444085e-06, "loss": 0.4624, "step": 13129 }, { "epoch": 0.7995615504064794, "grad_norm": 0.9846496565671631, "learning_rate": 4.78688701895052e-06, "loss": 0.442, "step": 13130 }, { "epoch": 0.7996224461833572, "grad_norm": 1.0330892024139977, "learning_rate": 4.786854779526799e-06, "loss": 0.4158, "step": 13131 }, { "epoch": 0.799683341960235, "grad_norm": 0.9984501661545877, "learning_rate": 4.786822537773277e-06, "loss": 0.467, "step": 13132 }, { "epoch": 0.7997442377371129, "grad_norm": 1.06431859783503, "learning_rate": 4.786790293689989e-06, "loss": 0.4397, "step": 13133 }, { "epoch": 0.7998051335139909, "grad_norm": 0.9933308382860222, "learning_rate": 4.786758047276965e-06, "loss": 0.4192, "step": 13134 }, { "epoch": 0.7998660292908687, "grad_norm": 1.0510869102866331, "learning_rate": 4.786725798534241e-06, "loss": 0.3632, "step": 13135 }, { "epoch": 0.7999269250677465, "grad_norm": 1.0211327568250854, "learning_rate": 4.786693547461847e-06, "loss": 0.3152, "step": 13136 }, { "epoch": 0.7999878208446244, "grad_norm": 0.9636601704004367, "learning_rate": 4.786661294059818e-06, "loss": 0.3764, "step": 13137 }, { "epoch": 0.8000487166215023, "grad_norm": 0.9865908124346997, "learning_rate": 4.786629038328185e-06, "loss": 0.3568, "step": 13138 }, { "epoch": 0.8001096123983802, "grad_norm": 0.9950150995913035, "learning_rate": 4.786596780266983e-06, "loss": 0.507, "step": 13139 }, { "epoch": 0.800170508175258, "grad_norm": 0.9724259679627666, "learning_rate": 4.7865645198762435e-06, "loss": 0.4182, "step": 13140 }, { "epoch": 0.8002314039521359, "grad_norm": 1.0124746468444845, "learning_rate": 4.786532257155999e-06, "loss": 0.3914, "step": 13141 }, { "epoch": 0.8002922997290138, "grad_norm": 0.9413555608438043, "learning_rate": 4.786499992106284e-06, "loss": 0.4542, "step": 13142 }, { "epoch": 0.8003531955058917, "grad_norm": 1.0268195342358373, "learning_rate": 4.7864677247271296e-06, "loss": 0.4532, "step": 13143 }, { "epoch": 0.8004140912827695, "grad_norm": 1.0360115008013693, "learning_rate": 4.7864354550185695e-06, "loss": 0.4152, "step": 13144 }, { "epoch": 0.8004749870596474, "grad_norm": 0.9903613344711152, "learning_rate": 4.786403182980636e-06, "loss": 0.4069, "step": 13145 }, { "epoch": 0.8005358828365253, "grad_norm": 1.094066571306657, "learning_rate": 4.786370908613363e-06, "loss": 0.4007, "step": 13146 }, { "epoch": 0.8005967786134032, "grad_norm": 0.9111457623317688, "learning_rate": 4.786338631916783e-06, "loss": 0.4904, "step": 13147 }, { "epoch": 0.800657674390281, "grad_norm": 1.0098981468622485, "learning_rate": 4.786306352890928e-06, "loss": 0.396, "step": 13148 }, { "epoch": 0.8007185701671589, "grad_norm": 1.0623615939158355, "learning_rate": 4.786274071535833e-06, "loss": 0.404, "step": 13149 }, { "epoch": 0.8007794659440368, "grad_norm": 1.0517275915812303, "learning_rate": 4.786241787851529e-06, "loss": 0.4039, "step": 13150 }, { "epoch": 0.8008403617209147, "grad_norm": 1.0827530271661898, "learning_rate": 4.786209501838049e-06, "loss": 0.4121, "step": 13151 }, { "epoch": 0.8009012574977925, "grad_norm": 0.9952051633923994, "learning_rate": 4.7861772134954264e-06, "loss": 0.3914, "step": 13152 }, { "epoch": 0.8009621532746704, "grad_norm": 1.0198755908077475, "learning_rate": 4.786144922823694e-06, "loss": 0.3779, "step": 13153 }, { "epoch": 0.8010230490515483, "grad_norm": 1.0597213731618218, "learning_rate": 4.786112629822885e-06, "loss": 0.3659, "step": 13154 }, { "epoch": 0.8010839448284262, "grad_norm": 1.029125435129854, "learning_rate": 4.7860803344930314e-06, "loss": 0.4241, "step": 13155 }, { "epoch": 0.801144840605304, "grad_norm": 1.0065133286706367, "learning_rate": 4.786048036834166e-06, "loss": 0.3944, "step": 13156 }, { "epoch": 0.8012057363821818, "grad_norm": 0.9703796821590648, "learning_rate": 4.786015736846324e-06, "loss": 0.3974, "step": 13157 }, { "epoch": 0.8012666321590598, "grad_norm": 1.0414932278207567, "learning_rate": 4.7859834345295365e-06, "loss": 0.3809, "step": 13158 }, { "epoch": 0.8013275279359376, "grad_norm": 0.9532654992074717, "learning_rate": 4.785951129883835e-06, "loss": 0.4413, "step": 13159 }, { "epoch": 0.8013884237128155, "grad_norm": 1.0368422986386985, "learning_rate": 4.785918822909256e-06, "loss": 0.4207, "step": 13160 }, { "epoch": 0.8014493194896933, "grad_norm": 0.9642913755086415, "learning_rate": 4.78588651360583e-06, "loss": 0.438, "step": 13161 }, { "epoch": 0.8015102152665713, "grad_norm": 0.9121874427600382, "learning_rate": 4.785854201973589e-06, "loss": 0.4528, "step": 13162 }, { "epoch": 0.8015711110434491, "grad_norm": 1.0109968313446436, "learning_rate": 4.785821888012568e-06, "loss": 0.4914, "step": 13163 }, { "epoch": 0.801632006820327, "grad_norm": 1.0880775800837412, "learning_rate": 4.7857895717228e-06, "loss": 0.3926, "step": 13164 }, { "epoch": 0.8016929025972049, "grad_norm": 0.9971086511914816, "learning_rate": 4.785757253104316e-06, "loss": 0.4846, "step": 13165 }, { "epoch": 0.8017537983740828, "grad_norm": 1.043789764539818, "learning_rate": 4.785724932157151e-06, "loss": 0.4003, "step": 13166 }, { "epoch": 0.8018146941509606, "grad_norm": 0.9099470010365567, "learning_rate": 4.785692608881336e-06, "loss": 0.4375, "step": 13167 }, { "epoch": 0.8018755899278385, "grad_norm": 1.0223337426486865, "learning_rate": 4.785660283276906e-06, "loss": 0.3802, "step": 13168 }, { "epoch": 0.8019364857047164, "grad_norm": 1.045647013383843, "learning_rate": 4.785627955343892e-06, "loss": 0.3634, "step": 13169 }, { "epoch": 0.8019973814815943, "grad_norm": 0.9990106028361039, "learning_rate": 4.7855956250823275e-06, "loss": 0.4143, "step": 13170 }, { "epoch": 0.8020582772584721, "grad_norm": 0.9851209746040018, "learning_rate": 4.785563292492247e-06, "loss": 0.4626, "step": 13171 }, { "epoch": 0.80211917303535, "grad_norm": 1.0063895870868207, "learning_rate": 4.785530957573681e-06, "loss": 0.4303, "step": 13172 }, { "epoch": 0.8021800688122279, "grad_norm": 0.986980180621702, "learning_rate": 4.785498620326664e-06, "loss": 0.4367, "step": 13173 }, { "epoch": 0.8022409645891058, "grad_norm": 1.0599339993255303, "learning_rate": 4.785466280751228e-06, "loss": 0.3993, "step": 13174 }, { "epoch": 0.8023018603659836, "grad_norm": 1.0342789501763228, "learning_rate": 4.7854339388474065e-06, "loss": 0.434, "step": 13175 }, { "epoch": 0.8023627561428615, "grad_norm": 0.8922946551168803, "learning_rate": 4.7854015946152335e-06, "loss": 0.4733, "step": 13176 }, { "epoch": 0.8024236519197394, "grad_norm": 0.983272585757078, "learning_rate": 4.78536924805474e-06, "loss": 0.4196, "step": 13177 }, { "epoch": 0.8024845476966173, "grad_norm": 0.9916147296174822, "learning_rate": 4.78533689916596e-06, "loss": 0.4161, "step": 13178 }, { "epoch": 0.8025454434734951, "grad_norm": 1.015661750638271, "learning_rate": 4.785304547948925e-06, "loss": 0.4465, "step": 13179 }, { "epoch": 0.802606339250373, "grad_norm": 0.9158700382034447, "learning_rate": 4.785272194403672e-06, "loss": 0.4103, "step": 13180 }, { "epoch": 0.8026672350272509, "grad_norm": 0.9383203010177089, "learning_rate": 4.785239838530229e-06, "loss": 0.4421, "step": 13181 }, { "epoch": 0.8027281308041287, "grad_norm": 1.014750681888262, "learning_rate": 4.785207480328632e-06, "loss": 0.4422, "step": 13182 }, { "epoch": 0.8027890265810066, "grad_norm": 1.1121100319850088, "learning_rate": 4.785175119798913e-06, "loss": 0.4526, "step": 13183 }, { "epoch": 0.8028499223578844, "grad_norm": 1.1570470280240017, "learning_rate": 4.785142756941105e-06, "loss": 0.3778, "step": 13184 }, { "epoch": 0.8029108181347624, "grad_norm": 1.1689540836005545, "learning_rate": 4.785110391755241e-06, "loss": 0.4105, "step": 13185 }, { "epoch": 0.8029717139116402, "grad_norm": 0.9379515384181218, "learning_rate": 4.7850780242413545e-06, "loss": 0.4671, "step": 13186 }, { "epoch": 0.8030326096885181, "grad_norm": 1.035949658611482, "learning_rate": 4.785045654399477e-06, "loss": 0.4529, "step": 13187 }, { "epoch": 0.8030935054653959, "grad_norm": 1.119323854869936, "learning_rate": 4.785013282229644e-06, "loss": 0.4428, "step": 13188 }, { "epoch": 0.8031544012422739, "grad_norm": 1.1064161436193036, "learning_rate": 4.784980907731886e-06, "loss": 0.4819, "step": 13189 }, { "epoch": 0.8032152970191517, "grad_norm": 1.0194378235894175, "learning_rate": 4.784948530906237e-06, "loss": 0.3775, "step": 13190 }, { "epoch": 0.8032761927960296, "grad_norm": 0.9442597739261845, "learning_rate": 4.78491615175273e-06, "loss": 0.455, "step": 13191 }, { "epoch": 0.8033370885729074, "grad_norm": 1.006986449671345, "learning_rate": 4.784883770271398e-06, "loss": 0.4234, "step": 13192 }, { "epoch": 0.8033979843497854, "grad_norm": 1.011499904117075, "learning_rate": 4.7848513864622735e-06, "loss": 0.4151, "step": 13193 }, { "epoch": 0.8034588801266632, "grad_norm": 0.9157772929688146, "learning_rate": 4.784819000325391e-06, "loss": 0.4049, "step": 13194 }, { "epoch": 0.8035197759035411, "grad_norm": 1.0946604004582863, "learning_rate": 4.784786611860781e-06, "loss": 0.3807, "step": 13195 }, { "epoch": 0.8035806716804189, "grad_norm": 1.0263453116639618, "learning_rate": 4.784754221068479e-06, "loss": 0.4373, "step": 13196 }, { "epoch": 0.8036415674572969, "grad_norm": 0.8976769034787738, "learning_rate": 4.784721827948517e-06, "loss": 0.4722, "step": 13197 }, { "epoch": 0.8037024632341747, "grad_norm": 0.9757375185715305, "learning_rate": 4.7846894325009276e-06, "loss": 0.3988, "step": 13198 }, { "epoch": 0.8037633590110526, "grad_norm": 0.9717982998688813, "learning_rate": 4.784657034725744e-06, "loss": 0.3866, "step": 13199 }, { "epoch": 0.8038242547879304, "grad_norm": 0.9535736436662046, "learning_rate": 4.784624634622999e-06, "loss": 0.4746, "step": 13200 }, { "epoch": 0.8038851505648084, "grad_norm": 1.0111205194761341, "learning_rate": 4.784592232192726e-06, "loss": 0.4218, "step": 13201 }, { "epoch": 0.8039460463416862, "grad_norm": 0.950055396235979, "learning_rate": 4.784559827434958e-06, "loss": 0.4569, "step": 13202 }, { "epoch": 0.804006942118564, "grad_norm": 1.0524091037264438, "learning_rate": 4.7845274203497285e-06, "loss": 0.4157, "step": 13203 }, { "epoch": 0.804067837895442, "grad_norm": 1.0886182165730047, "learning_rate": 4.784495010937069e-06, "loss": 0.3655, "step": 13204 }, { "epoch": 0.8041287336723199, "grad_norm": 0.9654211462535479, "learning_rate": 4.784462599197014e-06, "loss": 0.5271, "step": 13205 }, { "epoch": 0.8041896294491977, "grad_norm": 1.0301366097665918, "learning_rate": 4.784430185129596e-06, "loss": 0.3898, "step": 13206 }, { "epoch": 0.8042505252260755, "grad_norm": 0.961534830703482, "learning_rate": 4.784397768734848e-06, "loss": 0.4316, "step": 13207 }, { "epoch": 0.8043114210029535, "grad_norm": 0.9413100259466298, "learning_rate": 4.784365350012803e-06, "loss": 0.3699, "step": 13208 }, { "epoch": 0.8043723167798313, "grad_norm": 1.0054538794674432, "learning_rate": 4.784332928963494e-06, "loss": 0.422, "step": 13209 }, { "epoch": 0.8044332125567092, "grad_norm": 0.9351016288729934, "learning_rate": 4.7843005055869546e-06, "loss": 0.4444, "step": 13210 }, { "epoch": 0.804494108333587, "grad_norm": 0.9926050442790793, "learning_rate": 4.784268079883217e-06, "loss": 0.4102, "step": 13211 }, { "epoch": 0.804555004110465, "grad_norm": 0.9980844817934545, "learning_rate": 4.784235651852315e-06, "loss": 0.3877, "step": 13212 }, { "epoch": 0.8046158998873428, "grad_norm": 0.9944913672215905, "learning_rate": 4.784203221494281e-06, "loss": 0.3603, "step": 13213 }, { "epoch": 0.8046767956642207, "grad_norm": 1.044154938100063, "learning_rate": 4.784170788809147e-06, "loss": 0.3912, "step": 13214 }, { "epoch": 0.8047376914410985, "grad_norm": 1.0410658587868105, "learning_rate": 4.784138353796949e-06, "loss": 0.4234, "step": 13215 }, { "epoch": 0.8047985872179765, "grad_norm": 0.9359740067829851, "learning_rate": 4.7841059164577175e-06, "loss": 0.3988, "step": 13216 }, { "epoch": 0.8048594829948543, "grad_norm": 1.0501989405803982, "learning_rate": 4.784073476791487e-06, "loss": 0.3863, "step": 13217 }, { "epoch": 0.8049203787717322, "grad_norm": 1.0265168063386847, "learning_rate": 4.7840410347982894e-06, "loss": 0.3944, "step": 13218 }, { "epoch": 0.80498127454861, "grad_norm": 0.9031841582011395, "learning_rate": 4.784008590478157e-06, "loss": 0.4371, "step": 13219 }, { "epoch": 0.805042170325488, "grad_norm": 0.9852664053015633, "learning_rate": 4.783976143831126e-06, "loss": 0.3956, "step": 13220 }, { "epoch": 0.8051030661023658, "grad_norm": 0.9955472159488067, "learning_rate": 4.783943694857227e-06, "loss": 0.4064, "step": 13221 }, { "epoch": 0.8051639618792437, "grad_norm": 1.1196179523333576, "learning_rate": 4.783911243556494e-06, "loss": 0.4092, "step": 13222 }, { "epoch": 0.8052248576561215, "grad_norm": 1.0180194041292392, "learning_rate": 4.78387878992896e-06, "loss": 0.359, "step": 13223 }, { "epoch": 0.8052857534329995, "grad_norm": 0.9624016745789218, "learning_rate": 4.783846333974656e-06, "loss": 0.4275, "step": 13224 }, { "epoch": 0.8053466492098773, "grad_norm": 1.0064564531697995, "learning_rate": 4.783813875693618e-06, "loss": 0.4116, "step": 13225 }, { "epoch": 0.8054075449867552, "grad_norm": 0.9921100485414344, "learning_rate": 4.783781415085879e-06, "loss": 0.488, "step": 13226 }, { "epoch": 0.805468440763633, "grad_norm": 1.021714875695211, "learning_rate": 4.783748952151469e-06, "loss": 0.416, "step": 13227 }, { "epoch": 0.805529336540511, "grad_norm": 1.038465467020534, "learning_rate": 4.783716486890424e-06, "loss": 0.3726, "step": 13228 }, { "epoch": 0.8055902323173888, "grad_norm": 1.070676691792393, "learning_rate": 4.783684019302776e-06, "loss": 0.3864, "step": 13229 }, { "epoch": 0.8056511280942666, "grad_norm": 1.0506980899958869, "learning_rate": 4.7836515493885585e-06, "loss": 0.4126, "step": 13230 }, { "epoch": 0.8057120238711445, "grad_norm": 1.018856503238034, "learning_rate": 4.783619077147804e-06, "loss": 0.4537, "step": 13231 }, { "epoch": 0.8057729196480224, "grad_norm": 0.9518414703994326, "learning_rate": 4.783586602580546e-06, "loss": 0.3888, "step": 13232 }, { "epoch": 0.8058338154249003, "grad_norm": 0.9497156673568098, "learning_rate": 4.783554125686817e-06, "loss": 0.4208, "step": 13233 }, { "epoch": 0.8058947112017781, "grad_norm": 0.9758858165250323, "learning_rate": 4.783521646466651e-06, "loss": 0.4107, "step": 13234 }, { "epoch": 0.805955606978656, "grad_norm": 0.931480229493935, "learning_rate": 4.783489164920081e-06, "loss": 0.4258, "step": 13235 }, { "epoch": 0.8060165027555339, "grad_norm": 0.9804244204067465, "learning_rate": 4.783456681047139e-06, "loss": 0.41, "step": 13236 }, { "epoch": 0.8060773985324118, "grad_norm": 1.094507400953937, "learning_rate": 4.783424194847859e-06, "loss": 0.3835, "step": 13237 }, { "epoch": 0.8061382943092896, "grad_norm": 0.9907787438981636, "learning_rate": 4.783391706322274e-06, "loss": 0.3684, "step": 13238 }, { "epoch": 0.8061991900861675, "grad_norm": 0.9927662478835689, "learning_rate": 4.7833592154704165e-06, "loss": 0.3495, "step": 13239 }, { "epoch": 0.8062600858630454, "grad_norm": 0.9851808546164983, "learning_rate": 4.783326722292321e-06, "loss": 0.4511, "step": 13240 }, { "epoch": 0.8063209816399233, "grad_norm": 1.0827647936102978, "learning_rate": 4.783294226788019e-06, "loss": 0.3457, "step": 13241 }, { "epoch": 0.8063818774168011, "grad_norm": 0.9574431919851554, "learning_rate": 4.783261728957544e-06, "loss": 0.4284, "step": 13242 }, { "epoch": 0.806442773193679, "grad_norm": 1.0766050059714636, "learning_rate": 4.7832292288009306e-06, "loss": 0.4518, "step": 13243 }, { "epoch": 0.8065036689705569, "grad_norm": 1.0239593398047784, "learning_rate": 4.78319672631821e-06, "loss": 0.4008, "step": 13244 }, { "epoch": 0.8065645647474348, "grad_norm": 1.0465578000139852, "learning_rate": 4.7831642215094165e-06, "loss": 0.4326, "step": 13245 }, { "epoch": 0.8066254605243126, "grad_norm": 1.0236195842544231, "learning_rate": 4.783131714374582e-06, "loss": 0.4463, "step": 13246 }, { "epoch": 0.8066863563011906, "grad_norm": 0.9887690778401448, "learning_rate": 4.783099204913741e-06, "loss": 0.3827, "step": 13247 }, { "epoch": 0.8067472520780684, "grad_norm": 0.9669395025459301, "learning_rate": 4.7830666931269255e-06, "loss": 0.4802, "step": 13248 }, { "epoch": 0.8068081478549463, "grad_norm": 1.035513306662622, "learning_rate": 4.78303417901417e-06, "loss": 0.3492, "step": 13249 }, { "epoch": 0.8068690436318241, "grad_norm": 1.039698662997154, "learning_rate": 4.783001662575506e-06, "loss": 0.3927, "step": 13250 }, { "epoch": 0.8069299394087021, "grad_norm": 0.9374506647363836, "learning_rate": 4.782969143810967e-06, "loss": 0.4324, "step": 13251 }, { "epoch": 0.8069908351855799, "grad_norm": 1.0274174704420262, "learning_rate": 4.7829366227205875e-06, "loss": 0.4136, "step": 13252 }, { "epoch": 0.8070517309624577, "grad_norm": 0.9786149015765341, "learning_rate": 4.782904099304399e-06, "loss": 0.4852, "step": 13253 }, { "epoch": 0.8071126267393356, "grad_norm": 1.0025478311403773, "learning_rate": 4.7828715735624355e-06, "loss": 0.3992, "step": 13254 }, { "epoch": 0.8071735225162135, "grad_norm": 0.9251075525917501, "learning_rate": 4.78283904549473e-06, "loss": 0.4485, "step": 13255 }, { "epoch": 0.8072344182930914, "grad_norm": 0.9136292866134662, "learning_rate": 4.7828065151013155e-06, "loss": 0.4882, "step": 13256 }, { "epoch": 0.8072953140699692, "grad_norm": 1.0466608420961687, "learning_rate": 4.782773982382225e-06, "loss": 0.4199, "step": 13257 }, { "epoch": 0.8073562098468471, "grad_norm": 0.9852715899183601, "learning_rate": 4.782741447337492e-06, "loss": 0.4718, "step": 13258 }, { "epoch": 0.807417105623725, "grad_norm": 1.0708808513175008, "learning_rate": 4.782708909967149e-06, "loss": 0.3775, "step": 13259 }, { "epoch": 0.8074780014006029, "grad_norm": 1.0140752373412834, "learning_rate": 4.78267637027123e-06, "loss": 0.4046, "step": 13260 }, { "epoch": 0.8075388971774807, "grad_norm": 1.0387144241210922, "learning_rate": 4.782643828249768e-06, "loss": 0.4034, "step": 13261 }, { "epoch": 0.8075997929543586, "grad_norm": 0.9994474102608057, "learning_rate": 4.782611283902795e-06, "loss": 0.4042, "step": 13262 }, { "epoch": 0.8076606887312365, "grad_norm": 1.0521592890268618, "learning_rate": 4.7825787372303455e-06, "loss": 0.4049, "step": 13263 }, { "epoch": 0.8077215845081144, "grad_norm": 0.9260633137673026, "learning_rate": 4.782546188232453e-06, "loss": 0.4361, "step": 13264 }, { "epoch": 0.8077824802849922, "grad_norm": 0.9427947943795814, "learning_rate": 4.7825136369091495e-06, "loss": 0.4626, "step": 13265 }, { "epoch": 0.8078433760618701, "grad_norm": 1.032656383849258, "learning_rate": 4.782481083260468e-06, "loss": 0.3642, "step": 13266 }, { "epoch": 0.807904271838748, "grad_norm": 1.0928056459491706, "learning_rate": 4.7824485272864425e-06, "loss": 0.3358, "step": 13267 }, { "epoch": 0.8079651676156259, "grad_norm": 1.095743837149125, "learning_rate": 4.782415968987106e-06, "loss": 0.403, "step": 13268 }, { "epoch": 0.8080260633925037, "grad_norm": 1.0693167464772018, "learning_rate": 4.782383408362492e-06, "loss": 0.4118, "step": 13269 }, { "epoch": 0.8080869591693816, "grad_norm": 0.9016532899203505, "learning_rate": 4.782350845412633e-06, "loss": 0.4666, "step": 13270 }, { "epoch": 0.8081478549462595, "grad_norm": 0.988883526529999, "learning_rate": 4.7823182801375626e-06, "loss": 0.4021, "step": 13271 }, { "epoch": 0.8082087507231374, "grad_norm": 1.1442402781270804, "learning_rate": 4.7822857125373134e-06, "loss": 0.4724, "step": 13272 }, { "epoch": 0.8082696465000152, "grad_norm": 0.9298332698316908, "learning_rate": 4.782253142611919e-06, "loss": 0.4569, "step": 13273 }, { "epoch": 0.808330542276893, "grad_norm": 0.9599847733906339, "learning_rate": 4.7822205703614124e-06, "loss": 0.391, "step": 13274 }, { "epoch": 0.808391438053771, "grad_norm": 1.055580947279908, "learning_rate": 4.7821879957858275e-06, "loss": 0.3479, "step": 13275 }, { "epoch": 0.8084523338306489, "grad_norm": 1.0070944532081858, "learning_rate": 4.782155418885196e-06, "loss": 0.503, "step": 13276 }, { "epoch": 0.8085132296075267, "grad_norm": 0.9661588825795513, "learning_rate": 4.782122839659552e-06, "loss": 0.3885, "step": 13277 }, { "epoch": 0.8085741253844045, "grad_norm": 1.00076897157924, "learning_rate": 4.7820902581089305e-06, "loss": 0.3868, "step": 13278 }, { "epoch": 0.8086350211612825, "grad_norm": 1.01774324633492, "learning_rate": 4.782057674233362e-06, "loss": 0.3823, "step": 13279 }, { "epoch": 0.8086959169381603, "grad_norm": 0.9566907485962821, "learning_rate": 4.782025088032879e-06, "loss": 0.422, "step": 13280 }, { "epoch": 0.8087568127150382, "grad_norm": 0.9296948769203092, "learning_rate": 4.781992499507518e-06, "loss": 0.4547, "step": 13281 }, { "epoch": 0.808817708491916, "grad_norm": 1.045382620124847, "learning_rate": 4.78195990865731e-06, "loss": 0.3944, "step": 13282 }, { "epoch": 0.808878604268794, "grad_norm": 1.045540527308778, "learning_rate": 4.781927315482289e-06, "loss": 0.3863, "step": 13283 }, { "epoch": 0.8089395000456718, "grad_norm": 0.9437692962840368, "learning_rate": 4.781894719982487e-06, "loss": 0.4667, "step": 13284 }, { "epoch": 0.8090003958225497, "grad_norm": 1.0604640142172435, "learning_rate": 4.781862122157939e-06, "loss": 0.3747, "step": 13285 }, { "epoch": 0.8090612915994276, "grad_norm": 1.018818523291482, "learning_rate": 4.781829522008676e-06, "loss": 0.3722, "step": 13286 }, { "epoch": 0.8091221873763055, "grad_norm": 0.9491905916529033, "learning_rate": 4.7817969195347345e-06, "loss": 0.4437, "step": 13287 }, { "epoch": 0.8091830831531833, "grad_norm": 0.9893819270921902, "learning_rate": 4.781764314736144e-06, "loss": 0.514, "step": 13288 }, { "epoch": 0.8092439789300612, "grad_norm": 1.0015987721222477, "learning_rate": 4.78173170761294e-06, "loss": 0.3927, "step": 13289 }, { "epoch": 0.8093048747069391, "grad_norm": 1.0220601295547251, "learning_rate": 4.7816990981651555e-06, "loss": 0.3861, "step": 13290 }, { "epoch": 0.809365770483817, "grad_norm": 1.040661661531611, "learning_rate": 4.7816664863928235e-06, "loss": 0.3553, "step": 13291 }, { "epoch": 0.8094266662606948, "grad_norm": 1.037178867236975, "learning_rate": 4.781633872295977e-06, "loss": 0.3767, "step": 13292 }, { "epoch": 0.8094875620375727, "grad_norm": 1.0053114318612661, "learning_rate": 4.78160125587465e-06, "loss": 0.4613, "step": 13293 }, { "epoch": 0.8095484578144506, "grad_norm": 1.063251981703087, "learning_rate": 4.781568637128874e-06, "loss": 0.3663, "step": 13294 }, { "epoch": 0.8096093535913285, "grad_norm": 1.060796323942673, "learning_rate": 4.781536016058683e-06, "loss": 0.3337, "step": 13295 }, { "epoch": 0.8096702493682063, "grad_norm": 1.0213890971133368, "learning_rate": 4.781503392664111e-06, "loss": 0.4785, "step": 13296 }, { "epoch": 0.8097311451450842, "grad_norm": 0.9597344366675314, "learning_rate": 4.781470766945191e-06, "loss": 0.3915, "step": 13297 }, { "epoch": 0.8097920409219621, "grad_norm": 1.0132532309296804, "learning_rate": 4.781438138901956e-06, "loss": 0.4226, "step": 13298 }, { "epoch": 0.80985293669884, "grad_norm": 0.9585703173260982, "learning_rate": 4.781405508534439e-06, "loss": 0.4384, "step": 13299 }, { "epoch": 0.8099138324757178, "grad_norm": 1.0460955110554961, "learning_rate": 4.781372875842675e-06, "loss": 0.3841, "step": 13300 }, { "epoch": 0.8099747282525956, "grad_norm": 1.0695192195534293, "learning_rate": 4.781340240826694e-06, "loss": 0.3939, "step": 13301 }, { "epoch": 0.8100356240294736, "grad_norm": 0.997248974055564, "learning_rate": 4.7813076034865315e-06, "loss": 0.451, "step": 13302 }, { "epoch": 0.8100965198063514, "grad_norm": 0.9995238406766132, "learning_rate": 4.78127496382222e-06, "loss": 0.4338, "step": 13303 }, { "epoch": 0.8101574155832293, "grad_norm": 0.9708306754924818, "learning_rate": 4.781242321833793e-06, "loss": 0.3636, "step": 13304 }, { "epoch": 0.8102183113601071, "grad_norm": 1.0409843348783472, "learning_rate": 4.781209677521284e-06, "loss": 0.4358, "step": 13305 }, { "epoch": 0.8102792071369851, "grad_norm": 1.0086510759997114, "learning_rate": 4.781177030884726e-06, "loss": 0.4288, "step": 13306 }, { "epoch": 0.8103401029138629, "grad_norm": 1.0692399440625924, "learning_rate": 4.781144381924152e-06, "loss": 0.4031, "step": 13307 }, { "epoch": 0.8104009986907408, "grad_norm": 1.0781831302021365, "learning_rate": 4.781111730639596e-06, "loss": 0.3445, "step": 13308 }, { "epoch": 0.8104618944676186, "grad_norm": 0.9734659060159916, "learning_rate": 4.7810790770310896e-06, "loss": 0.4719, "step": 13309 }, { "epoch": 0.8105227902444966, "grad_norm": 0.9944569932450318, "learning_rate": 4.7810464210986685e-06, "loss": 0.4378, "step": 13310 }, { "epoch": 0.8105836860213744, "grad_norm": 1.0187668903442915, "learning_rate": 4.781013762842365e-06, "loss": 0.4535, "step": 13311 }, { "epoch": 0.8106445817982523, "grad_norm": 1.0329412931610287, "learning_rate": 4.780981102262211e-06, "loss": 0.4028, "step": 13312 }, { "epoch": 0.8107054775751301, "grad_norm": 1.0587975503092533, "learning_rate": 4.780948439358242e-06, "loss": 0.3595, "step": 13313 }, { "epoch": 0.8107663733520081, "grad_norm": 0.9848788899729789, "learning_rate": 4.780915774130489e-06, "loss": 0.4108, "step": 13314 }, { "epoch": 0.8108272691288859, "grad_norm": 1.0306068469620742, "learning_rate": 4.780883106578987e-06, "loss": 0.399, "step": 13315 }, { "epoch": 0.8108881649057638, "grad_norm": 1.0486913776549014, "learning_rate": 4.780850436703769e-06, "loss": 0.4197, "step": 13316 }, { "epoch": 0.8109490606826416, "grad_norm": 1.004033178941646, "learning_rate": 4.780817764504868e-06, "loss": 0.3652, "step": 13317 }, { "epoch": 0.8110099564595196, "grad_norm": 1.0026931599982967, "learning_rate": 4.7807850899823164e-06, "loss": 0.3974, "step": 13318 }, { "epoch": 0.8110708522363974, "grad_norm": 1.0227637072286933, "learning_rate": 4.78075241313615e-06, "loss": 0.3834, "step": 13319 }, { "epoch": 0.8111317480132753, "grad_norm": 0.9863466464675043, "learning_rate": 4.780719733966399e-06, "loss": 0.3788, "step": 13320 }, { "epoch": 0.8111926437901531, "grad_norm": 1.0701107248300978, "learning_rate": 4.780687052473098e-06, "loss": 0.4163, "step": 13321 }, { "epoch": 0.8112535395670311, "grad_norm": 1.0031299606145974, "learning_rate": 4.7806543686562814e-06, "loss": 0.4327, "step": 13322 }, { "epoch": 0.8113144353439089, "grad_norm": 0.9772987177673577, "learning_rate": 4.780621682515981e-06, "loss": 0.5127, "step": 13323 }, { "epoch": 0.8113753311207867, "grad_norm": 1.0564548290806535, "learning_rate": 4.780588994052231e-06, "loss": 0.3592, "step": 13324 }, { "epoch": 0.8114362268976646, "grad_norm": 0.9956217776906531, "learning_rate": 4.780556303265064e-06, "loss": 0.4832, "step": 13325 }, { "epoch": 0.8114971226745425, "grad_norm": 1.09606343806533, "learning_rate": 4.780523610154514e-06, "loss": 0.4399, "step": 13326 }, { "epoch": 0.8115580184514204, "grad_norm": 0.9811290824270568, "learning_rate": 4.780490914720613e-06, "loss": 0.3824, "step": 13327 }, { "epoch": 0.8116189142282982, "grad_norm": 1.015426271138587, "learning_rate": 4.780458216963396e-06, "loss": 0.4339, "step": 13328 }, { "epoch": 0.8116798100051762, "grad_norm": 1.00010402907508, "learning_rate": 4.780425516882896e-06, "loss": 0.3735, "step": 13329 }, { "epoch": 0.811740705782054, "grad_norm": 0.9680688939069201, "learning_rate": 4.780392814479146e-06, "loss": 0.3704, "step": 13330 }, { "epoch": 0.8118016015589319, "grad_norm": 0.9467480558129089, "learning_rate": 4.780360109752178e-06, "loss": 0.3989, "step": 13331 }, { "epoch": 0.8118624973358097, "grad_norm": 1.071194356569108, "learning_rate": 4.7803274027020276e-06, "loss": 0.3576, "step": 13332 }, { "epoch": 0.8119233931126877, "grad_norm": 1.038134649099734, "learning_rate": 4.780294693328727e-06, "loss": 0.324, "step": 13333 }, { "epoch": 0.8119842888895655, "grad_norm": 0.9497379632568382, "learning_rate": 4.780261981632309e-06, "loss": 0.4092, "step": 13334 }, { "epoch": 0.8120451846664434, "grad_norm": 1.0259961474049022, "learning_rate": 4.780229267612808e-06, "loss": 0.3703, "step": 13335 }, { "epoch": 0.8121060804433212, "grad_norm": 1.0717858200749883, "learning_rate": 4.780196551270256e-06, "loss": 0.3599, "step": 13336 }, { "epoch": 0.8121669762201992, "grad_norm": 1.0326033610547198, "learning_rate": 4.780163832604688e-06, "loss": 0.4568, "step": 13337 }, { "epoch": 0.812227871997077, "grad_norm": 0.9850093125555969, "learning_rate": 4.780131111616136e-06, "loss": 0.396, "step": 13338 }, { "epoch": 0.8122887677739549, "grad_norm": 1.152234018525591, "learning_rate": 4.780098388304634e-06, "loss": 0.3822, "step": 13339 }, { "epoch": 0.8123496635508327, "grad_norm": 0.9199128001994564, "learning_rate": 4.780065662670215e-06, "loss": 0.4567, "step": 13340 }, { "epoch": 0.8124105593277107, "grad_norm": 0.9965727191361305, "learning_rate": 4.780032934712913e-06, "loss": 0.4357, "step": 13341 }, { "epoch": 0.8124714551045885, "grad_norm": 1.0761193183379802, "learning_rate": 4.780000204432761e-06, "loss": 0.395, "step": 13342 }, { "epoch": 0.8125323508814664, "grad_norm": 0.9996567859468025, "learning_rate": 4.779967471829792e-06, "loss": 0.4523, "step": 13343 }, { "epoch": 0.8125932466583442, "grad_norm": 1.0209884657386783, "learning_rate": 4.779934736904039e-06, "loss": 0.4103, "step": 13344 }, { "epoch": 0.8126541424352222, "grad_norm": 1.0555973261658496, "learning_rate": 4.7799019996555365e-06, "loss": 0.3812, "step": 13345 }, { "epoch": 0.8127150382121, "grad_norm": 0.9584890792556215, "learning_rate": 4.779869260084317e-06, "loss": 0.3874, "step": 13346 }, { "epoch": 0.8127759339889779, "grad_norm": 0.9979550079286955, "learning_rate": 4.779836518190414e-06, "loss": 0.4001, "step": 13347 }, { "epoch": 0.8128368297658557, "grad_norm": 1.047684553465827, "learning_rate": 4.7798037739738604e-06, "loss": 0.3809, "step": 13348 }, { "epoch": 0.8128977255427337, "grad_norm": 0.916372756577186, "learning_rate": 4.779771027434691e-06, "loss": 0.4459, "step": 13349 }, { "epoch": 0.8129586213196115, "grad_norm": 0.9870419013976851, "learning_rate": 4.779738278572938e-06, "loss": 0.427, "step": 13350 }, { "epoch": 0.8130195170964893, "grad_norm": 1.0616662012627316, "learning_rate": 4.779705527388635e-06, "loss": 0.3507, "step": 13351 }, { "epoch": 0.8130804128733672, "grad_norm": 0.9824062166490907, "learning_rate": 4.779672773881816e-06, "loss": 0.402, "step": 13352 }, { "epoch": 0.8131413086502451, "grad_norm": 1.0061864068907143, "learning_rate": 4.779640018052513e-06, "loss": 0.4717, "step": 13353 }, { "epoch": 0.813202204427123, "grad_norm": 0.9660954222801982, "learning_rate": 4.77960725990076e-06, "loss": 0.413, "step": 13354 }, { "epoch": 0.8132631002040008, "grad_norm": 0.9874181513974819, "learning_rate": 4.779574499426591e-06, "loss": 0.4569, "step": 13355 }, { "epoch": 0.8133239959808787, "grad_norm": 0.9339066755066711, "learning_rate": 4.779541736630039e-06, "loss": 0.4488, "step": 13356 }, { "epoch": 0.8133848917577566, "grad_norm": 1.0338457188938865, "learning_rate": 4.779508971511136e-06, "loss": 0.4174, "step": 13357 }, { "epoch": 0.8134457875346345, "grad_norm": 1.0244616771930983, "learning_rate": 4.7794762040699186e-06, "loss": 0.5477, "step": 13358 }, { "epoch": 0.8135066833115123, "grad_norm": 0.9528471307272485, "learning_rate": 4.779443434306417e-06, "loss": 0.4517, "step": 13359 }, { "epoch": 0.8135675790883902, "grad_norm": 1.044954030492398, "learning_rate": 4.779410662220667e-06, "loss": 0.3789, "step": 13360 }, { "epoch": 0.8136284748652681, "grad_norm": 1.0604082444467147, "learning_rate": 4.7793778878127e-06, "loss": 0.3695, "step": 13361 }, { "epoch": 0.813689370642146, "grad_norm": 1.0093505196527703, "learning_rate": 4.779345111082549e-06, "loss": 0.3865, "step": 13362 }, { "epoch": 0.8137502664190238, "grad_norm": 1.0724915949971447, "learning_rate": 4.7793123320302505e-06, "loss": 0.3682, "step": 13363 }, { "epoch": 0.8138111621959017, "grad_norm": 0.965945494689146, "learning_rate": 4.7792795506558354e-06, "loss": 0.4126, "step": 13364 }, { "epoch": 0.8138720579727796, "grad_norm": 0.9344586402936288, "learning_rate": 4.779246766959337e-06, "loss": 0.4866, "step": 13365 }, { "epoch": 0.8139329537496575, "grad_norm": 1.1047848330342682, "learning_rate": 4.77921398094079e-06, "loss": 0.4287, "step": 13366 }, { "epoch": 0.8139938495265353, "grad_norm": 0.9976850591151769, "learning_rate": 4.779181192600227e-06, "loss": 0.4669, "step": 13367 }, { "epoch": 0.8140547453034133, "grad_norm": 1.0000175263550934, "learning_rate": 4.779148401937682e-06, "loss": 0.3706, "step": 13368 }, { "epoch": 0.8141156410802911, "grad_norm": 1.0305007956604444, "learning_rate": 4.779115608953188e-06, "loss": 0.4012, "step": 13369 }, { "epoch": 0.814176536857169, "grad_norm": 0.9725042635826425, "learning_rate": 4.779082813646777e-06, "loss": 0.4404, "step": 13370 }, { "epoch": 0.8142374326340468, "grad_norm": 1.01645416964533, "learning_rate": 4.779050016018485e-06, "loss": 0.4228, "step": 13371 }, { "epoch": 0.8142983284109248, "grad_norm": 1.03463224456493, "learning_rate": 4.779017216068345e-06, "loss": 0.383, "step": 13372 }, { "epoch": 0.8143592241878026, "grad_norm": 1.128344249716388, "learning_rate": 4.7789844137963884e-06, "loss": 0.3841, "step": 13373 }, { "epoch": 0.8144201199646804, "grad_norm": 1.1198128819721997, "learning_rate": 4.77895160920265e-06, "loss": 0.3941, "step": 13374 }, { "epoch": 0.8144810157415583, "grad_norm": 0.9907765570286627, "learning_rate": 4.778918802287163e-06, "loss": 0.4283, "step": 13375 }, { "epoch": 0.8145419115184362, "grad_norm": 1.0478828524486132, "learning_rate": 4.778885993049962e-06, "loss": 0.4046, "step": 13376 }, { "epoch": 0.8146028072953141, "grad_norm": 0.9593661943744682, "learning_rate": 4.778853181491078e-06, "loss": 0.4894, "step": 13377 }, { "epoch": 0.8146637030721919, "grad_norm": 0.9370956564269131, "learning_rate": 4.778820367610546e-06, "loss": 0.4144, "step": 13378 }, { "epoch": 0.8147245988490698, "grad_norm": 0.9574122437504947, "learning_rate": 4.7787875514084e-06, "loss": 0.4634, "step": 13379 }, { "epoch": 0.8147854946259477, "grad_norm": 1.0165393880875606, "learning_rate": 4.778754732884672e-06, "loss": 0.4687, "step": 13380 }, { "epoch": 0.8148463904028256, "grad_norm": 0.9927725793006987, "learning_rate": 4.778721912039396e-06, "loss": 0.4586, "step": 13381 }, { "epoch": 0.8149072861797034, "grad_norm": 0.9393977147613531, "learning_rate": 4.778689088872606e-06, "loss": 0.3945, "step": 13382 }, { "epoch": 0.8149681819565813, "grad_norm": 1.0221273635170953, "learning_rate": 4.778656263384334e-06, "loss": 0.349, "step": 13383 }, { "epoch": 0.8150290777334592, "grad_norm": 0.972248377570832, "learning_rate": 4.778623435574615e-06, "loss": 0.4522, "step": 13384 }, { "epoch": 0.8150899735103371, "grad_norm": 0.9742179021291648, "learning_rate": 4.778590605443482e-06, "loss": 0.5457, "step": 13385 }, { "epoch": 0.8151508692872149, "grad_norm": 0.9738151576698231, "learning_rate": 4.778557772990968e-06, "loss": 0.4129, "step": 13386 }, { "epoch": 0.8152117650640928, "grad_norm": 1.0002533993438896, "learning_rate": 4.778524938217107e-06, "loss": 0.4219, "step": 13387 }, { "epoch": 0.8152726608409707, "grad_norm": 1.0127351565607332, "learning_rate": 4.778492101121932e-06, "loss": 0.4311, "step": 13388 }, { "epoch": 0.8153335566178486, "grad_norm": 0.9577399254721662, "learning_rate": 4.7784592617054755e-06, "loss": 0.4519, "step": 13389 }, { "epoch": 0.8153944523947264, "grad_norm": 0.9944582521270345, "learning_rate": 4.778426419967774e-06, "loss": 0.4352, "step": 13390 }, { "epoch": 0.8154553481716043, "grad_norm": 1.0485277780808706, "learning_rate": 4.778393575908858e-06, "loss": 0.3884, "step": 13391 }, { "epoch": 0.8155162439484822, "grad_norm": 1.0213411522770655, "learning_rate": 4.778360729528762e-06, "loss": 0.4504, "step": 13392 }, { "epoch": 0.8155771397253601, "grad_norm": 1.0460845462198833, "learning_rate": 4.77832788082752e-06, "loss": 0.4119, "step": 13393 }, { "epoch": 0.8156380355022379, "grad_norm": 1.0081985163129172, "learning_rate": 4.778295029805164e-06, "loss": 0.3915, "step": 13394 }, { "epoch": 0.8156989312791157, "grad_norm": 1.0263803317986844, "learning_rate": 4.778262176461728e-06, "loss": 0.3292, "step": 13395 }, { "epoch": 0.8157598270559937, "grad_norm": 1.0057087475332867, "learning_rate": 4.778229320797248e-06, "loss": 0.4555, "step": 13396 }, { "epoch": 0.8158207228328715, "grad_norm": 1.0500175206913316, "learning_rate": 4.7781964628117535e-06, "loss": 0.4189, "step": 13397 }, { "epoch": 0.8158816186097494, "grad_norm": 1.0073988319154752, "learning_rate": 4.778163602505281e-06, "loss": 0.3989, "step": 13398 }, { "epoch": 0.8159425143866272, "grad_norm": 1.0685314358626297, "learning_rate": 4.778130739877862e-06, "loss": 0.5127, "step": 13399 }, { "epoch": 0.8160034101635052, "grad_norm": 0.9440412184501553, "learning_rate": 4.778097874929531e-06, "loss": 0.4443, "step": 13400 }, { "epoch": 0.816064305940383, "grad_norm": 1.0697784634396805, "learning_rate": 4.7780650076603205e-06, "loss": 0.4154, "step": 13401 }, { "epoch": 0.8161252017172609, "grad_norm": 0.9384123319815725, "learning_rate": 4.778032138070266e-06, "loss": 0.451, "step": 13402 }, { "epoch": 0.8161860974941387, "grad_norm": 0.9812581945115503, "learning_rate": 4.777999266159398e-06, "loss": 0.3997, "step": 13403 }, { "epoch": 0.8162469932710167, "grad_norm": 1.0227854398049268, "learning_rate": 4.777966391927754e-06, "loss": 0.4714, "step": 13404 }, { "epoch": 0.8163078890478945, "grad_norm": 0.9409718573535637, "learning_rate": 4.777933515375364e-06, "loss": 0.4035, "step": 13405 }, { "epoch": 0.8163687848247724, "grad_norm": 0.9788828286681553, "learning_rate": 4.777900636502263e-06, "loss": 0.3675, "step": 13406 }, { "epoch": 0.8164296806016502, "grad_norm": 0.9108802728785759, "learning_rate": 4.777867755308484e-06, "loss": 0.4347, "step": 13407 }, { "epoch": 0.8164905763785282, "grad_norm": 1.0050231574729613, "learning_rate": 4.7778348717940606e-06, "loss": 0.4471, "step": 13408 }, { "epoch": 0.816551472155406, "grad_norm": 1.0115688425745177, "learning_rate": 4.777801985959026e-06, "loss": 0.4416, "step": 13409 }, { "epoch": 0.8166123679322839, "grad_norm": 1.0125040689397702, "learning_rate": 4.777769097803414e-06, "loss": 0.3711, "step": 13410 }, { "epoch": 0.8166732637091618, "grad_norm": 0.9740545154533906, "learning_rate": 4.777736207327259e-06, "loss": 0.3974, "step": 13411 }, { "epoch": 0.8167341594860397, "grad_norm": 0.9655558962795969, "learning_rate": 4.777703314530594e-06, "loss": 0.4605, "step": 13412 }, { "epoch": 0.8167950552629175, "grad_norm": 0.9772582698912465, "learning_rate": 4.7776704194134516e-06, "loss": 0.3637, "step": 13413 }, { "epoch": 0.8168559510397954, "grad_norm": 0.9153880705830465, "learning_rate": 4.777637521975866e-06, "loss": 0.4716, "step": 13414 }, { "epoch": 0.8169168468166733, "grad_norm": 1.0257810041275537, "learning_rate": 4.777604622217871e-06, "loss": 0.389, "step": 13415 }, { "epoch": 0.8169777425935512, "grad_norm": 1.0448941035318218, "learning_rate": 4.777571720139499e-06, "loss": 0.4502, "step": 13416 }, { "epoch": 0.817038638370429, "grad_norm": 1.042533372089832, "learning_rate": 4.777538815740784e-06, "loss": 0.3842, "step": 13417 }, { "epoch": 0.8170995341473068, "grad_norm": 1.0421301372322194, "learning_rate": 4.777505909021761e-06, "loss": 0.3495, "step": 13418 }, { "epoch": 0.8171604299241848, "grad_norm": 1.0178883713871727, "learning_rate": 4.777472999982462e-06, "loss": 0.4841, "step": 13419 }, { "epoch": 0.8172213257010627, "grad_norm": 0.9821682745744857, "learning_rate": 4.77744008862292e-06, "loss": 0.4636, "step": 13420 }, { "epoch": 0.8172822214779405, "grad_norm": 1.0514907336954784, "learning_rate": 4.7774071749431705e-06, "loss": 0.3801, "step": 13421 }, { "epoch": 0.8173431172548183, "grad_norm": 1.0088842750653617, "learning_rate": 4.7773742589432455e-06, "loss": 0.4088, "step": 13422 }, { "epoch": 0.8174040130316963, "grad_norm": 1.0120850864864122, "learning_rate": 4.777341340623179e-06, "loss": 0.3875, "step": 13423 }, { "epoch": 0.8174649088085741, "grad_norm": 0.9982505753175821, "learning_rate": 4.777308419983005e-06, "loss": 0.3825, "step": 13424 }, { "epoch": 0.817525804585452, "grad_norm": 1.0688968096809648, "learning_rate": 4.777275497022755e-06, "loss": 0.3455, "step": 13425 }, { "epoch": 0.8175867003623298, "grad_norm": 0.9964866533268517, "learning_rate": 4.777242571742465e-06, "loss": 0.3819, "step": 13426 }, { "epoch": 0.8176475961392078, "grad_norm": 0.9309203033245744, "learning_rate": 4.777209644142168e-06, "loss": 0.4685, "step": 13427 }, { "epoch": 0.8177084919160856, "grad_norm": 0.9712532698814959, "learning_rate": 4.777176714221896e-06, "loss": 0.3961, "step": 13428 }, { "epoch": 0.8177693876929635, "grad_norm": 1.0598892514812088, "learning_rate": 4.777143781981685e-06, "loss": 0.41, "step": 13429 }, { "epoch": 0.8178302834698413, "grad_norm": 0.9998922825328327, "learning_rate": 4.777110847421566e-06, "loss": 0.483, "step": 13430 }, { "epoch": 0.8178911792467193, "grad_norm": 1.0815483913836708, "learning_rate": 4.777077910541575e-06, "loss": 0.3692, "step": 13431 }, { "epoch": 0.8179520750235971, "grad_norm": 1.1246468682802795, "learning_rate": 4.777044971341745e-06, "loss": 0.3625, "step": 13432 }, { "epoch": 0.818012970800475, "grad_norm": 1.0479988075368347, "learning_rate": 4.777012029822107e-06, "loss": 0.4295, "step": 13433 }, { "epoch": 0.8180738665773528, "grad_norm": 1.0406935545153877, "learning_rate": 4.776979085982697e-06, "loss": 0.3452, "step": 13434 }, { "epoch": 0.8181347623542308, "grad_norm": 1.0994050303971685, "learning_rate": 4.776946139823549e-06, "loss": 0.3882, "step": 13435 }, { "epoch": 0.8181956581311086, "grad_norm": 0.9677167195028884, "learning_rate": 4.776913191344695e-06, "loss": 0.43, "step": 13436 }, { "epoch": 0.8182565539079865, "grad_norm": 1.0646355081135084, "learning_rate": 4.776880240546169e-06, "loss": 0.3369, "step": 13437 }, { "epoch": 0.8183174496848643, "grad_norm": 0.9951432588324068, "learning_rate": 4.776847287428005e-06, "loss": 0.4051, "step": 13438 }, { "epoch": 0.8183783454617423, "grad_norm": 0.9830192842560461, "learning_rate": 4.776814331990236e-06, "loss": 0.4156, "step": 13439 }, { "epoch": 0.8184392412386201, "grad_norm": 1.0204848044355712, "learning_rate": 4.776781374232896e-06, "loss": 0.4161, "step": 13440 }, { "epoch": 0.818500137015498, "grad_norm": 0.9611301347868175, "learning_rate": 4.776748414156019e-06, "loss": 0.4268, "step": 13441 }, { "epoch": 0.8185610327923758, "grad_norm": 1.0598988486124887, "learning_rate": 4.776715451759637e-06, "loss": 0.3834, "step": 13442 }, { "epoch": 0.8186219285692538, "grad_norm": 1.0745148527965442, "learning_rate": 4.776682487043786e-06, "loss": 0.3853, "step": 13443 }, { "epoch": 0.8186828243461316, "grad_norm": 1.0336440305587833, "learning_rate": 4.776649520008498e-06, "loss": 0.3974, "step": 13444 }, { "epoch": 0.8187437201230094, "grad_norm": 0.9842696306896921, "learning_rate": 4.7766165506538055e-06, "loss": 0.4089, "step": 13445 }, { "epoch": 0.8188046158998873, "grad_norm": 0.9902231263680222, "learning_rate": 4.776583578979744e-06, "loss": 0.3697, "step": 13446 }, { "epoch": 0.8188655116767652, "grad_norm": 1.0690212573924367, "learning_rate": 4.776550604986346e-06, "loss": 0.3548, "step": 13447 }, { "epoch": 0.8189264074536431, "grad_norm": 1.1282245854084234, "learning_rate": 4.776517628673647e-06, "loss": 0.3345, "step": 13448 }, { "epoch": 0.8189873032305209, "grad_norm": 1.0049447745590216, "learning_rate": 4.776484650041678e-06, "loss": 0.4615, "step": 13449 }, { "epoch": 0.8190481990073989, "grad_norm": 0.9598431268874669, "learning_rate": 4.776451669090475e-06, "loss": 0.4267, "step": 13450 }, { "epoch": 0.8191090947842767, "grad_norm": 0.9914468187547912, "learning_rate": 4.776418685820069e-06, "loss": 0.4411, "step": 13451 }, { "epoch": 0.8191699905611546, "grad_norm": 1.0180367193362867, "learning_rate": 4.776385700230496e-06, "loss": 0.4269, "step": 13452 }, { "epoch": 0.8192308863380324, "grad_norm": 1.0083597715012693, "learning_rate": 4.776352712321788e-06, "loss": 0.4471, "step": 13453 }, { "epoch": 0.8192917821149104, "grad_norm": 1.0451415835897218, "learning_rate": 4.77631972209398e-06, "loss": 0.425, "step": 13454 }, { "epoch": 0.8193526778917882, "grad_norm": 0.9429674898102955, "learning_rate": 4.776286729547104e-06, "loss": 0.4026, "step": 13455 }, { "epoch": 0.8194135736686661, "grad_norm": 1.135776767112741, "learning_rate": 4.776253734681194e-06, "loss": 0.4271, "step": 13456 }, { "epoch": 0.8194744694455439, "grad_norm": 0.9931582123504419, "learning_rate": 4.7762207374962845e-06, "loss": 0.3848, "step": 13457 }, { "epoch": 0.8195353652224219, "grad_norm": 0.9615150129950969, "learning_rate": 4.776187737992408e-06, "loss": 0.4296, "step": 13458 }, { "epoch": 0.8195962609992997, "grad_norm": 1.0067778813210402, "learning_rate": 4.776154736169599e-06, "loss": 0.4216, "step": 13459 }, { "epoch": 0.8196571567761776, "grad_norm": 0.9448472658764921, "learning_rate": 4.7761217320278915e-06, "loss": 0.4117, "step": 13460 }, { "epoch": 0.8197180525530554, "grad_norm": 1.0067807576582068, "learning_rate": 4.7760887255673185e-06, "loss": 0.4716, "step": 13461 }, { "epoch": 0.8197789483299334, "grad_norm": 0.9802605998204461, "learning_rate": 4.776055716787913e-06, "loss": 0.4421, "step": 13462 }, { "epoch": 0.8198398441068112, "grad_norm": 1.001786604316204, "learning_rate": 4.77602270568971e-06, "loss": 0.5117, "step": 13463 }, { "epoch": 0.8199007398836891, "grad_norm": 1.043454221359388, "learning_rate": 4.775989692272742e-06, "loss": 0.367, "step": 13464 }, { "epoch": 0.8199616356605669, "grad_norm": 1.0139520501840382, "learning_rate": 4.775956676537044e-06, "loss": 0.4366, "step": 13465 }, { "epoch": 0.8200225314374449, "grad_norm": 1.06265554282373, "learning_rate": 4.775923658482647e-06, "loss": 0.3857, "step": 13466 }, { "epoch": 0.8200834272143227, "grad_norm": 1.0149743987636843, "learning_rate": 4.775890638109587e-06, "loss": 0.3868, "step": 13467 }, { "epoch": 0.8201443229912005, "grad_norm": 0.957710613761784, "learning_rate": 4.775857615417897e-06, "loss": 0.408, "step": 13468 }, { "epoch": 0.8202052187680784, "grad_norm": 1.0300768560051345, "learning_rate": 4.775824590407611e-06, "loss": 0.3904, "step": 13469 }, { "epoch": 0.8202661145449563, "grad_norm": 1.0594440594240069, "learning_rate": 4.7757915630787614e-06, "loss": 0.4226, "step": 13470 }, { "epoch": 0.8203270103218342, "grad_norm": 0.9543696727644081, "learning_rate": 4.775758533431382e-06, "loss": 0.4613, "step": 13471 }, { "epoch": 0.820387906098712, "grad_norm": 1.1653374341276246, "learning_rate": 4.775725501465509e-06, "loss": 0.4632, "step": 13472 }, { "epoch": 0.8204488018755899, "grad_norm": 1.0372093646923863, "learning_rate": 4.775692467181173e-06, "loss": 0.4193, "step": 13473 }, { "epoch": 0.8205096976524678, "grad_norm": 1.006461135853921, "learning_rate": 4.7756594305784094e-06, "loss": 0.4027, "step": 13474 }, { "epoch": 0.8205705934293457, "grad_norm": 1.0631245028889007, "learning_rate": 4.775626391657251e-06, "loss": 0.3581, "step": 13475 }, { "epoch": 0.8206314892062235, "grad_norm": 0.9834043908429875, "learning_rate": 4.775593350417732e-06, "loss": 0.4612, "step": 13476 }, { "epoch": 0.8206923849831014, "grad_norm": 1.097043836590727, "learning_rate": 4.775560306859885e-06, "loss": 0.4889, "step": 13477 }, { "epoch": 0.8207532807599793, "grad_norm": 1.034149607883703, "learning_rate": 4.775527260983745e-06, "loss": 0.492, "step": 13478 }, { "epoch": 0.8208141765368572, "grad_norm": 0.9429307972324549, "learning_rate": 4.775494212789346e-06, "loss": 0.4292, "step": 13479 }, { "epoch": 0.820875072313735, "grad_norm": 1.0533773934141786, "learning_rate": 4.77546116227672e-06, "loss": 0.4422, "step": 13480 }, { "epoch": 0.8209359680906129, "grad_norm": 0.9227913112647228, "learning_rate": 4.775428109445901e-06, "loss": 0.4158, "step": 13481 }, { "epoch": 0.8209968638674908, "grad_norm": 0.9535799387267573, "learning_rate": 4.775395054296924e-06, "loss": 0.441, "step": 13482 }, { "epoch": 0.8210577596443687, "grad_norm": 0.9206212482714071, "learning_rate": 4.775361996829821e-06, "loss": 0.4319, "step": 13483 }, { "epoch": 0.8211186554212465, "grad_norm": 0.9594499882269678, "learning_rate": 4.775328937044627e-06, "loss": 0.4382, "step": 13484 }, { "epoch": 0.8211795511981244, "grad_norm": 0.9905347912644554, "learning_rate": 4.775295874941375e-06, "loss": 0.3828, "step": 13485 }, { "epoch": 0.8212404469750023, "grad_norm": 0.9988188308820448, "learning_rate": 4.7752628105201e-06, "loss": 0.3632, "step": 13486 }, { "epoch": 0.8213013427518802, "grad_norm": 1.0167540667764083, "learning_rate": 4.775229743780833e-06, "loss": 0.4052, "step": 13487 }, { "epoch": 0.821362238528758, "grad_norm": 1.0308115490896776, "learning_rate": 4.775196674723609e-06, "loss": 0.4661, "step": 13488 }, { "epoch": 0.8214231343056358, "grad_norm": 1.0043712527042132, "learning_rate": 4.775163603348462e-06, "loss": 0.389, "step": 13489 }, { "epoch": 0.8214840300825138, "grad_norm": 0.9769446372952727, "learning_rate": 4.775130529655428e-06, "loss": 0.4669, "step": 13490 }, { "epoch": 0.8215449258593917, "grad_norm": 1.0005890411504994, "learning_rate": 4.775097453644536e-06, "loss": 0.4012, "step": 13491 }, { "epoch": 0.8216058216362695, "grad_norm": 1.1102174201242931, "learning_rate": 4.7750643753158225e-06, "loss": 0.3324, "step": 13492 }, { "epoch": 0.8216667174131475, "grad_norm": 1.0212705687337285, "learning_rate": 4.7750312946693215e-06, "loss": 0.4022, "step": 13493 }, { "epoch": 0.8217276131900253, "grad_norm": 1.1236631085889879, "learning_rate": 4.7749982117050656e-06, "loss": 0.3427, "step": 13494 }, { "epoch": 0.8217885089669031, "grad_norm": 0.9382586675894744, "learning_rate": 4.774965126423088e-06, "loss": 0.4506, "step": 13495 }, { "epoch": 0.821849404743781, "grad_norm": 0.9616985375026155, "learning_rate": 4.774932038823423e-06, "loss": 0.4375, "step": 13496 }, { "epoch": 0.8219103005206589, "grad_norm": 0.9726549113905312, "learning_rate": 4.774898948906106e-06, "loss": 0.3676, "step": 13497 }, { "epoch": 0.8219711962975368, "grad_norm": 1.0546273423240393, "learning_rate": 4.774865856671168e-06, "loss": 0.3623, "step": 13498 }, { "epoch": 0.8220320920744146, "grad_norm": 1.0135123910260238, "learning_rate": 4.774832762118645e-06, "loss": 0.3912, "step": 13499 }, { "epoch": 0.8220929878512925, "grad_norm": 1.0636945138697584, "learning_rate": 4.774799665248569e-06, "loss": 0.4264, "step": 13500 }, { "epoch": 0.8221538836281704, "grad_norm": 0.9693294080638706, "learning_rate": 4.774766566060974e-06, "loss": 0.4695, "step": 13501 }, { "epoch": 0.8222147794050483, "grad_norm": 1.1257853860923985, "learning_rate": 4.7747334645558955e-06, "loss": 0.3154, "step": 13502 }, { "epoch": 0.8222756751819261, "grad_norm": 0.9699749078938937, "learning_rate": 4.774700360733364e-06, "loss": 0.4049, "step": 13503 }, { "epoch": 0.822336570958804, "grad_norm": 1.0784029373124446, "learning_rate": 4.774667254593417e-06, "loss": 0.3631, "step": 13504 }, { "epoch": 0.8223974667356819, "grad_norm": 1.0160847752678939, "learning_rate": 4.774634146136086e-06, "loss": 0.3639, "step": 13505 }, { "epoch": 0.8224583625125598, "grad_norm": 1.0341304838237146, "learning_rate": 4.774601035361404e-06, "loss": 0.4148, "step": 13506 }, { "epoch": 0.8225192582894376, "grad_norm": 0.990667568540425, "learning_rate": 4.774567922269406e-06, "loss": 0.3663, "step": 13507 }, { "epoch": 0.8225801540663155, "grad_norm": 0.9156384945974227, "learning_rate": 4.7745348068601256e-06, "loss": 0.4853, "step": 13508 }, { "epoch": 0.8226410498431934, "grad_norm": 0.9780007569237504, "learning_rate": 4.774501689133596e-06, "loss": 0.393, "step": 13509 }, { "epoch": 0.8227019456200713, "grad_norm": 1.0634981265425572, "learning_rate": 4.774468569089852e-06, "loss": 0.452, "step": 13510 }, { "epoch": 0.8227628413969491, "grad_norm": 0.9720343628326041, "learning_rate": 4.7744354467289265e-06, "loss": 0.3557, "step": 13511 }, { "epoch": 0.822823737173827, "grad_norm": 1.0337141767819757, "learning_rate": 4.774402322050854e-06, "loss": 0.4045, "step": 13512 }, { "epoch": 0.8228846329507049, "grad_norm": 0.9603361292791281, "learning_rate": 4.774369195055667e-06, "loss": 0.3963, "step": 13513 }, { "epoch": 0.8229455287275828, "grad_norm": 0.9276926411206511, "learning_rate": 4.7743360657434e-06, "loss": 0.402, "step": 13514 }, { "epoch": 0.8230064245044606, "grad_norm": 0.9711941079184805, "learning_rate": 4.774302934114087e-06, "loss": 0.4978, "step": 13515 }, { "epoch": 0.8230673202813384, "grad_norm": 0.9763588167469501, "learning_rate": 4.7742698001677615e-06, "loss": 0.4517, "step": 13516 }, { "epoch": 0.8231282160582164, "grad_norm": 0.9237951347284915, "learning_rate": 4.774236663904457e-06, "loss": 0.4384, "step": 13517 }, { "epoch": 0.8231891118350942, "grad_norm": 0.9798578142251217, "learning_rate": 4.774203525324207e-06, "loss": 0.4598, "step": 13518 }, { "epoch": 0.8232500076119721, "grad_norm": 1.0296864746862853, "learning_rate": 4.774170384427046e-06, "loss": 0.3939, "step": 13519 }, { "epoch": 0.8233109033888499, "grad_norm": 1.030605896859927, "learning_rate": 4.774137241213008e-06, "loss": 0.417, "step": 13520 }, { "epoch": 0.8233717991657279, "grad_norm": 0.9964401529194284, "learning_rate": 4.774104095682126e-06, "loss": 0.3801, "step": 13521 }, { "epoch": 0.8234326949426057, "grad_norm": 0.9721780697278843, "learning_rate": 4.774070947834434e-06, "loss": 0.4638, "step": 13522 }, { "epoch": 0.8234935907194836, "grad_norm": 1.0390388262113415, "learning_rate": 4.774037797669966e-06, "loss": 0.3454, "step": 13523 }, { "epoch": 0.8235544864963614, "grad_norm": 1.0866914285146685, "learning_rate": 4.7740046451887555e-06, "loss": 0.3695, "step": 13524 }, { "epoch": 0.8236153822732394, "grad_norm": 0.8865320017040234, "learning_rate": 4.773971490390836e-06, "loss": 0.404, "step": 13525 }, { "epoch": 0.8236762780501172, "grad_norm": 0.9767454603245083, "learning_rate": 4.773938333276242e-06, "loss": 0.4643, "step": 13526 }, { "epoch": 0.8237371738269951, "grad_norm": 1.019679728096081, "learning_rate": 4.773905173845006e-06, "loss": 0.3858, "step": 13527 }, { "epoch": 0.8237980696038729, "grad_norm": 1.05330947267765, "learning_rate": 4.773872012097164e-06, "loss": 0.3982, "step": 13528 }, { "epoch": 0.8238589653807509, "grad_norm": 0.9852882069608082, "learning_rate": 4.773838848032748e-06, "loss": 0.4184, "step": 13529 }, { "epoch": 0.8239198611576287, "grad_norm": 0.9889396260188494, "learning_rate": 4.7738056816517915e-06, "loss": 0.4654, "step": 13530 }, { "epoch": 0.8239807569345066, "grad_norm": 0.984845126658746, "learning_rate": 4.77377251295433e-06, "loss": 0.399, "step": 13531 }, { "epoch": 0.8240416527113845, "grad_norm": 1.0013339500368195, "learning_rate": 4.773739341940396e-06, "loss": 0.3834, "step": 13532 }, { "epoch": 0.8241025484882624, "grad_norm": 1.0841941202300134, "learning_rate": 4.773706168610024e-06, "loss": 0.3775, "step": 13533 }, { "epoch": 0.8241634442651402, "grad_norm": 0.9801069903155866, "learning_rate": 4.773672992963247e-06, "loss": 0.4767, "step": 13534 }, { "epoch": 0.824224340042018, "grad_norm": 1.0087525765984802, "learning_rate": 4.7736398150000996e-06, "loss": 0.3971, "step": 13535 }, { "epoch": 0.824285235818896, "grad_norm": 0.9969768159024629, "learning_rate": 4.773606634720615e-06, "loss": 0.3786, "step": 13536 }, { "epoch": 0.8243461315957739, "grad_norm": 1.008531891284475, "learning_rate": 4.7735734521248266e-06, "loss": 0.4004, "step": 13537 }, { "epoch": 0.8244070273726517, "grad_norm": 1.0824725047915493, "learning_rate": 4.773540267212769e-06, "loss": 0.321, "step": 13538 }, { "epoch": 0.8244679231495295, "grad_norm": 0.989070290217063, "learning_rate": 4.773507079984477e-06, "loss": 0.4554, "step": 13539 }, { "epoch": 0.8245288189264075, "grad_norm": 1.0782717333046072, "learning_rate": 4.773473890439983e-06, "loss": 0.3744, "step": 13540 }, { "epoch": 0.8245897147032853, "grad_norm": 0.9221216466309802, "learning_rate": 4.7734406985793205e-06, "loss": 0.469, "step": 13541 }, { "epoch": 0.8246506104801632, "grad_norm": 1.0401618112793145, "learning_rate": 4.773407504402524e-06, "loss": 0.4457, "step": 13542 }, { "epoch": 0.824711506257041, "grad_norm": 0.9413967166659266, "learning_rate": 4.773374307909626e-06, "loss": 0.4161, "step": 13543 }, { "epoch": 0.824772402033919, "grad_norm": 0.949656906652463, "learning_rate": 4.7733411091006636e-06, "loss": 0.4098, "step": 13544 }, { "epoch": 0.8248332978107968, "grad_norm": 0.9953792729675063, "learning_rate": 4.773307907975667e-06, "loss": 0.4379, "step": 13545 }, { "epoch": 0.8248941935876747, "grad_norm": 0.9575533915584148, "learning_rate": 4.773274704534673e-06, "loss": 0.4423, "step": 13546 }, { "epoch": 0.8249550893645525, "grad_norm": 1.0116942263001274, "learning_rate": 4.7732414987777125e-06, "loss": 0.3947, "step": 13547 }, { "epoch": 0.8250159851414305, "grad_norm": 1.006917167240661, "learning_rate": 4.773208290704822e-06, "loss": 0.4142, "step": 13548 }, { "epoch": 0.8250768809183083, "grad_norm": 1.0171411476795478, "learning_rate": 4.773175080316033e-06, "loss": 0.4802, "step": 13549 }, { "epoch": 0.8251377766951862, "grad_norm": 0.9779522141423729, "learning_rate": 4.773141867611382e-06, "loss": 0.392, "step": 13550 }, { "epoch": 0.825198672472064, "grad_norm": 0.9957095854583876, "learning_rate": 4.7731086525909e-06, "loss": 0.3699, "step": 13551 }, { "epoch": 0.825259568248942, "grad_norm": 1.100846684410702, "learning_rate": 4.773075435254622e-06, "loss": 0.4127, "step": 13552 }, { "epoch": 0.8253204640258198, "grad_norm": 1.0361930156552481, "learning_rate": 4.773042215602584e-06, "loss": 0.3785, "step": 13553 }, { "epoch": 0.8253813598026977, "grad_norm": 0.9211846858439277, "learning_rate": 4.773008993634815e-06, "loss": 0.5275, "step": 13554 }, { "epoch": 0.8254422555795755, "grad_norm": 1.032660193494922, "learning_rate": 4.772975769351353e-06, "loss": 0.4274, "step": 13555 }, { "epoch": 0.8255031513564535, "grad_norm": 1.0549616717675123, "learning_rate": 4.772942542752231e-06, "loss": 0.4128, "step": 13556 }, { "epoch": 0.8255640471333313, "grad_norm": 0.9947873915353787, "learning_rate": 4.7729093138374825e-06, "loss": 0.3924, "step": 13557 }, { "epoch": 0.8256249429102092, "grad_norm": 1.0087446995957607, "learning_rate": 4.7728760826071404e-06, "loss": 0.4061, "step": 13558 }, { "epoch": 0.825685838687087, "grad_norm": 0.9987033700404577, "learning_rate": 4.772842849061241e-06, "loss": 0.4278, "step": 13559 }, { "epoch": 0.825746734463965, "grad_norm": 0.9371893695305235, "learning_rate": 4.7728096131998145e-06, "loss": 0.4536, "step": 13560 }, { "epoch": 0.8258076302408428, "grad_norm": 1.0265318069848441, "learning_rate": 4.7727763750228985e-06, "loss": 0.3632, "step": 13561 }, { "epoch": 0.8258685260177206, "grad_norm": 0.9204108371966472, "learning_rate": 4.772743134530524e-06, "loss": 0.4548, "step": 13562 }, { "epoch": 0.8259294217945985, "grad_norm": 0.9618027149440074, "learning_rate": 4.772709891722726e-06, "loss": 0.4308, "step": 13563 }, { "epoch": 0.8259903175714765, "grad_norm": 1.091897077298049, "learning_rate": 4.772676646599539e-06, "loss": 0.3641, "step": 13564 }, { "epoch": 0.8260512133483543, "grad_norm": 0.9594430196691368, "learning_rate": 4.772643399160997e-06, "loss": 0.4167, "step": 13565 }, { "epoch": 0.8261121091252321, "grad_norm": 1.0154938607137296, "learning_rate": 4.772610149407132e-06, "loss": 0.4168, "step": 13566 }, { "epoch": 0.82617300490211, "grad_norm": 0.9718023890609474, "learning_rate": 4.772576897337979e-06, "loss": 0.4527, "step": 13567 }, { "epoch": 0.8262339006789879, "grad_norm": 1.0625463329812852, "learning_rate": 4.772543642953572e-06, "loss": 0.4782, "step": 13568 }, { "epoch": 0.8262947964558658, "grad_norm": 0.9895881595254984, "learning_rate": 4.772510386253945e-06, "loss": 0.4579, "step": 13569 }, { "epoch": 0.8263556922327436, "grad_norm": 1.0275622127685855, "learning_rate": 4.772477127239132e-06, "loss": 0.3773, "step": 13570 }, { "epoch": 0.8264165880096215, "grad_norm": 1.0272181121141595, "learning_rate": 4.772443865909167e-06, "loss": 0.3556, "step": 13571 }, { "epoch": 0.8264774837864994, "grad_norm": 0.9677806950102685, "learning_rate": 4.772410602264082e-06, "loss": 0.4831, "step": 13572 }, { "epoch": 0.8265383795633773, "grad_norm": 1.0265725352656223, "learning_rate": 4.772377336303913e-06, "loss": 0.3961, "step": 13573 }, { "epoch": 0.8265992753402551, "grad_norm": 0.9933571705027584, "learning_rate": 4.7723440680286935e-06, "loss": 0.4067, "step": 13574 }, { "epoch": 0.8266601711171331, "grad_norm": 1.0114268136120395, "learning_rate": 4.7723107974384566e-06, "loss": 0.3949, "step": 13575 }, { "epoch": 0.8267210668940109, "grad_norm": 0.9898038960160417, "learning_rate": 4.772277524533237e-06, "loss": 0.4374, "step": 13576 }, { "epoch": 0.8267819626708888, "grad_norm": 1.012103208289907, "learning_rate": 4.772244249313068e-06, "loss": 0.4307, "step": 13577 }, { "epoch": 0.8268428584477666, "grad_norm": 1.109569608781535, "learning_rate": 4.772210971777984e-06, "loss": 0.422, "step": 13578 }, { "epoch": 0.8269037542246446, "grad_norm": 0.9885898761458884, "learning_rate": 4.772177691928019e-06, "loss": 0.4178, "step": 13579 }, { "epoch": 0.8269646500015224, "grad_norm": 1.0122949730600657, "learning_rate": 4.772144409763206e-06, "loss": 0.3863, "step": 13580 }, { "epoch": 0.8270255457784003, "grad_norm": 0.9286954230869864, "learning_rate": 4.77211112528358e-06, "loss": 0.4485, "step": 13581 }, { "epoch": 0.8270864415552781, "grad_norm": 1.1387091714014534, "learning_rate": 4.772077838489174e-06, "loss": 0.4124, "step": 13582 }, { "epoch": 0.8271473373321561, "grad_norm": 0.975597920258727, "learning_rate": 4.772044549380023e-06, "loss": 0.4187, "step": 13583 }, { "epoch": 0.8272082331090339, "grad_norm": 1.0505758522223048, "learning_rate": 4.7720112579561595e-06, "loss": 0.4443, "step": 13584 }, { "epoch": 0.8272691288859118, "grad_norm": 1.083441869565888, "learning_rate": 4.771977964217619e-06, "loss": 0.3538, "step": 13585 }, { "epoch": 0.8273300246627896, "grad_norm": 1.0261246647578364, "learning_rate": 4.771944668164434e-06, "loss": 0.4602, "step": 13586 }, { "epoch": 0.8273909204396676, "grad_norm": 0.9465333096515725, "learning_rate": 4.771911369796639e-06, "loss": 0.4681, "step": 13587 }, { "epoch": 0.8274518162165454, "grad_norm": 0.9470603695619325, "learning_rate": 4.771878069114269e-06, "loss": 0.4093, "step": 13588 }, { "epoch": 0.8275127119934232, "grad_norm": 0.9440530435761456, "learning_rate": 4.771844766117355e-06, "loss": 0.4508, "step": 13589 }, { "epoch": 0.8275736077703011, "grad_norm": 0.9957379940534045, "learning_rate": 4.771811460805934e-06, "loss": 0.4508, "step": 13590 }, { "epoch": 0.827634503547179, "grad_norm": 0.9405696468319878, "learning_rate": 4.771778153180038e-06, "loss": 0.4387, "step": 13591 }, { "epoch": 0.8276953993240569, "grad_norm": 0.9858016931942265, "learning_rate": 4.771744843239702e-06, "loss": 0.4378, "step": 13592 }, { "epoch": 0.8277562951009347, "grad_norm": 1.0064394794610163, "learning_rate": 4.77171153098496e-06, "loss": 0.3486, "step": 13593 }, { "epoch": 0.8278171908778126, "grad_norm": 0.9517343924062538, "learning_rate": 4.771678216415845e-06, "loss": 0.3352, "step": 13594 }, { "epoch": 0.8278780866546905, "grad_norm": 0.9365424431736886, "learning_rate": 4.771644899532391e-06, "loss": 0.4304, "step": 13595 }, { "epoch": 0.8279389824315684, "grad_norm": 0.9312277212394438, "learning_rate": 4.771611580334634e-06, "loss": 0.3709, "step": 13596 }, { "epoch": 0.8279998782084462, "grad_norm": 0.9422289142077387, "learning_rate": 4.771578258822605e-06, "loss": 0.4803, "step": 13597 }, { "epoch": 0.8280607739853241, "grad_norm": 0.9571032389205657, "learning_rate": 4.77154493499634e-06, "loss": 0.4175, "step": 13598 }, { "epoch": 0.828121669762202, "grad_norm": 1.0407764493681382, "learning_rate": 4.771511608855872e-06, "loss": 0.3964, "step": 13599 }, { "epoch": 0.8281825655390799, "grad_norm": 1.0048755164702692, "learning_rate": 4.771478280401235e-06, "loss": 0.4268, "step": 13600 }, { "epoch": 0.8282434613159577, "grad_norm": 1.0292827277576875, "learning_rate": 4.7714449496324635e-06, "loss": 0.3668, "step": 13601 }, { "epoch": 0.8283043570928356, "grad_norm": 0.9659463127060847, "learning_rate": 4.77141161654959e-06, "loss": 0.4321, "step": 13602 }, { "epoch": 0.8283652528697135, "grad_norm": 0.9919540392132843, "learning_rate": 4.771378281152651e-06, "loss": 0.4227, "step": 13603 }, { "epoch": 0.8284261486465914, "grad_norm": 1.1735543895099643, "learning_rate": 4.771344943441679e-06, "loss": 0.3693, "step": 13604 }, { "epoch": 0.8284870444234692, "grad_norm": 0.9936213857689242, "learning_rate": 4.771311603416707e-06, "loss": 0.4864, "step": 13605 }, { "epoch": 0.828547940200347, "grad_norm": 1.0207865813806758, "learning_rate": 4.7712782610777705e-06, "loss": 0.4569, "step": 13606 }, { "epoch": 0.828608835977225, "grad_norm": 0.97195680843358, "learning_rate": 4.771244916424903e-06, "loss": 0.4252, "step": 13607 }, { "epoch": 0.8286697317541029, "grad_norm": 1.0014637371098265, "learning_rate": 4.771211569458138e-06, "loss": 0.361, "step": 13608 }, { "epoch": 0.8287306275309807, "grad_norm": 0.995168647649751, "learning_rate": 4.7711782201775105e-06, "loss": 0.421, "step": 13609 }, { "epoch": 0.8287915233078585, "grad_norm": 0.9676422760461195, "learning_rate": 4.771144868583053e-06, "loss": 0.4305, "step": 13610 }, { "epoch": 0.8288524190847365, "grad_norm": 1.066965346854118, "learning_rate": 4.771111514674801e-06, "loss": 0.3716, "step": 13611 }, { "epoch": 0.8289133148616143, "grad_norm": 1.0379735091976146, "learning_rate": 4.771078158452788e-06, "loss": 0.4255, "step": 13612 }, { "epoch": 0.8289742106384922, "grad_norm": 0.9630970164554443, "learning_rate": 4.771044799917047e-06, "loss": 0.4165, "step": 13613 }, { "epoch": 0.8290351064153701, "grad_norm": 0.9889105230394116, "learning_rate": 4.771011439067613e-06, "loss": 0.4247, "step": 13614 }, { "epoch": 0.829096002192248, "grad_norm": 1.0012054537902568, "learning_rate": 4.77097807590452e-06, "loss": 0.4294, "step": 13615 }, { "epoch": 0.8291568979691258, "grad_norm": 0.9880105210136206, "learning_rate": 4.770944710427802e-06, "loss": 0.4585, "step": 13616 }, { "epoch": 0.8292177937460037, "grad_norm": 0.9914356844710202, "learning_rate": 4.7709113426374924e-06, "loss": 0.5219, "step": 13617 }, { "epoch": 0.8292786895228816, "grad_norm": 0.9773014704263622, "learning_rate": 4.770877972533625e-06, "loss": 0.4, "step": 13618 }, { "epoch": 0.8293395852997595, "grad_norm": 1.0261775408742606, "learning_rate": 4.770844600116236e-06, "loss": 0.3747, "step": 13619 }, { "epoch": 0.8294004810766373, "grad_norm": 0.9668389221386277, "learning_rate": 4.7708112253853565e-06, "loss": 0.3968, "step": 13620 }, { "epoch": 0.8294613768535152, "grad_norm": 0.9766730857704164, "learning_rate": 4.770777848341022e-06, "loss": 0.3889, "step": 13621 }, { "epoch": 0.8295222726303931, "grad_norm": 0.9906548305725494, "learning_rate": 4.770744468983266e-06, "loss": 0.3756, "step": 13622 }, { "epoch": 0.829583168407271, "grad_norm": 1.0013000165495418, "learning_rate": 4.770711087312122e-06, "loss": 0.409, "step": 13623 }, { "epoch": 0.8296440641841488, "grad_norm": 0.9857885831699157, "learning_rate": 4.7706777033276264e-06, "loss": 0.4437, "step": 13624 }, { "epoch": 0.8297049599610267, "grad_norm": 0.9963912148767377, "learning_rate": 4.770644317029811e-06, "loss": 0.3701, "step": 13625 }, { "epoch": 0.8297658557379046, "grad_norm": 1.1179414700060968, "learning_rate": 4.77061092841871e-06, "loss": 0.4029, "step": 13626 }, { "epoch": 0.8298267515147825, "grad_norm": 1.0547751522887852, "learning_rate": 4.770577537494357e-06, "loss": 0.4242, "step": 13627 }, { "epoch": 0.8298876472916603, "grad_norm": 1.0453691859660936, "learning_rate": 4.7705441442567886e-06, "loss": 0.374, "step": 13628 }, { "epoch": 0.8299485430685382, "grad_norm": 1.0567612471544658, "learning_rate": 4.770510748706037e-06, "loss": 0.4178, "step": 13629 }, { "epoch": 0.8300094388454161, "grad_norm": 0.9947701595941904, "learning_rate": 4.770477350842134e-06, "loss": 0.4339, "step": 13630 }, { "epoch": 0.830070334622294, "grad_norm": 0.9865708652256412, "learning_rate": 4.770443950665118e-06, "loss": 0.3906, "step": 13631 }, { "epoch": 0.8301312303991718, "grad_norm": 1.032003353722166, "learning_rate": 4.7704105481750205e-06, "loss": 0.429, "step": 13632 }, { "epoch": 0.8301921261760496, "grad_norm": 1.0592059071809672, "learning_rate": 4.770377143371875e-06, "loss": 0.4076, "step": 13633 }, { "epoch": 0.8302530219529276, "grad_norm": 0.941156311494253, "learning_rate": 4.770343736255717e-06, "loss": 0.4058, "step": 13634 }, { "epoch": 0.8303139177298055, "grad_norm": 0.9959721993024002, "learning_rate": 4.770310326826581e-06, "loss": 0.3995, "step": 13635 }, { "epoch": 0.8303748135066833, "grad_norm": 0.9872295052297176, "learning_rate": 4.770276915084498e-06, "loss": 0.4831, "step": 13636 }, { "epoch": 0.8304357092835611, "grad_norm": 0.9757976081779488, "learning_rate": 4.770243501029506e-06, "loss": 0.4704, "step": 13637 }, { "epoch": 0.8304966050604391, "grad_norm": 0.9577938709782847, "learning_rate": 4.770210084661636e-06, "loss": 0.4433, "step": 13638 }, { "epoch": 0.8305575008373169, "grad_norm": 1.0053987555825414, "learning_rate": 4.770176665980924e-06, "loss": 0.4521, "step": 13639 }, { "epoch": 0.8306183966141948, "grad_norm": 0.9285058527284112, "learning_rate": 4.770143244987403e-06, "loss": 0.4044, "step": 13640 }, { "epoch": 0.8306792923910726, "grad_norm": 1.0954934351289651, "learning_rate": 4.770109821681107e-06, "loss": 0.5028, "step": 13641 }, { "epoch": 0.8307401881679506, "grad_norm": 1.000513990890976, "learning_rate": 4.77007639606207e-06, "loss": 0.4048, "step": 13642 }, { "epoch": 0.8308010839448284, "grad_norm": 1.0685930287318008, "learning_rate": 4.770042968130327e-06, "loss": 0.4252, "step": 13643 }, { "epoch": 0.8308619797217063, "grad_norm": 1.203196848294939, "learning_rate": 4.770009537885911e-06, "loss": 0.4096, "step": 13644 }, { "epoch": 0.8309228754985841, "grad_norm": 1.0387812565481487, "learning_rate": 4.769976105328856e-06, "loss": 0.5284, "step": 13645 }, { "epoch": 0.8309837712754621, "grad_norm": 1.1185640953114997, "learning_rate": 4.769942670459198e-06, "loss": 0.4069, "step": 13646 }, { "epoch": 0.8310446670523399, "grad_norm": 0.9537486790954954, "learning_rate": 4.769909233276968e-06, "loss": 0.3955, "step": 13647 }, { "epoch": 0.8311055628292178, "grad_norm": 1.0042765123582427, "learning_rate": 4.769875793782202e-06, "loss": 0.3509, "step": 13648 }, { "epoch": 0.8311664586060956, "grad_norm": 0.9757714082231317, "learning_rate": 4.7698423519749346e-06, "loss": 0.4379, "step": 13649 }, { "epoch": 0.8312273543829736, "grad_norm": 0.9972299033283395, "learning_rate": 4.769808907855199e-06, "loss": 0.397, "step": 13650 }, { "epoch": 0.8312882501598514, "grad_norm": 1.0042080053075157, "learning_rate": 4.769775461423029e-06, "loss": 0.3567, "step": 13651 }, { "epoch": 0.8313491459367293, "grad_norm": 1.0383446783138868, "learning_rate": 4.769742012678459e-06, "loss": 0.3381, "step": 13652 }, { "epoch": 0.8314100417136071, "grad_norm": 0.9435605997045952, "learning_rate": 4.7697085616215226e-06, "loss": 0.4275, "step": 13653 }, { "epoch": 0.8314709374904851, "grad_norm": 0.9650736599896385, "learning_rate": 4.769675108252254e-06, "loss": 0.4272, "step": 13654 }, { "epoch": 0.8315318332673629, "grad_norm": 0.9914705144702968, "learning_rate": 4.769641652570688e-06, "loss": 0.3353, "step": 13655 }, { "epoch": 0.8315927290442408, "grad_norm": 0.9477147504207332, "learning_rate": 4.769608194576859e-06, "loss": 0.4581, "step": 13656 }, { "epoch": 0.8316536248211187, "grad_norm": 1.0477694649435214, "learning_rate": 4.769574734270799e-06, "loss": 0.4104, "step": 13657 }, { "epoch": 0.8317145205979966, "grad_norm": 1.1169083636980095, "learning_rate": 4.769541271652545e-06, "loss": 0.3845, "step": 13658 }, { "epoch": 0.8317754163748744, "grad_norm": 1.0662126976914, "learning_rate": 4.769507806722128e-06, "loss": 0.352, "step": 13659 }, { "epoch": 0.8318363121517522, "grad_norm": 1.1102898226899292, "learning_rate": 4.769474339479584e-06, "loss": 0.4267, "step": 13660 }, { "epoch": 0.8318972079286302, "grad_norm": 1.0177101527282002, "learning_rate": 4.769440869924948e-06, "loss": 0.3692, "step": 13661 }, { "epoch": 0.831958103705508, "grad_norm": 1.0047573593755954, "learning_rate": 4.769407398058252e-06, "loss": 0.4639, "step": 13662 }, { "epoch": 0.8320189994823859, "grad_norm": 0.9765216545240666, "learning_rate": 4.76937392387953e-06, "loss": 0.3825, "step": 13663 }, { "epoch": 0.8320798952592637, "grad_norm": 1.0012776092189197, "learning_rate": 4.769340447388819e-06, "loss": 0.4221, "step": 13664 }, { "epoch": 0.8321407910361417, "grad_norm": 1.0185504719006224, "learning_rate": 4.7693069685861494e-06, "loss": 0.3842, "step": 13665 }, { "epoch": 0.8322016868130195, "grad_norm": 1.0241999046938268, "learning_rate": 4.769273487471557e-06, "loss": 0.4241, "step": 13666 }, { "epoch": 0.8322625825898974, "grad_norm": 0.9324056238248658, "learning_rate": 4.769240004045077e-06, "loss": 0.5575, "step": 13667 }, { "epoch": 0.8323234783667752, "grad_norm": 0.940766787411794, "learning_rate": 4.769206518306741e-06, "loss": 0.4662, "step": 13668 }, { "epoch": 0.8323843741436532, "grad_norm": 1.004053722628415, "learning_rate": 4.769173030256586e-06, "loss": 0.4038, "step": 13669 }, { "epoch": 0.832445269920531, "grad_norm": 0.9344086121008316, "learning_rate": 4.769139539894645e-06, "loss": 0.4283, "step": 13670 }, { "epoch": 0.8325061656974089, "grad_norm": 1.0388060966922399, "learning_rate": 4.769106047220951e-06, "loss": 0.3641, "step": 13671 }, { "epoch": 0.8325670614742867, "grad_norm": 1.0008910216442506, "learning_rate": 4.769072552235539e-06, "loss": 0.3968, "step": 13672 }, { "epoch": 0.8326279572511647, "grad_norm": 0.962308515299304, "learning_rate": 4.769039054938444e-06, "loss": 0.4113, "step": 13673 }, { "epoch": 0.8326888530280425, "grad_norm": 0.999883482596564, "learning_rate": 4.769005555329698e-06, "loss": 0.406, "step": 13674 }, { "epoch": 0.8327497488049204, "grad_norm": 0.9832738853748295, "learning_rate": 4.768972053409336e-06, "loss": 0.4145, "step": 13675 }, { "epoch": 0.8328106445817982, "grad_norm": 0.9564261218280773, "learning_rate": 4.7689385491773934e-06, "loss": 0.4226, "step": 13676 }, { "epoch": 0.8328715403586762, "grad_norm": 1.0678668683033232, "learning_rate": 4.768905042633902e-06, "loss": 0.4327, "step": 13677 }, { "epoch": 0.832932436135554, "grad_norm": 1.0383567321900598, "learning_rate": 4.7688715337788995e-06, "loss": 0.3894, "step": 13678 }, { "epoch": 0.8329933319124319, "grad_norm": 0.9322774307425099, "learning_rate": 4.768838022612417e-06, "loss": 0.4277, "step": 13679 }, { "epoch": 0.8330542276893097, "grad_norm": 1.0120028975675293, "learning_rate": 4.768804509134488e-06, "loss": 0.4781, "step": 13680 }, { "epoch": 0.8331151234661877, "grad_norm": 0.9464085710342116, "learning_rate": 4.76877099334515e-06, "loss": 0.4179, "step": 13681 }, { "epoch": 0.8331760192430655, "grad_norm": 1.0629902061270184, "learning_rate": 4.7687374752444345e-06, "loss": 0.4654, "step": 13682 }, { "epoch": 0.8332369150199433, "grad_norm": 1.069355973440086, "learning_rate": 4.768703954832376e-06, "loss": 0.4149, "step": 13683 }, { "epoch": 0.8332978107968212, "grad_norm": 1.065279978065118, "learning_rate": 4.76867043210901e-06, "loss": 0.452, "step": 13684 }, { "epoch": 0.8333587065736991, "grad_norm": 1.0094258818930122, "learning_rate": 4.768636907074369e-06, "loss": 0.4046, "step": 13685 }, { "epoch": 0.833419602350577, "grad_norm": 0.9898183969812605, "learning_rate": 4.768603379728489e-06, "loss": 0.3867, "step": 13686 }, { "epoch": 0.8334804981274548, "grad_norm": 0.9721517145764698, "learning_rate": 4.7685698500714016e-06, "loss": 0.4204, "step": 13687 }, { "epoch": 0.8335413939043327, "grad_norm": 1.042411066640602, "learning_rate": 4.768536318103143e-06, "loss": 0.4126, "step": 13688 }, { "epoch": 0.8336022896812106, "grad_norm": 0.961790620085323, "learning_rate": 4.768502783823748e-06, "loss": 0.4825, "step": 13689 }, { "epoch": 0.8336631854580885, "grad_norm": 1.0723975853720944, "learning_rate": 4.7684692472332476e-06, "loss": 0.3924, "step": 13690 }, { "epoch": 0.8337240812349663, "grad_norm": 0.9494083275641326, "learning_rate": 4.768435708331678e-06, "loss": 0.4516, "step": 13691 }, { "epoch": 0.8337849770118442, "grad_norm": 1.017271972214318, "learning_rate": 4.768402167119074e-06, "loss": 0.4393, "step": 13692 }, { "epoch": 0.8338458727887221, "grad_norm": 0.8745565724804664, "learning_rate": 4.768368623595469e-06, "loss": 0.4332, "step": 13693 }, { "epoch": 0.8339067685656, "grad_norm": 0.9686757664888994, "learning_rate": 4.768335077760897e-06, "loss": 0.4802, "step": 13694 }, { "epoch": 0.8339676643424778, "grad_norm": 0.9534891899133318, "learning_rate": 4.768301529615394e-06, "loss": 0.4818, "step": 13695 }, { "epoch": 0.8340285601193558, "grad_norm": 0.9549648431285899, "learning_rate": 4.76826797915899e-06, "loss": 0.4734, "step": 13696 }, { "epoch": 0.8340894558962336, "grad_norm": 1.0023289516888927, "learning_rate": 4.768234426391723e-06, "loss": 0.4288, "step": 13697 }, { "epoch": 0.8341503516731115, "grad_norm": 1.1129436281992988, "learning_rate": 4.768200871313626e-06, "loss": 0.3937, "step": 13698 }, { "epoch": 0.8342112474499893, "grad_norm": 1.0502931326493978, "learning_rate": 4.768167313924733e-06, "loss": 0.4084, "step": 13699 }, { "epoch": 0.8342721432268673, "grad_norm": 0.876905315707353, "learning_rate": 4.768133754225077e-06, "loss": 0.4349, "step": 13700 }, { "epoch": 0.8343330390037451, "grad_norm": 1.0354648707495813, "learning_rate": 4.768100192214695e-06, "loss": 0.3457, "step": 13701 }, { "epoch": 0.834393934780623, "grad_norm": 0.99866977724207, "learning_rate": 4.768066627893619e-06, "loss": 0.3741, "step": 13702 }, { "epoch": 0.8344548305575008, "grad_norm": 0.9449683059272791, "learning_rate": 4.768033061261885e-06, "loss": 0.4455, "step": 13703 }, { "epoch": 0.8345157263343788, "grad_norm": 1.051035516755037, "learning_rate": 4.767999492319525e-06, "loss": 0.4377, "step": 13704 }, { "epoch": 0.8345766221112566, "grad_norm": 0.9549477156635945, "learning_rate": 4.767965921066575e-06, "loss": 0.3795, "step": 13705 }, { "epoch": 0.8346375178881345, "grad_norm": 0.9411978799885229, "learning_rate": 4.767932347503068e-06, "loss": 0.4501, "step": 13706 }, { "epoch": 0.8346984136650123, "grad_norm": 1.0015568991610122, "learning_rate": 4.7678987716290384e-06, "loss": 0.4224, "step": 13707 }, { "epoch": 0.8347593094418903, "grad_norm": 1.0824629902773815, "learning_rate": 4.767865193444521e-06, "loss": 0.3543, "step": 13708 }, { "epoch": 0.8348202052187681, "grad_norm": 0.9760555350664815, "learning_rate": 4.7678316129495496e-06, "loss": 0.4059, "step": 13709 }, { "epoch": 0.8348811009956459, "grad_norm": 1.08015593371916, "learning_rate": 4.767798030144158e-06, "loss": 0.4034, "step": 13710 }, { "epoch": 0.8349419967725238, "grad_norm": 1.0968882229232604, "learning_rate": 4.767764445028382e-06, "loss": 0.3876, "step": 13711 }, { "epoch": 0.8350028925494017, "grad_norm": 0.9308203229298343, "learning_rate": 4.767730857602254e-06, "loss": 0.4639, "step": 13712 }, { "epoch": 0.8350637883262796, "grad_norm": 1.0260117188725324, "learning_rate": 4.76769726786581e-06, "loss": 0.4369, "step": 13713 }, { "epoch": 0.8351246841031574, "grad_norm": 0.9562593404823762, "learning_rate": 4.767663675819082e-06, "loss": 0.4222, "step": 13714 }, { "epoch": 0.8351855798800353, "grad_norm": 0.8615280650127894, "learning_rate": 4.767630081462106e-06, "loss": 0.4948, "step": 13715 }, { "epoch": 0.8352464756569132, "grad_norm": 0.9822638647132412, "learning_rate": 4.767596484794915e-06, "loss": 0.4193, "step": 13716 }, { "epoch": 0.8353073714337911, "grad_norm": 1.0109978792008114, "learning_rate": 4.767562885817544e-06, "loss": 0.3877, "step": 13717 }, { "epoch": 0.8353682672106689, "grad_norm": 0.9604316733381794, "learning_rate": 4.767529284530028e-06, "loss": 0.4398, "step": 13718 }, { "epoch": 0.8354291629875468, "grad_norm": 1.0349523485500436, "learning_rate": 4.767495680932399e-06, "loss": 0.4313, "step": 13719 }, { "epoch": 0.8354900587644247, "grad_norm": 0.9376598424100143, "learning_rate": 4.767462075024694e-06, "loss": 0.4184, "step": 13720 }, { "epoch": 0.8355509545413026, "grad_norm": 1.0041013651770316, "learning_rate": 4.767428466806945e-06, "loss": 0.4507, "step": 13721 }, { "epoch": 0.8356118503181804, "grad_norm": 0.9660830704535923, "learning_rate": 4.767394856279186e-06, "loss": 0.4669, "step": 13722 }, { "epoch": 0.8356727460950583, "grad_norm": 1.0084234988248317, "learning_rate": 4.767361243441453e-06, "loss": 0.3627, "step": 13723 }, { "epoch": 0.8357336418719362, "grad_norm": 1.0856652632228647, "learning_rate": 4.76732762829378e-06, "loss": 0.4221, "step": 13724 }, { "epoch": 0.8357945376488141, "grad_norm": 1.0627617427006, "learning_rate": 4.7672940108362e-06, "loss": 0.3964, "step": 13725 }, { "epoch": 0.8358554334256919, "grad_norm": 0.9804603795004702, "learning_rate": 4.767260391068749e-06, "loss": 0.4096, "step": 13726 }, { "epoch": 0.8359163292025698, "grad_norm": 1.06639098464143, "learning_rate": 4.76722676899146e-06, "loss": 0.4089, "step": 13727 }, { "epoch": 0.8359772249794477, "grad_norm": 0.9708773569455487, "learning_rate": 4.767193144604367e-06, "loss": 0.3769, "step": 13728 }, { "epoch": 0.8360381207563256, "grad_norm": 1.0106443104070382, "learning_rate": 4.767159517907505e-06, "loss": 0.3417, "step": 13729 }, { "epoch": 0.8360990165332034, "grad_norm": 0.993788122197714, "learning_rate": 4.767125888900907e-06, "loss": 0.4589, "step": 13730 }, { "epoch": 0.8361599123100812, "grad_norm": 1.0230934926663406, "learning_rate": 4.76709225758461e-06, "loss": 0.3952, "step": 13731 }, { "epoch": 0.8362208080869592, "grad_norm": 0.9173756456041495, "learning_rate": 4.767058623958646e-06, "loss": 0.4816, "step": 13732 }, { "epoch": 0.836281703863837, "grad_norm": 0.9937307289827457, "learning_rate": 4.767024988023049e-06, "loss": 0.4322, "step": 13733 }, { "epoch": 0.8363425996407149, "grad_norm": 1.0246186258710317, "learning_rate": 4.766991349777855e-06, "loss": 0.4136, "step": 13734 }, { "epoch": 0.8364034954175927, "grad_norm": 1.0152268444377495, "learning_rate": 4.766957709223097e-06, "loss": 0.4003, "step": 13735 }, { "epoch": 0.8364643911944707, "grad_norm": 0.9337317534172133, "learning_rate": 4.76692406635881e-06, "loss": 0.4466, "step": 13736 }, { "epoch": 0.8365252869713485, "grad_norm": 0.9905350543085012, "learning_rate": 4.7668904211850275e-06, "loss": 0.4079, "step": 13737 }, { "epoch": 0.8365861827482264, "grad_norm": 1.0295761095261537, "learning_rate": 4.766856773701784e-06, "loss": 0.428, "step": 13738 }, { "epoch": 0.8366470785251043, "grad_norm": 0.9422565319818954, "learning_rate": 4.7668231239091145e-06, "loss": 0.4366, "step": 13739 }, { "epoch": 0.8367079743019822, "grad_norm": 1.0070493089303085, "learning_rate": 4.766789471807052e-06, "loss": 0.3937, "step": 13740 }, { "epoch": 0.83676887007886, "grad_norm": 1.0844249761253828, "learning_rate": 4.766755817395632e-06, "loss": 0.423, "step": 13741 }, { "epoch": 0.8368297658557379, "grad_norm": 0.9450753141754159, "learning_rate": 4.766722160674888e-06, "loss": 0.3989, "step": 13742 }, { "epoch": 0.8368906616326158, "grad_norm": 1.047961012912035, "learning_rate": 4.766688501644855e-06, "loss": 0.3559, "step": 13743 }, { "epoch": 0.8369515574094937, "grad_norm": 0.9379563605759806, "learning_rate": 4.766654840305566e-06, "loss": 0.3904, "step": 13744 }, { "epoch": 0.8370124531863715, "grad_norm": 0.9123664904588519, "learning_rate": 4.766621176657058e-06, "loss": 0.4488, "step": 13745 }, { "epoch": 0.8370733489632494, "grad_norm": 0.974474394447223, "learning_rate": 4.766587510699362e-06, "loss": 0.3637, "step": 13746 }, { "epoch": 0.8371342447401273, "grad_norm": 1.002279298424339, "learning_rate": 4.766553842432514e-06, "loss": 0.3747, "step": 13747 }, { "epoch": 0.8371951405170052, "grad_norm": 0.9585255056415724, "learning_rate": 4.766520171856548e-06, "loss": 0.4167, "step": 13748 }, { "epoch": 0.837256036293883, "grad_norm": 1.1026073840398753, "learning_rate": 4.7664864989714985e-06, "loss": 0.4369, "step": 13749 }, { "epoch": 0.8373169320707609, "grad_norm": 1.0809134324232725, "learning_rate": 4.7664528237774e-06, "loss": 0.372, "step": 13750 }, { "epoch": 0.8373778278476388, "grad_norm": 0.9871831360359894, "learning_rate": 4.766419146274286e-06, "loss": 0.4484, "step": 13751 }, { "epoch": 0.8374387236245167, "grad_norm": 1.1026521389811694, "learning_rate": 4.766385466462191e-06, "loss": 0.3806, "step": 13752 }, { "epoch": 0.8374996194013945, "grad_norm": 1.1002516649822633, "learning_rate": 4.76635178434115e-06, "loss": 0.3747, "step": 13753 }, { "epoch": 0.8375605151782723, "grad_norm": 0.9706956168963501, "learning_rate": 4.7663180999111975e-06, "loss": 0.4719, "step": 13754 }, { "epoch": 0.8376214109551503, "grad_norm": 0.9831438683552574, "learning_rate": 4.766284413172367e-06, "loss": 0.4589, "step": 13755 }, { "epoch": 0.8376823067320281, "grad_norm": 1.0443188953812754, "learning_rate": 4.7662507241246925e-06, "loss": 0.3699, "step": 13756 }, { "epoch": 0.837743202508906, "grad_norm": 0.9886215386707945, "learning_rate": 4.766217032768209e-06, "loss": 0.4141, "step": 13757 }, { "epoch": 0.8378040982857838, "grad_norm": 0.9754760225363541, "learning_rate": 4.766183339102951e-06, "loss": 0.4077, "step": 13758 }, { "epoch": 0.8378649940626618, "grad_norm": 0.9620003081736435, "learning_rate": 4.766149643128952e-06, "loss": 0.3866, "step": 13759 }, { "epoch": 0.8379258898395396, "grad_norm": 1.0017409955277659, "learning_rate": 4.766115944846248e-06, "loss": 0.3497, "step": 13760 }, { "epoch": 0.8379867856164175, "grad_norm": 1.0085300203288665, "learning_rate": 4.766082244254871e-06, "loss": 0.4252, "step": 13761 }, { "epoch": 0.8380476813932953, "grad_norm": 1.0370586799816952, "learning_rate": 4.766048541354857e-06, "loss": 0.3487, "step": 13762 }, { "epoch": 0.8381085771701733, "grad_norm": 1.0355317827721586, "learning_rate": 4.76601483614624e-06, "loss": 0.407, "step": 13763 }, { "epoch": 0.8381694729470511, "grad_norm": 1.010426355404685, "learning_rate": 4.765981128629054e-06, "loss": 0.4508, "step": 13764 }, { "epoch": 0.838230368723929, "grad_norm": 0.9854225275279045, "learning_rate": 4.765947418803334e-06, "loss": 0.3899, "step": 13765 }, { "epoch": 0.8382912645008068, "grad_norm": 0.9664873079252655, "learning_rate": 4.765913706669113e-06, "loss": 0.3766, "step": 13766 }, { "epoch": 0.8383521602776848, "grad_norm": 0.9832793513514666, "learning_rate": 4.765879992226426e-06, "loss": 0.4508, "step": 13767 }, { "epoch": 0.8384130560545626, "grad_norm": 0.9477937810975237, "learning_rate": 4.765846275475309e-06, "loss": 0.4209, "step": 13768 }, { "epoch": 0.8384739518314405, "grad_norm": 1.1017718878950247, "learning_rate": 4.7658125564157945e-06, "loss": 0.3774, "step": 13769 }, { "epoch": 0.8385348476083183, "grad_norm": 1.1410762822588363, "learning_rate": 4.765778835047916e-06, "loss": 0.4129, "step": 13770 }, { "epoch": 0.8385957433851963, "grad_norm": 0.9915717912848404, "learning_rate": 4.765745111371711e-06, "loss": 0.3454, "step": 13771 }, { "epoch": 0.8386566391620741, "grad_norm": 1.0271542491069467, "learning_rate": 4.76571138538721e-06, "loss": 0.5246, "step": 13772 }, { "epoch": 0.838717534938952, "grad_norm": 1.018214698588612, "learning_rate": 4.765677657094451e-06, "loss": 0.4077, "step": 13773 }, { "epoch": 0.8387784307158298, "grad_norm": 1.0237384569727714, "learning_rate": 4.765643926493466e-06, "loss": 0.4364, "step": 13774 }, { "epoch": 0.8388393264927078, "grad_norm": 0.9580345316551615, "learning_rate": 4.76561019358429e-06, "loss": 0.4371, "step": 13775 }, { "epoch": 0.8389002222695856, "grad_norm": 1.1676902212562073, "learning_rate": 4.765576458366958e-06, "loss": 0.3403, "step": 13776 }, { "epoch": 0.8389611180464634, "grad_norm": 1.0288546936020566, "learning_rate": 4.765542720841503e-06, "loss": 0.3608, "step": 13777 }, { "epoch": 0.8390220138233414, "grad_norm": 1.0613051612156503, "learning_rate": 4.765508981007961e-06, "loss": 0.3712, "step": 13778 }, { "epoch": 0.8390829096002193, "grad_norm": 0.9755180000459381, "learning_rate": 4.765475238866365e-06, "loss": 0.4131, "step": 13779 }, { "epoch": 0.8391438053770971, "grad_norm": 1.029446291778636, "learning_rate": 4.7654414944167505e-06, "loss": 0.4186, "step": 13780 }, { "epoch": 0.8392047011539749, "grad_norm": 1.0326434765771781, "learning_rate": 4.765407747659151e-06, "loss": 0.3593, "step": 13781 }, { "epoch": 0.8392655969308529, "grad_norm": 0.9804771913378901, "learning_rate": 4.765373998593601e-06, "loss": 0.3734, "step": 13782 }, { "epoch": 0.8393264927077307, "grad_norm": 1.062570146003364, "learning_rate": 4.7653402472201345e-06, "loss": 0.3545, "step": 13783 }, { "epoch": 0.8393873884846086, "grad_norm": 0.9054143524446101, "learning_rate": 4.765306493538787e-06, "loss": 0.4447, "step": 13784 }, { "epoch": 0.8394482842614864, "grad_norm": 0.9864043799617974, "learning_rate": 4.765272737549593e-06, "loss": 0.4098, "step": 13785 }, { "epoch": 0.8395091800383644, "grad_norm": 0.8841579548605218, "learning_rate": 4.765238979252586e-06, "loss": 0.5067, "step": 13786 }, { "epoch": 0.8395700758152422, "grad_norm": 1.0555430316701166, "learning_rate": 4.7652052186478e-06, "loss": 0.401, "step": 13787 }, { "epoch": 0.8396309715921201, "grad_norm": 0.9226530758617256, "learning_rate": 4.76517145573527e-06, "loss": 0.4207, "step": 13788 }, { "epoch": 0.8396918673689979, "grad_norm": 1.0699075783992635, "learning_rate": 4.765137690515031e-06, "loss": 0.3621, "step": 13789 }, { "epoch": 0.8397527631458759, "grad_norm": 1.063433643140824, "learning_rate": 4.765103922987117e-06, "loss": 0.4204, "step": 13790 }, { "epoch": 0.8398136589227537, "grad_norm": 1.0292836709961728, "learning_rate": 4.765070153151561e-06, "loss": 0.3864, "step": 13791 }, { "epoch": 0.8398745546996316, "grad_norm": 0.934640356457134, "learning_rate": 4.7650363810084e-06, "loss": 0.4309, "step": 13792 }, { "epoch": 0.8399354504765094, "grad_norm": 0.9099219543121125, "learning_rate": 4.765002606557666e-06, "loss": 0.4892, "step": 13793 }, { "epoch": 0.8399963462533874, "grad_norm": 0.9622430003537962, "learning_rate": 4.7649688297993946e-06, "loss": 0.4253, "step": 13794 }, { "epoch": 0.8400572420302652, "grad_norm": 0.9175593617209713, "learning_rate": 4.764935050733621e-06, "loss": 0.483, "step": 13795 }, { "epoch": 0.8401181378071431, "grad_norm": 1.0018808999748128, "learning_rate": 4.764901269360377e-06, "loss": 0.3912, "step": 13796 }, { "epoch": 0.8401790335840209, "grad_norm": 1.0412074623468497, "learning_rate": 4.7648674856797e-06, "loss": 0.3899, "step": 13797 }, { "epoch": 0.8402399293608989, "grad_norm": 1.0478918416247154, "learning_rate": 4.764833699691623e-06, "loss": 0.4533, "step": 13798 }, { "epoch": 0.8403008251377767, "grad_norm": 1.1239957406775996, "learning_rate": 4.764799911396179e-06, "loss": 0.4192, "step": 13799 }, { "epoch": 0.8403617209146546, "grad_norm": 1.0009265621808145, "learning_rate": 4.764766120793406e-06, "loss": 0.4032, "step": 13800 }, { "epoch": 0.8404226166915324, "grad_norm": 1.0576137173053912, "learning_rate": 4.7647323278833355e-06, "loss": 0.4265, "step": 13801 }, { "epoch": 0.8404835124684104, "grad_norm": 0.9895348449201645, "learning_rate": 4.764698532666003e-06, "loss": 0.4261, "step": 13802 }, { "epoch": 0.8405444082452882, "grad_norm": 0.9756990363619527, "learning_rate": 4.764664735141443e-06, "loss": 0.4485, "step": 13803 }, { "epoch": 0.840605304022166, "grad_norm": 0.9576593964995735, "learning_rate": 4.764630935309689e-06, "loss": 0.3928, "step": 13804 }, { "epoch": 0.8406661997990439, "grad_norm": 0.9760106245186904, "learning_rate": 4.764597133170776e-06, "loss": 0.493, "step": 13805 }, { "epoch": 0.8407270955759218, "grad_norm": 1.050895876055306, "learning_rate": 4.764563328724739e-06, "loss": 0.3916, "step": 13806 }, { "epoch": 0.8407879913527997, "grad_norm": 0.9305464195688168, "learning_rate": 4.764529521971612e-06, "loss": 0.4288, "step": 13807 }, { "epoch": 0.8408488871296775, "grad_norm": 0.9698170290107911, "learning_rate": 4.764495712911429e-06, "loss": 0.4236, "step": 13808 }, { "epoch": 0.8409097829065554, "grad_norm": 1.1710386628072325, "learning_rate": 4.764461901544225e-06, "loss": 0.4353, "step": 13809 }, { "epoch": 0.8409706786834333, "grad_norm": 0.9797966466611386, "learning_rate": 4.7644280878700345e-06, "loss": 0.4312, "step": 13810 }, { "epoch": 0.8410315744603112, "grad_norm": 1.002897553078735, "learning_rate": 4.764394271888891e-06, "loss": 0.4273, "step": 13811 }, { "epoch": 0.841092470237189, "grad_norm": 0.946675749611563, "learning_rate": 4.76436045360083e-06, "loss": 0.4544, "step": 13812 }, { "epoch": 0.8411533660140669, "grad_norm": 0.9794263514741629, "learning_rate": 4.764326633005887e-06, "loss": 0.4229, "step": 13813 }, { "epoch": 0.8412142617909448, "grad_norm": 1.0573425777588825, "learning_rate": 4.764292810104093e-06, "loss": 0.3752, "step": 13814 }, { "epoch": 0.8412751575678227, "grad_norm": 0.9633469650098133, "learning_rate": 4.7642589848954855e-06, "loss": 0.3901, "step": 13815 }, { "epoch": 0.8413360533447005, "grad_norm": 1.0008074856420324, "learning_rate": 4.764225157380098e-06, "loss": 0.3553, "step": 13816 }, { "epoch": 0.8413969491215784, "grad_norm": 0.9019151594473391, "learning_rate": 4.764191327557966e-06, "loss": 0.499, "step": 13817 }, { "epoch": 0.8414578448984563, "grad_norm": 1.0043232687008012, "learning_rate": 4.764157495429121e-06, "loss": 0.453, "step": 13818 }, { "epoch": 0.8415187406753342, "grad_norm": 0.9707617195586231, "learning_rate": 4.764123660993601e-06, "loss": 0.3804, "step": 13819 }, { "epoch": 0.841579636452212, "grad_norm": 0.974098266104385, "learning_rate": 4.7640898242514385e-06, "loss": 0.3757, "step": 13820 }, { "epoch": 0.84164053222909, "grad_norm": 1.0789624382030916, "learning_rate": 4.764055985202668e-06, "loss": 0.4438, "step": 13821 }, { "epoch": 0.8417014280059678, "grad_norm": 1.04426854284939, "learning_rate": 4.764022143847324e-06, "loss": 0.3996, "step": 13822 }, { "epoch": 0.8417623237828457, "grad_norm": 0.9908966997610237, "learning_rate": 4.7639883001854426e-06, "loss": 0.3938, "step": 13823 }, { "epoch": 0.8418232195597235, "grad_norm": 1.114778215446856, "learning_rate": 4.763954454217055e-06, "loss": 0.447, "step": 13824 }, { "epoch": 0.8418841153366015, "grad_norm": 0.9773674666451796, "learning_rate": 4.763920605942199e-06, "loss": 0.4082, "step": 13825 }, { "epoch": 0.8419450111134793, "grad_norm": 1.0077103988147151, "learning_rate": 4.763886755360908e-06, "loss": 0.4479, "step": 13826 }, { "epoch": 0.8420059068903571, "grad_norm": 1.0752678326785907, "learning_rate": 4.763852902473216e-06, "loss": 0.3937, "step": 13827 }, { "epoch": 0.842066802667235, "grad_norm": 0.9991542233730061, "learning_rate": 4.763819047279158e-06, "loss": 0.4167, "step": 13828 }, { "epoch": 0.842127698444113, "grad_norm": 1.030765964857511, "learning_rate": 4.763785189778768e-06, "loss": 0.4371, "step": 13829 }, { "epoch": 0.8421885942209908, "grad_norm": 0.9548280478338499, "learning_rate": 4.76375132997208e-06, "loss": 0.4775, "step": 13830 }, { "epoch": 0.8422494899978686, "grad_norm": 0.9252271703727993, "learning_rate": 4.76371746785913e-06, "loss": 0.4206, "step": 13831 }, { "epoch": 0.8423103857747465, "grad_norm": 1.027041337474208, "learning_rate": 4.763683603439951e-06, "loss": 0.3653, "step": 13832 }, { "epoch": 0.8423712815516244, "grad_norm": 0.9859107637238524, "learning_rate": 4.763649736714579e-06, "loss": 0.4185, "step": 13833 }, { "epoch": 0.8424321773285023, "grad_norm": 0.9177008752510332, "learning_rate": 4.763615867683047e-06, "loss": 0.3967, "step": 13834 }, { "epoch": 0.8424930731053801, "grad_norm": 0.9632235268722295, "learning_rate": 4.763581996345391e-06, "loss": 0.4644, "step": 13835 }, { "epoch": 0.842553968882258, "grad_norm": 0.9459923063180311, "learning_rate": 4.763548122701644e-06, "loss": 0.4874, "step": 13836 }, { "epoch": 0.8426148646591359, "grad_norm": 0.9799090751486921, "learning_rate": 4.763514246751841e-06, "loss": 0.382, "step": 13837 }, { "epoch": 0.8426757604360138, "grad_norm": 1.0489619806728654, "learning_rate": 4.763480368496018e-06, "loss": 0.4155, "step": 13838 }, { "epoch": 0.8427366562128916, "grad_norm": 1.0359963088560675, "learning_rate": 4.763446487934208e-06, "loss": 0.3688, "step": 13839 }, { "epoch": 0.8427975519897695, "grad_norm": 0.9986548792348645, "learning_rate": 4.763412605066445e-06, "loss": 0.384, "step": 13840 }, { "epoch": 0.8428584477666474, "grad_norm": 1.023786443320976, "learning_rate": 4.763378719892765e-06, "loss": 0.4971, "step": 13841 }, { "epoch": 0.8429193435435253, "grad_norm": 1.1178113103615048, "learning_rate": 4.763344832413201e-06, "loss": 0.3994, "step": 13842 }, { "epoch": 0.8429802393204031, "grad_norm": 0.9732744756341627, "learning_rate": 4.763310942627789e-06, "loss": 0.4628, "step": 13843 }, { "epoch": 0.843041135097281, "grad_norm": 0.9849479976264284, "learning_rate": 4.7632770505365625e-06, "loss": 0.4072, "step": 13844 }, { "epoch": 0.8431020308741589, "grad_norm": 1.0111888642345805, "learning_rate": 4.763243156139556e-06, "loss": 0.4369, "step": 13845 }, { "epoch": 0.8431629266510368, "grad_norm": 0.9413544907972571, "learning_rate": 4.763209259436805e-06, "loss": 0.4651, "step": 13846 }, { "epoch": 0.8432238224279146, "grad_norm": 0.9598460361292558, "learning_rate": 4.763175360428344e-06, "loss": 0.4382, "step": 13847 }, { "epoch": 0.8432847182047924, "grad_norm": 0.9443828462949749, "learning_rate": 4.763141459114207e-06, "loss": 0.4041, "step": 13848 }, { "epoch": 0.8433456139816704, "grad_norm": 1.0090772169265827, "learning_rate": 4.763107555494427e-06, "loss": 0.4299, "step": 13849 }, { "epoch": 0.8434065097585483, "grad_norm": 1.0581874916868588, "learning_rate": 4.763073649569041e-06, "loss": 0.4018, "step": 13850 }, { "epoch": 0.8434674055354261, "grad_norm": 0.9976544949491846, "learning_rate": 4.763039741338083e-06, "loss": 0.4129, "step": 13851 }, { "epoch": 0.8435283013123039, "grad_norm": 0.9925426357412104, "learning_rate": 4.763005830801587e-06, "loss": 0.4117, "step": 13852 }, { "epoch": 0.8435891970891819, "grad_norm": 1.0099151534180353, "learning_rate": 4.762971917959587e-06, "loss": 0.3794, "step": 13853 }, { "epoch": 0.8436500928660597, "grad_norm": 1.0103876221147239, "learning_rate": 4.762938002812119e-06, "loss": 0.4238, "step": 13854 }, { "epoch": 0.8437109886429376, "grad_norm": 1.020538102700544, "learning_rate": 4.7629040853592166e-06, "loss": 0.424, "step": 13855 }, { "epoch": 0.8437718844198154, "grad_norm": 1.0596496064320335, "learning_rate": 4.762870165600915e-06, "loss": 0.4502, "step": 13856 }, { "epoch": 0.8438327801966934, "grad_norm": 1.0073453475476093, "learning_rate": 4.762836243537247e-06, "loss": 0.4069, "step": 13857 }, { "epoch": 0.8438936759735712, "grad_norm": 1.0335004462193953, "learning_rate": 4.7628023191682495e-06, "loss": 0.367, "step": 13858 }, { "epoch": 0.8439545717504491, "grad_norm": 0.9225528377561096, "learning_rate": 4.762768392493956e-06, "loss": 0.4528, "step": 13859 }, { "epoch": 0.844015467527327, "grad_norm": 1.0084753510650797, "learning_rate": 4.762734463514401e-06, "loss": 0.4158, "step": 13860 }, { "epoch": 0.8440763633042049, "grad_norm": 1.0466079334588356, "learning_rate": 4.76270053222962e-06, "loss": 0.4012, "step": 13861 }, { "epoch": 0.8441372590810827, "grad_norm": 1.145150242669326, "learning_rate": 4.762666598639646e-06, "loss": 0.3769, "step": 13862 }, { "epoch": 0.8441981548579606, "grad_norm": 0.9760487110992617, "learning_rate": 4.762632662744513e-06, "loss": 0.4138, "step": 13863 }, { "epoch": 0.8442590506348385, "grad_norm": 0.9371964615761901, "learning_rate": 4.7625987245442584e-06, "loss": 0.4384, "step": 13864 }, { "epoch": 0.8443199464117164, "grad_norm": 0.9935450624569454, "learning_rate": 4.762564784038915e-06, "loss": 0.4404, "step": 13865 }, { "epoch": 0.8443808421885942, "grad_norm": 0.9939383533335019, "learning_rate": 4.762530841228518e-06, "loss": 0.4756, "step": 13866 }, { "epoch": 0.8444417379654721, "grad_norm": 1.0426784544331311, "learning_rate": 4.762496896113101e-06, "loss": 0.3451, "step": 13867 }, { "epoch": 0.84450263374235, "grad_norm": 1.0486614349259789, "learning_rate": 4.7624629486927e-06, "loss": 0.3614, "step": 13868 }, { "epoch": 0.8445635295192279, "grad_norm": 0.9677824318113679, "learning_rate": 4.762428998967348e-06, "loss": 0.4254, "step": 13869 }, { "epoch": 0.8446244252961057, "grad_norm": 0.9453981645356412, "learning_rate": 4.7623950469370805e-06, "loss": 0.4036, "step": 13870 }, { "epoch": 0.8446853210729836, "grad_norm": 0.8752953421773605, "learning_rate": 4.7623610926019315e-06, "loss": 0.4821, "step": 13871 }, { "epoch": 0.8447462168498615, "grad_norm": 1.0450710177654754, "learning_rate": 4.762327135961937e-06, "loss": 0.4373, "step": 13872 }, { "epoch": 0.8448071126267394, "grad_norm": 0.9784993373559642, "learning_rate": 4.76229317701713e-06, "loss": 0.4673, "step": 13873 }, { "epoch": 0.8448680084036172, "grad_norm": 1.0574412612006892, "learning_rate": 4.762259215767547e-06, "loss": 0.348, "step": 13874 }, { "epoch": 0.844928904180495, "grad_norm": 1.1282907899775851, "learning_rate": 4.76222525221322e-06, "loss": 0.3843, "step": 13875 }, { "epoch": 0.844989799957373, "grad_norm": 1.033439210615238, "learning_rate": 4.762191286354185e-06, "loss": 0.3787, "step": 13876 }, { "epoch": 0.8450506957342508, "grad_norm": 1.0007319101646832, "learning_rate": 4.762157318190477e-06, "loss": 0.4202, "step": 13877 }, { "epoch": 0.8451115915111287, "grad_norm": 1.0559660470697367, "learning_rate": 4.762123347722129e-06, "loss": 0.373, "step": 13878 }, { "epoch": 0.8451724872880065, "grad_norm": 1.052159336650759, "learning_rate": 4.762089374949178e-06, "loss": 0.4409, "step": 13879 }, { "epoch": 0.8452333830648845, "grad_norm": 1.0254255963139327, "learning_rate": 4.762055399871657e-06, "loss": 0.3361, "step": 13880 }, { "epoch": 0.8452942788417623, "grad_norm": 1.0039518047723228, "learning_rate": 4.7620214224896014e-06, "loss": 0.5025, "step": 13881 }, { "epoch": 0.8453551746186402, "grad_norm": 1.0010576736180736, "learning_rate": 4.761987442803045e-06, "loss": 0.4588, "step": 13882 }, { "epoch": 0.845416070395518, "grad_norm": 1.0585853561011962, "learning_rate": 4.761953460812023e-06, "loss": 0.421, "step": 13883 }, { "epoch": 0.845476966172396, "grad_norm": 0.974925317980617, "learning_rate": 4.76191947651657e-06, "loss": 0.4611, "step": 13884 }, { "epoch": 0.8455378619492738, "grad_norm": 0.9504996833718273, "learning_rate": 4.76188548991672e-06, "loss": 0.4237, "step": 13885 }, { "epoch": 0.8455987577261517, "grad_norm": 1.0569086571818842, "learning_rate": 4.761851501012509e-06, "loss": 0.4215, "step": 13886 }, { "epoch": 0.8456596535030295, "grad_norm": 0.9963299495467646, "learning_rate": 4.7618175098039696e-06, "loss": 0.3859, "step": 13887 }, { "epoch": 0.8457205492799075, "grad_norm": 0.9988024684035763, "learning_rate": 4.761783516291138e-06, "loss": 0.5225, "step": 13888 }, { "epoch": 0.8457814450567853, "grad_norm": 1.0383947829743048, "learning_rate": 4.7617495204740485e-06, "loss": 0.3999, "step": 13889 }, { "epoch": 0.8458423408336632, "grad_norm": 0.9584225339842402, "learning_rate": 4.761715522352736e-06, "loss": 0.4469, "step": 13890 }, { "epoch": 0.845903236610541, "grad_norm": 0.9902564681697921, "learning_rate": 4.761681521927234e-06, "loss": 0.4722, "step": 13891 }, { "epoch": 0.845964132387419, "grad_norm": 0.971475544890025, "learning_rate": 4.761647519197578e-06, "loss": 0.3966, "step": 13892 }, { "epoch": 0.8460250281642968, "grad_norm": 0.9343684275726005, "learning_rate": 4.761613514163803e-06, "loss": 0.3986, "step": 13893 }, { "epoch": 0.8460859239411747, "grad_norm": 1.0518805473851798, "learning_rate": 4.761579506825942e-06, "loss": 0.4795, "step": 13894 }, { "epoch": 0.8461468197180525, "grad_norm": 0.9500151351185737, "learning_rate": 4.761545497184032e-06, "loss": 0.459, "step": 13895 }, { "epoch": 0.8462077154949305, "grad_norm": 1.0077502748742826, "learning_rate": 4.761511485238106e-06, "loss": 0.3758, "step": 13896 }, { "epoch": 0.8462686112718083, "grad_norm": 1.0715453193001954, "learning_rate": 4.761477470988199e-06, "loss": 0.4213, "step": 13897 }, { "epoch": 0.8463295070486861, "grad_norm": 0.9772800026322429, "learning_rate": 4.761443454434346e-06, "loss": 0.392, "step": 13898 }, { "epoch": 0.846390402825564, "grad_norm": 0.9700637431978252, "learning_rate": 4.7614094355765816e-06, "loss": 0.3898, "step": 13899 }, { "epoch": 0.846451298602442, "grad_norm": 1.011433186182143, "learning_rate": 4.76137541441494e-06, "loss": 0.43, "step": 13900 }, { "epoch": 0.8465121943793198, "grad_norm": 0.8432159067273428, "learning_rate": 4.761341390949456e-06, "loss": 0.4724, "step": 13901 }, { "epoch": 0.8465730901561976, "grad_norm": 0.9720952116237898, "learning_rate": 4.761307365180165e-06, "loss": 0.366, "step": 13902 }, { "epoch": 0.8466339859330756, "grad_norm": 0.9992143253595954, "learning_rate": 4.7612733371071e-06, "loss": 0.4157, "step": 13903 }, { "epoch": 0.8466948817099534, "grad_norm": 0.9419336708575511, "learning_rate": 4.761239306730299e-06, "loss": 0.4259, "step": 13904 }, { "epoch": 0.8467557774868313, "grad_norm": 1.0189177214815006, "learning_rate": 4.7612052740497925e-06, "loss": 0.3661, "step": 13905 }, { "epoch": 0.8468166732637091, "grad_norm": 0.9608374223596708, "learning_rate": 4.761171239065617e-06, "loss": 0.4793, "step": 13906 }, { "epoch": 0.8468775690405871, "grad_norm": 1.002000182482249, "learning_rate": 4.761137201777808e-06, "loss": 0.4195, "step": 13907 }, { "epoch": 0.8469384648174649, "grad_norm": 1.0493594839201175, "learning_rate": 4.761103162186399e-06, "loss": 0.473, "step": 13908 }, { "epoch": 0.8469993605943428, "grad_norm": 0.9836582585907899, "learning_rate": 4.761069120291425e-06, "loss": 0.4286, "step": 13909 }, { "epoch": 0.8470602563712206, "grad_norm": 1.009142598194822, "learning_rate": 4.761035076092922e-06, "loss": 0.4018, "step": 13910 }, { "epoch": 0.8471211521480986, "grad_norm": 1.0040668509882165, "learning_rate": 4.761001029590922e-06, "loss": 0.395, "step": 13911 }, { "epoch": 0.8471820479249764, "grad_norm": 1.0182566867884784, "learning_rate": 4.760966980785461e-06, "loss": 0.4445, "step": 13912 }, { "epoch": 0.8472429437018543, "grad_norm": 1.1235585989400318, "learning_rate": 4.760932929676575e-06, "loss": 0.4383, "step": 13913 }, { "epoch": 0.8473038394787321, "grad_norm": 0.9643987364156974, "learning_rate": 4.760898876264297e-06, "loss": 0.4261, "step": 13914 }, { "epoch": 0.8473647352556101, "grad_norm": 0.9527094613798729, "learning_rate": 4.760864820548662e-06, "loss": 0.4378, "step": 13915 }, { "epoch": 0.8474256310324879, "grad_norm": 0.9088716923575447, "learning_rate": 4.760830762529705e-06, "loss": 0.4364, "step": 13916 }, { "epoch": 0.8474865268093658, "grad_norm": 1.0129231052799255, "learning_rate": 4.760796702207461e-06, "loss": 0.3929, "step": 13917 }, { "epoch": 0.8475474225862436, "grad_norm": 0.9883392524098676, "learning_rate": 4.760762639581964e-06, "loss": 0.4795, "step": 13918 }, { "epoch": 0.8476083183631216, "grad_norm": 0.9800227299476805, "learning_rate": 4.760728574653249e-06, "loss": 0.3966, "step": 13919 }, { "epoch": 0.8476692141399994, "grad_norm": 1.0234851295763037, "learning_rate": 4.760694507421352e-06, "loss": 0.4273, "step": 13920 }, { "epoch": 0.8477301099168773, "grad_norm": 0.9884814101096651, "learning_rate": 4.760660437886304e-06, "loss": 0.3952, "step": 13921 }, { "epoch": 0.8477910056937551, "grad_norm": 1.0466594252425803, "learning_rate": 4.7606263660481444e-06, "loss": 0.3555, "step": 13922 }, { "epoch": 0.847851901470633, "grad_norm": 0.9853034657529396, "learning_rate": 4.7605922919069045e-06, "loss": 0.4909, "step": 13923 }, { "epoch": 0.8479127972475109, "grad_norm": 0.9043589784898665, "learning_rate": 4.760558215462621e-06, "loss": 0.4366, "step": 13924 }, { "epoch": 0.8479736930243887, "grad_norm": 0.9868215724674804, "learning_rate": 4.760524136715327e-06, "loss": 0.409, "step": 13925 }, { "epoch": 0.8480345888012666, "grad_norm": 0.9497228670847645, "learning_rate": 4.760490055665058e-06, "loss": 0.4281, "step": 13926 }, { "epoch": 0.8480954845781445, "grad_norm": 0.9940339908842402, "learning_rate": 4.760455972311849e-06, "loss": 0.4634, "step": 13927 }, { "epoch": 0.8481563803550224, "grad_norm": 1.077449073540843, "learning_rate": 4.760421886655734e-06, "loss": 0.4025, "step": 13928 }, { "epoch": 0.8482172761319002, "grad_norm": 1.107358937769684, "learning_rate": 4.760387798696748e-06, "loss": 0.43, "step": 13929 }, { "epoch": 0.8482781719087781, "grad_norm": 1.0199584101396346, "learning_rate": 4.760353708434927e-06, "loss": 0.434, "step": 13930 }, { "epoch": 0.848339067685656, "grad_norm": 1.0240338243539266, "learning_rate": 4.760319615870305e-06, "loss": 0.3935, "step": 13931 }, { "epoch": 0.8483999634625339, "grad_norm": 1.1095027986903128, "learning_rate": 4.760285521002915e-06, "loss": 0.4781, "step": 13932 }, { "epoch": 0.8484608592394117, "grad_norm": 1.0852490347466872, "learning_rate": 4.760251423832793e-06, "loss": 0.3411, "step": 13933 }, { "epoch": 0.8485217550162896, "grad_norm": 1.200608491823153, "learning_rate": 4.760217324359975e-06, "loss": 0.3214, "step": 13934 }, { "epoch": 0.8485826507931675, "grad_norm": 1.0970712720226827, "learning_rate": 4.7601832225844935e-06, "loss": 0.5147, "step": 13935 }, { "epoch": 0.8486435465700454, "grad_norm": 1.0354408711712304, "learning_rate": 4.760149118506385e-06, "loss": 0.3895, "step": 13936 }, { "epoch": 0.8487044423469232, "grad_norm": 1.0193392788039282, "learning_rate": 4.760115012125684e-06, "loss": 0.462, "step": 13937 }, { "epoch": 0.8487653381238011, "grad_norm": 1.1066559941724345, "learning_rate": 4.760080903442423e-06, "loss": 0.4641, "step": 13938 }, { "epoch": 0.848826233900679, "grad_norm": 0.9541305925620216, "learning_rate": 4.760046792456641e-06, "loss": 0.452, "step": 13939 }, { "epoch": 0.8488871296775569, "grad_norm": 1.0027284404427077, "learning_rate": 4.760012679168369e-06, "loss": 0.4431, "step": 13940 }, { "epoch": 0.8489480254544347, "grad_norm": 0.9578605742116526, "learning_rate": 4.759978563577643e-06, "loss": 0.4415, "step": 13941 }, { "epoch": 0.8490089212313127, "grad_norm": 1.0149148389690419, "learning_rate": 4.759944445684498e-06, "loss": 0.4502, "step": 13942 }, { "epoch": 0.8490698170081905, "grad_norm": 0.9738609172426246, "learning_rate": 4.7599103254889684e-06, "loss": 0.4372, "step": 13943 }, { "epoch": 0.8491307127850684, "grad_norm": 1.0516779255134163, "learning_rate": 4.759876202991089e-06, "loss": 0.4671, "step": 13944 }, { "epoch": 0.8491916085619462, "grad_norm": 1.0866368230561927, "learning_rate": 4.759842078190895e-06, "loss": 0.4791, "step": 13945 }, { "epoch": 0.8492525043388242, "grad_norm": 0.9141235039131221, "learning_rate": 4.759807951088421e-06, "loss": 0.4422, "step": 13946 }, { "epoch": 0.849313400115702, "grad_norm": 1.063541927341962, "learning_rate": 4.759773821683702e-06, "loss": 0.3436, "step": 13947 }, { "epoch": 0.8493742958925798, "grad_norm": 0.9488881838186528, "learning_rate": 4.759739689976772e-06, "loss": 0.3971, "step": 13948 }, { "epoch": 0.8494351916694577, "grad_norm": 0.8942015536256573, "learning_rate": 4.759705555967666e-06, "loss": 0.4531, "step": 13949 }, { "epoch": 0.8494960874463356, "grad_norm": 0.9605394236043014, "learning_rate": 4.7596714196564185e-06, "loss": 0.3975, "step": 13950 }, { "epoch": 0.8495569832232135, "grad_norm": 0.9989341787335478, "learning_rate": 4.759637281043066e-06, "loss": 0.4812, "step": 13951 }, { "epoch": 0.8496178790000913, "grad_norm": 0.9630986272558636, "learning_rate": 4.759603140127641e-06, "loss": 0.436, "step": 13952 }, { "epoch": 0.8496787747769692, "grad_norm": 0.9865994344206316, "learning_rate": 4.75956899691018e-06, "loss": 0.4433, "step": 13953 }, { "epoch": 0.8497396705538471, "grad_norm": 1.0196872864878002, "learning_rate": 4.759534851390716e-06, "loss": 0.3877, "step": 13954 }, { "epoch": 0.849800566330725, "grad_norm": 1.0382571328531558, "learning_rate": 4.759500703569286e-06, "loss": 0.353, "step": 13955 }, { "epoch": 0.8498614621076028, "grad_norm": 0.9702583217457971, "learning_rate": 4.759466553445923e-06, "loss": 0.4581, "step": 13956 }, { "epoch": 0.8499223578844807, "grad_norm": 0.9320664784744476, "learning_rate": 4.759432401020662e-06, "loss": 0.3607, "step": 13957 }, { "epoch": 0.8499832536613586, "grad_norm": 0.9790896987806237, "learning_rate": 4.75939824629354e-06, "loss": 0.4654, "step": 13958 }, { "epoch": 0.8500441494382365, "grad_norm": 1.0096768764098651, "learning_rate": 4.759364089264589e-06, "loss": 0.4002, "step": 13959 }, { "epoch": 0.8501050452151143, "grad_norm": 1.0052308668196859, "learning_rate": 4.7593299299338444e-06, "loss": 0.4327, "step": 13960 }, { "epoch": 0.8501659409919922, "grad_norm": 1.0359214447246083, "learning_rate": 4.7592957683013415e-06, "loss": 0.3767, "step": 13961 }, { "epoch": 0.8502268367688701, "grad_norm": 1.0312680324982249, "learning_rate": 4.7592616043671155e-06, "loss": 0.4089, "step": 13962 }, { "epoch": 0.850287732545748, "grad_norm": 0.9998133901762825, "learning_rate": 4.759227438131201e-06, "loss": 0.4098, "step": 13963 }, { "epoch": 0.8503486283226258, "grad_norm": 0.9935865534097635, "learning_rate": 4.759193269593633e-06, "loss": 0.4073, "step": 13964 }, { "epoch": 0.8504095240995037, "grad_norm": 1.1486469202122285, "learning_rate": 4.759159098754444e-06, "loss": 0.4009, "step": 13965 }, { "epoch": 0.8504704198763816, "grad_norm": 1.0015936989216674, "learning_rate": 4.759124925613672e-06, "loss": 0.4471, "step": 13966 }, { "epoch": 0.8505313156532595, "grad_norm": 1.043390036208736, "learning_rate": 4.759090750171351e-06, "loss": 0.4114, "step": 13967 }, { "epoch": 0.8505922114301373, "grad_norm": 0.9590432890666265, "learning_rate": 4.759056572427514e-06, "loss": 0.4487, "step": 13968 }, { "epoch": 0.8506531072070151, "grad_norm": 0.9083458254306219, "learning_rate": 4.759022392382198e-06, "loss": 0.423, "step": 13969 }, { "epoch": 0.8507140029838931, "grad_norm": 1.0466789310546927, "learning_rate": 4.758988210035436e-06, "loss": 0.4261, "step": 13970 }, { "epoch": 0.850774898760771, "grad_norm": 1.0213329286118624, "learning_rate": 4.758954025387265e-06, "loss": 0.3843, "step": 13971 }, { "epoch": 0.8508357945376488, "grad_norm": 1.001375246185868, "learning_rate": 4.7589198384377175e-06, "loss": 0.4247, "step": 13972 }, { "epoch": 0.8508966903145266, "grad_norm": 0.995783413874708, "learning_rate": 4.758885649186831e-06, "loss": 0.436, "step": 13973 }, { "epoch": 0.8509575860914046, "grad_norm": 1.003958036038364, "learning_rate": 4.758851457634638e-06, "loss": 0.4007, "step": 13974 }, { "epoch": 0.8510184818682824, "grad_norm": 1.0508642166712598, "learning_rate": 4.758817263781173e-06, "loss": 0.4092, "step": 13975 }, { "epoch": 0.8510793776451603, "grad_norm": 1.016756707448115, "learning_rate": 4.758783067626473e-06, "loss": 0.4035, "step": 13976 }, { "epoch": 0.8511402734220381, "grad_norm": 0.9862923598967659, "learning_rate": 4.758748869170571e-06, "loss": 0.4043, "step": 13977 }, { "epoch": 0.8512011691989161, "grad_norm": 1.003545438374843, "learning_rate": 4.758714668413503e-06, "loss": 0.4198, "step": 13978 }, { "epoch": 0.8512620649757939, "grad_norm": 1.0112858846325923, "learning_rate": 4.7586804653553045e-06, "loss": 0.4625, "step": 13979 }, { "epoch": 0.8513229607526718, "grad_norm": 0.9559285953572779, "learning_rate": 4.758646259996008e-06, "loss": 0.4287, "step": 13980 }, { "epoch": 0.8513838565295496, "grad_norm": 1.0423994184770582, "learning_rate": 4.75861205233565e-06, "loss": 0.4458, "step": 13981 }, { "epoch": 0.8514447523064276, "grad_norm": 1.0064282690774926, "learning_rate": 4.758577842374265e-06, "loss": 0.4048, "step": 13982 }, { "epoch": 0.8515056480833054, "grad_norm": 0.9650945512697064, "learning_rate": 4.758543630111888e-06, "loss": 0.4529, "step": 13983 }, { "epoch": 0.8515665438601833, "grad_norm": 0.8834503705455306, "learning_rate": 4.758509415548553e-06, "loss": 0.4421, "step": 13984 }, { "epoch": 0.8516274396370612, "grad_norm": 0.982865871977215, "learning_rate": 4.758475198684297e-06, "loss": 0.4149, "step": 13985 }, { "epoch": 0.8516883354139391, "grad_norm": 0.9911666060196924, "learning_rate": 4.758440979519152e-06, "loss": 0.4307, "step": 13986 }, { "epoch": 0.8517492311908169, "grad_norm": 0.9933505251707011, "learning_rate": 4.758406758053156e-06, "loss": 0.3652, "step": 13987 }, { "epoch": 0.8518101269676948, "grad_norm": 0.9767510857137565, "learning_rate": 4.758372534286341e-06, "loss": 0.3861, "step": 13988 }, { "epoch": 0.8518710227445727, "grad_norm": 0.971097333126795, "learning_rate": 4.758338308218743e-06, "loss": 0.4532, "step": 13989 }, { "epoch": 0.8519319185214506, "grad_norm": 1.0328271718696347, "learning_rate": 4.758304079850396e-06, "loss": 0.4096, "step": 13990 }, { "epoch": 0.8519928142983284, "grad_norm": 1.0168522325440816, "learning_rate": 4.758269849181337e-06, "loss": 0.4177, "step": 13991 }, { "epoch": 0.8520537100752062, "grad_norm": 0.9842842140264606, "learning_rate": 4.758235616211599e-06, "loss": 0.4624, "step": 13992 }, { "epoch": 0.8521146058520842, "grad_norm": 1.0663829618734606, "learning_rate": 4.758201380941219e-06, "loss": 0.4526, "step": 13993 }, { "epoch": 0.852175501628962, "grad_norm": 1.0201306817266644, "learning_rate": 4.758167143370228e-06, "loss": 0.4241, "step": 13994 }, { "epoch": 0.8522363974058399, "grad_norm": 1.054125756338609, "learning_rate": 4.758132903498665e-06, "loss": 0.3881, "step": 13995 }, { "epoch": 0.8522972931827177, "grad_norm": 0.9332150659037413, "learning_rate": 4.758098661326562e-06, "loss": 0.4095, "step": 13996 }, { "epoch": 0.8523581889595957, "grad_norm": 0.9850742110120959, "learning_rate": 4.7580644168539565e-06, "loss": 0.4098, "step": 13997 }, { "epoch": 0.8524190847364735, "grad_norm": 0.9603006640478216, "learning_rate": 4.758030170080881e-06, "loss": 0.4105, "step": 13998 }, { "epoch": 0.8524799805133514, "grad_norm": 0.9610911904206996, "learning_rate": 4.757995921007371e-06, "loss": 0.4475, "step": 13999 }, { "epoch": 0.8525408762902292, "grad_norm": 1.0439355259210825, "learning_rate": 4.757961669633462e-06, "loss": 0.4951, "step": 14000 }, { "epoch": 0.8526017720671072, "grad_norm": 1.0707373519110992, "learning_rate": 4.757927415959189e-06, "loss": 0.3733, "step": 14001 }, { "epoch": 0.852662667843985, "grad_norm": 1.035437681758609, "learning_rate": 4.757893159984586e-06, "loss": 0.4472, "step": 14002 }, { "epoch": 0.8527235636208629, "grad_norm": 1.1096511459081684, "learning_rate": 4.757858901709689e-06, "loss": 0.414, "step": 14003 }, { "epoch": 0.8527844593977407, "grad_norm": 0.9564707234396949, "learning_rate": 4.757824641134531e-06, "loss": 0.4046, "step": 14004 }, { "epoch": 0.8528453551746187, "grad_norm": 1.0118822470032713, "learning_rate": 4.75779037825915e-06, "loss": 0.408, "step": 14005 }, { "epoch": 0.8529062509514965, "grad_norm": 0.9455087388057719, "learning_rate": 4.7577561130835775e-06, "loss": 0.4011, "step": 14006 }, { "epoch": 0.8529671467283744, "grad_norm": 1.045479159020315, "learning_rate": 4.757721845607851e-06, "loss": 0.4446, "step": 14007 }, { "epoch": 0.8530280425052522, "grad_norm": 0.9796763497581727, "learning_rate": 4.757687575832004e-06, "loss": 0.4068, "step": 14008 }, { "epoch": 0.8530889382821302, "grad_norm": 1.004583660363609, "learning_rate": 4.757653303756072e-06, "loss": 0.4265, "step": 14009 }, { "epoch": 0.853149834059008, "grad_norm": 1.060285235060137, "learning_rate": 4.757619029380089e-06, "loss": 0.422, "step": 14010 }, { "epoch": 0.8532107298358859, "grad_norm": 0.9790290706269182, "learning_rate": 4.757584752704092e-06, "loss": 0.4697, "step": 14011 }, { "epoch": 0.8532716256127637, "grad_norm": 0.9684321370077192, "learning_rate": 4.757550473728114e-06, "loss": 0.4064, "step": 14012 }, { "epoch": 0.8533325213896417, "grad_norm": 1.0081389527016287, "learning_rate": 4.75751619245219e-06, "loss": 0.4749, "step": 14013 }, { "epoch": 0.8533934171665195, "grad_norm": 1.0562389827846699, "learning_rate": 4.757481908876356e-06, "loss": 0.4251, "step": 14014 }, { "epoch": 0.8534543129433974, "grad_norm": 0.9852927350001829, "learning_rate": 4.757447623000646e-06, "loss": 0.4077, "step": 14015 }, { "epoch": 0.8535152087202752, "grad_norm": 1.019935333110812, "learning_rate": 4.7574133348250955e-06, "loss": 0.4222, "step": 14016 }, { "epoch": 0.8535761044971532, "grad_norm": 0.984061328281806, "learning_rate": 4.757379044349739e-06, "loss": 0.386, "step": 14017 }, { "epoch": 0.853637000274031, "grad_norm": 0.9766380297997526, "learning_rate": 4.757344751574612e-06, "loss": 0.4079, "step": 14018 }, { "epoch": 0.8536978960509088, "grad_norm": 0.9609642104445718, "learning_rate": 4.7573104564997494e-06, "loss": 0.3877, "step": 14019 }, { "epoch": 0.8537587918277867, "grad_norm": 0.89595863208561, "learning_rate": 4.757276159125186e-06, "loss": 0.4741, "step": 14020 }, { "epoch": 0.8538196876046646, "grad_norm": 1.000400728338152, "learning_rate": 4.7572418594509555e-06, "loss": 0.3699, "step": 14021 }, { "epoch": 0.8538805833815425, "grad_norm": 0.9452010827170487, "learning_rate": 4.757207557477095e-06, "loss": 0.4047, "step": 14022 }, { "epoch": 0.8539414791584203, "grad_norm": 1.0002890388292658, "learning_rate": 4.757173253203637e-06, "loss": 0.407, "step": 14023 }, { "epoch": 0.8540023749352983, "grad_norm": 0.995535838673118, "learning_rate": 4.757138946630619e-06, "loss": 0.4107, "step": 14024 }, { "epoch": 0.8540632707121761, "grad_norm": 0.9487858292901878, "learning_rate": 4.7571046377580745e-06, "loss": 0.3963, "step": 14025 }, { "epoch": 0.854124166489054, "grad_norm": 1.0707819362540487, "learning_rate": 4.757070326586038e-06, "loss": 0.3938, "step": 14026 }, { "epoch": 0.8541850622659318, "grad_norm": 1.0700425934866176, "learning_rate": 4.757036013114546e-06, "loss": 0.4421, "step": 14027 }, { "epoch": 0.8542459580428098, "grad_norm": 1.03853669672067, "learning_rate": 4.757001697343633e-06, "loss": 0.3594, "step": 14028 }, { "epoch": 0.8543068538196876, "grad_norm": 1.0360748657170515, "learning_rate": 4.756967379273333e-06, "loss": 0.4149, "step": 14029 }, { "epoch": 0.8543677495965655, "grad_norm": 1.025281904187832, "learning_rate": 4.756933058903681e-06, "loss": 0.3912, "step": 14030 }, { "epoch": 0.8544286453734433, "grad_norm": 1.0235801580146635, "learning_rate": 4.7568987362347135e-06, "loss": 0.354, "step": 14031 }, { "epoch": 0.8544895411503213, "grad_norm": 0.9907292115627687, "learning_rate": 4.756864411266464e-06, "loss": 0.3983, "step": 14032 }, { "epoch": 0.8545504369271991, "grad_norm": 0.9689359371533346, "learning_rate": 4.7568300839989685e-06, "loss": 0.4575, "step": 14033 }, { "epoch": 0.854611332704077, "grad_norm": 1.0339043201058293, "learning_rate": 4.756795754432261e-06, "loss": 0.4009, "step": 14034 }, { "epoch": 0.8546722284809548, "grad_norm": 0.9818804303004974, "learning_rate": 4.756761422566377e-06, "loss": 0.4334, "step": 14035 }, { "epoch": 0.8547331242578328, "grad_norm": 1.073042157743819, "learning_rate": 4.756727088401351e-06, "loss": 0.294, "step": 14036 }, { "epoch": 0.8547940200347106, "grad_norm": 1.0376232049063427, "learning_rate": 4.7566927519372184e-06, "loss": 0.3216, "step": 14037 }, { "epoch": 0.8548549158115885, "grad_norm": 0.9411437089719284, "learning_rate": 4.756658413174015e-06, "loss": 0.4261, "step": 14038 }, { "epoch": 0.8549158115884663, "grad_norm": 1.033004136635273, "learning_rate": 4.756624072111774e-06, "loss": 0.4394, "step": 14039 }, { "epoch": 0.8549767073653443, "grad_norm": 1.020885728088282, "learning_rate": 4.756589728750532e-06, "loss": 0.3721, "step": 14040 }, { "epoch": 0.8550376031422221, "grad_norm": 1.0055861385467508, "learning_rate": 4.7565553830903225e-06, "loss": 0.3767, "step": 14041 }, { "epoch": 0.8550984989191, "grad_norm": 1.0585353465391913, "learning_rate": 4.756521035131182e-06, "loss": 0.4052, "step": 14042 }, { "epoch": 0.8551593946959778, "grad_norm": 1.0227456575007172, "learning_rate": 4.756486684873144e-06, "loss": 0.3213, "step": 14043 }, { "epoch": 0.8552202904728557, "grad_norm": 0.9716193889623629, "learning_rate": 4.756452332316245e-06, "loss": 0.4526, "step": 14044 }, { "epoch": 0.8552811862497336, "grad_norm": 1.0888219502375367, "learning_rate": 4.756417977460519e-06, "loss": 0.4265, "step": 14045 }, { "epoch": 0.8553420820266114, "grad_norm": 0.9602620457042285, "learning_rate": 4.756383620306002e-06, "loss": 0.4128, "step": 14046 }, { "epoch": 0.8554029778034893, "grad_norm": 0.9735252676668618, "learning_rate": 4.756349260852728e-06, "loss": 0.4197, "step": 14047 }, { "epoch": 0.8554638735803672, "grad_norm": 1.0606047338935605, "learning_rate": 4.756314899100731e-06, "loss": 0.3927, "step": 14048 }, { "epoch": 0.8555247693572451, "grad_norm": 0.9657624036887292, "learning_rate": 4.7562805350500485e-06, "loss": 0.4603, "step": 14049 }, { "epoch": 0.8555856651341229, "grad_norm": 0.9603846135435387, "learning_rate": 4.756246168700714e-06, "loss": 0.4371, "step": 14050 }, { "epoch": 0.8556465609110008, "grad_norm": 1.048895433761498, "learning_rate": 4.7562118000527624e-06, "loss": 0.3633, "step": 14051 }, { "epoch": 0.8557074566878787, "grad_norm": 1.0061532057917182, "learning_rate": 4.75617742910623e-06, "loss": 0.3823, "step": 14052 }, { "epoch": 0.8557683524647566, "grad_norm": 0.9278482699938039, "learning_rate": 4.7561430558611505e-06, "loss": 0.4211, "step": 14053 }, { "epoch": 0.8558292482416344, "grad_norm": 0.9369403511477437, "learning_rate": 4.75610868031756e-06, "loss": 0.4304, "step": 14054 }, { "epoch": 0.8558901440185123, "grad_norm": 1.000128293972222, "learning_rate": 4.7560743024754915e-06, "loss": 0.3884, "step": 14055 }, { "epoch": 0.8559510397953902, "grad_norm": 1.0024100531451638, "learning_rate": 4.756039922334982e-06, "loss": 0.4148, "step": 14056 }, { "epoch": 0.8560119355722681, "grad_norm": 1.1105955630113145, "learning_rate": 4.756005539896066e-06, "loss": 0.3837, "step": 14057 }, { "epoch": 0.8560728313491459, "grad_norm": 0.931905393229969, "learning_rate": 4.755971155158778e-06, "loss": 0.4729, "step": 14058 }, { "epoch": 0.8561337271260238, "grad_norm": 1.0358416587695012, "learning_rate": 4.755936768123155e-06, "loss": 0.4414, "step": 14059 }, { "epoch": 0.8561946229029017, "grad_norm": 0.935376137675858, "learning_rate": 4.7559023787892285e-06, "loss": 0.4807, "step": 14060 }, { "epoch": 0.8562555186797796, "grad_norm": 1.02311757548586, "learning_rate": 4.755867987157037e-06, "loss": 0.4257, "step": 14061 }, { "epoch": 0.8563164144566574, "grad_norm": 0.9232477591580349, "learning_rate": 4.755833593226614e-06, "loss": 0.433, "step": 14062 }, { "epoch": 0.8563773102335352, "grad_norm": 0.9482034605803383, "learning_rate": 4.755799196997994e-06, "loss": 0.3817, "step": 14063 }, { "epoch": 0.8564382060104132, "grad_norm": 1.0073902104922963, "learning_rate": 4.755764798471213e-06, "loss": 0.3369, "step": 14064 }, { "epoch": 0.856499101787291, "grad_norm": 0.8994731543266342, "learning_rate": 4.755730397646306e-06, "loss": 0.4319, "step": 14065 }, { "epoch": 0.8565599975641689, "grad_norm": 0.9702855882321023, "learning_rate": 4.755695994523307e-06, "loss": 0.3885, "step": 14066 }, { "epoch": 0.8566208933410469, "grad_norm": 0.9391277351251075, "learning_rate": 4.7556615891022525e-06, "loss": 0.4202, "step": 14067 }, { "epoch": 0.8566817891179247, "grad_norm": 0.9394065750627769, "learning_rate": 4.755627181383177e-06, "loss": 0.3767, "step": 14068 }, { "epoch": 0.8567426848948025, "grad_norm": 0.9423821027598617, "learning_rate": 4.7555927713661156e-06, "loss": 0.4339, "step": 14069 }, { "epoch": 0.8568035806716804, "grad_norm": 0.9789368009047269, "learning_rate": 4.7555583590511024e-06, "loss": 0.4252, "step": 14070 }, { "epoch": 0.8568644764485583, "grad_norm": 1.033706110333744, "learning_rate": 4.755523944438173e-06, "loss": 0.4294, "step": 14071 }, { "epoch": 0.8569253722254362, "grad_norm": 0.9354091537228418, "learning_rate": 4.755489527527364e-06, "loss": 0.3829, "step": 14072 }, { "epoch": 0.856986268002314, "grad_norm": 1.0461525710729, "learning_rate": 4.755455108318708e-06, "loss": 0.3765, "step": 14073 }, { "epoch": 0.8570471637791919, "grad_norm": 0.965808820125706, "learning_rate": 4.755420686812241e-06, "loss": 0.4715, "step": 14074 }, { "epoch": 0.8571080595560698, "grad_norm": 1.0290338620817487, "learning_rate": 4.7553862630079994e-06, "loss": 0.4188, "step": 14075 }, { "epoch": 0.8571689553329477, "grad_norm": 0.9121597862000657, "learning_rate": 4.755351836906017e-06, "loss": 0.4919, "step": 14076 }, { "epoch": 0.8572298511098255, "grad_norm": 0.9407696656223788, "learning_rate": 4.755317408506328e-06, "loss": 0.4062, "step": 14077 }, { "epoch": 0.8572907468867034, "grad_norm": 1.022208110531049, "learning_rate": 4.755282977808969e-06, "loss": 0.4149, "step": 14078 }, { "epoch": 0.8573516426635813, "grad_norm": 0.9514623216508314, "learning_rate": 4.7552485448139755e-06, "loss": 0.4106, "step": 14079 }, { "epoch": 0.8574125384404592, "grad_norm": 1.0898964818721504, "learning_rate": 4.755214109521381e-06, "loss": 0.4839, "step": 14080 }, { "epoch": 0.857473434217337, "grad_norm": 1.040606150092237, "learning_rate": 4.755179671931221e-06, "loss": 0.3704, "step": 14081 }, { "epoch": 0.8575343299942149, "grad_norm": 1.0065694556120701, "learning_rate": 4.755145232043531e-06, "loss": 0.3985, "step": 14082 }, { "epoch": 0.8575952257710928, "grad_norm": 1.0624029763509182, "learning_rate": 4.7551107898583456e-06, "loss": 0.3984, "step": 14083 }, { "epoch": 0.8576561215479707, "grad_norm": 1.0123178462419282, "learning_rate": 4.755076345375701e-06, "loss": 0.3833, "step": 14084 }, { "epoch": 0.8577170173248485, "grad_norm": 1.0106504541794692, "learning_rate": 4.755041898595631e-06, "loss": 0.4016, "step": 14085 }, { "epoch": 0.8577779131017264, "grad_norm": 1.050307563367751, "learning_rate": 4.755007449518172e-06, "loss": 0.4134, "step": 14086 }, { "epoch": 0.8578388088786043, "grad_norm": 1.003355929800399, "learning_rate": 4.754972998143358e-06, "loss": 0.411, "step": 14087 }, { "epoch": 0.8578997046554822, "grad_norm": 1.083252420601163, "learning_rate": 4.754938544471223e-06, "loss": 0.3807, "step": 14088 }, { "epoch": 0.85796060043236, "grad_norm": 0.9524834795723431, "learning_rate": 4.754904088501805e-06, "loss": 0.4634, "step": 14089 }, { "epoch": 0.8580214962092378, "grad_norm": 0.9707557172926664, "learning_rate": 4.7548696302351365e-06, "loss": 0.3901, "step": 14090 }, { "epoch": 0.8580823919861158, "grad_norm": 1.032517712542958, "learning_rate": 4.754835169671255e-06, "loss": 0.4179, "step": 14091 }, { "epoch": 0.8581432877629936, "grad_norm": 0.9828920727356496, "learning_rate": 4.754800706810193e-06, "loss": 0.3977, "step": 14092 }, { "epoch": 0.8582041835398715, "grad_norm": 1.0155887748819625, "learning_rate": 4.754766241651988e-06, "loss": 0.3394, "step": 14093 }, { "epoch": 0.8582650793167493, "grad_norm": 1.1019287843323395, "learning_rate": 4.754731774196673e-06, "loss": 0.3897, "step": 14094 }, { "epoch": 0.8583259750936273, "grad_norm": 1.0239915634836094, "learning_rate": 4.754697304444285e-06, "loss": 0.381, "step": 14095 }, { "epoch": 0.8583868708705051, "grad_norm": 1.0095861445691272, "learning_rate": 4.754662832394858e-06, "loss": 0.4263, "step": 14096 }, { "epoch": 0.858447766647383, "grad_norm": 0.9483654233346034, "learning_rate": 4.754628358048427e-06, "loss": 0.4077, "step": 14097 }, { "epoch": 0.8585086624242608, "grad_norm": 0.9781894798896903, "learning_rate": 4.754593881405028e-06, "loss": 0.4502, "step": 14098 }, { "epoch": 0.8585695582011388, "grad_norm": 0.9357454068692527, "learning_rate": 4.754559402464696e-06, "loss": 0.4659, "step": 14099 }, { "epoch": 0.8586304539780166, "grad_norm": 0.9992690777713152, "learning_rate": 4.754524921227465e-06, "loss": 0.4313, "step": 14100 }, { "epoch": 0.8586913497548945, "grad_norm": 1.1037214980651875, "learning_rate": 4.754490437693371e-06, "loss": 0.3726, "step": 14101 }, { "epoch": 0.8587522455317723, "grad_norm": 1.1070041606092553, "learning_rate": 4.754455951862449e-06, "loss": 0.3981, "step": 14102 }, { "epoch": 0.8588131413086503, "grad_norm": 1.0002504927764582, "learning_rate": 4.754421463734734e-06, "loss": 0.4149, "step": 14103 }, { "epoch": 0.8588740370855281, "grad_norm": 1.0502541330006367, "learning_rate": 4.754386973310261e-06, "loss": 0.4051, "step": 14104 }, { "epoch": 0.858934932862406, "grad_norm": 1.0180162379025703, "learning_rate": 4.754352480589067e-06, "loss": 0.406, "step": 14105 }, { "epoch": 0.8589958286392839, "grad_norm": 1.0934353486501813, "learning_rate": 4.754317985571184e-06, "loss": 0.4026, "step": 14106 }, { "epoch": 0.8590567244161618, "grad_norm": 1.0038259926043334, "learning_rate": 4.754283488256649e-06, "loss": 0.3749, "step": 14107 }, { "epoch": 0.8591176201930396, "grad_norm": 1.1486232330937745, "learning_rate": 4.754248988645498e-06, "loss": 0.3705, "step": 14108 }, { "epoch": 0.8591785159699175, "grad_norm": 1.005670162520737, "learning_rate": 4.7542144867377636e-06, "loss": 0.408, "step": 14109 }, { "epoch": 0.8592394117467954, "grad_norm": 0.9976388242333479, "learning_rate": 4.754179982533483e-06, "loss": 0.4288, "step": 14110 }, { "epoch": 0.8593003075236733, "grad_norm": 0.99905034546202, "learning_rate": 4.75414547603269e-06, "loss": 0.4051, "step": 14111 }, { "epoch": 0.8593612033005511, "grad_norm": 1.0776930872892905, "learning_rate": 4.754110967235421e-06, "loss": 0.3838, "step": 14112 }, { "epoch": 0.859422099077429, "grad_norm": 1.026357019122579, "learning_rate": 4.75407645614171e-06, "loss": 0.4288, "step": 14113 }, { "epoch": 0.8594829948543069, "grad_norm": 1.0490601910710045, "learning_rate": 4.754041942751594e-06, "loss": 0.4141, "step": 14114 }, { "epoch": 0.8595438906311847, "grad_norm": 0.9861702299817668, "learning_rate": 4.754007427065107e-06, "loss": 0.3654, "step": 14115 }, { "epoch": 0.8596047864080626, "grad_norm": 1.0314718938844778, "learning_rate": 4.753972909082283e-06, "loss": 0.437, "step": 14116 }, { "epoch": 0.8596656821849404, "grad_norm": 1.0390550796354903, "learning_rate": 4.753938388803159e-06, "loss": 0.4314, "step": 14117 }, { "epoch": 0.8597265779618184, "grad_norm": 0.9798020741804969, "learning_rate": 4.753903866227769e-06, "loss": 0.4012, "step": 14118 }, { "epoch": 0.8597874737386962, "grad_norm": 0.9883960374484567, "learning_rate": 4.7538693413561485e-06, "loss": 0.4143, "step": 14119 }, { "epoch": 0.8598483695155741, "grad_norm": 1.0507158435924657, "learning_rate": 4.753834814188333e-06, "loss": 0.3843, "step": 14120 }, { "epoch": 0.8599092652924519, "grad_norm": 0.9483895647071777, "learning_rate": 4.753800284724357e-06, "loss": 0.3694, "step": 14121 }, { "epoch": 0.8599701610693299, "grad_norm": 0.9712971871962569, "learning_rate": 4.753765752964257e-06, "loss": 0.3513, "step": 14122 }, { "epoch": 0.8600310568462077, "grad_norm": 1.136098649008431, "learning_rate": 4.753731218908066e-06, "loss": 0.3874, "step": 14123 }, { "epoch": 0.8600919526230856, "grad_norm": 1.004681122359956, "learning_rate": 4.753696682555822e-06, "loss": 0.3887, "step": 14124 }, { "epoch": 0.8601528483999634, "grad_norm": 1.0243127637083373, "learning_rate": 4.753662143907558e-06, "loss": 0.4422, "step": 14125 }, { "epoch": 0.8602137441768414, "grad_norm": 0.9998281402213173, "learning_rate": 4.75362760296331e-06, "loss": 0.5365, "step": 14126 }, { "epoch": 0.8602746399537192, "grad_norm": 0.9862071707108815, "learning_rate": 4.753593059723113e-06, "loss": 0.4357, "step": 14127 }, { "epoch": 0.8603355357305971, "grad_norm": 0.9141221573558151, "learning_rate": 4.753558514187003e-06, "loss": 0.4874, "step": 14128 }, { "epoch": 0.8603964315074749, "grad_norm": 1.0199284883618982, "learning_rate": 4.753523966355013e-06, "loss": 0.3743, "step": 14129 }, { "epoch": 0.8604573272843529, "grad_norm": 0.9448367787358394, "learning_rate": 4.753489416227179e-06, "loss": 0.4464, "step": 14130 }, { "epoch": 0.8605182230612307, "grad_norm": 1.0915725562453598, "learning_rate": 4.753454863803539e-06, "loss": 0.3728, "step": 14131 }, { "epoch": 0.8605791188381086, "grad_norm": 0.9337673306093512, "learning_rate": 4.753420309084125e-06, "loss": 0.4303, "step": 14132 }, { "epoch": 0.8606400146149864, "grad_norm": 0.9630721813211156, "learning_rate": 4.753385752068973e-06, "loss": 0.4178, "step": 14133 }, { "epoch": 0.8607009103918644, "grad_norm": 0.9509920806871713, "learning_rate": 4.753351192758119e-06, "loss": 0.433, "step": 14134 }, { "epoch": 0.8607618061687422, "grad_norm": 1.244476609296183, "learning_rate": 4.7533166311515975e-06, "loss": 0.4444, "step": 14135 }, { "epoch": 0.86082270194562, "grad_norm": 0.947889618585394, "learning_rate": 4.753282067249444e-06, "loss": 0.4419, "step": 14136 }, { "epoch": 0.8608835977224979, "grad_norm": 1.1322706919420078, "learning_rate": 4.7532475010516935e-06, "loss": 0.377, "step": 14137 }, { "epoch": 0.8609444934993759, "grad_norm": 0.9381695428330157, "learning_rate": 4.753212932558381e-06, "loss": 0.3748, "step": 14138 }, { "epoch": 0.8610053892762537, "grad_norm": 0.9792442439465211, "learning_rate": 4.753178361769542e-06, "loss": 0.4846, "step": 14139 }, { "epoch": 0.8610662850531315, "grad_norm": 0.9952201308564917, "learning_rate": 4.753143788685212e-06, "loss": 0.4794, "step": 14140 }, { "epoch": 0.8611271808300094, "grad_norm": 1.027525517916498, "learning_rate": 4.753109213305425e-06, "loss": 0.4081, "step": 14141 }, { "epoch": 0.8611880766068873, "grad_norm": 1.0392197638790155, "learning_rate": 4.7530746356302195e-06, "loss": 0.3755, "step": 14142 }, { "epoch": 0.8612489723837652, "grad_norm": 0.9879900897638734, "learning_rate": 4.753040055659627e-06, "loss": 0.3814, "step": 14143 }, { "epoch": 0.861309868160643, "grad_norm": 1.047841673652561, "learning_rate": 4.753005473393684e-06, "loss": 0.3706, "step": 14144 }, { "epoch": 0.8613707639375209, "grad_norm": 1.0905700929282238, "learning_rate": 4.752970888832425e-06, "loss": 0.3638, "step": 14145 }, { "epoch": 0.8614316597143988, "grad_norm": 1.0329191258243253, "learning_rate": 4.752936301975888e-06, "loss": 0.402, "step": 14146 }, { "epoch": 0.8614925554912767, "grad_norm": 1.0151634585563674, "learning_rate": 4.752901712824105e-06, "loss": 0.4159, "step": 14147 }, { "epoch": 0.8615534512681545, "grad_norm": 0.9695726425538382, "learning_rate": 4.752867121377113e-06, "loss": 0.4346, "step": 14148 }, { "epoch": 0.8616143470450325, "grad_norm": 1.1197464204988457, "learning_rate": 4.7528325276349465e-06, "loss": 0.4145, "step": 14149 }, { "epoch": 0.8616752428219103, "grad_norm": 1.0539474348363347, "learning_rate": 4.752797931597642e-06, "loss": 0.3511, "step": 14150 }, { "epoch": 0.8617361385987882, "grad_norm": 0.9719157169938533, "learning_rate": 4.752763333265233e-06, "loss": 0.4214, "step": 14151 }, { "epoch": 0.861797034375666, "grad_norm": 0.9619694157884096, "learning_rate": 4.7527287326377555e-06, "loss": 0.4151, "step": 14152 }, { "epoch": 0.861857930152544, "grad_norm": 1.0135815056998745, "learning_rate": 4.752694129715245e-06, "loss": 0.4047, "step": 14153 }, { "epoch": 0.8619188259294218, "grad_norm": 0.9936058554078275, "learning_rate": 4.7526595244977364e-06, "loss": 0.3433, "step": 14154 }, { "epoch": 0.8619797217062997, "grad_norm": 0.9981325628005401, "learning_rate": 4.7526249169852655e-06, "loss": 0.3767, "step": 14155 }, { "epoch": 0.8620406174831775, "grad_norm": 1.026313810723485, "learning_rate": 4.7525903071778665e-06, "loss": 0.3567, "step": 14156 }, { "epoch": 0.8621015132600555, "grad_norm": 1.0521859110153233, "learning_rate": 4.752555695075576e-06, "loss": 0.3645, "step": 14157 }, { "epoch": 0.8621624090369333, "grad_norm": 1.048458720340386, "learning_rate": 4.7525210806784285e-06, "loss": 0.3733, "step": 14158 }, { "epoch": 0.8622233048138112, "grad_norm": 0.9186071843569338, "learning_rate": 4.752486463986458e-06, "loss": 0.3954, "step": 14159 }, { "epoch": 0.862284200590689, "grad_norm": 0.9728939641371933, "learning_rate": 4.752451844999703e-06, "loss": 0.4859, "step": 14160 }, { "epoch": 0.862345096367567, "grad_norm": 1.0747705402911483, "learning_rate": 4.752417223718197e-06, "loss": 0.4026, "step": 14161 }, { "epoch": 0.8624059921444448, "grad_norm": 1.110957309199019, "learning_rate": 4.752382600141974e-06, "loss": 0.3908, "step": 14162 }, { "epoch": 0.8624668879213226, "grad_norm": 0.9913114731196313, "learning_rate": 4.752347974271071e-06, "loss": 0.4515, "step": 14163 }, { "epoch": 0.8625277836982005, "grad_norm": 0.9965158158588485, "learning_rate": 4.752313346105522e-06, "loss": 0.4164, "step": 14164 }, { "epoch": 0.8625886794750784, "grad_norm": 0.9733236113347703, "learning_rate": 4.752278715645364e-06, "loss": 0.404, "step": 14165 }, { "epoch": 0.8626495752519563, "grad_norm": 0.9845612699720149, "learning_rate": 4.75224408289063e-06, "loss": 0.3762, "step": 14166 }, { "epoch": 0.8627104710288341, "grad_norm": 1.0317353161069913, "learning_rate": 4.752209447841358e-06, "loss": 0.4019, "step": 14167 }, { "epoch": 0.862771366805712, "grad_norm": 1.0449663061844445, "learning_rate": 4.752174810497581e-06, "loss": 0.4284, "step": 14168 }, { "epoch": 0.8628322625825899, "grad_norm": 0.9155977455715557, "learning_rate": 4.752140170859335e-06, "loss": 0.4238, "step": 14169 }, { "epoch": 0.8628931583594678, "grad_norm": 1.0627808726393986, "learning_rate": 4.752105528926656e-06, "loss": 0.3482, "step": 14170 }, { "epoch": 0.8629540541363456, "grad_norm": 1.016940089352355, "learning_rate": 4.7520708846995785e-06, "loss": 0.4314, "step": 14171 }, { "epoch": 0.8630149499132235, "grad_norm": 0.9805359516165089, "learning_rate": 4.752036238178139e-06, "loss": 0.4316, "step": 14172 }, { "epoch": 0.8630758456901014, "grad_norm": 1.048128597071623, "learning_rate": 4.752001589362369e-06, "loss": 0.4525, "step": 14173 }, { "epoch": 0.8631367414669793, "grad_norm": 0.9405976180922178, "learning_rate": 4.75196693825231e-06, "loss": 0.3899, "step": 14174 }, { "epoch": 0.8631976372438571, "grad_norm": 1.000532677466723, "learning_rate": 4.751932284847991e-06, "loss": 0.3641, "step": 14175 }, { "epoch": 0.863258533020735, "grad_norm": 0.8922678004378656, "learning_rate": 4.751897629149451e-06, "loss": 0.4829, "step": 14176 }, { "epoch": 0.8633194287976129, "grad_norm": 0.9244112186393043, "learning_rate": 4.7518629711567255e-06, "loss": 0.4522, "step": 14177 }, { "epoch": 0.8633803245744908, "grad_norm": 1.0608701742373678, "learning_rate": 4.751828310869848e-06, "loss": 0.3477, "step": 14178 }, { "epoch": 0.8634412203513686, "grad_norm": 0.9853285246341431, "learning_rate": 4.751793648288855e-06, "loss": 0.3675, "step": 14179 }, { "epoch": 0.8635021161282465, "grad_norm": 0.978321563305757, "learning_rate": 4.7517589834137815e-06, "loss": 0.4855, "step": 14180 }, { "epoch": 0.8635630119051244, "grad_norm": 1.0033013218283102, "learning_rate": 4.751724316244662e-06, "loss": 0.3558, "step": 14181 }, { "epoch": 0.8636239076820023, "grad_norm": 1.0448356610861929, "learning_rate": 4.751689646781534e-06, "loss": 0.4478, "step": 14182 }, { "epoch": 0.8636848034588801, "grad_norm": 1.0955280921000632, "learning_rate": 4.7516549750244306e-06, "loss": 0.378, "step": 14183 }, { "epoch": 0.863745699235758, "grad_norm": 0.9958489996599663, "learning_rate": 4.751620300973387e-06, "loss": 0.4319, "step": 14184 }, { "epoch": 0.8638065950126359, "grad_norm": 0.995257627415673, "learning_rate": 4.751585624628441e-06, "loss": 0.4317, "step": 14185 }, { "epoch": 0.8638674907895137, "grad_norm": 1.099670727895086, "learning_rate": 4.751550945989626e-06, "loss": 0.3327, "step": 14186 }, { "epoch": 0.8639283865663916, "grad_norm": 0.94818889843359, "learning_rate": 4.7515162650569776e-06, "loss": 0.3877, "step": 14187 }, { "epoch": 0.8639892823432695, "grad_norm": 1.0058311215661222, "learning_rate": 4.751481581830531e-06, "loss": 0.3452, "step": 14188 }, { "epoch": 0.8640501781201474, "grad_norm": 0.987742882740327, "learning_rate": 4.751446896310322e-06, "loss": 0.4334, "step": 14189 }, { "epoch": 0.8641110738970252, "grad_norm": 1.0366055038707322, "learning_rate": 4.751412208496385e-06, "loss": 0.4488, "step": 14190 }, { "epoch": 0.8641719696739031, "grad_norm": 1.0294509542352837, "learning_rate": 4.751377518388757e-06, "loss": 0.4026, "step": 14191 }, { "epoch": 0.864232865450781, "grad_norm": 0.9730649227309303, "learning_rate": 4.751342825987472e-06, "loss": 0.4217, "step": 14192 }, { "epoch": 0.8642937612276589, "grad_norm": 0.9805901961367383, "learning_rate": 4.751308131292566e-06, "loss": 0.4103, "step": 14193 }, { "epoch": 0.8643546570045367, "grad_norm": 1.0381155001499212, "learning_rate": 4.7512734343040735e-06, "loss": 0.4126, "step": 14194 }, { "epoch": 0.8644155527814146, "grad_norm": 0.9749744506647263, "learning_rate": 4.751238735022031e-06, "loss": 0.458, "step": 14195 }, { "epoch": 0.8644764485582925, "grad_norm": 0.9441913967450875, "learning_rate": 4.751204033446473e-06, "loss": 0.4156, "step": 14196 }, { "epoch": 0.8645373443351704, "grad_norm": 1.0969419287062119, "learning_rate": 4.7511693295774354e-06, "loss": 0.3669, "step": 14197 }, { "epoch": 0.8645982401120482, "grad_norm": 0.9644133875112566, "learning_rate": 4.751134623414953e-06, "loss": 0.4611, "step": 14198 }, { "epoch": 0.8646591358889261, "grad_norm": 1.0952955923117451, "learning_rate": 4.751099914959061e-06, "loss": 0.4672, "step": 14199 }, { "epoch": 0.864720031665804, "grad_norm": 1.1015255029778872, "learning_rate": 4.751065204209796e-06, "loss": 0.4429, "step": 14200 }, { "epoch": 0.8647809274426819, "grad_norm": 0.9988706168041221, "learning_rate": 4.751030491167192e-06, "loss": 0.4811, "step": 14201 }, { "epoch": 0.8648418232195597, "grad_norm": 1.0022041929404475, "learning_rate": 4.750995775831286e-06, "loss": 0.4089, "step": 14202 }, { "epoch": 0.8649027189964376, "grad_norm": 0.9679102952979749, "learning_rate": 4.750961058202111e-06, "loss": 0.4489, "step": 14203 }, { "epoch": 0.8649636147733155, "grad_norm": 0.9360701775507747, "learning_rate": 4.750926338279704e-06, "loss": 0.5162, "step": 14204 }, { "epoch": 0.8650245105501934, "grad_norm": 0.9761905819108645, "learning_rate": 4.750891616064101e-06, "loss": 0.4272, "step": 14205 }, { "epoch": 0.8650854063270712, "grad_norm": 0.9701984029115782, "learning_rate": 4.750856891555335e-06, "loss": 0.3779, "step": 14206 }, { "epoch": 0.865146302103949, "grad_norm": 1.003406226863636, "learning_rate": 4.750822164753443e-06, "loss": 0.4605, "step": 14207 }, { "epoch": 0.865207197880827, "grad_norm": 0.9178837620702155, "learning_rate": 4.750787435658462e-06, "loss": 0.4772, "step": 14208 }, { "epoch": 0.8652680936577049, "grad_norm": 0.9388383665008293, "learning_rate": 4.750752704270424e-06, "loss": 0.3875, "step": 14209 }, { "epoch": 0.8653289894345827, "grad_norm": 1.062885506072342, "learning_rate": 4.750717970589365e-06, "loss": 0.3768, "step": 14210 }, { "epoch": 0.8653898852114605, "grad_norm": 1.0906621889983952, "learning_rate": 4.750683234615323e-06, "loss": 0.3851, "step": 14211 }, { "epoch": 0.8654507809883385, "grad_norm": 0.9510745377062487, "learning_rate": 4.750648496348332e-06, "loss": 0.4187, "step": 14212 }, { "epoch": 0.8655116767652163, "grad_norm": 1.1376240941565903, "learning_rate": 4.750613755788426e-06, "loss": 0.3253, "step": 14213 }, { "epoch": 0.8655725725420942, "grad_norm": 0.9588441782769065, "learning_rate": 4.750579012935642e-06, "loss": 0.4242, "step": 14214 }, { "epoch": 0.865633468318972, "grad_norm": 0.8835401274038787, "learning_rate": 4.750544267790015e-06, "loss": 0.4829, "step": 14215 }, { "epoch": 0.86569436409585, "grad_norm": 1.0106193715342537, "learning_rate": 4.750509520351579e-06, "loss": 0.4974, "step": 14216 }, { "epoch": 0.8657552598727278, "grad_norm": 1.0503839864529707, "learning_rate": 4.750474770620372e-06, "loss": 0.4187, "step": 14217 }, { "epoch": 0.8658161556496057, "grad_norm": 0.9845119304576267, "learning_rate": 4.750440018596428e-06, "loss": 0.453, "step": 14218 }, { "epoch": 0.8658770514264835, "grad_norm": 1.087457902427173, "learning_rate": 4.7504052642797825e-06, "loss": 0.4691, "step": 14219 }, { "epoch": 0.8659379472033615, "grad_norm": 0.9941270980752415, "learning_rate": 4.750370507670471e-06, "loss": 0.44, "step": 14220 }, { "epoch": 0.8659988429802393, "grad_norm": 0.8715453259223432, "learning_rate": 4.750335748768527e-06, "loss": 0.4561, "step": 14221 }, { "epoch": 0.8660597387571172, "grad_norm": 1.0324317707268376, "learning_rate": 4.7503009875739906e-06, "loss": 0.3683, "step": 14222 }, { "epoch": 0.866120634533995, "grad_norm": 0.9769254423241222, "learning_rate": 4.750266224086892e-06, "loss": 0.4125, "step": 14223 }, { "epoch": 0.866181530310873, "grad_norm": 0.9943052699338825, "learning_rate": 4.75023145830727e-06, "loss": 0.4397, "step": 14224 }, { "epoch": 0.8662424260877508, "grad_norm": 0.9961365798959542, "learning_rate": 4.7501966902351596e-06, "loss": 0.4399, "step": 14225 }, { "epoch": 0.8663033218646287, "grad_norm": 0.979216589274489, "learning_rate": 4.750161919870594e-06, "loss": 0.449, "step": 14226 }, { "epoch": 0.8663642176415065, "grad_norm": 0.9354080856889154, "learning_rate": 4.7501271472136115e-06, "loss": 0.4029, "step": 14227 }, { "epoch": 0.8664251134183845, "grad_norm": 0.9586183739897005, "learning_rate": 4.750092372264246e-06, "loss": 0.4618, "step": 14228 }, { "epoch": 0.8664860091952623, "grad_norm": 0.9590122035706182, "learning_rate": 4.750057595022533e-06, "loss": 0.386, "step": 14229 }, { "epoch": 0.8665469049721402, "grad_norm": 0.9828983424611952, "learning_rate": 4.750022815488507e-06, "loss": 0.4425, "step": 14230 }, { "epoch": 0.8666078007490181, "grad_norm": 0.9473102852608781, "learning_rate": 4.7499880336622065e-06, "loss": 0.4194, "step": 14231 }, { "epoch": 0.866668696525896, "grad_norm": 1.0401741735860455, "learning_rate": 4.749953249543664e-06, "loss": 0.3555, "step": 14232 }, { "epoch": 0.8667295923027738, "grad_norm": 1.0515630303635473, "learning_rate": 4.749918463132916e-06, "loss": 0.4052, "step": 14233 }, { "epoch": 0.8667904880796516, "grad_norm": 0.9951321766124468, "learning_rate": 4.749883674429998e-06, "loss": 0.3718, "step": 14234 }, { "epoch": 0.8668513838565296, "grad_norm": 0.9951829513940772, "learning_rate": 4.749848883434945e-06, "loss": 0.408, "step": 14235 }, { "epoch": 0.8669122796334074, "grad_norm": 0.9398794321720849, "learning_rate": 4.749814090147793e-06, "loss": 0.4502, "step": 14236 }, { "epoch": 0.8669731754102853, "grad_norm": 1.0024061263350292, "learning_rate": 4.749779294568577e-06, "loss": 0.3758, "step": 14237 }, { "epoch": 0.8670340711871631, "grad_norm": 0.9838807873402277, "learning_rate": 4.749744496697333e-06, "loss": 0.4053, "step": 14238 }, { "epoch": 0.8670949669640411, "grad_norm": 1.0218318001450457, "learning_rate": 4.749709696534095e-06, "loss": 0.3959, "step": 14239 }, { "epoch": 0.8671558627409189, "grad_norm": 1.1283005164168296, "learning_rate": 4.7496748940789005e-06, "loss": 0.3531, "step": 14240 }, { "epoch": 0.8672167585177968, "grad_norm": 0.9870958457949249, "learning_rate": 4.749640089331784e-06, "loss": 0.3654, "step": 14241 }, { "epoch": 0.8672776542946746, "grad_norm": 1.013486311629105, "learning_rate": 4.749605282292781e-06, "loss": 0.4443, "step": 14242 }, { "epoch": 0.8673385500715526, "grad_norm": 1.0678933730246456, "learning_rate": 4.749570472961926e-06, "loss": 0.3924, "step": 14243 }, { "epoch": 0.8673994458484304, "grad_norm": 1.0740229846839666, "learning_rate": 4.749535661339256e-06, "loss": 0.4057, "step": 14244 }, { "epoch": 0.8674603416253083, "grad_norm": 0.9466520797208909, "learning_rate": 4.749500847424806e-06, "loss": 0.4393, "step": 14245 }, { "epoch": 0.8675212374021861, "grad_norm": 1.0736569899571506, "learning_rate": 4.749466031218612e-06, "loss": 0.3731, "step": 14246 }, { "epoch": 0.8675821331790641, "grad_norm": 0.998258835866514, "learning_rate": 4.749431212720707e-06, "loss": 0.4453, "step": 14247 }, { "epoch": 0.8676430289559419, "grad_norm": 0.9730819652668152, "learning_rate": 4.749396391931129e-06, "loss": 0.4512, "step": 14248 }, { "epoch": 0.8677039247328198, "grad_norm": 0.9921290284166121, "learning_rate": 4.749361568849913e-06, "loss": 0.3916, "step": 14249 }, { "epoch": 0.8677648205096976, "grad_norm": 1.0107859118468954, "learning_rate": 4.749326743477094e-06, "loss": 0.4273, "step": 14250 }, { "epoch": 0.8678257162865756, "grad_norm": 1.0302470572154248, "learning_rate": 4.749291915812709e-06, "loss": 0.4039, "step": 14251 }, { "epoch": 0.8678866120634534, "grad_norm": 1.0347617198554289, "learning_rate": 4.7492570858567896e-06, "loss": 0.3955, "step": 14252 }, { "epoch": 0.8679475078403313, "grad_norm": 1.0191658029917607, "learning_rate": 4.749222253609375e-06, "loss": 0.3968, "step": 14253 }, { "epoch": 0.8680084036172091, "grad_norm": 0.9216259528166217, "learning_rate": 4.749187419070501e-06, "loss": 0.5267, "step": 14254 }, { "epoch": 0.8680692993940871, "grad_norm": 0.9309285765533821, "learning_rate": 4.749152582240199e-06, "loss": 0.3927, "step": 14255 }, { "epoch": 0.8681301951709649, "grad_norm": 1.0532226379771763, "learning_rate": 4.749117743118509e-06, "loss": 0.344, "step": 14256 }, { "epoch": 0.8681910909478427, "grad_norm": 1.0224503275321977, "learning_rate": 4.749082901705464e-06, "loss": 0.4362, "step": 14257 }, { "epoch": 0.8682519867247206, "grad_norm": 0.969390871137734, "learning_rate": 4.7490480580011005e-06, "loss": 0.3693, "step": 14258 }, { "epoch": 0.8683128825015985, "grad_norm": 0.9764304066239455, "learning_rate": 4.749013212005453e-06, "loss": 0.4589, "step": 14259 }, { "epoch": 0.8683737782784764, "grad_norm": 1.0100388453451017, "learning_rate": 4.7489783637185585e-06, "loss": 0.3998, "step": 14260 }, { "epoch": 0.8684346740553542, "grad_norm": 0.9161703493750296, "learning_rate": 4.74894351314045e-06, "loss": 0.4483, "step": 14261 }, { "epoch": 0.8684955698322321, "grad_norm": 1.0305471666292367, "learning_rate": 4.748908660271167e-06, "loss": 0.4225, "step": 14262 }, { "epoch": 0.86855646560911, "grad_norm": 0.9864086863411614, "learning_rate": 4.74887380511074e-06, "loss": 0.4438, "step": 14263 }, { "epoch": 0.8686173613859879, "grad_norm": 1.097034479264504, "learning_rate": 4.7488389476592086e-06, "loss": 0.3878, "step": 14264 }, { "epoch": 0.8686782571628657, "grad_norm": 1.0656897816455941, "learning_rate": 4.748804087916607e-06, "loss": 0.4145, "step": 14265 }, { "epoch": 0.8687391529397436, "grad_norm": 1.0341376224279653, "learning_rate": 4.74876922588297e-06, "loss": 0.3899, "step": 14266 }, { "epoch": 0.8688000487166215, "grad_norm": 1.0481527726873585, "learning_rate": 4.748734361558334e-06, "loss": 0.4181, "step": 14267 }, { "epoch": 0.8688609444934994, "grad_norm": 1.0925903245978432, "learning_rate": 4.748699494942733e-06, "loss": 0.448, "step": 14268 }, { "epoch": 0.8689218402703772, "grad_norm": 0.9679282961215118, "learning_rate": 4.748664626036205e-06, "loss": 0.3919, "step": 14269 }, { "epoch": 0.8689827360472552, "grad_norm": 0.9119885565080313, "learning_rate": 4.7486297548387836e-06, "loss": 0.3697, "step": 14270 }, { "epoch": 0.869043631824133, "grad_norm": 0.9987721045336917, "learning_rate": 4.748594881350506e-06, "loss": 0.3976, "step": 14271 }, { "epoch": 0.8691045276010109, "grad_norm": 0.98813673698971, "learning_rate": 4.748560005571405e-06, "loss": 0.4088, "step": 14272 }, { "epoch": 0.8691654233778887, "grad_norm": 1.0195350404828947, "learning_rate": 4.748525127501519e-06, "loss": 0.3782, "step": 14273 }, { "epoch": 0.8692263191547667, "grad_norm": 1.0393798716059302, "learning_rate": 4.748490247140882e-06, "loss": 0.4557, "step": 14274 }, { "epoch": 0.8692872149316445, "grad_norm": 0.9877094963405013, "learning_rate": 4.74845536448953e-06, "loss": 0.3971, "step": 14275 }, { "epoch": 0.8693481107085224, "grad_norm": 1.0684658677639698, "learning_rate": 4.748420479547499e-06, "loss": 0.3688, "step": 14276 }, { "epoch": 0.8694090064854002, "grad_norm": 1.0654901872605023, "learning_rate": 4.748385592314822e-06, "loss": 0.3513, "step": 14277 }, { "epoch": 0.8694699022622782, "grad_norm": 1.0155180753712914, "learning_rate": 4.748350702791539e-06, "loss": 0.3533, "step": 14278 }, { "epoch": 0.869530798039156, "grad_norm": 0.9457721257996816, "learning_rate": 4.748315810977681e-06, "loss": 0.5021, "step": 14279 }, { "epoch": 0.8695916938160339, "grad_norm": 0.9482563442738191, "learning_rate": 4.7482809168732865e-06, "loss": 0.4607, "step": 14280 }, { "epoch": 0.8696525895929117, "grad_norm": 0.99582108795415, "learning_rate": 4.74824602047839e-06, "loss": 0.3544, "step": 14281 }, { "epoch": 0.8697134853697897, "grad_norm": 0.9570312018639087, "learning_rate": 4.7482111217930275e-06, "loss": 0.424, "step": 14282 }, { "epoch": 0.8697743811466675, "grad_norm": 1.0234826255997802, "learning_rate": 4.748176220817235e-06, "loss": 0.3902, "step": 14283 }, { "epoch": 0.8698352769235453, "grad_norm": 0.9802731167405594, "learning_rate": 4.748141317551046e-06, "loss": 0.3417, "step": 14284 }, { "epoch": 0.8698961727004232, "grad_norm": 0.9221245469306375, "learning_rate": 4.748106411994497e-06, "loss": 0.4315, "step": 14285 }, { "epoch": 0.8699570684773011, "grad_norm": 1.0117663481694097, "learning_rate": 4.7480715041476254e-06, "loss": 0.4545, "step": 14286 }, { "epoch": 0.870017964254179, "grad_norm": 1.0442999098921886, "learning_rate": 4.748036594010465e-06, "loss": 0.3697, "step": 14287 }, { "epoch": 0.8700788600310568, "grad_norm": 0.9536818877779124, "learning_rate": 4.748001681583051e-06, "loss": 0.4424, "step": 14288 }, { "epoch": 0.8701397558079347, "grad_norm": 1.0709190311468193, "learning_rate": 4.7479667668654195e-06, "loss": 0.4105, "step": 14289 }, { "epoch": 0.8702006515848126, "grad_norm": 0.9686633898453828, "learning_rate": 4.747931849857606e-06, "loss": 0.3823, "step": 14290 }, { "epoch": 0.8702615473616905, "grad_norm": 0.9785168589888881, "learning_rate": 4.747896930559647e-06, "loss": 0.4075, "step": 14291 }, { "epoch": 0.8703224431385683, "grad_norm": 0.9624291907254737, "learning_rate": 4.7478620089715774e-06, "loss": 0.4757, "step": 14292 }, { "epoch": 0.8703833389154462, "grad_norm": 0.9224074804872048, "learning_rate": 4.747827085093432e-06, "loss": 0.4593, "step": 14293 }, { "epoch": 0.8704442346923241, "grad_norm": 0.9793837885888096, "learning_rate": 4.747792158925248e-06, "loss": 0.432, "step": 14294 }, { "epoch": 0.870505130469202, "grad_norm": 0.9828868832667006, "learning_rate": 4.74775723046706e-06, "loss": 0.3813, "step": 14295 }, { "epoch": 0.8705660262460798, "grad_norm": 0.9557529316388698, "learning_rate": 4.747722299718903e-06, "loss": 0.4824, "step": 14296 }, { "epoch": 0.8706269220229577, "grad_norm": 1.0585226411290451, "learning_rate": 4.747687366680813e-06, "loss": 0.434, "step": 14297 }, { "epoch": 0.8706878177998356, "grad_norm": 0.9488664211071531, "learning_rate": 4.7476524313528276e-06, "loss": 0.456, "step": 14298 }, { "epoch": 0.8707487135767135, "grad_norm": 1.0155332066117344, "learning_rate": 4.747617493734978e-06, "loss": 0.4264, "step": 14299 }, { "epoch": 0.8708096093535913, "grad_norm": 0.9887385630890742, "learning_rate": 4.7475825538273045e-06, "loss": 0.3473, "step": 14300 }, { "epoch": 0.8708705051304692, "grad_norm": 1.0218984596153982, "learning_rate": 4.74754761162984e-06, "loss": 0.3876, "step": 14301 }, { "epoch": 0.8709314009073471, "grad_norm": 1.0318245902788525, "learning_rate": 4.747512667142621e-06, "loss": 0.3729, "step": 14302 }, { "epoch": 0.870992296684225, "grad_norm": 0.9990484240519166, "learning_rate": 4.747477720365682e-06, "loss": 0.4128, "step": 14303 }, { "epoch": 0.8710531924611028, "grad_norm": 1.00030817106462, "learning_rate": 4.74744277129906e-06, "loss": 0.4275, "step": 14304 }, { "epoch": 0.8711140882379806, "grad_norm": 0.9160681091605117, "learning_rate": 4.74740781994279e-06, "loss": 0.4377, "step": 14305 }, { "epoch": 0.8711749840148586, "grad_norm": 0.9633232171486275, "learning_rate": 4.747372866296908e-06, "loss": 0.4128, "step": 14306 }, { "epoch": 0.8712358797917364, "grad_norm": 0.9657000371297081, "learning_rate": 4.747337910361448e-06, "loss": 0.3686, "step": 14307 }, { "epoch": 0.8712967755686143, "grad_norm": 0.9471914083096904, "learning_rate": 4.7473029521364485e-06, "loss": 0.3983, "step": 14308 }, { "epoch": 0.8713576713454921, "grad_norm": 0.9350823546805849, "learning_rate": 4.747267991621942e-06, "loss": 0.4103, "step": 14309 }, { "epoch": 0.8714185671223701, "grad_norm": 1.0549103260163408, "learning_rate": 4.747233028817967e-06, "loss": 0.3814, "step": 14310 }, { "epoch": 0.8714794628992479, "grad_norm": 1.0883858010403118, "learning_rate": 4.7471980637245565e-06, "loss": 0.3879, "step": 14311 }, { "epoch": 0.8715403586761258, "grad_norm": 1.051689725630461, "learning_rate": 4.747163096341748e-06, "loss": 0.4616, "step": 14312 }, { "epoch": 0.8716012544530037, "grad_norm": 1.0748383359184512, "learning_rate": 4.747128126669577e-06, "loss": 0.4112, "step": 14313 }, { "epoch": 0.8716621502298816, "grad_norm": 0.987286150919315, "learning_rate": 4.747093154708077e-06, "loss": 0.3946, "step": 14314 }, { "epoch": 0.8717230460067594, "grad_norm": 1.068084405764322, "learning_rate": 4.747058180457287e-06, "loss": 0.4173, "step": 14315 }, { "epoch": 0.8717839417836373, "grad_norm": 0.9802416258682105, "learning_rate": 4.747023203917239e-06, "loss": 0.3846, "step": 14316 }, { "epoch": 0.8718448375605152, "grad_norm": 1.0392321322258906, "learning_rate": 4.7469882250879716e-06, "loss": 0.4085, "step": 14317 }, { "epoch": 0.8719057333373931, "grad_norm": 1.018710083181439, "learning_rate": 4.746953243969519e-06, "loss": 0.3828, "step": 14318 }, { "epoch": 0.8719666291142709, "grad_norm": 1.0963680391879769, "learning_rate": 4.746918260561918e-06, "loss": 0.4083, "step": 14319 }, { "epoch": 0.8720275248911488, "grad_norm": 1.067015239856877, "learning_rate": 4.746883274865203e-06, "loss": 0.454, "step": 14320 }, { "epoch": 0.8720884206680267, "grad_norm": 1.0506953501710419, "learning_rate": 4.746848286879409e-06, "loss": 0.3375, "step": 14321 }, { "epoch": 0.8721493164449046, "grad_norm": 1.08758022996793, "learning_rate": 4.746813296604573e-06, "loss": 0.3464, "step": 14322 }, { "epoch": 0.8722102122217824, "grad_norm": 1.0521398282810073, "learning_rate": 4.746778304040731e-06, "loss": 0.419, "step": 14323 }, { "epoch": 0.8722711079986603, "grad_norm": 1.014534967065755, "learning_rate": 4.746743309187918e-06, "loss": 0.3995, "step": 14324 }, { "epoch": 0.8723320037755382, "grad_norm": 1.0305805736491895, "learning_rate": 4.746708312046169e-06, "loss": 0.4281, "step": 14325 }, { "epoch": 0.8723928995524161, "grad_norm": 1.0006850945318384, "learning_rate": 4.746673312615521e-06, "loss": 0.4329, "step": 14326 }, { "epoch": 0.8724537953292939, "grad_norm": 0.8947384359780896, "learning_rate": 4.746638310896008e-06, "loss": 0.4317, "step": 14327 }, { "epoch": 0.8725146911061717, "grad_norm": 0.9847122917108299, "learning_rate": 4.746603306887667e-06, "loss": 0.3337, "step": 14328 }, { "epoch": 0.8725755868830497, "grad_norm": 1.03908897720167, "learning_rate": 4.746568300590534e-06, "loss": 0.4342, "step": 14329 }, { "epoch": 0.8726364826599275, "grad_norm": 1.0166532596537192, "learning_rate": 4.746533292004643e-06, "loss": 0.4709, "step": 14330 }, { "epoch": 0.8726973784368054, "grad_norm": 1.0350680636007226, "learning_rate": 4.746498281130031e-06, "loss": 0.3759, "step": 14331 }, { "epoch": 0.8727582742136832, "grad_norm": 1.0197211556983352, "learning_rate": 4.746463267966733e-06, "loss": 0.4094, "step": 14332 }, { "epoch": 0.8728191699905612, "grad_norm": 1.0193718874800592, "learning_rate": 4.7464282525147855e-06, "loss": 0.4027, "step": 14333 }, { "epoch": 0.872880065767439, "grad_norm": 1.0515085143019682, "learning_rate": 4.746393234774223e-06, "loss": 0.4128, "step": 14334 }, { "epoch": 0.8729409615443169, "grad_norm": 1.0434243744484284, "learning_rate": 4.746358214745082e-06, "loss": 0.431, "step": 14335 }, { "epoch": 0.8730018573211947, "grad_norm": 1.0421668148958283, "learning_rate": 4.746323192427398e-06, "loss": 0.3518, "step": 14336 }, { "epoch": 0.8730627530980727, "grad_norm": 0.9746852372464369, "learning_rate": 4.746288167821207e-06, "loss": 0.4203, "step": 14337 }, { "epoch": 0.8731236488749505, "grad_norm": 1.042933227089408, "learning_rate": 4.7462531409265435e-06, "loss": 0.4145, "step": 14338 }, { "epoch": 0.8731845446518284, "grad_norm": 1.0246865061613373, "learning_rate": 4.746218111743444e-06, "loss": 0.3938, "step": 14339 }, { "epoch": 0.8732454404287062, "grad_norm": 0.9156374559199164, "learning_rate": 4.746183080271944e-06, "loss": 0.4415, "step": 14340 }, { "epoch": 0.8733063362055842, "grad_norm": 1.0297669827618254, "learning_rate": 4.74614804651208e-06, "loss": 0.4009, "step": 14341 }, { "epoch": 0.873367231982462, "grad_norm": 0.9169661141457784, "learning_rate": 4.7461130104638875e-06, "loss": 0.4078, "step": 14342 }, { "epoch": 0.8734281277593399, "grad_norm": 0.9641554388667539, "learning_rate": 4.746077972127402e-06, "loss": 0.4722, "step": 14343 }, { "epoch": 0.8734890235362177, "grad_norm": 0.9775251593023352, "learning_rate": 4.746042931502657e-06, "loss": 0.3988, "step": 14344 }, { "epoch": 0.8735499193130957, "grad_norm": 1.0506716675835905, "learning_rate": 4.746007888589692e-06, "loss": 0.3942, "step": 14345 }, { "epoch": 0.8736108150899735, "grad_norm": 0.9897715548020586, "learning_rate": 4.74597284338854e-06, "loss": 0.3895, "step": 14346 }, { "epoch": 0.8736717108668514, "grad_norm": 0.9655440838831585, "learning_rate": 4.745937795899238e-06, "loss": 0.3625, "step": 14347 }, { "epoch": 0.8737326066437292, "grad_norm": 0.8949135782939489, "learning_rate": 4.745902746121821e-06, "loss": 0.4454, "step": 14348 }, { "epoch": 0.8737935024206072, "grad_norm": 0.9541092247889399, "learning_rate": 4.745867694056326e-06, "loss": 0.3588, "step": 14349 }, { "epoch": 0.873854398197485, "grad_norm": 0.9530381476602772, "learning_rate": 4.745832639702786e-06, "loss": 0.4485, "step": 14350 }, { "epoch": 0.8739152939743629, "grad_norm": 0.9935903308410027, "learning_rate": 4.745797583061239e-06, "loss": 0.3905, "step": 14351 }, { "epoch": 0.8739761897512408, "grad_norm": 1.0641148734921375, "learning_rate": 4.74576252413172e-06, "loss": 0.3903, "step": 14352 }, { "epoch": 0.8740370855281187, "grad_norm": 0.9679986456332468, "learning_rate": 4.745727462914265e-06, "loss": 0.3872, "step": 14353 }, { "epoch": 0.8740979813049965, "grad_norm": 1.062461590637756, "learning_rate": 4.74569239940891e-06, "loss": 0.4127, "step": 14354 }, { "epoch": 0.8741588770818743, "grad_norm": 0.9685707791369721, "learning_rate": 4.74565733361569e-06, "loss": 0.4151, "step": 14355 }, { "epoch": 0.8742197728587523, "grad_norm": 1.0383533912385268, "learning_rate": 4.745622265534641e-06, "loss": 0.3944, "step": 14356 }, { "epoch": 0.8742806686356301, "grad_norm": 1.0467607838374884, "learning_rate": 4.745587195165798e-06, "loss": 0.3825, "step": 14357 }, { "epoch": 0.874341564412508, "grad_norm": 0.9638530150843526, "learning_rate": 4.745552122509199e-06, "loss": 0.4166, "step": 14358 }, { "epoch": 0.8744024601893858, "grad_norm": 1.0165084740413666, "learning_rate": 4.745517047564876e-06, "loss": 0.3782, "step": 14359 }, { "epoch": 0.8744633559662638, "grad_norm": 1.030579467535583, "learning_rate": 4.745481970332869e-06, "loss": 0.4035, "step": 14360 }, { "epoch": 0.8745242517431416, "grad_norm": 0.9397432408993766, "learning_rate": 4.745446890813211e-06, "loss": 0.4542, "step": 14361 }, { "epoch": 0.8745851475200195, "grad_norm": 1.004207472311812, "learning_rate": 4.745411809005937e-06, "loss": 0.3267, "step": 14362 }, { "epoch": 0.8746460432968973, "grad_norm": 0.9634486047436805, "learning_rate": 4.745376724911086e-06, "loss": 0.458, "step": 14363 }, { "epoch": 0.8747069390737753, "grad_norm": 0.9964816656500883, "learning_rate": 4.7453416385286915e-06, "loss": 0.4218, "step": 14364 }, { "epoch": 0.8747678348506531, "grad_norm": 1.01218599664229, "learning_rate": 4.745306549858789e-06, "loss": 0.4434, "step": 14365 }, { "epoch": 0.874828730627531, "grad_norm": 1.1058918093922911, "learning_rate": 4.7452714589014156e-06, "loss": 0.3965, "step": 14366 }, { "epoch": 0.8748896264044088, "grad_norm": 1.0249230021201379, "learning_rate": 4.745236365656606e-06, "loss": 0.3694, "step": 14367 }, { "epoch": 0.8749505221812868, "grad_norm": 1.0600431038755311, "learning_rate": 4.745201270124396e-06, "loss": 0.4025, "step": 14368 }, { "epoch": 0.8750114179581646, "grad_norm": 0.9613532152972636, "learning_rate": 4.745166172304821e-06, "loss": 0.3883, "step": 14369 }, { "epoch": 0.8750723137350425, "grad_norm": 1.0667217449451023, "learning_rate": 4.745131072197919e-06, "loss": 0.4014, "step": 14370 }, { "epoch": 0.8751332095119203, "grad_norm": 0.9663288010063927, "learning_rate": 4.745095969803724e-06, "loss": 0.4541, "step": 14371 }, { "epoch": 0.8751941052887983, "grad_norm": 1.0601928256452788, "learning_rate": 4.74506086512227e-06, "loss": 0.4355, "step": 14372 }, { "epoch": 0.8752550010656761, "grad_norm": 0.9655970975252673, "learning_rate": 4.745025758153597e-06, "loss": 0.443, "step": 14373 }, { "epoch": 0.875315896842554, "grad_norm": 1.039619767062742, "learning_rate": 4.7449906488977375e-06, "loss": 0.3509, "step": 14374 }, { "epoch": 0.8753767926194318, "grad_norm": 0.9575384539164524, "learning_rate": 4.7449555373547275e-06, "loss": 0.4351, "step": 14375 }, { "epoch": 0.8754376883963098, "grad_norm": 1.026881262520265, "learning_rate": 4.744920423524604e-06, "loss": 0.464, "step": 14376 }, { "epoch": 0.8754985841731876, "grad_norm": 1.0119858116952871, "learning_rate": 4.744885307407402e-06, "loss": 0.4301, "step": 14377 }, { "epoch": 0.8755594799500654, "grad_norm": 0.969602476869353, "learning_rate": 4.744850189003158e-06, "loss": 0.4155, "step": 14378 }, { "epoch": 0.8756203757269433, "grad_norm": 1.0164888747115899, "learning_rate": 4.744815068311907e-06, "loss": 0.3746, "step": 14379 }, { "epoch": 0.8756812715038212, "grad_norm": 0.9406593981624397, "learning_rate": 4.7447799453336855e-06, "loss": 0.3628, "step": 14380 }, { "epoch": 0.8757421672806991, "grad_norm": 0.8976707500484432, "learning_rate": 4.744744820068528e-06, "loss": 0.4549, "step": 14381 }, { "epoch": 0.8758030630575769, "grad_norm": 1.0320550107952682, "learning_rate": 4.744709692516472e-06, "loss": 0.4727, "step": 14382 }, { "epoch": 0.8758639588344548, "grad_norm": 0.9349979820003352, "learning_rate": 4.744674562677552e-06, "loss": 0.3949, "step": 14383 }, { "epoch": 0.8759248546113327, "grad_norm": 1.0265376188253845, "learning_rate": 4.744639430551804e-06, "loss": 0.3943, "step": 14384 }, { "epoch": 0.8759857503882106, "grad_norm": 1.0635586233694831, "learning_rate": 4.744604296139265e-06, "loss": 0.4279, "step": 14385 }, { "epoch": 0.8760466461650884, "grad_norm": 1.004594114507389, "learning_rate": 4.744569159439969e-06, "loss": 0.382, "step": 14386 }, { "epoch": 0.8761075419419663, "grad_norm": 1.175795449408467, "learning_rate": 4.744534020453952e-06, "loss": 0.4632, "step": 14387 }, { "epoch": 0.8761684377188442, "grad_norm": 0.9423059123249811, "learning_rate": 4.744498879181252e-06, "loss": 0.4403, "step": 14388 }, { "epoch": 0.8762293334957221, "grad_norm": 0.9726217359886837, "learning_rate": 4.744463735621903e-06, "loss": 0.4055, "step": 14389 }, { "epoch": 0.8762902292725999, "grad_norm": 1.053190965753798, "learning_rate": 4.74442858977594e-06, "loss": 0.5099, "step": 14390 }, { "epoch": 0.8763511250494778, "grad_norm": 1.024482957803972, "learning_rate": 4.7443934416434e-06, "loss": 0.438, "step": 14391 }, { "epoch": 0.8764120208263557, "grad_norm": 1.0304328108683454, "learning_rate": 4.744358291224318e-06, "loss": 0.4376, "step": 14392 }, { "epoch": 0.8764729166032336, "grad_norm": 0.9343081393389727, "learning_rate": 4.744323138518732e-06, "loss": 0.3625, "step": 14393 }, { "epoch": 0.8765338123801114, "grad_norm": 0.9683336110720762, "learning_rate": 4.7442879835266755e-06, "loss": 0.4814, "step": 14394 }, { "epoch": 0.8765947081569894, "grad_norm": 0.9245148297609843, "learning_rate": 4.744252826248185e-06, "loss": 0.4851, "step": 14395 }, { "epoch": 0.8766556039338672, "grad_norm": 1.0313129670997532, "learning_rate": 4.744217666683296e-06, "loss": 0.368, "step": 14396 }, { "epoch": 0.8767164997107451, "grad_norm": 0.8995618521559422, "learning_rate": 4.744182504832046e-06, "loss": 0.3905, "step": 14397 }, { "epoch": 0.8767773954876229, "grad_norm": 0.9929068076728901, "learning_rate": 4.744147340694468e-06, "loss": 0.3444, "step": 14398 }, { "epoch": 0.8768382912645009, "grad_norm": 0.9722866648729838, "learning_rate": 4.7441121742706e-06, "loss": 0.4689, "step": 14399 }, { "epoch": 0.8768991870413787, "grad_norm": 0.8688339528516025, "learning_rate": 4.744077005560478e-06, "loss": 0.5063, "step": 14400 }, { "epoch": 0.8769600828182565, "grad_norm": 1.0890388329133727, "learning_rate": 4.7440418345641355e-06, "loss": 0.3927, "step": 14401 }, { "epoch": 0.8770209785951344, "grad_norm": 1.061535901509664, "learning_rate": 4.744006661281611e-06, "loss": 0.3812, "step": 14402 }, { "epoch": 0.8770818743720123, "grad_norm": 1.0685463348578699, "learning_rate": 4.743971485712938e-06, "loss": 0.3497, "step": 14403 }, { "epoch": 0.8771427701488902, "grad_norm": 0.9528442019629739, "learning_rate": 4.743936307858155e-06, "loss": 0.4471, "step": 14404 }, { "epoch": 0.877203665925768, "grad_norm": 1.0238892470373346, "learning_rate": 4.7439011277172954e-06, "loss": 0.3771, "step": 14405 }, { "epoch": 0.8772645617026459, "grad_norm": 0.9336897387720962, "learning_rate": 4.743865945290396e-06, "loss": 0.4904, "step": 14406 }, { "epoch": 0.8773254574795238, "grad_norm": 0.9167781626024873, "learning_rate": 4.743830760577493e-06, "loss": 0.395, "step": 14407 }, { "epoch": 0.8773863532564017, "grad_norm": 0.9939001089351472, "learning_rate": 4.743795573578621e-06, "loss": 0.4672, "step": 14408 }, { "epoch": 0.8774472490332795, "grad_norm": 0.971078828848291, "learning_rate": 4.743760384293818e-06, "loss": 0.3498, "step": 14409 }, { "epoch": 0.8775081448101574, "grad_norm": 1.0613812126345659, "learning_rate": 4.743725192723118e-06, "loss": 0.391, "step": 14410 }, { "epoch": 0.8775690405870353, "grad_norm": 0.9519135997866905, "learning_rate": 4.7436899988665566e-06, "loss": 0.3892, "step": 14411 }, { "epoch": 0.8776299363639132, "grad_norm": 1.0452575813511789, "learning_rate": 4.743654802724171e-06, "loss": 0.3582, "step": 14412 }, { "epoch": 0.877690832140791, "grad_norm": 0.9726390417988041, "learning_rate": 4.743619604295997e-06, "loss": 0.363, "step": 14413 }, { "epoch": 0.8777517279176689, "grad_norm": 0.9792958801803574, "learning_rate": 4.74358440358207e-06, "loss": 0.4734, "step": 14414 }, { "epoch": 0.8778126236945468, "grad_norm": 1.1392311998606721, "learning_rate": 4.743549200582426e-06, "loss": 0.4014, "step": 14415 }, { "epoch": 0.8778735194714247, "grad_norm": 0.9380533404588, "learning_rate": 4.7435139952971e-06, "loss": 0.4034, "step": 14416 }, { "epoch": 0.8779344152483025, "grad_norm": 0.9706027203762945, "learning_rate": 4.743478787726129e-06, "loss": 0.4937, "step": 14417 }, { "epoch": 0.8779953110251804, "grad_norm": 0.9762276962741057, "learning_rate": 4.743443577869548e-06, "loss": 0.3892, "step": 14418 }, { "epoch": 0.8780562068020583, "grad_norm": 0.9206202015783388, "learning_rate": 4.743408365727394e-06, "loss": 0.4789, "step": 14419 }, { "epoch": 0.8781171025789362, "grad_norm": 1.031879802242589, "learning_rate": 4.743373151299701e-06, "loss": 0.4163, "step": 14420 }, { "epoch": 0.878177998355814, "grad_norm": 0.975500077453231, "learning_rate": 4.743337934586507e-06, "loss": 0.4259, "step": 14421 }, { "epoch": 0.8782388941326918, "grad_norm": 1.0448210527766117, "learning_rate": 4.743302715587847e-06, "loss": 0.3864, "step": 14422 }, { "epoch": 0.8782997899095698, "grad_norm": 1.0244783446925496, "learning_rate": 4.743267494303757e-06, "loss": 0.3672, "step": 14423 }, { "epoch": 0.8783606856864477, "grad_norm": 1.0445066208321934, "learning_rate": 4.743232270734273e-06, "loss": 0.357, "step": 14424 }, { "epoch": 0.8784215814633255, "grad_norm": 1.0177206673479557, "learning_rate": 4.74319704487943e-06, "loss": 0.3889, "step": 14425 }, { "epoch": 0.8784824772402033, "grad_norm": 1.1658081075032096, "learning_rate": 4.743161816739264e-06, "loss": 0.3547, "step": 14426 }, { "epoch": 0.8785433730170813, "grad_norm": 1.055745062495525, "learning_rate": 4.743126586313812e-06, "loss": 0.4391, "step": 14427 }, { "epoch": 0.8786042687939591, "grad_norm": 0.9359382707602398, "learning_rate": 4.74309135360311e-06, "loss": 0.4133, "step": 14428 }, { "epoch": 0.878665164570837, "grad_norm": 1.0042960756907824, "learning_rate": 4.743056118607192e-06, "loss": 0.4628, "step": 14429 }, { "epoch": 0.8787260603477148, "grad_norm": 0.9509043228809609, "learning_rate": 4.743020881326095e-06, "loss": 0.4887, "step": 14430 }, { "epoch": 0.8787869561245928, "grad_norm": 0.9669517463959684, "learning_rate": 4.7429856417598555e-06, "loss": 0.4157, "step": 14431 }, { "epoch": 0.8788478519014706, "grad_norm": 1.003313329356823, "learning_rate": 4.742950399908509e-06, "loss": 0.4508, "step": 14432 }, { "epoch": 0.8789087476783485, "grad_norm": 1.0015259271359314, "learning_rate": 4.742915155772091e-06, "loss": 0.3558, "step": 14433 }, { "epoch": 0.8789696434552264, "grad_norm": 1.167555854986982, "learning_rate": 4.7428799093506376e-06, "loss": 0.4121, "step": 14434 }, { "epoch": 0.8790305392321043, "grad_norm": 0.9482355630527127, "learning_rate": 4.742844660644185e-06, "loss": 0.3704, "step": 14435 }, { "epoch": 0.8790914350089821, "grad_norm": 1.000911796618158, "learning_rate": 4.742809409652769e-06, "loss": 0.4219, "step": 14436 }, { "epoch": 0.87915233078586, "grad_norm": 0.9992487083977356, "learning_rate": 4.742774156376425e-06, "loss": 0.3919, "step": 14437 }, { "epoch": 0.8792132265627379, "grad_norm": 1.0351006362039092, "learning_rate": 4.742738900815189e-06, "loss": 0.4318, "step": 14438 }, { "epoch": 0.8792741223396158, "grad_norm": 0.925115198762538, "learning_rate": 4.742703642969098e-06, "loss": 0.4025, "step": 14439 }, { "epoch": 0.8793350181164936, "grad_norm": 1.058299836633651, "learning_rate": 4.742668382838187e-06, "loss": 0.4147, "step": 14440 }, { "epoch": 0.8793959138933715, "grad_norm": 0.9241320270156539, "learning_rate": 4.7426331204224916e-06, "loss": 0.4897, "step": 14441 }, { "epoch": 0.8794568096702494, "grad_norm": 0.9515500374146644, "learning_rate": 4.742597855722049e-06, "loss": 0.4313, "step": 14442 }, { "epoch": 0.8795177054471273, "grad_norm": 1.0400679513583302, "learning_rate": 4.742562588736893e-06, "loss": 0.3705, "step": 14443 }, { "epoch": 0.8795786012240051, "grad_norm": 0.9192023971923061, "learning_rate": 4.742527319467062e-06, "loss": 0.4384, "step": 14444 }, { "epoch": 0.879639497000883, "grad_norm": 0.9621774272884461, "learning_rate": 4.742492047912591e-06, "loss": 0.4552, "step": 14445 }, { "epoch": 0.8797003927777609, "grad_norm": 0.9465818948980886, "learning_rate": 4.742456774073515e-06, "loss": 0.4363, "step": 14446 }, { "epoch": 0.8797612885546388, "grad_norm": 0.9821128021278347, "learning_rate": 4.7424214979498704e-06, "loss": 0.4737, "step": 14447 }, { "epoch": 0.8798221843315166, "grad_norm": 1.0859573100848932, "learning_rate": 4.742386219541695e-06, "loss": 0.4309, "step": 14448 }, { "epoch": 0.8798830801083944, "grad_norm": 1.0506883955268131, "learning_rate": 4.742350938849022e-06, "loss": 0.4014, "step": 14449 }, { "epoch": 0.8799439758852724, "grad_norm": 0.9895280230595283, "learning_rate": 4.742315655871888e-06, "loss": 0.403, "step": 14450 }, { "epoch": 0.8800048716621502, "grad_norm": 1.105224283559004, "learning_rate": 4.74228037061033e-06, "loss": 0.3648, "step": 14451 }, { "epoch": 0.8800657674390281, "grad_norm": 0.8947364629666656, "learning_rate": 4.742245083064383e-06, "loss": 0.4656, "step": 14452 }, { "epoch": 0.8801266632159059, "grad_norm": 0.9453040101969821, "learning_rate": 4.742209793234084e-06, "loss": 0.4599, "step": 14453 }, { "epoch": 0.8801875589927839, "grad_norm": 1.0216202012038815, "learning_rate": 4.742174501119468e-06, "loss": 0.3395, "step": 14454 }, { "epoch": 0.8802484547696617, "grad_norm": 1.0131361951322082, "learning_rate": 4.742139206720571e-06, "loss": 0.4658, "step": 14455 }, { "epoch": 0.8803093505465396, "grad_norm": 0.935257081619263, "learning_rate": 4.7421039100374286e-06, "loss": 0.4562, "step": 14456 }, { "epoch": 0.8803702463234174, "grad_norm": 1.0259473946453035, "learning_rate": 4.742068611070079e-06, "loss": 0.3848, "step": 14457 }, { "epoch": 0.8804311421002954, "grad_norm": 0.9720554818087248, "learning_rate": 4.742033309818556e-06, "loss": 0.3811, "step": 14458 }, { "epoch": 0.8804920378771732, "grad_norm": 0.9951536421472483, "learning_rate": 4.741998006282895e-06, "loss": 0.3777, "step": 14459 }, { "epoch": 0.8805529336540511, "grad_norm": 0.9388363141604015, "learning_rate": 4.741962700463134e-06, "loss": 0.4092, "step": 14460 }, { "epoch": 0.8806138294309289, "grad_norm": 0.9625898568113833, "learning_rate": 4.741927392359308e-06, "loss": 0.4184, "step": 14461 }, { "epoch": 0.8806747252078069, "grad_norm": 1.003878293621341, "learning_rate": 4.741892081971452e-06, "loss": 0.4249, "step": 14462 }, { "epoch": 0.8807356209846847, "grad_norm": 0.9551810673544985, "learning_rate": 4.7418567692996045e-06, "loss": 0.4684, "step": 14463 }, { "epoch": 0.8807965167615626, "grad_norm": 0.9891350787541618, "learning_rate": 4.741821454343799e-06, "loss": 0.3722, "step": 14464 }, { "epoch": 0.8808574125384404, "grad_norm": 1.0753429091806381, "learning_rate": 4.741786137104072e-06, "loss": 0.4401, "step": 14465 }, { "epoch": 0.8809183083153184, "grad_norm": 1.0516177667003155, "learning_rate": 4.7417508175804605e-06, "loss": 0.4251, "step": 14466 }, { "epoch": 0.8809792040921962, "grad_norm": 1.0719702357771819, "learning_rate": 4.7417154957729996e-06, "loss": 0.4327, "step": 14467 }, { "epoch": 0.8810400998690741, "grad_norm": 1.1418843012405901, "learning_rate": 4.741680171681726e-06, "loss": 0.3783, "step": 14468 }, { "epoch": 0.8811009956459519, "grad_norm": 1.0099262637994455, "learning_rate": 4.7416448453066755e-06, "loss": 0.4486, "step": 14469 }, { "epoch": 0.8811618914228299, "grad_norm": 1.0392709085390979, "learning_rate": 4.741609516647883e-06, "loss": 0.3912, "step": 14470 }, { "epoch": 0.8812227871997077, "grad_norm": 1.076457762524847, "learning_rate": 4.741574185705386e-06, "loss": 0.4055, "step": 14471 }, { "epoch": 0.8812836829765855, "grad_norm": 1.036784156645465, "learning_rate": 4.741538852479219e-06, "loss": 0.3681, "step": 14472 }, { "epoch": 0.8813445787534634, "grad_norm": 1.033463375381724, "learning_rate": 4.741503516969419e-06, "loss": 0.4232, "step": 14473 }, { "epoch": 0.8814054745303413, "grad_norm": 1.0212112956976376, "learning_rate": 4.741468179176023e-06, "loss": 0.3746, "step": 14474 }, { "epoch": 0.8814663703072192, "grad_norm": 0.9799223306723667, "learning_rate": 4.741432839099065e-06, "loss": 0.3985, "step": 14475 }, { "epoch": 0.881527266084097, "grad_norm": 1.0787352389293985, "learning_rate": 4.741397496738581e-06, "loss": 0.4216, "step": 14476 }, { "epoch": 0.881588161860975, "grad_norm": 0.9513724893172208, "learning_rate": 4.741362152094609e-06, "loss": 0.4241, "step": 14477 }, { "epoch": 0.8816490576378528, "grad_norm": 1.0153780941122073, "learning_rate": 4.7413268051671835e-06, "loss": 0.4313, "step": 14478 }, { "epoch": 0.8817099534147307, "grad_norm": 1.0002500872878237, "learning_rate": 4.741291455956341e-06, "loss": 0.3805, "step": 14479 }, { "epoch": 0.8817708491916085, "grad_norm": 1.0430512321235892, "learning_rate": 4.741256104462117e-06, "loss": 0.4516, "step": 14480 }, { "epoch": 0.8818317449684865, "grad_norm": 0.9621022429803102, "learning_rate": 4.741220750684547e-06, "loss": 0.4294, "step": 14481 }, { "epoch": 0.8818926407453643, "grad_norm": 0.9743183864822187, "learning_rate": 4.741185394623669e-06, "loss": 0.3455, "step": 14482 }, { "epoch": 0.8819535365222422, "grad_norm": 0.994725342386019, "learning_rate": 4.741150036279518e-06, "loss": 0.445, "step": 14483 }, { "epoch": 0.88201443229912, "grad_norm": 1.0007569098127698, "learning_rate": 4.74111467565213e-06, "loss": 0.3869, "step": 14484 }, { "epoch": 0.882075328075998, "grad_norm": 0.9862990478071, "learning_rate": 4.7410793127415415e-06, "loss": 0.4386, "step": 14485 }, { "epoch": 0.8821362238528758, "grad_norm": 0.9269452060136568, "learning_rate": 4.741043947547787e-06, "loss": 0.3801, "step": 14486 }, { "epoch": 0.8821971196297537, "grad_norm": 0.9752199397949666, "learning_rate": 4.741008580070904e-06, "loss": 0.4088, "step": 14487 }, { "epoch": 0.8822580154066315, "grad_norm": 1.0385083128285115, "learning_rate": 4.740973210310927e-06, "loss": 0.3753, "step": 14488 }, { "epoch": 0.8823189111835095, "grad_norm": 1.1016489575715414, "learning_rate": 4.7409378382678945e-06, "loss": 0.3967, "step": 14489 }, { "epoch": 0.8823798069603873, "grad_norm": 1.0943107943224744, "learning_rate": 4.7409024639418404e-06, "loss": 0.4531, "step": 14490 }, { "epoch": 0.8824407027372652, "grad_norm": 0.9672239679121124, "learning_rate": 4.740867087332801e-06, "loss": 0.4118, "step": 14491 }, { "epoch": 0.882501598514143, "grad_norm": 0.960082584214183, "learning_rate": 4.7408317084408136e-06, "loss": 0.4057, "step": 14492 }, { "epoch": 0.882562494291021, "grad_norm": 0.9730187147699458, "learning_rate": 4.740796327265914e-06, "loss": 0.348, "step": 14493 }, { "epoch": 0.8826233900678988, "grad_norm": 1.075373567106094, "learning_rate": 4.740760943808136e-06, "loss": 0.4804, "step": 14494 }, { "epoch": 0.8826842858447767, "grad_norm": 0.9690075145166939, "learning_rate": 4.740725558067519e-06, "loss": 0.3885, "step": 14495 }, { "epoch": 0.8827451816216545, "grad_norm": 0.9762235285553776, "learning_rate": 4.7406901700440964e-06, "loss": 0.464, "step": 14496 }, { "epoch": 0.8828060773985325, "grad_norm": 0.9712096347198031, "learning_rate": 4.740654779737905e-06, "loss": 0.4031, "step": 14497 }, { "epoch": 0.8828669731754103, "grad_norm": 0.9469470136721643, "learning_rate": 4.740619387148982e-06, "loss": 0.4568, "step": 14498 }, { "epoch": 0.8829278689522881, "grad_norm": 0.9335031815847059, "learning_rate": 4.740583992277362e-06, "loss": 0.526, "step": 14499 }, { "epoch": 0.882988764729166, "grad_norm": 0.9942093896989689, "learning_rate": 4.740548595123082e-06, "loss": 0.462, "step": 14500 }, { "epoch": 0.8830496605060439, "grad_norm": 0.955547330543164, "learning_rate": 4.7405131956861775e-06, "loss": 0.4061, "step": 14501 }, { "epoch": 0.8831105562829218, "grad_norm": 1.0464130323653558, "learning_rate": 4.740477793966685e-06, "loss": 0.4067, "step": 14502 }, { "epoch": 0.8831714520597996, "grad_norm": 0.9780745999920086, "learning_rate": 4.740442389964639e-06, "loss": 0.4259, "step": 14503 }, { "epoch": 0.8832323478366775, "grad_norm": 0.9512465893580787, "learning_rate": 4.7404069836800785e-06, "loss": 0.4266, "step": 14504 }, { "epoch": 0.8832932436135554, "grad_norm": 1.0665511732805326, "learning_rate": 4.740371575113037e-06, "loss": 0.4193, "step": 14505 }, { "epoch": 0.8833541393904333, "grad_norm": 1.1521430298133832, "learning_rate": 4.740336164263551e-06, "loss": 0.4075, "step": 14506 }, { "epoch": 0.8834150351673111, "grad_norm": 0.9126513460664654, "learning_rate": 4.740300751131659e-06, "loss": 0.4712, "step": 14507 }, { "epoch": 0.883475930944189, "grad_norm": 0.9889226756543323, "learning_rate": 4.740265335717394e-06, "loss": 0.4268, "step": 14508 }, { "epoch": 0.8835368267210669, "grad_norm": 1.0668023546145835, "learning_rate": 4.740229918020793e-06, "loss": 0.4045, "step": 14509 }, { "epoch": 0.8835977224979448, "grad_norm": 0.8995727790628836, "learning_rate": 4.740194498041893e-06, "loss": 0.4514, "step": 14510 }, { "epoch": 0.8836586182748226, "grad_norm": 0.9223442167738378, "learning_rate": 4.740159075780729e-06, "loss": 0.3788, "step": 14511 }, { "epoch": 0.8837195140517005, "grad_norm": 0.929875080616968, "learning_rate": 4.740123651237337e-06, "loss": 0.4404, "step": 14512 }, { "epoch": 0.8837804098285784, "grad_norm": 1.0517312122988491, "learning_rate": 4.740088224411754e-06, "loss": 0.4, "step": 14513 }, { "epoch": 0.8838413056054563, "grad_norm": 1.0797091997367152, "learning_rate": 4.740052795304017e-06, "loss": 0.4398, "step": 14514 }, { "epoch": 0.8839022013823341, "grad_norm": 1.0009173508664315, "learning_rate": 4.7400173639141585e-06, "loss": 0.3526, "step": 14515 }, { "epoch": 0.8839630971592121, "grad_norm": 0.9956458884866951, "learning_rate": 4.739981930242218e-06, "loss": 0.4336, "step": 14516 }, { "epoch": 0.8840239929360899, "grad_norm": 0.9334818039767662, "learning_rate": 4.739946494288231e-06, "loss": 0.5231, "step": 14517 }, { "epoch": 0.8840848887129678, "grad_norm": 1.0611088204710637, "learning_rate": 4.739911056052233e-06, "loss": 0.3727, "step": 14518 }, { "epoch": 0.8841457844898456, "grad_norm": 1.0033002277385017, "learning_rate": 4.7398756155342596e-06, "loss": 0.4419, "step": 14519 }, { "epoch": 0.8842066802667236, "grad_norm": 0.9812325064032946, "learning_rate": 4.7398401727343474e-06, "loss": 0.4221, "step": 14520 }, { "epoch": 0.8842675760436014, "grad_norm": 0.9744488003566376, "learning_rate": 4.739804727652533e-06, "loss": 0.464, "step": 14521 }, { "epoch": 0.8843284718204792, "grad_norm": 1.0097808030990718, "learning_rate": 4.739769280288852e-06, "loss": 0.3958, "step": 14522 }, { "epoch": 0.8843893675973571, "grad_norm": 1.035779224903127, "learning_rate": 4.739733830643341e-06, "loss": 0.4384, "step": 14523 }, { "epoch": 0.884450263374235, "grad_norm": 1.0814706857304213, "learning_rate": 4.739698378716035e-06, "loss": 0.3515, "step": 14524 }, { "epoch": 0.8845111591511129, "grad_norm": 1.0000554484321795, "learning_rate": 4.739662924506971e-06, "loss": 0.4392, "step": 14525 }, { "epoch": 0.8845720549279907, "grad_norm": 1.0113120509901972, "learning_rate": 4.7396274680161855e-06, "loss": 0.4121, "step": 14526 }, { "epoch": 0.8846329507048686, "grad_norm": 1.0409825449464758, "learning_rate": 4.739592009243713e-06, "loss": 0.3944, "step": 14527 }, { "epoch": 0.8846938464817465, "grad_norm": 1.0028406306299844, "learning_rate": 4.739556548189592e-06, "loss": 0.4013, "step": 14528 }, { "epoch": 0.8847547422586244, "grad_norm": 1.068934250744627, "learning_rate": 4.739521084853857e-06, "loss": 0.3823, "step": 14529 }, { "epoch": 0.8848156380355022, "grad_norm": 0.9517523908959626, "learning_rate": 4.739485619236544e-06, "loss": 0.4481, "step": 14530 }, { "epoch": 0.8848765338123801, "grad_norm": 0.9780323110317466, "learning_rate": 4.73945015133769e-06, "loss": 0.407, "step": 14531 }, { "epoch": 0.884937429589258, "grad_norm": 1.0108050103446857, "learning_rate": 4.7394146811573305e-06, "loss": 0.3992, "step": 14532 }, { "epoch": 0.8849983253661359, "grad_norm": 1.0877438190450344, "learning_rate": 4.739379208695502e-06, "loss": 0.4209, "step": 14533 }, { "epoch": 0.8850592211430137, "grad_norm": 1.028004449326137, "learning_rate": 4.739343733952241e-06, "loss": 0.4361, "step": 14534 }, { "epoch": 0.8851201169198916, "grad_norm": 0.9624677112586669, "learning_rate": 4.739308256927582e-06, "loss": 0.4773, "step": 14535 }, { "epoch": 0.8851810126967695, "grad_norm": 0.9280116839476207, "learning_rate": 4.739272777621563e-06, "loss": 0.4672, "step": 14536 }, { "epoch": 0.8852419084736474, "grad_norm": 0.9535842624709401, "learning_rate": 4.73923729603422e-06, "loss": 0.4545, "step": 14537 }, { "epoch": 0.8853028042505252, "grad_norm": 1.0244863569298093, "learning_rate": 4.739201812165588e-06, "loss": 0.2957, "step": 14538 }, { "epoch": 0.8853637000274031, "grad_norm": 1.051072244332287, "learning_rate": 4.739166326015704e-06, "loss": 0.433, "step": 14539 }, { "epoch": 0.885424595804281, "grad_norm": 1.0650461557430668, "learning_rate": 4.7391308375846035e-06, "loss": 0.3717, "step": 14540 }, { "epoch": 0.8854854915811589, "grad_norm": 0.996565039582909, "learning_rate": 4.739095346872324e-06, "loss": 0.4119, "step": 14541 }, { "epoch": 0.8855463873580367, "grad_norm": 1.045540283584445, "learning_rate": 4.739059853878899e-06, "loss": 0.4147, "step": 14542 }, { "epoch": 0.8856072831349145, "grad_norm": 1.0123750728287209, "learning_rate": 4.739024358604368e-06, "loss": 0.4381, "step": 14543 }, { "epoch": 0.8856681789117925, "grad_norm": 0.968466843738504, "learning_rate": 4.7389888610487645e-06, "loss": 0.4439, "step": 14544 }, { "epoch": 0.8857290746886703, "grad_norm": 0.9092569144038399, "learning_rate": 4.738953361212126e-06, "loss": 0.4834, "step": 14545 }, { "epoch": 0.8857899704655482, "grad_norm": 0.9112460649501349, "learning_rate": 4.738917859094488e-06, "loss": 0.4395, "step": 14546 }, { "epoch": 0.885850866242426, "grad_norm": 1.0231523199334638, "learning_rate": 4.738882354695888e-06, "loss": 0.3921, "step": 14547 }, { "epoch": 0.885911762019304, "grad_norm": 1.0413209133824624, "learning_rate": 4.73884684801636e-06, "loss": 0.3243, "step": 14548 }, { "epoch": 0.8859726577961818, "grad_norm": 0.9371246234564123, "learning_rate": 4.738811339055943e-06, "loss": 0.4432, "step": 14549 }, { "epoch": 0.8860335535730597, "grad_norm": 1.0524331633837578, "learning_rate": 4.73877582781467e-06, "loss": 0.4003, "step": 14550 }, { "epoch": 0.8860944493499375, "grad_norm": 1.015795811502872, "learning_rate": 4.73874031429258e-06, "loss": 0.4075, "step": 14551 }, { "epoch": 0.8861553451268155, "grad_norm": 0.9708388627278547, "learning_rate": 4.738704798489707e-06, "loss": 0.434, "step": 14552 }, { "epoch": 0.8862162409036933, "grad_norm": 0.9300749920371756, "learning_rate": 4.738669280406088e-06, "loss": 0.4218, "step": 14553 }, { "epoch": 0.8862771366805712, "grad_norm": 0.976299281692475, "learning_rate": 4.738633760041761e-06, "loss": 0.4097, "step": 14554 }, { "epoch": 0.886338032457449, "grad_norm": 1.0538313456559718, "learning_rate": 4.738598237396759e-06, "loss": 0.477, "step": 14555 }, { "epoch": 0.886398928234327, "grad_norm": 0.9879550330562429, "learning_rate": 4.738562712471119e-06, "loss": 0.4551, "step": 14556 }, { "epoch": 0.8864598240112048, "grad_norm": 0.9578665359830799, "learning_rate": 4.7385271852648785e-06, "loss": 0.3763, "step": 14557 }, { "epoch": 0.8865207197880827, "grad_norm": 0.9067445593756864, "learning_rate": 4.738491655778074e-06, "loss": 0.491, "step": 14558 }, { "epoch": 0.8865816155649606, "grad_norm": 0.9731341255582402, "learning_rate": 4.73845612401074e-06, "loss": 0.3917, "step": 14559 }, { "epoch": 0.8866425113418385, "grad_norm": 0.9339879793126641, "learning_rate": 4.738420589962912e-06, "loss": 0.4691, "step": 14560 }, { "epoch": 0.8867034071187163, "grad_norm": 1.0560254196146126, "learning_rate": 4.73838505363463e-06, "loss": 0.3551, "step": 14561 }, { "epoch": 0.8867643028955942, "grad_norm": 1.0649635513873796, "learning_rate": 4.738349515025926e-06, "loss": 0.365, "step": 14562 }, { "epoch": 0.8868251986724721, "grad_norm": 0.9748495733102497, "learning_rate": 4.738313974136839e-06, "loss": 0.4319, "step": 14563 }, { "epoch": 0.88688609444935, "grad_norm": 0.988174441167375, "learning_rate": 4.738278430967405e-06, "loss": 0.3858, "step": 14564 }, { "epoch": 0.8869469902262278, "grad_norm": 1.1106843820268293, "learning_rate": 4.7382428855176585e-06, "loss": 0.3797, "step": 14565 }, { "epoch": 0.8870078860031057, "grad_norm": 0.9940972027690443, "learning_rate": 4.738207337787637e-06, "loss": 0.3502, "step": 14566 }, { "epoch": 0.8870687817799836, "grad_norm": 0.9880489497593371, "learning_rate": 4.7381717877773756e-06, "loss": 0.4851, "step": 14567 }, { "epoch": 0.8871296775568615, "grad_norm": 0.9139355845570356, "learning_rate": 4.738136235486912e-06, "loss": 0.4494, "step": 14568 }, { "epoch": 0.8871905733337393, "grad_norm": 1.0939381003043427, "learning_rate": 4.7381006809162825e-06, "loss": 0.3689, "step": 14569 }, { "epoch": 0.8872514691106171, "grad_norm": 0.9686008646424754, "learning_rate": 4.73806512406552e-06, "loss": 0.4243, "step": 14570 }, { "epoch": 0.8873123648874951, "grad_norm": 0.9796330666161722, "learning_rate": 4.7380295649346664e-06, "loss": 0.3928, "step": 14571 }, { "epoch": 0.8873732606643729, "grad_norm": 1.0046364993327526, "learning_rate": 4.737994003523752e-06, "loss": 0.3906, "step": 14572 }, { "epoch": 0.8874341564412508, "grad_norm": 0.9152075073051713, "learning_rate": 4.737958439832818e-06, "loss": 0.4662, "step": 14573 }, { "epoch": 0.8874950522181286, "grad_norm": 1.013044527235108, "learning_rate": 4.737922873861898e-06, "loss": 0.396, "step": 14574 }, { "epoch": 0.8875559479950066, "grad_norm": 1.0499203970147273, "learning_rate": 4.737887305611028e-06, "loss": 0.3929, "step": 14575 }, { "epoch": 0.8876168437718844, "grad_norm": 0.9450213899542637, "learning_rate": 4.737851735080245e-06, "loss": 0.407, "step": 14576 }, { "epoch": 0.8876777395487623, "grad_norm": 1.015764705188937, "learning_rate": 4.737816162269586e-06, "loss": 0.471, "step": 14577 }, { "epoch": 0.8877386353256401, "grad_norm": 0.9533202782802459, "learning_rate": 4.737780587179086e-06, "loss": 0.3714, "step": 14578 }, { "epoch": 0.8877995311025181, "grad_norm": 0.9226234147268944, "learning_rate": 4.7377450098087814e-06, "loss": 0.4274, "step": 14579 }, { "epoch": 0.8878604268793959, "grad_norm": 1.0396868897339313, "learning_rate": 4.737709430158709e-06, "loss": 0.3849, "step": 14580 }, { "epoch": 0.8879213226562738, "grad_norm": 0.9961534043787854, "learning_rate": 4.737673848228905e-06, "loss": 0.4264, "step": 14581 }, { "epoch": 0.8879822184331516, "grad_norm": 1.0009029228484168, "learning_rate": 4.737638264019406e-06, "loss": 0.3365, "step": 14582 }, { "epoch": 0.8880431142100296, "grad_norm": 1.0152726504596241, "learning_rate": 4.737602677530246e-06, "loss": 0.3612, "step": 14583 }, { "epoch": 0.8881040099869074, "grad_norm": 1.1067812720613983, "learning_rate": 4.737567088761463e-06, "loss": 0.3433, "step": 14584 }, { "epoch": 0.8881649057637853, "grad_norm": 0.9979882513661466, "learning_rate": 4.737531497713094e-06, "loss": 0.4591, "step": 14585 }, { "epoch": 0.8882258015406631, "grad_norm": 0.9514773959268985, "learning_rate": 4.737495904385174e-06, "loss": 0.5134, "step": 14586 }, { "epoch": 0.8882866973175411, "grad_norm": 1.0492515787383712, "learning_rate": 4.73746030877774e-06, "loss": 0.4016, "step": 14587 }, { "epoch": 0.8883475930944189, "grad_norm": 1.1161398062167265, "learning_rate": 4.737424710890829e-06, "loss": 0.3593, "step": 14588 }, { "epoch": 0.8884084888712968, "grad_norm": 1.0431128722378011, "learning_rate": 4.737389110724474e-06, "loss": 0.3768, "step": 14589 }, { "epoch": 0.8884693846481746, "grad_norm": 1.0005263833416898, "learning_rate": 4.7373535082787146e-06, "loss": 0.3559, "step": 14590 }, { "epoch": 0.8885302804250526, "grad_norm": 0.9214437359646214, "learning_rate": 4.737317903553587e-06, "loss": 0.4516, "step": 14591 }, { "epoch": 0.8885911762019304, "grad_norm": 0.998183505884088, "learning_rate": 4.737282296549125e-06, "loss": 0.4453, "step": 14592 }, { "epoch": 0.8886520719788082, "grad_norm": 0.9548685817502276, "learning_rate": 4.737246687265367e-06, "loss": 0.4398, "step": 14593 }, { "epoch": 0.8887129677556861, "grad_norm": 0.9961354594993673, "learning_rate": 4.7372110757023485e-06, "loss": 0.3978, "step": 14594 }, { "epoch": 0.888773863532564, "grad_norm": 1.035383326108782, "learning_rate": 4.737175461860105e-06, "loss": 0.3908, "step": 14595 }, { "epoch": 0.8888347593094419, "grad_norm": 1.022749155360547, "learning_rate": 4.737139845738674e-06, "loss": 0.3792, "step": 14596 }, { "epoch": 0.8888956550863197, "grad_norm": 0.9357577114958264, "learning_rate": 4.7371042273380925e-06, "loss": 0.4248, "step": 14597 }, { "epoch": 0.8889565508631977, "grad_norm": 1.0079603174449427, "learning_rate": 4.737068606658395e-06, "loss": 0.4077, "step": 14598 }, { "epoch": 0.8890174466400755, "grad_norm": 1.003386186318365, "learning_rate": 4.737032983699618e-06, "loss": 0.3684, "step": 14599 }, { "epoch": 0.8890783424169534, "grad_norm": 0.9742030279061273, "learning_rate": 4.736997358461799e-06, "loss": 0.4047, "step": 14600 }, { "epoch": 0.8891392381938312, "grad_norm": 0.9460953874903515, "learning_rate": 4.736961730944973e-06, "loss": 0.4052, "step": 14601 }, { "epoch": 0.8892001339707092, "grad_norm": 0.9936709614953948, "learning_rate": 4.736926101149178e-06, "loss": 0.3533, "step": 14602 }, { "epoch": 0.889261029747587, "grad_norm": 0.9227494887170096, "learning_rate": 4.736890469074449e-06, "loss": 0.4544, "step": 14603 }, { "epoch": 0.8893219255244649, "grad_norm": 0.9274576375511996, "learning_rate": 4.7368548347208206e-06, "loss": 0.407, "step": 14604 }, { "epoch": 0.8893828213013427, "grad_norm": 0.9796836128178578, "learning_rate": 4.7368191980883325e-06, "loss": 0.3979, "step": 14605 }, { "epoch": 0.8894437170782207, "grad_norm": 1.0915524606697988, "learning_rate": 4.73678355917702e-06, "loss": 0.3814, "step": 14606 }, { "epoch": 0.8895046128550985, "grad_norm": 1.024656065511365, "learning_rate": 4.736747917986918e-06, "loss": 0.4274, "step": 14607 }, { "epoch": 0.8895655086319764, "grad_norm": 0.9998399522715631, "learning_rate": 4.736712274518064e-06, "loss": 0.3751, "step": 14608 }, { "epoch": 0.8896264044088542, "grad_norm": 1.0257726576492439, "learning_rate": 4.7366766287704944e-06, "loss": 0.4415, "step": 14609 }, { "epoch": 0.8896873001857322, "grad_norm": 1.0261594184745948, "learning_rate": 4.736640980744245e-06, "loss": 0.3986, "step": 14610 }, { "epoch": 0.88974819596261, "grad_norm": 1.0334604159601593, "learning_rate": 4.736605330439352e-06, "loss": 0.36, "step": 14611 }, { "epoch": 0.8898090917394879, "grad_norm": 0.994751901516981, "learning_rate": 4.736569677855852e-06, "loss": 0.4558, "step": 14612 }, { "epoch": 0.8898699875163657, "grad_norm": 1.019122242156888, "learning_rate": 4.736534022993782e-06, "loss": 0.4652, "step": 14613 }, { "epoch": 0.8899308832932437, "grad_norm": 0.9245441315251041, "learning_rate": 4.736498365853177e-06, "loss": 0.402, "step": 14614 }, { "epoch": 0.8899917790701215, "grad_norm": 0.9699428489998548, "learning_rate": 4.736462706434073e-06, "loss": 0.4314, "step": 14615 }, { "epoch": 0.8900526748469993, "grad_norm": 0.9132132429191736, "learning_rate": 4.736427044736509e-06, "loss": 0.4377, "step": 14616 }, { "epoch": 0.8901135706238772, "grad_norm": 1.006824293757874, "learning_rate": 4.7363913807605184e-06, "loss": 0.4566, "step": 14617 }, { "epoch": 0.8901744664007551, "grad_norm": 1.0670463485532913, "learning_rate": 4.73635571450614e-06, "loss": 0.3668, "step": 14618 }, { "epoch": 0.890235362177633, "grad_norm": 1.0242944012069313, "learning_rate": 4.736320045973407e-06, "loss": 0.3176, "step": 14619 }, { "epoch": 0.8902962579545108, "grad_norm": 1.0121866475918242, "learning_rate": 4.73628437516236e-06, "loss": 0.431, "step": 14620 }, { "epoch": 0.8903571537313887, "grad_norm": 0.994089111212936, "learning_rate": 4.7362487020730315e-06, "loss": 0.4041, "step": 14621 }, { "epoch": 0.8904180495082666, "grad_norm": 1.0005333840825674, "learning_rate": 4.73621302670546e-06, "loss": 0.407, "step": 14622 }, { "epoch": 0.8904789452851445, "grad_norm": 1.0018002921576594, "learning_rate": 4.73617734905968e-06, "loss": 0.3399, "step": 14623 }, { "epoch": 0.8905398410620223, "grad_norm": 0.9588241043248339, "learning_rate": 4.73614166913573e-06, "loss": 0.4644, "step": 14624 }, { "epoch": 0.8906007368389002, "grad_norm": 0.9648544430104163, "learning_rate": 4.736105986933645e-06, "loss": 0.4308, "step": 14625 }, { "epoch": 0.8906616326157781, "grad_norm": 0.980354168900484, "learning_rate": 4.736070302453462e-06, "loss": 0.4581, "step": 14626 }, { "epoch": 0.890722528392656, "grad_norm": 1.1266998185458954, "learning_rate": 4.736034615695217e-06, "loss": 0.4156, "step": 14627 }, { "epoch": 0.8907834241695338, "grad_norm": 1.002022123367537, "learning_rate": 4.735998926658946e-06, "loss": 0.3851, "step": 14628 }, { "epoch": 0.8908443199464117, "grad_norm": 0.9746119256458304, "learning_rate": 4.735963235344686e-06, "loss": 0.4715, "step": 14629 }, { "epoch": 0.8909052157232896, "grad_norm": 0.9924099913939048, "learning_rate": 4.735927541752473e-06, "loss": 0.4454, "step": 14630 }, { "epoch": 0.8909661115001675, "grad_norm": 1.0295877685709038, "learning_rate": 4.735891845882345e-06, "loss": 0.4584, "step": 14631 }, { "epoch": 0.8910270072770453, "grad_norm": 1.0280112256970313, "learning_rate": 4.7358561477343345e-06, "loss": 0.3963, "step": 14632 }, { "epoch": 0.8910879030539232, "grad_norm": 1.1537098841804116, "learning_rate": 4.7358204473084816e-06, "loss": 0.4103, "step": 14633 }, { "epoch": 0.8911487988308011, "grad_norm": 0.9859567136798626, "learning_rate": 4.735784744604821e-06, "loss": 0.4091, "step": 14634 }, { "epoch": 0.891209694607679, "grad_norm": 0.9956400892416488, "learning_rate": 4.73574903962339e-06, "loss": 0.4844, "step": 14635 }, { "epoch": 0.8912705903845568, "grad_norm": 0.8785410173930918, "learning_rate": 4.735713332364223e-06, "loss": 0.4777, "step": 14636 }, { "epoch": 0.8913314861614346, "grad_norm": 1.1514180265151868, "learning_rate": 4.735677622827359e-06, "loss": 0.3878, "step": 14637 }, { "epoch": 0.8913923819383126, "grad_norm": 0.9380558875321965, "learning_rate": 4.735641911012833e-06, "loss": 0.4564, "step": 14638 }, { "epoch": 0.8914532777151905, "grad_norm": 0.9584839151769291, "learning_rate": 4.735606196920681e-06, "loss": 0.4152, "step": 14639 }, { "epoch": 0.8915141734920683, "grad_norm": 0.9925864861992524, "learning_rate": 4.7355704805509395e-06, "loss": 0.3937, "step": 14640 }, { "epoch": 0.8915750692689463, "grad_norm": 0.9673594386249813, "learning_rate": 4.735534761903646e-06, "loss": 0.3844, "step": 14641 }, { "epoch": 0.8916359650458241, "grad_norm": 1.034176335610969, "learning_rate": 4.735499040978836e-06, "loss": 0.3648, "step": 14642 }, { "epoch": 0.8916968608227019, "grad_norm": 0.990426877633575, "learning_rate": 4.735463317776547e-06, "loss": 0.3715, "step": 14643 }, { "epoch": 0.8917577565995798, "grad_norm": 0.9706206319147549, "learning_rate": 4.735427592296813e-06, "loss": 0.4322, "step": 14644 }, { "epoch": 0.8918186523764577, "grad_norm": 1.0953443265410499, "learning_rate": 4.735391864539672e-06, "loss": 0.3846, "step": 14645 }, { "epoch": 0.8918795481533356, "grad_norm": 1.0611402412902053, "learning_rate": 4.73535613450516e-06, "loss": 0.404, "step": 14646 }, { "epoch": 0.8919404439302134, "grad_norm": 0.9673528231781577, "learning_rate": 4.735320402193315e-06, "loss": 0.4297, "step": 14647 }, { "epoch": 0.8920013397070913, "grad_norm": 1.0016450466391027, "learning_rate": 4.7352846676041706e-06, "loss": 0.4093, "step": 14648 }, { "epoch": 0.8920622354839692, "grad_norm": 1.0498053406900354, "learning_rate": 4.735248930737766e-06, "loss": 0.4057, "step": 14649 }, { "epoch": 0.8921231312608471, "grad_norm": 0.9697142725083524, "learning_rate": 4.735213191594136e-06, "loss": 0.421, "step": 14650 }, { "epoch": 0.8921840270377249, "grad_norm": 1.0404258118824574, "learning_rate": 4.735177450173316e-06, "loss": 0.3444, "step": 14651 }, { "epoch": 0.8922449228146028, "grad_norm": 1.0157197500918906, "learning_rate": 4.735141706475345e-06, "loss": 0.4117, "step": 14652 }, { "epoch": 0.8923058185914807, "grad_norm": 1.0414127420652828, "learning_rate": 4.735105960500258e-06, "loss": 0.476, "step": 14653 }, { "epoch": 0.8923667143683586, "grad_norm": 1.0291053424446857, "learning_rate": 4.735070212248091e-06, "loss": 0.4641, "step": 14654 }, { "epoch": 0.8924276101452364, "grad_norm": 1.006003237925429, "learning_rate": 4.735034461718881e-06, "loss": 0.4355, "step": 14655 }, { "epoch": 0.8924885059221143, "grad_norm": 0.9688892531018964, "learning_rate": 4.734998708912664e-06, "loss": 0.4336, "step": 14656 }, { "epoch": 0.8925494016989922, "grad_norm": 1.0659299277655565, "learning_rate": 4.7349629538294775e-06, "loss": 0.3986, "step": 14657 }, { "epoch": 0.8926102974758701, "grad_norm": 1.0549191479425488, "learning_rate": 4.734927196469356e-06, "loss": 0.3714, "step": 14658 }, { "epoch": 0.8926711932527479, "grad_norm": 1.067801520051494, "learning_rate": 4.734891436832339e-06, "loss": 0.3607, "step": 14659 }, { "epoch": 0.8927320890296258, "grad_norm": 1.0073310761697678, "learning_rate": 4.734855674918461e-06, "loss": 0.3763, "step": 14660 }, { "epoch": 0.8927929848065037, "grad_norm": 0.961418788464633, "learning_rate": 4.734819910727757e-06, "loss": 0.4342, "step": 14661 }, { "epoch": 0.8928538805833816, "grad_norm": 0.9470477728309236, "learning_rate": 4.734784144260265e-06, "loss": 0.4048, "step": 14662 }, { "epoch": 0.8929147763602594, "grad_norm": 1.0572607251670336, "learning_rate": 4.734748375516023e-06, "loss": 0.4263, "step": 14663 }, { "epoch": 0.8929756721371372, "grad_norm": 1.023993439182348, "learning_rate": 4.734712604495064e-06, "loss": 0.3604, "step": 14664 }, { "epoch": 0.8930365679140152, "grad_norm": 1.0543530250277289, "learning_rate": 4.734676831197427e-06, "loss": 0.4434, "step": 14665 }, { "epoch": 0.893097463690893, "grad_norm": 0.9716947185638822, "learning_rate": 4.7346410556231485e-06, "loss": 0.3986, "step": 14666 }, { "epoch": 0.8931583594677709, "grad_norm": 0.9679657080687325, "learning_rate": 4.734605277772263e-06, "loss": 0.4703, "step": 14667 }, { "epoch": 0.8932192552446487, "grad_norm": 1.0575564692730302, "learning_rate": 4.734569497644809e-06, "loss": 0.3912, "step": 14668 }, { "epoch": 0.8932801510215267, "grad_norm": 0.9727063874295306, "learning_rate": 4.734533715240821e-06, "loss": 0.3548, "step": 14669 }, { "epoch": 0.8933410467984045, "grad_norm": 1.0526341085795003, "learning_rate": 4.734497930560337e-06, "loss": 0.3915, "step": 14670 }, { "epoch": 0.8934019425752824, "grad_norm": 1.0130256445105974, "learning_rate": 4.734462143603393e-06, "loss": 0.3593, "step": 14671 }, { "epoch": 0.8934628383521602, "grad_norm": 0.9570914577395336, "learning_rate": 4.734426354370025e-06, "loss": 0.4474, "step": 14672 }, { "epoch": 0.8935237341290382, "grad_norm": 0.9781701993300214, "learning_rate": 4.734390562860271e-06, "loss": 0.4253, "step": 14673 }, { "epoch": 0.893584629905916, "grad_norm": 1.0142796074820146, "learning_rate": 4.734354769074165e-06, "loss": 0.384, "step": 14674 }, { "epoch": 0.8936455256827939, "grad_norm": 0.9513996891251644, "learning_rate": 4.7343189730117465e-06, "loss": 0.3911, "step": 14675 }, { "epoch": 0.8937064214596717, "grad_norm": 0.9842042834339912, "learning_rate": 4.734283174673049e-06, "loss": 0.4261, "step": 14676 }, { "epoch": 0.8937673172365497, "grad_norm": 1.048173042501158, "learning_rate": 4.734247374058111e-06, "loss": 0.4926, "step": 14677 }, { "epoch": 0.8938282130134275, "grad_norm": 0.9452615484103196, "learning_rate": 4.734211571166966e-06, "loss": 0.4533, "step": 14678 }, { "epoch": 0.8938891087903054, "grad_norm": 0.9990234184611058, "learning_rate": 4.734175765999656e-06, "loss": 0.47, "step": 14679 }, { "epoch": 0.8939500045671833, "grad_norm": 1.0048850342038116, "learning_rate": 4.734139958556212e-06, "loss": 0.3952, "step": 14680 }, { "epoch": 0.8940109003440612, "grad_norm": 1.0158039621397243, "learning_rate": 4.734104148836673e-06, "loss": 0.3723, "step": 14681 }, { "epoch": 0.894071796120939, "grad_norm": 0.9153864253838628, "learning_rate": 4.734068336841076e-06, "loss": 0.4445, "step": 14682 }, { "epoch": 0.8941326918978169, "grad_norm": 0.9327293362966655, "learning_rate": 4.734032522569456e-06, "loss": 0.415, "step": 14683 }, { "epoch": 0.8941935876746948, "grad_norm": 1.0569485098883837, "learning_rate": 4.7339967060218505e-06, "loss": 0.4291, "step": 14684 }, { "epoch": 0.8942544834515727, "grad_norm": 0.9852569916298514, "learning_rate": 4.733960887198295e-06, "loss": 0.4163, "step": 14685 }, { "epoch": 0.8943153792284505, "grad_norm": 1.0989183686945219, "learning_rate": 4.7339250660988265e-06, "loss": 0.473, "step": 14686 }, { "epoch": 0.8943762750053283, "grad_norm": 0.9252179304020337, "learning_rate": 4.7338892427234824e-06, "loss": 0.4169, "step": 14687 }, { "epoch": 0.8944371707822063, "grad_norm": 1.0134298606135095, "learning_rate": 4.733853417072298e-06, "loss": 0.3601, "step": 14688 }, { "epoch": 0.8944980665590841, "grad_norm": 1.0652894125660364, "learning_rate": 4.73381758914531e-06, "loss": 0.4269, "step": 14689 }, { "epoch": 0.894558962335962, "grad_norm": 1.192962807106631, "learning_rate": 4.733781758942556e-06, "loss": 0.384, "step": 14690 }, { "epoch": 0.8946198581128398, "grad_norm": 0.9207254629605826, "learning_rate": 4.733745926464071e-06, "loss": 0.4159, "step": 14691 }, { "epoch": 0.8946807538897178, "grad_norm": 0.9469570635078458, "learning_rate": 4.733710091709891e-06, "loss": 0.4357, "step": 14692 }, { "epoch": 0.8947416496665956, "grad_norm": 0.9995858184623855, "learning_rate": 4.733674254680055e-06, "loss": 0.3888, "step": 14693 }, { "epoch": 0.8948025454434735, "grad_norm": 0.9513177349981244, "learning_rate": 4.733638415374597e-06, "loss": 0.4136, "step": 14694 }, { "epoch": 0.8948634412203513, "grad_norm": 0.9445966473081283, "learning_rate": 4.733602573793555e-06, "loss": 0.4229, "step": 14695 }, { "epoch": 0.8949243369972293, "grad_norm": 1.0139849934168779, "learning_rate": 4.733566729936966e-06, "loss": 0.4581, "step": 14696 }, { "epoch": 0.8949852327741071, "grad_norm": 1.005606096421508, "learning_rate": 4.733530883804865e-06, "loss": 0.353, "step": 14697 }, { "epoch": 0.895046128550985, "grad_norm": 1.0675601201156681, "learning_rate": 4.733495035397289e-06, "loss": 0.4253, "step": 14698 }, { "epoch": 0.8951070243278628, "grad_norm": 1.0378307106960727, "learning_rate": 4.733459184714276e-06, "loss": 0.3358, "step": 14699 }, { "epoch": 0.8951679201047408, "grad_norm": 1.0607124607269152, "learning_rate": 4.733423331755859e-06, "loss": 0.4902, "step": 14700 }, { "epoch": 0.8952288158816186, "grad_norm": 1.0296967250532527, "learning_rate": 4.733387476522077e-06, "loss": 0.3611, "step": 14701 }, { "epoch": 0.8952897116584965, "grad_norm": 0.9864394851781558, "learning_rate": 4.733351619012968e-06, "loss": 0.5028, "step": 14702 }, { "epoch": 0.8953506074353743, "grad_norm": 1.117776067109716, "learning_rate": 4.733315759228566e-06, "loss": 0.3493, "step": 14703 }, { "epoch": 0.8954115032122523, "grad_norm": 1.0121899872909834, "learning_rate": 4.733279897168908e-06, "loss": 0.386, "step": 14704 }, { "epoch": 0.8954723989891301, "grad_norm": 0.9564989183900076, "learning_rate": 4.733244032834031e-06, "loss": 0.4569, "step": 14705 }, { "epoch": 0.895533294766008, "grad_norm": 0.9110541267131114, "learning_rate": 4.733208166223972e-06, "loss": 0.3976, "step": 14706 }, { "epoch": 0.8955941905428858, "grad_norm": 0.9727164074399459, "learning_rate": 4.733172297338766e-06, "loss": 0.4406, "step": 14707 }, { "epoch": 0.8956550863197638, "grad_norm": 1.0618540373469474, "learning_rate": 4.7331364261784515e-06, "loss": 0.3532, "step": 14708 }, { "epoch": 0.8957159820966416, "grad_norm": 0.9154899278624605, "learning_rate": 4.733100552743063e-06, "loss": 0.4731, "step": 14709 }, { "epoch": 0.8957768778735195, "grad_norm": 1.0227803423058903, "learning_rate": 4.733064677032639e-06, "loss": 0.3983, "step": 14710 }, { "epoch": 0.8958377736503973, "grad_norm": 0.9803137597237719, "learning_rate": 4.733028799047215e-06, "loss": 0.3979, "step": 14711 }, { "epoch": 0.8958986694272753, "grad_norm": 0.9287760626871222, "learning_rate": 4.732992918786828e-06, "loss": 0.3669, "step": 14712 }, { "epoch": 0.8959595652041531, "grad_norm": 0.9993217848536802, "learning_rate": 4.732957036251513e-06, "loss": 0.3857, "step": 14713 }, { "epoch": 0.8960204609810309, "grad_norm": 0.9462551372987617, "learning_rate": 4.732921151441308e-06, "loss": 0.4663, "step": 14714 }, { "epoch": 0.8960813567579088, "grad_norm": 1.0739304470077295, "learning_rate": 4.73288526435625e-06, "loss": 0.4159, "step": 14715 }, { "epoch": 0.8961422525347867, "grad_norm": 0.981191994806953, "learning_rate": 4.732849374996375e-06, "loss": 0.4423, "step": 14716 }, { "epoch": 0.8962031483116646, "grad_norm": 0.9984110732125402, "learning_rate": 4.732813483361719e-06, "loss": 0.3184, "step": 14717 }, { "epoch": 0.8962640440885424, "grad_norm": 1.0761881238140336, "learning_rate": 4.7327775894523196e-06, "loss": 0.3781, "step": 14718 }, { "epoch": 0.8963249398654203, "grad_norm": 1.0641899016074947, "learning_rate": 4.732741693268212e-06, "loss": 0.3845, "step": 14719 }, { "epoch": 0.8963858356422982, "grad_norm": 1.0054020560551704, "learning_rate": 4.732705794809434e-06, "loss": 0.3993, "step": 14720 }, { "epoch": 0.8964467314191761, "grad_norm": 1.0611634113338613, "learning_rate": 4.732669894076022e-06, "loss": 0.4468, "step": 14721 }, { "epoch": 0.8965076271960539, "grad_norm": 1.1170818422553623, "learning_rate": 4.732633991068012e-06, "loss": 0.4673, "step": 14722 }, { "epoch": 0.8965685229729319, "grad_norm": 0.991453295440396, "learning_rate": 4.732598085785441e-06, "loss": 0.4426, "step": 14723 }, { "epoch": 0.8966294187498097, "grad_norm": 1.0715156529665135, "learning_rate": 4.732562178228346e-06, "loss": 0.3926, "step": 14724 }, { "epoch": 0.8966903145266876, "grad_norm": 1.0419064972844359, "learning_rate": 4.732526268396762e-06, "loss": 0.3954, "step": 14725 }, { "epoch": 0.8967512103035654, "grad_norm": 0.9166140599744076, "learning_rate": 4.732490356290727e-06, "loss": 0.3853, "step": 14726 }, { "epoch": 0.8968121060804434, "grad_norm": 0.9231511803663452, "learning_rate": 4.732454441910278e-06, "loss": 0.4536, "step": 14727 }, { "epoch": 0.8968730018573212, "grad_norm": 0.9575226763746894, "learning_rate": 4.73241852525545e-06, "loss": 0.4033, "step": 14728 }, { "epoch": 0.8969338976341991, "grad_norm": 1.0010951786763047, "learning_rate": 4.732382606326281e-06, "loss": 0.3718, "step": 14729 }, { "epoch": 0.8969947934110769, "grad_norm": 1.0166352354157981, "learning_rate": 4.7323466851228065e-06, "loss": 0.3923, "step": 14730 }, { "epoch": 0.8970556891879549, "grad_norm": 1.0034345342145254, "learning_rate": 4.732310761645064e-06, "loss": 0.3744, "step": 14731 }, { "epoch": 0.8971165849648327, "grad_norm": 1.0100774760039217, "learning_rate": 4.732274835893089e-06, "loss": 0.3482, "step": 14732 }, { "epoch": 0.8971774807417106, "grad_norm": 0.929102043103311, "learning_rate": 4.73223890786692e-06, "loss": 0.4223, "step": 14733 }, { "epoch": 0.8972383765185884, "grad_norm": 0.9597049810394762, "learning_rate": 4.732202977566592e-06, "loss": 0.3863, "step": 14734 }, { "epoch": 0.8972992722954664, "grad_norm": 1.0724699609271977, "learning_rate": 4.732167044992142e-06, "loss": 0.4111, "step": 14735 }, { "epoch": 0.8973601680723442, "grad_norm": 1.0068181649095078, "learning_rate": 4.732131110143606e-06, "loss": 0.4028, "step": 14736 }, { "epoch": 0.897421063849222, "grad_norm": 1.0886380642541515, "learning_rate": 4.732095173021022e-06, "loss": 0.3789, "step": 14737 }, { "epoch": 0.8974819596260999, "grad_norm": 1.0239022753990905, "learning_rate": 4.732059233624426e-06, "loss": 0.4187, "step": 14738 }, { "epoch": 0.8975428554029778, "grad_norm": 1.1160763073008986, "learning_rate": 4.732023291953854e-06, "loss": 0.3623, "step": 14739 }, { "epoch": 0.8976037511798557, "grad_norm": 1.0418638385193137, "learning_rate": 4.731987348009342e-06, "loss": 0.3544, "step": 14740 }, { "epoch": 0.8976646469567335, "grad_norm": 1.0604419704570462, "learning_rate": 4.731951401790929e-06, "loss": 0.4797, "step": 14741 }, { "epoch": 0.8977255427336114, "grad_norm": 1.0459616370909977, "learning_rate": 4.73191545329865e-06, "loss": 0.3644, "step": 14742 }, { "epoch": 0.8977864385104893, "grad_norm": 1.0503479894051482, "learning_rate": 4.731879502532541e-06, "loss": 0.3587, "step": 14743 }, { "epoch": 0.8978473342873672, "grad_norm": 1.0438555838284191, "learning_rate": 4.731843549492642e-06, "loss": 0.3974, "step": 14744 }, { "epoch": 0.897908230064245, "grad_norm": 1.055149610260259, "learning_rate": 4.731807594178985e-06, "loss": 0.4164, "step": 14745 }, { "epoch": 0.8979691258411229, "grad_norm": 0.9914131120730288, "learning_rate": 4.73177163659161e-06, "loss": 0.421, "step": 14746 }, { "epoch": 0.8980300216180008, "grad_norm": 0.9707508112285465, "learning_rate": 4.731735676730552e-06, "loss": 0.4512, "step": 14747 }, { "epoch": 0.8980909173948787, "grad_norm": 0.869955730811528, "learning_rate": 4.731699714595847e-06, "loss": 0.4094, "step": 14748 }, { "epoch": 0.8981518131717565, "grad_norm": 1.008107194814038, "learning_rate": 4.731663750187535e-06, "loss": 0.4271, "step": 14749 }, { "epoch": 0.8982127089486344, "grad_norm": 1.0213011302098292, "learning_rate": 4.731627783505649e-06, "loss": 0.4712, "step": 14750 }, { "epoch": 0.8982736047255123, "grad_norm": 0.9542058023358428, "learning_rate": 4.731591814550227e-06, "loss": 0.4665, "step": 14751 }, { "epoch": 0.8983345005023902, "grad_norm": 0.9707793814382296, "learning_rate": 4.731555843321306e-06, "loss": 0.3811, "step": 14752 }, { "epoch": 0.898395396279268, "grad_norm": 1.0552986344716142, "learning_rate": 4.731519869818923e-06, "loss": 0.3998, "step": 14753 }, { "epoch": 0.8984562920561459, "grad_norm": 0.9807014870650854, "learning_rate": 4.731483894043112e-06, "loss": 0.4542, "step": 14754 }, { "epoch": 0.8985171878330238, "grad_norm": 1.1314993627728256, "learning_rate": 4.731447915993913e-06, "loss": 0.4667, "step": 14755 }, { "epoch": 0.8985780836099017, "grad_norm": 0.9142938121223234, "learning_rate": 4.731411935671361e-06, "loss": 0.432, "step": 14756 }, { "epoch": 0.8986389793867795, "grad_norm": 0.9913725748276836, "learning_rate": 4.731375953075492e-06, "loss": 0.3615, "step": 14757 }, { "epoch": 0.8986998751636573, "grad_norm": 0.8774392196655394, "learning_rate": 4.731339968206346e-06, "loss": 0.4273, "step": 14758 }, { "epoch": 0.8987607709405353, "grad_norm": 1.1518554928211755, "learning_rate": 4.731303981063955e-06, "loss": 0.3939, "step": 14759 }, { "epoch": 0.8988216667174131, "grad_norm": 1.0455817464090764, "learning_rate": 4.731267991648358e-06, "loss": 0.3957, "step": 14760 }, { "epoch": 0.898882562494291, "grad_norm": 1.042051472600766, "learning_rate": 4.731231999959592e-06, "loss": 0.4704, "step": 14761 }, { "epoch": 0.898943458271169, "grad_norm": 1.0015059015367607, "learning_rate": 4.731196005997694e-06, "loss": 0.3673, "step": 14762 }, { "epoch": 0.8990043540480468, "grad_norm": 1.0388060417198681, "learning_rate": 4.7311600097627e-06, "loss": 0.4578, "step": 14763 }, { "epoch": 0.8990652498249246, "grad_norm": 0.9964583984724342, "learning_rate": 4.7311240112546455e-06, "loss": 0.4447, "step": 14764 }, { "epoch": 0.8991261456018025, "grad_norm": 1.0195197892620804, "learning_rate": 4.731088010473568e-06, "loss": 0.3801, "step": 14765 }, { "epoch": 0.8991870413786804, "grad_norm": 1.038309092796527, "learning_rate": 4.731052007419505e-06, "loss": 0.4433, "step": 14766 }, { "epoch": 0.8992479371555583, "grad_norm": 1.0227243909538348, "learning_rate": 4.731016002092493e-06, "loss": 0.4236, "step": 14767 }, { "epoch": 0.8993088329324361, "grad_norm": 0.9666285478380208, "learning_rate": 4.730979994492568e-06, "loss": 0.411, "step": 14768 }, { "epoch": 0.899369728709314, "grad_norm": 0.9780046206951722, "learning_rate": 4.730943984619767e-06, "loss": 0.4905, "step": 14769 }, { "epoch": 0.8994306244861919, "grad_norm": 1.0567119155366314, "learning_rate": 4.7309079724741254e-06, "loss": 0.4109, "step": 14770 }, { "epoch": 0.8994915202630698, "grad_norm": 0.992353812060162, "learning_rate": 4.730871958055683e-06, "loss": 0.3801, "step": 14771 }, { "epoch": 0.8995524160399476, "grad_norm": 1.020114426196907, "learning_rate": 4.7308359413644735e-06, "loss": 0.3626, "step": 14772 }, { "epoch": 0.8996133118168255, "grad_norm": 1.1487624329203525, "learning_rate": 4.730799922400535e-06, "loss": 0.3897, "step": 14773 }, { "epoch": 0.8996742075937034, "grad_norm": 0.9925832630156053, "learning_rate": 4.730763901163904e-06, "loss": 0.3653, "step": 14774 }, { "epoch": 0.8997351033705813, "grad_norm": 0.9864631293247157, "learning_rate": 4.730727877654617e-06, "loss": 0.5114, "step": 14775 }, { "epoch": 0.8997959991474591, "grad_norm": 1.153742223786449, "learning_rate": 4.730691851872712e-06, "loss": 0.446, "step": 14776 }, { "epoch": 0.899856894924337, "grad_norm": 0.9973605172942891, "learning_rate": 4.7306558238182235e-06, "loss": 0.3845, "step": 14777 }, { "epoch": 0.8999177907012149, "grad_norm": 1.0194518511812543, "learning_rate": 4.730619793491189e-06, "loss": 0.4354, "step": 14778 }, { "epoch": 0.8999786864780928, "grad_norm": 0.9204705537472149, "learning_rate": 4.730583760891645e-06, "loss": 0.4347, "step": 14779 }, { "epoch": 0.9000395822549706, "grad_norm": 1.052898337078873, "learning_rate": 4.7305477260196295e-06, "loss": 0.3883, "step": 14780 }, { "epoch": 0.9001004780318484, "grad_norm": 0.9497108228291742, "learning_rate": 4.730511688875178e-06, "loss": 0.4295, "step": 14781 }, { "epoch": 0.9001613738087264, "grad_norm": 1.016478442477868, "learning_rate": 4.730475649458328e-06, "loss": 0.4333, "step": 14782 }, { "epoch": 0.9002222695856043, "grad_norm": 0.9726688595511491, "learning_rate": 4.730439607769115e-06, "loss": 0.4604, "step": 14783 }, { "epoch": 0.9002831653624821, "grad_norm": 1.121019950945538, "learning_rate": 4.730403563807577e-06, "loss": 0.4173, "step": 14784 }, { "epoch": 0.9003440611393599, "grad_norm": 1.0410375759983872, "learning_rate": 4.730367517573751e-06, "loss": 0.4372, "step": 14785 }, { "epoch": 0.9004049569162379, "grad_norm": 0.9305528610280218, "learning_rate": 4.730331469067672e-06, "loss": 0.5091, "step": 14786 }, { "epoch": 0.9004658526931157, "grad_norm": 0.9471030758191482, "learning_rate": 4.730295418289377e-06, "loss": 0.4621, "step": 14787 }, { "epoch": 0.9005267484699936, "grad_norm": 0.9306959989767687, "learning_rate": 4.730259365238904e-06, "loss": 0.4366, "step": 14788 }, { "epoch": 0.9005876442468714, "grad_norm": 0.9597833773202952, "learning_rate": 4.7302233099162895e-06, "loss": 0.3805, "step": 14789 }, { "epoch": 0.9006485400237494, "grad_norm": 1.0749522932404074, "learning_rate": 4.73018725232157e-06, "loss": 0.4183, "step": 14790 }, { "epoch": 0.9007094358006272, "grad_norm": 0.9990489339249387, "learning_rate": 4.730151192454781e-06, "loss": 0.3973, "step": 14791 }, { "epoch": 0.9007703315775051, "grad_norm": 1.0013704592495516, "learning_rate": 4.730115130315962e-06, "loss": 0.4219, "step": 14792 }, { "epoch": 0.9008312273543829, "grad_norm": 0.985527784796445, "learning_rate": 4.7300790659051475e-06, "loss": 0.3971, "step": 14793 }, { "epoch": 0.9008921231312609, "grad_norm": 0.9798391551459623, "learning_rate": 4.730042999222374e-06, "loss": 0.4729, "step": 14794 }, { "epoch": 0.9009530189081387, "grad_norm": 0.9363670129373202, "learning_rate": 4.73000693026768e-06, "loss": 0.4811, "step": 14795 }, { "epoch": 0.9010139146850166, "grad_norm": 1.0301040303411657, "learning_rate": 4.7299708590411005e-06, "loss": 0.4579, "step": 14796 }, { "epoch": 0.9010748104618944, "grad_norm": 1.015743690492752, "learning_rate": 4.729934785542673e-06, "loss": 0.4439, "step": 14797 }, { "epoch": 0.9011357062387724, "grad_norm": 0.9411048001831362, "learning_rate": 4.7298987097724344e-06, "loss": 0.384, "step": 14798 }, { "epoch": 0.9011966020156502, "grad_norm": 0.9372641697109522, "learning_rate": 4.729862631730423e-06, "loss": 0.5279, "step": 14799 }, { "epoch": 0.9012574977925281, "grad_norm": 1.0424258859928048, "learning_rate": 4.729826551416672e-06, "loss": 0.3492, "step": 14800 }, { "epoch": 0.9013183935694059, "grad_norm": 1.0005954731815978, "learning_rate": 4.72979046883122e-06, "loss": 0.4516, "step": 14801 }, { "epoch": 0.9013792893462839, "grad_norm": 1.089272883168513, "learning_rate": 4.729754383974105e-06, "loss": 0.391, "step": 14802 }, { "epoch": 0.9014401851231617, "grad_norm": 1.0737590575313611, "learning_rate": 4.729718296845362e-06, "loss": 0.3713, "step": 14803 }, { "epoch": 0.9015010809000396, "grad_norm": 0.9571207211635987, "learning_rate": 4.729682207445029e-06, "loss": 0.4572, "step": 14804 }, { "epoch": 0.9015619766769175, "grad_norm": 0.9990047028792214, "learning_rate": 4.729646115773141e-06, "loss": 0.3632, "step": 14805 }, { "epoch": 0.9016228724537954, "grad_norm": 1.003551484855728, "learning_rate": 4.729610021829737e-06, "loss": 0.4016, "step": 14806 }, { "epoch": 0.9016837682306732, "grad_norm": 1.0119161941646717, "learning_rate": 4.729573925614852e-06, "loss": 0.429, "step": 14807 }, { "epoch": 0.901744664007551, "grad_norm": 0.9737816529391146, "learning_rate": 4.729537827128523e-06, "loss": 0.4639, "step": 14808 }, { "epoch": 0.901805559784429, "grad_norm": 0.9910335454828204, "learning_rate": 4.729501726370788e-06, "loss": 0.4157, "step": 14809 }, { "epoch": 0.9018664555613068, "grad_norm": 1.0336034659241622, "learning_rate": 4.729465623341683e-06, "loss": 0.375, "step": 14810 }, { "epoch": 0.9019273513381847, "grad_norm": 0.9388442506544576, "learning_rate": 4.729429518041244e-06, "loss": 0.4744, "step": 14811 }, { "epoch": 0.9019882471150625, "grad_norm": 0.9475017236710784, "learning_rate": 4.729393410469509e-06, "loss": 0.3941, "step": 14812 }, { "epoch": 0.9020491428919405, "grad_norm": 0.8853918380031226, "learning_rate": 4.729357300626514e-06, "loss": 0.4197, "step": 14813 }, { "epoch": 0.9021100386688183, "grad_norm": 1.0385634609507775, "learning_rate": 4.729321188512297e-06, "loss": 0.3792, "step": 14814 }, { "epoch": 0.9021709344456962, "grad_norm": 0.944215505421361, "learning_rate": 4.729285074126894e-06, "loss": 0.4251, "step": 14815 }, { "epoch": 0.902231830222574, "grad_norm": 1.123894122655681, "learning_rate": 4.7292489574703405e-06, "loss": 0.4577, "step": 14816 }, { "epoch": 0.902292725999452, "grad_norm": 1.020361586872317, "learning_rate": 4.729212838542676e-06, "loss": 0.3863, "step": 14817 }, { "epoch": 0.9023536217763298, "grad_norm": 1.0914122132781925, "learning_rate": 4.729176717343934e-06, "loss": 0.397, "step": 14818 }, { "epoch": 0.9024145175532077, "grad_norm": 0.9390169430525022, "learning_rate": 4.7291405938741545e-06, "loss": 0.427, "step": 14819 }, { "epoch": 0.9024754133300855, "grad_norm": 0.9797231605951189, "learning_rate": 4.729104468133373e-06, "loss": 0.4605, "step": 14820 }, { "epoch": 0.9025363091069635, "grad_norm": 0.9655470762728652, "learning_rate": 4.7290683401216256e-06, "loss": 0.4123, "step": 14821 }, { "epoch": 0.9025972048838413, "grad_norm": 0.9626369490201921, "learning_rate": 4.72903220983895e-06, "loss": 0.3489, "step": 14822 }, { "epoch": 0.9026581006607192, "grad_norm": 0.9406730116807526, "learning_rate": 4.728996077285382e-06, "loss": 0.4052, "step": 14823 }, { "epoch": 0.902718996437597, "grad_norm": 0.9420504430850849, "learning_rate": 4.72895994246096e-06, "loss": 0.4306, "step": 14824 }, { "epoch": 0.902779892214475, "grad_norm": 0.9833369101726708, "learning_rate": 4.72892380536572e-06, "loss": 0.4253, "step": 14825 }, { "epoch": 0.9028407879913528, "grad_norm": 1.0193364339021498, "learning_rate": 4.728887665999698e-06, "loss": 0.4099, "step": 14826 }, { "epoch": 0.9029016837682307, "grad_norm": 0.9605888922180169, "learning_rate": 4.728851524362932e-06, "loss": 0.3976, "step": 14827 }, { "epoch": 0.9029625795451085, "grad_norm": 1.0090926751965617, "learning_rate": 4.728815380455458e-06, "loss": 0.4046, "step": 14828 }, { "epoch": 0.9030234753219865, "grad_norm": 1.0348669491286246, "learning_rate": 4.7287792342773146e-06, "loss": 0.4883, "step": 14829 }, { "epoch": 0.9030843710988643, "grad_norm": 0.9738730698790232, "learning_rate": 4.728743085828535e-06, "loss": 0.4454, "step": 14830 }, { "epoch": 0.9031452668757421, "grad_norm": 0.8365110598024909, "learning_rate": 4.72870693510916e-06, "loss": 0.4833, "step": 14831 }, { "epoch": 0.90320616265262, "grad_norm": 1.0372147133093788, "learning_rate": 4.728670782119224e-06, "loss": 0.427, "step": 14832 }, { "epoch": 0.903267058429498, "grad_norm": 1.0013894293994208, "learning_rate": 4.728634626858765e-06, "loss": 0.4732, "step": 14833 }, { "epoch": 0.9033279542063758, "grad_norm": 0.9582368589481151, "learning_rate": 4.72859846932782e-06, "loss": 0.4188, "step": 14834 }, { "epoch": 0.9033888499832536, "grad_norm": 0.9969409881088852, "learning_rate": 4.728562309526424e-06, "loss": 0.4663, "step": 14835 }, { "epoch": 0.9034497457601315, "grad_norm": 0.9838821781498216, "learning_rate": 4.728526147454615e-06, "loss": 0.4162, "step": 14836 }, { "epoch": 0.9035106415370094, "grad_norm": 1.0496801264888749, "learning_rate": 4.728489983112431e-06, "loss": 0.4072, "step": 14837 }, { "epoch": 0.9035715373138873, "grad_norm": 0.9652857517272564, "learning_rate": 4.728453816499907e-06, "loss": 0.4095, "step": 14838 }, { "epoch": 0.9036324330907651, "grad_norm": 1.085994976831757, "learning_rate": 4.728417647617081e-06, "loss": 0.325, "step": 14839 }, { "epoch": 0.903693328867643, "grad_norm": 1.0458537778075485, "learning_rate": 4.728381476463989e-06, "loss": 0.3917, "step": 14840 }, { "epoch": 0.9037542246445209, "grad_norm": 0.9855108911461644, "learning_rate": 4.7283453030406676e-06, "loss": 0.4566, "step": 14841 }, { "epoch": 0.9038151204213988, "grad_norm": 1.034201134530925, "learning_rate": 4.728309127347156e-06, "loss": 0.3468, "step": 14842 }, { "epoch": 0.9038760161982766, "grad_norm": 1.1342085995779174, "learning_rate": 4.7282729493834875e-06, "loss": 0.4119, "step": 14843 }, { "epoch": 0.9039369119751546, "grad_norm": 1.1159248996923845, "learning_rate": 4.728236769149702e-06, "loss": 0.363, "step": 14844 }, { "epoch": 0.9039978077520324, "grad_norm": 1.0032037054143343, "learning_rate": 4.728200586645836e-06, "loss": 0.4182, "step": 14845 }, { "epoch": 0.9040587035289103, "grad_norm": 1.0352299163496819, "learning_rate": 4.7281644018719245e-06, "loss": 0.3823, "step": 14846 }, { "epoch": 0.9041195993057881, "grad_norm": 1.0404633123947689, "learning_rate": 4.728128214828005e-06, "loss": 0.3801, "step": 14847 }, { "epoch": 0.9041804950826661, "grad_norm": 1.0010231774824867, "learning_rate": 4.728092025514116e-06, "loss": 0.397, "step": 14848 }, { "epoch": 0.9042413908595439, "grad_norm": 0.9075356793895574, "learning_rate": 4.728055833930292e-06, "loss": 0.4762, "step": 14849 }, { "epoch": 0.9043022866364218, "grad_norm": 0.9490866445502599, "learning_rate": 4.728019640076572e-06, "loss": 0.4392, "step": 14850 }, { "epoch": 0.9043631824132996, "grad_norm": 0.9732598437708929, "learning_rate": 4.727983443952991e-06, "loss": 0.3716, "step": 14851 }, { "epoch": 0.9044240781901776, "grad_norm": 1.0630556064486916, "learning_rate": 4.7279472455595875e-06, "loss": 0.3414, "step": 14852 }, { "epoch": 0.9044849739670554, "grad_norm": 0.9759529056694707, "learning_rate": 4.727911044896397e-06, "loss": 0.4099, "step": 14853 }, { "epoch": 0.9045458697439333, "grad_norm": 0.9944907989341534, "learning_rate": 4.727874841963458e-06, "loss": 0.3671, "step": 14854 }, { "epoch": 0.9046067655208111, "grad_norm": 1.0596977910478598, "learning_rate": 4.727838636760806e-06, "loss": 0.3602, "step": 14855 }, { "epoch": 0.904667661297689, "grad_norm": 1.028935036924502, "learning_rate": 4.727802429288478e-06, "loss": 0.3976, "step": 14856 }, { "epoch": 0.9047285570745669, "grad_norm": 0.9426398728100358, "learning_rate": 4.727766219546511e-06, "loss": 0.3739, "step": 14857 }, { "epoch": 0.9047894528514447, "grad_norm": 0.9583587547224909, "learning_rate": 4.727730007534943e-06, "loss": 0.4795, "step": 14858 }, { "epoch": 0.9048503486283226, "grad_norm": 1.0310276492831034, "learning_rate": 4.7276937932538095e-06, "loss": 0.3823, "step": 14859 }, { "epoch": 0.9049112444052005, "grad_norm": 1.0699780587773946, "learning_rate": 4.727657576703148e-06, "loss": 0.4116, "step": 14860 }, { "epoch": 0.9049721401820784, "grad_norm": 0.9736711275377722, "learning_rate": 4.727621357882995e-06, "loss": 0.4026, "step": 14861 }, { "epoch": 0.9050330359589562, "grad_norm": 1.0061309927956792, "learning_rate": 4.7275851367933875e-06, "loss": 0.3773, "step": 14862 }, { "epoch": 0.9050939317358341, "grad_norm": 0.9711557650724193, "learning_rate": 4.7275489134343635e-06, "loss": 0.4398, "step": 14863 }, { "epoch": 0.905154827512712, "grad_norm": 1.0024806697650968, "learning_rate": 4.727512687805959e-06, "loss": 0.4187, "step": 14864 }, { "epoch": 0.9052157232895899, "grad_norm": 1.0013663244918263, "learning_rate": 4.72747645990821e-06, "loss": 0.4306, "step": 14865 }, { "epoch": 0.9052766190664677, "grad_norm": 1.023665389934853, "learning_rate": 4.727440229741155e-06, "loss": 0.4026, "step": 14866 }, { "epoch": 0.9053375148433456, "grad_norm": 1.1612437642212712, "learning_rate": 4.727403997304829e-06, "loss": 0.3983, "step": 14867 }, { "epoch": 0.9053984106202235, "grad_norm": 1.027436407402743, "learning_rate": 4.727367762599271e-06, "loss": 0.4136, "step": 14868 }, { "epoch": 0.9054593063971014, "grad_norm": 0.9665913997564279, "learning_rate": 4.727331525624517e-06, "loss": 0.4425, "step": 14869 }, { "epoch": 0.9055202021739792, "grad_norm": 0.9969291776886547, "learning_rate": 4.727295286380604e-06, "loss": 0.4151, "step": 14870 }, { "epoch": 0.9055810979508571, "grad_norm": 0.950909838693013, "learning_rate": 4.727259044867569e-06, "loss": 0.3989, "step": 14871 }, { "epoch": 0.905641993727735, "grad_norm": 1.0029000498267313, "learning_rate": 4.727222801085448e-06, "loss": 0.4143, "step": 14872 }, { "epoch": 0.9057028895046129, "grad_norm": 1.0478629657393956, "learning_rate": 4.727186555034279e-06, "loss": 0.3761, "step": 14873 }, { "epoch": 0.9057637852814907, "grad_norm": 0.9498415255920489, "learning_rate": 4.727150306714099e-06, "loss": 0.4175, "step": 14874 }, { "epoch": 0.9058246810583686, "grad_norm": 0.9149214422196207, "learning_rate": 4.727114056124945e-06, "loss": 0.4417, "step": 14875 }, { "epoch": 0.9058855768352465, "grad_norm": 1.0313091451358256, "learning_rate": 4.727077803266853e-06, "loss": 0.3824, "step": 14876 }, { "epoch": 0.9059464726121244, "grad_norm": 1.0974724817162929, "learning_rate": 4.7270415481398606e-06, "loss": 0.4002, "step": 14877 }, { "epoch": 0.9060073683890022, "grad_norm": 1.0294902623499242, "learning_rate": 4.727005290744004e-06, "loss": 0.3516, "step": 14878 }, { "epoch": 0.90606826416588, "grad_norm": 1.0250604563254309, "learning_rate": 4.726969031079321e-06, "loss": 0.3738, "step": 14879 }, { "epoch": 0.906129159942758, "grad_norm": 0.9445220667005653, "learning_rate": 4.726932769145848e-06, "loss": 0.3809, "step": 14880 }, { "epoch": 0.9061900557196358, "grad_norm": 1.0904720058074975, "learning_rate": 4.726896504943623e-06, "loss": 0.3751, "step": 14881 }, { "epoch": 0.9062509514965137, "grad_norm": 1.0773976174477522, "learning_rate": 4.726860238472681e-06, "loss": 0.418, "step": 14882 }, { "epoch": 0.9063118472733915, "grad_norm": 0.9931250540890058, "learning_rate": 4.7268239697330615e-06, "loss": 0.4152, "step": 14883 }, { "epoch": 0.9063727430502695, "grad_norm": 0.9292463845702691, "learning_rate": 4.726787698724799e-06, "loss": 0.4611, "step": 14884 }, { "epoch": 0.9064336388271473, "grad_norm": 1.0711182794464353, "learning_rate": 4.726751425447932e-06, "loss": 0.444, "step": 14885 }, { "epoch": 0.9064945346040252, "grad_norm": 1.0490070329998662, "learning_rate": 4.7267151499024964e-06, "loss": 0.3973, "step": 14886 }, { "epoch": 0.9065554303809031, "grad_norm": 1.070607015584213, "learning_rate": 4.726678872088531e-06, "loss": 0.4148, "step": 14887 }, { "epoch": 0.906616326157781, "grad_norm": 1.0285693515721346, "learning_rate": 4.72664259200607e-06, "loss": 0.3866, "step": 14888 }, { "epoch": 0.9066772219346588, "grad_norm": 1.1383177830048818, "learning_rate": 4.726606309655152e-06, "loss": 0.4067, "step": 14889 }, { "epoch": 0.9067381177115367, "grad_norm": 1.0973012610933814, "learning_rate": 4.726570025035814e-06, "loss": 0.3492, "step": 14890 }, { "epoch": 0.9067990134884146, "grad_norm": 1.0579636713429232, "learning_rate": 4.726533738148093e-06, "loss": 0.3404, "step": 14891 }, { "epoch": 0.9068599092652925, "grad_norm": 1.077943751573989, "learning_rate": 4.726497448992026e-06, "loss": 0.4367, "step": 14892 }, { "epoch": 0.9069208050421703, "grad_norm": 0.9082715592367657, "learning_rate": 4.726461157567649e-06, "loss": 0.4658, "step": 14893 }, { "epoch": 0.9069817008190482, "grad_norm": 0.9722878506491679, "learning_rate": 4.726424863875e-06, "loss": 0.4444, "step": 14894 }, { "epoch": 0.9070425965959261, "grad_norm": 1.0301734846403434, "learning_rate": 4.726388567914115e-06, "loss": 0.3668, "step": 14895 }, { "epoch": 0.907103492372804, "grad_norm": 0.96866534592879, "learning_rate": 4.7263522696850325e-06, "loss": 0.3944, "step": 14896 }, { "epoch": 0.9071643881496818, "grad_norm": 0.9557392598218502, "learning_rate": 4.726315969187788e-06, "loss": 0.4246, "step": 14897 }, { "epoch": 0.9072252839265597, "grad_norm": 0.9711393365910623, "learning_rate": 4.72627966642242e-06, "loss": 0.418, "step": 14898 }, { "epoch": 0.9072861797034376, "grad_norm": 1.1109768572236163, "learning_rate": 4.726243361388963e-06, "loss": 0.3611, "step": 14899 }, { "epoch": 0.9073470754803155, "grad_norm": 1.0241070257837055, "learning_rate": 4.726207054087457e-06, "loss": 0.4072, "step": 14900 }, { "epoch": 0.9074079712571933, "grad_norm": 0.8899440320061562, "learning_rate": 4.726170744517937e-06, "loss": 0.5137, "step": 14901 }, { "epoch": 0.9074688670340711, "grad_norm": 0.9870884294764678, "learning_rate": 4.72613443268044e-06, "loss": 0.3949, "step": 14902 }, { "epoch": 0.9075297628109491, "grad_norm": 0.9593826690627496, "learning_rate": 4.726098118575005e-06, "loss": 0.4355, "step": 14903 }, { "epoch": 0.907590658587827, "grad_norm": 0.9624023011116711, "learning_rate": 4.7260618022016655e-06, "loss": 0.4374, "step": 14904 }, { "epoch": 0.9076515543647048, "grad_norm": 0.8912388616667314, "learning_rate": 4.726025483560463e-06, "loss": 0.4364, "step": 14905 }, { "epoch": 0.9077124501415826, "grad_norm": 0.9611205496066912, "learning_rate": 4.72598916265143e-06, "loss": 0.4073, "step": 14906 }, { "epoch": 0.9077733459184606, "grad_norm": 1.0248610125825077, "learning_rate": 4.725952839474606e-06, "loss": 0.4031, "step": 14907 }, { "epoch": 0.9078342416953384, "grad_norm": 0.9833748002470936, "learning_rate": 4.725916514030028e-06, "loss": 0.3966, "step": 14908 }, { "epoch": 0.9078951374722163, "grad_norm": 1.0282781195366881, "learning_rate": 4.725880186317732e-06, "loss": 0.4381, "step": 14909 }, { "epoch": 0.9079560332490941, "grad_norm": 1.0437459347416853, "learning_rate": 4.725843856337755e-06, "loss": 0.4252, "step": 14910 }, { "epoch": 0.9080169290259721, "grad_norm": 1.010249868744938, "learning_rate": 4.725807524090136e-06, "loss": 0.3685, "step": 14911 }, { "epoch": 0.9080778248028499, "grad_norm": 1.0338705863167936, "learning_rate": 4.725771189574909e-06, "loss": 0.3647, "step": 14912 }, { "epoch": 0.9081387205797278, "grad_norm": 1.0742468086518162, "learning_rate": 4.725734852792114e-06, "loss": 0.4049, "step": 14913 }, { "epoch": 0.9081996163566056, "grad_norm": 1.056904683874264, "learning_rate": 4.725698513741786e-06, "loss": 0.38, "step": 14914 }, { "epoch": 0.9082605121334836, "grad_norm": 1.012539450188778, "learning_rate": 4.725662172423963e-06, "loss": 0.3921, "step": 14915 }, { "epoch": 0.9083214079103614, "grad_norm": 0.9878089713652595, "learning_rate": 4.7256258288386806e-06, "loss": 0.3578, "step": 14916 }, { "epoch": 0.9083823036872393, "grad_norm": 1.02997036398044, "learning_rate": 4.7255894829859776e-06, "loss": 0.3742, "step": 14917 }, { "epoch": 0.9084431994641171, "grad_norm": 1.049385193546294, "learning_rate": 4.725553134865891e-06, "loss": 0.4522, "step": 14918 }, { "epoch": 0.9085040952409951, "grad_norm": 0.9832032155618525, "learning_rate": 4.725516784478456e-06, "loss": 0.3834, "step": 14919 }, { "epoch": 0.9085649910178729, "grad_norm": 0.910805775992434, "learning_rate": 4.725480431823711e-06, "loss": 0.3928, "step": 14920 }, { "epoch": 0.9086258867947508, "grad_norm": 1.0180771857857496, "learning_rate": 4.725444076901693e-06, "loss": 0.4316, "step": 14921 }, { "epoch": 0.9086867825716286, "grad_norm": 1.0034725759445844, "learning_rate": 4.72540771971244e-06, "loss": 0.4146, "step": 14922 }, { "epoch": 0.9087476783485066, "grad_norm": 0.9982558747416732, "learning_rate": 4.725371360255986e-06, "loss": 0.3965, "step": 14923 }, { "epoch": 0.9088085741253844, "grad_norm": 1.0048542259883397, "learning_rate": 4.725334998532371e-06, "loss": 0.4476, "step": 14924 }, { "epoch": 0.9088694699022623, "grad_norm": 0.9939359957757747, "learning_rate": 4.72529863454163e-06, "loss": 0.3092, "step": 14925 }, { "epoch": 0.9089303656791402, "grad_norm": 0.9549355973417422, "learning_rate": 4.725262268283802e-06, "loss": 0.3707, "step": 14926 }, { "epoch": 0.908991261456018, "grad_norm": 0.9492501128826466, "learning_rate": 4.725225899758922e-06, "loss": 0.3464, "step": 14927 }, { "epoch": 0.9090521572328959, "grad_norm": 0.9530148613684094, "learning_rate": 4.725189528967029e-06, "loss": 0.3964, "step": 14928 }, { "epoch": 0.9091130530097737, "grad_norm": 1.0209305866782512, "learning_rate": 4.72515315590816e-06, "loss": 0.359, "step": 14929 }, { "epoch": 0.9091739487866517, "grad_norm": 1.0430865631341795, "learning_rate": 4.72511678058235e-06, "loss": 0.3851, "step": 14930 }, { "epoch": 0.9092348445635295, "grad_norm": 1.0751806981801215, "learning_rate": 4.725080402989637e-06, "loss": 0.3821, "step": 14931 }, { "epoch": 0.9092957403404074, "grad_norm": 1.0529444247702364, "learning_rate": 4.725044023130058e-06, "loss": 0.3836, "step": 14932 }, { "epoch": 0.9093566361172852, "grad_norm": 0.9862351549471231, "learning_rate": 4.725007641003652e-06, "loss": 0.4629, "step": 14933 }, { "epoch": 0.9094175318941632, "grad_norm": 0.9940773911113044, "learning_rate": 4.724971256610454e-06, "loss": 0.4413, "step": 14934 }, { "epoch": 0.909478427671041, "grad_norm": 1.0413900784349897, "learning_rate": 4.724934869950501e-06, "loss": 0.3479, "step": 14935 }, { "epoch": 0.9095393234479189, "grad_norm": 1.0248775184646142, "learning_rate": 4.724898481023831e-06, "loss": 0.403, "step": 14936 }, { "epoch": 0.9096002192247967, "grad_norm": 1.0000560044318227, "learning_rate": 4.7248620898304805e-06, "loss": 0.4304, "step": 14937 }, { "epoch": 0.9096611150016747, "grad_norm": 1.028917708285986, "learning_rate": 4.724825696370487e-06, "loss": 0.36, "step": 14938 }, { "epoch": 0.9097220107785525, "grad_norm": 1.0143870524323737, "learning_rate": 4.724789300643887e-06, "loss": 0.3658, "step": 14939 }, { "epoch": 0.9097829065554304, "grad_norm": 1.0048910785469676, "learning_rate": 4.724752902650718e-06, "loss": 0.5247, "step": 14940 }, { "epoch": 0.9098438023323082, "grad_norm": 1.0727921533845994, "learning_rate": 4.724716502391017e-06, "loss": 0.3204, "step": 14941 }, { "epoch": 0.9099046981091862, "grad_norm": 0.9668445069482415, "learning_rate": 4.724680099864821e-06, "loss": 0.4059, "step": 14942 }, { "epoch": 0.909965593886064, "grad_norm": 1.036396180487488, "learning_rate": 4.7246436950721675e-06, "loss": 0.3603, "step": 14943 }, { "epoch": 0.9100264896629419, "grad_norm": 1.0217764219079177, "learning_rate": 4.724607288013093e-06, "loss": 0.4416, "step": 14944 }, { "epoch": 0.9100873854398197, "grad_norm": 0.9835025765660783, "learning_rate": 4.724570878687636e-06, "loss": 0.394, "step": 14945 }, { "epoch": 0.9101482812166977, "grad_norm": 0.9767661571594418, "learning_rate": 4.724534467095831e-06, "loss": 0.4984, "step": 14946 }, { "epoch": 0.9102091769935755, "grad_norm": 1.083893611533474, "learning_rate": 4.724498053237716e-06, "loss": 0.4909, "step": 14947 }, { "epoch": 0.9102700727704534, "grad_norm": 0.9665108988026272, "learning_rate": 4.72446163711333e-06, "loss": 0.3786, "step": 14948 }, { "epoch": 0.9103309685473312, "grad_norm": 0.970906468678085, "learning_rate": 4.724425218722708e-06, "loss": 0.3866, "step": 14949 }, { "epoch": 0.9103918643242092, "grad_norm": 0.9908226104055374, "learning_rate": 4.724388798065888e-06, "loss": 0.3765, "step": 14950 }, { "epoch": 0.910452760101087, "grad_norm": 0.9897293644959326, "learning_rate": 4.724352375142908e-06, "loss": 0.3785, "step": 14951 }, { "epoch": 0.9105136558779648, "grad_norm": 0.9422974393419119, "learning_rate": 4.724315949953803e-06, "loss": 0.4087, "step": 14952 }, { "epoch": 0.9105745516548427, "grad_norm": 0.9464790108298221, "learning_rate": 4.724279522498611e-06, "loss": 0.4167, "step": 14953 }, { "epoch": 0.9106354474317206, "grad_norm": 0.9122735427487563, "learning_rate": 4.72424309277737e-06, "loss": 0.4407, "step": 14954 }, { "epoch": 0.9106963432085985, "grad_norm": 0.9762661848487343, "learning_rate": 4.724206660790115e-06, "loss": 0.4385, "step": 14955 }, { "epoch": 0.9107572389854763, "grad_norm": 1.0334183947853566, "learning_rate": 4.724170226536886e-06, "loss": 0.3579, "step": 14956 }, { "epoch": 0.9108181347623542, "grad_norm": 0.9904488165785493, "learning_rate": 4.7241337900177176e-06, "loss": 0.4277, "step": 14957 }, { "epoch": 0.9108790305392321, "grad_norm": 1.0115762128412644, "learning_rate": 4.724097351232648e-06, "loss": 0.3867, "step": 14958 }, { "epoch": 0.91093992631611, "grad_norm": 1.035794961404478, "learning_rate": 4.724060910181715e-06, "loss": 0.4619, "step": 14959 }, { "epoch": 0.9110008220929878, "grad_norm": 0.9968112668469694, "learning_rate": 4.724024466864955e-06, "loss": 0.3826, "step": 14960 }, { "epoch": 0.9110617178698657, "grad_norm": 1.0848336371199114, "learning_rate": 4.723988021282404e-06, "loss": 0.3466, "step": 14961 }, { "epoch": 0.9111226136467436, "grad_norm": 1.0201790706200213, "learning_rate": 4.723951573434101e-06, "loss": 0.436, "step": 14962 }, { "epoch": 0.9111835094236215, "grad_norm": 1.0098323312152258, "learning_rate": 4.7239151233200824e-06, "loss": 0.4307, "step": 14963 }, { "epoch": 0.9112444052004993, "grad_norm": 1.0521336779675072, "learning_rate": 4.723878670940385e-06, "loss": 0.3787, "step": 14964 }, { "epoch": 0.9113053009773772, "grad_norm": 1.0220070468559035, "learning_rate": 4.723842216295047e-06, "loss": 0.4299, "step": 14965 }, { "epoch": 0.9113661967542551, "grad_norm": 1.0789880594220458, "learning_rate": 4.723805759384104e-06, "loss": 0.3704, "step": 14966 }, { "epoch": 0.911427092531133, "grad_norm": 1.0215743874309136, "learning_rate": 4.723769300207595e-06, "loss": 0.3824, "step": 14967 }, { "epoch": 0.9114879883080108, "grad_norm": 1.0531916942433444, "learning_rate": 4.723732838765554e-06, "loss": 0.5084, "step": 14968 }, { "epoch": 0.9115488840848888, "grad_norm": 0.9501874912394844, "learning_rate": 4.723696375058022e-06, "loss": 0.4472, "step": 14969 }, { "epoch": 0.9116097798617666, "grad_norm": 1.0271728378817784, "learning_rate": 4.723659909085034e-06, "loss": 0.3425, "step": 14970 }, { "epoch": 0.9116706756386445, "grad_norm": 0.9771140115498127, "learning_rate": 4.7236234408466275e-06, "loss": 0.429, "step": 14971 }, { "epoch": 0.9117315714155223, "grad_norm": 0.9970812795902219, "learning_rate": 4.723586970342839e-06, "loss": 0.3186, "step": 14972 }, { "epoch": 0.9117924671924003, "grad_norm": 1.025030967272079, "learning_rate": 4.723550497573707e-06, "loss": 0.455, "step": 14973 }, { "epoch": 0.9118533629692781, "grad_norm": 1.0840283903797991, "learning_rate": 4.723514022539268e-06, "loss": 0.3115, "step": 14974 }, { "epoch": 0.911914258746156, "grad_norm": 1.1523307765227133, "learning_rate": 4.723477545239559e-06, "loss": 0.3858, "step": 14975 }, { "epoch": 0.9119751545230338, "grad_norm": 0.9514979709303614, "learning_rate": 4.723441065674617e-06, "loss": 0.4275, "step": 14976 }, { "epoch": 0.9120360502999117, "grad_norm": 1.0540302656757676, "learning_rate": 4.72340458384448e-06, "loss": 0.3743, "step": 14977 }, { "epoch": 0.9120969460767896, "grad_norm": 0.9753095598599023, "learning_rate": 4.723368099749184e-06, "loss": 0.4127, "step": 14978 }, { "epoch": 0.9121578418536674, "grad_norm": 1.0105378254106239, "learning_rate": 4.723331613388767e-06, "loss": 0.4451, "step": 14979 }, { "epoch": 0.9122187376305453, "grad_norm": 0.9632915745435403, "learning_rate": 4.723295124763266e-06, "loss": 0.4104, "step": 14980 }, { "epoch": 0.9122796334074232, "grad_norm": 1.060115577712447, "learning_rate": 4.723258633872719e-06, "loss": 0.3937, "step": 14981 }, { "epoch": 0.9123405291843011, "grad_norm": 1.1339457145401919, "learning_rate": 4.723222140717161e-06, "loss": 0.3439, "step": 14982 }, { "epoch": 0.9124014249611789, "grad_norm": 0.9275045106759068, "learning_rate": 4.723185645296631e-06, "loss": 0.4251, "step": 14983 }, { "epoch": 0.9124623207380568, "grad_norm": 0.9159507457375337, "learning_rate": 4.723149147611167e-06, "loss": 0.4459, "step": 14984 }, { "epoch": 0.9125232165149347, "grad_norm": 1.0773321791293073, "learning_rate": 4.723112647660803e-06, "loss": 0.3919, "step": 14985 }, { "epoch": 0.9125841122918126, "grad_norm": 0.9779050825228353, "learning_rate": 4.723076145445579e-06, "loss": 0.4251, "step": 14986 }, { "epoch": 0.9126450080686904, "grad_norm": 0.9391226310811666, "learning_rate": 4.7230396409655305e-06, "loss": 0.4322, "step": 14987 }, { "epoch": 0.9127059038455683, "grad_norm": 1.0329015883324062, "learning_rate": 4.723003134220696e-06, "loss": 0.4463, "step": 14988 }, { "epoch": 0.9127667996224462, "grad_norm": 0.9463682306392299, "learning_rate": 4.722966625211112e-06, "loss": 0.4003, "step": 14989 }, { "epoch": 0.9128276953993241, "grad_norm": 1.0013983430477529, "learning_rate": 4.7229301139368155e-06, "loss": 0.4198, "step": 14990 }, { "epoch": 0.9128885911762019, "grad_norm": 1.0834086612265763, "learning_rate": 4.722893600397845e-06, "loss": 0.3478, "step": 14991 }, { "epoch": 0.9129494869530798, "grad_norm": 1.0346523341910343, "learning_rate": 4.722857084594236e-06, "loss": 0.4195, "step": 14992 }, { "epoch": 0.9130103827299577, "grad_norm": 0.9867254540656516, "learning_rate": 4.722820566526026e-06, "loss": 0.4156, "step": 14993 }, { "epoch": 0.9130712785068356, "grad_norm": 1.00312647791664, "learning_rate": 4.722784046193254e-06, "loss": 0.4269, "step": 14994 }, { "epoch": 0.9131321742837134, "grad_norm": 1.0963528846196704, "learning_rate": 4.722747523595954e-06, "loss": 0.3593, "step": 14995 }, { "epoch": 0.9131930700605912, "grad_norm": 1.071025537010091, "learning_rate": 4.722710998734167e-06, "loss": 0.3612, "step": 14996 }, { "epoch": 0.9132539658374692, "grad_norm": 1.004769938143866, "learning_rate": 4.7226744716079265e-06, "loss": 0.4504, "step": 14997 }, { "epoch": 0.913314861614347, "grad_norm": 1.0043200970171524, "learning_rate": 4.7226379422172725e-06, "loss": 0.3392, "step": 14998 }, { "epoch": 0.9133757573912249, "grad_norm": 0.9716895667921257, "learning_rate": 4.722601410562241e-06, "loss": 0.4005, "step": 14999 }, { "epoch": 0.9134366531681027, "grad_norm": 1.0176323244781045, "learning_rate": 4.722564876642869e-06, "loss": 0.3756, "step": 15000 }, { "epoch": 0.9134975489449807, "grad_norm": 1.0552289809834459, "learning_rate": 4.722528340459194e-06, "loss": 0.3558, "step": 15001 }, { "epoch": 0.9135584447218585, "grad_norm": 1.0757181563745803, "learning_rate": 4.722491802011254e-06, "loss": 0.4037, "step": 15002 }, { "epoch": 0.9136193404987364, "grad_norm": 0.9823919375341932, "learning_rate": 4.722455261299085e-06, "loss": 0.4395, "step": 15003 }, { "epoch": 0.9136802362756142, "grad_norm": 0.9871433704966969, "learning_rate": 4.722418718322726e-06, "loss": 0.4427, "step": 15004 }, { "epoch": 0.9137411320524922, "grad_norm": 1.0838873172155823, "learning_rate": 4.722382173082212e-06, "loss": 0.4138, "step": 15005 }, { "epoch": 0.91380202782937, "grad_norm": 1.0596091114436383, "learning_rate": 4.722345625577581e-06, "loss": 0.3345, "step": 15006 }, { "epoch": 0.9138629236062479, "grad_norm": 1.023306379738522, "learning_rate": 4.72230907580887e-06, "loss": 0.3481, "step": 15007 }, { "epoch": 0.9139238193831258, "grad_norm": 1.076602518470696, "learning_rate": 4.722272523776118e-06, "loss": 0.4027, "step": 15008 }, { "epoch": 0.9139847151600037, "grad_norm": 1.0530377578282493, "learning_rate": 4.72223596947936e-06, "loss": 0.3847, "step": 15009 }, { "epoch": 0.9140456109368815, "grad_norm": 1.102155632495736, "learning_rate": 4.7221994129186355e-06, "loss": 0.3591, "step": 15010 }, { "epoch": 0.9141065067137594, "grad_norm": 1.0615437746545688, "learning_rate": 4.722162854093979e-06, "loss": 0.3909, "step": 15011 }, { "epoch": 0.9141674024906373, "grad_norm": 1.0424599494755233, "learning_rate": 4.72212629300543e-06, "loss": 0.4574, "step": 15012 }, { "epoch": 0.9142282982675152, "grad_norm": 1.0185123058569263, "learning_rate": 4.722089729653025e-06, "loss": 0.4836, "step": 15013 }, { "epoch": 0.914289194044393, "grad_norm": 0.9384150878234403, "learning_rate": 4.7220531640368e-06, "loss": 0.4168, "step": 15014 }, { "epoch": 0.9143500898212709, "grad_norm": 0.9930805742151175, "learning_rate": 4.7220165961567955e-06, "loss": 0.3848, "step": 15015 }, { "epoch": 0.9144109855981488, "grad_norm": 1.0763083207118982, "learning_rate": 4.721980026013045e-06, "loss": 0.4786, "step": 15016 }, { "epoch": 0.9144718813750267, "grad_norm": 0.9811980954749207, "learning_rate": 4.721943453605588e-06, "loss": 0.371, "step": 15017 }, { "epoch": 0.9145327771519045, "grad_norm": 0.9802601060791288, "learning_rate": 4.721906878934461e-06, "loss": 0.4138, "step": 15018 }, { "epoch": 0.9145936729287824, "grad_norm": 0.9280266025599198, "learning_rate": 4.721870301999702e-06, "loss": 0.4837, "step": 15019 }, { "epoch": 0.9146545687056603, "grad_norm": 0.9779027138059663, "learning_rate": 4.721833722801347e-06, "loss": 0.4322, "step": 15020 }, { "epoch": 0.9147154644825382, "grad_norm": 1.0908389785883215, "learning_rate": 4.721797141339435e-06, "loss": 0.4271, "step": 15021 }, { "epoch": 0.914776360259416, "grad_norm": 0.9797539429866874, "learning_rate": 4.721760557614002e-06, "loss": 0.374, "step": 15022 }, { "epoch": 0.9148372560362938, "grad_norm": 0.9728058567777116, "learning_rate": 4.721723971625085e-06, "loss": 0.429, "step": 15023 }, { "epoch": 0.9148981518131718, "grad_norm": 1.1022509700299148, "learning_rate": 4.721687383372721e-06, "loss": 0.34, "step": 15024 }, { "epoch": 0.9149590475900496, "grad_norm": 0.9346908068168155, "learning_rate": 4.7216507928569494e-06, "loss": 0.4367, "step": 15025 }, { "epoch": 0.9150199433669275, "grad_norm": 1.174477661182615, "learning_rate": 4.7216142000778056e-06, "loss": 0.353, "step": 15026 }, { "epoch": 0.9150808391438053, "grad_norm": 1.0448288012530251, "learning_rate": 4.721577605035328e-06, "loss": 0.3744, "step": 15027 }, { "epoch": 0.9151417349206833, "grad_norm": 0.9873005380418324, "learning_rate": 4.7215410077295535e-06, "loss": 0.3865, "step": 15028 }, { "epoch": 0.9152026306975611, "grad_norm": 0.9322665819943107, "learning_rate": 4.721504408160518e-06, "loss": 0.4591, "step": 15029 }, { "epoch": 0.915263526474439, "grad_norm": 1.0145006852157241, "learning_rate": 4.721467806328261e-06, "loss": 0.3838, "step": 15030 }, { "epoch": 0.9153244222513168, "grad_norm": 1.0221205956870945, "learning_rate": 4.721431202232818e-06, "loss": 0.4157, "step": 15031 }, { "epoch": 0.9153853180281948, "grad_norm": 0.9383631743181826, "learning_rate": 4.721394595874228e-06, "loss": 0.3966, "step": 15032 }, { "epoch": 0.9154462138050726, "grad_norm": 1.0663542187774748, "learning_rate": 4.721357987252526e-06, "loss": 0.3355, "step": 15033 }, { "epoch": 0.9155071095819505, "grad_norm": 1.0742416277913374, "learning_rate": 4.7213213763677525e-06, "loss": 0.3513, "step": 15034 }, { "epoch": 0.9155680053588283, "grad_norm": 0.9798802842366126, "learning_rate": 4.721284763219942e-06, "loss": 0.4091, "step": 15035 }, { "epoch": 0.9156289011357063, "grad_norm": 1.0470499973717413, "learning_rate": 4.7212481478091325e-06, "loss": 0.4115, "step": 15036 }, { "epoch": 0.9156897969125841, "grad_norm": 1.0320572082489456, "learning_rate": 4.7212115301353625e-06, "loss": 0.4898, "step": 15037 }, { "epoch": 0.915750692689462, "grad_norm": 1.0027535348870784, "learning_rate": 4.721174910198667e-06, "loss": 0.4148, "step": 15038 }, { "epoch": 0.9158115884663398, "grad_norm": 0.9491449301893058, "learning_rate": 4.721138287999086e-06, "loss": 0.4233, "step": 15039 }, { "epoch": 0.9158724842432178, "grad_norm": 1.0697937020996129, "learning_rate": 4.721101663536655e-06, "loss": 0.465, "step": 15040 }, { "epoch": 0.9159333800200956, "grad_norm": 1.0742917167435924, "learning_rate": 4.721065036811411e-06, "loss": 0.4897, "step": 15041 }, { "epoch": 0.9159942757969735, "grad_norm": 1.0475926576008683, "learning_rate": 4.7210284078233935e-06, "loss": 0.4397, "step": 15042 }, { "epoch": 0.9160551715738513, "grad_norm": 0.9639375553661539, "learning_rate": 4.7209917765726375e-06, "loss": 0.4458, "step": 15043 }, { "epoch": 0.9161160673507293, "grad_norm": 0.9528680998972826, "learning_rate": 4.720955143059182e-06, "loss": 0.3868, "step": 15044 }, { "epoch": 0.9161769631276071, "grad_norm": 1.0522592536649065, "learning_rate": 4.720918507283063e-06, "loss": 0.4341, "step": 15045 }, { "epoch": 0.916237858904485, "grad_norm": 0.9985658967888413, "learning_rate": 4.720881869244318e-06, "loss": 0.4071, "step": 15046 }, { "epoch": 0.9162987546813628, "grad_norm": 1.0847347334408004, "learning_rate": 4.720845228942985e-06, "loss": 0.4279, "step": 15047 }, { "epoch": 0.9163596504582407, "grad_norm": 0.9211901591402532, "learning_rate": 4.720808586379102e-06, "loss": 0.4714, "step": 15048 }, { "epoch": 0.9164205462351186, "grad_norm": 0.9696631533017847, "learning_rate": 4.720771941552705e-06, "loss": 0.4308, "step": 15049 }, { "epoch": 0.9164814420119964, "grad_norm": 1.0108401523081778, "learning_rate": 4.720735294463831e-06, "loss": 0.4319, "step": 15050 }, { "epoch": 0.9165423377888744, "grad_norm": 1.0037320288030351, "learning_rate": 4.720698645112518e-06, "loss": 0.3694, "step": 15051 }, { "epoch": 0.9166032335657522, "grad_norm": 1.0476083766948043, "learning_rate": 4.720661993498804e-06, "loss": 0.3922, "step": 15052 }, { "epoch": 0.9166641293426301, "grad_norm": 0.994649595145193, "learning_rate": 4.720625339622725e-06, "loss": 0.4845, "step": 15053 }, { "epoch": 0.9167250251195079, "grad_norm": 1.0380023215834777, "learning_rate": 4.720588683484321e-06, "loss": 0.4119, "step": 15054 }, { "epoch": 0.9167859208963859, "grad_norm": 0.9256707925644008, "learning_rate": 4.720552025083626e-06, "loss": 0.4362, "step": 15055 }, { "epoch": 0.9168468166732637, "grad_norm": 0.9341959969602399, "learning_rate": 4.720515364420678e-06, "loss": 0.4776, "step": 15056 }, { "epoch": 0.9169077124501416, "grad_norm": 0.9872894967827803, "learning_rate": 4.7204787014955165e-06, "loss": 0.3992, "step": 15057 }, { "epoch": 0.9169686082270194, "grad_norm": 1.0169405718434452, "learning_rate": 4.720442036308176e-06, "loss": 0.4146, "step": 15058 }, { "epoch": 0.9170295040038974, "grad_norm": 0.9754867213696448, "learning_rate": 4.720405368858696e-06, "loss": 0.4159, "step": 15059 }, { "epoch": 0.9170903997807752, "grad_norm": 1.047426368694935, "learning_rate": 4.720368699147114e-06, "loss": 0.4332, "step": 15060 }, { "epoch": 0.9171512955576531, "grad_norm": 1.0154219098718318, "learning_rate": 4.720332027173466e-06, "loss": 0.3785, "step": 15061 }, { "epoch": 0.9172121913345309, "grad_norm": 1.0266681461638059, "learning_rate": 4.72029535293779e-06, "loss": 0.3238, "step": 15062 }, { "epoch": 0.9172730871114089, "grad_norm": 1.0178284478540545, "learning_rate": 4.7202586764401225e-06, "loss": 0.4495, "step": 15063 }, { "epoch": 0.9173339828882867, "grad_norm": 1.0195936282945028, "learning_rate": 4.720221997680502e-06, "loss": 0.4057, "step": 15064 }, { "epoch": 0.9173948786651646, "grad_norm": 1.093853001019347, "learning_rate": 4.720185316658966e-06, "loss": 0.3888, "step": 15065 }, { "epoch": 0.9174557744420424, "grad_norm": 1.011191755902728, "learning_rate": 4.720148633375551e-06, "loss": 0.4374, "step": 15066 }, { "epoch": 0.9175166702189204, "grad_norm": 0.9611093613791839, "learning_rate": 4.720111947830295e-06, "loss": 0.3875, "step": 15067 }, { "epoch": 0.9175775659957982, "grad_norm": 1.0084085438338937, "learning_rate": 4.7200752600232345e-06, "loss": 0.4684, "step": 15068 }, { "epoch": 0.917638461772676, "grad_norm": 0.9628017498465442, "learning_rate": 4.720038569954408e-06, "loss": 0.3532, "step": 15069 }, { "epoch": 0.9176993575495539, "grad_norm": 1.0985781715222256, "learning_rate": 4.720001877623852e-06, "loss": 0.4037, "step": 15070 }, { "epoch": 0.9177602533264319, "grad_norm": 1.011493513170904, "learning_rate": 4.7199651830316055e-06, "loss": 0.4292, "step": 15071 }, { "epoch": 0.9178211491033097, "grad_norm": 0.9818242762133107, "learning_rate": 4.719928486177703e-06, "loss": 0.4205, "step": 15072 }, { "epoch": 0.9178820448801875, "grad_norm": 0.9634036263404434, "learning_rate": 4.719891787062184e-06, "loss": 0.3634, "step": 15073 }, { "epoch": 0.9179429406570654, "grad_norm": 1.116488374037647, "learning_rate": 4.719855085685085e-06, "loss": 0.4319, "step": 15074 }, { "epoch": 0.9180038364339433, "grad_norm": 1.02148416947821, "learning_rate": 4.719818382046445e-06, "loss": 0.3961, "step": 15075 }, { "epoch": 0.9180647322108212, "grad_norm": 1.0894822421118777, "learning_rate": 4.719781676146299e-06, "loss": 0.3576, "step": 15076 }, { "epoch": 0.918125627987699, "grad_norm": 0.9357003190678439, "learning_rate": 4.719744967984686e-06, "loss": 0.4011, "step": 15077 }, { "epoch": 0.9181865237645769, "grad_norm": 0.9655973010233426, "learning_rate": 4.7197082575616435e-06, "loss": 0.4913, "step": 15078 }, { "epoch": 0.9182474195414548, "grad_norm": 0.9460434734930288, "learning_rate": 4.719671544877208e-06, "loss": 0.3672, "step": 15079 }, { "epoch": 0.9183083153183327, "grad_norm": 0.9691746685258105, "learning_rate": 4.719634829931417e-06, "loss": 0.3938, "step": 15080 }, { "epoch": 0.9183692110952105, "grad_norm": 0.9943669661935959, "learning_rate": 4.7195981127243085e-06, "loss": 0.3753, "step": 15081 }, { "epoch": 0.9184301068720884, "grad_norm": 1.016156076818688, "learning_rate": 4.719561393255919e-06, "loss": 0.3685, "step": 15082 }, { "epoch": 0.9184910026489663, "grad_norm": 1.007896428095135, "learning_rate": 4.7195246715262875e-06, "loss": 0.4614, "step": 15083 }, { "epoch": 0.9185518984258442, "grad_norm": 0.9468517489569229, "learning_rate": 4.719487947535449e-06, "loss": 0.405, "step": 15084 }, { "epoch": 0.918612794202722, "grad_norm": 0.9595632356327112, "learning_rate": 4.719451221283444e-06, "loss": 0.4307, "step": 15085 }, { "epoch": 0.9186736899795999, "grad_norm": 1.0669010531294392, "learning_rate": 4.719414492770307e-06, "loss": 0.3782, "step": 15086 }, { "epoch": 0.9187345857564778, "grad_norm": 1.0050646274045738, "learning_rate": 4.719377761996077e-06, "loss": 0.449, "step": 15087 }, { "epoch": 0.9187954815333557, "grad_norm": 1.0616328769954568, "learning_rate": 4.719341028960791e-06, "loss": 0.3803, "step": 15088 }, { "epoch": 0.9188563773102335, "grad_norm": 0.9838773152140824, "learning_rate": 4.719304293664487e-06, "loss": 0.4362, "step": 15089 }, { "epoch": 0.9189172730871115, "grad_norm": 0.9289253087083972, "learning_rate": 4.719267556107202e-06, "loss": 0.4788, "step": 15090 }, { "epoch": 0.9189781688639893, "grad_norm": 1.0259766633694427, "learning_rate": 4.7192308162889725e-06, "loss": 0.4288, "step": 15091 }, { "epoch": 0.9190390646408672, "grad_norm": 1.1266204015783647, "learning_rate": 4.719194074209837e-06, "loss": 0.3954, "step": 15092 }, { "epoch": 0.919099960417745, "grad_norm": 0.9794425839814231, "learning_rate": 4.7191573298698336e-06, "loss": 0.4154, "step": 15093 }, { "epoch": 0.919160856194623, "grad_norm": 0.9545434905514993, "learning_rate": 4.719120583268998e-06, "loss": 0.4519, "step": 15094 }, { "epoch": 0.9192217519715008, "grad_norm": 1.06154858954236, "learning_rate": 4.7190838344073685e-06, "loss": 0.4276, "step": 15095 }, { "epoch": 0.9192826477483786, "grad_norm": 1.0767167760322247, "learning_rate": 4.719047083284982e-06, "loss": 0.3799, "step": 15096 }, { "epoch": 0.9193435435252565, "grad_norm": 0.931970772824337, "learning_rate": 4.719010329901877e-06, "loss": 0.405, "step": 15097 }, { "epoch": 0.9194044393021344, "grad_norm": 0.8831825408962046, "learning_rate": 4.718973574258091e-06, "loss": 0.4544, "step": 15098 }, { "epoch": 0.9194653350790123, "grad_norm": 0.9783875958743765, "learning_rate": 4.718936816353661e-06, "loss": 0.387, "step": 15099 }, { "epoch": 0.9195262308558901, "grad_norm": 0.9709028760261574, "learning_rate": 4.718900056188623e-06, "loss": 0.4444, "step": 15100 }, { "epoch": 0.919587126632768, "grad_norm": 1.0402211759008193, "learning_rate": 4.718863293763017e-06, "loss": 0.4415, "step": 15101 }, { "epoch": 0.9196480224096459, "grad_norm": 1.1062002975471414, "learning_rate": 4.718826529076879e-06, "loss": 0.4088, "step": 15102 }, { "epoch": 0.9197089181865238, "grad_norm": 1.0980191951276508, "learning_rate": 4.718789762130246e-06, "loss": 0.3614, "step": 15103 }, { "epoch": 0.9197698139634016, "grad_norm": 0.9852381067379413, "learning_rate": 4.718752992923157e-06, "loss": 0.3951, "step": 15104 }, { "epoch": 0.9198307097402795, "grad_norm": 0.8961018141869102, "learning_rate": 4.7187162214556475e-06, "loss": 0.4101, "step": 15105 }, { "epoch": 0.9198916055171574, "grad_norm": 1.0525093394101792, "learning_rate": 4.718679447727756e-06, "loss": 0.4397, "step": 15106 }, { "epoch": 0.9199525012940353, "grad_norm": 0.973706975161938, "learning_rate": 4.718642671739521e-06, "loss": 0.3878, "step": 15107 }, { "epoch": 0.9200133970709131, "grad_norm": 0.9816058709493978, "learning_rate": 4.7186058934909785e-06, "loss": 0.3712, "step": 15108 }, { "epoch": 0.920074292847791, "grad_norm": 1.0647075014355016, "learning_rate": 4.718569112982167e-06, "loss": 0.4018, "step": 15109 }, { "epoch": 0.9201351886246689, "grad_norm": 0.9538812336991171, "learning_rate": 4.718532330213123e-06, "loss": 0.4397, "step": 15110 }, { "epoch": 0.9201960844015468, "grad_norm": 0.9842778428599118, "learning_rate": 4.718495545183884e-06, "loss": 0.3885, "step": 15111 }, { "epoch": 0.9202569801784246, "grad_norm": 0.9789750900925541, "learning_rate": 4.718458757894489e-06, "loss": 0.5702, "step": 15112 }, { "epoch": 0.9203178759553025, "grad_norm": 0.9812416218397388, "learning_rate": 4.7184219683449725e-06, "loss": 0.4301, "step": 15113 }, { "epoch": 0.9203787717321804, "grad_norm": 1.0910179961752013, "learning_rate": 4.718385176535375e-06, "loss": 0.3894, "step": 15114 }, { "epoch": 0.9204396675090583, "grad_norm": 0.9229789844956473, "learning_rate": 4.718348382465733e-06, "loss": 0.3917, "step": 15115 }, { "epoch": 0.9205005632859361, "grad_norm": 0.9824905923491243, "learning_rate": 4.718311586136083e-06, "loss": 0.4064, "step": 15116 }, { "epoch": 0.920561459062814, "grad_norm": 1.0146586225653231, "learning_rate": 4.718274787546464e-06, "loss": 0.4106, "step": 15117 }, { "epoch": 0.9206223548396919, "grad_norm": 0.9739981295458661, "learning_rate": 4.718237986696913e-06, "loss": 0.3702, "step": 15118 }, { "epoch": 0.9206832506165697, "grad_norm": 0.9939186126821224, "learning_rate": 4.718201183587466e-06, "loss": 0.4191, "step": 15119 }, { "epoch": 0.9207441463934476, "grad_norm": 1.0169674315448858, "learning_rate": 4.718164378218163e-06, "loss": 0.4077, "step": 15120 }, { "epoch": 0.9208050421703254, "grad_norm": 1.0276154225213914, "learning_rate": 4.71812757058904e-06, "loss": 0.3753, "step": 15121 }, { "epoch": 0.9208659379472034, "grad_norm": 1.0496377342914462, "learning_rate": 4.718090760700134e-06, "loss": 0.3317, "step": 15122 }, { "epoch": 0.9209268337240812, "grad_norm": 1.0038212454903428, "learning_rate": 4.718053948551483e-06, "loss": 0.379, "step": 15123 }, { "epoch": 0.9209877295009591, "grad_norm": 1.0005269657355402, "learning_rate": 4.718017134143126e-06, "loss": 0.3999, "step": 15124 }, { "epoch": 0.9210486252778369, "grad_norm": 1.0062659297553551, "learning_rate": 4.717980317475099e-06, "loss": 0.4097, "step": 15125 }, { "epoch": 0.9211095210547149, "grad_norm": 1.023422731873376, "learning_rate": 4.717943498547439e-06, "loss": 0.426, "step": 15126 }, { "epoch": 0.9211704168315927, "grad_norm": 1.0023852298341924, "learning_rate": 4.717906677360185e-06, "loss": 0.3807, "step": 15127 }, { "epoch": 0.9212313126084706, "grad_norm": 0.963207179991146, "learning_rate": 4.717869853913373e-06, "loss": 0.4092, "step": 15128 }, { "epoch": 0.9212922083853484, "grad_norm": 0.9171218115762179, "learning_rate": 4.717833028207042e-06, "loss": 0.4023, "step": 15129 }, { "epoch": 0.9213531041622264, "grad_norm": 0.9973502148581731, "learning_rate": 4.717796200241229e-06, "loss": 0.3575, "step": 15130 }, { "epoch": 0.9214139999391042, "grad_norm": 0.9138260003433619, "learning_rate": 4.7177593700159706e-06, "loss": 0.3942, "step": 15131 }, { "epoch": 0.9214748957159821, "grad_norm": 0.9993344621340532, "learning_rate": 4.717722537531306e-06, "loss": 0.3982, "step": 15132 }, { "epoch": 0.92153579149286, "grad_norm": 0.9066495345322211, "learning_rate": 4.717685702787271e-06, "loss": 0.451, "step": 15133 }, { "epoch": 0.9215966872697379, "grad_norm": 1.1033657582675611, "learning_rate": 4.7176488657839045e-06, "loss": 0.3431, "step": 15134 }, { "epoch": 0.9216575830466157, "grad_norm": 0.9356528447976336, "learning_rate": 4.7176120265212435e-06, "loss": 0.4271, "step": 15135 }, { "epoch": 0.9217184788234936, "grad_norm": 1.03566046896529, "learning_rate": 4.7175751849993245e-06, "loss": 0.3567, "step": 15136 }, { "epoch": 0.9217793746003715, "grad_norm": 1.047305113473478, "learning_rate": 4.717538341218187e-06, "loss": 0.4098, "step": 15137 }, { "epoch": 0.9218402703772494, "grad_norm": 1.001195802948034, "learning_rate": 4.717501495177867e-06, "loss": 0.4037, "step": 15138 }, { "epoch": 0.9219011661541272, "grad_norm": 1.0856575971794655, "learning_rate": 4.717464646878403e-06, "loss": 0.4402, "step": 15139 }, { "epoch": 0.921962061931005, "grad_norm": 0.9428470848675857, "learning_rate": 4.7174277963198316e-06, "loss": 0.3935, "step": 15140 }, { "epoch": 0.922022957707883, "grad_norm": 0.9267807448541827, "learning_rate": 4.717390943502191e-06, "loss": 0.4453, "step": 15141 }, { "epoch": 0.9220838534847609, "grad_norm": 1.0823946887269713, "learning_rate": 4.717354088425518e-06, "loss": 0.3747, "step": 15142 }, { "epoch": 0.9221447492616387, "grad_norm": 0.9409249067263157, "learning_rate": 4.7173172310898525e-06, "loss": 0.395, "step": 15143 }, { "epoch": 0.9222056450385165, "grad_norm": 1.0429689011327823, "learning_rate": 4.7172803714952294e-06, "loss": 0.3737, "step": 15144 }, { "epoch": 0.9222665408153945, "grad_norm": 0.9624782375565075, "learning_rate": 4.717243509641687e-06, "loss": 0.4032, "step": 15145 }, { "epoch": 0.9223274365922723, "grad_norm": 0.9659642184670693, "learning_rate": 4.717206645529263e-06, "loss": 0.4433, "step": 15146 }, { "epoch": 0.9223883323691502, "grad_norm": 1.0014046744480891, "learning_rate": 4.7171697791579954e-06, "loss": 0.3637, "step": 15147 }, { "epoch": 0.922449228146028, "grad_norm": 0.9909102450725672, "learning_rate": 4.71713291052792e-06, "loss": 0.3875, "step": 15148 }, { "epoch": 0.922510123922906, "grad_norm": 1.0983338568826102, "learning_rate": 4.717096039639077e-06, "loss": 0.4005, "step": 15149 }, { "epoch": 0.9225710196997838, "grad_norm": 1.0665131933456138, "learning_rate": 4.717059166491502e-06, "loss": 0.3489, "step": 15150 }, { "epoch": 0.9226319154766617, "grad_norm": 1.0036756037557473, "learning_rate": 4.717022291085234e-06, "loss": 0.34, "step": 15151 }, { "epoch": 0.9226928112535395, "grad_norm": 1.011877112576629, "learning_rate": 4.716985413420309e-06, "loss": 0.3773, "step": 15152 }, { "epoch": 0.9227537070304175, "grad_norm": 0.9270677274547438, "learning_rate": 4.716948533496766e-06, "loss": 0.4593, "step": 15153 }, { "epoch": 0.9228146028072953, "grad_norm": 1.0802917218072303, "learning_rate": 4.716911651314641e-06, "loss": 0.4642, "step": 15154 }, { "epoch": 0.9228754985841732, "grad_norm": 1.0342017869343334, "learning_rate": 4.7168747668739735e-06, "loss": 0.4352, "step": 15155 }, { "epoch": 0.922936394361051, "grad_norm": 0.9997536023360486, "learning_rate": 4.716837880174799e-06, "loss": 0.3664, "step": 15156 }, { "epoch": 0.922997290137929, "grad_norm": 0.9058624514928368, "learning_rate": 4.716800991217157e-06, "loss": 0.4191, "step": 15157 }, { "epoch": 0.9230581859148068, "grad_norm": 0.9977722378614469, "learning_rate": 4.716764100001084e-06, "loss": 0.4643, "step": 15158 }, { "epoch": 0.9231190816916847, "grad_norm": 0.9073043971536905, "learning_rate": 4.716727206526618e-06, "loss": 0.4373, "step": 15159 }, { "epoch": 0.9231799774685625, "grad_norm": 0.9564312795487191, "learning_rate": 4.716690310793797e-06, "loss": 0.3781, "step": 15160 }, { "epoch": 0.9232408732454405, "grad_norm": 0.94060228951378, "learning_rate": 4.716653412802656e-06, "loss": 0.3957, "step": 15161 }, { "epoch": 0.9233017690223183, "grad_norm": 0.9415927542711248, "learning_rate": 4.716616512553237e-06, "loss": 0.3925, "step": 15162 }, { "epoch": 0.9233626647991962, "grad_norm": 1.0215518587938137, "learning_rate": 4.716579610045573e-06, "loss": 0.4002, "step": 15163 }, { "epoch": 0.923423560576074, "grad_norm": 0.9801999403595391, "learning_rate": 4.716542705279705e-06, "loss": 0.4144, "step": 15164 }, { "epoch": 0.923484456352952, "grad_norm": 1.1955913932182116, "learning_rate": 4.71650579825567e-06, "loss": 0.3776, "step": 15165 }, { "epoch": 0.9235453521298298, "grad_norm": 0.9712040943939568, "learning_rate": 4.716468888973504e-06, "loss": 0.4183, "step": 15166 }, { "epoch": 0.9236062479067076, "grad_norm": 1.0473258779539687, "learning_rate": 4.716431977433246e-06, "loss": 0.3536, "step": 15167 }, { "epoch": 0.9236671436835855, "grad_norm": 0.9948905951820991, "learning_rate": 4.716395063634932e-06, "loss": 0.4415, "step": 15168 }, { "epoch": 0.9237280394604634, "grad_norm": 1.1570007891770617, "learning_rate": 4.7163581475786025e-06, "loss": 0.3625, "step": 15169 }, { "epoch": 0.9237889352373413, "grad_norm": 0.9464153259126534, "learning_rate": 4.716321229264292e-06, "loss": 0.3724, "step": 15170 }, { "epoch": 0.9238498310142191, "grad_norm": 1.0258583102248726, "learning_rate": 4.716284308692041e-06, "loss": 0.4305, "step": 15171 }, { "epoch": 0.9239107267910971, "grad_norm": 0.9351496375613665, "learning_rate": 4.716247385861884e-06, "loss": 0.4062, "step": 15172 }, { "epoch": 0.9239716225679749, "grad_norm": 0.9263705301490915, "learning_rate": 4.716210460773861e-06, "loss": 0.4001, "step": 15173 }, { "epoch": 0.9240325183448528, "grad_norm": 1.1016339008378304, "learning_rate": 4.7161735334280086e-06, "loss": 0.4126, "step": 15174 }, { "epoch": 0.9240934141217306, "grad_norm": 0.9954556689951171, "learning_rate": 4.716136603824365e-06, "loss": 0.3978, "step": 15175 }, { "epoch": 0.9241543098986086, "grad_norm": 0.9588765147825776, "learning_rate": 4.716099671962968e-06, "loss": 0.4858, "step": 15176 }, { "epoch": 0.9242152056754864, "grad_norm": 1.022668310042247, "learning_rate": 4.716062737843854e-06, "loss": 0.4224, "step": 15177 }, { "epoch": 0.9242761014523643, "grad_norm": 1.0615440775139358, "learning_rate": 4.716025801467061e-06, "loss": 0.452, "step": 15178 }, { "epoch": 0.9243369972292421, "grad_norm": 1.030570594995032, "learning_rate": 4.715988862832628e-06, "loss": 0.4026, "step": 15179 }, { "epoch": 0.9243978930061201, "grad_norm": 0.9825857389361724, "learning_rate": 4.715951921940591e-06, "loss": 0.3659, "step": 15180 }, { "epoch": 0.9244587887829979, "grad_norm": 1.0123672177631844, "learning_rate": 4.715914978790989e-06, "loss": 0.4157, "step": 15181 }, { "epoch": 0.9245196845598758, "grad_norm": 1.004375001039204, "learning_rate": 4.715878033383857e-06, "loss": 0.4022, "step": 15182 }, { "epoch": 0.9245805803367536, "grad_norm": 0.9875237975923132, "learning_rate": 4.715841085719236e-06, "loss": 0.4286, "step": 15183 }, { "epoch": 0.9246414761136316, "grad_norm": 1.0620928548018476, "learning_rate": 4.715804135797162e-06, "loss": 0.4225, "step": 15184 }, { "epoch": 0.9247023718905094, "grad_norm": 0.9872736885329042, "learning_rate": 4.715767183617673e-06, "loss": 0.4123, "step": 15185 }, { "epoch": 0.9247632676673873, "grad_norm": 0.9542536372281955, "learning_rate": 4.715730229180806e-06, "loss": 0.428, "step": 15186 }, { "epoch": 0.9248241634442651, "grad_norm": 1.1352211254556939, "learning_rate": 4.7156932724866e-06, "loss": 0.376, "step": 15187 }, { "epoch": 0.9248850592211431, "grad_norm": 1.0565625292246545, "learning_rate": 4.715656313535091e-06, "loss": 0.4346, "step": 15188 }, { "epoch": 0.9249459549980209, "grad_norm": 0.9517270302201438, "learning_rate": 4.715619352326317e-06, "loss": 0.4523, "step": 15189 }, { "epoch": 0.9250068507748987, "grad_norm": 0.9529897508327908, "learning_rate": 4.715582388860317e-06, "loss": 0.4198, "step": 15190 }, { "epoch": 0.9250677465517766, "grad_norm": 0.9652013240093414, "learning_rate": 4.715545423137126e-06, "loss": 0.4421, "step": 15191 }, { "epoch": 0.9251286423286545, "grad_norm": 0.9933862955358957, "learning_rate": 4.715508455156786e-06, "loss": 0.4136, "step": 15192 }, { "epoch": 0.9251895381055324, "grad_norm": 0.971198456725615, "learning_rate": 4.715471484919331e-06, "loss": 0.3864, "step": 15193 }, { "epoch": 0.9252504338824102, "grad_norm": 1.0490974306308565, "learning_rate": 4.715434512424799e-06, "loss": 0.3651, "step": 15194 }, { "epoch": 0.9253113296592881, "grad_norm": 1.0619887138625241, "learning_rate": 4.715397537673228e-06, "loss": 0.4221, "step": 15195 }, { "epoch": 0.925372225436166, "grad_norm": 1.0326348305413413, "learning_rate": 4.715360560664658e-06, "loss": 0.436, "step": 15196 }, { "epoch": 0.9254331212130439, "grad_norm": 1.0182119951410404, "learning_rate": 4.715323581399123e-06, "loss": 0.3832, "step": 15197 }, { "epoch": 0.9254940169899217, "grad_norm": 1.000241877764681, "learning_rate": 4.715286599876664e-06, "loss": 0.4231, "step": 15198 }, { "epoch": 0.9255549127667996, "grad_norm": 0.9951831388159181, "learning_rate": 4.7152496160973145e-06, "loss": 0.4474, "step": 15199 }, { "epoch": 0.9256158085436775, "grad_norm": 0.9683952084672196, "learning_rate": 4.7152126300611164e-06, "loss": 0.3632, "step": 15200 }, { "epoch": 0.9256767043205554, "grad_norm": 0.9756849157093773, "learning_rate": 4.715175641768106e-06, "loss": 0.4036, "step": 15201 }, { "epoch": 0.9257376000974332, "grad_norm": 1.0621510963682494, "learning_rate": 4.715138651218321e-06, "loss": 0.3839, "step": 15202 }, { "epoch": 0.9257984958743111, "grad_norm": 1.0247053795352161, "learning_rate": 4.7151016584117985e-06, "loss": 0.4281, "step": 15203 }, { "epoch": 0.925859391651189, "grad_norm": 0.9861612998770383, "learning_rate": 4.7150646633485754e-06, "loss": 0.4222, "step": 15204 }, { "epoch": 0.9259202874280669, "grad_norm": 0.998445341043185, "learning_rate": 4.715027666028691e-06, "loss": 0.3771, "step": 15205 }, { "epoch": 0.9259811832049447, "grad_norm": 1.0132554694829294, "learning_rate": 4.714990666452183e-06, "loss": 0.4022, "step": 15206 }, { "epoch": 0.9260420789818226, "grad_norm": 0.9840812044616607, "learning_rate": 4.714953664619088e-06, "loss": 0.3941, "step": 15207 }, { "epoch": 0.9261029747587005, "grad_norm": 1.022204861977761, "learning_rate": 4.714916660529444e-06, "loss": 0.3758, "step": 15208 }, { "epoch": 0.9261638705355784, "grad_norm": 1.0655664045060487, "learning_rate": 4.71487965418329e-06, "loss": 0.4427, "step": 15209 }, { "epoch": 0.9262247663124562, "grad_norm": 1.0834443276082617, "learning_rate": 4.714842645580662e-06, "loss": 0.3796, "step": 15210 }, { "epoch": 0.926285662089334, "grad_norm": 1.044692846572255, "learning_rate": 4.7148056347215984e-06, "loss": 0.4119, "step": 15211 }, { "epoch": 0.926346557866212, "grad_norm": 1.0250385834678999, "learning_rate": 4.714768621606137e-06, "loss": 0.385, "step": 15212 }, { "epoch": 0.9264074536430899, "grad_norm": 1.0210541482306676, "learning_rate": 4.714731606234315e-06, "loss": 0.3778, "step": 15213 }, { "epoch": 0.9264683494199677, "grad_norm": 0.9679488299422847, "learning_rate": 4.714694588606171e-06, "loss": 0.4262, "step": 15214 }, { "epoch": 0.9265292451968457, "grad_norm": 0.9869571988652257, "learning_rate": 4.7146575687217426e-06, "loss": 0.4272, "step": 15215 }, { "epoch": 0.9265901409737235, "grad_norm": 0.9385894586618572, "learning_rate": 4.714620546581066e-06, "loss": 0.391, "step": 15216 }, { "epoch": 0.9266510367506013, "grad_norm": 1.0472318927790987, "learning_rate": 4.71458352218418e-06, "loss": 0.358, "step": 15217 }, { "epoch": 0.9267119325274792, "grad_norm": 0.9995019327626069, "learning_rate": 4.714546495531123e-06, "loss": 0.3471, "step": 15218 }, { "epoch": 0.9267728283043571, "grad_norm": 0.9530155430731841, "learning_rate": 4.714509466621933e-06, "loss": 0.4732, "step": 15219 }, { "epoch": 0.926833724081235, "grad_norm": 0.9826383600350933, "learning_rate": 4.714472435456645e-06, "loss": 0.4115, "step": 15220 }, { "epoch": 0.9268946198581128, "grad_norm": 0.9453885587138082, "learning_rate": 4.714435402035299e-06, "loss": 0.4098, "step": 15221 }, { "epoch": 0.9269555156349907, "grad_norm": 1.0205048724795083, "learning_rate": 4.714398366357933e-06, "loss": 0.4535, "step": 15222 }, { "epoch": 0.9270164114118686, "grad_norm": 0.937227991961561, "learning_rate": 4.714361328424583e-06, "loss": 0.4326, "step": 15223 }, { "epoch": 0.9270773071887465, "grad_norm": 1.0336669488747956, "learning_rate": 4.714324288235289e-06, "loss": 0.3901, "step": 15224 }, { "epoch": 0.9271382029656243, "grad_norm": 0.9773692495119285, "learning_rate": 4.714287245790086e-06, "loss": 0.4262, "step": 15225 }, { "epoch": 0.9271990987425022, "grad_norm": 0.9707886559619588, "learning_rate": 4.714250201089014e-06, "loss": 0.3752, "step": 15226 }, { "epoch": 0.9272599945193801, "grad_norm": 1.0558005561669455, "learning_rate": 4.71421315413211e-06, "loss": 0.4631, "step": 15227 }, { "epoch": 0.927320890296258, "grad_norm": 0.9672782318797569, "learning_rate": 4.714176104919411e-06, "loss": 0.3953, "step": 15228 }, { "epoch": 0.9273817860731358, "grad_norm": 1.0680403145602033, "learning_rate": 4.714139053450956e-06, "loss": 0.4203, "step": 15229 }, { "epoch": 0.9274426818500137, "grad_norm": 0.9736826789638839, "learning_rate": 4.714101999726783e-06, "loss": 0.4607, "step": 15230 }, { "epoch": 0.9275035776268916, "grad_norm": 0.9251150294217217, "learning_rate": 4.714064943746927e-06, "loss": 0.3946, "step": 15231 }, { "epoch": 0.9275644734037695, "grad_norm": 0.9242193871157857, "learning_rate": 4.714027885511429e-06, "loss": 0.4041, "step": 15232 }, { "epoch": 0.9276253691806473, "grad_norm": 1.0092426116772308, "learning_rate": 4.713990825020325e-06, "loss": 0.4644, "step": 15233 }, { "epoch": 0.9276862649575252, "grad_norm": 0.9573693650242362, "learning_rate": 4.713953762273653e-06, "loss": 0.441, "step": 15234 }, { "epoch": 0.9277471607344031, "grad_norm": 1.0961043728803506, "learning_rate": 4.713916697271451e-06, "loss": 0.4009, "step": 15235 }, { "epoch": 0.927808056511281, "grad_norm": 0.9425187574131461, "learning_rate": 4.7138796300137566e-06, "loss": 0.3742, "step": 15236 }, { "epoch": 0.9278689522881588, "grad_norm": 0.9571273444326102, "learning_rate": 4.713842560500608e-06, "loss": 0.4072, "step": 15237 }, { "epoch": 0.9279298480650366, "grad_norm": 0.96550653368326, "learning_rate": 4.713805488732042e-06, "loss": 0.4399, "step": 15238 }, { "epoch": 0.9279907438419146, "grad_norm": 1.0535281054842889, "learning_rate": 4.713768414708099e-06, "loss": 0.4641, "step": 15239 }, { "epoch": 0.9280516396187924, "grad_norm": 0.992209305367028, "learning_rate": 4.713731338428813e-06, "loss": 0.467, "step": 15240 }, { "epoch": 0.9281125353956703, "grad_norm": 1.00929572780801, "learning_rate": 4.713694259894224e-06, "loss": 0.3942, "step": 15241 }, { "epoch": 0.9281734311725481, "grad_norm": 0.9429702508775255, "learning_rate": 4.713657179104368e-06, "loss": 0.4224, "step": 15242 }, { "epoch": 0.9282343269494261, "grad_norm": 0.9698854269209315, "learning_rate": 4.713620096059286e-06, "loss": 0.4071, "step": 15243 }, { "epoch": 0.9282952227263039, "grad_norm": 0.9462553068018321, "learning_rate": 4.713583010759013e-06, "loss": 0.4923, "step": 15244 }, { "epoch": 0.9283561185031818, "grad_norm": 1.1030196074859464, "learning_rate": 4.7135459232035874e-06, "loss": 0.397, "step": 15245 }, { "epoch": 0.9284170142800596, "grad_norm": 0.9315910937767609, "learning_rate": 4.713508833393048e-06, "loss": 0.4672, "step": 15246 }, { "epoch": 0.9284779100569376, "grad_norm": 1.1335813879857888, "learning_rate": 4.713471741327431e-06, "loss": 0.3697, "step": 15247 }, { "epoch": 0.9285388058338154, "grad_norm": 0.9585945624373037, "learning_rate": 4.713434647006776e-06, "loss": 0.4122, "step": 15248 }, { "epoch": 0.9285997016106933, "grad_norm": 1.0407433951746445, "learning_rate": 4.713397550431119e-06, "loss": 0.4128, "step": 15249 }, { "epoch": 0.9286605973875711, "grad_norm": 1.0297048572320606, "learning_rate": 4.713360451600499e-06, "loss": 0.4081, "step": 15250 }, { "epoch": 0.9287214931644491, "grad_norm": 1.047755197632074, "learning_rate": 4.713323350514953e-06, "loss": 0.4632, "step": 15251 }, { "epoch": 0.9287823889413269, "grad_norm": 0.9546352026056543, "learning_rate": 4.713286247174519e-06, "loss": 0.3913, "step": 15252 }, { "epoch": 0.9288432847182048, "grad_norm": 1.0358045313460855, "learning_rate": 4.713249141579236e-06, "loss": 0.3612, "step": 15253 }, { "epoch": 0.9289041804950827, "grad_norm": 1.0310854124686157, "learning_rate": 4.7132120337291395e-06, "loss": 0.4835, "step": 15254 }, { "epoch": 0.9289650762719606, "grad_norm": 1.0160112634957572, "learning_rate": 4.71317492362427e-06, "loss": 0.364, "step": 15255 }, { "epoch": 0.9290259720488384, "grad_norm": 0.9910563332774294, "learning_rate": 4.713137811264663e-06, "loss": 0.4415, "step": 15256 }, { "epoch": 0.9290868678257163, "grad_norm": 1.013434052058888, "learning_rate": 4.713100696650358e-06, "loss": 0.4049, "step": 15257 }, { "epoch": 0.9291477636025942, "grad_norm": 1.012102284837412, "learning_rate": 4.71306357978139e-06, "loss": 0.36, "step": 15258 }, { "epoch": 0.9292086593794721, "grad_norm": 0.9665757457001548, "learning_rate": 4.713026460657801e-06, "loss": 0.4054, "step": 15259 }, { "epoch": 0.9292695551563499, "grad_norm": 0.933438778121114, "learning_rate": 4.712989339279626e-06, "loss": 0.4503, "step": 15260 }, { "epoch": 0.9293304509332277, "grad_norm": 1.0091061836432411, "learning_rate": 4.712952215646903e-06, "loss": 0.4033, "step": 15261 }, { "epoch": 0.9293913467101057, "grad_norm": 0.9332329812095672, "learning_rate": 4.712915089759671e-06, "loss": 0.3929, "step": 15262 }, { "epoch": 0.9294522424869835, "grad_norm": 1.063532560113865, "learning_rate": 4.712877961617967e-06, "loss": 0.3857, "step": 15263 }, { "epoch": 0.9295131382638614, "grad_norm": 1.0510369840991762, "learning_rate": 4.712840831221828e-06, "loss": 0.4557, "step": 15264 }, { "epoch": 0.9295740340407392, "grad_norm": 1.0407479520728677, "learning_rate": 4.712803698571294e-06, "loss": 0.3961, "step": 15265 }, { "epoch": 0.9296349298176172, "grad_norm": 1.0096216750302132, "learning_rate": 4.7127665636664014e-06, "loss": 0.353, "step": 15266 }, { "epoch": 0.929695825594495, "grad_norm": 1.006873228288592, "learning_rate": 4.712729426507187e-06, "loss": 0.4306, "step": 15267 }, { "epoch": 0.9297567213713729, "grad_norm": 1.0290555489635012, "learning_rate": 4.712692287093691e-06, "loss": 0.3456, "step": 15268 }, { "epoch": 0.9298176171482507, "grad_norm": 0.9943870295773239, "learning_rate": 4.71265514542595e-06, "loss": 0.4057, "step": 15269 }, { "epoch": 0.9298785129251287, "grad_norm": 0.9763658457577852, "learning_rate": 4.7126180015040015e-06, "loss": 0.5063, "step": 15270 }, { "epoch": 0.9299394087020065, "grad_norm": 0.9641867810189255, "learning_rate": 4.712580855327884e-06, "loss": 0.3949, "step": 15271 }, { "epoch": 0.9300003044788844, "grad_norm": 0.9187098914065841, "learning_rate": 4.712543706897636e-06, "loss": 0.4107, "step": 15272 }, { "epoch": 0.9300612002557622, "grad_norm": 1.0709087889079878, "learning_rate": 4.712506556213293e-06, "loss": 0.3645, "step": 15273 }, { "epoch": 0.9301220960326402, "grad_norm": 1.0023439068339377, "learning_rate": 4.7124694032748955e-06, "loss": 0.4252, "step": 15274 }, { "epoch": 0.930182991809518, "grad_norm": 1.1147351126790757, "learning_rate": 4.712432248082479e-06, "loss": 0.395, "step": 15275 }, { "epoch": 0.9302438875863959, "grad_norm": 0.9056347655265692, "learning_rate": 4.712395090636084e-06, "loss": 0.4282, "step": 15276 }, { "epoch": 0.9303047833632737, "grad_norm": 1.0190690584146196, "learning_rate": 4.712357930935746e-06, "loss": 0.3726, "step": 15277 }, { "epoch": 0.9303656791401517, "grad_norm": 0.9935348532689008, "learning_rate": 4.712320768981503e-06, "loss": 0.4699, "step": 15278 }, { "epoch": 0.9304265749170295, "grad_norm": 1.0971948451187092, "learning_rate": 4.7122836047733944e-06, "loss": 0.3566, "step": 15279 }, { "epoch": 0.9304874706939074, "grad_norm": 0.9366819138584908, "learning_rate": 4.712246438311457e-06, "loss": 0.3678, "step": 15280 }, { "epoch": 0.9305483664707852, "grad_norm": 1.0405548251040349, "learning_rate": 4.71220926959573e-06, "loss": 0.4132, "step": 15281 }, { "epoch": 0.9306092622476632, "grad_norm": 0.9696651165512841, "learning_rate": 4.712172098626249e-06, "loss": 0.3683, "step": 15282 }, { "epoch": 0.930670158024541, "grad_norm": 1.0446204226499307, "learning_rate": 4.7121349254030535e-06, "loss": 0.4325, "step": 15283 }, { "epoch": 0.9307310538014189, "grad_norm": 0.9579231851115656, "learning_rate": 4.712097749926181e-06, "loss": 0.4054, "step": 15284 }, { "epoch": 0.9307919495782967, "grad_norm": 1.0165820270403192, "learning_rate": 4.7120605721956696e-06, "loss": 0.4351, "step": 15285 }, { "epoch": 0.9308528453551747, "grad_norm": 0.897146005327998, "learning_rate": 4.712023392211556e-06, "loss": 0.4186, "step": 15286 }, { "epoch": 0.9309137411320525, "grad_norm": 0.9181203963686819, "learning_rate": 4.711986209973879e-06, "loss": 0.5203, "step": 15287 }, { "epoch": 0.9309746369089303, "grad_norm": 0.9508379766507573, "learning_rate": 4.7119490254826775e-06, "loss": 0.4918, "step": 15288 }, { "epoch": 0.9310355326858082, "grad_norm": 1.0327413219777577, "learning_rate": 4.711911838737987e-06, "loss": 0.4098, "step": 15289 }, { "epoch": 0.9310964284626861, "grad_norm": 0.9324983486293862, "learning_rate": 4.711874649739847e-06, "loss": 0.4501, "step": 15290 }, { "epoch": 0.931157324239564, "grad_norm": 0.9474725768639107, "learning_rate": 4.711837458488296e-06, "loss": 0.4258, "step": 15291 }, { "epoch": 0.9312182200164418, "grad_norm": 0.900371392957501, "learning_rate": 4.71180026498337e-06, "loss": 0.4682, "step": 15292 }, { "epoch": 0.9312791157933197, "grad_norm": 0.9554429902396148, "learning_rate": 4.7117630692251084e-06, "loss": 0.4396, "step": 15293 }, { "epoch": 0.9313400115701976, "grad_norm": 0.9844038597365486, "learning_rate": 4.7117258712135485e-06, "loss": 0.4166, "step": 15294 }, { "epoch": 0.9314009073470755, "grad_norm": 0.9244529077008092, "learning_rate": 4.7116886709487285e-06, "loss": 0.4128, "step": 15295 }, { "epoch": 0.9314618031239533, "grad_norm": 1.050315802218992, "learning_rate": 4.711651468430686e-06, "loss": 0.4351, "step": 15296 }, { "epoch": 0.9315226989008313, "grad_norm": 0.9887979015146541, "learning_rate": 4.711614263659459e-06, "loss": 0.4404, "step": 15297 }, { "epoch": 0.9315835946777091, "grad_norm": 0.9908534917854082, "learning_rate": 4.711577056635085e-06, "loss": 0.4673, "step": 15298 }, { "epoch": 0.931644490454587, "grad_norm": 1.0745192387917644, "learning_rate": 4.711539847357602e-06, "loss": 0.3221, "step": 15299 }, { "epoch": 0.9317053862314648, "grad_norm": 0.947597144402304, "learning_rate": 4.711502635827049e-06, "loss": 0.4601, "step": 15300 }, { "epoch": 0.9317662820083428, "grad_norm": 0.934028386536339, "learning_rate": 4.711465422043463e-06, "loss": 0.3845, "step": 15301 }, { "epoch": 0.9318271777852206, "grad_norm": 1.0037556519705346, "learning_rate": 4.711428206006882e-06, "loss": 0.3749, "step": 15302 }, { "epoch": 0.9318880735620985, "grad_norm": 0.96299390741456, "learning_rate": 4.711390987717343e-06, "loss": 0.3734, "step": 15303 }, { "epoch": 0.9319489693389763, "grad_norm": 1.00848334826525, "learning_rate": 4.711353767174885e-06, "loss": 0.3597, "step": 15304 }, { "epoch": 0.9320098651158543, "grad_norm": 0.9234286000249986, "learning_rate": 4.711316544379547e-06, "loss": 0.407, "step": 15305 }, { "epoch": 0.9320707608927321, "grad_norm": 0.9884932069800747, "learning_rate": 4.7112793193313645e-06, "loss": 0.4505, "step": 15306 }, { "epoch": 0.93213165666961, "grad_norm": 1.0186963100470807, "learning_rate": 4.711242092030377e-06, "loss": 0.3728, "step": 15307 }, { "epoch": 0.9321925524464878, "grad_norm": 0.9979358437552822, "learning_rate": 4.711204862476622e-06, "loss": 0.5104, "step": 15308 }, { "epoch": 0.9322534482233658, "grad_norm": 1.0154225353464241, "learning_rate": 4.711167630670137e-06, "loss": 0.4574, "step": 15309 }, { "epoch": 0.9323143440002436, "grad_norm": 0.9618255624576327, "learning_rate": 4.711130396610961e-06, "loss": 0.3918, "step": 15310 }, { "epoch": 0.9323752397771214, "grad_norm": 0.9878605317171026, "learning_rate": 4.711093160299132e-06, "loss": 0.4398, "step": 15311 }, { "epoch": 0.9324361355539993, "grad_norm": 1.0400059545269782, "learning_rate": 4.711055921734686e-06, "loss": 0.3838, "step": 15312 }, { "epoch": 0.9324970313308772, "grad_norm": 0.9941510720407297, "learning_rate": 4.711018680917663e-06, "loss": 0.4495, "step": 15313 }, { "epoch": 0.9325579271077551, "grad_norm": 0.955658079018839, "learning_rate": 4.710981437848099e-06, "loss": 0.4466, "step": 15314 }, { "epoch": 0.9326188228846329, "grad_norm": 0.9924186734120606, "learning_rate": 4.710944192526035e-06, "loss": 0.3489, "step": 15315 }, { "epoch": 0.9326797186615108, "grad_norm": 0.9266522139040106, "learning_rate": 4.710906944951506e-06, "loss": 0.4869, "step": 15316 }, { "epoch": 0.9327406144383887, "grad_norm": 0.9186848505294151, "learning_rate": 4.71086969512455e-06, "loss": 0.4238, "step": 15317 }, { "epoch": 0.9328015102152666, "grad_norm": 1.0042661775326858, "learning_rate": 4.710832443045207e-06, "loss": 0.4044, "step": 15318 }, { "epoch": 0.9328624059921444, "grad_norm": 1.103007760321851, "learning_rate": 4.710795188713514e-06, "loss": 0.3737, "step": 15319 }, { "epoch": 0.9329233017690223, "grad_norm": 0.9604476744806938, "learning_rate": 4.710757932129508e-06, "loss": 0.4574, "step": 15320 }, { "epoch": 0.9329841975459002, "grad_norm": 0.9866161526133975, "learning_rate": 4.710720673293229e-06, "loss": 0.4062, "step": 15321 }, { "epoch": 0.9330450933227781, "grad_norm": 1.0142348839687878, "learning_rate": 4.710683412204713e-06, "loss": 0.4014, "step": 15322 }, { "epoch": 0.9331059890996559, "grad_norm": 1.0526406187783408, "learning_rate": 4.710646148863998e-06, "loss": 0.3759, "step": 15323 }, { "epoch": 0.9331668848765338, "grad_norm": 0.992496256520619, "learning_rate": 4.710608883271123e-06, "loss": 0.3515, "step": 15324 }, { "epoch": 0.9332277806534117, "grad_norm": 1.030847540996335, "learning_rate": 4.710571615426126e-06, "loss": 0.4022, "step": 15325 }, { "epoch": 0.9332886764302896, "grad_norm": 0.9779223920853227, "learning_rate": 4.710534345329046e-06, "loss": 0.3961, "step": 15326 }, { "epoch": 0.9333495722071674, "grad_norm": 0.9755729731698415, "learning_rate": 4.710497072979917e-06, "loss": 0.448, "step": 15327 }, { "epoch": 0.9334104679840453, "grad_norm": 1.0291032909556508, "learning_rate": 4.7104597983787805e-06, "loss": 0.3803, "step": 15328 }, { "epoch": 0.9334713637609232, "grad_norm": 1.0298070003103927, "learning_rate": 4.710422521525673e-06, "loss": 0.4577, "step": 15329 }, { "epoch": 0.9335322595378011, "grad_norm": 0.9811796549274837, "learning_rate": 4.710385242420635e-06, "loss": 0.3965, "step": 15330 }, { "epoch": 0.9335931553146789, "grad_norm": 0.9463123849277493, "learning_rate": 4.710347961063701e-06, "loss": 0.4695, "step": 15331 }, { "epoch": 0.9336540510915567, "grad_norm": 0.9824277306519186, "learning_rate": 4.71031067745491e-06, "loss": 0.3511, "step": 15332 }, { "epoch": 0.9337149468684347, "grad_norm": 1.0090769375813666, "learning_rate": 4.710273391594301e-06, "loss": 0.4065, "step": 15333 }, { "epoch": 0.9337758426453125, "grad_norm": 1.0109465842636578, "learning_rate": 4.710236103481912e-06, "loss": 0.3874, "step": 15334 }, { "epoch": 0.9338367384221904, "grad_norm": 1.111491158100388, "learning_rate": 4.71019881311778e-06, "loss": 0.3158, "step": 15335 }, { "epoch": 0.9338976341990683, "grad_norm": 0.9983000181078318, "learning_rate": 4.710161520501944e-06, "loss": 0.4008, "step": 15336 }, { "epoch": 0.9339585299759462, "grad_norm": 0.9940995198126811, "learning_rate": 4.71012422563444e-06, "loss": 0.4891, "step": 15337 }, { "epoch": 0.934019425752824, "grad_norm": 0.9647420007608748, "learning_rate": 4.710086928515309e-06, "loss": 0.4846, "step": 15338 }, { "epoch": 0.9340803215297019, "grad_norm": 0.9731612374384189, "learning_rate": 4.710049629144585e-06, "loss": 0.4095, "step": 15339 }, { "epoch": 0.9341412173065798, "grad_norm": 0.9768744175321294, "learning_rate": 4.71001232752231e-06, "loss": 0.4701, "step": 15340 }, { "epoch": 0.9342021130834577, "grad_norm": 0.9590285287039639, "learning_rate": 4.709975023648521e-06, "loss": 0.4433, "step": 15341 }, { "epoch": 0.9342630088603355, "grad_norm": 0.9958794595411469, "learning_rate": 4.7099377175232545e-06, "loss": 0.4114, "step": 15342 }, { "epoch": 0.9343239046372134, "grad_norm": 0.9816143916027386, "learning_rate": 4.709900409146549e-06, "loss": 0.4774, "step": 15343 }, { "epoch": 0.9343848004140913, "grad_norm": 0.9220998597814397, "learning_rate": 4.709863098518444e-06, "loss": 0.4482, "step": 15344 }, { "epoch": 0.9344456961909692, "grad_norm": 0.9864981178539675, "learning_rate": 4.7098257856389754e-06, "loss": 0.3638, "step": 15345 }, { "epoch": 0.934506591967847, "grad_norm": 0.9590695164322655, "learning_rate": 4.7097884705081834e-06, "loss": 0.5061, "step": 15346 }, { "epoch": 0.9345674877447249, "grad_norm": 0.9940063358911418, "learning_rate": 4.709751153126103e-06, "loss": 0.4143, "step": 15347 }, { "epoch": 0.9346283835216028, "grad_norm": 0.8586637229790017, "learning_rate": 4.709713833492776e-06, "loss": 0.427, "step": 15348 }, { "epoch": 0.9346892792984807, "grad_norm": 0.99089820010201, "learning_rate": 4.7096765116082374e-06, "loss": 0.3956, "step": 15349 }, { "epoch": 0.9347501750753585, "grad_norm": 0.9872298608589711, "learning_rate": 4.709639187472526e-06, "loss": 0.4377, "step": 15350 }, { "epoch": 0.9348110708522364, "grad_norm": 0.9826683021901469, "learning_rate": 4.709601861085681e-06, "loss": 0.4361, "step": 15351 }, { "epoch": 0.9348719666291143, "grad_norm": 0.9318010946812185, "learning_rate": 4.709564532447739e-06, "loss": 0.3866, "step": 15352 }, { "epoch": 0.9349328624059922, "grad_norm": 0.9494602513241668, "learning_rate": 4.70952720155874e-06, "loss": 0.4326, "step": 15353 }, { "epoch": 0.93499375818287, "grad_norm": 0.9781365658983473, "learning_rate": 4.709489868418719e-06, "loss": 0.4066, "step": 15354 }, { "epoch": 0.9350546539597479, "grad_norm": 0.9450665244332705, "learning_rate": 4.709452533027717e-06, "loss": 0.4451, "step": 15355 }, { "epoch": 0.9351155497366258, "grad_norm": 0.9797510745731755, "learning_rate": 4.709415195385769e-06, "loss": 0.4306, "step": 15356 }, { "epoch": 0.9351764455135037, "grad_norm": 1.0525709233179905, "learning_rate": 4.7093778554929156e-06, "loss": 0.3873, "step": 15357 }, { "epoch": 0.9352373412903815, "grad_norm": 1.0118362663210274, "learning_rate": 4.709340513349194e-06, "loss": 0.3462, "step": 15358 }, { "epoch": 0.9352982370672593, "grad_norm": 0.9807983286445586, "learning_rate": 4.709303168954642e-06, "loss": 0.4581, "step": 15359 }, { "epoch": 0.9353591328441373, "grad_norm": 1.0160110938635236, "learning_rate": 4.709265822309298e-06, "loss": 0.4297, "step": 15360 }, { "epoch": 0.9354200286210151, "grad_norm": 1.1292545442921278, "learning_rate": 4.7092284734132e-06, "loss": 0.4062, "step": 15361 }, { "epoch": 0.935480924397893, "grad_norm": 1.1399370820653394, "learning_rate": 4.709191122266386e-06, "loss": 0.3248, "step": 15362 }, { "epoch": 0.9355418201747708, "grad_norm": 1.0357134514817865, "learning_rate": 4.709153768868894e-06, "loss": 0.3913, "step": 15363 }, { "epoch": 0.9356027159516488, "grad_norm": 1.0626177344467198, "learning_rate": 4.709116413220763e-06, "loss": 0.3642, "step": 15364 }, { "epoch": 0.9356636117285266, "grad_norm": 0.9991812694263088, "learning_rate": 4.709079055322029e-06, "loss": 0.386, "step": 15365 }, { "epoch": 0.9357245075054045, "grad_norm": 0.9335147385063244, "learning_rate": 4.709041695172731e-06, "loss": 0.416, "step": 15366 }, { "epoch": 0.9357854032822823, "grad_norm": 1.0292846001707403, "learning_rate": 4.709004332772908e-06, "loss": 0.446, "step": 15367 }, { "epoch": 0.9358462990591603, "grad_norm": 1.083373513228388, "learning_rate": 4.708966968122596e-06, "loss": 0.3449, "step": 15368 }, { "epoch": 0.9359071948360381, "grad_norm": 0.9975078064912579, "learning_rate": 4.7089296012218354e-06, "loss": 0.4, "step": 15369 }, { "epoch": 0.935968090612916, "grad_norm": 0.9929162533078592, "learning_rate": 4.7088922320706634e-06, "loss": 0.4135, "step": 15370 }, { "epoch": 0.9360289863897938, "grad_norm": 1.0129343012988314, "learning_rate": 4.708854860669117e-06, "loss": 0.3697, "step": 15371 }, { "epoch": 0.9360898821666718, "grad_norm": 0.9803363544851896, "learning_rate": 4.708817487017236e-06, "loss": 0.3818, "step": 15372 }, { "epoch": 0.9361507779435496, "grad_norm": 1.0485761603954464, "learning_rate": 4.708780111115058e-06, "loss": 0.3908, "step": 15373 }, { "epoch": 0.9362116737204275, "grad_norm": 1.0875345740125373, "learning_rate": 4.70874273296262e-06, "loss": 0.4132, "step": 15374 }, { "epoch": 0.9362725694973053, "grad_norm": 1.0755469705986143, "learning_rate": 4.708705352559961e-06, "loss": 0.387, "step": 15375 }, { "epoch": 0.9363334652741833, "grad_norm": 0.9868405606295292, "learning_rate": 4.7086679699071194e-06, "loss": 0.3911, "step": 15376 }, { "epoch": 0.9363943610510611, "grad_norm": 1.004100375907398, "learning_rate": 4.708630585004132e-06, "loss": 0.5026, "step": 15377 }, { "epoch": 0.936455256827939, "grad_norm": 0.9281969355992117, "learning_rate": 4.708593197851038e-06, "loss": 0.4437, "step": 15378 }, { "epoch": 0.9365161526048169, "grad_norm": 1.057019724786819, "learning_rate": 4.708555808447875e-06, "loss": 0.3605, "step": 15379 }, { "epoch": 0.9365770483816948, "grad_norm": 0.9753102793059005, "learning_rate": 4.708518416794682e-06, "loss": 0.4009, "step": 15380 }, { "epoch": 0.9366379441585726, "grad_norm": 0.9650378163758415, "learning_rate": 4.708481022891495e-06, "loss": 0.477, "step": 15381 }, { "epoch": 0.9366988399354504, "grad_norm": 0.9413133712292188, "learning_rate": 4.7084436267383546e-06, "loss": 0.4846, "step": 15382 }, { "epoch": 0.9367597357123284, "grad_norm": 0.9926517949724434, "learning_rate": 4.708406228335298e-06, "loss": 0.4582, "step": 15383 }, { "epoch": 0.9368206314892062, "grad_norm": 1.0000354722187548, "learning_rate": 4.708368827682362e-06, "loss": 0.4916, "step": 15384 }, { "epoch": 0.9368815272660841, "grad_norm": 1.0291018933231741, "learning_rate": 4.708331424779586e-06, "loss": 0.4383, "step": 15385 }, { "epoch": 0.9369424230429619, "grad_norm": 1.0690088228736023, "learning_rate": 4.708294019627008e-06, "loss": 0.3779, "step": 15386 }, { "epoch": 0.9370033188198399, "grad_norm": 1.1062111064474334, "learning_rate": 4.708256612224666e-06, "loss": 0.468, "step": 15387 }, { "epoch": 0.9370642145967177, "grad_norm": 0.9734583901359855, "learning_rate": 4.708219202572598e-06, "loss": 0.4288, "step": 15388 }, { "epoch": 0.9371251103735956, "grad_norm": 0.9519190816355115, "learning_rate": 4.708181790670843e-06, "loss": 0.4409, "step": 15389 }, { "epoch": 0.9371860061504734, "grad_norm": 0.9334279965095429, "learning_rate": 4.708144376519437e-06, "loss": 0.4137, "step": 15390 }, { "epoch": 0.9372469019273514, "grad_norm": 0.9894671386255539, "learning_rate": 4.70810696011842e-06, "loss": 0.4414, "step": 15391 }, { "epoch": 0.9373077977042292, "grad_norm": 0.9940425640993321, "learning_rate": 4.70806954146783e-06, "loss": 0.4628, "step": 15392 }, { "epoch": 0.9373686934811071, "grad_norm": 0.9262866856843776, "learning_rate": 4.7080321205677045e-06, "loss": 0.3902, "step": 15393 }, { "epoch": 0.9374295892579849, "grad_norm": 0.9095737643173865, "learning_rate": 4.707994697418081e-06, "loss": 0.3646, "step": 15394 }, { "epoch": 0.9374904850348629, "grad_norm": 1.0489703743559227, "learning_rate": 4.7079572720189995e-06, "loss": 0.4014, "step": 15395 }, { "epoch": 0.9375513808117407, "grad_norm": 0.9649591459002353, "learning_rate": 4.707919844370496e-06, "loss": 0.4541, "step": 15396 }, { "epoch": 0.9376122765886186, "grad_norm": 0.9833198326492133, "learning_rate": 4.70788241447261e-06, "loss": 0.35, "step": 15397 }, { "epoch": 0.9376731723654964, "grad_norm": 1.002200601116947, "learning_rate": 4.70784498232538e-06, "loss": 0.368, "step": 15398 }, { "epoch": 0.9377340681423744, "grad_norm": 0.991673582871368, "learning_rate": 4.707807547928842e-06, "loss": 0.4262, "step": 15399 }, { "epoch": 0.9377949639192522, "grad_norm": 0.9690099166527907, "learning_rate": 4.707770111283037e-06, "loss": 0.4943, "step": 15400 }, { "epoch": 0.9378558596961301, "grad_norm": 1.0035732464099465, "learning_rate": 4.707732672388001e-06, "loss": 0.412, "step": 15401 }, { "epoch": 0.9379167554730079, "grad_norm": 1.0727260068310487, "learning_rate": 4.707695231243773e-06, "loss": 0.4243, "step": 15402 }, { "epoch": 0.9379776512498859, "grad_norm": 0.9798693439422196, "learning_rate": 4.707657787850391e-06, "loss": 0.4178, "step": 15403 }, { "epoch": 0.9380385470267637, "grad_norm": 0.9896513579972929, "learning_rate": 4.707620342207893e-06, "loss": 0.3673, "step": 15404 }, { "epoch": 0.9380994428036415, "grad_norm": 1.1163411994364854, "learning_rate": 4.707582894316317e-06, "loss": 0.3713, "step": 15405 }, { "epoch": 0.9381603385805194, "grad_norm": 1.036964118942981, "learning_rate": 4.707545444175702e-06, "loss": 0.4135, "step": 15406 }, { "epoch": 0.9382212343573973, "grad_norm": 1.0144037172674887, "learning_rate": 4.707507991786086e-06, "loss": 0.4353, "step": 15407 }, { "epoch": 0.9382821301342752, "grad_norm": 0.9698343139216431, "learning_rate": 4.707470537147506e-06, "loss": 0.3874, "step": 15408 }, { "epoch": 0.938343025911153, "grad_norm": 1.0531446778159639, "learning_rate": 4.707433080260001e-06, "loss": 0.419, "step": 15409 }, { "epoch": 0.9384039216880309, "grad_norm": 0.9801890181185203, "learning_rate": 4.707395621123609e-06, "loss": 0.3509, "step": 15410 }, { "epoch": 0.9384648174649088, "grad_norm": 1.0819305814255409, "learning_rate": 4.707358159738369e-06, "loss": 0.3808, "step": 15411 }, { "epoch": 0.9385257132417867, "grad_norm": 0.9833419199172165, "learning_rate": 4.7073206961043175e-06, "loss": 0.4272, "step": 15412 }, { "epoch": 0.9385866090186645, "grad_norm": 1.0956498161284445, "learning_rate": 4.707283230221494e-06, "loss": 0.3855, "step": 15413 }, { "epoch": 0.9386475047955424, "grad_norm": 1.0076309617066277, "learning_rate": 4.707245762089936e-06, "loss": 0.4553, "step": 15414 }, { "epoch": 0.9387084005724203, "grad_norm": 1.1898515238467002, "learning_rate": 4.7072082917096824e-06, "loss": 0.385, "step": 15415 }, { "epoch": 0.9387692963492982, "grad_norm": 0.9985005580628903, "learning_rate": 4.70717081908077e-06, "loss": 0.4434, "step": 15416 }, { "epoch": 0.938830192126176, "grad_norm": 1.0832907495622286, "learning_rate": 4.707133344203239e-06, "loss": 0.3889, "step": 15417 }, { "epoch": 0.938891087903054, "grad_norm": 1.0784188094974478, "learning_rate": 4.7070958670771255e-06, "loss": 0.4338, "step": 15418 }, { "epoch": 0.9389519836799318, "grad_norm": 1.013991599387046, "learning_rate": 4.707058387702469e-06, "loss": 0.3869, "step": 15419 }, { "epoch": 0.9390128794568097, "grad_norm": 0.9943142669604172, "learning_rate": 4.707020906079307e-06, "loss": 0.3589, "step": 15420 }, { "epoch": 0.9390737752336875, "grad_norm": 1.0359601259677766, "learning_rate": 4.706983422207678e-06, "loss": 0.4455, "step": 15421 }, { "epoch": 0.9391346710105655, "grad_norm": 0.9751422206267297, "learning_rate": 4.706945936087621e-06, "loss": 0.4487, "step": 15422 }, { "epoch": 0.9391955667874433, "grad_norm": 1.0188870643790606, "learning_rate": 4.706908447719173e-06, "loss": 0.4213, "step": 15423 }, { "epoch": 0.9392564625643212, "grad_norm": 1.0027445823734804, "learning_rate": 4.706870957102372e-06, "loss": 0.39, "step": 15424 }, { "epoch": 0.939317358341199, "grad_norm": 0.9476095492936313, "learning_rate": 4.706833464237257e-06, "loss": 0.3986, "step": 15425 }, { "epoch": 0.939378254118077, "grad_norm": 0.9972864680850855, "learning_rate": 4.7067959691238655e-06, "loss": 0.4462, "step": 15426 }, { "epoch": 0.9394391498949548, "grad_norm": 0.982892387130019, "learning_rate": 4.7067584717622375e-06, "loss": 0.3548, "step": 15427 }, { "epoch": 0.9395000456718327, "grad_norm": 0.9577367698644874, "learning_rate": 4.706720972152409e-06, "loss": 0.4447, "step": 15428 }, { "epoch": 0.9395609414487105, "grad_norm": 1.0706986163155414, "learning_rate": 4.706683470294418e-06, "loss": 0.3979, "step": 15429 }, { "epoch": 0.9396218372255885, "grad_norm": 0.9980868975606432, "learning_rate": 4.706645966188306e-06, "loss": 0.4199, "step": 15430 }, { "epoch": 0.9396827330024663, "grad_norm": 1.004751083663695, "learning_rate": 4.7066084598341075e-06, "loss": 0.4492, "step": 15431 }, { "epoch": 0.9397436287793441, "grad_norm": 1.0914214695600888, "learning_rate": 4.706570951231863e-06, "loss": 0.3859, "step": 15432 }, { "epoch": 0.939804524556222, "grad_norm": 1.0306524463946876, "learning_rate": 4.706533440381609e-06, "loss": 0.4548, "step": 15433 }, { "epoch": 0.9398654203330999, "grad_norm": 0.9436146537821517, "learning_rate": 4.706495927283384e-06, "loss": 0.395, "step": 15434 }, { "epoch": 0.9399263161099778, "grad_norm": 1.0757603017785757, "learning_rate": 4.706458411937228e-06, "loss": 0.3696, "step": 15435 }, { "epoch": 0.9399872118868556, "grad_norm": 1.1749742418013684, "learning_rate": 4.706420894343178e-06, "loss": 0.4026, "step": 15436 }, { "epoch": 0.9400481076637335, "grad_norm": 1.0586749027210276, "learning_rate": 4.706383374501272e-06, "loss": 0.4173, "step": 15437 }, { "epoch": 0.9401090034406114, "grad_norm": 1.1059825757136577, "learning_rate": 4.706345852411549e-06, "loss": 0.3934, "step": 15438 }, { "epoch": 0.9401698992174893, "grad_norm": 0.9686464012829685, "learning_rate": 4.706308328074047e-06, "loss": 0.389, "step": 15439 }, { "epoch": 0.9402307949943671, "grad_norm": 1.0101059444837466, "learning_rate": 4.706270801488803e-06, "loss": 0.4158, "step": 15440 }, { "epoch": 0.940291690771245, "grad_norm": 1.0451306094203274, "learning_rate": 4.706233272655855e-06, "loss": 0.4256, "step": 15441 }, { "epoch": 0.9403525865481229, "grad_norm": 1.0718110351783363, "learning_rate": 4.706195741575244e-06, "loss": 0.3407, "step": 15442 }, { "epoch": 0.9404134823250008, "grad_norm": 1.0506453793085544, "learning_rate": 4.706158208247007e-06, "loss": 0.3728, "step": 15443 }, { "epoch": 0.9404743781018786, "grad_norm": 0.9568823663931211, "learning_rate": 4.7061206726711814e-06, "loss": 0.4753, "step": 15444 }, { "epoch": 0.9405352738787565, "grad_norm": 1.0121622131639547, "learning_rate": 4.706083134847806e-06, "loss": 0.4141, "step": 15445 }, { "epoch": 0.9405961696556344, "grad_norm": 0.9803874687190405, "learning_rate": 4.7060455947769185e-06, "loss": 0.4903, "step": 15446 }, { "epoch": 0.9406570654325123, "grad_norm": 0.8825482690238775, "learning_rate": 4.7060080524585586e-06, "loss": 0.449, "step": 15447 }, { "epoch": 0.9407179612093901, "grad_norm": 1.0555519003885598, "learning_rate": 4.705970507892763e-06, "loss": 0.4681, "step": 15448 }, { "epoch": 0.940778856986268, "grad_norm": 0.9381579884549932, "learning_rate": 4.70593296107957e-06, "loss": 0.3859, "step": 15449 }, { "epoch": 0.9408397527631459, "grad_norm": 0.9803559139432957, "learning_rate": 4.7058954120190184e-06, "loss": 0.3943, "step": 15450 }, { "epoch": 0.9409006485400238, "grad_norm": 0.9735996436163746, "learning_rate": 4.705857860711147e-06, "loss": 0.3875, "step": 15451 }, { "epoch": 0.9409615443169016, "grad_norm": 1.025893438755108, "learning_rate": 4.705820307155993e-06, "loss": 0.3422, "step": 15452 }, { "epoch": 0.9410224400937794, "grad_norm": 1.0418251346725584, "learning_rate": 4.705782751353596e-06, "loss": 0.3801, "step": 15453 }, { "epoch": 0.9410833358706574, "grad_norm": 1.0443371808776407, "learning_rate": 4.705745193303993e-06, "loss": 0.3691, "step": 15454 }, { "epoch": 0.9411442316475352, "grad_norm": 0.9878040338624016, "learning_rate": 4.705707633007222e-06, "loss": 0.3932, "step": 15455 }, { "epoch": 0.9412051274244131, "grad_norm": 1.053212935890553, "learning_rate": 4.7056700704633225e-06, "loss": 0.4683, "step": 15456 }, { "epoch": 0.9412660232012909, "grad_norm": 1.0280693210369944, "learning_rate": 4.7056325056723315e-06, "loss": 0.5544, "step": 15457 }, { "epoch": 0.9413269189781689, "grad_norm": 1.0152260774587605, "learning_rate": 4.705594938634289e-06, "loss": 0.4972, "step": 15458 }, { "epoch": 0.9413878147550467, "grad_norm": 0.9504524877046254, "learning_rate": 4.705557369349232e-06, "loss": 0.3448, "step": 15459 }, { "epoch": 0.9414487105319246, "grad_norm": 0.9573158191116423, "learning_rate": 4.7055197978171986e-06, "loss": 0.4385, "step": 15460 }, { "epoch": 0.9415096063088025, "grad_norm": 1.0617285157293435, "learning_rate": 4.705482224038228e-06, "loss": 0.353, "step": 15461 }, { "epoch": 0.9415705020856804, "grad_norm": 1.1353881274444122, "learning_rate": 4.705444648012357e-06, "loss": 0.4543, "step": 15462 }, { "epoch": 0.9416313978625582, "grad_norm": 1.0185604847971605, "learning_rate": 4.705407069739626e-06, "loss": 0.3343, "step": 15463 }, { "epoch": 0.9416922936394361, "grad_norm": 0.983725813353555, "learning_rate": 4.7053694892200715e-06, "loss": 0.4131, "step": 15464 }, { "epoch": 0.941753189416314, "grad_norm": 0.9789872995830201, "learning_rate": 4.705331906453733e-06, "loss": 0.3075, "step": 15465 }, { "epoch": 0.9418140851931919, "grad_norm": 1.0066141942178977, "learning_rate": 4.705294321440647e-06, "loss": 0.5265, "step": 15466 }, { "epoch": 0.9418749809700697, "grad_norm": 0.8958397020651891, "learning_rate": 4.705256734180854e-06, "loss": 0.4222, "step": 15467 }, { "epoch": 0.9419358767469476, "grad_norm": 1.0611364330035165, "learning_rate": 4.705219144674391e-06, "loss": 0.3252, "step": 15468 }, { "epoch": 0.9419967725238255, "grad_norm": 0.8995536221709522, "learning_rate": 4.705181552921296e-06, "loss": 0.4477, "step": 15469 }, { "epoch": 0.9420576683007034, "grad_norm": 0.9901383692017169, "learning_rate": 4.705143958921609e-06, "loss": 0.3826, "step": 15470 }, { "epoch": 0.9421185640775812, "grad_norm": 0.9538495550098203, "learning_rate": 4.7051063626753665e-06, "loss": 0.4115, "step": 15471 }, { "epoch": 0.9421794598544591, "grad_norm": 1.0280392069324404, "learning_rate": 4.7050687641826074e-06, "loss": 0.4345, "step": 15472 }, { "epoch": 0.942240355631337, "grad_norm": 1.0421552741944342, "learning_rate": 4.705031163443371e-06, "loss": 0.3917, "step": 15473 }, { "epoch": 0.9423012514082149, "grad_norm": 1.0495859264590621, "learning_rate": 4.704993560457694e-06, "loss": 0.3983, "step": 15474 }, { "epoch": 0.9423621471850927, "grad_norm": 1.0775617544844562, "learning_rate": 4.704955955225615e-06, "loss": 0.3752, "step": 15475 }, { "epoch": 0.9424230429619705, "grad_norm": 0.9681308880517018, "learning_rate": 4.704918347747173e-06, "loss": 0.3985, "step": 15476 }, { "epoch": 0.9424839387388485, "grad_norm": 1.053379729286246, "learning_rate": 4.7048807380224056e-06, "loss": 0.4136, "step": 15477 }, { "epoch": 0.9425448345157263, "grad_norm": 1.1159000292099013, "learning_rate": 4.704843126051352e-06, "loss": 0.3272, "step": 15478 }, { "epoch": 0.9426057302926042, "grad_norm": 1.0198081540134922, "learning_rate": 4.704805511834051e-06, "loss": 0.4213, "step": 15479 }, { "epoch": 0.942666626069482, "grad_norm": 0.9931701210255208, "learning_rate": 4.704767895370539e-06, "loss": 0.4305, "step": 15480 }, { "epoch": 0.94272752184636, "grad_norm": 1.0817863370849214, "learning_rate": 4.704730276660855e-06, "loss": 0.4032, "step": 15481 }, { "epoch": 0.9427884176232378, "grad_norm": 0.964530428220946, "learning_rate": 4.704692655705039e-06, "loss": 0.427, "step": 15482 }, { "epoch": 0.9428493134001157, "grad_norm": 0.984671765121385, "learning_rate": 4.704655032503127e-06, "loss": 0.4675, "step": 15483 }, { "epoch": 0.9429102091769935, "grad_norm": 1.0113758355977183, "learning_rate": 4.704617407055158e-06, "loss": 0.4317, "step": 15484 }, { "epoch": 0.9429711049538715, "grad_norm": 1.0154902311360607, "learning_rate": 4.704579779361172e-06, "loss": 0.4308, "step": 15485 }, { "epoch": 0.9430320007307493, "grad_norm": 0.9676523024277223, "learning_rate": 4.704542149421204e-06, "loss": 0.3673, "step": 15486 }, { "epoch": 0.9430928965076272, "grad_norm": 0.963839826224812, "learning_rate": 4.704504517235295e-06, "loss": 0.4578, "step": 15487 }, { "epoch": 0.943153792284505, "grad_norm": 1.0241409365376615, "learning_rate": 4.704466882803483e-06, "loss": 0.3773, "step": 15488 }, { "epoch": 0.943214688061383, "grad_norm": 0.9834144587993434, "learning_rate": 4.704429246125805e-06, "loss": 0.3786, "step": 15489 }, { "epoch": 0.9432755838382608, "grad_norm": 0.9549649941943512, "learning_rate": 4.704391607202302e-06, "loss": 0.4068, "step": 15490 }, { "epoch": 0.9433364796151387, "grad_norm": 1.0000502523284394, "learning_rate": 4.704353966033009e-06, "loss": 0.393, "step": 15491 }, { "epoch": 0.9433973753920165, "grad_norm": 0.8896880253982705, "learning_rate": 4.704316322617968e-06, "loss": 0.4723, "step": 15492 }, { "epoch": 0.9434582711688945, "grad_norm": 1.0403588150069067, "learning_rate": 4.7042786769572135e-06, "loss": 0.4141, "step": 15493 }, { "epoch": 0.9435191669457723, "grad_norm": 1.017927171452685, "learning_rate": 4.704241029050787e-06, "loss": 0.4424, "step": 15494 }, { "epoch": 0.9435800627226502, "grad_norm": 0.9433672604878901, "learning_rate": 4.704203378898724e-06, "loss": 0.4084, "step": 15495 }, { "epoch": 0.943640958499528, "grad_norm": 1.0115267311074643, "learning_rate": 4.704165726501065e-06, "loss": 0.4351, "step": 15496 }, { "epoch": 0.943701854276406, "grad_norm": 0.9652277326374739, "learning_rate": 4.704128071857849e-06, "loss": 0.5049, "step": 15497 }, { "epoch": 0.9437627500532838, "grad_norm": 1.0090132040059794, "learning_rate": 4.704090414969112e-06, "loss": 0.3892, "step": 15498 }, { "epoch": 0.9438236458301617, "grad_norm": 1.022057269297129, "learning_rate": 4.7040527558348935e-06, "loss": 0.3868, "step": 15499 }, { "epoch": 0.9438845416070396, "grad_norm": 0.9453988691411991, "learning_rate": 4.704015094455233e-06, "loss": 0.3814, "step": 15500 }, { "epoch": 0.9439454373839175, "grad_norm": 0.9912474881451627, "learning_rate": 4.703977430830167e-06, "loss": 0.3902, "step": 15501 }, { "epoch": 0.9440063331607953, "grad_norm": 0.9868863843935762, "learning_rate": 4.703939764959734e-06, "loss": 0.3744, "step": 15502 }, { "epoch": 0.9440672289376731, "grad_norm": 0.9686046865181784, "learning_rate": 4.703902096843973e-06, "loss": 0.4547, "step": 15503 }, { "epoch": 0.9441281247145511, "grad_norm": 0.9790956268013826, "learning_rate": 4.7038644264829234e-06, "loss": 0.4177, "step": 15504 }, { "epoch": 0.9441890204914289, "grad_norm": 0.9908551099169395, "learning_rate": 4.703826753876622e-06, "loss": 0.3732, "step": 15505 }, { "epoch": 0.9442499162683068, "grad_norm": 1.0087354183424682, "learning_rate": 4.703789079025108e-06, "loss": 0.4203, "step": 15506 }, { "epoch": 0.9443108120451846, "grad_norm": 1.0230914600605712, "learning_rate": 4.703751401928419e-06, "loss": 0.3713, "step": 15507 }, { "epoch": 0.9443717078220626, "grad_norm": 0.9827777209166896, "learning_rate": 4.703713722586594e-06, "loss": 0.4195, "step": 15508 }, { "epoch": 0.9444326035989404, "grad_norm": 0.9812061862075048, "learning_rate": 4.7036760409996715e-06, "loss": 0.4825, "step": 15509 }, { "epoch": 0.9444934993758183, "grad_norm": 1.0771028553801196, "learning_rate": 4.70363835716769e-06, "loss": 0.3808, "step": 15510 }, { "epoch": 0.9445543951526961, "grad_norm": 0.9776431672635214, "learning_rate": 4.7036006710906865e-06, "loss": 0.3875, "step": 15511 }, { "epoch": 0.9446152909295741, "grad_norm": 0.9270154633177324, "learning_rate": 4.703562982768701e-06, "loss": 0.3912, "step": 15512 }, { "epoch": 0.9446761867064519, "grad_norm": 0.9845279245165681, "learning_rate": 4.7035252922017715e-06, "loss": 0.4534, "step": 15513 }, { "epoch": 0.9447370824833298, "grad_norm": 1.0050093146162276, "learning_rate": 4.703487599389936e-06, "loss": 0.4482, "step": 15514 }, { "epoch": 0.9447979782602076, "grad_norm": 1.00499328292079, "learning_rate": 4.703449904333234e-06, "loss": 0.4135, "step": 15515 }, { "epoch": 0.9448588740370856, "grad_norm": 0.9589489987990313, "learning_rate": 4.703412207031702e-06, "loss": 0.3949, "step": 15516 }, { "epoch": 0.9449197698139634, "grad_norm": 0.9534089456879171, "learning_rate": 4.70337450748538e-06, "loss": 0.3966, "step": 15517 }, { "epoch": 0.9449806655908413, "grad_norm": 1.0037893703416099, "learning_rate": 4.703336805694306e-06, "loss": 0.3796, "step": 15518 }, { "epoch": 0.9450415613677191, "grad_norm": 1.0028872534944488, "learning_rate": 4.703299101658518e-06, "loss": 0.3991, "step": 15519 }, { "epoch": 0.9451024571445971, "grad_norm": 1.0125270770840555, "learning_rate": 4.703261395378054e-06, "loss": 0.3389, "step": 15520 }, { "epoch": 0.9451633529214749, "grad_norm": 1.097013680088937, "learning_rate": 4.703223686852954e-06, "loss": 0.3996, "step": 15521 }, { "epoch": 0.9452242486983528, "grad_norm": 1.0409087804217343, "learning_rate": 4.703185976083256e-06, "loss": 0.3979, "step": 15522 }, { "epoch": 0.9452851444752306, "grad_norm": 0.9794851800186709, "learning_rate": 4.703148263068996e-06, "loss": 0.4396, "step": 15523 }, { "epoch": 0.9453460402521086, "grad_norm": 0.9970367824669337, "learning_rate": 4.703110547810216e-06, "loss": 0.4164, "step": 15524 }, { "epoch": 0.9454069360289864, "grad_norm": 0.9625958970517955, "learning_rate": 4.703072830306952e-06, "loss": 0.4475, "step": 15525 }, { "epoch": 0.9454678318058642, "grad_norm": 1.0381381996197845, "learning_rate": 4.703035110559244e-06, "loss": 0.3717, "step": 15526 }, { "epoch": 0.9455287275827421, "grad_norm": 1.0640811935707537, "learning_rate": 4.702997388567128e-06, "loss": 0.4086, "step": 15527 }, { "epoch": 0.94558962335962, "grad_norm": 0.9275341907682377, "learning_rate": 4.702959664330646e-06, "loss": 0.4042, "step": 15528 }, { "epoch": 0.9456505191364979, "grad_norm": 0.9550801609846576, "learning_rate": 4.702921937849834e-06, "loss": 0.4629, "step": 15529 }, { "epoch": 0.9457114149133757, "grad_norm": 1.006225767113423, "learning_rate": 4.702884209124731e-06, "loss": 0.3828, "step": 15530 }, { "epoch": 0.9457723106902536, "grad_norm": 0.9430286416318324, "learning_rate": 4.702846478155374e-06, "loss": 0.4181, "step": 15531 }, { "epoch": 0.9458332064671315, "grad_norm": 0.9980446444398846, "learning_rate": 4.7028087449418046e-06, "loss": 0.4225, "step": 15532 }, { "epoch": 0.9458941022440094, "grad_norm": 0.9946490244858535, "learning_rate": 4.7027710094840584e-06, "loss": 0.3862, "step": 15533 }, { "epoch": 0.9459549980208872, "grad_norm": 0.9381299907772478, "learning_rate": 4.702733271782175e-06, "loss": 0.4871, "step": 15534 }, { "epoch": 0.9460158937977651, "grad_norm": 1.0084412346030538, "learning_rate": 4.702695531836193e-06, "loss": 0.4084, "step": 15535 }, { "epoch": 0.946076789574643, "grad_norm": 1.0117873311187904, "learning_rate": 4.702657789646151e-06, "loss": 0.4251, "step": 15536 }, { "epoch": 0.9461376853515209, "grad_norm": 0.9068286438575237, "learning_rate": 4.702620045212086e-06, "loss": 0.4716, "step": 15537 }, { "epoch": 0.9461985811283987, "grad_norm": 1.0008707392188734, "learning_rate": 4.702582298534038e-06, "loss": 0.4308, "step": 15538 }, { "epoch": 0.9462594769052766, "grad_norm": 0.9717763255854986, "learning_rate": 4.702544549612045e-06, "loss": 0.3861, "step": 15539 }, { "epoch": 0.9463203726821545, "grad_norm": 0.9301736801922272, "learning_rate": 4.7025067984461456e-06, "loss": 0.4189, "step": 15540 }, { "epoch": 0.9463812684590324, "grad_norm": 0.9360194855762759, "learning_rate": 4.702469045036377e-06, "loss": 0.3905, "step": 15541 }, { "epoch": 0.9464421642359102, "grad_norm": 0.9000084338058915, "learning_rate": 4.7024312893827805e-06, "loss": 0.4236, "step": 15542 }, { "epoch": 0.9465030600127882, "grad_norm": 1.1392660123749154, "learning_rate": 4.702393531485392e-06, "loss": 0.4223, "step": 15543 }, { "epoch": 0.946563955789666, "grad_norm": 1.103359482048946, "learning_rate": 4.70235577134425e-06, "loss": 0.3603, "step": 15544 }, { "epoch": 0.9466248515665439, "grad_norm": 1.0622613968886416, "learning_rate": 4.702318008959394e-06, "loss": 0.391, "step": 15545 }, { "epoch": 0.9466857473434217, "grad_norm": 0.9597630645765111, "learning_rate": 4.702280244330863e-06, "loss": 0.4304, "step": 15546 }, { "epoch": 0.9467466431202997, "grad_norm": 0.958921110870717, "learning_rate": 4.702242477458694e-06, "loss": 0.4371, "step": 15547 }, { "epoch": 0.9468075388971775, "grad_norm": 1.0352886015265275, "learning_rate": 4.7022047083429266e-06, "loss": 0.3619, "step": 15548 }, { "epoch": 0.9468684346740553, "grad_norm": 0.9953438716144426, "learning_rate": 4.702166936983598e-06, "loss": 0.3553, "step": 15549 }, { "epoch": 0.9469293304509332, "grad_norm": 0.9771117024786976, "learning_rate": 4.702129163380748e-06, "loss": 0.3571, "step": 15550 }, { "epoch": 0.9469902262278111, "grad_norm": 1.0057866569895668, "learning_rate": 4.7020913875344156e-06, "loss": 0.4241, "step": 15551 }, { "epoch": 0.947051122004689, "grad_norm": 1.0085581697878474, "learning_rate": 4.702053609444637e-06, "loss": 0.417, "step": 15552 }, { "epoch": 0.9471120177815668, "grad_norm": 1.003618368957257, "learning_rate": 4.702015829111452e-06, "loss": 0.3727, "step": 15553 }, { "epoch": 0.9471729135584447, "grad_norm": 0.9286175089969583, "learning_rate": 4.7019780465349e-06, "loss": 0.4322, "step": 15554 }, { "epoch": 0.9472338093353226, "grad_norm": 0.9262429737965463, "learning_rate": 4.701940261715018e-06, "loss": 0.4472, "step": 15555 }, { "epoch": 0.9472947051122005, "grad_norm": 1.054134514681312, "learning_rate": 4.7019024746518445e-06, "loss": 0.3858, "step": 15556 }, { "epoch": 0.9473556008890783, "grad_norm": 0.9808165463772412, "learning_rate": 4.7018646853454196e-06, "loss": 0.4414, "step": 15557 }, { "epoch": 0.9474164966659562, "grad_norm": 0.9920453752897104, "learning_rate": 4.70182689379578e-06, "loss": 0.3799, "step": 15558 }, { "epoch": 0.9474773924428341, "grad_norm": 1.033677841776666, "learning_rate": 4.701789100002965e-06, "loss": 0.448, "step": 15559 }, { "epoch": 0.947538288219712, "grad_norm": 0.9571897925106787, "learning_rate": 4.701751303967013e-06, "loss": 0.4365, "step": 15560 }, { "epoch": 0.9475991839965898, "grad_norm": 0.9624614550476454, "learning_rate": 4.701713505687962e-06, "loss": 0.3789, "step": 15561 }, { "epoch": 0.9476600797734677, "grad_norm": 1.1019472503314045, "learning_rate": 4.701675705165852e-06, "loss": 0.378, "step": 15562 }, { "epoch": 0.9477209755503456, "grad_norm": 0.9782442566258991, "learning_rate": 4.701637902400721e-06, "loss": 0.4186, "step": 15563 }, { "epoch": 0.9477818713272235, "grad_norm": 0.9701358711659095, "learning_rate": 4.701600097392606e-06, "loss": 0.3993, "step": 15564 }, { "epoch": 0.9478427671041013, "grad_norm": 1.0368015350985231, "learning_rate": 4.701562290141547e-06, "loss": 0.3553, "step": 15565 }, { "epoch": 0.9479036628809792, "grad_norm": 1.0048455431293983, "learning_rate": 4.701524480647583e-06, "loss": 0.4327, "step": 15566 }, { "epoch": 0.9479645586578571, "grad_norm": 1.073030971330014, "learning_rate": 4.7014866689107494e-06, "loss": 0.3084, "step": 15567 }, { "epoch": 0.948025454434735, "grad_norm": 0.9718652299981534, "learning_rate": 4.701448854931089e-06, "loss": 0.4569, "step": 15568 }, { "epoch": 0.9480863502116128, "grad_norm": 0.9699419670237773, "learning_rate": 4.701411038708638e-06, "loss": 0.5, "step": 15569 }, { "epoch": 0.9481472459884907, "grad_norm": 1.0017650325808192, "learning_rate": 4.701373220243433e-06, "loss": 0.3473, "step": 15570 }, { "epoch": 0.9482081417653686, "grad_norm": 0.9622983217928486, "learning_rate": 4.7013353995355175e-06, "loss": 0.4703, "step": 15571 }, { "epoch": 0.9482690375422465, "grad_norm": 0.9643408327616423, "learning_rate": 4.701297576584926e-06, "loss": 0.4308, "step": 15572 }, { "epoch": 0.9483299333191243, "grad_norm": 0.9831214075831657, "learning_rate": 4.701259751391699e-06, "loss": 0.4181, "step": 15573 }, { "epoch": 0.9483908290960021, "grad_norm": 0.982766548026788, "learning_rate": 4.701221923955873e-06, "loss": 0.4036, "step": 15574 }, { "epoch": 0.9484517248728801, "grad_norm": 1.047682802211518, "learning_rate": 4.701184094277489e-06, "loss": 0.3769, "step": 15575 }, { "epoch": 0.9485126206497579, "grad_norm": 0.9454077039856068, "learning_rate": 4.701146262356584e-06, "loss": 0.4324, "step": 15576 }, { "epoch": 0.9485735164266358, "grad_norm": 0.9897003118435133, "learning_rate": 4.7011084281931975e-06, "loss": 0.3986, "step": 15577 }, { "epoch": 0.9486344122035136, "grad_norm": 1.0071136090647832, "learning_rate": 4.701070591787367e-06, "loss": 0.4565, "step": 15578 }, { "epoch": 0.9486953079803916, "grad_norm": 1.1160552920516866, "learning_rate": 4.701032753139132e-06, "loss": 0.3673, "step": 15579 }, { "epoch": 0.9487562037572694, "grad_norm": 0.9614571654640519, "learning_rate": 4.70099491224853e-06, "loss": 0.4172, "step": 15580 }, { "epoch": 0.9488170995341473, "grad_norm": 0.9389734559780505, "learning_rate": 4.7009570691156e-06, "loss": 0.4182, "step": 15581 }, { "epoch": 0.9488779953110252, "grad_norm": 0.9801031918319099, "learning_rate": 4.7009192237403815e-06, "loss": 0.3537, "step": 15582 }, { "epoch": 0.9489388910879031, "grad_norm": 0.9927674461791578, "learning_rate": 4.700881376122912e-06, "loss": 0.3642, "step": 15583 }, { "epoch": 0.9489997868647809, "grad_norm": 1.0603558023592061, "learning_rate": 4.7008435262632295e-06, "loss": 0.371, "step": 15584 }, { "epoch": 0.9490606826416588, "grad_norm": 1.0000832341002155, "learning_rate": 4.700805674161374e-06, "loss": 0.3523, "step": 15585 }, { "epoch": 0.9491215784185367, "grad_norm": 1.029563552129999, "learning_rate": 4.700767819817384e-06, "loss": 0.3783, "step": 15586 }, { "epoch": 0.9491824741954146, "grad_norm": 1.0298563030525238, "learning_rate": 4.700729963231296e-06, "loss": 0.386, "step": 15587 }, { "epoch": 0.9492433699722924, "grad_norm": 1.0041373362089776, "learning_rate": 4.700692104403152e-06, "loss": 0.3868, "step": 15588 }, { "epoch": 0.9493042657491703, "grad_norm": 0.9739250121387054, "learning_rate": 4.700654243332987e-06, "loss": 0.4386, "step": 15589 }, { "epoch": 0.9493651615260482, "grad_norm": 1.0904530279375415, "learning_rate": 4.700616380020842e-06, "loss": 0.3843, "step": 15590 }, { "epoch": 0.9494260573029261, "grad_norm": 0.9085066632927996, "learning_rate": 4.700578514466755e-06, "loss": 0.4335, "step": 15591 }, { "epoch": 0.9494869530798039, "grad_norm": 0.9441157067701779, "learning_rate": 4.700540646670764e-06, "loss": 0.4185, "step": 15592 }, { "epoch": 0.9495478488566818, "grad_norm": 1.0970333619999755, "learning_rate": 4.700502776632907e-06, "loss": 0.3531, "step": 15593 }, { "epoch": 0.9496087446335597, "grad_norm": 0.935210551434011, "learning_rate": 4.7004649043532245e-06, "loss": 0.4607, "step": 15594 }, { "epoch": 0.9496696404104376, "grad_norm": 1.05370099774575, "learning_rate": 4.700427029831755e-06, "loss": 0.4068, "step": 15595 }, { "epoch": 0.9497305361873154, "grad_norm": 0.9358616154080452, "learning_rate": 4.700389153068535e-06, "loss": 0.4429, "step": 15596 }, { "epoch": 0.9497914319641932, "grad_norm": 0.972116111678017, "learning_rate": 4.7003512740636045e-06, "loss": 0.4047, "step": 15597 }, { "epoch": 0.9498523277410712, "grad_norm": 0.9729992175621054, "learning_rate": 4.700313392817002e-06, "loss": 0.4306, "step": 15598 }, { "epoch": 0.949913223517949, "grad_norm": 0.9622830333073816, "learning_rate": 4.700275509328765e-06, "loss": 0.4305, "step": 15599 }, { "epoch": 0.9499741192948269, "grad_norm": 1.0594056545045014, "learning_rate": 4.7002376235989346e-06, "loss": 0.4368, "step": 15600 }, { "epoch": 0.9500350150717047, "grad_norm": 1.0320311585674933, "learning_rate": 4.700199735627547e-06, "loss": 0.4044, "step": 15601 }, { "epoch": 0.9500959108485827, "grad_norm": 1.0231034530962981, "learning_rate": 4.700161845414641e-06, "loss": 0.407, "step": 15602 }, { "epoch": 0.9501568066254605, "grad_norm": 0.9865332180996641, "learning_rate": 4.700123952960257e-06, "loss": 0.453, "step": 15603 }, { "epoch": 0.9502177024023384, "grad_norm": 0.9146837472132715, "learning_rate": 4.700086058264433e-06, "loss": 0.423, "step": 15604 }, { "epoch": 0.9502785981792162, "grad_norm": 1.0267453601575915, "learning_rate": 4.700048161327206e-06, "loss": 0.387, "step": 15605 }, { "epoch": 0.9503394939560942, "grad_norm": 0.9841108459143909, "learning_rate": 4.700010262148615e-06, "loss": 0.4509, "step": 15606 }, { "epoch": 0.950400389732972, "grad_norm": 0.9408229407139752, "learning_rate": 4.6999723607287e-06, "loss": 0.4245, "step": 15607 }, { "epoch": 0.9504612855098499, "grad_norm": 0.9608200914118924, "learning_rate": 4.6999344570675e-06, "loss": 0.4315, "step": 15608 }, { "epoch": 0.9505221812867277, "grad_norm": 0.9445969036560958, "learning_rate": 4.6998965511650515e-06, "loss": 0.3887, "step": 15609 }, { "epoch": 0.9505830770636057, "grad_norm": 1.0595728581083617, "learning_rate": 4.699858643021394e-06, "loss": 0.333, "step": 15610 }, { "epoch": 0.9506439728404835, "grad_norm": 0.9939234564822147, "learning_rate": 4.699820732636566e-06, "loss": 0.3765, "step": 15611 }, { "epoch": 0.9507048686173614, "grad_norm": 1.0705116450905652, "learning_rate": 4.699782820010607e-06, "loss": 0.338, "step": 15612 }, { "epoch": 0.9507657643942392, "grad_norm": 1.0003480294093325, "learning_rate": 4.699744905143555e-06, "loss": 0.4418, "step": 15613 }, { "epoch": 0.9508266601711172, "grad_norm": 1.0568685289705162, "learning_rate": 4.699706988035449e-06, "loss": 0.3737, "step": 15614 }, { "epoch": 0.950887555947995, "grad_norm": 1.096500807320552, "learning_rate": 4.699669068686326e-06, "loss": 0.4092, "step": 15615 }, { "epoch": 0.9509484517248729, "grad_norm": 0.9564054042388743, "learning_rate": 4.699631147096227e-06, "loss": 0.4206, "step": 15616 }, { "epoch": 0.9510093475017507, "grad_norm": 1.0010995890412253, "learning_rate": 4.699593223265189e-06, "loss": 0.4624, "step": 15617 }, { "epoch": 0.9510702432786287, "grad_norm": 1.0438042764947113, "learning_rate": 4.699555297193251e-06, "loss": 0.4245, "step": 15618 }, { "epoch": 0.9511311390555065, "grad_norm": 0.9076763845606161, "learning_rate": 4.699517368880452e-06, "loss": 0.439, "step": 15619 }, { "epoch": 0.9511920348323843, "grad_norm": 1.0561007661479311, "learning_rate": 4.699479438326831e-06, "loss": 0.419, "step": 15620 }, { "epoch": 0.9512529306092622, "grad_norm": 0.9993092577332848, "learning_rate": 4.699441505532425e-06, "loss": 0.3617, "step": 15621 }, { "epoch": 0.9513138263861401, "grad_norm": 1.0316282002400445, "learning_rate": 4.699403570497275e-06, "loss": 0.3942, "step": 15622 }, { "epoch": 0.951374722163018, "grad_norm": 0.9547470334138156, "learning_rate": 4.699365633221417e-06, "loss": 0.4576, "step": 15623 }, { "epoch": 0.9514356179398958, "grad_norm": 1.0033035901256735, "learning_rate": 4.699327693704891e-06, "loss": 0.3912, "step": 15624 }, { "epoch": 0.9514965137167738, "grad_norm": 0.9618314977206117, "learning_rate": 4.699289751947737e-06, "loss": 0.4507, "step": 15625 }, { "epoch": 0.9515574094936516, "grad_norm": 0.9360067705291755, "learning_rate": 4.699251807949992e-06, "loss": 0.4177, "step": 15626 }, { "epoch": 0.9516183052705295, "grad_norm": 0.9470329934328992, "learning_rate": 4.699213861711694e-06, "loss": 0.3953, "step": 15627 }, { "epoch": 0.9516792010474073, "grad_norm": 1.0578593164466954, "learning_rate": 4.699175913232884e-06, "loss": 0.3703, "step": 15628 }, { "epoch": 0.9517400968242853, "grad_norm": 0.9667325648726263, "learning_rate": 4.699137962513598e-06, "loss": 0.4406, "step": 15629 }, { "epoch": 0.9518009926011631, "grad_norm": 1.0297335506645509, "learning_rate": 4.6991000095538765e-06, "loss": 0.3825, "step": 15630 }, { "epoch": 0.951861888378041, "grad_norm": 0.8882328097276048, "learning_rate": 4.699062054353758e-06, "loss": 0.5482, "step": 15631 }, { "epoch": 0.9519227841549188, "grad_norm": 0.9880238993904796, "learning_rate": 4.699024096913281e-06, "loss": 0.4386, "step": 15632 }, { "epoch": 0.9519836799317968, "grad_norm": 1.0108426241359965, "learning_rate": 4.698986137232483e-06, "loss": 0.3914, "step": 15633 }, { "epoch": 0.9520445757086746, "grad_norm": 1.0084512056491675, "learning_rate": 4.698948175311404e-06, "loss": 0.3483, "step": 15634 }, { "epoch": 0.9521054714855525, "grad_norm": 1.0649114302843115, "learning_rate": 4.698910211150083e-06, "loss": 0.3633, "step": 15635 }, { "epoch": 0.9521663672624303, "grad_norm": 0.9707179938013905, "learning_rate": 4.698872244748557e-06, "loss": 0.4085, "step": 15636 }, { "epoch": 0.9522272630393083, "grad_norm": 0.9187128535690054, "learning_rate": 4.698834276106866e-06, "loss": 0.3813, "step": 15637 }, { "epoch": 0.9522881588161861, "grad_norm": 0.9866485945689251, "learning_rate": 4.6987963052250485e-06, "loss": 0.3184, "step": 15638 }, { "epoch": 0.952349054593064, "grad_norm": 1.0096305902180323, "learning_rate": 4.698758332103143e-06, "loss": 0.463, "step": 15639 }, { "epoch": 0.9524099503699418, "grad_norm": 1.0081833504622464, "learning_rate": 4.698720356741188e-06, "loss": 0.4604, "step": 15640 }, { "epoch": 0.9524708461468198, "grad_norm": 1.0274082887562395, "learning_rate": 4.698682379139222e-06, "loss": 0.3712, "step": 15641 }, { "epoch": 0.9525317419236976, "grad_norm": 0.9536413002654134, "learning_rate": 4.698644399297285e-06, "loss": 0.4633, "step": 15642 }, { "epoch": 0.9525926377005755, "grad_norm": 1.055713221460871, "learning_rate": 4.698606417215414e-06, "loss": 0.4318, "step": 15643 }, { "epoch": 0.9526535334774533, "grad_norm": 0.9676201584213656, "learning_rate": 4.6985684328936495e-06, "loss": 0.4195, "step": 15644 }, { "epoch": 0.9527144292543313, "grad_norm": 0.9581430483000152, "learning_rate": 4.698530446332029e-06, "loss": 0.4506, "step": 15645 }, { "epoch": 0.9527753250312091, "grad_norm": 0.8441703406979063, "learning_rate": 4.698492457530592e-06, "loss": 0.493, "step": 15646 }, { "epoch": 0.9528362208080869, "grad_norm": 0.9872155670525917, "learning_rate": 4.698454466489375e-06, "loss": 0.4228, "step": 15647 }, { "epoch": 0.9528971165849648, "grad_norm": 1.0998063473599933, "learning_rate": 4.698416473208418e-06, "loss": 0.4464, "step": 15648 }, { "epoch": 0.9529580123618427, "grad_norm": 1.1636789484050951, "learning_rate": 4.6983784776877615e-06, "loss": 0.3339, "step": 15649 }, { "epoch": 0.9530189081387206, "grad_norm": 1.0443972391085898, "learning_rate": 4.698340479927442e-06, "loss": 0.413, "step": 15650 }, { "epoch": 0.9530798039155984, "grad_norm": 1.0061553404939647, "learning_rate": 4.698302479927499e-06, "loss": 0.4529, "step": 15651 }, { "epoch": 0.9531406996924763, "grad_norm": 1.0304961213897463, "learning_rate": 4.698264477687971e-06, "loss": 0.3688, "step": 15652 }, { "epoch": 0.9532015954693542, "grad_norm": 0.9927184132451342, "learning_rate": 4.698226473208898e-06, "loss": 0.4177, "step": 15653 }, { "epoch": 0.9532624912462321, "grad_norm": 1.1122417141057603, "learning_rate": 4.6981884664903165e-06, "loss": 0.4016, "step": 15654 }, { "epoch": 0.9533233870231099, "grad_norm": 0.9827812870725391, "learning_rate": 4.6981504575322665e-06, "loss": 0.3989, "step": 15655 }, { "epoch": 0.9533842827999878, "grad_norm": 0.9779013160711387, "learning_rate": 4.698112446334786e-06, "loss": 0.3823, "step": 15656 }, { "epoch": 0.9534451785768657, "grad_norm": 0.9939284680210345, "learning_rate": 4.698074432897915e-06, "loss": 0.4626, "step": 15657 }, { "epoch": 0.9535060743537436, "grad_norm": 0.9428418795383701, "learning_rate": 4.6980364172216906e-06, "loss": 0.4094, "step": 15658 }, { "epoch": 0.9535669701306214, "grad_norm": 0.9366624597355881, "learning_rate": 4.697998399306154e-06, "loss": 0.4511, "step": 15659 }, { "epoch": 0.9536278659074993, "grad_norm": 0.9488283265090997, "learning_rate": 4.69796037915134e-06, "loss": 0.3899, "step": 15660 }, { "epoch": 0.9536887616843772, "grad_norm": 0.9322018337235155, "learning_rate": 4.697922356757292e-06, "loss": 0.4215, "step": 15661 }, { "epoch": 0.9537496574612551, "grad_norm": 0.9614761424093335, "learning_rate": 4.697884332124045e-06, "loss": 0.4071, "step": 15662 }, { "epoch": 0.9538105532381329, "grad_norm": 1.0490139384676438, "learning_rate": 4.69784630525164e-06, "loss": 0.3741, "step": 15663 }, { "epoch": 0.9538714490150109, "grad_norm": 1.1262380351025154, "learning_rate": 4.697808276140114e-06, "loss": 0.3756, "step": 15664 }, { "epoch": 0.9539323447918887, "grad_norm": 0.9940857511159246, "learning_rate": 4.697770244789507e-06, "loss": 0.4129, "step": 15665 }, { "epoch": 0.9539932405687666, "grad_norm": 0.9509203069287632, "learning_rate": 4.697732211199857e-06, "loss": 0.414, "step": 15666 }, { "epoch": 0.9540541363456444, "grad_norm": 0.9571251262329182, "learning_rate": 4.697694175371203e-06, "loss": 0.4127, "step": 15667 }, { "epoch": 0.9541150321225224, "grad_norm": 0.9941580262164555, "learning_rate": 4.6976561373035855e-06, "loss": 0.4018, "step": 15668 }, { "epoch": 0.9541759278994002, "grad_norm": 0.9652510855139989, "learning_rate": 4.69761809699704e-06, "loss": 0.4502, "step": 15669 }, { "epoch": 0.954236823676278, "grad_norm": 1.0351935408929966, "learning_rate": 4.697580054451608e-06, "loss": 0.4543, "step": 15670 }, { "epoch": 0.9542977194531559, "grad_norm": 1.0337428398519832, "learning_rate": 4.697542009667326e-06, "loss": 0.3997, "step": 15671 }, { "epoch": 0.9543586152300338, "grad_norm": 1.0311331333647347, "learning_rate": 4.697503962644234e-06, "loss": 0.4019, "step": 15672 }, { "epoch": 0.9544195110069117, "grad_norm": 0.9876618357775833, "learning_rate": 4.697465913382372e-06, "loss": 0.4263, "step": 15673 }, { "epoch": 0.9544804067837895, "grad_norm": 1.0668656361842759, "learning_rate": 4.697427861881776e-06, "loss": 0.3964, "step": 15674 }, { "epoch": 0.9545413025606674, "grad_norm": 0.9661302350439626, "learning_rate": 4.697389808142487e-06, "loss": 0.4217, "step": 15675 }, { "epoch": 0.9546021983375453, "grad_norm": 0.9445438739345807, "learning_rate": 4.697351752164542e-06, "loss": 0.3844, "step": 15676 }, { "epoch": 0.9546630941144232, "grad_norm": 0.9757179008107623, "learning_rate": 4.697313693947981e-06, "loss": 0.3888, "step": 15677 }, { "epoch": 0.954723989891301, "grad_norm": 0.9226742254657161, "learning_rate": 4.697275633492843e-06, "loss": 0.3972, "step": 15678 }, { "epoch": 0.9547848856681789, "grad_norm": 0.9734147610591362, "learning_rate": 4.697237570799166e-06, "loss": 0.4511, "step": 15679 }, { "epoch": 0.9548457814450568, "grad_norm": 1.0641735174044935, "learning_rate": 4.697199505866989e-06, "loss": 0.4336, "step": 15680 }, { "epoch": 0.9549066772219347, "grad_norm": 1.0501629442377078, "learning_rate": 4.697161438696351e-06, "loss": 0.3718, "step": 15681 }, { "epoch": 0.9549675729988125, "grad_norm": 1.0728654500196944, "learning_rate": 4.6971233692872906e-06, "loss": 0.3724, "step": 15682 }, { "epoch": 0.9550284687756904, "grad_norm": 1.0287843260284186, "learning_rate": 4.697085297639846e-06, "loss": 0.3527, "step": 15683 }, { "epoch": 0.9550893645525683, "grad_norm": 0.920318539288463, "learning_rate": 4.6970472237540575e-06, "loss": 0.4426, "step": 15684 }, { "epoch": 0.9551502603294462, "grad_norm": 1.0436068112146328, "learning_rate": 4.6970091476299625e-06, "loss": 0.4313, "step": 15685 }, { "epoch": 0.955211156106324, "grad_norm": 1.0035648919887068, "learning_rate": 4.6969710692676e-06, "loss": 0.3472, "step": 15686 }, { "epoch": 0.9552720518832019, "grad_norm": 1.022543823180928, "learning_rate": 4.696932988667009e-06, "loss": 0.455, "step": 15687 }, { "epoch": 0.9553329476600798, "grad_norm": 1.0014310958468795, "learning_rate": 4.696894905828229e-06, "loss": 0.4361, "step": 15688 }, { "epoch": 0.9553938434369577, "grad_norm": 0.8835196887375717, "learning_rate": 4.696856820751298e-06, "loss": 0.4071, "step": 15689 }, { "epoch": 0.9554547392138355, "grad_norm": 0.9991264152803848, "learning_rate": 4.696818733436255e-06, "loss": 0.3512, "step": 15690 }, { "epoch": 0.9555156349907133, "grad_norm": 0.950264309661265, "learning_rate": 4.696780643883138e-06, "loss": 0.4053, "step": 15691 }, { "epoch": 0.9555765307675913, "grad_norm": 1.0961214159591304, "learning_rate": 4.696742552091987e-06, "loss": 0.3565, "step": 15692 }, { "epoch": 0.9556374265444691, "grad_norm": 1.0229000641314074, "learning_rate": 4.696704458062841e-06, "loss": 0.4018, "step": 15693 }, { "epoch": 0.955698322321347, "grad_norm": 0.9487103139787225, "learning_rate": 4.696666361795737e-06, "loss": 0.4455, "step": 15694 }, { "epoch": 0.9557592180982248, "grad_norm": 0.980013653190926, "learning_rate": 4.696628263290716e-06, "loss": 0.4011, "step": 15695 }, { "epoch": 0.9558201138751028, "grad_norm": 1.0590038209292132, "learning_rate": 4.696590162547816e-06, "loss": 0.3327, "step": 15696 }, { "epoch": 0.9558810096519806, "grad_norm": 0.9943445902530051, "learning_rate": 4.696552059567074e-06, "loss": 0.3615, "step": 15697 }, { "epoch": 0.9559419054288585, "grad_norm": 0.9510275502498828, "learning_rate": 4.6965139543485315e-06, "loss": 0.4413, "step": 15698 }, { "epoch": 0.9560028012057363, "grad_norm": 0.931344766675821, "learning_rate": 4.696475846892227e-06, "loss": 0.4272, "step": 15699 }, { "epoch": 0.9560636969826143, "grad_norm": 1.0117707245947316, "learning_rate": 4.696437737198198e-06, "loss": 0.382, "step": 15700 }, { "epoch": 0.9561245927594921, "grad_norm": 0.9994652970418629, "learning_rate": 4.696399625266484e-06, "loss": 0.3627, "step": 15701 }, { "epoch": 0.95618548853637, "grad_norm": 1.0849399233443984, "learning_rate": 4.696361511097123e-06, "loss": 0.3694, "step": 15702 }, { "epoch": 0.9562463843132478, "grad_norm": 0.9784022609908697, "learning_rate": 4.696323394690154e-06, "loss": 0.321, "step": 15703 }, { "epoch": 0.9563072800901258, "grad_norm": 0.9808396160518406, "learning_rate": 4.696285276045618e-06, "loss": 0.3738, "step": 15704 }, { "epoch": 0.9563681758670036, "grad_norm": 0.9516749225420321, "learning_rate": 4.696247155163551e-06, "loss": 0.436, "step": 15705 }, { "epoch": 0.9564290716438815, "grad_norm": 1.0557144609047018, "learning_rate": 4.696209032043994e-06, "loss": 0.4311, "step": 15706 }, { "epoch": 0.9564899674207594, "grad_norm": 1.0941021165659977, "learning_rate": 4.6961709066869845e-06, "loss": 0.4216, "step": 15707 }, { "epoch": 0.9565508631976373, "grad_norm": 1.0096800112509714, "learning_rate": 4.6961327790925615e-06, "loss": 0.4797, "step": 15708 }, { "epoch": 0.9566117589745151, "grad_norm": 1.1109377333248698, "learning_rate": 4.696094649260764e-06, "loss": 0.3423, "step": 15709 }, { "epoch": 0.956672654751393, "grad_norm": 0.9006866038500034, "learning_rate": 4.696056517191631e-06, "loss": 0.4028, "step": 15710 }, { "epoch": 0.9567335505282709, "grad_norm": 0.9715030210091836, "learning_rate": 4.696018382885202e-06, "loss": 0.4671, "step": 15711 }, { "epoch": 0.9567944463051488, "grad_norm": 0.9878196583003365, "learning_rate": 4.695980246341515e-06, "loss": 0.3596, "step": 15712 }, { "epoch": 0.9568553420820266, "grad_norm": 0.9935356112365478, "learning_rate": 4.6959421075606085e-06, "loss": 0.4056, "step": 15713 }, { "epoch": 0.9569162378589045, "grad_norm": 1.0650829431643776, "learning_rate": 4.6959039665425225e-06, "loss": 0.366, "step": 15714 }, { "epoch": 0.9569771336357824, "grad_norm": 0.9758756259886117, "learning_rate": 4.6958658232872945e-06, "loss": 0.446, "step": 15715 }, { "epoch": 0.9570380294126603, "grad_norm": 0.9911332809329517, "learning_rate": 4.6958276777949645e-06, "loss": 0.4407, "step": 15716 }, { "epoch": 0.9570989251895381, "grad_norm": 1.0191199367999921, "learning_rate": 4.69578953006557e-06, "loss": 0.4615, "step": 15717 }, { "epoch": 0.9571598209664159, "grad_norm": 1.1242652205769286, "learning_rate": 4.695751380099151e-06, "loss": 0.3732, "step": 15718 }, { "epoch": 0.9572207167432939, "grad_norm": 0.9482018426759021, "learning_rate": 4.695713227895747e-06, "loss": 0.4631, "step": 15719 }, { "epoch": 0.9572816125201717, "grad_norm": 1.0317406383652479, "learning_rate": 4.695675073455396e-06, "loss": 0.4376, "step": 15720 }, { "epoch": 0.9573425082970496, "grad_norm": 1.003845403773925, "learning_rate": 4.695636916778135e-06, "loss": 0.4352, "step": 15721 }, { "epoch": 0.9574034040739274, "grad_norm": 0.9598619311703361, "learning_rate": 4.6955987578640075e-06, "loss": 0.3887, "step": 15722 }, { "epoch": 0.9574642998508054, "grad_norm": 1.015105950691334, "learning_rate": 4.695560596713048e-06, "loss": 0.4564, "step": 15723 }, { "epoch": 0.9575251956276832, "grad_norm": 1.0679915118349994, "learning_rate": 4.695522433325297e-06, "loss": 0.3574, "step": 15724 }, { "epoch": 0.9575860914045611, "grad_norm": 0.944476804518845, "learning_rate": 4.6954842677007935e-06, "loss": 0.4857, "step": 15725 }, { "epoch": 0.9576469871814389, "grad_norm": 0.931241471242497, "learning_rate": 4.695446099839577e-06, "loss": 0.3716, "step": 15726 }, { "epoch": 0.9577078829583169, "grad_norm": 1.029529075046365, "learning_rate": 4.695407929741685e-06, "loss": 0.3455, "step": 15727 }, { "epoch": 0.9577687787351947, "grad_norm": 1.066290953725699, "learning_rate": 4.695369757407158e-06, "loss": 0.3227, "step": 15728 }, { "epoch": 0.9578296745120726, "grad_norm": 1.089816399601084, "learning_rate": 4.695331582836033e-06, "loss": 0.4285, "step": 15729 }, { "epoch": 0.9578905702889504, "grad_norm": 1.0422134865267052, "learning_rate": 4.695293406028349e-06, "loss": 0.403, "step": 15730 }, { "epoch": 0.9579514660658284, "grad_norm": 1.0062047463030406, "learning_rate": 4.695255226984147e-06, "loss": 0.4421, "step": 15731 }, { "epoch": 0.9580123618427062, "grad_norm": 0.9317882430977685, "learning_rate": 4.6952170457034645e-06, "loss": 0.4323, "step": 15732 }, { "epoch": 0.9580732576195841, "grad_norm": 1.2038097141102186, "learning_rate": 4.695178862186341e-06, "loss": 0.4225, "step": 15733 }, { "epoch": 0.9581341533964619, "grad_norm": 0.9224923525167112, "learning_rate": 4.695140676432813e-06, "loss": 0.4013, "step": 15734 }, { "epoch": 0.9581950491733399, "grad_norm": 1.028775555884671, "learning_rate": 4.695102488442923e-06, "loss": 0.4046, "step": 15735 }, { "epoch": 0.9582559449502177, "grad_norm": 1.046320514204825, "learning_rate": 4.695064298216708e-06, "loss": 0.3642, "step": 15736 }, { "epoch": 0.9583168407270956, "grad_norm": 0.9808866629086053, "learning_rate": 4.695026105754207e-06, "loss": 0.3893, "step": 15737 }, { "epoch": 0.9583777365039734, "grad_norm": 0.9470480043789162, "learning_rate": 4.6949879110554585e-06, "loss": 0.4219, "step": 15738 }, { "epoch": 0.9584386322808514, "grad_norm": 0.9608749339805315, "learning_rate": 4.6949497141205026e-06, "loss": 0.4534, "step": 15739 }, { "epoch": 0.9584995280577292, "grad_norm": 0.9420765756349896, "learning_rate": 4.694911514949377e-06, "loss": 0.4439, "step": 15740 }, { "epoch": 0.958560423834607, "grad_norm": 1.0073138233744834, "learning_rate": 4.694873313542122e-06, "loss": 0.3287, "step": 15741 }, { "epoch": 0.9586213196114849, "grad_norm": 0.9643587400231712, "learning_rate": 4.694835109898775e-06, "loss": 0.3522, "step": 15742 }, { "epoch": 0.9586822153883628, "grad_norm": 1.048760267477773, "learning_rate": 4.694796904019376e-06, "loss": 0.4475, "step": 15743 }, { "epoch": 0.9587431111652407, "grad_norm": 0.984591243077866, "learning_rate": 4.694758695903964e-06, "loss": 0.4204, "step": 15744 }, { "epoch": 0.9588040069421185, "grad_norm": 1.0754446895357739, "learning_rate": 4.694720485552576e-06, "loss": 0.3698, "step": 15745 }, { "epoch": 0.9588649027189965, "grad_norm": 0.9778594218231297, "learning_rate": 4.694682272965254e-06, "loss": 0.4128, "step": 15746 }, { "epoch": 0.9589257984958743, "grad_norm": 0.9583836940484213, "learning_rate": 4.694644058142035e-06, "loss": 0.4174, "step": 15747 }, { "epoch": 0.9589866942727522, "grad_norm": 0.9606785204314846, "learning_rate": 4.694605841082958e-06, "loss": 0.4157, "step": 15748 }, { "epoch": 0.95904759004963, "grad_norm": 1.1420793305495898, "learning_rate": 4.694567621788062e-06, "loss": 0.3789, "step": 15749 }, { "epoch": 0.959108485826508, "grad_norm": 0.9835700823539903, "learning_rate": 4.694529400257386e-06, "loss": 0.4545, "step": 15750 }, { "epoch": 0.9591693816033858, "grad_norm": 1.0710150175819058, "learning_rate": 4.694491176490969e-06, "loss": 0.3401, "step": 15751 }, { "epoch": 0.9592302773802637, "grad_norm": 0.9313321260680536, "learning_rate": 4.69445295048885e-06, "loss": 0.4975, "step": 15752 }, { "epoch": 0.9592911731571415, "grad_norm": 0.8893346073462153, "learning_rate": 4.694414722251068e-06, "loss": 0.4298, "step": 15753 }, { "epoch": 0.9593520689340195, "grad_norm": 1.1008427820425883, "learning_rate": 4.6943764917776625e-06, "loss": 0.4178, "step": 15754 }, { "epoch": 0.9594129647108973, "grad_norm": 1.0771304655759728, "learning_rate": 4.694338259068672e-06, "loss": 0.4021, "step": 15755 }, { "epoch": 0.9594738604877752, "grad_norm": 0.9433365482357176, "learning_rate": 4.6943000241241346e-06, "loss": 0.4226, "step": 15756 }, { "epoch": 0.959534756264653, "grad_norm": 0.9441207659395207, "learning_rate": 4.6942617869440896e-06, "loss": 0.4802, "step": 15757 }, { "epoch": 0.959595652041531, "grad_norm": 1.0335779190564272, "learning_rate": 4.694223547528577e-06, "loss": 0.4188, "step": 15758 }, { "epoch": 0.9596565478184088, "grad_norm": 0.9924084137138454, "learning_rate": 4.694185305877636e-06, "loss": 0.3873, "step": 15759 }, { "epoch": 0.9597174435952867, "grad_norm": 0.9557506391192366, "learning_rate": 4.694147061991303e-06, "loss": 0.4609, "step": 15760 }, { "epoch": 0.9597783393721645, "grad_norm": 0.9999242932042023, "learning_rate": 4.694108815869619e-06, "loss": 0.3833, "step": 15761 }, { "epoch": 0.9598392351490425, "grad_norm": 0.8902772416025114, "learning_rate": 4.694070567512623e-06, "loss": 0.4906, "step": 15762 }, { "epoch": 0.9599001309259203, "grad_norm": 1.1359992117520124, "learning_rate": 4.694032316920353e-06, "loss": 0.351, "step": 15763 }, { "epoch": 0.9599610267027981, "grad_norm": 0.9970164677904052, "learning_rate": 4.693994064092849e-06, "loss": 0.3608, "step": 15764 }, { "epoch": 0.960021922479676, "grad_norm": 0.9988696425733372, "learning_rate": 4.693955809030149e-06, "loss": 0.3571, "step": 15765 }, { "epoch": 0.960082818256554, "grad_norm": 1.0110073625310063, "learning_rate": 4.693917551732293e-06, "loss": 0.4199, "step": 15766 }, { "epoch": 0.9601437140334318, "grad_norm": 0.9794383285953204, "learning_rate": 4.693879292199319e-06, "loss": 0.3819, "step": 15767 }, { "epoch": 0.9602046098103096, "grad_norm": 1.001025440823613, "learning_rate": 4.693841030431267e-06, "loss": 0.4351, "step": 15768 }, { "epoch": 0.9602655055871875, "grad_norm": 0.8861663478130368, "learning_rate": 4.693802766428173e-06, "loss": 0.4273, "step": 15769 }, { "epoch": 0.9603264013640654, "grad_norm": 0.9519167374692189, "learning_rate": 4.693764500190081e-06, "loss": 0.4243, "step": 15770 }, { "epoch": 0.9603872971409433, "grad_norm": 0.996328119704942, "learning_rate": 4.6937262317170265e-06, "loss": 0.4159, "step": 15771 }, { "epoch": 0.9604481929178211, "grad_norm": 0.9914796588794976, "learning_rate": 4.693687961009049e-06, "loss": 0.4325, "step": 15772 }, { "epoch": 0.960509088694699, "grad_norm": 1.0108309424164716, "learning_rate": 4.693649688066189e-06, "loss": 0.4029, "step": 15773 }, { "epoch": 0.9605699844715769, "grad_norm": 0.9763352069015021, "learning_rate": 4.693611412888483e-06, "loss": 0.43, "step": 15774 }, { "epoch": 0.9606308802484548, "grad_norm": 1.0567756452478783, "learning_rate": 4.6935731354759714e-06, "loss": 0.3813, "step": 15775 }, { "epoch": 0.9606917760253326, "grad_norm": 1.004508853696566, "learning_rate": 4.693534855828694e-06, "loss": 0.3707, "step": 15776 }, { "epoch": 0.9607526718022105, "grad_norm": 0.9300452760079843, "learning_rate": 4.693496573946688e-06, "loss": 0.4656, "step": 15777 }, { "epoch": 0.9608135675790884, "grad_norm": 1.0386546297718935, "learning_rate": 4.693458289829994e-06, "loss": 0.386, "step": 15778 }, { "epoch": 0.9608744633559663, "grad_norm": 0.9847513544872992, "learning_rate": 4.6934200034786495e-06, "loss": 0.4321, "step": 15779 }, { "epoch": 0.9609353591328441, "grad_norm": 0.9877996283285398, "learning_rate": 4.693381714892695e-06, "loss": 0.406, "step": 15780 }, { "epoch": 0.960996254909722, "grad_norm": 1.0216062285412764, "learning_rate": 4.6933434240721685e-06, "loss": 0.3367, "step": 15781 }, { "epoch": 0.9610571506865999, "grad_norm": 0.9944385103437783, "learning_rate": 4.69330513101711e-06, "loss": 0.3208, "step": 15782 }, { "epoch": 0.9611180464634778, "grad_norm": 0.9397786302664747, "learning_rate": 4.693266835727557e-06, "loss": 0.3868, "step": 15783 }, { "epoch": 0.9611789422403556, "grad_norm": 0.9625896792728974, "learning_rate": 4.693228538203549e-06, "loss": 0.4061, "step": 15784 }, { "epoch": 0.9612398380172334, "grad_norm": 0.9949666652515011, "learning_rate": 4.693190238445126e-06, "loss": 0.3698, "step": 15785 }, { "epoch": 0.9613007337941114, "grad_norm": 1.0005494655560976, "learning_rate": 4.693151936452326e-06, "loss": 0.3994, "step": 15786 }, { "epoch": 0.9613616295709893, "grad_norm": 1.024181979931302, "learning_rate": 4.693113632225189e-06, "loss": 0.3552, "step": 15787 }, { "epoch": 0.9614225253478671, "grad_norm": 1.0132849052975001, "learning_rate": 4.693075325763753e-06, "loss": 0.3715, "step": 15788 }, { "epoch": 0.961483421124745, "grad_norm": 0.919745236344976, "learning_rate": 4.693037017068057e-06, "loss": 0.3849, "step": 15789 }, { "epoch": 0.9615443169016229, "grad_norm": 0.9639691326397435, "learning_rate": 4.692998706138142e-06, "loss": 0.4208, "step": 15790 }, { "epoch": 0.9616052126785007, "grad_norm": 1.0664708070246323, "learning_rate": 4.692960392974044e-06, "loss": 0.3694, "step": 15791 }, { "epoch": 0.9616661084553786, "grad_norm": 0.9791377529308887, "learning_rate": 4.6929220775758045e-06, "loss": 0.4009, "step": 15792 }, { "epoch": 0.9617270042322565, "grad_norm": 0.9765683189857197, "learning_rate": 4.6928837599434605e-06, "loss": 0.3656, "step": 15793 }, { "epoch": 0.9617879000091344, "grad_norm": 1.002517134881254, "learning_rate": 4.692845440077053e-06, "loss": 0.3959, "step": 15794 }, { "epoch": 0.9618487957860122, "grad_norm": 0.9290704440374264, "learning_rate": 4.69280711797662e-06, "loss": 0.4812, "step": 15795 }, { "epoch": 0.9619096915628901, "grad_norm": 0.9560573110387529, "learning_rate": 4.692768793642201e-06, "loss": 0.45, "step": 15796 }, { "epoch": 0.961970587339768, "grad_norm": 1.0803167557287583, "learning_rate": 4.692730467073834e-06, "loss": 0.3529, "step": 15797 }, { "epoch": 0.9620314831166459, "grad_norm": 0.977525378022753, "learning_rate": 4.69269213827156e-06, "loss": 0.4167, "step": 15798 }, { "epoch": 0.9620923788935237, "grad_norm": 1.0021164017111537, "learning_rate": 4.692653807235416e-06, "loss": 0.3992, "step": 15799 }, { "epoch": 0.9621532746704016, "grad_norm": 1.0264493431275528, "learning_rate": 4.692615473965441e-06, "loss": 0.4753, "step": 15800 }, { "epoch": 0.9622141704472795, "grad_norm": 0.937541458098711, "learning_rate": 4.692577138461676e-06, "loss": 0.4672, "step": 15801 }, { "epoch": 0.9622750662241574, "grad_norm": 0.9637609724528147, "learning_rate": 4.692538800724159e-06, "loss": 0.4107, "step": 15802 }, { "epoch": 0.9623359620010352, "grad_norm": 1.0396252429778858, "learning_rate": 4.692500460752929e-06, "loss": 0.4388, "step": 15803 }, { "epoch": 0.9623968577779131, "grad_norm": 0.9510715201789364, "learning_rate": 4.692462118548025e-06, "loss": 0.4887, "step": 15804 }, { "epoch": 0.962457753554791, "grad_norm": 0.995784316165877, "learning_rate": 4.692423774109486e-06, "loss": 0.3417, "step": 15805 }, { "epoch": 0.9625186493316689, "grad_norm": 1.0571098326366697, "learning_rate": 4.692385427437352e-06, "loss": 0.385, "step": 15806 }, { "epoch": 0.9625795451085467, "grad_norm": 1.0028828257999942, "learning_rate": 4.69234707853166e-06, "loss": 0.3589, "step": 15807 }, { "epoch": 0.9626404408854246, "grad_norm": 1.008612896110506, "learning_rate": 4.6923087273924515e-06, "loss": 0.4015, "step": 15808 }, { "epoch": 0.9627013366623025, "grad_norm": 0.9654175598979908, "learning_rate": 4.692270374019764e-06, "loss": 0.4017, "step": 15809 }, { "epoch": 0.9627622324391804, "grad_norm": 0.9372863633512717, "learning_rate": 4.692232018413637e-06, "loss": 0.4071, "step": 15810 }, { "epoch": 0.9628231282160582, "grad_norm": 0.9602604926152468, "learning_rate": 4.692193660574109e-06, "loss": 0.4246, "step": 15811 }, { "epoch": 0.962884023992936, "grad_norm": 1.0588617419333737, "learning_rate": 4.692155300501221e-06, "loss": 0.3748, "step": 15812 }, { "epoch": 0.962944919769814, "grad_norm": 0.9649409221784825, "learning_rate": 4.6921169381950105e-06, "loss": 0.4312, "step": 15813 }, { "epoch": 0.9630058155466918, "grad_norm": 1.0087593528253098, "learning_rate": 4.692078573655517e-06, "loss": 0.4463, "step": 15814 }, { "epoch": 0.9630667113235697, "grad_norm": 0.98823201636525, "learning_rate": 4.692040206882777e-06, "loss": 0.4147, "step": 15815 }, { "epoch": 0.9631276071004475, "grad_norm": 1.0375153395070338, "learning_rate": 4.692001837876835e-06, "loss": 0.4141, "step": 15816 }, { "epoch": 0.9631885028773255, "grad_norm": 1.0584132214821296, "learning_rate": 4.6919634666377256e-06, "loss": 0.336, "step": 15817 }, { "epoch": 0.9632493986542033, "grad_norm": 0.9754031892162897, "learning_rate": 4.69192509316549e-06, "loss": 0.3295, "step": 15818 }, { "epoch": 0.9633102944310812, "grad_norm": 0.9760155296964198, "learning_rate": 4.691886717460166e-06, "loss": 0.4358, "step": 15819 }, { "epoch": 0.963371190207959, "grad_norm": 1.0072080595323507, "learning_rate": 4.691848339521794e-06, "loss": 0.4168, "step": 15820 }, { "epoch": 0.963432085984837, "grad_norm": 1.060262489860657, "learning_rate": 4.691809959350413e-06, "loss": 0.381, "step": 15821 }, { "epoch": 0.9634929817617148, "grad_norm": 0.9991656093737155, "learning_rate": 4.69177157694606e-06, "loss": 0.3605, "step": 15822 }, { "epoch": 0.9635538775385927, "grad_norm": 0.8843874865974511, "learning_rate": 4.691733192308777e-06, "loss": 0.4673, "step": 15823 }, { "epoch": 0.9636147733154705, "grad_norm": 0.9269182625020483, "learning_rate": 4.691694805438601e-06, "loss": 0.4403, "step": 15824 }, { "epoch": 0.9636756690923485, "grad_norm": 0.9744768506132919, "learning_rate": 4.691656416335573e-06, "loss": 0.4615, "step": 15825 }, { "epoch": 0.9637365648692263, "grad_norm": 0.9838176528896124, "learning_rate": 4.69161802499973e-06, "loss": 0.3817, "step": 15826 }, { "epoch": 0.9637974606461042, "grad_norm": 1.0612904568565014, "learning_rate": 4.691579631431112e-06, "loss": 0.4035, "step": 15827 }, { "epoch": 0.9638583564229821, "grad_norm": 0.9659688700583825, "learning_rate": 4.691541235629759e-06, "loss": 0.4545, "step": 15828 }, { "epoch": 0.96391925219986, "grad_norm": 0.995651884280851, "learning_rate": 4.69150283759571e-06, "loss": 0.3912, "step": 15829 }, { "epoch": 0.9639801479767378, "grad_norm": 0.8855880823696598, "learning_rate": 4.6914644373290015e-06, "loss": 0.4518, "step": 15830 }, { "epoch": 0.9640410437536157, "grad_norm": 1.027364618633393, "learning_rate": 4.6914260348296754e-06, "loss": 0.3895, "step": 15831 }, { "epoch": 0.9641019395304936, "grad_norm": 1.0952629359709378, "learning_rate": 4.69138763009777e-06, "loss": 0.4023, "step": 15832 }, { "epoch": 0.9641628353073715, "grad_norm": 0.9663439796815931, "learning_rate": 4.6913492231333245e-06, "loss": 0.5133, "step": 15833 }, { "epoch": 0.9642237310842493, "grad_norm": 1.0576759511998128, "learning_rate": 4.6913108139363784e-06, "loss": 0.3992, "step": 15834 }, { "epoch": 0.9642846268611271, "grad_norm": 1.0063770814939015, "learning_rate": 4.69127240250697e-06, "loss": 0.4231, "step": 15835 }, { "epoch": 0.9643455226380051, "grad_norm": 1.1061748383666625, "learning_rate": 4.69123398884514e-06, "loss": 0.4519, "step": 15836 }, { "epoch": 0.964406418414883, "grad_norm": 1.0134290019617422, "learning_rate": 4.691195572950925e-06, "loss": 0.4179, "step": 15837 }, { "epoch": 0.9644673141917608, "grad_norm": 0.9900740415290714, "learning_rate": 4.691157154824365e-06, "loss": 0.4665, "step": 15838 }, { "epoch": 0.9645282099686386, "grad_norm": 1.0267689698842135, "learning_rate": 4.691118734465501e-06, "loss": 0.4233, "step": 15839 }, { "epoch": 0.9645891057455166, "grad_norm": 1.0507351380871315, "learning_rate": 4.691080311874369e-06, "loss": 0.4183, "step": 15840 }, { "epoch": 0.9646500015223944, "grad_norm": 0.9687389325126978, "learning_rate": 4.691041887051012e-06, "loss": 0.4037, "step": 15841 }, { "epoch": 0.9647108972992723, "grad_norm": 1.035987055532867, "learning_rate": 4.691003459995467e-06, "loss": 0.3898, "step": 15842 }, { "epoch": 0.9647717930761501, "grad_norm": 1.056346757791367, "learning_rate": 4.690965030707772e-06, "loss": 0.4087, "step": 15843 }, { "epoch": 0.9648326888530281, "grad_norm": 1.09556204805555, "learning_rate": 4.690926599187968e-06, "loss": 0.3295, "step": 15844 }, { "epoch": 0.9648935846299059, "grad_norm": 1.049598059420006, "learning_rate": 4.690888165436094e-06, "loss": 0.4009, "step": 15845 }, { "epoch": 0.9649544804067838, "grad_norm": 0.9454419686757409, "learning_rate": 4.690849729452187e-06, "loss": 0.4471, "step": 15846 }, { "epoch": 0.9650153761836616, "grad_norm": 1.0502189339532757, "learning_rate": 4.690811291236289e-06, "loss": 0.4175, "step": 15847 }, { "epoch": 0.9650762719605396, "grad_norm": 1.0030010266459244, "learning_rate": 4.690772850788438e-06, "loss": 0.4189, "step": 15848 }, { "epoch": 0.9651371677374174, "grad_norm": 0.9060756434873743, "learning_rate": 4.690734408108673e-06, "loss": 0.4271, "step": 15849 }, { "epoch": 0.9651980635142953, "grad_norm": 1.0080884783455526, "learning_rate": 4.690695963197034e-06, "loss": 0.3768, "step": 15850 }, { "epoch": 0.9652589592911731, "grad_norm": 0.976780886076034, "learning_rate": 4.690657516053558e-06, "loss": 0.3275, "step": 15851 }, { "epoch": 0.9653198550680511, "grad_norm": 0.9890946717856974, "learning_rate": 4.690619066678287e-06, "loss": 0.4504, "step": 15852 }, { "epoch": 0.9653807508449289, "grad_norm": 1.0067739968877574, "learning_rate": 4.6905806150712585e-06, "loss": 0.4006, "step": 15853 }, { "epoch": 0.9654416466218068, "grad_norm": 1.0244930900175016, "learning_rate": 4.6905421612325116e-06, "loss": 0.4298, "step": 15854 }, { "epoch": 0.9655025423986846, "grad_norm": 0.9865775713338815, "learning_rate": 4.690503705162087e-06, "loss": 0.434, "step": 15855 }, { "epoch": 0.9655634381755626, "grad_norm": 1.0790635214022049, "learning_rate": 4.690465246860022e-06, "loss": 0.4132, "step": 15856 }, { "epoch": 0.9656243339524404, "grad_norm": 1.0986308145280248, "learning_rate": 4.690426786326356e-06, "loss": 0.3909, "step": 15857 }, { "epoch": 0.9656852297293183, "grad_norm": 1.0622475838812562, "learning_rate": 4.69038832356113e-06, "loss": 0.3684, "step": 15858 }, { "epoch": 0.9657461255061961, "grad_norm": 1.0885136287770452, "learning_rate": 4.690349858564381e-06, "loss": 0.3492, "step": 15859 }, { "epoch": 0.965807021283074, "grad_norm": 1.0102430297875091, "learning_rate": 4.6903113913361486e-06, "loss": 0.4358, "step": 15860 }, { "epoch": 0.9658679170599519, "grad_norm": 1.1441216125821438, "learning_rate": 4.690272921876473e-06, "loss": 0.3889, "step": 15861 }, { "epoch": 0.9659288128368297, "grad_norm": 1.0170524910797412, "learning_rate": 4.690234450185393e-06, "loss": 0.3368, "step": 15862 }, { "epoch": 0.9659897086137076, "grad_norm": 0.8708941146288374, "learning_rate": 4.690195976262948e-06, "loss": 0.4357, "step": 15863 }, { "epoch": 0.9660506043905855, "grad_norm": 1.01695087329147, "learning_rate": 4.690157500109177e-06, "loss": 0.3919, "step": 15864 }, { "epoch": 0.9661115001674634, "grad_norm": 0.9911266871535529, "learning_rate": 4.690119021724119e-06, "loss": 0.3405, "step": 15865 }, { "epoch": 0.9661723959443412, "grad_norm": 0.9656494705734417, "learning_rate": 4.6900805411078136e-06, "loss": 0.4516, "step": 15866 }, { "epoch": 0.9662332917212191, "grad_norm": 1.0666430880314317, "learning_rate": 4.690042058260298e-06, "loss": 0.3851, "step": 15867 }, { "epoch": 0.966294187498097, "grad_norm": 1.087192515555129, "learning_rate": 4.690003573181614e-06, "loss": 0.3577, "step": 15868 }, { "epoch": 0.9663550832749749, "grad_norm": 0.9947394452952126, "learning_rate": 4.6899650858718005e-06, "loss": 0.4371, "step": 15869 }, { "epoch": 0.9664159790518527, "grad_norm": 1.0283060601365006, "learning_rate": 4.689926596330895e-06, "loss": 0.3764, "step": 15870 }, { "epoch": 0.9664768748287307, "grad_norm": 0.9364036371411315, "learning_rate": 4.689888104558939e-06, "loss": 0.4374, "step": 15871 }, { "epoch": 0.9665377706056085, "grad_norm": 1.0639731326872692, "learning_rate": 4.68984961055597e-06, "loss": 0.4393, "step": 15872 }, { "epoch": 0.9665986663824864, "grad_norm": 0.9881234850993533, "learning_rate": 4.689811114322027e-06, "loss": 0.4597, "step": 15873 }, { "epoch": 0.9666595621593642, "grad_norm": 0.9250970790063501, "learning_rate": 4.6897726158571514e-06, "loss": 0.4163, "step": 15874 }, { "epoch": 0.9667204579362422, "grad_norm": 1.0628106339488512, "learning_rate": 4.6897341151613805e-06, "loss": 0.3659, "step": 15875 }, { "epoch": 0.96678135371312, "grad_norm": 1.014695618935097, "learning_rate": 4.689695612234753e-06, "loss": 0.4352, "step": 15876 }, { "epoch": 0.9668422494899979, "grad_norm": 0.9967669165116858, "learning_rate": 4.68965710707731e-06, "loss": 0.4546, "step": 15877 }, { "epoch": 0.9669031452668757, "grad_norm": 1.0055448031243757, "learning_rate": 4.6896185996890894e-06, "loss": 0.4137, "step": 15878 }, { "epoch": 0.9669640410437537, "grad_norm": 1.0350861369921471, "learning_rate": 4.689580090070131e-06, "loss": 0.4226, "step": 15879 }, { "epoch": 0.9670249368206315, "grad_norm": 1.0307177487006194, "learning_rate": 4.6895415782204755e-06, "loss": 0.4114, "step": 15880 }, { "epoch": 0.9670858325975094, "grad_norm": 0.9217982958072088, "learning_rate": 4.689503064140158e-06, "loss": 0.4141, "step": 15881 }, { "epoch": 0.9671467283743872, "grad_norm": 1.0147661886744066, "learning_rate": 4.689464547829222e-06, "loss": 0.3438, "step": 15882 }, { "epoch": 0.9672076241512652, "grad_norm": 0.9717961241233196, "learning_rate": 4.689426029287705e-06, "loss": 0.392, "step": 15883 }, { "epoch": 0.967268519928143, "grad_norm": 1.0439727830282042, "learning_rate": 4.689387508515646e-06, "loss": 0.4093, "step": 15884 }, { "epoch": 0.9673294157050208, "grad_norm": 0.9510801518729814, "learning_rate": 4.6893489855130846e-06, "loss": 0.4186, "step": 15885 }, { "epoch": 0.9673903114818987, "grad_norm": 0.9483694457992801, "learning_rate": 4.689310460280059e-06, "loss": 0.41, "step": 15886 }, { "epoch": 0.9674512072587766, "grad_norm": 0.9522248377888225, "learning_rate": 4.6892719328166105e-06, "loss": 0.3971, "step": 15887 }, { "epoch": 0.9675121030356545, "grad_norm": 0.922375490417984, "learning_rate": 4.6892334031227775e-06, "loss": 0.4445, "step": 15888 }, { "epoch": 0.9675729988125323, "grad_norm": 1.0012469631405723, "learning_rate": 4.689194871198598e-06, "loss": 0.4133, "step": 15889 }, { "epoch": 0.9676338945894102, "grad_norm": 0.9991977178834368, "learning_rate": 4.689156337044113e-06, "loss": 0.3789, "step": 15890 }, { "epoch": 0.9676947903662881, "grad_norm": 0.9497286629932677, "learning_rate": 4.689117800659361e-06, "loss": 0.4118, "step": 15891 }, { "epoch": 0.967755686143166, "grad_norm": 0.9762744515131648, "learning_rate": 4.68907926204438e-06, "loss": 0.4178, "step": 15892 }, { "epoch": 0.9678165819200438, "grad_norm": 0.9569353520977573, "learning_rate": 4.689040721199212e-06, "loss": 0.413, "step": 15893 }, { "epoch": 0.9678774776969217, "grad_norm": 1.0418386458942637, "learning_rate": 4.689002178123895e-06, "loss": 0.3687, "step": 15894 }, { "epoch": 0.9679383734737996, "grad_norm": 1.1248825253487913, "learning_rate": 4.688963632818467e-06, "loss": 0.3935, "step": 15895 }, { "epoch": 0.9679992692506775, "grad_norm": 0.9951865783322256, "learning_rate": 4.6889250852829695e-06, "loss": 0.3866, "step": 15896 }, { "epoch": 0.9680601650275553, "grad_norm": 1.1026423096020967, "learning_rate": 4.68888653551744e-06, "loss": 0.4474, "step": 15897 }, { "epoch": 0.9681210608044332, "grad_norm": 0.907102713846249, "learning_rate": 4.688847983521918e-06, "loss": 0.4235, "step": 15898 }, { "epoch": 0.9681819565813111, "grad_norm": 0.9831096025285408, "learning_rate": 4.688809429296444e-06, "loss": 0.3874, "step": 15899 }, { "epoch": 0.968242852358189, "grad_norm": 1.0269750976003529, "learning_rate": 4.688770872841056e-06, "loss": 0.3669, "step": 15900 }, { "epoch": 0.9683037481350668, "grad_norm": 0.9122194651564797, "learning_rate": 4.688732314155794e-06, "loss": 0.4374, "step": 15901 }, { "epoch": 0.9683646439119447, "grad_norm": 0.9368486820043472, "learning_rate": 4.688693753240697e-06, "loss": 0.4075, "step": 15902 }, { "epoch": 0.9684255396888226, "grad_norm": 0.9679618498594588, "learning_rate": 4.6886551900958045e-06, "loss": 0.3641, "step": 15903 }, { "epoch": 0.9684864354657005, "grad_norm": 1.0355077279066274, "learning_rate": 4.688616624721155e-06, "loss": 0.4215, "step": 15904 }, { "epoch": 0.9685473312425783, "grad_norm": 0.9791544527593093, "learning_rate": 4.688578057116788e-06, "loss": 0.4095, "step": 15905 }, { "epoch": 0.9686082270194561, "grad_norm": 0.9119179022046497, "learning_rate": 4.6885394872827445e-06, "loss": 0.4395, "step": 15906 }, { "epoch": 0.9686691227963341, "grad_norm": 0.9968310372065873, "learning_rate": 4.688500915219062e-06, "loss": 0.4598, "step": 15907 }, { "epoch": 0.968730018573212, "grad_norm": 1.0284101427186363, "learning_rate": 4.68846234092578e-06, "loss": 0.4259, "step": 15908 }, { "epoch": 0.9687909143500898, "grad_norm": 0.9536168208704818, "learning_rate": 4.688423764402938e-06, "loss": 0.4646, "step": 15909 }, { "epoch": 0.9688518101269677, "grad_norm": 1.0388340430900842, "learning_rate": 4.688385185650576e-06, "loss": 0.4005, "step": 15910 }, { "epoch": 0.9689127059038456, "grad_norm": 0.9098044779335425, "learning_rate": 4.688346604668732e-06, "loss": 0.4775, "step": 15911 }, { "epoch": 0.9689736016807234, "grad_norm": 0.9566539421584879, "learning_rate": 4.688308021457446e-06, "loss": 0.3895, "step": 15912 }, { "epoch": 0.9690344974576013, "grad_norm": 1.0571020994107407, "learning_rate": 4.688269436016757e-06, "loss": 0.3825, "step": 15913 }, { "epoch": 0.9690953932344792, "grad_norm": 1.0038160281626856, "learning_rate": 4.688230848346705e-06, "loss": 0.3814, "step": 15914 }, { "epoch": 0.9691562890113571, "grad_norm": 1.1060756942106234, "learning_rate": 4.688192258447329e-06, "loss": 0.4319, "step": 15915 }, { "epoch": 0.9692171847882349, "grad_norm": 1.0047644751434825, "learning_rate": 4.688153666318669e-06, "loss": 0.351, "step": 15916 }, { "epoch": 0.9692780805651128, "grad_norm": 0.9619788737658024, "learning_rate": 4.688115071960762e-06, "loss": 0.4826, "step": 15917 }, { "epoch": 0.9693389763419907, "grad_norm": 0.996589933214505, "learning_rate": 4.6880764753736495e-06, "loss": 0.4106, "step": 15918 }, { "epoch": 0.9693998721188686, "grad_norm": 0.9688300900780064, "learning_rate": 4.68803787655737e-06, "loss": 0.4706, "step": 15919 }, { "epoch": 0.9694607678957464, "grad_norm": 0.97145847202073, "learning_rate": 4.6879992755119635e-06, "loss": 0.4102, "step": 15920 }, { "epoch": 0.9695216636726243, "grad_norm": 1.0154278137215251, "learning_rate": 4.6879606722374686e-06, "loss": 0.4841, "step": 15921 }, { "epoch": 0.9695825594495022, "grad_norm": 1.0608306285026856, "learning_rate": 4.687922066733925e-06, "loss": 0.3556, "step": 15922 }, { "epoch": 0.9696434552263801, "grad_norm": 0.9778408403228259, "learning_rate": 4.687883459001372e-06, "loss": 0.416, "step": 15923 }, { "epoch": 0.9697043510032579, "grad_norm": 0.9655194474401457, "learning_rate": 4.6878448490398484e-06, "loss": 0.4005, "step": 15924 }, { "epoch": 0.9697652467801358, "grad_norm": 1.0156227968552582, "learning_rate": 4.687806236849394e-06, "loss": 0.4114, "step": 15925 }, { "epoch": 0.9698261425570137, "grad_norm": 0.9882781517337215, "learning_rate": 4.687767622430049e-06, "loss": 0.4136, "step": 15926 }, { "epoch": 0.9698870383338916, "grad_norm": 0.9930767328273243, "learning_rate": 4.6877290057818505e-06, "loss": 0.4612, "step": 15927 }, { "epoch": 0.9699479341107694, "grad_norm": 1.0592519764859931, "learning_rate": 4.687690386904839e-06, "loss": 0.3079, "step": 15928 }, { "epoch": 0.9700088298876473, "grad_norm": 1.0082950971871243, "learning_rate": 4.687651765799055e-06, "loss": 0.4134, "step": 15929 }, { "epoch": 0.9700697256645252, "grad_norm": 1.0065590790825523, "learning_rate": 4.687613142464537e-06, "loss": 0.4369, "step": 15930 }, { "epoch": 0.970130621441403, "grad_norm": 0.991485189609904, "learning_rate": 4.687574516901323e-06, "loss": 0.3636, "step": 15931 }, { "epoch": 0.9701915172182809, "grad_norm": 1.0564913429384835, "learning_rate": 4.687535889109455e-06, "loss": 0.3984, "step": 15932 }, { "epoch": 0.9702524129951587, "grad_norm": 1.098792631361967, "learning_rate": 4.68749725908897e-06, "loss": 0.4189, "step": 15933 }, { "epoch": 0.9703133087720367, "grad_norm": 0.9447012876306536, "learning_rate": 4.687458626839908e-06, "loss": 0.3791, "step": 15934 }, { "epoch": 0.9703742045489145, "grad_norm": 1.0226505410169413, "learning_rate": 4.68741999236231e-06, "loss": 0.308, "step": 15935 }, { "epoch": 0.9704351003257924, "grad_norm": 1.052528657240363, "learning_rate": 4.687381355656213e-06, "loss": 0.3424, "step": 15936 }, { "epoch": 0.9704959961026702, "grad_norm": 1.0416890751177577, "learning_rate": 4.687342716721657e-06, "loss": 0.3989, "step": 15937 }, { "epoch": 0.9705568918795482, "grad_norm": 1.1157826994064977, "learning_rate": 4.687304075558683e-06, "loss": 0.3624, "step": 15938 }, { "epoch": 0.970617787656426, "grad_norm": 1.0691129822216192, "learning_rate": 4.687265432167328e-06, "loss": 0.4284, "step": 15939 }, { "epoch": 0.9706786834333039, "grad_norm": 1.0599089447101318, "learning_rate": 4.687226786547633e-06, "loss": 0.3847, "step": 15940 }, { "epoch": 0.9707395792101817, "grad_norm": 1.0329119473446204, "learning_rate": 4.687188138699636e-06, "loss": 0.3998, "step": 15941 }, { "epoch": 0.9708004749870597, "grad_norm": 1.0570270235228594, "learning_rate": 4.6871494886233785e-06, "loss": 0.333, "step": 15942 }, { "epoch": 0.9708613707639375, "grad_norm": 0.9717651811095603, "learning_rate": 4.687110836318897e-06, "loss": 0.4156, "step": 15943 }, { "epoch": 0.9709222665408154, "grad_norm": 0.9394060799503992, "learning_rate": 4.687072181786234e-06, "loss": 0.4095, "step": 15944 }, { "epoch": 0.9709831623176932, "grad_norm": 1.0040483618485636, "learning_rate": 4.687033525025426e-06, "loss": 0.3968, "step": 15945 }, { "epoch": 0.9710440580945712, "grad_norm": 1.0814727026091204, "learning_rate": 4.686994866036515e-06, "loss": 0.4141, "step": 15946 }, { "epoch": 0.971104953871449, "grad_norm": 1.0191127801962128, "learning_rate": 4.6869562048195375e-06, "loss": 0.4344, "step": 15947 }, { "epoch": 0.9711658496483269, "grad_norm": 0.9588360897853149, "learning_rate": 4.686917541374536e-06, "loss": 0.3952, "step": 15948 }, { "epoch": 0.9712267454252047, "grad_norm": 0.9349165050264795, "learning_rate": 4.686878875701548e-06, "loss": 0.4696, "step": 15949 }, { "epoch": 0.9712876412020827, "grad_norm": 0.9743428866362259, "learning_rate": 4.686840207800613e-06, "loss": 0.4486, "step": 15950 }, { "epoch": 0.9713485369789605, "grad_norm": 1.055602099066651, "learning_rate": 4.6868015376717705e-06, "loss": 0.4876, "step": 15951 }, { "epoch": 0.9714094327558384, "grad_norm": 0.948615004630432, "learning_rate": 4.68676286531506e-06, "loss": 0.415, "step": 15952 }, { "epoch": 0.9714703285327163, "grad_norm": 1.000961711797374, "learning_rate": 4.686724190730521e-06, "loss": 0.4468, "step": 15953 }, { "epoch": 0.9715312243095942, "grad_norm": 1.0524218266420842, "learning_rate": 4.686685513918193e-06, "loss": 0.442, "step": 15954 }, { "epoch": 0.971592120086472, "grad_norm": 0.9343369735946783, "learning_rate": 4.686646834878116e-06, "loss": 0.4425, "step": 15955 }, { "epoch": 0.9716530158633498, "grad_norm": 1.017777040556604, "learning_rate": 4.686608153610328e-06, "loss": 0.409, "step": 15956 }, { "epoch": 0.9717139116402278, "grad_norm": 0.9402862731854399, "learning_rate": 4.686569470114869e-06, "loss": 0.3666, "step": 15957 }, { "epoch": 0.9717748074171056, "grad_norm": 1.0332892501058129, "learning_rate": 4.686530784391779e-06, "loss": 0.3915, "step": 15958 }, { "epoch": 0.9718357031939835, "grad_norm": 0.9977419688217737, "learning_rate": 4.686492096441097e-06, "loss": 0.4026, "step": 15959 }, { "epoch": 0.9718965989708613, "grad_norm": 1.031234448548983, "learning_rate": 4.686453406262861e-06, "loss": 0.3856, "step": 15960 }, { "epoch": 0.9719574947477393, "grad_norm": 0.99297252267654, "learning_rate": 4.686414713857113e-06, "loss": 0.4172, "step": 15961 }, { "epoch": 0.9720183905246171, "grad_norm": 1.0475580566098028, "learning_rate": 4.68637601922389e-06, "loss": 0.3769, "step": 15962 }, { "epoch": 0.972079286301495, "grad_norm": 1.066420324153373, "learning_rate": 4.686337322363234e-06, "loss": 0.3696, "step": 15963 }, { "epoch": 0.9721401820783728, "grad_norm": 0.9532542010764609, "learning_rate": 4.686298623275182e-06, "loss": 0.4785, "step": 15964 }, { "epoch": 0.9722010778552508, "grad_norm": 0.9637573288388872, "learning_rate": 4.686259921959774e-06, "loss": 0.4497, "step": 15965 }, { "epoch": 0.9722619736321286, "grad_norm": 1.010319370745162, "learning_rate": 4.686221218417051e-06, "loss": 0.3531, "step": 15966 }, { "epoch": 0.9723228694090065, "grad_norm": 0.967503972134798, "learning_rate": 4.68618251264705e-06, "loss": 0.4544, "step": 15967 }, { "epoch": 0.9723837651858843, "grad_norm": 0.9739802888012055, "learning_rate": 4.686143804649813e-06, "loss": 0.3922, "step": 15968 }, { "epoch": 0.9724446609627623, "grad_norm": 1.025229240232345, "learning_rate": 4.686105094425377e-06, "loss": 0.3668, "step": 15969 }, { "epoch": 0.9725055567396401, "grad_norm": 0.9658678968876444, "learning_rate": 4.686066381973784e-06, "loss": 0.3997, "step": 15970 }, { "epoch": 0.972566452516518, "grad_norm": 0.9757350322620144, "learning_rate": 4.686027667295071e-06, "loss": 0.4279, "step": 15971 }, { "epoch": 0.9726273482933958, "grad_norm": 0.9715430235184794, "learning_rate": 4.685988950389278e-06, "loss": 0.5433, "step": 15972 }, { "epoch": 0.9726882440702738, "grad_norm": 1.210115785610484, "learning_rate": 4.685950231256446e-06, "loss": 0.3762, "step": 15973 }, { "epoch": 0.9727491398471516, "grad_norm": 0.9831189512029663, "learning_rate": 4.685911509896614e-06, "loss": 0.4332, "step": 15974 }, { "epoch": 0.9728100356240295, "grad_norm": 1.0325789856470784, "learning_rate": 4.685872786309819e-06, "loss": 0.4148, "step": 15975 }, { "epoch": 0.9728709314009073, "grad_norm": 0.9755967222740352, "learning_rate": 4.6858340604961025e-06, "loss": 0.3944, "step": 15976 }, { "epoch": 0.9729318271777853, "grad_norm": 0.948039024599039, "learning_rate": 4.685795332455505e-06, "loss": 0.3868, "step": 15977 }, { "epoch": 0.9729927229546631, "grad_norm": 1.0655919126220634, "learning_rate": 4.685756602188063e-06, "loss": 0.3642, "step": 15978 }, { "epoch": 0.973053618731541, "grad_norm": 1.0212144827356509, "learning_rate": 4.6857178696938185e-06, "loss": 0.3361, "step": 15979 }, { "epoch": 0.9731145145084188, "grad_norm": 0.9700608345973162, "learning_rate": 4.68567913497281e-06, "loss": 0.3407, "step": 15980 }, { "epoch": 0.9731754102852967, "grad_norm": 1.0020086642845822, "learning_rate": 4.685640398025077e-06, "loss": 0.4331, "step": 15981 }, { "epoch": 0.9732363060621746, "grad_norm": 0.9873600051854985, "learning_rate": 4.6856016588506595e-06, "loss": 0.3808, "step": 15982 }, { "epoch": 0.9732972018390524, "grad_norm": 0.9811131616228544, "learning_rate": 4.685562917449596e-06, "loss": 0.383, "step": 15983 }, { "epoch": 0.9733580976159303, "grad_norm": 1.046062444255707, "learning_rate": 4.685524173821927e-06, "loss": 0.3694, "step": 15984 }, { "epoch": 0.9734189933928082, "grad_norm": 0.9926630444478386, "learning_rate": 4.685485427967691e-06, "loss": 0.4494, "step": 15985 }, { "epoch": 0.9734798891696861, "grad_norm": 1.1446932847817521, "learning_rate": 4.685446679886928e-06, "loss": 0.395, "step": 15986 }, { "epoch": 0.9735407849465639, "grad_norm": 0.9870680405697121, "learning_rate": 4.685407929579676e-06, "loss": 0.3792, "step": 15987 }, { "epoch": 0.9736016807234418, "grad_norm": 0.980355565257264, "learning_rate": 4.685369177045977e-06, "loss": 0.4379, "step": 15988 }, { "epoch": 0.9736625765003197, "grad_norm": 1.0492152853618824, "learning_rate": 4.68533042228587e-06, "loss": 0.3843, "step": 15989 }, { "epoch": 0.9737234722771976, "grad_norm": 1.049304062811507, "learning_rate": 4.685291665299393e-06, "loss": 0.4153, "step": 15990 }, { "epoch": 0.9737843680540754, "grad_norm": 1.0310691224429611, "learning_rate": 4.685252906086586e-06, "loss": 0.3648, "step": 15991 }, { "epoch": 0.9738452638309534, "grad_norm": 1.055683753610328, "learning_rate": 4.68521414464749e-06, "loss": 0.4291, "step": 15992 }, { "epoch": 0.9739061596078312, "grad_norm": 0.9634302499981915, "learning_rate": 4.685175380982142e-06, "loss": 0.3777, "step": 15993 }, { "epoch": 0.9739670553847091, "grad_norm": 0.9949178224365038, "learning_rate": 4.685136615090583e-06, "loss": 0.3681, "step": 15994 }, { "epoch": 0.9740279511615869, "grad_norm": 0.9452630179459578, "learning_rate": 4.685097846972852e-06, "loss": 0.426, "step": 15995 }, { "epoch": 0.9740888469384649, "grad_norm": 0.9238130051020246, "learning_rate": 4.685059076628989e-06, "loss": 0.4119, "step": 15996 }, { "epoch": 0.9741497427153427, "grad_norm": 0.9391574190251094, "learning_rate": 4.685020304059034e-06, "loss": 0.389, "step": 15997 }, { "epoch": 0.9742106384922206, "grad_norm": 1.0124761542546947, "learning_rate": 4.684981529263025e-06, "loss": 0.4154, "step": 15998 }, { "epoch": 0.9742715342690984, "grad_norm": 1.0031011174207005, "learning_rate": 4.684942752241003e-06, "loss": 0.3752, "step": 15999 }, { "epoch": 0.9743324300459764, "grad_norm": 0.9216188016260024, "learning_rate": 4.684903972993006e-06, "loss": 0.4321, "step": 16000 }, { "epoch": 0.9743933258228542, "grad_norm": 0.9714202823538337, "learning_rate": 4.684865191519074e-06, "loss": 0.4519, "step": 16001 }, { "epoch": 0.974454221599732, "grad_norm": 1.0265692047779051, "learning_rate": 4.684826407819247e-06, "loss": 0.4344, "step": 16002 }, { "epoch": 0.9745151173766099, "grad_norm": 1.0192633691189683, "learning_rate": 4.684787621893565e-06, "loss": 0.3975, "step": 16003 }, { "epoch": 0.9745760131534879, "grad_norm": 1.0615568975536593, "learning_rate": 4.684748833742066e-06, "loss": 0.4202, "step": 16004 }, { "epoch": 0.9746369089303657, "grad_norm": 1.0407774268240753, "learning_rate": 4.68471004336479e-06, "loss": 0.4443, "step": 16005 }, { "epoch": 0.9746978047072435, "grad_norm": 0.9862344540869143, "learning_rate": 4.684671250761778e-06, "loss": 0.3732, "step": 16006 }, { "epoch": 0.9747587004841214, "grad_norm": 0.9781482708734746, "learning_rate": 4.684632455933068e-06, "loss": 0.4187, "step": 16007 }, { "epoch": 0.9748195962609993, "grad_norm": 0.9233510183948868, "learning_rate": 4.684593658878699e-06, "loss": 0.4149, "step": 16008 }, { "epoch": 0.9748804920378772, "grad_norm": 0.9631465810750971, "learning_rate": 4.684554859598712e-06, "loss": 0.3782, "step": 16009 }, { "epoch": 0.974941387814755, "grad_norm": 1.0111324831684168, "learning_rate": 4.684516058093146e-06, "loss": 0.4537, "step": 16010 }, { "epoch": 0.9750022835916329, "grad_norm": 0.9800329080885978, "learning_rate": 4.684477254362041e-06, "loss": 0.3863, "step": 16011 }, { "epoch": 0.9750631793685108, "grad_norm": 0.9739330529733358, "learning_rate": 4.684438448405435e-06, "loss": 0.4069, "step": 16012 }, { "epoch": 0.9751240751453887, "grad_norm": 0.9056320994634349, "learning_rate": 4.6843996402233685e-06, "loss": 0.4821, "step": 16013 }, { "epoch": 0.9751849709222665, "grad_norm": 1.070811361098962, "learning_rate": 4.684360829815881e-06, "loss": 0.3921, "step": 16014 }, { "epoch": 0.9752458666991444, "grad_norm": 1.132787129601741, "learning_rate": 4.684322017183013e-06, "loss": 0.3407, "step": 16015 }, { "epoch": 0.9753067624760223, "grad_norm": 0.9629370840845904, "learning_rate": 4.684283202324803e-06, "loss": 0.4623, "step": 16016 }, { "epoch": 0.9753676582529002, "grad_norm": 1.0319460595177392, "learning_rate": 4.68424438524129e-06, "loss": 0.3712, "step": 16017 }, { "epoch": 0.975428554029778, "grad_norm": 0.9710202993822502, "learning_rate": 4.684205565932514e-06, "loss": 0.3631, "step": 16018 }, { "epoch": 0.9754894498066559, "grad_norm": 0.9532413263246974, "learning_rate": 4.684166744398517e-06, "loss": 0.4756, "step": 16019 }, { "epoch": 0.9755503455835338, "grad_norm": 1.0028067739005075, "learning_rate": 4.684127920639334e-06, "loss": 0.3804, "step": 16020 }, { "epoch": 0.9756112413604117, "grad_norm": 1.016908328137009, "learning_rate": 4.684089094655007e-06, "loss": 0.444, "step": 16021 }, { "epoch": 0.9756721371372895, "grad_norm": 1.0171072179963674, "learning_rate": 4.684050266445577e-06, "loss": 0.3616, "step": 16022 }, { "epoch": 0.9757330329141674, "grad_norm": 1.0410487637113808, "learning_rate": 4.684011436011081e-06, "loss": 0.3861, "step": 16023 }, { "epoch": 0.9757939286910453, "grad_norm": 1.0052877869995331, "learning_rate": 4.6839726033515595e-06, "loss": 0.3807, "step": 16024 }, { "epoch": 0.9758548244679232, "grad_norm": 0.9777534449888076, "learning_rate": 4.683933768467052e-06, "loss": 0.3731, "step": 16025 }, { "epoch": 0.975915720244801, "grad_norm": 1.0509687262710758, "learning_rate": 4.683894931357598e-06, "loss": 0.3571, "step": 16026 }, { "epoch": 0.9759766160216788, "grad_norm": 1.0456952324255628, "learning_rate": 4.683856092023238e-06, "loss": 0.3664, "step": 16027 }, { "epoch": 0.9760375117985568, "grad_norm": 0.9706651837738491, "learning_rate": 4.683817250464011e-06, "loss": 0.3833, "step": 16028 }, { "epoch": 0.9760984075754346, "grad_norm": 1.0365588028151853, "learning_rate": 4.6837784066799555e-06, "loss": 0.4283, "step": 16029 }, { "epoch": 0.9761593033523125, "grad_norm": 0.9745347314261119, "learning_rate": 4.683739560671112e-06, "loss": 0.4422, "step": 16030 }, { "epoch": 0.9762201991291903, "grad_norm": 1.0313202721852124, "learning_rate": 4.683700712437521e-06, "loss": 0.3525, "step": 16031 }, { "epoch": 0.9762810949060683, "grad_norm": 1.035498949406615, "learning_rate": 4.68366186197922e-06, "loss": 0.4596, "step": 16032 }, { "epoch": 0.9763419906829461, "grad_norm": 0.9612178120577987, "learning_rate": 4.683623009296251e-06, "loss": 0.4352, "step": 16033 }, { "epoch": 0.976402886459824, "grad_norm": 0.9797806864327576, "learning_rate": 4.68358415438865e-06, "loss": 0.3771, "step": 16034 }, { "epoch": 0.9764637822367019, "grad_norm": 0.9752047859004709, "learning_rate": 4.683545297256461e-06, "loss": 0.4663, "step": 16035 }, { "epoch": 0.9765246780135798, "grad_norm": 0.9731185786964154, "learning_rate": 4.683506437899721e-06, "loss": 0.4457, "step": 16036 }, { "epoch": 0.9765855737904576, "grad_norm": 0.9933468747484472, "learning_rate": 4.68346757631847e-06, "loss": 0.4436, "step": 16037 }, { "epoch": 0.9766464695673355, "grad_norm": 1.0240317952931144, "learning_rate": 4.683428712512748e-06, "loss": 0.3898, "step": 16038 }, { "epoch": 0.9767073653442134, "grad_norm": 1.0264056271255824, "learning_rate": 4.6833898464825925e-06, "loss": 0.4137, "step": 16039 }, { "epoch": 0.9767682611210913, "grad_norm": 0.9631616144200231, "learning_rate": 4.6833509782280464e-06, "loss": 0.4178, "step": 16040 }, { "epoch": 0.9768291568979691, "grad_norm": 0.9530754508083609, "learning_rate": 4.683312107749147e-06, "loss": 0.4222, "step": 16041 }, { "epoch": 0.976890052674847, "grad_norm": 1.0672079009812212, "learning_rate": 4.6832732350459356e-06, "loss": 0.3655, "step": 16042 }, { "epoch": 0.9769509484517249, "grad_norm": 0.9592154616443241, "learning_rate": 4.6832343601184495e-06, "loss": 0.3928, "step": 16043 }, { "epoch": 0.9770118442286028, "grad_norm": 1.0079271612807843, "learning_rate": 4.68319548296673e-06, "loss": 0.4037, "step": 16044 }, { "epoch": 0.9770727400054806, "grad_norm": 1.0143981060373526, "learning_rate": 4.683156603590817e-06, "loss": 0.4179, "step": 16045 }, { "epoch": 0.9771336357823585, "grad_norm": 1.0190840492449915, "learning_rate": 4.6831177219907485e-06, "loss": 0.4234, "step": 16046 }, { "epoch": 0.9771945315592364, "grad_norm": 1.0865168038226711, "learning_rate": 4.683078838166566e-06, "loss": 0.357, "step": 16047 }, { "epoch": 0.9772554273361143, "grad_norm": 1.0384922017307576, "learning_rate": 4.683039952118308e-06, "loss": 0.4317, "step": 16048 }, { "epoch": 0.9773163231129921, "grad_norm": 0.9503428416860005, "learning_rate": 4.683001063846014e-06, "loss": 0.4242, "step": 16049 }, { "epoch": 0.97737721888987, "grad_norm": 1.0283600079776865, "learning_rate": 4.682962173349724e-06, "loss": 0.391, "step": 16050 }, { "epoch": 0.9774381146667479, "grad_norm": 0.8816529970139632, "learning_rate": 4.682923280629477e-06, "loss": 0.4116, "step": 16051 }, { "epoch": 0.9774990104436257, "grad_norm": 0.95985122434048, "learning_rate": 4.682884385685313e-06, "loss": 0.4317, "step": 16052 }, { "epoch": 0.9775599062205036, "grad_norm": 1.0209106229034313, "learning_rate": 4.682845488517273e-06, "loss": 0.3822, "step": 16053 }, { "epoch": 0.9776208019973814, "grad_norm": 0.9375500101544937, "learning_rate": 4.682806589125395e-06, "loss": 0.4777, "step": 16054 }, { "epoch": 0.9776816977742594, "grad_norm": 0.9150929748119551, "learning_rate": 4.682767687509719e-06, "loss": 0.4126, "step": 16055 }, { "epoch": 0.9777425935511372, "grad_norm": 0.9337011877757427, "learning_rate": 4.682728783670284e-06, "loss": 0.4687, "step": 16056 }, { "epoch": 0.9778034893280151, "grad_norm": 1.0577286704055204, "learning_rate": 4.682689877607131e-06, "loss": 0.3506, "step": 16057 }, { "epoch": 0.9778643851048929, "grad_norm": 1.1379819731834566, "learning_rate": 4.682650969320298e-06, "loss": 0.3584, "step": 16058 }, { "epoch": 0.9779252808817709, "grad_norm": 0.9222804390114773, "learning_rate": 4.682612058809826e-06, "loss": 0.459, "step": 16059 }, { "epoch": 0.9779861766586487, "grad_norm": 1.033357419555169, "learning_rate": 4.682573146075755e-06, "loss": 0.3622, "step": 16060 }, { "epoch": 0.9780470724355266, "grad_norm": 0.982177034213407, "learning_rate": 4.682534231118123e-06, "loss": 0.4596, "step": 16061 }, { "epoch": 0.9781079682124044, "grad_norm": 1.0305455332771616, "learning_rate": 4.682495313936971e-06, "loss": 0.3327, "step": 16062 }, { "epoch": 0.9781688639892824, "grad_norm": 1.0270135803702636, "learning_rate": 4.682456394532339e-06, "loss": 0.4613, "step": 16063 }, { "epoch": 0.9782297597661602, "grad_norm": 1.07859134807598, "learning_rate": 4.682417472904264e-06, "loss": 0.3866, "step": 16064 }, { "epoch": 0.9782906555430381, "grad_norm": 0.9823713710759832, "learning_rate": 4.682378549052788e-06, "loss": 0.3662, "step": 16065 }, { "epoch": 0.9783515513199159, "grad_norm": 1.0982957021478394, "learning_rate": 4.68233962297795e-06, "loss": 0.3778, "step": 16066 }, { "epoch": 0.9784124470967939, "grad_norm": 1.1117175401114243, "learning_rate": 4.682300694679791e-06, "loss": 0.4111, "step": 16067 }, { "epoch": 0.9784733428736717, "grad_norm": 0.982938359540381, "learning_rate": 4.6822617641583476e-06, "loss": 0.3754, "step": 16068 }, { "epoch": 0.9785342386505496, "grad_norm": 1.038040913090864, "learning_rate": 4.682222831413663e-06, "loss": 0.4205, "step": 16069 }, { "epoch": 0.9785951344274274, "grad_norm": 1.0721407747758993, "learning_rate": 4.682183896445775e-06, "loss": 0.3953, "step": 16070 }, { "epoch": 0.9786560302043054, "grad_norm": 0.9601703143918208, "learning_rate": 4.682144959254722e-06, "loss": 0.3689, "step": 16071 }, { "epoch": 0.9787169259811832, "grad_norm": 1.0024826632551733, "learning_rate": 4.682106019840545e-06, "loss": 0.4083, "step": 16072 }, { "epoch": 0.978777821758061, "grad_norm": 0.986938358871002, "learning_rate": 4.682067078203285e-06, "loss": 0.4178, "step": 16073 }, { "epoch": 0.978838717534939, "grad_norm": 1.0135808545810367, "learning_rate": 4.682028134342981e-06, "loss": 0.42, "step": 16074 }, { "epoch": 0.9788996133118169, "grad_norm": 0.9500175526140742, "learning_rate": 4.68198918825967e-06, "loss": 0.3962, "step": 16075 }, { "epoch": 0.9789605090886947, "grad_norm": 1.0466739293789553, "learning_rate": 4.6819502399533955e-06, "loss": 0.354, "step": 16076 }, { "epoch": 0.9790214048655725, "grad_norm": 1.040095040743104, "learning_rate": 4.681911289424195e-06, "loss": 0.419, "step": 16077 }, { "epoch": 0.9790823006424505, "grad_norm": 1.0532956551317816, "learning_rate": 4.681872336672108e-06, "loss": 0.3961, "step": 16078 }, { "epoch": 0.9791431964193283, "grad_norm": 0.9712483846083861, "learning_rate": 4.6818333816971755e-06, "loss": 0.4351, "step": 16079 }, { "epoch": 0.9792040921962062, "grad_norm": 1.1072268782268198, "learning_rate": 4.681794424499436e-06, "loss": 0.3454, "step": 16080 }, { "epoch": 0.979264987973084, "grad_norm": 0.9591100671626004, "learning_rate": 4.68175546507893e-06, "loss": 0.4756, "step": 16081 }, { "epoch": 0.979325883749962, "grad_norm": 0.9997700439303207, "learning_rate": 4.681716503435697e-06, "loss": 0.3936, "step": 16082 }, { "epoch": 0.9793867795268398, "grad_norm": 1.0006258904805212, "learning_rate": 4.6816775395697766e-06, "loss": 0.4062, "step": 16083 }, { "epoch": 0.9794476753037177, "grad_norm": 0.986567552035929, "learning_rate": 4.681638573481209e-06, "loss": 0.4059, "step": 16084 }, { "epoch": 0.9795085710805955, "grad_norm": 0.8847233075292745, "learning_rate": 4.681599605170033e-06, "loss": 0.421, "step": 16085 }, { "epoch": 0.9795694668574735, "grad_norm": 0.923586534415693, "learning_rate": 4.681560634636288e-06, "loss": 0.4319, "step": 16086 }, { "epoch": 0.9796303626343513, "grad_norm": 1.000473396834949, "learning_rate": 4.681521661880015e-06, "loss": 0.3932, "step": 16087 }, { "epoch": 0.9796912584112292, "grad_norm": 0.9796855113818156, "learning_rate": 4.681482686901253e-06, "loss": 0.4343, "step": 16088 }, { "epoch": 0.979752154188107, "grad_norm": 1.0487325640875638, "learning_rate": 4.681443709700041e-06, "loss": 0.4442, "step": 16089 }, { "epoch": 0.979813049964985, "grad_norm": 1.1067579600616035, "learning_rate": 4.681404730276421e-06, "loss": 0.3424, "step": 16090 }, { "epoch": 0.9798739457418628, "grad_norm": 0.9809068158903119, "learning_rate": 4.68136574863043e-06, "loss": 0.3976, "step": 16091 }, { "epoch": 0.9799348415187407, "grad_norm": 0.9882188067801941, "learning_rate": 4.68132676476211e-06, "loss": 0.3704, "step": 16092 }, { "epoch": 0.9799957372956185, "grad_norm": 0.9483875069296097, "learning_rate": 4.681287778671498e-06, "loss": 0.4677, "step": 16093 }, { "epoch": 0.9800566330724965, "grad_norm": 0.9850465541479403, "learning_rate": 4.681248790358637e-06, "loss": 0.3428, "step": 16094 }, { "epoch": 0.9801175288493743, "grad_norm": 0.9965540536160948, "learning_rate": 4.681209799823564e-06, "loss": 0.4024, "step": 16095 }, { "epoch": 0.9801784246262522, "grad_norm": 1.086147861315818, "learning_rate": 4.681170807066321e-06, "loss": 0.3623, "step": 16096 }, { "epoch": 0.98023932040313, "grad_norm": 0.974569709940748, "learning_rate": 4.681131812086945e-06, "loss": 0.4729, "step": 16097 }, { "epoch": 0.980300216180008, "grad_norm": 1.0420445326599375, "learning_rate": 4.6810928148854784e-06, "loss": 0.3781, "step": 16098 }, { "epoch": 0.9803611119568858, "grad_norm": 0.9581006188209618, "learning_rate": 4.68105381546196e-06, "loss": 0.4588, "step": 16099 }, { "epoch": 0.9804220077337636, "grad_norm": 1.0512932397991577, "learning_rate": 4.681014813816428e-06, "loss": 0.3714, "step": 16100 }, { "epoch": 0.9804829035106415, "grad_norm": 0.9860190586794039, "learning_rate": 4.680975809948925e-06, "loss": 0.4234, "step": 16101 }, { "epoch": 0.9805437992875194, "grad_norm": 1.0199980204936179, "learning_rate": 4.680936803859488e-06, "loss": 0.4325, "step": 16102 }, { "epoch": 0.9806046950643973, "grad_norm": 1.0520195224662927, "learning_rate": 4.680897795548158e-06, "loss": 0.3956, "step": 16103 }, { "epoch": 0.9806655908412751, "grad_norm": 1.0268495921725582, "learning_rate": 4.680858785014975e-06, "loss": 0.3218, "step": 16104 }, { "epoch": 0.980726486618153, "grad_norm": 0.9578661436979796, "learning_rate": 4.680819772259978e-06, "loss": 0.4214, "step": 16105 }, { "epoch": 0.9807873823950309, "grad_norm": 1.0167395367763374, "learning_rate": 4.6807807572832085e-06, "loss": 0.3483, "step": 16106 }, { "epoch": 0.9808482781719088, "grad_norm": 1.0849584240189116, "learning_rate": 4.6807417400847034e-06, "loss": 0.3724, "step": 16107 }, { "epoch": 0.9809091739487866, "grad_norm": 0.9155856847204409, "learning_rate": 4.6807027206645054e-06, "loss": 0.3965, "step": 16108 }, { "epoch": 0.9809700697256645, "grad_norm": 1.0047670236376236, "learning_rate": 4.680663699022651e-06, "loss": 0.3891, "step": 16109 }, { "epoch": 0.9810309655025424, "grad_norm": 0.9708547805231074, "learning_rate": 4.6806246751591824e-06, "loss": 0.3698, "step": 16110 }, { "epoch": 0.9810918612794203, "grad_norm": 1.0640191062441249, "learning_rate": 4.68058564907414e-06, "loss": 0.4137, "step": 16111 }, { "epoch": 0.9811527570562981, "grad_norm": 1.010296658180209, "learning_rate": 4.6805466207675605e-06, "loss": 0.383, "step": 16112 }, { "epoch": 0.981213652833176, "grad_norm": 0.9719134590426363, "learning_rate": 4.680507590239487e-06, "loss": 0.3553, "step": 16113 }, { "epoch": 0.9812745486100539, "grad_norm": 0.9051143568237352, "learning_rate": 4.6804685574899566e-06, "loss": 0.3869, "step": 16114 }, { "epoch": 0.9813354443869318, "grad_norm": 1.022842607772571, "learning_rate": 4.6804295225190095e-06, "loss": 0.4342, "step": 16115 }, { "epoch": 0.9813963401638096, "grad_norm": 1.077251120378669, "learning_rate": 4.680390485326688e-06, "loss": 0.3913, "step": 16116 }, { "epoch": 0.9814572359406876, "grad_norm": 0.917600285610873, "learning_rate": 4.680351445913029e-06, "loss": 0.5001, "step": 16117 }, { "epoch": 0.9815181317175654, "grad_norm": 0.9587024442206771, "learning_rate": 4.680312404278074e-06, "loss": 0.4067, "step": 16118 }, { "epoch": 0.9815790274944433, "grad_norm": 0.9772028519253518, "learning_rate": 4.680273360421861e-06, "loss": 0.4616, "step": 16119 }, { "epoch": 0.9816399232713211, "grad_norm": 0.972935781132346, "learning_rate": 4.680234314344431e-06, "loss": 0.443, "step": 16120 }, { "epoch": 0.9817008190481991, "grad_norm": 0.9846664000713535, "learning_rate": 4.680195266045824e-06, "loss": 0.4166, "step": 16121 }, { "epoch": 0.9817617148250769, "grad_norm": 1.0081747313263045, "learning_rate": 4.680156215526079e-06, "loss": 0.4241, "step": 16122 }, { "epoch": 0.9818226106019547, "grad_norm": 1.00919927557625, "learning_rate": 4.6801171627852365e-06, "loss": 0.3359, "step": 16123 }, { "epoch": 0.9818835063788326, "grad_norm": 1.1754687985626877, "learning_rate": 4.680078107823336e-06, "loss": 0.3967, "step": 16124 }, { "epoch": 0.9819444021557105, "grad_norm": 0.951413644728912, "learning_rate": 4.680039050640417e-06, "loss": 0.4555, "step": 16125 }, { "epoch": 0.9820052979325884, "grad_norm": 1.0205485398077945, "learning_rate": 4.67999999123652e-06, "loss": 0.4468, "step": 16126 }, { "epoch": 0.9820661937094662, "grad_norm": 0.9598223799294235, "learning_rate": 4.679960929611684e-06, "loss": 0.4091, "step": 16127 }, { "epoch": 0.9821270894863441, "grad_norm": 0.9935425064404672, "learning_rate": 4.679921865765949e-06, "loss": 0.4437, "step": 16128 }, { "epoch": 0.982187985263222, "grad_norm": 1.0177066634659662, "learning_rate": 4.679882799699355e-06, "loss": 0.4051, "step": 16129 }, { "epoch": 0.9822488810400999, "grad_norm": 1.0050184648421094, "learning_rate": 4.679843731411942e-06, "loss": 0.421, "step": 16130 }, { "epoch": 0.9823097768169777, "grad_norm": 0.9373526788419596, "learning_rate": 4.6798046609037495e-06, "loss": 0.4159, "step": 16131 }, { "epoch": 0.9823706725938556, "grad_norm": 1.0370747050299767, "learning_rate": 4.679765588174817e-06, "loss": 0.3774, "step": 16132 }, { "epoch": 0.9824315683707335, "grad_norm": 1.0387634827386758, "learning_rate": 4.679726513225185e-06, "loss": 0.3615, "step": 16133 }, { "epoch": 0.9824924641476114, "grad_norm": 0.9990885140282008, "learning_rate": 4.679687436054893e-06, "loss": 0.3916, "step": 16134 }, { "epoch": 0.9825533599244892, "grad_norm": 0.9850003507870537, "learning_rate": 4.67964835666398e-06, "loss": 0.3974, "step": 16135 }, { "epoch": 0.9826142557013671, "grad_norm": 1.0243744323561415, "learning_rate": 4.679609275052487e-06, "loss": 0.3245, "step": 16136 }, { "epoch": 0.982675151478245, "grad_norm": 1.024885198554054, "learning_rate": 4.679570191220454e-06, "loss": 0.4235, "step": 16137 }, { "epoch": 0.9827360472551229, "grad_norm": 1.0046802751710628, "learning_rate": 4.6795311051679195e-06, "loss": 0.3798, "step": 16138 }, { "epoch": 0.9827969430320007, "grad_norm": 1.0561550069015258, "learning_rate": 4.679492016894924e-06, "loss": 0.4443, "step": 16139 }, { "epoch": 0.9828578388088786, "grad_norm": 1.0532830254304235, "learning_rate": 4.6794529264015076e-06, "loss": 0.3456, "step": 16140 }, { "epoch": 0.9829187345857565, "grad_norm": 1.0292807559862742, "learning_rate": 4.6794138336877096e-06, "loss": 0.4771, "step": 16141 }, { "epoch": 0.9829796303626344, "grad_norm": 1.0383322886314648, "learning_rate": 4.679374738753571e-06, "loss": 0.3654, "step": 16142 }, { "epoch": 0.9830405261395122, "grad_norm": 1.0083676325324407, "learning_rate": 4.6793356415991295e-06, "loss": 0.3967, "step": 16143 }, { "epoch": 0.98310142191639, "grad_norm": 1.0055223413961543, "learning_rate": 4.679296542224426e-06, "loss": 0.4801, "step": 16144 }, { "epoch": 0.983162317693268, "grad_norm": 1.0066370695198557, "learning_rate": 4.679257440629502e-06, "loss": 0.3871, "step": 16145 }, { "epoch": 0.9832232134701459, "grad_norm": 0.9884705761222798, "learning_rate": 4.679218336814395e-06, "loss": 0.4462, "step": 16146 }, { "epoch": 0.9832841092470237, "grad_norm": 0.9981776057508186, "learning_rate": 4.679179230779145e-06, "loss": 0.4691, "step": 16147 }, { "epoch": 0.9833450050239015, "grad_norm": 0.9959897534524961, "learning_rate": 4.679140122523794e-06, "loss": 0.4283, "step": 16148 }, { "epoch": 0.9834059008007795, "grad_norm": 1.016166488692342, "learning_rate": 4.67910101204838e-06, "loss": 0.3559, "step": 16149 }, { "epoch": 0.9834667965776573, "grad_norm": 0.9013003195635438, "learning_rate": 4.6790618993529415e-06, "loss": 0.3854, "step": 16150 }, { "epoch": 0.9835276923545352, "grad_norm": 0.9991644634550092, "learning_rate": 4.679022784437521e-06, "loss": 0.4058, "step": 16151 }, { "epoch": 0.983588588131413, "grad_norm": 0.9466693126541921, "learning_rate": 4.678983667302158e-06, "loss": 0.3766, "step": 16152 }, { "epoch": 0.983649483908291, "grad_norm": 0.9725084170082295, "learning_rate": 4.678944547946892e-06, "loss": 0.3669, "step": 16153 }, { "epoch": 0.9837103796851688, "grad_norm": 0.9456898728804863, "learning_rate": 4.678905426371761e-06, "loss": 0.4579, "step": 16154 }, { "epoch": 0.9837712754620467, "grad_norm": 0.9619390252295359, "learning_rate": 4.6788663025768076e-06, "loss": 0.3686, "step": 16155 }, { "epoch": 0.9838321712389246, "grad_norm": 0.966221203757724, "learning_rate": 4.678827176562071e-06, "loss": 0.4125, "step": 16156 }, { "epoch": 0.9838930670158025, "grad_norm": 0.9901323566887316, "learning_rate": 4.67878804832759e-06, "loss": 0.4424, "step": 16157 }, { "epoch": 0.9839539627926803, "grad_norm": 1.0514115386523537, "learning_rate": 4.6787489178734046e-06, "loss": 0.3518, "step": 16158 }, { "epoch": 0.9840148585695582, "grad_norm": 0.9546906619365239, "learning_rate": 4.678709785199556e-06, "loss": 0.4743, "step": 16159 }, { "epoch": 0.9840757543464361, "grad_norm": 0.9960451244482077, "learning_rate": 4.678670650306082e-06, "loss": 0.3687, "step": 16160 }, { "epoch": 0.984136650123314, "grad_norm": 0.9731543488979095, "learning_rate": 4.678631513193025e-06, "loss": 0.4185, "step": 16161 }, { "epoch": 0.9841975459001918, "grad_norm": 0.9600063053610038, "learning_rate": 4.678592373860423e-06, "loss": 0.3614, "step": 16162 }, { "epoch": 0.9842584416770697, "grad_norm": 1.0171716950058605, "learning_rate": 4.678553232308315e-06, "loss": 0.365, "step": 16163 }, { "epoch": 0.9843193374539476, "grad_norm": 1.1952843313463575, "learning_rate": 4.678514088536744e-06, "loss": 0.4292, "step": 16164 }, { "epoch": 0.9843802332308255, "grad_norm": 0.9909746700323528, "learning_rate": 4.678474942545748e-06, "loss": 0.3848, "step": 16165 }, { "epoch": 0.9844411290077033, "grad_norm": 1.0011021012177936, "learning_rate": 4.678435794335367e-06, "loss": 0.3892, "step": 16166 }, { "epoch": 0.9845020247845812, "grad_norm": 0.9882362004365979, "learning_rate": 4.67839664390564e-06, "loss": 0.4694, "step": 16167 }, { "epoch": 0.9845629205614591, "grad_norm": 1.0026602961566797, "learning_rate": 4.678357491256608e-06, "loss": 0.4003, "step": 16168 }, { "epoch": 0.984623816338337, "grad_norm": 1.0347155292825287, "learning_rate": 4.678318336388311e-06, "loss": 0.446, "step": 16169 }, { "epoch": 0.9846847121152148, "grad_norm": 0.9918286802109743, "learning_rate": 4.678279179300788e-06, "loss": 0.3348, "step": 16170 }, { "epoch": 0.9847456078920926, "grad_norm": 0.9431744769046054, "learning_rate": 4.67824001999408e-06, "loss": 0.377, "step": 16171 }, { "epoch": 0.9848065036689706, "grad_norm": 1.0351802564948431, "learning_rate": 4.678200858468226e-06, "loss": 0.3706, "step": 16172 }, { "epoch": 0.9848673994458484, "grad_norm": 1.005463947053026, "learning_rate": 4.678161694723268e-06, "loss": 0.4018, "step": 16173 }, { "epoch": 0.9849282952227263, "grad_norm": 1.0825281472392674, "learning_rate": 4.678122528759242e-06, "loss": 0.4084, "step": 16174 }, { "epoch": 0.9849891909996041, "grad_norm": 0.9725223586242351, "learning_rate": 4.678083360576191e-06, "loss": 0.409, "step": 16175 }, { "epoch": 0.9850500867764821, "grad_norm": 0.9526734131330167, "learning_rate": 4.678044190174154e-06, "loss": 0.4568, "step": 16176 }, { "epoch": 0.9851109825533599, "grad_norm": 1.0282217868169534, "learning_rate": 4.67800501755317e-06, "loss": 0.4687, "step": 16177 }, { "epoch": 0.9851718783302378, "grad_norm": 0.989005063183578, "learning_rate": 4.67796584271328e-06, "loss": 0.4079, "step": 16178 }, { "epoch": 0.9852327741071156, "grad_norm": 1.025664254256082, "learning_rate": 4.677926665654524e-06, "loss": 0.3644, "step": 16179 }, { "epoch": 0.9852936698839936, "grad_norm": 1.0764304353065983, "learning_rate": 4.677887486376942e-06, "loss": 0.3507, "step": 16180 }, { "epoch": 0.9853545656608714, "grad_norm": 0.9615466498082826, "learning_rate": 4.677848304880573e-06, "loss": 0.4182, "step": 16181 }, { "epoch": 0.9854154614377493, "grad_norm": 1.0181817679781093, "learning_rate": 4.677809121165458e-06, "loss": 0.3942, "step": 16182 }, { "epoch": 0.9854763572146271, "grad_norm": 1.0226787905624835, "learning_rate": 4.677769935231635e-06, "loss": 0.4556, "step": 16183 }, { "epoch": 0.9855372529915051, "grad_norm": 1.0518877107974478, "learning_rate": 4.677730747079147e-06, "loss": 0.374, "step": 16184 }, { "epoch": 0.9855981487683829, "grad_norm": 0.9788495478161697, "learning_rate": 4.677691556708031e-06, "loss": 0.4208, "step": 16185 }, { "epoch": 0.9856590445452608, "grad_norm": 1.0112756967106205, "learning_rate": 4.6776523641183285e-06, "loss": 0.454, "step": 16186 }, { "epoch": 0.9857199403221386, "grad_norm": 1.0420856428076417, "learning_rate": 4.677613169310079e-06, "loss": 0.3666, "step": 16187 }, { "epoch": 0.9857808360990166, "grad_norm": 0.9073857087753471, "learning_rate": 4.677573972283322e-06, "loss": 0.4557, "step": 16188 }, { "epoch": 0.9858417318758944, "grad_norm": 0.9913517049153884, "learning_rate": 4.677534773038099e-06, "loss": 0.3973, "step": 16189 }, { "epoch": 0.9859026276527723, "grad_norm": 1.1358573787650539, "learning_rate": 4.677495571574448e-06, "loss": 0.3669, "step": 16190 }, { "epoch": 0.9859635234296501, "grad_norm": 1.0155112052358368, "learning_rate": 4.6774563678924105e-06, "loss": 0.4487, "step": 16191 }, { "epoch": 0.9860244192065281, "grad_norm": 1.05152131724092, "learning_rate": 4.677417161992025e-06, "loss": 0.3828, "step": 16192 }, { "epoch": 0.9860853149834059, "grad_norm": 1.0000123891616424, "learning_rate": 4.677377953873333e-06, "loss": 0.405, "step": 16193 }, { "epoch": 0.9861462107602837, "grad_norm": 0.8862788136490198, "learning_rate": 4.677338743536372e-06, "loss": 0.4228, "step": 16194 }, { "epoch": 0.9862071065371616, "grad_norm": 1.164779821431365, "learning_rate": 4.677299530981185e-06, "loss": 0.3727, "step": 16195 }, { "epoch": 0.9862680023140395, "grad_norm": 1.0029167159644998, "learning_rate": 4.677260316207811e-06, "loss": 0.4151, "step": 16196 }, { "epoch": 0.9863288980909174, "grad_norm": 0.9875876183585724, "learning_rate": 4.6772210992162885e-06, "loss": 0.3882, "step": 16197 }, { "epoch": 0.9863897938677952, "grad_norm": 1.028817312912461, "learning_rate": 4.677181880006658e-06, "loss": 0.4001, "step": 16198 }, { "epoch": 0.9864506896446732, "grad_norm": 0.934247123012719, "learning_rate": 4.677142658578961e-06, "loss": 0.4619, "step": 16199 }, { "epoch": 0.986511585421551, "grad_norm": 1.0240849132709928, "learning_rate": 4.677103434933235e-06, "loss": 0.4431, "step": 16200 }, { "epoch": 0.9865724811984289, "grad_norm": 0.9464470446721451, "learning_rate": 4.677064209069522e-06, "loss": 0.4381, "step": 16201 }, { "epoch": 0.9866333769753067, "grad_norm": 1.00541614281982, "learning_rate": 4.677024980987861e-06, "loss": 0.5006, "step": 16202 }, { "epoch": 0.9866942727521847, "grad_norm": 0.9638180397331491, "learning_rate": 4.6769857506882935e-06, "loss": 0.4448, "step": 16203 }, { "epoch": 0.9867551685290625, "grad_norm": 0.9398932825829419, "learning_rate": 4.676946518170856e-06, "loss": 0.4307, "step": 16204 }, { "epoch": 0.9868160643059404, "grad_norm": 0.9380486109255832, "learning_rate": 4.676907283435592e-06, "loss": 0.3884, "step": 16205 }, { "epoch": 0.9868769600828182, "grad_norm": 0.9621924635022957, "learning_rate": 4.67686804648254e-06, "loss": 0.4657, "step": 16206 }, { "epoch": 0.9869378558596962, "grad_norm": 0.9660342502006093, "learning_rate": 4.676828807311739e-06, "loss": 0.3937, "step": 16207 }, { "epoch": 0.986998751636574, "grad_norm": 1.0687941865528416, "learning_rate": 4.676789565923232e-06, "loss": 0.3989, "step": 16208 }, { "epoch": 0.9870596474134519, "grad_norm": 0.9190212608055365, "learning_rate": 4.676750322317056e-06, "loss": 0.4198, "step": 16209 }, { "epoch": 0.9871205431903297, "grad_norm": 1.0172618661496406, "learning_rate": 4.676711076493252e-06, "loss": 0.3802, "step": 16210 }, { "epoch": 0.9871814389672077, "grad_norm": 1.014059429188701, "learning_rate": 4.67667182845186e-06, "loss": 0.4297, "step": 16211 }, { "epoch": 0.9872423347440855, "grad_norm": 0.9795919558753168, "learning_rate": 4.67663257819292e-06, "loss": 0.4043, "step": 16212 }, { "epoch": 0.9873032305209634, "grad_norm": 0.9835405054225602, "learning_rate": 4.676593325716473e-06, "loss": 0.4463, "step": 16213 }, { "epoch": 0.9873641262978412, "grad_norm": 0.922253135448357, "learning_rate": 4.676554071022557e-06, "loss": 0.4658, "step": 16214 }, { "epoch": 0.9874250220747192, "grad_norm": 0.9430990731540834, "learning_rate": 4.676514814111213e-06, "loss": 0.4371, "step": 16215 }, { "epoch": 0.987485917851597, "grad_norm": 1.057143890570593, "learning_rate": 4.67647555498248e-06, "loss": 0.4254, "step": 16216 }, { "epoch": 0.9875468136284749, "grad_norm": 0.9793937589642389, "learning_rate": 4.6764362936364005e-06, "loss": 0.4169, "step": 16217 }, { "epoch": 0.9876077094053527, "grad_norm": 0.9197135180667426, "learning_rate": 4.676397030073012e-06, "loss": 0.42, "step": 16218 }, { "epoch": 0.9876686051822307, "grad_norm": 0.9846217698842454, "learning_rate": 4.676357764292356e-06, "loss": 0.4141, "step": 16219 }, { "epoch": 0.9877295009591085, "grad_norm": 0.9174668917323406, "learning_rate": 4.676318496294472e-06, "loss": 0.449, "step": 16220 }, { "epoch": 0.9877903967359863, "grad_norm": 0.9889657879589215, "learning_rate": 4.6762792260794e-06, "loss": 0.4522, "step": 16221 }, { "epoch": 0.9878512925128642, "grad_norm": 0.9508541306782256, "learning_rate": 4.6762399536471795e-06, "loss": 0.3814, "step": 16222 }, { "epoch": 0.9879121882897421, "grad_norm": 1.0515767055761887, "learning_rate": 4.676200678997851e-06, "loss": 0.3875, "step": 16223 }, { "epoch": 0.98797308406662, "grad_norm": 0.9603590053641187, "learning_rate": 4.676161402131455e-06, "loss": 0.3932, "step": 16224 }, { "epoch": 0.9880339798434978, "grad_norm": 1.0117894813880277, "learning_rate": 4.67612212304803e-06, "loss": 0.3874, "step": 16225 }, { "epoch": 0.9880948756203757, "grad_norm": 1.0522828902592163, "learning_rate": 4.676082841747618e-06, "loss": 0.3727, "step": 16226 }, { "epoch": 0.9881557713972536, "grad_norm": 0.9776665044221723, "learning_rate": 4.676043558230258e-06, "loss": 0.3737, "step": 16227 }, { "epoch": 0.9882166671741315, "grad_norm": 1.0727587903272258, "learning_rate": 4.6760042724959895e-06, "loss": 0.376, "step": 16228 }, { "epoch": 0.9882775629510093, "grad_norm": 1.0232208801492568, "learning_rate": 4.675964984544854e-06, "loss": 0.3801, "step": 16229 }, { "epoch": 0.9883384587278872, "grad_norm": 0.9738575270929011, "learning_rate": 4.67592569437689e-06, "loss": 0.3957, "step": 16230 }, { "epoch": 0.9883993545047651, "grad_norm": 0.9930744858831636, "learning_rate": 4.675886401992138e-06, "loss": 0.3897, "step": 16231 }, { "epoch": 0.988460250281643, "grad_norm": 0.9968090180715501, "learning_rate": 4.675847107390639e-06, "loss": 0.3548, "step": 16232 }, { "epoch": 0.9885211460585208, "grad_norm": 1.0300464206882167, "learning_rate": 4.675807810572431e-06, "loss": 0.3723, "step": 16233 }, { "epoch": 0.9885820418353987, "grad_norm": 1.0486667997720778, "learning_rate": 4.675768511537556e-06, "loss": 0.4075, "step": 16234 }, { "epoch": 0.9886429376122766, "grad_norm": 0.9823663995492986, "learning_rate": 4.675729210286053e-06, "loss": 0.4693, "step": 16235 }, { "epoch": 0.9887038333891545, "grad_norm": 0.9680351214269808, "learning_rate": 4.675689906817962e-06, "loss": 0.4094, "step": 16236 }, { "epoch": 0.9887647291660323, "grad_norm": 1.012288028667421, "learning_rate": 4.675650601133324e-06, "loss": 0.4484, "step": 16237 }, { "epoch": 0.9888256249429103, "grad_norm": 0.9595766429163871, "learning_rate": 4.6756112932321785e-06, "loss": 0.4557, "step": 16238 }, { "epoch": 0.9888865207197881, "grad_norm": 0.9380333830208059, "learning_rate": 4.675571983114565e-06, "loss": 0.3961, "step": 16239 }, { "epoch": 0.988947416496666, "grad_norm": 0.994118688232684, "learning_rate": 4.675532670780524e-06, "loss": 0.382, "step": 16240 }, { "epoch": 0.9890083122735438, "grad_norm": 1.0027412737144537, "learning_rate": 4.675493356230095e-06, "loss": 0.3985, "step": 16241 }, { "epoch": 0.9890692080504218, "grad_norm": 1.0628135104746894, "learning_rate": 4.67545403946332e-06, "loss": 0.3584, "step": 16242 }, { "epoch": 0.9891301038272996, "grad_norm": 0.9383163659044449, "learning_rate": 4.6754147204802365e-06, "loss": 0.4462, "step": 16243 }, { "epoch": 0.9891909996041774, "grad_norm": 0.9652222608521926, "learning_rate": 4.675375399280886e-06, "loss": 0.4032, "step": 16244 }, { "epoch": 0.9892518953810553, "grad_norm": 0.9751193039935881, "learning_rate": 4.675336075865309e-06, "loss": 0.4586, "step": 16245 }, { "epoch": 0.9893127911579332, "grad_norm": 0.9550536119656439, "learning_rate": 4.675296750233543e-06, "loss": 0.3988, "step": 16246 }, { "epoch": 0.9893736869348111, "grad_norm": 0.9971521914687201, "learning_rate": 4.675257422385632e-06, "loss": 0.4601, "step": 16247 }, { "epoch": 0.9894345827116889, "grad_norm": 0.9660228717676052, "learning_rate": 4.675218092321613e-06, "loss": 0.4205, "step": 16248 }, { "epoch": 0.9894954784885668, "grad_norm": 0.9985745948185358, "learning_rate": 4.675178760041526e-06, "loss": 0.3904, "step": 16249 }, { "epoch": 0.9895563742654447, "grad_norm": 0.9786207379776738, "learning_rate": 4.675139425545414e-06, "loss": 0.3652, "step": 16250 }, { "epoch": 0.9896172700423226, "grad_norm": 1.0894766890740644, "learning_rate": 4.675100088833314e-06, "loss": 0.4207, "step": 16251 }, { "epoch": 0.9896781658192004, "grad_norm": 0.9918779810691079, "learning_rate": 4.675060749905268e-06, "loss": 0.3709, "step": 16252 }, { "epoch": 0.9897390615960783, "grad_norm": 1.0569581006333355, "learning_rate": 4.675021408761316e-06, "loss": 0.4384, "step": 16253 }, { "epoch": 0.9897999573729562, "grad_norm": 1.0388311084455797, "learning_rate": 4.674982065401495e-06, "loss": 0.3873, "step": 16254 }, { "epoch": 0.9898608531498341, "grad_norm": 1.0331859436626376, "learning_rate": 4.674942719825849e-06, "loss": 0.4238, "step": 16255 }, { "epoch": 0.9899217489267119, "grad_norm": 0.9713212918706713, "learning_rate": 4.6749033720344165e-06, "loss": 0.4441, "step": 16256 }, { "epoch": 0.9899826447035898, "grad_norm": 1.0502130850545415, "learning_rate": 4.674864022027238e-06, "loss": 0.3587, "step": 16257 }, { "epoch": 0.9900435404804677, "grad_norm": 1.0576056258400464, "learning_rate": 4.674824669804353e-06, "loss": 0.4433, "step": 16258 }, { "epoch": 0.9901044362573456, "grad_norm": 0.995711838618686, "learning_rate": 4.674785315365801e-06, "loss": 0.3834, "step": 16259 }, { "epoch": 0.9901653320342234, "grad_norm": 1.058693850794683, "learning_rate": 4.674745958711624e-06, "loss": 0.4512, "step": 16260 }, { "epoch": 0.9902262278111013, "grad_norm": 1.058882062686604, "learning_rate": 4.67470659984186e-06, "loss": 0.3542, "step": 16261 }, { "epoch": 0.9902871235879792, "grad_norm": 0.9455845117074854, "learning_rate": 4.6746672387565515e-06, "loss": 0.4561, "step": 16262 }, { "epoch": 0.9903480193648571, "grad_norm": 1.0477889628798283, "learning_rate": 4.6746278754557364e-06, "loss": 0.4046, "step": 16263 }, { "epoch": 0.9904089151417349, "grad_norm": 1.045334828589822, "learning_rate": 4.674588509939456e-06, "loss": 0.3729, "step": 16264 }, { "epoch": 0.9904698109186127, "grad_norm": 1.045387815022639, "learning_rate": 4.67454914220775e-06, "loss": 0.3531, "step": 16265 }, { "epoch": 0.9905307066954907, "grad_norm": 1.068805419381018, "learning_rate": 4.6745097722606585e-06, "loss": 0.4229, "step": 16266 }, { "epoch": 0.9905916024723685, "grad_norm": 1.0297120174611025, "learning_rate": 4.674470400098221e-06, "loss": 0.3651, "step": 16267 }, { "epoch": 0.9906524982492464, "grad_norm": 1.0037499631015478, "learning_rate": 4.6744310257204785e-06, "loss": 0.3381, "step": 16268 }, { "epoch": 0.9907133940261242, "grad_norm": 0.9429108554023938, "learning_rate": 4.674391649127471e-06, "loss": 0.4049, "step": 16269 }, { "epoch": 0.9907742898030022, "grad_norm": 1.048716010246691, "learning_rate": 4.674352270319239e-06, "loss": 0.3775, "step": 16270 }, { "epoch": 0.99083518557988, "grad_norm": 1.087254879621394, "learning_rate": 4.674312889295821e-06, "loss": 0.41, "step": 16271 }, { "epoch": 0.9908960813567579, "grad_norm": 1.010346845811098, "learning_rate": 4.67427350605726e-06, "loss": 0.3114, "step": 16272 }, { "epoch": 0.9909569771336357, "grad_norm": 0.9940983342052173, "learning_rate": 4.6742341206035925e-06, "loss": 0.3603, "step": 16273 }, { "epoch": 0.9910178729105137, "grad_norm": 1.0463912584497175, "learning_rate": 4.674194732934861e-06, "loss": 0.4234, "step": 16274 }, { "epoch": 0.9910787686873915, "grad_norm": 1.0975208322177818, "learning_rate": 4.674155343051105e-06, "loss": 0.3633, "step": 16275 }, { "epoch": 0.9911396644642694, "grad_norm": 1.0539020486175188, "learning_rate": 4.674115950952366e-06, "loss": 0.3935, "step": 16276 }, { "epoch": 0.9912005602411472, "grad_norm": 1.0212191875958059, "learning_rate": 4.674076556638682e-06, "loss": 0.3857, "step": 16277 }, { "epoch": 0.9912614560180252, "grad_norm": 1.0007857106958076, "learning_rate": 4.6740371601100946e-06, "loss": 0.4116, "step": 16278 }, { "epoch": 0.991322351794903, "grad_norm": 0.9560539274369929, "learning_rate": 4.673997761366642e-06, "loss": 0.4426, "step": 16279 }, { "epoch": 0.9913832475717809, "grad_norm": 0.9681195240652467, "learning_rate": 4.673958360408367e-06, "loss": 0.3437, "step": 16280 }, { "epoch": 0.9914441433486588, "grad_norm": 0.9376118157833015, "learning_rate": 4.673918957235307e-06, "loss": 0.4358, "step": 16281 }, { "epoch": 0.9915050391255367, "grad_norm": 1.088477901801933, "learning_rate": 4.673879551847505e-06, "loss": 0.4287, "step": 16282 }, { "epoch": 0.9915659349024145, "grad_norm": 1.0542361561539992, "learning_rate": 4.673840144244999e-06, "loss": 0.4196, "step": 16283 }, { "epoch": 0.9916268306792924, "grad_norm": 1.0166804739813673, "learning_rate": 4.67380073442783e-06, "loss": 0.4449, "step": 16284 }, { "epoch": 0.9916877264561703, "grad_norm": 1.0871946062490336, "learning_rate": 4.673761322396038e-06, "loss": 0.3604, "step": 16285 }, { "epoch": 0.9917486222330482, "grad_norm": 1.089631797583479, "learning_rate": 4.673721908149663e-06, "loss": 0.3832, "step": 16286 }, { "epoch": 0.991809518009926, "grad_norm": 0.9313172533764063, "learning_rate": 4.673682491688746e-06, "loss": 0.4965, "step": 16287 }, { "epoch": 0.9918704137868039, "grad_norm": 1.032343556521044, "learning_rate": 4.673643073013325e-06, "loss": 0.4143, "step": 16288 }, { "epoch": 0.9919313095636818, "grad_norm": 0.8816777311605312, "learning_rate": 4.673603652123443e-06, "loss": 0.4712, "step": 16289 }, { "epoch": 0.9919922053405597, "grad_norm": 1.0215811968922381, "learning_rate": 4.673564229019139e-06, "loss": 0.3866, "step": 16290 }, { "epoch": 0.9920531011174375, "grad_norm": 1.0082187856503215, "learning_rate": 4.673524803700452e-06, "loss": 0.3834, "step": 16291 }, { "epoch": 0.9921139968943153, "grad_norm": 1.0788121597477218, "learning_rate": 4.673485376167423e-06, "loss": 0.3807, "step": 16292 }, { "epoch": 0.9921748926711933, "grad_norm": 0.9656254906201728, "learning_rate": 4.673445946420093e-06, "loss": 0.384, "step": 16293 }, { "epoch": 0.9922357884480711, "grad_norm": 0.9351576119490655, "learning_rate": 4.673406514458501e-06, "loss": 0.4762, "step": 16294 }, { "epoch": 0.992296684224949, "grad_norm": 1.0331636279333793, "learning_rate": 4.673367080282688e-06, "loss": 0.3496, "step": 16295 }, { "epoch": 0.9923575800018268, "grad_norm": 0.9649964983867222, "learning_rate": 4.673327643892693e-06, "loss": 0.3909, "step": 16296 }, { "epoch": 0.9924184757787048, "grad_norm": 0.9874064750430391, "learning_rate": 4.673288205288558e-06, "loss": 0.4001, "step": 16297 }, { "epoch": 0.9924793715555826, "grad_norm": 1.1034046345402293, "learning_rate": 4.673248764470322e-06, "loss": 0.4108, "step": 16298 }, { "epoch": 0.9925402673324605, "grad_norm": 1.0104716180378681, "learning_rate": 4.673209321438025e-06, "loss": 0.4031, "step": 16299 }, { "epoch": 0.9926011631093383, "grad_norm": 1.0055427585059935, "learning_rate": 4.673169876191708e-06, "loss": 0.3588, "step": 16300 }, { "epoch": 0.9926620588862163, "grad_norm": 1.0039599996053512, "learning_rate": 4.67313042873141e-06, "loss": 0.3987, "step": 16301 }, { "epoch": 0.9927229546630941, "grad_norm": 1.0324394408261715, "learning_rate": 4.673090979057172e-06, "loss": 0.5085, "step": 16302 }, { "epoch": 0.992783850439972, "grad_norm": 1.042344520617762, "learning_rate": 4.673051527169035e-06, "loss": 0.4309, "step": 16303 }, { "epoch": 0.9928447462168498, "grad_norm": 1.0709503507436016, "learning_rate": 4.673012073067037e-06, "loss": 0.4045, "step": 16304 }, { "epoch": 0.9929056419937278, "grad_norm": 1.0193122854160725, "learning_rate": 4.6729726167512206e-06, "loss": 0.414, "step": 16305 }, { "epoch": 0.9929665377706056, "grad_norm": 1.0527142396465807, "learning_rate": 4.6729331582216245e-06, "loss": 0.3968, "step": 16306 }, { "epoch": 0.9930274335474835, "grad_norm": 0.9593302473500128, "learning_rate": 4.672893697478288e-06, "loss": 0.4066, "step": 16307 }, { "epoch": 0.9930883293243613, "grad_norm": 1.0440406528333759, "learning_rate": 4.672854234521254e-06, "loss": 0.385, "step": 16308 }, { "epoch": 0.9931492251012393, "grad_norm": 0.9942177885955104, "learning_rate": 4.672814769350561e-06, "loss": 0.3434, "step": 16309 }, { "epoch": 0.9932101208781171, "grad_norm": 1.0346839969910682, "learning_rate": 4.67277530196625e-06, "loss": 0.3417, "step": 16310 }, { "epoch": 0.993271016654995, "grad_norm": 0.9433808844719926, "learning_rate": 4.67273583236836e-06, "loss": 0.4159, "step": 16311 }, { "epoch": 0.9933319124318728, "grad_norm": 0.9878233813770662, "learning_rate": 4.672696360556932e-06, "loss": 0.4439, "step": 16312 }, { "epoch": 0.9933928082087508, "grad_norm": 1.0364978064497319, "learning_rate": 4.672656886532007e-06, "loss": 0.3355, "step": 16313 }, { "epoch": 0.9934537039856286, "grad_norm": 0.9999217265040813, "learning_rate": 4.6726174102936225e-06, "loss": 0.421, "step": 16314 }, { "epoch": 0.9935145997625064, "grad_norm": 0.9940130745287283, "learning_rate": 4.672577931841822e-06, "loss": 0.349, "step": 16315 }, { "epoch": 0.9935754955393843, "grad_norm": 1.0401609164790568, "learning_rate": 4.672538451176643e-06, "loss": 0.3476, "step": 16316 }, { "epoch": 0.9936363913162622, "grad_norm": 1.0099047574044397, "learning_rate": 4.672498968298128e-06, "loss": 0.4202, "step": 16317 }, { "epoch": 0.9936972870931401, "grad_norm": 0.9872749304097747, "learning_rate": 4.672459483206316e-06, "loss": 0.3921, "step": 16318 }, { "epoch": 0.9937581828700179, "grad_norm": 1.037475639215681, "learning_rate": 4.6724199959012474e-06, "loss": 0.348, "step": 16319 }, { "epoch": 0.9938190786468959, "grad_norm": 1.002517908461545, "learning_rate": 4.672380506382963e-06, "loss": 0.4794, "step": 16320 }, { "epoch": 0.9938799744237737, "grad_norm": 1.0530432539505288, "learning_rate": 4.672341014651502e-06, "loss": 0.3368, "step": 16321 }, { "epoch": 0.9939408702006516, "grad_norm": 0.9732399869834257, "learning_rate": 4.6723015207069055e-06, "loss": 0.4205, "step": 16322 }, { "epoch": 0.9940017659775294, "grad_norm": 1.045106658192609, "learning_rate": 4.672262024549213e-06, "loss": 0.3985, "step": 16323 }, { "epoch": 0.9940626617544074, "grad_norm": 1.0895904773480876, "learning_rate": 4.6722225261784644e-06, "loss": 0.3688, "step": 16324 }, { "epoch": 0.9941235575312852, "grad_norm": 0.9682953590148965, "learning_rate": 4.672183025594702e-06, "loss": 0.475, "step": 16325 }, { "epoch": 0.9941844533081631, "grad_norm": 0.9529084986780416, "learning_rate": 4.672143522797964e-06, "loss": 0.4622, "step": 16326 }, { "epoch": 0.9942453490850409, "grad_norm": 0.9810007894919436, "learning_rate": 4.672104017788292e-06, "loss": 0.4108, "step": 16327 }, { "epoch": 0.9943062448619189, "grad_norm": 1.069183184988766, "learning_rate": 4.672064510565725e-06, "loss": 0.4384, "step": 16328 }, { "epoch": 0.9943671406387967, "grad_norm": 0.9309586033970184, "learning_rate": 4.672025001130304e-06, "loss": 0.4234, "step": 16329 }, { "epoch": 0.9944280364156746, "grad_norm": 0.9803140487046301, "learning_rate": 4.671985489482069e-06, "loss": 0.3775, "step": 16330 }, { "epoch": 0.9944889321925524, "grad_norm": 1.0324257934129235, "learning_rate": 4.67194597562106e-06, "loss": 0.3666, "step": 16331 }, { "epoch": 0.9945498279694304, "grad_norm": 1.0292243410557242, "learning_rate": 4.671906459547319e-06, "loss": 0.4693, "step": 16332 }, { "epoch": 0.9946107237463082, "grad_norm": 0.9547298523674533, "learning_rate": 4.671866941260883e-06, "loss": 0.4473, "step": 16333 }, { "epoch": 0.9946716195231861, "grad_norm": 1.0410101238831113, "learning_rate": 4.671827420761795e-06, "loss": 0.3463, "step": 16334 }, { "epoch": 0.9947325153000639, "grad_norm": 1.0170073206605121, "learning_rate": 4.671787898050094e-06, "loss": 0.3802, "step": 16335 }, { "epoch": 0.9947934110769419, "grad_norm": 1.0040332017322384, "learning_rate": 4.671748373125821e-06, "loss": 0.3739, "step": 16336 }, { "epoch": 0.9948543068538197, "grad_norm": 0.9305764389871382, "learning_rate": 4.6717088459890155e-06, "loss": 0.4706, "step": 16337 }, { "epoch": 0.9949152026306975, "grad_norm": 1.066649946797029, "learning_rate": 4.671669316639719e-06, "loss": 0.3241, "step": 16338 }, { "epoch": 0.9949760984075754, "grad_norm": 0.9878202871356185, "learning_rate": 4.6716297850779705e-06, "loss": 0.3692, "step": 16339 }, { "epoch": 0.9950369941844533, "grad_norm": 1.0464913951589545, "learning_rate": 4.67159025130381e-06, "loss": 0.4081, "step": 16340 }, { "epoch": 0.9950978899613312, "grad_norm": 0.998017930044991, "learning_rate": 4.67155071531728e-06, "loss": 0.4032, "step": 16341 }, { "epoch": 0.995158785738209, "grad_norm": 0.9994437242289504, "learning_rate": 4.671511177118419e-06, "loss": 0.4753, "step": 16342 }, { "epoch": 0.9952196815150869, "grad_norm": 0.9578218743138102, "learning_rate": 4.671471636707267e-06, "loss": 0.4031, "step": 16343 }, { "epoch": 0.9952805772919648, "grad_norm": 0.9807301862954403, "learning_rate": 4.671432094083864e-06, "loss": 0.3964, "step": 16344 }, { "epoch": 0.9953414730688427, "grad_norm": 1.04508241736064, "learning_rate": 4.6713925492482525e-06, "loss": 0.373, "step": 16345 }, { "epoch": 0.9954023688457205, "grad_norm": 0.9739951100955638, "learning_rate": 4.67135300220047e-06, "loss": 0.3399, "step": 16346 }, { "epoch": 0.9954632646225984, "grad_norm": 0.9875117413809921, "learning_rate": 4.6713134529405595e-06, "loss": 0.4112, "step": 16347 }, { "epoch": 0.9955241603994763, "grad_norm": 0.9820014426903569, "learning_rate": 4.67127390146856e-06, "loss": 0.3761, "step": 16348 }, { "epoch": 0.9955850561763542, "grad_norm": 0.9592900044563676, "learning_rate": 4.671234347784511e-06, "loss": 0.3881, "step": 16349 }, { "epoch": 0.995645951953232, "grad_norm": 1.0943232527860531, "learning_rate": 4.671194791888454e-06, "loss": 0.386, "step": 16350 }, { "epoch": 0.9957068477301099, "grad_norm": 1.0374583679423184, "learning_rate": 4.671155233780429e-06, "loss": 0.3919, "step": 16351 }, { "epoch": 0.9957677435069878, "grad_norm": 0.9594233777540153, "learning_rate": 4.671115673460476e-06, "loss": 0.4099, "step": 16352 }, { "epoch": 0.9958286392838657, "grad_norm": 1.0215389808999609, "learning_rate": 4.671076110928635e-06, "loss": 0.3976, "step": 16353 }, { "epoch": 0.9958895350607435, "grad_norm": 1.0232541294811832, "learning_rate": 4.671036546184948e-06, "loss": 0.3674, "step": 16354 }, { "epoch": 0.9959504308376214, "grad_norm": 1.0115287969654208, "learning_rate": 4.670996979229454e-06, "loss": 0.3629, "step": 16355 }, { "epoch": 0.9960113266144993, "grad_norm": 1.0503680985403778, "learning_rate": 4.670957410062193e-06, "loss": 0.3587, "step": 16356 }, { "epoch": 0.9960722223913772, "grad_norm": 1.034383768771892, "learning_rate": 4.670917838683205e-06, "loss": 0.4309, "step": 16357 }, { "epoch": 0.996133118168255, "grad_norm": 1.0450714434728452, "learning_rate": 4.6708782650925314e-06, "loss": 0.4784, "step": 16358 }, { "epoch": 0.9961940139451329, "grad_norm": 1.0107185255483133, "learning_rate": 4.670838689290212e-06, "loss": 0.4302, "step": 16359 }, { "epoch": 0.9962549097220108, "grad_norm": 1.1875008335532649, "learning_rate": 4.670799111276289e-06, "loss": 0.3747, "step": 16360 }, { "epoch": 0.9963158054988887, "grad_norm": 0.9629892059434939, "learning_rate": 4.670759531050799e-06, "loss": 0.4342, "step": 16361 }, { "epoch": 0.9963767012757665, "grad_norm": 1.031352796195665, "learning_rate": 4.670719948613785e-06, "loss": 0.3628, "step": 16362 }, { "epoch": 0.9964375970526445, "grad_norm": 0.9554157292621482, "learning_rate": 4.670680363965286e-06, "loss": 0.4285, "step": 16363 }, { "epoch": 0.9964984928295223, "grad_norm": 1.0399704544494819, "learning_rate": 4.6706407771053444e-06, "loss": 0.3899, "step": 16364 }, { "epoch": 0.9965593886064001, "grad_norm": 1.0844311429024895, "learning_rate": 4.6706011880339985e-06, "loss": 0.3914, "step": 16365 }, { "epoch": 0.996620284383278, "grad_norm": 1.0344217323527458, "learning_rate": 4.670561596751288e-06, "loss": 0.3942, "step": 16366 }, { "epoch": 0.9966811801601559, "grad_norm": 0.9326651887319554, "learning_rate": 4.6705220032572565e-06, "loss": 0.3986, "step": 16367 }, { "epoch": 0.9967420759370338, "grad_norm": 0.9620585839178937, "learning_rate": 4.670482407551941e-06, "loss": 0.3408, "step": 16368 }, { "epoch": 0.9968029717139116, "grad_norm": 0.9680724028117165, "learning_rate": 4.670442809635384e-06, "loss": 0.4608, "step": 16369 }, { "epoch": 0.9968638674907895, "grad_norm": 1.0301034152723434, "learning_rate": 4.670403209507624e-06, "loss": 0.38, "step": 16370 }, { "epoch": 0.9969247632676674, "grad_norm": 0.9910474368570111, "learning_rate": 4.670363607168703e-06, "loss": 0.4619, "step": 16371 }, { "epoch": 0.9969856590445453, "grad_norm": 0.931230593200955, "learning_rate": 4.670324002618661e-06, "loss": 0.3936, "step": 16372 }, { "epoch": 0.9970465548214231, "grad_norm": 0.9537297679479763, "learning_rate": 4.670284395857537e-06, "loss": 0.4222, "step": 16373 }, { "epoch": 0.997107450598301, "grad_norm": 1.0583320705781079, "learning_rate": 4.6702447868853724e-06, "loss": 0.4056, "step": 16374 }, { "epoch": 0.9971683463751789, "grad_norm": 1.0011696460622266, "learning_rate": 4.6702051757022085e-06, "loss": 0.3664, "step": 16375 }, { "epoch": 0.9972292421520568, "grad_norm": 0.9666764281351278, "learning_rate": 4.6701655623080845e-06, "loss": 0.4096, "step": 16376 }, { "epoch": 0.9972901379289346, "grad_norm": 0.9531675803440088, "learning_rate": 4.67012594670304e-06, "loss": 0.4165, "step": 16377 }, { "epoch": 0.9973510337058125, "grad_norm": 1.082327989042442, "learning_rate": 4.670086328887117e-06, "loss": 0.3907, "step": 16378 }, { "epoch": 0.9974119294826904, "grad_norm": 1.0683460335381358, "learning_rate": 4.670046708860355e-06, "loss": 0.4018, "step": 16379 }, { "epoch": 0.9974728252595683, "grad_norm": 0.970078006364967, "learning_rate": 4.670007086622795e-06, "loss": 0.4625, "step": 16380 }, { "epoch": 0.9975337210364461, "grad_norm": 1.0102176753823824, "learning_rate": 4.669967462174477e-06, "loss": 0.4453, "step": 16381 }, { "epoch": 0.997594616813324, "grad_norm": 0.9865567656183907, "learning_rate": 4.669927835515441e-06, "loss": 0.4652, "step": 16382 }, { "epoch": 0.9976555125902019, "grad_norm": 1.119278475225428, "learning_rate": 4.669888206645728e-06, "loss": 0.3714, "step": 16383 }, { "epoch": 0.9977164083670798, "grad_norm": 1.0315483628892081, "learning_rate": 4.669848575565377e-06, "loss": 0.3971, "step": 16384 }, { "epoch": 0.9977773041439576, "grad_norm": 1.0365113854117551, "learning_rate": 4.6698089422744296e-06, "loss": 0.3979, "step": 16385 }, { "epoch": 0.9978381999208354, "grad_norm": 0.980647724185664, "learning_rate": 4.669769306772925e-06, "loss": 0.407, "step": 16386 }, { "epoch": 0.9978990956977134, "grad_norm": 0.9935504122642005, "learning_rate": 4.669729669060907e-06, "loss": 0.4237, "step": 16387 }, { "epoch": 0.9979599914745912, "grad_norm": 0.9930618616457887, "learning_rate": 4.669690029138412e-06, "loss": 0.4295, "step": 16388 }, { "epoch": 0.9980208872514691, "grad_norm": 1.0576958614047776, "learning_rate": 4.669650387005482e-06, "loss": 0.3786, "step": 16389 }, { "epoch": 0.9980817830283469, "grad_norm": 0.972812520703079, "learning_rate": 4.669610742662157e-06, "loss": 0.4446, "step": 16390 }, { "epoch": 0.9981426788052249, "grad_norm": 1.0037943036141155, "learning_rate": 4.669571096108478e-06, "loss": 0.431, "step": 16391 }, { "epoch": 0.9982035745821027, "grad_norm": 1.0252114480908174, "learning_rate": 4.669531447344487e-06, "loss": 0.4084, "step": 16392 }, { "epoch": 0.9982644703589806, "grad_norm": 0.9739527266135527, "learning_rate": 4.66949179637022e-06, "loss": 0.4842, "step": 16393 }, { "epoch": 0.9983253661358584, "grad_norm": 1.0130386950581087, "learning_rate": 4.66945214318572e-06, "loss": 0.3732, "step": 16394 }, { "epoch": 0.9983862619127364, "grad_norm": 1.0405944187073484, "learning_rate": 4.669412487791028e-06, "loss": 0.36, "step": 16395 }, { "epoch": 0.9984471576896142, "grad_norm": 0.9583178599587318, "learning_rate": 4.669372830186183e-06, "loss": 0.4552, "step": 16396 }, { "epoch": 0.9985080534664921, "grad_norm": 1.0363012610134228, "learning_rate": 4.669333170371227e-06, "loss": 0.4199, "step": 16397 }, { "epoch": 0.9985689492433699, "grad_norm": 0.9873669037740708, "learning_rate": 4.6692935083461985e-06, "loss": 0.4166, "step": 16398 }, { "epoch": 0.9986298450202479, "grad_norm": 0.9992838399354966, "learning_rate": 4.66925384411114e-06, "loss": 0.3265, "step": 16399 }, { "epoch": 0.9986907407971257, "grad_norm": 0.9678830883731682, "learning_rate": 4.669214177666091e-06, "loss": 0.3793, "step": 16400 }, { "epoch": 0.9987516365740036, "grad_norm": 0.9196016628246546, "learning_rate": 4.669174509011089e-06, "loss": 0.4241, "step": 16401 }, { "epoch": 0.9988125323508815, "grad_norm": 0.9669122223366493, "learning_rate": 4.6691348381461795e-06, "loss": 0.42, "step": 16402 }, { "epoch": 0.9988734281277594, "grad_norm": 0.9615998768573056, "learning_rate": 4.6690951650714e-06, "loss": 0.4194, "step": 16403 }, { "epoch": 0.9989343239046372, "grad_norm": 0.9942423070328413, "learning_rate": 4.669055489786792e-06, "loss": 0.3746, "step": 16404 }, { "epoch": 0.9989952196815151, "grad_norm": 0.9562264871781344, "learning_rate": 4.669015812292395e-06, "loss": 0.3459, "step": 16405 }, { "epoch": 0.999056115458393, "grad_norm": 1.0082491958643642, "learning_rate": 4.668976132588249e-06, "loss": 0.4328, "step": 16406 }, { "epoch": 0.9991170112352709, "grad_norm": 0.9783937948739571, "learning_rate": 4.668936450674396e-06, "loss": 0.3405, "step": 16407 }, { "epoch": 0.9991779070121487, "grad_norm": 0.9957657852627952, "learning_rate": 4.668896766550875e-06, "loss": 0.3499, "step": 16408 }, { "epoch": 0.9992388027890265, "grad_norm": 1.0474684196312256, "learning_rate": 4.668857080217728e-06, "loss": 0.4395, "step": 16409 }, { "epoch": 0.9992996985659045, "grad_norm": 1.0007812562738438, "learning_rate": 4.668817391674994e-06, "loss": 0.4676, "step": 16410 }, { "epoch": 0.9993605943427823, "grad_norm": 1.020394506399858, "learning_rate": 4.668777700922715e-06, "loss": 0.3963, "step": 16411 }, { "epoch": 0.9994214901196602, "grad_norm": 1.097071825750329, "learning_rate": 4.668738007960928e-06, "loss": 0.3816, "step": 16412 }, { "epoch": 0.999482385896538, "grad_norm": 1.0260989571324572, "learning_rate": 4.6686983127896775e-06, "loss": 0.4521, "step": 16413 }, { "epoch": 0.999543281673416, "grad_norm": 0.9659077091295296, "learning_rate": 4.668658615409002e-06, "loss": 0.3752, "step": 16414 }, { "epoch": 0.9996041774502938, "grad_norm": 1.0294640623499853, "learning_rate": 4.668618915818942e-06, "loss": 0.405, "step": 16415 }, { "epoch": 0.9996650732271717, "grad_norm": 0.8393346663369416, "learning_rate": 4.668579214019538e-06, "loss": 0.4625, "step": 16416 }, { "epoch": 0.9997259690040495, "grad_norm": 0.9758446870082272, "learning_rate": 4.668539510010831e-06, "loss": 0.3582, "step": 16417 }, { "epoch": 0.9997868647809275, "grad_norm": 0.9648370106636013, "learning_rate": 4.668499803792861e-06, "loss": 0.3784, "step": 16418 }, { "epoch": 0.9998477605578053, "grad_norm": 0.960900385331089, "learning_rate": 4.6684600953656676e-06, "loss": 0.3498, "step": 16419 }, { "epoch": 0.9999086563346832, "grad_norm": 0.9539832337050741, "learning_rate": 4.668420384729293e-06, "loss": 0.4093, "step": 16420 }, { "epoch": 0.999969552111561, "grad_norm": 0.927549372575992, "learning_rate": 4.668380671883776e-06, "loss": 0.4226, "step": 16421 } ], "logging_steps": 1, "max_steps": 98526, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 16421, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 529625749782528.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }